Update Linux to v5.4.2

Pull in the v5.4.2 state of net/6lowpan, net/802, net/8021q and
net/9p. Notable changes visible in this diff:

* Replace GPL license boilerplate with SPDX-License-Identifier lines
  across all four subsystems.
* 6lowpan: make the debugfs setup helpers return void now that
  debugfs creation failures are ignored, switch to
  DEFINE_DEBUGFS_ATTRIBUTE() and DEFINE_SHOW_ATTRIBUTE(), and size
  lowpan_nexthdr_nhcs[] as NEXTHDR_MAX + 1 so next-header value 255
  gets a slot.
* 8021q: add VLAN_FLAG_BRIDGE_BINDING so a vlan device's carrier and
  operstate need not mirror the real device, inherit tunnel offload
  features through vlan_tnl_features(), export a vlan_for_each()
  iterator, move 802.1Q/802.1AD GRO offload registration into
  vlan_core.c behind an fs_initcall(), drop the per-device lockdep
  nesting helpers, and forward SIOCSHWTSTAMP to the real device only
  from the initial network namespace.
* 9p: replace the static tag array and idpool with IDR-allocated,
  refcounted requests backed by slab caches, reject msize options
  below 4k, and drop util.o from the build.

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
index 53cf446..01853ce 100644
--- a/net/6lowpan/6lowpan_i.h
+++ b/net/6lowpan/6lowpan_i.h
@@ -18,24 +18,16 @@
 int addrconf_ifid_802154_6lowpan(u8 *eui, struct net_device *dev);
 
 #ifdef CONFIG_6LOWPAN_DEBUGFS
-int lowpan_dev_debugfs_init(struct net_device *dev);
+void lowpan_dev_debugfs_init(struct net_device *dev);
 void lowpan_dev_debugfs_exit(struct net_device *dev);
 
-int __init lowpan_debugfs_init(void);
+void __init lowpan_debugfs_init(void);
 void lowpan_debugfs_exit(void);
 #else
-static inline int lowpan_dev_debugfs_init(struct net_device *dev)
-{
-	return 0;
-}
-
+static inline void lowpan_dev_debugfs_init(struct net_device *dev) { }
 static inline void lowpan_dev_debugfs_exit(struct net_device *dev) { }
 
-static inline int __init lowpan_debugfs_init(void)
-{
-	return 0;
-}
-
+static inline void __init lowpan_debugfs_init(void) { }
 static inline void lowpan_debugfs_exit(void) { }
 #endif /* CONFIG_6LOWPAN_DEBUGFS */
 
diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig
index 9c05151..4c1f4c0 100644
--- a/net/6lowpan/Kconfig
+++ b/net/6lowpan/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 menuconfig 6LOWPAN
 	tristate "6LoWPAN Support"
 	depends on IPV6
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 40d3d72..a068757 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -1,11 +1,5 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0-only
+/*
  *
  * Authors:
  * (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de>
@@ -48,9 +42,7 @@
 	if (ret < 0)
 		return ret;
 
-	ret = lowpan_dev_debugfs_init(dev);
-	if (ret < 0)
-		unregister_netdevice(dev);
+	lowpan_dev_debugfs_init(dev);
 
 	return ret;
 }
@@ -158,9 +150,7 @@
 {
 	int ret;
 
-	ret = lowpan_debugfs_init();
-	if (ret < 0)
-		return ret;
+	lowpan_debugfs_init();
 
 	ret = register_netdevice_notifier(&lowpan_notifier);
 	if (ret < 0) {
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index 24915e0..1c140af 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -1,11 +1,5 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0-only
+/*
  *
  * Authors:
  * (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de>
@@ -41,9 +35,9 @@
 	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_flag_active_fops,
-			lowpan_ctx_flag_active_get,
-			lowpan_ctx_flag_active_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(lowpan_ctx_flag_active_fops,
+			 lowpan_ctx_flag_active_get,
+			 lowpan_ctx_flag_active_set, "%llu\n");
 
 static int lowpan_ctx_flag_c_set(void *data, u64 val)
 {
@@ -66,8 +60,8 @@
 	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_flag_c_fops, lowpan_ctx_flag_c_get,
-			lowpan_ctx_flag_c_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(lowpan_ctx_flag_c_fops, lowpan_ctx_flag_c_get,
+			 lowpan_ctx_flag_c_set, "%llu\n");
 
 static int lowpan_ctx_plen_set(void *data, u64 val)
 {
@@ -97,8 +91,8 @@
 	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_plen_fops, lowpan_ctx_plen_get,
-			lowpan_ctx_plen_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(lowpan_ctx_plen_fops, lowpan_ctx_plen_get,
+			 lowpan_ctx_plen_set, "%llu\n");
 
 static int lowpan_ctx_pfx_show(struct seq_file *file, void *offset)
 {
@@ -169,11 +163,11 @@
 	.release	= single_release,
 };
 
-static int lowpan_dev_debugfs_ctx_init(struct net_device *dev,
-				       struct dentry *ctx, u8 id)
+static void lowpan_dev_debugfs_ctx_init(struct net_device *dev,
+					struct dentry *ctx, u8 id)
 {
 	struct lowpan_dev *ldev = lowpan_dev(dev);
-	struct dentry *dentry, *root;
+	struct dentry *root;
 	char buf[32];
 
 	WARN_ON_ONCE(id > LOWPAN_IPHC_CTX_TABLE_SIZE);
@@ -181,34 +175,18 @@
 	sprintf(buf, "%d", id);
 
 	root = debugfs_create_dir(buf, ctx);
-	if (!root)
-		return -EINVAL;
 
-	dentry = debugfs_create_file("active", 0644, root,
-				     &ldev->ctx.table[id],
-				     &lowpan_ctx_flag_active_fops);
-	if (!dentry)
-		return -EINVAL;
+	debugfs_create_file("active", 0644, root, &ldev->ctx.table[id],
+			    &lowpan_ctx_flag_active_fops);
 
-	dentry = debugfs_create_file("compression", 0644, root,
-				     &ldev->ctx.table[id],
-				     &lowpan_ctx_flag_c_fops);
-	if (!dentry)
-		return -EINVAL;
+	debugfs_create_file("compression", 0644, root, &ldev->ctx.table[id],
+			    &lowpan_ctx_flag_c_fops);
 
-	dentry = debugfs_create_file("prefix", 0644, root,
-				     &ldev->ctx.table[id],
-				     &lowpan_ctx_pfx_fops);
-	if (!dentry)
-		return -EINVAL;
+	debugfs_create_file("prefix", 0644, root, &ldev->ctx.table[id],
+			    &lowpan_ctx_pfx_fops);
 
-	dentry = debugfs_create_file("prefix_len", 0644, root,
-				     &ldev->ctx.table[id],
-				     &lowpan_ctx_plen_fops);
-	if (!dentry)
-		return -EINVAL;
-
-	return 0;
+	debugfs_create_file("prefix_len", 0644, root, &ldev->ctx.table[id],
+			    &lowpan_ctx_plen_fops);
 }
 
 static int lowpan_context_show(struct seq_file *file, void *offset)
@@ -232,18 +210,7 @@
 
 	return 0;
 }
-
-static int lowpan_context_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, lowpan_context_show, inode->i_private);
-}
-
-static const struct file_operations lowpan_context_fops = {
-	.open		= lowpan_context_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(lowpan_context);
 
 static int lowpan_short_addr_get(void *data, u64 *val)
 {
@@ -256,67 +223,42 @@
 	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(lowpan_short_addr_fops, lowpan_short_addr_get,
-			NULL, "0x%04llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(lowpan_short_addr_fops, lowpan_short_addr_get, NULL,
+			 "0x%04llx\n");
 
-static int lowpan_dev_debugfs_802154_init(const struct net_device *dev,
+static void lowpan_dev_debugfs_802154_init(const struct net_device *dev,
 					  struct lowpan_dev *ldev)
 {
-	struct dentry *dentry, *root;
+	struct dentry *root;
 
 	if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
-		return 0;
+		return;
 
 	root = debugfs_create_dir("ieee802154", ldev->iface_debugfs);
-	if (!root)
-		return -EINVAL;
 
-	dentry = debugfs_create_file("short_addr", 0444, root,
-				     lowpan_802154_dev(dev)->wdev->ieee802154_ptr,
-				     &lowpan_short_addr_fops);
-	if (!dentry)
-		return -EINVAL;
-
-	return 0;
+	debugfs_create_file("short_addr", 0444, root,
+			    lowpan_802154_dev(dev)->wdev->ieee802154_ptr,
+			    &lowpan_short_addr_fops);
 }
 
-int lowpan_dev_debugfs_init(struct net_device *dev)
+void lowpan_dev_debugfs_init(struct net_device *dev)
 {
 	struct lowpan_dev *ldev = lowpan_dev(dev);
-	struct dentry *contexts, *dentry;
-	int ret, i;
+	struct dentry *contexts;
+	int i;
 
 	/* creating the root */
 	ldev->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs);
-	if (!ldev->iface_debugfs)
-		goto fail;
 
 	contexts = debugfs_create_dir("contexts", ldev->iface_debugfs);
-	if (!contexts)
-		goto remove_root;
 
-	dentry = debugfs_create_file("show", 0644, contexts,
-				     &lowpan_dev(dev)->ctx,
-				     &lowpan_context_fops);
-	if (!dentry)
-		goto remove_root;
+	debugfs_create_file("show", 0644, contexts, &lowpan_dev(dev)->ctx,
+			    &lowpan_context_fops);
 
-	for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) {
-		ret = lowpan_dev_debugfs_ctx_init(dev, contexts, i);
-		if (ret < 0)
-			goto remove_root;
-	}
+	for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
+		lowpan_dev_debugfs_ctx_init(dev, contexts, i);
 
-	ret = lowpan_dev_debugfs_802154_init(dev, ldev);
-	if (ret < 0)
-		goto remove_root;
-
-	return 0;
-
-remove_root:
-	lowpan_dev_debugfs_exit(dev);
-fail:
-	return -EINVAL;
+	lowpan_dev_debugfs_802154_init(dev, ldev);
 }
 
 void lowpan_dev_debugfs_exit(struct net_device *dev)
@@ -324,13 +266,9 @@
 	debugfs_remove_recursive(lowpan_dev(dev)->iface_debugfs);
 }
 
-int __init lowpan_debugfs_init(void)
+void __init lowpan_debugfs_init(void)
 {
 	lowpan_debugfs = debugfs_create_dir("6lowpan", NULL);
-	if (!lowpan_debugfs)
-		return -EINVAL;
-
-	return 0;
 }
 
 void lowpan_debugfs_exit(void)
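
The int-to-void conversion above follows the tree-wide rule that debugfs
creation failures are never fatal: the create helpers are written so a
failed parent dentry can be passed straight into the next call, so
callers simply stop checking. The same hunks swap DEFINE_SIMPLE_ATTRIBUTE
for DEFINE_DEBUGFS_ATTRIBUTE, whose generated handlers hold a
debugfs_file_get() reference and so survive concurrent file removal, and
fold the open-coded lowpan_context fops into DEFINE_SHOW_ATTRIBUTE(). A
minimal sketch of the resulting convention, with all example_* names
hypothetical:

```c
/* Minimal sketch of the debugfs convention adopted above; example_*
 * names are hypothetical. Creation results are deliberately not
 * checked: a debugfs failure must never take the subsystem down.
 */
#include <linux/debugfs.h>
#include <linux/module.h>

static struct dentry *example_root;
static u64 example_value;

static int example_get(void *data, u64 *val)
{
	*val = *(u64 *)data;
	return 0;
}

static int example_set(void *data, u64 val)
{
	*(u64 *)data = val;
	return 0;
}

/* Unlike DEFINE_SIMPLE_ATTRIBUTE, the read/write handlers generated
 * here pin the file with debugfs_file_get()/put(), so they are safe
 * against the file being removed while in use.
 */
DEFINE_DEBUGFS_ATTRIBUTE(example_fops, example_get, example_set, "%llu\n");

static int __init example_init(void)
{
	example_root = debugfs_create_dir("example", NULL);
	debugfs_create_file("value", 0644, example_root, &example_value,
			    &example_fops);
	return 0;	/* never fails because of debugfs */
}

static void __exit example_exit(void)
{
	debugfs_remove_recursive(example_root);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
```
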
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
index 941df2f..16be8f8 100644
--- a/net/6lowpan/ndisc.c
+++ b/net/6lowpan/ndisc.c
@@ -1,11 +1,5 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0-only
+/*
  *
  * Authors:
  * (C) 2016 Pengutronix, Alexander Aring <aar@pengutronix.de>
diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c
index 4fa2fdd..d6bbbd4 100644
--- a/net/6lowpan/nhc.c
+++ b/net/6lowpan/nhc.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN next header compression
  *
- *
  *	Authors:
  *	Alexander Aring		<aar@pengutronix.de>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/netdevice.h>
@@ -18,7 +13,7 @@
 #include "nhc.h"
 
 static struct rb_root rb_root = RB_ROOT;
-static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX];
+static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX + 1];
 static DEFINE_SPINLOCK(lowpan_nhc_lock);
 
 static int lowpan_nhc_insert(struct lowpan_nhc *nhc)
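
The one functional change in this file is the array bound: IPv6
next-header values span 0..255, and NEXTHDR_MAX itself (255) is used
directly as an index into lowpan_nexthdr_nhcs[], so an array of
NEXTHDR_MAX entries was one slot short. A self-contained userspace
illustration of the off-by-one, assuming the kernel's value of
NEXTHDR_MAX:

```c
#include <assert.h>
#include <stdio.h>

#define NEXTHDR_MAX 255		/* matches the kernel's NEXTHDR_MAX */

int main(void)
{
	/* Indexed directly by the next-header value, which may itself
	 * be 255, so the table needs NEXTHDR_MAX + 1 = 256 slots; with
	 * only NEXTHDR_MAX entries, table[255] is out of bounds.
	 */
	static const void *table[NEXTHDR_MAX + 1];

	table[NEXTHDR_MAX] = &table;	/* in bounds only with the +1 */
	assert(sizeof(table) / sizeof(table[0]) == 256);
	printf("slots: %zu\n", sizeof(table) / sizeof(table[0]));
	return 0;
}
```
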
diff --git a/net/6lowpan/nhc_dest.c b/net/6lowpan/nhc_dest.c
index 0b292c9..4768a94 100644
--- a/net/6lowpan/nhc_dest.c
+++ b/net/6lowpan/nhc_dest.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN IPv6 Destination Options Header compression according to
  *	RFC6282
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_fragment.c b/net/6lowpan/nhc_fragment.c
index 473dbc5..be85f07 100644
--- a/net/6lowpan/nhc_fragment.c
+++ b/net/6lowpan/nhc_fragment.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN IPv6 Fragment Header compression according to RFC6282
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_ghc_ext_dest.c b/net/6lowpan/nhc_ghc_ext_dest.c
index 9887b3a..a9137f1 100644
--- a/net/6lowpan/nhc_ghc_ext_dest.c
+++ b/net/6lowpan/nhc_ghc_ext_dest.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN Extension Header compression according to RFC7400
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_ghc_ext_frag.c b/net/6lowpan/nhc_ghc_ext_frag.c
index 1308b79..d49b745 100644
--- a/net/6lowpan/nhc_ghc_ext_frag.c
+++ b/net/6lowpan/nhc_ghc_ext_frag.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN Extension Header compression according to RFC7400
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_ghc_ext_hop.c b/net/6lowpan/nhc_ghc_ext_hop.c
index baec86f..3beedf5 100644
--- a/net/6lowpan/nhc_ghc_ext_hop.c
+++ b/net/6lowpan/nhc_ghc_ext_hop.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN Extension Header compression according to RFC7400
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_ghc_ext_route.c b/net/6lowpan/nhc_ghc_ext_route.c
index d7e5bd7..70dc0ea 100644
--- a/net/6lowpan/nhc_ghc_ext_route.c
+++ b/net/6lowpan/nhc_ghc_ext_route.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN Extension Header compression according to RFC7400
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_ghc_icmpv6.c b/net/6lowpan/nhc_ghc_icmpv6.c
index 32e7c2c..339ceff 100644
--- a/net/6lowpan/nhc_ghc_icmpv6.c
+++ b/net/6lowpan/nhc_ghc_icmpv6.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN ICMPv6 compression according to RFC7400
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_ghc_udp.c b/net/6lowpan/nhc_ghc_udp.c
index 17beefa..f47fec6 100644
--- a/net/6lowpan/nhc_ghc_udp.c
+++ b/net/6lowpan/nhc_ghc_udp.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN UDP compression according to RFC7400
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_hop.c b/net/6lowpan/nhc_hop.c
index 1eb66be..158fc19 100644
--- a/net/6lowpan/nhc_hop.c
+++ b/net/6lowpan/nhc_hop.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN IPv6 Hop-by-Hop Options Header compression according to RFC6282
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_ipv6.c b/net/6lowpan/nhc_ipv6.c
index 2313d16..08b7589 100644
--- a/net/6lowpan/nhc_ipv6.c
+++ b/net/6lowpan/nhc_ipv6.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN IPv6 Header compression according to RFC6282
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_mobility.c b/net/6lowpan/nhc_mobility.c
index 60d3f38..ac8fca6 100644
--- a/net/6lowpan/nhc_mobility.c
+++ b/net/6lowpan/nhc_mobility.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN IPv6 Mobility Header compression according to RFC6282
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_routing.c b/net/6lowpan/nhc_routing.c
index c393280..1c17402 100644
--- a/net/6lowpan/nhc_routing.c
+++ b/net/6lowpan/nhc_routing.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN IPv6 Routing Header compression according to RFC6282
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c
index 225d919..8a35075 100644
--- a/net/6lowpan/nhc_udp.c
+++ b/net/6lowpan/nhc_udp.c
@@ -1,18 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	6LoWPAN IPv6 UDP compression according to RFC6282
  *
- *
  *	Authors:
  *	Alexander Aring	<aar@pengutronix.de>
  *
  *	Orignal written by:
  *	Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
  *	Jon Smirl <jonsmirl@gmail.com>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include "nhc.h"
diff --git a/net/802/Kconfig b/net/802/Kconfig
index 80d4bf7..aaa83e8 100644
--- a/net/802/Kconfig
+++ b/net/802/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config STP
 	tristate
 	select LLC
diff --git a/net/802/fc.c b/net/802/fc.c
index 058a9f7..afd3d28 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NET3:	Fibre Channel device handling subroutines
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *		Vineet Abraham <vma@iol.unh.edu>
  *		v 1.0 03/22/99
  */
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 90f1416..7533ce2 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the BSD Socket
@@ -16,11 +17,6 @@
  *			Florian La Roche, <rzsfl@rz.uni-sb.de>
  *			Alan Cox, <gw4pts@gw4pts.ampr.org>
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	Changes
  *		Alan Cox		:	New arp/rebuild header
  *		Maciej W. Rozycki	:	IPv6 support
diff --git a/net/802/garp.c b/net/802/garp.c
index 7f50d47..400bd85 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	IEEE 802.1D Generic Attribute Registration Protocol (GARP)
  *
  *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	version 2 as published by the Free Software Foundation.
  */
 #include <linux/kernel.h>
 #include <linux/timer.h>
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 690308b..f80b33a 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -13,11 +14,6 @@
  *		Florian  La Roche, <rzsfl@rz.uni-sb.de>
  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
  *		Jes Sorensen, <Jes.Sorensen@cern.ch>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
diff --git a/net/802/mrp.c b/net/802/mrp.c
index a808dd5..2cfdfbf 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	IEEE 802.1Q Multiple Registration Protocol (MRP)
  *
@@ -5,10 +6,6 @@
  *
  *	Adapted from code in net/802/garp.c
  *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	version 2 as published by the Free Software Foundation.
  */
 #include <linux/kernel.h>
 #include <linux/timer.h>
diff --git a/net/802/p8022.c b/net/802/p8022.c
index 0bda8de..a658562 100644
--- a/net/802/p8022.c
+++ b/net/802/p8022.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	NET3:	Support for 802.2 demultiplexing off Ethernet
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  *
  *		Demultiplex 802.2 encoded protocols. We match the entry by the
  *		SSAP/DSAP pair and then deliver to the registered datalink that
diff --git a/net/802/p8023.c b/net/802/p8023.c
index 1256a40..19cd569 100644
--- a/net/802/p8023.c
+++ b/net/802/p8023.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	NET3:	802.3 data link hooks used for IPX 802.3
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	802.3 isn't really a protocol data link layer. Some old IPX stuff
  *	uses it however. Note that there is only one 802.3 protocol layer
  *	in the system. We don't currently support different protocols
diff --git a/net/802/psnap.c b/net/802/psnap.c
index db6baf7..40ab2ae 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	SNAP data link layer. Derived from 802.2
  *
  *		Alan Cox <alan@lxorguk.ukuu.org.uk>,
  *		from the 802.2 layer by Greg Page.
  *		Merged in additions from Greg Page's psnap.c.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
diff --git a/net/802/stp.c b/net/802/stp.c
index 2c40ba0..d550d9f 100644
--- a/net/802/stp.c
+++ b/net/802/stp.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	STP SAP demux
  *
  *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	version 2 as published by the Free Software Foundation.
  */
 #include <linux/mutex.h>
 #include <linux/skbuff.h>
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index 4232018..5510b4b 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Configuration for 802.1Q VLAN support
 #
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5e99504..d4bcfd8 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		802.1Q VLAN
  *		Ethernet-type device handling.
@@ -11,11 +12,6 @@
  *		Add HW acceleration hooks - David S. Miller <davem@redhat.com>;
  *		Correct all the locking - David S. Miller <davem@redhat.com>;
  *		Use hash table for VLAN groups - David S. Miller <davem@redhat.com>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -75,6 +71,14 @@
 	return 0;
 }
 
+static void vlan_stacked_transfer_operstate(const struct net_device *rootdev,
+					    struct net_device *dev,
+					    struct vlan_dev_priv *vlan)
+{
+	if (!(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
+		netif_stacked_transfer_operstate(rootdev, dev);
+}
+
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
@@ -168,7 +172,6 @@
 	if (err < 0)
 		goto out_uninit_mvrp;
 
-	vlan->nest_level = dev_get_nest_level(real_dev) + 1;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto out_uninit_mvrp;
@@ -180,7 +183,7 @@
 	/* Account for reference in struct vlan_dev_priv */
 	dev_hold(real_dev);
 
-	netif_stacked_transfer_operstate(real_dev, dev);
+	vlan_stacked_transfer_operstate(real_dev, dev, vlan);
 	linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
 
 	/* So, got the sucker initialized, now lets place
@@ -330,6 +333,7 @@
 
 	vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
 	vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
+	vlandev->hw_enc_features = vlan_tnl_features(vlan->real_dev);
 
 	netdev_update_features(vlandev);
 }
@@ -357,6 +361,7 @@
 static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			     void *ptr)
 {
+	struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct vlan_group *grp;
 	struct vlan_info *vlan_info;
@@ -397,7 +402,8 @@
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
 		vlan_group_for_each_dev(grp, i, vlandev)
-			netif_stacked_transfer_operstate(dev, vlandev);
+			vlan_stacked_transfer_operstate(dev, vlandev,
+							vlan_dev_priv(vlandev));
 		break;
 
 	case NETDEV_CHANGEADDR:
@@ -444,7 +450,8 @@
 		dev_close_many(&close_list, false);
 
 		list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) {
-			netif_stacked_transfer_operstate(dev, vlandev);
+			vlan_stacked_transfer_operstate(dev, vlandev,
+							vlan_dev_priv(vlandev));
 			list_del_init(&vlandev->close_list);
 		}
 		list_del(&close_list);
@@ -459,8 +466,9 @@
 
 			vlan = vlan_dev_priv(vlandev);
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
-				dev_change_flags(vlandev, flgs | IFF_UP);
-			netif_stacked_transfer_operstate(dev, vlandev);
+				dev_change_flags(vlandev, flgs | IFF_UP,
+						 extack);
+			vlan_stacked_transfer_operstate(dev, vlandev, vlan);
 		}
 		break;
 
@@ -647,93 +655,6 @@
 	return err;
 }
 
-static struct sk_buff *vlan_gro_receive(struct list_head *head,
-					struct sk_buff *skb)
-{
-	const struct packet_offload *ptype;
-	unsigned int hlen, off_vlan;
-	struct sk_buff *pp = NULL;
-	struct vlan_hdr *vhdr;
-	struct sk_buff *p;
-	__be16 type;
-	int flush = 1;
-
-	off_vlan = skb_gro_offset(skb);
-	hlen = off_vlan + sizeof(*vhdr);
-	vhdr = skb_gro_header_fast(skb, off_vlan);
-	if (skb_gro_header_hard(skb, hlen)) {
-		vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
-		if (unlikely(!vhdr))
-			goto out;
-	}
-
-	type = vhdr->h_vlan_encapsulated_proto;
-
-	rcu_read_lock();
-	ptype = gro_find_receive_by_type(type);
-	if (!ptype)
-		goto out_unlock;
-
-	flush = 0;
-
-	list_for_each_entry(p, head, list) {
-		struct vlan_hdr *vhdr2;
-
-		if (!NAPI_GRO_CB(p)->same_flow)
-			continue;
-
-		vhdr2 = (struct vlan_hdr *)(p->data + off_vlan);
-		if (compare_vlan_header(vhdr, vhdr2))
-			NAPI_GRO_CB(p)->same_flow = 0;
-	}
-
-	skb_gro_pull(skb, sizeof(*vhdr));
-	skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
-	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
-
-out_unlock:
-	rcu_read_unlock();
-out:
-	skb_gro_flush_final(skb, pp, flush);
-
-	return pp;
-}
-
-static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
-{
-	struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
-	__be16 type = vhdr->h_vlan_encapsulated_proto;
-	struct packet_offload *ptype;
-	int err = -ENOENT;
-
-	rcu_read_lock();
-	ptype = gro_find_complete_by_type(type);
-	if (ptype)
-		err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
-
-	rcu_read_unlock();
-	return err;
-}
-
-static struct packet_offload vlan_packet_offloads[] __read_mostly = {
-	{
-		.type = cpu_to_be16(ETH_P_8021Q),
-		.priority = 10,
-		.callbacks = {
-			.gro_receive = vlan_gro_receive,
-			.gro_complete = vlan_gro_complete,
-		},
-	},
-	{
-		.type = cpu_to_be16(ETH_P_8021AD),
-		.priority = 10,
-		.callbacks = {
-			.gro_receive = vlan_gro_receive,
-			.gro_complete = vlan_gro_complete,
-		},
-	},
-};
-
 static int __net_init vlan_init_net(struct net *net)
 {
 	struct vlan_net *vn = net_generic(net, vlan_net_id);
@@ -761,7 +682,6 @@
 static int __init vlan_proto_init(void)
 {
 	int err;
-	unsigned int i;
 
 	pr_info("%s v%s\n", vlan_fullname, vlan_version);
 
@@ -785,9 +705,6 @@
 	if (err < 0)
 		goto err5;
 
-	for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
-		dev_add_offload(&vlan_packet_offloads[i]);
-
 	vlan_ioctl_set(vlan_ioctl_handler);
 	return 0;
 
@@ -805,13 +722,8 @@
 
 static void __exit vlan_cleanup_module(void)
 {
-	unsigned int i;
-
 	vlan_ioctl_set(NULL);
 
-	for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
-		dev_remove_offload(&vlan_packet_offloads[i]);
-
 	vlan_netlink_fini();
 
 	unregister_netdevice_notifier(&vlan_notifier_block);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 44df1c3..c46daf0 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -92,6 +92,18 @@
 	return NULL;
 }
 
+static inline netdev_features_t vlan_tnl_features(struct net_device *real_dev)
+{
+	netdev_features_t ret;
+
+	ret = real_dev->hw_enc_features &
+	      (NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO | NETIF_F_GSO_ENCAP_ALL);
+
+	if ((ret & NETIF_F_GSO_ENCAP_ALL) && (ret & NETIF_F_CSUM_MASK))
+		return (ret & ~NETIF_F_CSUM_MASK) | NETIF_F_HW_CSUM;
+	return 0;
+}
+
 #define vlan_group_for_each_dev(grp, i, dev) \
 	for ((i) = 0; i < VLAN_PROTO_NUM * VLAN_N_VID; i++) \
 		if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 4f60e86..a313165 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -57,7 +57,7 @@
 	}
 
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
-	skb->vlan_tci = 0;
+	__vlan_hwaccel_clear_tag(skb);
 
 	rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats);
 
@@ -223,6 +223,33 @@
 		return -ENODEV;
 }
 
+int vlan_for_each(struct net_device *dev,
+		  int (*action)(struct net_device *dev, int vid, void *arg),
+		  void *arg)
+{
+	struct vlan_vid_info *vid_info;
+	struct vlan_info *vlan_info;
+	struct net_device *vdev;
+	int ret;
+
+	ASSERT_RTNL();
+
+	vlan_info = rtnl_dereference(dev->vlan_info);
+	if (!vlan_info)
+		return 0;
+
+	list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+		vdev = vlan_group_get_device(&vlan_info->grp, vid_info->proto,
+					     vid_info->vid);
+		ret = action(vdev, vid_info->vid, arg);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(vlan_for_each);
+
 int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto)
 {
 	struct net_device *real_dev = vlan_info->real_dev;
@@ -426,3 +453,102 @@
 	return vlan_info->grp.nr_vlan_devs ? true : false;
 }
 EXPORT_SYMBOL(vlan_uses_dev);
+
+static struct sk_buff *vlan_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
+{
+	const struct packet_offload *ptype;
+	unsigned int hlen, off_vlan;
+	struct sk_buff *pp = NULL;
+	struct vlan_hdr *vhdr;
+	struct sk_buff *p;
+	__be16 type;
+	int flush = 1;
+
+	off_vlan = skb_gro_offset(skb);
+	hlen = off_vlan + sizeof(*vhdr);
+	vhdr = skb_gro_header_fast(skb, off_vlan);
+	if (skb_gro_header_hard(skb, hlen)) {
+		vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
+		if (unlikely(!vhdr))
+			goto out;
+	}
+
+	type = vhdr->h_vlan_encapsulated_proto;
+
+	rcu_read_lock();
+	ptype = gro_find_receive_by_type(type);
+	if (!ptype)
+		goto out_unlock;
+
+	flush = 0;
+
+	list_for_each_entry(p, head, list) {
+		struct vlan_hdr *vhdr2;
+
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		vhdr2 = (struct vlan_hdr *)(p->data + off_vlan);
+		if (compare_vlan_header(vhdr, vhdr2))
+			NAPI_GRO_CB(p)->same_flow = 0;
+	}
+
+	skb_gro_pull(skb, sizeof(*vhdr));
+	skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
+	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	skb_gro_flush_final(skb, pp, flush);
+
+	return pp;
+}
+
+static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
+	__be16 type = vhdr->h_vlan_encapsulated_proto;
+	struct packet_offload *ptype;
+	int err = -ENOENT;
+
+	rcu_read_lock();
+	ptype = gro_find_complete_by_type(type);
+	if (ptype)
+		err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
+
+	rcu_read_unlock();
+	return err;
+}
+
+static struct packet_offload vlan_packet_offloads[] __read_mostly = {
+	{
+		.type = cpu_to_be16(ETH_P_8021Q),
+		.priority = 10,
+		.callbacks = {
+			.gro_receive = vlan_gro_receive,
+			.gro_complete = vlan_gro_complete,
+		},
+	},
+	{
+		.type = cpu_to_be16(ETH_P_8021AD),
+		.priority = 10,
+		.callbacks = {
+			.gro_receive = vlan_gro_receive,
+			.gro_complete = vlan_gro_complete,
+		},
+	},
+};
+
+static int __init vlan_offload_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
+		dev_add_offload(&vlan_packet_offloads[i]);
+
+	return 0;
+}
+
+fs_initcall(vlan_offload_init);
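
Two things land in this file: the 802.1Q/802.1AD GRO handlers move here
from vlan.c and are now registered once at boot via fs_initcall() rather
than at 8021q module init, and a new RTNL-locked iterator,
vlan_for_each(), is exported for walking every vlan stacked on a real
device. A usage sketch of the iterator; the example_* names and the
hardware-filter use case are invented:

```c
#include <linux/if_vlan.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Callback invoked once per vlan device; returning non-zero stops the
 * walk and is propagated back by vlan_for_each().
 */
static int example_sync_vid(struct net_device *vdev, int vid, void *arg)
{
	int *count = arg;

	(*count)++;	/* e.g. reprogram a hardware VLAN filter here */
	netdev_dbg(vdev, "resynced vlan id %d\n", vid);
	return 0;
}

static int example_sync_all(struct net_device *real_dev)
{
	int count = 0;
	int err;

	ASSERT_RTNL();	/* vlan_for_each() asserts RTNL is held */
	err = vlan_for_each(real_dev, example_sync_vid, &count);
	return err ? err : count;
}
```
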
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 546af0e..e5bff5c 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* -*- linux-c -*-
  * INET		802.1Q VLAN
  *		Ethernet-type device handling.
@@ -12,12 +13,6 @@
  *              Oct 20, 2001:  Ard van Breeman:
  *                - Fix MC-list, finally.
  *                - Flush MC-list on VLAN destroy.
- *
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -31,7 +26,6 @@
 #include <linux/ethtool.h>
 #include <linux/phy.h>
 #include <net/arp.h>
-#include <net/switchdev.h>
 
 #include "vlan.h"
 #include "vlanproc.h"
@@ -224,7 +218,8 @@
 	u32 old_flags = vlan->flags;
 
 	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-		     VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP))
+		     VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP |
+		     VLAN_FLAG_BRIDGE_BINDING))
 		return -EINVAL;
 
 	vlan->flags = (old_flags & ~mask) | (flags & mask);
@@ -297,7 +292,8 @@
 	if (vlan->flags & VLAN_FLAG_MVRP)
 		vlan_mvrp_request_join(dev);
 
-	if (netif_carrier_ok(real_dev))
+	if (netif_carrier_ok(real_dev) &&
+	    !(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
 		netif_carrier_on(dev);
 	return 0;
 
@@ -327,7 +323,8 @@
 	if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
 		dev_uc_del(real_dev, dev->dev_addr);
 
-	netif_carrier_off(dev);
+	if (!(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
+		netif_carrier_off(dev);
 	return 0;
 }
 
@@ -368,10 +365,13 @@
 	ifrr.ifr_ifru = ifr->ifr_ifru;
 
 	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		if (!net_eq(dev_net(dev), &init_net))
+			break;
+		/* fall through */
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
-	case SIOCSHWTSTAMP:
 	case SIOCGHWTSTAMP:
 		if (netif_device_present(real_dev) && ops->ndo_do_ioctl)
 			err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd);
@@ -444,17 +444,6 @@
 	return rc;
 }
 
-static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
-{
-	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
-	const struct net_device_ops *ops = real_dev->netdev_ops;
-	int rc = -EINVAL;
-
-	if (ops->ndo_fcoe_get_wwn)
-		rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type);
-	return rc;
-}
-
 static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
 				    struct scatterlist *sgl, unsigned int sgc)
 {
@@ -469,6 +458,19 @@
 }
 #endif
 
+#ifdef NETDEV_FCOE_WWNN
+static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
+{
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+	const struct net_device_ops *ops = real_dev->netdev_ops;
+	int rc = -EINVAL;
+
+	if (ops->ndo_fcoe_get_wwn)
+		rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type);
+	return rc;
+}
+#endif
+
 static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 {
 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
@@ -487,36 +489,6 @@
 	dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
 }
 
-/*
- * vlan network devices have devices nesting below it, and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key vlan_netdev_xmit_lock_key;
-static struct lock_class_key vlan_netdev_addr_lock_key;
-
-static void vlan_dev_set_lockdep_one(struct net_device *dev,
-				     struct netdev_queue *txq,
-				     void *_subclass)
-{
-	lockdep_set_class_and_subclass(&txq->_xmit_lock,
-				       &vlan_netdev_xmit_lock_key,
-				       *(int *)_subclass);
-}
-
-static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
-{
-	lockdep_set_class_and_subclass(&dev->addr_list_lock,
-				       &vlan_netdev_addr_lock_key,
-				       subclass);
-	netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass);
-}
-
-static int vlan_dev_get_lock_subclass(struct net_device *dev)
-{
-	return vlan_dev_priv(dev)->nest_level;
-}
-
 static const struct header_ops vlan_header_ops = {
 	.create	 = vlan_dev_hard_header,
 	.parse	 = eth_header_parse,
@@ -549,7 +521,8 @@
 
 static int vlan_dev_init(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+	struct net_device *real_dev = vlan->real_dev;
 
 	netif_carrier_off(dev);
 
@@ -560,8 +533,12 @@
 					  (1<<__LINK_STATE_DORMANT))) |
 		      (1<<__LINK_STATE_PRESENT);
 
+	if (vlan->flags & VLAN_FLAG_BRIDGE_BINDING)
+		dev->state |= (1 << __LINK_STATE_NOCARRIER);
+
 	dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG |
 			   NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
+			   NETIF_F_GSO_ENCAP_ALL |
 			   NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
 			   NETIF_F_ALL_FCOE;
 
@@ -572,6 +549,8 @@
 		netdev_warn(real_dev, "VLAN features are set incorrectly.  Q-in-Q configurations may not work correctly.\n");
 
 	dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE;
+	dev->hw_enc_features = vlan_tnl_features(real_dev);
+	dev->mpls_features = real_dev->mpls_features;
 
 	/* ipv6 shared card related stuff */
 	dev->dev_id = real_dev->dev_id;
@@ -588,8 +567,7 @@
 #endif
 
 	dev->needed_headroom = real_dev->needed_headroom;
-	if (vlan_hw_offload_capable(real_dev->features,
-				    vlan_dev_priv(dev)->vlan_proto)) {
+	if (vlan_hw_offload_capable(real_dev->features, vlan->vlan_proto)) {
 		dev->header_ops      = &vlan_passthru_header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
 	} else {
@@ -601,10 +579,8 @@
 
 	SET_NETDEV_DEVTYPE(dev, &vlan_type);
 
-	vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev));
-
-	vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
-	if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
+	vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
+	if (!vlan->vlan_pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
@@ -756,8 +732,7 @@
 		return;
 
 	vlan->netpoll = NULL;
-
-	__netpoll_free_async(netpoll);
+	__netpoll_free(netpoll);
 }
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
@@ -794,16 +769,17 @@
 	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
 	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
 	.ndo_fcoe_disable	= vlan_dev_fcoe_disable,
-	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
 	.ndo_fcoe_ddp_target	= vlan_dev_fcoe_ddp_target,
 #endif
+#ifdef NETDEV_FCOE_WWNN
+	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
+#endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= vlan_dev_poll_controller,
 	.ndo_netpoll_setup	= vlan_dev_netpoll_setup,
 	.ndo_netpoll_cleanup	= vlan_dev_netpoll_cleanup,
 #endif
 	.ndo_fix_features	= vlan_dev_fix_features,
-	.ndo_get_lock_subclass  = vlan_dev_get_lock_subclass,
 	.ndo_get_iflink		= vlan_dev_get_iflink,
 };
 
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 66a8032..6b34b72 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * 	IEEE 802.1Q GARP VLAN Registration Protocol (GVRP)
  *
  * 	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	version 2 as published by the Free Software Foundation.
  */
 #include <linux/types.h>
 #include <linux/if_vlan.h>
diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c
index e0fe091..689ecee 100644
--- a/net/8021q/vlan_mvrp.c
+++ b/net/8021q/vlan_mvrp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	IEEE 802.1Q Multiple VLAN Registration Protocol (MVRP)
  *
@@ -5,10 +6,6 @@
  *
  *	Adapted from code in net/8021q/vlan_gvrp.c
  *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	version 2 as published by the Free Software Foundation.
  */
 #include <linux/types.h>
 #include <linux/if_ether.h>
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 9b60c1e..c482a6f 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	VLAN netlink control interface
  *
  * 	Copyright (c) 2007 Patrick McHardy <kaber@trash.net>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	version 2 as published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -35,8 +32,8 @@
 {
 	if (!attr)
 		return 0;
-	return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy,
-				   NULL);
+	return nla_validate_nested_deprecated(attr, IFLA_VLAN_QOS_MAX,
+					      vlan_map_policy, NULL);
 }
 
 static int vlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -84,7 +81,8 @@
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
 		if ((flags->flags & flags->mask) &
 		    ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-		      VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) {
+		      VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP |
+		      VLAN_FLAG_BRIDGE_BINDING)) {
 			NL_SET_ERR_MSG_MOD(extack, "Invalid VLAN flags");
 			return -EINVAL;
 		}
@@ -226,7 +224,7 @@
 			goto nla_put_failure;
 	}
 	if (vlan->nr_ingress_mappings) {
-		nest = nla_nest_start(skb, IFLA_VLAN_INGRESS_QOS);
+		nest = nla_nest_start_noflag(skb, IFLA_VLAN_INGRESS_QOS);
 		if (nest == NULL)
 			goto nla_put_failure;
 
@@ -244,7 +242,7 @@
 	}
 
 	if (vlan->nr_egress_mappings) {
-		nest = nla_nest_start(skb, IFLA_VLAN_EGRESS_QOS);
+		nest = nla_nest_start_noflag(skb, IFLA_VLAN_EGRESS_QOS);
 		if (nest == NULL)
 			goto nla_put_failure;
 
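
The renames here track a netlink API split upstream: nla_nest_start()
now sets NLA_F_NESTED on the attribute it opens, while
nla_nest_start_noflag() and nla_validate_nested_deprecated() keep the
old unflagged wire format and liberal validation that existing
IFLA_VLAN_*_QOS consumers expect. A minimal dump-side sketch; the
EXAMPLE_ATTR_* names and example_fill() are hypothetical:

```c
#include <net/netlink.h>

enum {
	EXAMPLE_ATTR_UNSPEC,
	EXAMPLE_ATTR_FROM,
	EXAMPLE_ATTR_TO,
	__EXAMPLE_ATTR_MAX,
};

/* Emits a nested attribute in the legacy format: _noflag leaves
 * NLA_F_NESTED clear, which is what the vlan QOS dump above relies on.
 */
static int example_fill(struct sk_buff *skb, int attrtype, u32 from, u32 to)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, attrtype);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(skb, EXAMPLE_ATTR_FROM, from) ||
	    nla_put_u32(skb, EXAMPLE_ATTR_TO, to)) {
		nla_nest_cancel(skb, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(skb, nest);
	return 0;
}
```
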
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index d36e8c4..ec87dea 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /******************************************************************************
  * vlanproc.c	VLAN Module. /proc filesystem interface.
  *
@@ -9,10 +10,6 @@
  *
  * Copyright:	(c) 1998 Ben Greear
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  * ============================================================================
  * Jan 20, 1998        Ben Greear     Initial Version
  *****************************************************************************/
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index e6014e0..3d11fec 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # 9P protocol configuration
 #
diff --git a/net/9p/Makefile b/net/9p/Makefile
index c0486cf..aa0a564 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -8,7 +8,6 @@
 	mod.o \
 	client.o \
 	error.o \
-	util.o \
 	protocol.o \
 	trans_fd.o \
 	trans_common.o \
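
util.c implemented the p9_idpool allocator that 9p used for request
tags; it can be dropped because the client.c rework below replaces the
idpool and static tag array with refcounted requests whose tags come
from an IDR. A condensed sketch of the allocation scheme p9_tag_alloc()
now uses; the example_* names are invented, and 0xffff stands in for
9P's reserved P9_NOTAG:

```c
#include <linux/idr.h>
#include <linux/spinlock.h>

#define EXAMPLE_NOTAG 0xffff	/* 9P's P9_NOTAG, reserved for TVERSION */

static DEFINE_IDR(example_reqs);
static DEFINE_SPINLOCK(example_lock);

/* Returns the allocated tag, or a negative errno. idr_preload() keeps
 * the GFP_NOWAIT allocation under the spinlock from failing spuriously.
 */
static int example_tag_alloc(void *req, bool is_version)
{
	int tag;

	idr_preload(GFP_KERNEL);
	spin_lock_irq(&example_lock);
	if (is_version)		/* TVERSION must carry the reserved tag */
		tag = idr_alloc(&example_reqs, req, EXAMPLE_NOTAG,
				EXAMPLE_NOTAG + 1, GFP_NOWAIT);
	else			/* everything else gets 0..0xfffe */
		tag = idr_alloc(&example_reqs, req, 0, EXAMPLE_NOTAG,
				GFP_NOWAIT);
	spin_unlock_irq(&example_lock);
	idr_preload_end();
	return tag;
}
```
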
diff --git a/net/9p/client.c b/net/9p/client.c
index deae53a..1d48afc 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/9p/clnt.c
  *
@@ -5,22 +6,6 @@
  *
  *  Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -181,6 +166,12 @@
 				ret = r;
 				continue;
 			}
+			if (option < 4096) {
+				p9_debug(P9_DEBUG_ERROR,
+					 "msize should be at least 4k\n");
+				ret = -EINVAL;
+				continue;
+			}
 			clnt->msize = option;
 			break;
 		case Opt_trans:
@@ -231,145 +222,172 @@
 	return ret;
 }
 
-static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
+			 int alloc_msize)
 {
-	struct p9_fcall *fc;
-	fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
-	if (!fc)
-		return NULL;
+	if (likely(c->fcall_cache) && alloc_msize == c->msize) {
+		fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
+		fc->cache = c->fcall_cache;
+	} else {
+		fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+		fc->cache = NULL;
+	}
+	if (!fc->sdata)
+		return -ENOMEM;
 	fc->capacity = alloc_msize;
-	fc->sdata = (char *) fc + sizeof(struct p9_fcall);
-	return fc;
+	return 0;
 }
 
-/**
- * p9_tag_alloc - lookup/allocate a request by tag
- * @c: client session to lookup tag within
- * @tag: numeric id for transaction
- *
- * this is a simple array lookup, but will grow the
- * request_slots as necessary to accommodate transaction
- * ids which did not previously have a slot.
- *
- * this code relies on the client spinlock to manage locks, its
- * possible we should switch to something else, but I'd rather
- * stick with something low-overhead for the common case.
- *
- */
-
-static struct p9_req_t *
-p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
+void p9_fcall_fini(struct p9_fcall *fc)
 {
-	unsigned long flags;
-	int row, col;
-	struct p9_req_t *req;
+	/* sdata can be NULL for interrupted requests in trans_rdma,
+	 * and kmem_cache_free does not do NULL-check for us
+	 */
+	if (unlikely(!fc->sdata))
+		return;
+
+	if (fc->cache)
+		kmem_cache_free(fc->cache, fc->sdata);
+	else
+		kfree(fc->sdata);
+}
+EXPORT_SYMBOL(p9_fcall_fini);
+
+static struct kmem_cache *p9_req_cache;
+
+/**
+ * p9_req_alloc - Allocate a new request.
+ * @c: Client session.
+ * @type: Transaction type.
+ * @max_size: Maximum packet size for this request.
+ *
+ * Context: Process context.
+ * Return: Pointer to new request.
+ */
+static struct p9_req_t *
+p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
+{
+	struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS);
 	int alloc_msize = min(c->msize, max_size);
+	int tag;
 
-	/* This looks up the original request by tag so we know which
-	 * buffer to read the data into */
-	tag++;
+	if (!req)
+		return ERR_PTR(-ENOMEM);
 
-	if (tag >= c->max_tag) {
-		spin_lock_irqsave(&c->lock, flags);
-		/* check again since original check was outside of lock */
-		while (tag >= c->max_tag) {
-			row = (tag / P9_ROW_MAXTAG);
-			c->reqs[row] = kcalloc(P9_ROW_MAXTAG,
-					sizeof(struct p9_req_t), GFP_ATOMIC);
+	if (p9_fcall_init(c, &req->tc, alloc_msize))
+		goto free_req;
+	if (p9_fcall_init(c, &req->rc, alloc_msize))
+		goto free;
 
-			if (!c->reqs[row]) {
-				pr_err("Couldn't grow tag array\n");
-				spin_unlock_irqrestore(&c->lock, flags);
-				return ERR_PTR(-ENOMEM);
-			}
-			for (col = 0; col < P9_ROW_MAXTAG; col++) {
-				req = &c->reqs[row][col];
-				req->status = REQ_STATUS_IDLE;
-				init_waitqueue_head(&req->wq);
-			}
-			c->max_tag += P9_ROW_MAXTAG;
-		}
-		spin_unlock_irqrestore(&c->lock, flags);
-	}
-	row = tag / P9_ROW_MAXTAG;
-	col = tag % P9_ROW_MAXTAG;
-
-	req = &c->reqs[row][col];
-	if (!req->tc)
-		req->tc = p9_fcall_alloc(alloc_msize);
-	if (!req->rc)
-		req->rc = p9_fcall_alloc(alloc_msize);
-	if (!req->tc || !req->rc)
-		goto grow_failed;
-
-	p9pdu_reset(req->tc);
-	p9pdu_reset(req->rc);
-
-	req->tc->tag = tag-1;
+	p9pdu_reset(&req->tc);
+	p9pdu_reset(&req->rc);
+	req->t_err = 0;
 	req->status = REQ_STATUS_ALLOC;
+	init_waitqueue_head(&req->wq);
+	INIT_LIST_HEAD(&req->req_list);
+
+	idr_preload(GFP_NOFS);
+	spin_lock_irq(&c->lock);
+	if (type == P9_TVERSION)
+		tag = idr_alloc(&c->reqs, req, P9_NOTAG, P9_NOTAG + 1,
+				GFP_NOWAIT);
+	else
+		tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
+	req->tc.tag = tag;
+	spin_unlock_irq(&c->lock);
+	idr_preload_end();
+	if (tag < 0)
+		goto free;
+
+	/* Init ref to two because in the general case there is one ref
+	 * that is put asynchronously by a writer thread, one ref
+	 * temporarily given by p9_tag_lookup and put by p9_client_cb
+	 * in the recv thread, and one ref put by p9_tag_remove in the
+	 * main thread. The only exception is virtio that does not use
+	 * p9_tag_lookup but does not have a writer thread either
+	 * (the write happens synchronously in the request/zc_request
+	 * callback), so p9_client_cb eats the second ref there
+	 * as the pointer is duplicated directly by virtqueue_add_sgs()
+	 */
+	refcount_set(&req->refcount.refcount, 2);
 
 	return req;
 
-grow_failed:
-	pr_err("Couldn't grow tag array\n");
-	kfree(req->tc);
-	kfree(req->rc);
-	req->tc = req->rc = NULL;
+free:
+	p9_fcall_fini(&req->tc);
+	p9_fcall_fini(&req->rc);
+free_req:
+	kmem_cache_free(p9_req_cache, req);
 	return ERR_PTR(-ENOMEM);
 }
 
 /**
- * p9_tag_lookup - lookup a request by tag
- * @c: client session to lookup tag within
- * @tag: numeric id for transaction
+ * p9_tag_lookup - Look up a request by tag.
+ * @c: Client session.
+ * @tag: Transaction ID.
  *
+ * Context: Any context.
+ * Return: A request, or %NULL if there is no request with that tag.
  */
-
 struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
 {
-	int row, col;
+	struct p9_req_t *req;
 
-	/* This looks up the original request by tag so we know which
-	 * buffer to read the data into */
-	tag++;
+	rcu_read_lock();
+again:
+	req = idr_find(&c->reqs, tag);
+	if (req) {
+		/* We have to be careful with the req found under rcu_read_lock
+		 * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the
+		 * ref again without corrupting other data, then check again
+		 * that the tag matches once we have the ref
+		 */
+		if (!p9_req_try_get(req))
+			goto again;
+		if (req->tc.tag != tag) {
+			p9_req_put(req);
+			goto again;
+		}
+	}
+	rcu_read_unlock();
 
-	if (tag >= c->max_tag)
-		return NULL;
-
-	row = tag / P9_ROW_MAXTAG;
-	col = tag % P9_ROW_MAXTAG;
-
-	return &c->reqs[row][col];
+	return req;
 }
 EXPORT_SYMBOL(p9_tag_lookup);
 
 /**
- * p9_tag_init - setup tags structure and contents
- * @c:  v9fs client struct
+ * p9_tag_remove - Remove a tag.
+ * @c: Client session.
+ * @r: Request of reference.
  *
- * This initializes the tags structure for each client instance.
- *
+ * Context: Any context.
  */
-
-static int p9_tag_init(struct p9_client *c)
+static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
 {
-	int err = 0;
+	unsigned long flags;
+	u16 tag = r->tc.tag;
 
-	c->tagpool = p9_idpool_create();
-	if (IS_ERR(c->tagpool)) {
-		err = PTR_ERR(c->tagpool);
-		goto error;
-	}
-	err = p9_idpool_get(c->tagpool); /* reserve tag 0 */
-	if (err < 0) {
-		p9_idpool_destroy(c->tagpool);
-		goto error;
-	}
-	c->max_tag = 0;
-error:
-	return err;
+	p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
+	spin_lock_irqsave(&c->lock, flags);
+	idr_remove(&c->reqs, tag);
+	spin_unlock_irqrestore(&c->lock, flags);
+	return p9_req_put(r);
 }
 
+static void p9_req_free(struct kref *ref)
+{
+	struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
+	p9_fcall_fini(&r->tc);
+	p9_fcall_fini(&r->rc);
+	kmem_cache_free(p9_req_cache, r);
+}
+
+int p9_req_put(struct p9_req_t *r)
+{
+	return kref_put(&r->refcount, p9_req_free);
+}
+EXPORT_SYMBOL(p9_req_put);
+
 /**
  * p9_tag_cleanup - cleans up tags structure and reclaims resources
  * @c:  v9fs client struct
@@ -379,52 +397,17 @@
  */
 static void p9_tag_cleanup(struct p9_client *c)
 {
-	int row, col;
+	struct p9_req_t *req;
+	int id;
 
-	/* check to insure all requests are idle */
-	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
-		for (col = 0; col < P9_ROW_MAXTAG; col++) {
-			if (c->reqs[row][col].status != REQ_STATUS_IDLE) {
-				p9_debug(P9_DEBUG_MUX,
-					 "Attempting to cleanup non-free tag %d,%d\n",
-					 row, col);
-				/* TODO: delay execution of cleanup */
-				return;
-			}
-		}
+	rcu_read_lock();
+	idr_for_each_entry(&c->reqs, req, id) {
+		pr_info("Tag %d still in use\n", id);
+		if (p9_tag_remove(c, req) == 0)
+			pr_warn("Packet with tag %d has still references",
+				req->tc.tag);
 	}
-
-	if (c->tagpool) {
-		p9_idpool_put(0, c->tagpool); /* free reserved tag 0 */
-		p9_idpool_destroy(c->tagpool);
-	}
-
-	/* free requests associated with tags */
-	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
-		for (col = 0; col < P9_ROW_MAXTAG; col++) {
-			kfree(c->reqs[row][col].tc);
-			kfree(c->reqs[row][col].rc);
-		}
-		kfree(c->reqs[row]);
-	}
-	c->max_tag = 0;
-}
-
-/**
- * p9_free_req - free a request and clean-up as necessary
- * c: client state
- * r: request to release
- *
- */
-
-static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
-{
-	int tag = r->tc->tag;
-	p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
-
-	r->status = REQ_STATUS_IDLE;
-	if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
-		p9_idpool_put(tag, c->tagpool);
+	rcu_read_unlock();
 }
 
 /**
@@ -435,7 +418,7 @@
  */
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
 {
-	p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+	p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
 
 	/*
 	 * This barrier is needed to make sure any change made to req before
@@ -445,7 +428,8 @@
 	req->status = status;
 
 	wake_up(&req->wq);
-	p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
+	p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
+	p9_req_put(req);
 }
 EXPORT_SYMBOL(p9_client_cb);
 
@@ -516,18 +500,18 @@
 	int err;
 	int ecode;
 
-	err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
-	if (req->rc->size >= c->msize) {
+	err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
+	if (req->rc.size >= c->msize) {
 		p9_debug(P9_DEBUG_ERROR,
 			 "requested packet size too big: %d\n",
-			 req->rc->size);
+			 req->rc.size);
 		return -EIO;
 	}
 	/*
 	 * dump the response from server
 	 * This should come after the error checks, which populate pdu_fcall.
 	 */
-	trace_9p_protocol_dump(c, req->rc);
+	trace_9p_protocol_dump(c, &req->rc);
 	if (err) {
 		p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
 		return err;
@@ -537,7 +521,7 @@
 
 	if (!p9_is_proto_dotl(c)) {
 		char *ename;
-		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+		err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
 				  &ename, &ecode);
 		if (err)
 			goto out_err;
@@ -553,7 +537,7 @@
 		}
 		kfree(ename);
 	} else {
-		err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+		err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
 		err = -ecode;
 
 		p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -587,12 +571,12 @@
 	int8_t type;
 	char *ename = NULL;
 
-	err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+	err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
 	/*
 	 * dump the response from server
 	 * This should be after parse_header, which populates pdu_fcall.
 	 */
-	trace_9p_protocol_dump(c, req->rc);
+	trace_9p_protocol_dump(c, &req->rc);
 	if (err) {
 		p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
 		return err;
@@ -607,13 +591,13 @@
 		/* 7 = header size for RERROR; */
 		int inline_len = in_hdrlen - 7;
 
-		len =  req->rc->size - req->rc->offset;
+		len = req->rc.size - req->rc.offset;
 		if (len > (P9_ZC_HDR_SZ - 7)) {
 			err = -EFAULT;
 			goto out_err;
 		}
 
-		ename = &req->rc->sdata[req->rc->offset];
+		ename = &req->rc.sdata[req->rc.offset];
 		if (len > inline_len) {
 			/* We have error in external buffer */
 			if (!copy_from_iter_full(ename + inline_len,
@@ -623,7 +607,7 @@
 			}
 		}
 		ename = NULL;
-		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+		err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
 				  &ename, &ecode);
 		if (err)
 			goto out_err;
@@ -639,7 +623,7 @@
 		}
 		kfree(ename);
 	} else {
-		err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+		err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
 		err = -ecode;
 
 		p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -672,7 +656,7 @@
 	int16_t oldtag;
 	int err;
 
-	err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
+	err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
 	if (err)
 		return err;
 
@@ -686,11 +670,12 @@
 	 * if we haven't received a response for oldreq,
 	 * remove it from the list
 	 */
-	if (oldreq->status == REQ_STATUS_SENT)
+	if (oldreq->status == REQ_STATUS_SENT) {
 		if (c->trans_mod->cancelled)
 			c->trans_mod->cancelled(c, oldreq);
+	}
 
-	p9_free_req(c, req);
+	p9_tag_remove(c, req);
 	return 0;
 }
 
@@ -698,7 +683,7 @@
 					      int8_t type, int req_size,
 					      const char *fmt, va_list ap)
 {
-	int tag, err;
+	int err;
 	struct p9_req_t *req;
 
 	p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type);
@@ -711,27 +696,22 @@
 	if ((c->status == BeginDisconnect) && (type != P9_TCLUNK))
 		return ERR_PTR(-EIO);
 
-	tag = P9_NOTAG;
-	if (type != P9_TVERSION) {
-		tag = p9_idpool_get(c->tagpool);
-		if (tag < 0)
-			return ERR_PTR(-ENOMEM);
-	}
-
-	req = p9_tag_alloc(c, tag, req_size);
+	req = p9_tag_alloc(c, type, req_size);
 	if (IS_ERR(req))
 		return req;
 
 	/* marshall the data */
-	p9pdu_prepare(req->tc, tag, type);
-	err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
+	p9pdu_prepare(&req->tc, req->tc.tag, type);
+	err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap);
 	if (err)
 		goto reterr;
-	p9pdu_finalize(c, req->tc);
-	trace_9p_client_req(c, type, tag);
+	p9pdu_finalize(c, &req->tc);
+	trace_9p_client_req(c, type, req->tc.tag);
 	return req;
 reterr:
-	p9_free_req(c, req);
+	p9_tag_remove(c, req);
+	/* We also have to put the 2nd reference, as it won't be used */
+	p9_req_put(req);
 	return ERR_PTR(err);
 }
 
@@ -741,7 +721,7 @@
  * @type: type of request
  * @fmt: protocol format string (see protocol.c)
  *
- * Returns request structure (which client must free using p9_free_req)
+ * Returns request structure (which client must free using p9_tag_remove)
  */
 
 static struct p9_req_t *
@@ -766,6 +746,8 @@
 
 	err = c->trans_mod->request(c, req);
 	if (err < 0) {
+		/* the write will never happen; drop the ref reserved for it */
+		p9_req_put(req);
 		if (err != -ERESTARTSYS && err != -EFAULT)
 			c->status = Disconnected;
 		goto recalc_sigpending;
@@ -813,11 +795,11 @@
 		goto reterr;
 
 	err = p9_check_errors(c, req);
-	trace_9p_client_res(c, type, req->rc->tag, err);
+	trace_9p_client_res(c, type, req->rc.tag, err);
 	if (!err)
 		return req;
 reterr:
-	p9_free_req(c, req);
+	p9_tag_remove(c, req);
 	return ERR_PTR(safe_errno(err));
 }
 
@@ -832,7 +814,7 @@
  * @hdrlen: reader header size; this is the size of the response protocol data
  * @fmt: protocol format string (see protocol.c)
  *
- * Returns request structure (which client must free using p9_free_req)
+ * Returns request structure (which client must free using p9_tag_remove)
  */
 static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
 					 struct iov_iter *uidata,
@@ -895,11 +877,11 @@
 		goto reterr;
 
 	err = p9_check_zc_errors(c, req, uidata, in_hdrlen);
-	trace_9p_client_res(c, type, req->rc->tag, err);
+	trace_9p_client_res(c, type, req->rc.tag, err);
 	if (!err)
 		return req;
 reterr:
-	p9_free_req(c, req);
+	p9_tag_remove(c, req);
 	return ERR_PTR(safe_errno(err));
 }
 
@@ -978,10 +960,10 @@
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
+	err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version);
 	if (err) {
 		p9_debug(P9_DEBUG_9P, "version error %d\n", err);
-		trace_9p_protocol_dump(c, req->rc);
+		trace_9p_protocol_dump(c, &req->rc);
 		goto error;
 	}
 
@@ -993,16 +975,24 @@
 	else if (!strncmp(version, "9P2000", 6))
 		c->proto_version = p9_proto_legacy;
 	else {
+		p9_debug(P9_DEBUG_ERROR,
+			 "server returned an unknown version: %s\n", version);
 		err = -EREMOTEIO;
 		goto error;
 	}
 
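+	/* sanity check: the client code assumes an msize of at least 4k */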
+	if (msize < 4096) {
+		p9_debug(P9_DEBUG_ERROR,
+			 "server returned a msize < 4096: %d\n", msize);
+		err = -EREMOTEIO;
+		goto error;
+	}
 	if (msize < c->msize)
 		c->msize = msize;
 
 error:
 	kfree(version);
-	p9_free_req(c, req);
+	p9_tag_remove(c, req);
 
 	return err;
 }
@@ -1020,20 +1010,18 @@
 
 	clnt->trans_mod = NULL;
 	clnt->trans = NULL;
+	clnt->fcall_cache = NULL;
 
 	client_id = utsname()->nodename;
 	memcpy(clnt->name, client_id, strlen(client_id) + 1);
 
 	spin_lock_init(&clnt->lock);
 	idr_init(&clnt->fids);
-
-	err = p9_tag_init(clnt);
-	if (err < 0)
-		goto free_client;
+	idr_init(&clnt->reqs);
 
 	err = parse_opts(options, clnt);
 	if (err < 0)
-		goto destroy_tagpool;
+		goto free_client;
 
 	if (!clnt->trans_mod)
 		clnt->trans_mod = v9fs_get_default_trans();
@@ -1042,7 +1030,7 @@
 		err = -EPROTONOSUPPORT;
 		p9_debug(P9_DEBUG_ERROR,
 			 "No transport defined or default transport\n");
-		goto destroy_tagpool;
+		goto free_client;
 	}
 
 	p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n",
@@ -1055,18 +1043,32 @@
 	if (clnt->msize > clnt->trans_mod->maxsize)
 		clnt->msize = clnt->trans_mod->maxsize;
 
+	if (clnt->msize < 4096) {
+		p9_debug(P9_DEBUG_ERROR,
+			 "Please specify a msize of at least 4k\n");
+		err = -EINVAL;
+		goto close_trans;
+	}
+
 	err = p9_client_version(clnt);
 	if (err)
 		goto close_trans;
 
+	/* P9_HDRSZ + 4 is the smallest packet header we can have that is
+	 * followed by data accessed from userspace by read
+	 */
+	clnt->fcall_cache =
+		kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize,
+					   0, 0, P9_HDRSZ + 4,
+					   clnt->msize - (P9_HDRSZ + 4),
+					   NULL);
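+	/* a NULL cache is tolerated here: fcall allocation simply falls back
+	 * to a plain kmalloc when the cache is absent
+	 */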
+
 	return clnt;
 
 close_trans:
 	clnt->trans_mod->close(clnt);
 put_trans:
 	v9fs_put_trans(clnt->trans_mod);
-destroy_tagpool:
-	p9_idpool_destroy(clnt->tagpool);
 free_client:
 	kfree(clnt);
 	return ERR_PTR(err);
@@ -1092,6 +1094,7 @@
 
 	p9_tag_cleanup(clnt);
 
+	kmem_cache_destroy(clnt->fcall_cache);
 	kfree(clnt);
 }
 EXPORT_SYMBOL(p9_client_destroy);
@@ -1135,10 +1138,10 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
-		p9_free_req(clnt, req);
+		trace_9p_protocol_dump(clnt, &req->rc);
+		p9_tag_remove(clnt, req);
 		goto error;
 	}
 
@@ -1147,7 +1150,7 @@
 
 	memmove(&fid->qid, &qid, sizeof(struct p9_qid));
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	return fid;
 
 error:
@@ -1192,13 +1195,13 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
-		p9_free_req(clnt, req);
+		trace_9p_protocol_dump(clnt, &req->rc);
+		p9_tag_remove(clnt, req);
 		goto clunk_fid;
 	}
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 
 	p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
 
@@ -1259,9 +1262,9 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
+		trace_9p_protocol_dump(clnt, &req->rc);
 		goto free_and_error;
 	}
 
@@ -1273,7 +1276,7 @@
 	fid->iounit = iounit;
 
 free_and_error:
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -1303,9 +1306,9 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
+		trace_9p_protocol_dump(clnt, &req->rc);
 		goto free_and_error;
 	}
 
@@ -1318,7 +1321,7 @@
 	ofid->iounit = iounit;
 
 free_and_error:
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -1348,9 +1351,9 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
+		trace_9p_protocol_dump(clnt, &req->rc);
 		goto free_and_error;
 	}
 
@@ -1363,7 +1366,7 @@
 	fid->iounit = iounit;
 
 free_and_error:
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -1387,9 +1390,9 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
+		trace_9p_protocol_dump(clnt, &req->rc);
 		goto free_and_error;
 	}
 
@@ -1397,7 +1400,7 @@
 			qid->type, (unsigned long long)qid->path, qid->version);
 
 free_and_error:
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -1417,7 +1420,7 @@
 		return PTR_ERR(req);
 
 	p9_debug(P9_DEBUG_9P, "<<< RLINK\n");
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	return 0;
 }
 EXPORT_SYMBOL(p9_client_link);
@@ -1441,7 +1444,7 @@
 
 	p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 
 error:
 	return err;
@@ -1476,7 +1479,7 @@
 
 	p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	/*
 	 * Fid is not valid even after a failed clunk
@@ -1510,7 +1513,7 @@
 
 	p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	if (err == -ERESTARTSYS)
 		p9_client_clunk(fid);
@@ -1537,7 +1540,7 @@
 	}
 	p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name);
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -1585,11 +1588,11 @@
 			break;
 		}
 
-		*err = p9pdu_readf(req->rc, clnt->proto_version,
+		*err = p9pdu_readf(&req->rc, clnt->proto_version,
 				   "D", &count, &dataptr);
 		if (*err) {
-			trace_9p_protocol_dump(clnt, req->rc);
-			p9_free_req(clnt, req);
+			trace_9p_protocol_dump(clnt, &req->rc);
+			p9_tag_remove(clnt, req);
 			break;
 		}
 		if (rsize < count) {
@@ -1599,7 +1602,7 @@
 
 		p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 		if (!count) {
-			p9_free_req(clnt, req);
+			p9_tag_remove(clnt, req);
 			break;
 		}
 
@@ -1609,7 +1612,7 @@
 			offset += n;
 			if (n != count) {
 				*err = -EFAULT;
-				p9_free_req(clnt, req);
+				p9_tag_remove(clnt, req);
 				break;
 			}
 		} else {
@@ -1617,7 +1620,7 @@
 			total += count;
 			offset += count;
 		}
-		p9_free_req(clnt, req);
+		p9_tag_remove(clnt, req);
 	}
 	return total;
 }
@@ -1658,10 +1661,10 @@
 			break;
 		}
 
-		*err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
+		*err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count);
 		if (*err) {
-			trace_9p_protocol_dump(clnt, req->rc);
-			p9_free_req(clnt, req);
+			trace_9p_protocol_dump(clnt, &req->rc);
+			p9_tag_remove(clnt, req);
 			break;
 		}
 		if (rsize < count) {
@@ -1671,7 +1674,7 @@
 
 		p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
 
-		p9_free_req(clnt, req);
+		p9_tag_remove(clnt, req);
 		iov_iter_advance(from, count);
 		total += count;
 		offset += count;
@@ -1702,10 +1705,10 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
-		p9_free_req(clnt, req);
+		trace_9p_protocol_dump(clnt, &req->rc);
+		p9_tag_remove(clnt, req);
 		goto error;
 	}
 
@@ -1722,7 +1725,7 @@
 		from_kgid(&init_user_ns, ret->n_gid),
 		from_kuid(&init_user_ns, ret->n_muid));
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	return ret;
 
 error:
@@ -1755,10 +1758,10 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
-		p9_free_req(clnt, req);
+		trace_9p_protocol_dump(clnt, &req->rc);
+		p9_tag_remove(clnt, req);
 		goto error;
 	}
 
@@ -1783,7 +1786,7 @@
 		ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec,
 		ret->st_gen, ret->st_data_version);
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	return ret;
 
 error:
@@ -1852,7 +1855,7 @@
 
 	p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -1884,7 +1887,7 @@
 		goto error;
 	}
 	p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -1907,12 +1910,12 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
-		&sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
-		&sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
+			  &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
+			  &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
-		p9_free_req(clnt, req);
+		trace_9p_protocol_dump(clnt, &req->rc);
+		p9_tag_remove(clnt, req);
 		goto error;
 	}
 
@@ -1923,7 +1926,7 @@
 		sb->blocks, sb->bfree, sb->bavail, sb->files,  sb->ffree,
 		sb->fsid, (long int)sb->namelen);
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -1951,7 +1954,7 @@
 
 	p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -1981,7 +1984,7 @@
 	p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n",
 		   newdirfid->fid, new_name);
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -2015,13 +2018,13 @@
 		err = PTR_ERR(req);
 		goto error;
 	}
-	err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
-		p9_free_req(clnt, req);
+		trace_9p_protocol_dump(clnt, &req->rc);
+		p9_tag_remove(clnt, req);
 		goto clunk_fid;
 	}
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	p9_debug(P9_DEBUG_9P, "<<<  RXATTRWALK fid %d size %llu\n",
 		attr_fid->fid, *attr_size);
 	return attr_fid;
@@ -2055,7 +2058,7 @@
 		goto error;
 	}
 	p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -2070,7 +2073,7 @@
 	struct kvec kv = {.iov_base = data, .iov_len = count};
 	struct iov_iter to;
 
-	iov_iter_kvec(&to, READ | ITER_KVEC, &kv, 1, count);
+	iov_iter_kvec(&to, READ, &kv, 1, count);
 
 	p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
 				fid->fid, (unsigned long long) offset, count);
@@ -2103,9 +2106,9 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
+		trace_9p_protocol_dump(clnt, &req->rc);
 		goto free_and_error;
 	}
 	if (rsize < count) {
@@ -2118,11 +2121,11 @@
 	if (non_zc)
 		memmove(data, dataptr, count);
 
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	return count;
 
 free_and_error:
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 error:
 	return err;
 }
@@ -2144,16 +2147,16 @@
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
+		trace_9p_protocol_dump(clnt, &req->rc);
 		goto error;
 	}
 	p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
 				(unsigned long long)qid->path, qid->version);
 
 error:
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	return err;
 
 }
@@ -2175,16 +2178,16 @@
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
+		trace_9p_protocol_dump(clnt, &req->rc);
 		goto error;
 	}
 	p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
 				(unsigned long long)qid->path, qid->version);
 
 error:
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	return err;
 
 }
@@ -2210,14 +2213,14 @@
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
+		trace_9p_protocol_dump(clnt, &req->rc);
 		goto error;
 	}
 	p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
 error:
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	return err;
 
 }
@@ -2241,18 +2244,18 @@
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
-			&glock->start, &glock->length, &glock->proc_id,
-			&glock->client_id);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type,
+			  &glock->start, &glock->length, &glock->proc_id,
+			  &glock->client_id);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
+		trace_9p_protocol_dump(clnt, &req->rc);
 		goto error;
 	}
 	p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
 		"proc_id %d client_id %s\n", glock->type, glock->start,
 		glock->length, glock->proc_id, glock->client_id);
 error:
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	return err;
 }
 EXPORT_SYMBOL(p9_client_getlock_dotl);
@@ -2271,14 +2274,25 @@
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target);
 	if (err) {
-		trace_9p_protocol_dump(clnt, req->rc);
+		trace_9p_protocol_dump(clnt, &req->rc);
 		goto error;
 	}
 	p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
 error:
-	p9_free_req(clnt, req);
+	p9_tag_remove(clnt, req);
 	return err;
 }
 EXPORT_SYMBOL(p9_client_readlink);
+
+int __init p9_client_init(void)
+{
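+	/* TYPESAFE_BY_RCU keeps freed requests type-stable, so p9_tag_lookup()
+	 * may inspect a request under rcu_read_lock() before taking a
+	 * reference to it
+	 */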
+	p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU);
+	return p9_req_cache ? 0 : -ENOMEM;
+}
+
+void __exit p9_client_exit(void)
+{
+	kmem_cache_destroy(p9_req_cache);
+}
diff --git a/net/9p/error.c b/net/9p/error.c
index 126fd0d..231f355 100644
--- a/net/9p/error.c
+++ b/net/9p/error.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/fs/9p/error.c
  *
@@ -9,22 +10,6 @@
  *
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/9p/mod.c b/net/9p/mod.c
index 253ba82..c1b6242 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  net/9p/9p.c
  *
@@ -6,22 +7,6 @@
  *  Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -171,11 +156,17 @@
  */
 static int __init init_p9(void)
 {
+	int ret;
+
+	ret = p9_client_init();
+	if (ret)
+		return ret;
+
 	p9_error_init();
 	pr_info("Installing 9P2000 support\n");
 	p9_trans_fd_init();
 
-	return 0;
+	return ret;
 }
 
 /**
@@ -188,6 +179,7 @@
 	pr_info("Unloading 9P2000 support\n");
 
 	p9_trans_fd_exit();
+	p9_client_exit();
 }
 
 module_init(init_p9)
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index ee32bbf..03593eb 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/9p/protocol.c
  *
@@ -7,22 +8,6 @@
  *
  *  Based on code from Anthony Liguori <aliguori@us.ibm.com>
  *  Copyright (C) 2008 by IBM, Corp.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
  */
 
 #include <linux/module.h>
@@ -571,9 +556,10 @@
 	if (ret) {
 		p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
 		trace_9p_protocol_dump(clnt, &fake_pdu);
+		return ret;
 	}
 
-	return ret;
+	return fake_pdu.offset;
 }
 EXPORT_SYMBOL(p9stat_read);
 
@@ -622,13 +608,19 @@
 	if (ret) {
 		p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret);
 		trace_9p_protocol_dump(clnt, &fake_pdu);
-		goto out;
+		return ret;
 	}
 
-	strcpy(dirent->d_name, nameptr);
+	ret = strscpy(dirent->d_name, nameptr, sizeof(dirent->d_name));
+	if (ret < 0) {
+		p9_debug(P9_DEBUG_ERROR,
+			 "On the wire dirent name too long: %s\n",
+			 nameptr);
+		kfree(nameptr);
+		return ret;
+	}
 	kfree(nameptr);
 
-out:
 	return fake_pdu.offset;
 }
 EXPORT_SYMBOL(p9dirent_read);
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
index 2cc525f..6835f91 100644
--- a/net/9p/protocol.h
+++ b/net/9p/protocol.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * net/9p/protocol.h
  *
@@ -7,22 +8,6 @@
  *
  *  Based on code from Anthony Liguori <aliguori@us.ibm.com>
  *  Copyright (C) 2008 by IBM, Corp.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
  */
 
 int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index b718db2..3dff68f 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -14,6 +14,7 @@
 
 #include <linux/mm.h>
 #include <linux/module.h>
+#include "trans_common.h"
 
 /**
  *  p9_release_pages - Release pages after the transaction.
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index e2ef3c7..13cd683 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/fs/9p/trans_fd.c
  *
@@ -7,22 +8,6 @@
  *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -131,7 +116,8 @@
 	int err;
 	struct list_head req_list;
 	struct list_head unsent_req_list;
-	struct p9_req_t *req;
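+	/* requests currently being read from / written to the transport */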
+	struct p9_req_t *rreq;
+	struct p9_req_t *wreq;
 	char tmp_buf[7];
 	struct p9_fcall rc;
 	int wpos;
@@ -291,7 +277,6 @@
 	__poll_t n;
 	int err;
 	struct p9_conn *m;
-	int status = REQ_STATUS_ERROR;
 
 	m = container_of(work, struct p9_conn, rq);
 
@@ -322,7 +307,7 @@
 	m->rc.offset += err;
 
 	/* header read in */
-	if ((!m->req) && (m->rc.offset == m->rc.capacity)) {
+	if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) {
 		p9_debug(P9_DEBUG_TRANS, "got new header\n");
 
 		/* Header size */
@@ -346,23 +331,23 @@
 			 "mux %p pkt: size: %d bytes tag: %d\n",
 			 m, m->rc.size, m->rc.tag);
 
-		m->req = p9_tag_lookup(m->client, m->rc.tag);
-		if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
+		m->rreq = p9_tag_lookup(m->client, m->rc.tag);
+		if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) {
 			p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
 				 m->rc.tag);
 			err = -EIO;
 			goto error;
 		}
 
-		if (m->req->rc == NULL) {
+		if (!m->rreq->rc.sdata) {
 			p9_debug(P9_DEBUG_ERROR,
 				 "No recv fcall for tag %d (req %p), disconnecting!\n",
-				 m->rc.tag, m->req);
-			m->req = NULL;
+				 m->rc.tag, m->rreq);
+			m->rreq = NULL;
 			err = -EIO;
 			goto error;
 		}
-		m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+		m->rc.sdata = m->rreq->rc.sdata;
 		memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
 		m->rc.capacity = m->rc.size;
 	}
@@ -370,20 +355,27 @@
 	/* packet is read in
 	 * not an else because some packets (like clunk) have no payload
 	 */
-	if ((m->req) && (m->rc.offset == m->rc.capacity)) {
+	if ((m->rreq) && (m->rc.offset == m->rc.capacity)) {
 		p9_debug(P9_DEBUG_TRANS, "got new packet\n");
-		m->req->rc->size = m->rc.offset;
+		m->rreq->rc.size = m->rc.offset;
 		spin_lock(&m->client->lock);
-		if (m->req->status != REQ_STATUS_ERROR)
-			status = REQ_STATUS_RCVD;
-		list_del(&m->req->req_list);
-		/* update req->status while holding client->lock  */
-		p9_client_cb(m->client, m->req, status);
+		if (m->rreq->status == REQ_STATUS_SENT) {
+			list_del(&m->rreq->req_list);
+			p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
+		} else {
+			spin_unlock(&m->client->lock);
+			p9_debug(P9_DEBUG_ERROR,
+				 "Request tag %d errored out while we were reading the reply\n",
+				 m->rc.tag);
+			err = -EIO;
+			goto error;
+		}
 		spin_unlock(&m->client->lock);
 		m->rc.sdata = NULL;
 		m->rc.offset = 0;
 		m->rc.capacity = 0;
-		m->req = NULL;
+		p9_req_put(m->rreq);
+		m->rreq = NULL;
 	}
 
 end_clear:
@@ -469,9 +461,11 @@
 		p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
 		list_move_tail(&req->req_list, &m->req_list);
 
-		m->wbuf = req->tc->sdata;
-		m->wsize = req->tc->size;
+		m->wbuf = req->tc.sdata;
+		m->wsize = req->tc.size;
 		m->wpos = 0;
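+		/* hold a reference on the request while it is being written */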
+		p9_req_get(req);
+		m->wreq = req;
 		spin_unlock(&m->client->lock);
 	}
 
@@ -492,8 +486,11 @@
 	}
 
 	m->wpos += err;
-	if (m->wpos == m->wsize)
+	if (m->wpos == m->wsize) {
 		m->wpos = m->wsize = 0;
+		p9_req_put(m->wreq);
+		m->wreq = NULL;
+	}
 
 end_clear:
 	clear_bit(Wworksched, &m->wsched);
@@ -663,7 +660,7 @@
 	struct p9_conn *m = &ts->conn;
 
 	p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
-		 m, current, req->tc, req->tc->id);
+		 m, current, &req->tc, req->tc.id);
 	if (m->err < 0)
 		return m->err;
 
@@ -694,6 +691,7 @@
 	if (req->status == REQ_STATUS_UNSENT) {
 		list_del(&req->req_list);
 		req->status = REQ_STATUS_FLSHD;
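+		/* never written out; drop the ref reserved for the write side */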
+		p9_req_put(req);
 		ret = 0;
 	}
 	spin_unlock(&client->lock);
@@ -711,6 +709,7 @@
 	spin_lock(&client->lock);
 	list_del(&req->req_list);
 	spin_unlock(&client->lock);
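+	/* the reply will never arrive; drop the ref the read side would put */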
+	p9_req_put(req);
 
 	return 0;
 }
@@ -862,7 +861,15 @@
 
 	p9_mux_poll_stop(m);
 	cancel_work_sync(&m->rq);
+	if (m->rreq) {
+		p9_req_put(m->rreq);
+		m->rreq = NULL;
+	}
 	cancel_work_sync(&m->wq);
+	if (m->wreq) {
+		p9_req_put(m->wreq);
+		m->wreq = NULL;
+	}
 
 	p9_conn_cancel(m, -ECONNRESET);
 
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index b513cff..b21c3c2 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/fs/9p/trans_rdma.c
  *
@@ -8,22 +9,6 @@
  *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -122,7 +107,7 @@
 	dma_addr_t busa;
 	union {
 		struct p9_req_t *req;
-		struct p9_fcall *rc;
+		struct p9_fcall rc;
 	};
 };
 
@@ -274,8 +259,7 @@
 	case RDMA_CM_EVENT_DISCONNECTED:
 		if (rdma)
 			rdma->state = P9_RDMA_CLOSED;
-		if (c)
-			c->status = Disconnected;
+		c->status = Disconnected;
 		break;
 
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
@@ -320,8 +304,8 @@
 	if (wc->status != IB_WC_SUCCESS)
 		goto err_out;
 
-	c->rc->size = wc->byte_len;
-	err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
+	c->rc.size = wc->byte_len;
+	err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
 	if (err)
 		goto err_out;
 
@@ -331,12 +315,13 @@
 
 	/* Check that we have not yet received a reply for this request.
 	 */
-	if (unlikely(req->rc)) {
+	if (unlikely(req->rc.sdata)) {
 		pr_err("Duplicate reply for request %d", tag);
 		goto err_out;
 	}
 
-	req->rc = c->rc;
+	req->rc.size = c->rc.size;
+	req->rc.sdata = c->rc.sdata;
 	p9_client_cb(client, req, REQ_STATUS_RCVD);
 
  out:
@@ -361,9 +346,10 @@
 		container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
 
 	ib_dma_unmap_single(rdma->cm_id->device,
-			    c->busa, c->req->tc->size,
+			    c->busa, c->req->tc.size,
 			    DMA_TO_DEVICE);
 	up(&rdma->sq_sem);
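+	/* the send completed; put the reference held for the write */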
+	p9_req_put(c->req);
 	kfree(c);
 }
 
@@ -401,7 +387,7 @@
 	struct ib_sge sge;
 
 	c->busa = ib_dma_map_single(rdma->cm_id->device,
-				    c->rc->sdata, client->msize,
+				    c->rc.sdata, client->msize,
 				    DMA_FROM_DEVICE);
 	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
 		goto error;
@@ -443,9 +429,9 @@
 	 **/
 	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
 		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
-			/* Got one ! */
-			kfree(req->rc);
-			req->rc = NULL;
+			/* Got one! */
+			p9_fcall_fini(&req->rc);
+			req->rc.sdata = NULL;
 			goto dont_need_post_recv;
 		} else {
 			/* We raced and lost. */
@@ -459,7 +445,7 @@
 		err = -ENOMEM;
 		goto recv_error;
 	}
-	rpl_context->rc = req->rc;
+	rpl_context->rc.sdata = req->rc.sdata;
 
 	/*
 	 * Post a receive buffer for this request. We need to ensure
@@ -475,11 +461,11 @@
 
 	err = post_recv(client, rpl_context);
 	if (err) {
-		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+		p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err);
 		goto recv_error;
 	}
 	/* remove posted receive buffer from request structure */
-	req->rc = NULL;
+	req->rc.sdata = NULL;
 
 dont_need_post_recv:
 	/* Post the request */
@@ -491,7 +477,7 @@
 	c->req = req;
 
 	c->busa = ib_dma_map_single(rdma->cm_id->device,
-				    c->req->tc->sdata, c->req->tc->size,
+				    c->req->tc.sdata, c->req->tc.size,
 				    DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
 		err = -EIO;
@@ -501,7 +487,7 @@
 	c->cqe.done = send_done;
 
 	sge.addr = c->busa;
-	sge.length = c->req->tc->size;
+	sge.length = c->req->tc.size;
 	sge.lkey = rdma->pd->local_dma_lkey;
 
 	wr.next = NULL;
@@ -544,7 +530,7 @@
  recv_error:
 	kfree(rpl_context);
 	spin_lock_irqsave(&rdma->req_lock, flags);
-	if (rdma->state < P9_RDMA_CLOSING) {
+	if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) {
 		rdma->state = P9_RDMA_CLOSING;
 		spin_unlock_irqrestore(&rdma->req_lock, flags);
 		rdma_disconnect(rdma->cm_id);
@@ -699,9 +685,9 @@
 		goto error;
 
 	/* Create the Completion Queue */
-	rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
-			opts.sq_depth + opts.rq_depth + 1,
-			0, IB_POLL_SOFTIRQ);
+	rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client,
+				   opts.sq_depth + opts.rq_depth + 1,
+				   IB_POLL_SOFTIRQ);
 	if (IS_ERR(rdma->cq))
 		goto error;
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 7728b0a..a3cd90a 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * The Virtio 9p transport driver
  *
@@ -8,22 +9,6 @@
  *
  *  Based on virtio console driver
  *  Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -155,7 +140,7 @@
 		}
 
 		if (len) {
-			req->rc->size = len;
+			req->rc.size = len;
 			p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
 		}
 	}
@@ -207,6 +192,13 @@
 	return 1;
 }
 
+/* Reply won't come, so drop req ref */
+static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+	p9_req_put(req);
+	return 0;
+}
+
 /**
  * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
  * this takes a list of pages.
@@ -273,12 +265,12 @@
 	out_sgs = in_sgs = 0;
 	/* Handle out VirtIO ring buffers */
 	out = pack_sg_list(chan->sg, 0,
-			   VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+			   VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
 	if (out)
 		sgs[out_sgs++] = chan->sg;
 
 	in = pack_sg_list(chan->sg, out,
-			  VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+			  VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity);
 	if (in)
 		sgs[out_sgs + in_sgs++] = chan->sg + out;
 
@@ -322,7 +314,7 @@
 	if (!iov_iter_count(data))
 		return 0;
 
-	if (!(data->type & ITER_KVEC)) {
+	if (!iov_iter_is_kvec(data)) {
 		int n;
 		/*
 		 * We allow only p9_max_pages pinned. We wait for the
@@ -404,6 +396,7 @@
 	struct scatterlist *sgs[4];
 	size_t offs;
 	int need_drop = 0;
+	int kicked = 0;
 
 	p9_debug(P9_DEBUG_TRANS, "virtio request\n");
 
@@ -411,29 +404,33 @@
 		__le32 sz;
 		int n = p9_get_mapped_pages(chan, &out_pages, uodata,
 					    outlen, &offs, &need_drop);
-		if (n < 0)
-			return n;
+		if (n < 0) {
+			err = n;
+			goto err_out;
+		}
 		out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
 		if (n != outlen) {
 			__le32 v = cpu_to_le32(n);
-			memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+			memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
 			outlen = n;
 		}
 		/* The size field of the message must include the length of the
 		 * header and the length of the data.  We didn't actually know
 		 * the length of the data until this point so add it in now.
 		 */
-		sz = cpu_to_le32(req->tc->size + outlen);
-		memcpy(&req->tc->sdata[0], &sz, sizeof(sz));
+		sz = cpu_to_le32(req->tc.size + outlen);
+		memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
 	} else if (uidata) {
 		int n = p9_get_mapped_pages(chan, &in_pages, uidata,
 					    inlen, &offs, &need_drop);
-		if (n < 0)
-			return n;
+		if (n < 0) {
+			err = n;
+			goto err_out;
+		}
 		in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
 		if (n != inlen) {
 			__le32 v = cpu_to_le32(n);
-			memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+			memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
 			inlen = n;
 		}
 	}
@@ -445,7 +442,7 @@
 
 	/* out data */
 	out = pack_sg_list(chan->sg, 0,
-			   VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+			   VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
 
 	if (out)
 		sgs[out_sgs++] = chan->sg;
@@ -464,7 +461,7 @@
 	 * alloced memory and payload onto the user buffer.
 	 */
 	in = pack_sg_list(chan->sg, out,
-			  VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
+			  VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
 	if (in)
 		sgs[out_sgs + in_sgs++] = chan->sg + out;
 
@@ -498,6 +495,7 @@
 	}
 	virtqueue_kick(chan->vq);
 	spin_unlock_irqrestore(&chan->lock, flags);
+	kicked = 1;
 	p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
 	err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
 	/*
@@ -518,6 +516,10 @@
 	}
 	kvfree(in_pages);
 	kvfree(out_pages);
+	if (!kicked) {
+		/* reply won't come */
+		p9_req_put(req);
+	}
 	return err;
 }
 
@@ -750,6 +752,7 @@
 	.request = p9_virtio_request,
 	.zc_request = p9_virtio_zc_request,
 	.cancel = p9_virtio_cancel,
+	.cancelled = p9_virtio_cancelled,
 	/*
 	 * We leave one entry for input and one entry for response
 	 * headers. We also skip one more entry to accommodate, address
@@ -764,10 +767,16 @@
 /* The standard init function */
 static int __init p9_virtio_init(void)
 {
+	int rc;
+
 	INIT_LIST_HEAD(&virtio_chan_list);
 
 	v9fs_register_trans(&p9_virtio_trans);
-	return register_virtio_driver(&p9_virtio_drv);
+	rc = register_virtio_driver(&p9_virtio_drv);
+	if (rc)
+		v9fs_unregister_trans(&p9_virtio_trans);
+
+	return rc;
 }
 
 static void __exit p9_virtio_cleanup(void)
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index c2d54ac..3963eb1 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -141,7 +141,7 @@
 	struct xen_9pfs_front_priv *priv = NULL;
 	RING_IDX cons, prod, masked_cons, masked_prod;
 	unsigned long flags;
-	u32 size = p9_req->tc->size;
+	u32 size = p9_req->tc.size;
 	struct xen_9pfs_dataring *ring;
 	int num;
 
@@ -154,7 +154,7 @@
 	if (!priv || priv->client != client)
 		return -EINVAL;
 
-	num = p9_req->tc->tag % priv->num_rings;
+	num = p9_req->tc.tag % priv->num_rings;
 	ring = &priv->rings[num];
 
 again:
@@ -176,7 +176,7 @@
 	masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
 	masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
 
-	xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size,
+	xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size,
 			      &masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
 
 	p9_req->status = REQ_STATUS_SENT;
@@ -185,6 +185,7 @@
 	ring->intf->out_prod = prod;
 	spin_unlock_irqrestore(&ring->lock, flags);
 	notify_remote_via_irq(ring->irq);
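+	/* request data has been copied into the ring; put the write-side ref */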
+	p9_req_put(p9_req);
 
 	return 0;
 }
@@ -229,12 +230,12 @@
 			continue;
 		}
 
-		memcpy(req->rc, &h, sizeof(h));
-		req->rc->offset = 0;
+		memcpy(&req->rc, &h, sizeof(h));
+		req->rc.offset = 0;
 
 		masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
 		/* Then, read the whole packet (including the header) */
-		xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size,
+		xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size,
 				     masked_prod, &masked_cons,
 				     XEN_9PFS_RING_SIZE);
 
@@ -391,8 +392,8 @@
 	unsigned int max_rings, max_ring_order, len = 0;
 
 	versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
-	if (!len)
-		return -EINVAL;
+	if (IS_ERR(versions))
+		return PTR_ERR(versions);
 	if (strcmp(versions, "1")) {
 		kfree(versions);
 		return -EINVAL;
@@ -512,7 +513,7 @@
 	case XenbusStateClosed:
 		if (dev->state == XenbusStateClosed)
 			break;
-		/* Missed the backend's CLOSING state -- fallthrough */
+		/* fall through - Missed the backend's CLOSING state */
 	case XenbusStateClosing:
 		xenbus_frontend_closed(dev);
 		break;
@@ -529,13 +530,19 @@
 
 static int p9_trans_xen_init(void)
 {
+	int rc;
+
 	if (!xen_domain())
 		return -ENODEV;
 
 	pr_info("Initialising Xen transport for 9pfs\n");
 
 	v9fs_register_trans(&p9_xen_trans);
-	return xenbus_register_frontend(&xen_9pfs_front_driver);
+	rc = xenbus_register_frontend(&xen_9pfs_front_driver);
+	if (rc)
+		v9fs_unregister_trans(&p9_xen_trans);
+
+	return rc;
 }
 module_init(p9_trans_xen_init);
 
diff --git a/net/9p/util.c b/net/9p/util.c
deleted file mode 100644
index 55ad982..0000000
--- a/net/9p/util.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- *  net/9p/util.c
- *
- *  This file contains some helper functions
- *
- *  Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/parser.h>
-#include <linux/idr.h>
-#include <linux/slab.h>
-#include <net/9p/9p.h>
-
-/**
- * struct p9_idpool - per-connection accounting for tag idpool
- * @lock: protects the pool
- * @pool: idr to allocate tag id from
- *
- */
-
-struct p9_idpool {
-	spinlock_t lock;
-	struct idr pool;
-};
-
-/**
- * p9_idpool_create - create a new per-connection id pool
- *
- */
-
-struct p9_idpool *p9_idpool_create(void)
-{
-	struct p9_idpool *p;
-
-	p = kmalloc(sizeof(struct p9_idpool), GFP_KERNEL);
-	if (!p)
-		return ERR_PTR(-ENOMEM);
-
-	spin_lock_init(&p->lock);
-	idr_init(&p->pool);
-
-	return p;
-}
-EXPORT_SYMBOL(p9_idpool_create);
-
-/**
- * p9_idpool_destroy - create a new per-connection id pool
- * @p: idpool to destroy
- */
-
-void p9_idpool_destroy(struct p9_idpool *p)
-{
-	idr_destroy(&p->pool);
-	kfree(p);
-}
-EXPORT_SYMBOL(p9_idpool_destroy);
-
-/**
- * p9_idpool_get - allocate numeric id from pool
- * @p: pool to allocate from
- *
- * Bugs: This seems to be an awful generic function, should it be in idr.c with
- *            the lock included in struct idr?
- */
-
-int p9_idpool_get(struct p9_idpool *p)
-{
-	int i;
-	unsigned long flags;
-
-	idr_preload(GFP_NOFS);
-	spin_lock_irqsave(&p->lock, flags);
-
-	/* no need to store exactly p, we just need something non-null */
-	i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT);
-
-	spin_unlock_irqrestore(&p->lock, flags);
-	idr_preload_end();
-	if (i < 0)
-		return -1;
-
-	p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p);
-	return i;
-}
-EXPORT_SYMBOL(p9_idpool_get);
-
-/**
- * p9_idpool_put - release numeric id from pool
- * @id: numeric id which is being released
- * @p: pool to release id into
- *
- * Bugs: This seems to be an awful generic function, should it be in idr.c with
- *            the lock included in struct idr?
- */
-
-void p9_idpool_put(int id, struct p9_idpool *p)
-{
-	unsigned long flags;
-
-	p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", id, p);
-
-	spin_lock_irqsave(&p->lock, flags);
-	idr_remove(&p->pool, id);
-	spin_unlock_irqrestore(&p->lock, flags);
-}
-EXPORT_SYMBOL(p9_idpool_put);
-
-/**
- * p9_idpool_check - check if the specified id is available
- * @id: id to check
- * @p: pool to check
- */
-
-int p9_idpool_check(int id, struct p9_idpool *p)
-{
-	return idr_find(&p->pool, id) != NULL;
-}
-EXPORT_SYMBOL(p9_idpool_check);
diff --git a/net/Kconfig b/net/Kconfig
index 228dfa3..3101bfc 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Network configuration
 #
@@ -51,6 +52,9 @@
 config NET_EGRESS
 	bool
 
+config SKB_EXTENSIONS
+	bool
+
 menu "Networking options"
 
 source "net/packet/Kconfig"
@@ -63,8 +67,6 @@
 
 config INET
 	bool "TCP/IP networking"
-	select CRYPTO
-	select CRYPTO_AES
 	---help---
 	  These are the protocols used on the Internet and on most local
 	  Ethernets. It is highly recommended to say Y here (this will enlarge
@@ -184,6 +186,7 @@
 	depends on NETFILTER && INET
 	depends on NETFILTER_ADVANCED
 	select NETFILTER_FAMILY_BRIDGE
+	select SKB_EXTENSIONS
 	default m
 	---help---
 	  Enabling this option will let arptables resp. iptables see bridged
@@ -300,8 +303,11 @@
 
 config BPF_STREAM_PARSER
 	bool "enable BPF STREAM_PARSER"
+	depends on INET
 	depends on BPF_SYSCALL
+	depends on CGROUP_BPF
 	select STREAM_PARSER
+	select NET_SOCK_MSG
 	---help---
 	 Enabling this allows a stream parser to be used with
 	 BPF_MAP_TYPE_SOCKMAP.
@@ -396,7 +402,7 @@
 
 config LWTUNNEL_BPF
 	bool "Execute BPF program as route nexthop action"
-	depends on LWTUNNEL
+	depends on LWTUNNEL && INET
 	default y if LWTUNNEL=y
 	---help---
 	  Allows to run BPF programs as a nexthop action following a route
@@ -413,21 +419,18 @@
 config SOCK_VALIDATE_XMIT
 	bool
 
-config NET_DEVLINK
-	tristate "Network physical/parent device Netlink interface"
+config NET_SOCK_MSG
+	bool
+	default n
 	help
-	  Network physical/parent device Netlink interface provides
-	  infrastructure to support access to physical chip-wide config and
-	  monitoring.
+	  NET_SOCK_MSG provides a framework for plain sockets (e.g. TCP) or
+	  ULPs (upper layer modules, e.g. TLS) to process L7 application data
+	  with the help of BPF programs.
 
-config MAY_USE_DEVLINK
-	tristate
-	default m if NET_DEVLINK=m
-	default y if NET_DEVLINK=y || NET_DEVLINK=n
-	help
-	  Drivers using the devlink infrastructure should have a dependency
-	  on MAY_USE_DEVLINK to ensure they do not cause link errors when
-	  devlink is a loadable module and the driver using it is built-in.
+config NET_DEVLINK
+	bool
+	default n
+	imply NET_DROP_MONITOR
 
 config PAGE_POOL
        bool
diff --git a/net/Makefile b/net/Makefile
index bdaf539..449fc0b 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -18,7 +18,7 @@
 obj-$(CONFIG_INET)		+= ipv4/
 obj-$(CONFIG_TLS)		+= tls/
 obj-$(CONFIG_XFRM)		+= xfrm/
-obj-$(CONFIG_UNIX)		+= unix/
+obj-$(CONFIG_UNIX_SCM)		+= unix/
 obj-$(CONFIG_NET)		+= ipv6/
 obj-$(CONFIG_BPFILTER)		+= bpfilter/
 obj-$(CONFIG_PACKET)		+= packet/
diff --git a/net/appletalk/Makefile b/net/appletalk/Makefile
index 5cda56e..33164d9 100644
--- a/net/appletalk/Makefile
+++ b/net/appletalk/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux AppleTalk layer.
 #
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 49a16ce..45f5841 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	AARP:		An implementation of the AppleTalk AARP protocol for
  *			Ethernet 'ELAP'.
@@ -13,12 +14,6 @@
  *		Use neighbour discovery code.
  *		Token Ring Support.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
- *
  *	References:
  *		Inside AppleTalk (2nd Ed).
  *	Fixes:
@@ -26,7 +21,6 @@
  *		Rob Newberry	-	Added proxy AARP and AARP proc fs,
  *					moved probing from DDP module.
  *		Arnaldo C. Melo -	don't mangle rx packets
- *
  */
 
 #include <linux/if_arp.h>
@@ -879,15 +873,24 @@
 
 static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 };
 
-void __init aarp_proto_init(void)
+int __init aarp_proto_init(void)
 {
+	int rc;
+
 	aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
-	if (!aarp_dl)
+	if (!aarp_dl) {
 		printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
+		return -ENOMEM;
+	}
 	timer_setup(&aarp_timer, aarp_expire_timeout, 0);
 	aarp_timer.expires  = jiffies + sysctl_aarp_expiry_time;
 	add_timer(&aarp_timer);
-	register_netdevice_notifier(&aarp_notifier);
+	rc = register_netdevice_notifier(&aarp_notifier);
+	if (rc) {
+		del_timer_sync(&aarp_timer);
+		unregister_snap_client(aarp_dl);
+	}
+	return rc;
 }
 
 /* Remove the AARP entries associated with a device. */
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 8006295..550c6ca 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * 	atalk_proc.c - proc support for Appletalk
  *
  * 	Copyright(c) Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *	This program is free software; you can redistribute it and/or modify it
- *	under the terms of the GNU General Public License as published by the
- *	Free Software Foundation, version 2.
  */
 
 #include <linux/init.h>
@@ -210,56 +207,34 @@
 	.show   = atalk_seq_socket_show,
 };
 
-static struct proc_dir_entry *atalk_proc_dir;
-
 int __init atalk_proc_init(void)
 {
-	struct proc_dir_entry *p;
-	int rc = -ENOMEM;
+	if (!proc_mkdir("atalk", init_net.proc_net))
+		return -ENOMEM;
 
-	atalk_proc_dir = proc_mkdir("atalk", init_net.proc_net);
-	if (!atalk_proc_dir)
+	if (!proc_create_seq("atalk/interface", 0444, init_net.proc_net,
+			    &atalk_seq_interface_ops))
 		goto out;
 
-	p = proc_create_seq("interface", 0444, atalk_proc_dir,
-			&atalk_seq_interface_ops);
-	if (!p)
-		goto out_interface;
+	if (!proc_create_seq("atalk/route", 0444, init_net.proc_net,
+			    &atalk_seq_route_ops))
+		goto out;
 
-	p = proc_create_seq("route", 0444, atalk_proc_dir,
-			&atalk_seq_route_ops);
-	if (!p)
-		goto out_route;
+	if (!proc_create_seq("atalk/socket", 0444, init_net.proc_net,
+			    &atalk_seq_socket_ops))
+		goto out;
 
-	p = proc_create_seq("socket", 0444, atalk_proc_dir,
-			&atalk_seq_socket_ops);
-	if (!p)
-		goto out_socket;
+	if (!proc_create_seq_private("atalk/arp", 0444, init_net.proc_net,
+				     &aarp_seq_ops,
+				     sizeof(struct aarp_iter_state), NULL))
+		goto out;
 
-	p = proc_create_seq_private("arp", 0444, atalk_proc_dir, &aarp_seq_ops,
-			sizeof(struct aarp_iter_state), NULL);
-	if (!p)
-		goto out_arp;
-
-	rc = 0;
+	return 0;
+
 out:
-	return rc;
-out_arp:
-	remove_proc_entry("socket", atalk_proc_dir);
-out_socket:
-	remove_proc_entry("route", atalk_proc_dir);
-out_route:
-	remove_proc_entry("interface", atalk_proc_dir);
-out_interface:
-	remove_proc_entry("atalk", init_net.proc_net);
-	goto out;
+	remove_proc_subtree("atalk", init_net.proc_net);
+	return -ENOMEM;
 }
 
-void __exit atalk_proc_exit(void)
+void atalk_proc_exit(void)
 {
-	remove_proc_entry("interface", atalk_proc_dir);
-	remove_proc_entry("route", atalk_proc_dir);
-	remove_proc_entry("socket", atalk_proc_dir);
-	remove_proc_entry("arp", atalk_proc_dir);
-	remove_proc_entry("atalk", init_net.proc_net);
+	remove_proc_subtree("atalk", init_net.proc_net);
 }
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 9b6bc5a..b41375d 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	DDP:	An implementation of the AppleTalk DDP protocol for
  *		Ethernet 'ELAP'.
@@ -43,12 +44,6 @@
  *						shared skb support 8)
  *		Arnaldo C. de Melo	:	Move proc stuff to atalk_proc.c,
  *						use seq_file
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  */
 
 #include <linux/capability.h>
@@ -958,8 +953,8 @@
 			if (copy > len)
 				copy = len;
 			vaddr = kmap_atomic(skb_frag_page(frag));
-			sum = atalk_sum_partial(vaddr + frag->page_offset +
-						  offset - start, copy, sum);
+			sum = atalk_sum_partial(vaddr + skb_frag_off(frag) +
+						offset - start, copy, sum);
 			kunmap_atomic(vaddr);
 
 			if (!(len -= copy))
@@ -1028,6 +1023,11 @@
 	 */
 	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
 		goto out;
+
+	rc = -EPERM;
+	if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
+		goto out;
+
 	rc = -ENOMEM;
 	sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern);
 	if (!sk)
@@ -1806,12 +1806,6 @@
 		rc = put_user(amount, (int __user *)argp);
 		break;
 	}
-	case SIOCGSTAMP:
-		rc = sock_get_timestamp(sk, argp);
-		break;
-	case SIOCGSTAMPNS:
-		rc = sock_get_timestampns(sk, argp);
-		break;
 	/* Routing */
 	case SIOCADDRT:
 	case SIOCDELRT:
@@ -1871,6 +1865,7 @@
 	.getname	= atalk_getname,
 	.poll		= datagram_poll,
 	.ioctl		= atalk_ioctl,
+	.gettstamp	= sock_gettstamp,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= atalk_compat_ioctl,
 #endif
@@ -1904,31 +1899,61 @@
 EXPORT_SYMBOL(atrtr_get_dev);
 EXPORT_SYMBOL(atalk_find_dev_addr);
 
-static const char atalk_err_snap[] __initconst =
-	KERN_CRIT "Unable to register DDP with SNAP.\n";
-
 /* Called by proto.c on kernel start up */
 static int __init atalk_init(void)
 {
-	int rc = proto_register(&ddp_proto, 0);
+	int rc;
 
-	if (rc != 0)
+	rc = proto_register(&ddp_proto, 0);
+	if (rc)
 		goto out;
 
-	(void)sock_register(&atalk_family_ops);
+	rc = sock_register(&atalk_family_ops);
+	if (rc)
+		goto out_proto;
+
 	ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
-	if (!ddp_dl)
-		printk(atalk_err_snap);
+	if (!ddp_dl) {
+		pr_crit("Unable to register DDP with SNAP.\n");
+		rc = -ENOMEM;
+		goto out_sock;
+	}
 
 	dev_add_pack(&ltalk_packet_type);
 	dev_add_pack(&ppptalk_packet_type);
 
-	register_netdevice_notifier(&ddp_notifier);
-	aarp_proto_init();
-	atalk_proc_init();
-	atalk_register_sysctl();
+	rc = register_netdevice_notifier(&ddp_notifier);
+	if (rc)
+		goto out_snap;
+
+	rc = aarp_proto_init();
+	if (rc)
+		goto out_dev;
+
+	rc = atalk_proc_init();
+	if (rc)
+		goto out_aarp;
+
+	rc = atalk_register_sysctl();
+	if (rc)
+		goto out_proc;
 out:
 	return rc;
+out_proc:
+	atalk_proc_exit();
+out_aarp:
+	aarp_cleanup_module();
+out_dev:
+	unregister_netdevice_notifier(&ddp_notifier);
+out_snap:
+	dev_remove_pack(&ppptalk_packet_type);
+	dev_remove_pack(&ltalk_packet_type);
+	unregister_snap_client(ddp_dl);
+out_sock:
+	sock_unregister(PF_APPLETALK);
+out_proto:
+	proto_unregister(&ddp_proto);
+	goto out;
 }
 module_init(atalk_init);
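
The rework of atalk_init() turns several fire-and-forget calls (sock_register(), register_netdevice_notifier(), aarp_proto_init(), atalk_proc_init(), atalk_register_sysctl()) into checked steps with the standard kernel goto unwind ladder: each label undoes exactly one successful step, and a failure jumps to the label of the last thing that succeeded. Reduced to two steps, the shape is (example_* names are placeholders):

    static int __init example_init(void)
    {
            int rc;

            rc = proto_register(&example_proto, 0);
            if (rc)
                    goto out;

            rc = sock_register(&example_family_ops);
            if (rc)
                    goto out_proto;
            return 0;

    out_proto:
            proto_unregister(&example_proto);  /* undo step 1 only */
    out:
            return rc;
    }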
 
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index c744a85..d945b7c 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -45,9 +45,12 @@
 
 static struct ctl_table_header *atalk_table_header;
 
-void atalk_register_sysctl(void)
+int __init atalk_register_sysctl(void)
 {
 	atalk_table_header = register_net_sysctl(&init_net, "net/appletalk", atalk_table);
+	if (!atalk_table_header)
+		return -ENOMEM;
+	return 0;
 }
 
 void atalk_unregister_sysctl(void)
diff --git a/net/atm/Kconfig b/net/atm/Kconfig
index 754ea10..271f682 100644
--- a/net/atm/Kconfig
+++ b/net/atm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Asynchronous Transfer Mode (ATM)
 #
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 10462de..3e17a5e 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Ethernet netdevice using ATM AAL5 as underlying carrier
  * (RFC1483 obsoleted by RFC2684) for Linux
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d795b9c..294cb9e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* net/atm/clip.c - RFC1577 Classical IP over ATM */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
@@ -88,7 +89,7 @@
 	struct clip_vcc **walk;
 
 	if (!entry) {
-		pr_crit("!clip_vcc->entry (clip_vcc %p)\n", clip_vcc);
+		pr_err("!clip_vcc->entry (clip_vcc %p)\n", clip_vcc);
 		return;
 	}
 	netif_tx_lock_bh(entry->neigh->dev);	/* block clip_start_xmit() */
@@ -108,10 +109,10 @@
 			error = neigh_update(entry->neigh, NULL, NUD_NONE,
 					     NEIGH_UPDATE_F_ADMIN, 0);
 			if (error)
-				pr_crit("neigh_update failed with %d\n", error);
+				pr_err("neigh_update failed with %d\n", error);
 			goto out;
 		}
-	pr_crit("ATMARP: failed (entry %p, vcc 0x%p)\n", entry, clip_vcc);
+	pr_err("ATMARP: failed (entry %p, vcc 0x%p)\n", entry, clip_vcc);
 out:
 	netif_tx_unlock_bh(entry->neigh->dev);
 }
@@ -345,8 +346,8 @@
 		return NETDEV_TX_OK;
 	}
 	rt = (struct rtable *) dst;
-	if (rt->rt_gateway)
-		daddr = &rt->rt_gateway;
+	if (rt->rt_gw_family == AF_INET)
+		daddr = &rt->rt_gw4;
 	else
 		daddr = &ip_hdr(skb)->daddr;
 	n = dst_neigh_lookup(dst, daddr);
diff --git a/net/atm/common.c b/net/atm/common.c
index a38c174..0ce530a 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* net/atm/common.c - ATM sockets (common part for PVC and SVC) */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
@@ -667,7 +668,7 @@
 		mask |= EPOLLHUP;
 
 	/* readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* writable? */
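
On the _lockless suffix above: the poll handler peeks at the receive queue without holding sk_receive_queue.lock, so the old plain head check is formally a data race. skb_queue_empty_lockless() performs the same comparison through READ_ONCE(), making the unlocked read intentional and tear-free; a momentarily stale answer is acceptable for poll semantics. The idiom in isolation (hypothetical helper):

    static __poll_t example_recv_ready(const struct sock *sk)
    {
            __poll_t mask = 0;

            /* READ_ONCE() under the hood: safe without the queue lock */
            if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                    mask |= EPOLLIN | EPOLLRDNORM;

            return mask;
    }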
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 2ff0e5e..d955b68 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -81,22 +81,6 @@
 				 (int __user *)argp) ? -EFAULT : 0;
 		goto done;
 	}
-	case SIOCGSTAMP: /* borrowed from IP */
-#ifdef CONFIG_COMPAT
-		if (compat)
-			error = compat_sock_get_timestamp(sk, argp);
-		else
-#endif
-			error = sock_get_timestamp(sk, argp);
-		goto done;
-	case SIOCGSTAMPNS: /* borrowed from IP */
-#ifdef CONFIG_COMPAT
-		if (compat)
-			error = compat_sock_get_timestampns(sk, argp);
-		else
-#endif
-			error = sock_get_timestampns(sk, argp);
-		goto done;
 	case ATM_SETSC:
 		net_warn_ratelimited("ATM_SETSC is obsolete; used by %s:%d\n",
 				     current->comm, task_pid_nr(current));
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d7f5cf5..5a77c23 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * lec.c: Lan Emulation driver
  *
@@ -710,7 +711,10 @@
 
 static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
 {
-	if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
+	if (arg < 0 || arg >= MAX_LEC_ITF)
+		return -EINVAL;
+	arg = array_index_nospec(arg, MAX_LEC_ITF);
+	if (!dev_lec[arg])
 		return -EINVAL;
 	vcc->proto_data = dev_lec[arg];
 	return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc);
@@ -723,11 +727,10 @@
 	struct lec_priv *priv;
 
 	if (arg < 0)
-		i = 0;
-	else
-		i = arg;
+		arg = 0;
 	if (arg >= MAX_LEC_ITF)
 		return -EINVAL;
+	i = array_index_nospec(arg, MAX_LEC_ITF);
 	if (!dev_lec[i]) {
 		int size;
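
Both lec.c changes are Spectre variant 1 hardening: arg arrives from user space, and a mispredicted bounds check can still index dev_lec[] out of range speculatively. array_index_nospec() clamps the index under speculation once the architectural check has passed. The idiom, reduced to a hypothetical lookup helper:

    #include <linux/nospec.h>

    static struct net_device *example_lec_dev(int arg)
    {
            if (arg < 0 || arg >= MAX_LEC_ITF)
                    return NULL;

            /* clamped to [0, MAX_LEC_ITF) even under misprediction */
            arg = array_index_nospec(arg, MAX_LEC_ITF);
            return dev_lec[arg];
    }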
 
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 24b53c4..033871e 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
 
 #include <linux/kernel.h>
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index 4bb4183..3286f9d 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -180,8 +180,7 @@
 static void in_cache_put(in_cache_entry *entry)
 {
 	if (refcount_dec_and_test(&entry->use)) {
-		memset(entry, 0, sizeof(in_cache_entry));
-		kfree(entry);
+		kzfree(entry);
 	}
 }
 
@@ -416,8 +415,7 @@
 static void eg_cache_put(eg_cache_entry *entry)
 {
 	if (refcount_dec_and_test(&entry->use)) {
-		memset(entry, 0, sizeof(eg_cache_entry));
-		kfree(entry);
+		kzfree(entry);
 	}
 }
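
kzfree() zeroes the allocation before kfree(), and it derives the size to clear from the allocator via ksize() rather than from a sizeof() the caller must keep in sync with the allocation; later kernels rename it kfree_sensitive(). Either put helper above thus collapses to (example_entry standing in for in_cache_entry/eg_cache_entry):

    static void example_cache_put(struct example_entry *entry)
    {
            /* zero first so stale cache contents never linger in the heap */
            if (refcount_dec_and_test(&entry->use))
                    kzfree(entry);
    }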
 
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index d84227d..45d8e1d 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* net/atm/pppoatm.c - RFC2364 PPP over ATM/AAL5 */
 
 /* Copyright 1999-2000 by Mitchell Blank Jr */
@@ -6,10 +7,6 @@
 /* And help from Jens Axboe */
 
 /*
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
  *
  * This driver provides the encapsulation and framing for sending
  * and receiving PPP frames in ATM AAL5 PDUs.
@@ -219,9 +216,7 @@
 			pvcc->chan.mtu += LLC_LEN;
 			break;
 		}
-		pr_debug("Couldn't autodetect yet (skb: %02X %02X %02X %02X %02X %02X)\n",
-			 skb->data[0], skb->data[1], skb->data[2],
-			 skb->data[3], skb->data[4], skb->data[5]);
+		pr_debug("Couldn't autodetect yet (skb: %6ph)\n", skb->data);
 		goto error;
 	case e_vc:
 		break;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 0b0495a..d79221f 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -134,7 +134,8 @@
 static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	v = vcc_walk(seq, 1);
-	*pos += !!PTR_ERR(v);
+	if (v)
+		(*pos)++;
 	return v;
 }
 
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 2cb10af..02bd2a4 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -118,6 +118,7 @@
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = vcc_compat_ioctl,
 #endif
+	.gettstamp =	sock_gettstamp,
 	.listen =	sock_no_listen,
 	.shutdown =	pvc_shutdown,
 	.setsockopt =	pvc_setsockopt,
diff --git a/net/atm/resources.c b/net/atm/resources.c
index bada395..889349c 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -203,13 +203,9 @@
 	int __user *sioc_len;
 	int __user *iobuf_len;
 
-#ifndef CONFIG_COMPAT
-	compat = 0; /* Just so the compiler _knows_ */
-#endif
-
 	switch (cmd) {
 	case ATM_GETNAMES:
-		if (compat) {
+		if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
 			struct compat_atm_iobuf __user *ciobuf = arg;
 			compat_uptr_t cbuf;
@@ -253,7 +249,7 @@
 		break;
 	}
 
-	if (compat) {
+	if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
 		struct compat_atmif_sioc __user *csioc = arg;
 		compat_uptr_t carg;
@@ -417,7 +413,7 @@
 		}
 		/* fall through */
 	default:
-		if (compat) {
+		if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
 			if (!dev->ops->compat_ioctl) {
 				error = -EINVAL;
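
The IS_ENABLED() rewrite above is why the "compat = 0; /* Just so the compiler _knows_ */" hack could go: IS_ENABLED(CONFIG_COMPAT) folds to a compile-time 0 when the option is off, so the branch is eliminated as dead code while still being parsed. Code that uses compat-only types must stay under #ifdef, since those types are otherwise undeclared. A sketch of the combined pattern (hypothetical handler):

    static void __user *example_fetch_arg(void __user *arg, int compat)
    {
            if (IS_ENABLED(CONFIG_COMPAT) && compat) {
    #ifdef CONFIG_COMPAT
                    compat_uptr_t uptr;

                    /* 32-bit pointer from a compat task */
                    if (get_user(uptr, (compat_uptr_t __user *)arg))
                            return NULL;
                    return compat_ptr(uptr);
    #endif
            }
            return arg;
    }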
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 2f91b76..908cbb8 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -641,6 +641,7 @@
 #ifdef CONFIG_COMPAT
 	.compat_ioctl =	svc_compat_ioctl,
 #endif
+	.gettstamp =	sock_gettstamp,
 	.listen =	svc_listen,
 	.shutdown =	svc_shutdown,
 	.setsockopt =	svc_setsockopt,
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index 705e53e..043fd54 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Amateur Radio protocols and AX.25 device configuration
 #
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c603d33..bb222b8 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
@@ -653,15 +650,22 @@
 			break;
 		}
 
-		dev = dev_get_by_name(&init_net, devname);
+		rtnl_lock();
+		dev = __dev_get_by_name(&init_net, devname);
 		if (!dev) {
+			rtnl_unlock();
 			res = -ENODEV;
 			break;
 		}
 
 		ax25->ax25_dev = ax25_dev_ax25dev(dev);
+		if (!ax25->ax25_dev) {
+			rtnl_unlock();
+			res = -ENODEV;
+			break;
+		}
 		ax25_fillin_cb(ax25, ax25->ax25_dev);
-		dev_put(dev);
+		rtnl_unlock();
 		break;
 
 	default:
@@ -851,6 +855,8 @@
 		break;
 
 	case SOCK_RAW:
+		if (!capable(CAP_NET_RAW))
+			return -EPERM;
 		break;
 	default:
 		return -ESOCKTNOSUPPORT;
@@ -1707,14 +1713,6 @@
 		break;
 	}
 
-	case SIOCGSTAMP:
-		res = sock_get_timestamp(sk, argp);
-		break;
-
-	case SIOCGSTAMPNS:
-		res = sock_get_timestampns(sk, argp);
-		break;
-
 	case SIOCAX25ADDUID:	/* Add a uid to the uid/call map table */
 	case SIOCAX25DELUID:	/* Delete a uid from the uid/call map table */
 	case SIOCAX25GETUID: {
@@ -1881,8 +1879,8 @@
 	 * magic dev src_addr dest_addr,digi1,digi2,.. st vs vr va t1 t1 t2 t2 t3 t3 idle idle n2 n2 rtt window paclen Snd-Q Rcv-Q inode
 	 */
 
-	seq_printf(seq, "%8.8lx %s %s%s ",
-		   (long) ax25,
+	seq_printf(seq, "%p %s %s%s ",
+		   ax25,
 		   ax25->ax25_dev == NULL? "???" : ax25->ax25_dev->dev->name,
 		   ax2asc(buf, &ax25->source_addr),
 		   ax25->iamdigi? "*":"");
@@ -1943,6 +1941,7 @@
 	.getname	= ax25_getname,
 	.poll		= datagram_poll,
 	.ioctl		= ax25_ioctl,
+	.gettstamp	= sock_gettstamp,
 	.listen		= ax25_listen,
 	.shutdown	= ax25_shutdown,
 	.setsockopt	= ax25_setsockopt,
diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c
index a14cfa7..f68865a 100644
--- a/net/ax25/ax25_addr.c
+++ b/net/ax25/ax25_addr.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 9a3a301..4ac2e08 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
@@ -116,6 +113,7 @@
 	if ((s = ax25_dev_list) == ax25_dev) {
 		ax25_dev_list = s->next;
 		spin_unlock_bh(&ax25_dev_lock);
+		dev->ax25_ptr = NULL;
 		dev_put(dev);
 		kfree(ax25_dev);
 		return;
@@ -125,6 +123,7 @@
 		if (s->next == ax25_dev) {
 			s->next = ax25_dev->next;
 			spin_unlock_bh(&ax25_dev_lock);
+			dev->ax25_ptr = NULL;
 			dev_put(dev);
 			kfree(ax25_dev);
 			return;
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c
index 488fc2d..c62f8fb 100644
--- a/net/ax25/ax25_ds_in.c
+++ b/net/ax25/ax25_ds_in.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index bc0329f..f00e27d 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index e9d1131..c4f8adb 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index 8c07c28..b4083f3 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 8607526..dcdbaee 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 70417e9..e4f63dd 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
@@ -114,6 +111,7 @@
 	dst = (ax25_address *)(bp + 1);
 	src = (ax25_address *)(bp + 8);
 
+	ax25_route_lock_use();
 	route = ax25_get_route(dst, NULL);
 	if (route) {
 		digipeat = route->digipeat;
@@ -206,9 +204,8 @@
 	ax25_queue_xmit(skb, dev);
 
 put:
-	if (route)
-		ax25_put_route(route);
 
+	ax25_route_lock_unuse();
 	return NETDEV_TX_OK;
 }
 
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 3e5afc8..f53751b 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index a0eff32..b40e0bc 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
@@ -40,7 +37,7 @@
 #include <linux/export.h>
 
 static ax25_route *ax25_route_list;
-static DEFINE_RWLOCK(ax25_route_lock);
+DEFINE_RWLOCK(ax25_route_lock);
 
 void ax25_rt_device_down(struct net_device *dev)
 {
@@ -335,6 +332,7 @@
  *	Find AX.25 route
  *
  *	Only routes with a reference count of zero can be destroyed.
+ *	Must be called with ax25_route_lock read locked.
  */
 ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
 {
@@ -342,7 +340,6 @@
 	ax25_route *ax25_def_rt = NULL;
 	ax25_route *ax25_rt;
 
-	read_lock(&ax25_route_lock);
 	/*
 	 *	Bind to the physical interface we heard them on, or the default
 	 *	route if none is found;
@@ -365,11 +362,6 @@
 	if (ax25_spe_rt != NULL)
 		ax25_rt = ax25_spe_rt;
 
-	if (ax25_rt != NULL)
-		ax25_hold_route(ax25_rt);
-
-	read_unlock(&ax25_route_lock);
-
 	return ax25_rt;
 }
 
@@ -400,9 +392,12 @@
 	ax25_route *ax25_rt;
 	int err = 0;
 
-	if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
+	ax25_route_lock_use();
+	ax25_rt = ax25_get_route(addr, NULL);
+	if (!ax25_rt) {
+		ax25_route_lock_unuse();
 		return -EHOSTUNREACH;
-
+	}
 	if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) {
 		err = -EHOSTUNREACH;
 		goto put;
@@ -431,14 +426,15 @@
 	}
 
 	if (ax25->sk != NULL) {
+		local_bh_disable();
 		bh_lock_sock(ax25->sk);
 		sock_reset_flag(ax25->sk, SOCK_ZAPPED);
 		bh_unlock_sock(ax25->sk);
+		local_bh_enable();
 	}
 
 put:
-	ax25_put_route(ax25_rt);
-
+	ax25_route_lock_unuse();
 	return err;
 }
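
The locking model flips here: ax25_get_route() no longer takes a reference on the route it returns (the hold/put pair is gone), so the result is only valid while ax25_route_lock is held for reading. ax25_route_lock_use()/ax25_route_lock_unuse() are read_lock()/read_unlock() wrappers added by the matching ax25.h change (not shown in this excerpt), which is also why the lock loses its static qualifier above. The new calling convention, sketched:

    static int example_route_check(ax25_address *addr)
    {
            ax25_route *ax25_rt;
            int err = -EHOSTUNREACH;

            ax25_route_lock_use();   /* read_lock(&ax25_route_lock) */
            ax25_rt = ax25_get_route(addr, NULL);
            if (ax25_rt)
                    err = 0;         /* ax25_rt valid only while locked */
            ax25_route_lock_unuse(); /* ax25_rt must not be used past here */

            return err;
    }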
 
diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c
index 8632b86..ba17619 100644
--- a/net/ax25/ax25_std_in.c
+++ b/net/ax25/ax25_std_in.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
diff --git a/net/ax25/ax25_std_subr.c b/net/ax25/ax25_std_subr.c
index 94bd063..4c36f13 100644
--- a/net/ax25/ax25_std_subr.c
+++ b/net/ax25/ax25_std_subr.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 30bbc67..b17da41 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 038b109..15ab812 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index c47b7ee..85865eb 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 99d02e3..241e468 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index 919a5ce..2154d00 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com)
  */
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 361116f..d5028af 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,19 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of version 2 of the GNU General Public
-# License as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 #
 # B.A.T.M.A.N meshing protocol
@@ -22,14 +10,13 @@
 config BATMAN_ADV
 	tristate "B.A.T.M.A.N. Advanced Meshing Protocol"
 	depends on NET
-	select CRC16
 	select LIBCRC32C
 	help
-          B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
-          a routing protocol for multi-hop ad-hoc mesh networks. The
-          networks may be wired or wireless. See
-          https://www.open-mesh.org/ for more information and user space
-          tools.
+	  B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
+	  a routing protocol for multi-hop ad-hoc mesh networks. The
+	  networks may be wired or wireless. See
+	  https://www.open-mesh.org/ for more information and user space
+	  tools.
 
 config BATMAN_ADV_BATMAN_V
 	bool "B.A.T.M.A.N. V protocol"
@@ -48,6 +35,7 @@
 config BATMAN_ADV_BLA
 	bool "Bridge Loop Avoidance"
 	depends on BATMAN_ADV && INET
+	select CRC16
 	default y
 	help
 	  This option enables BLA (Bridge Loop Avoidance), a mechanism
@@ -82,6 +70,7 @@
 config BATMAN_ADV_MCAST
 	bool "Multicast optimisation"
 	depends on BATMAN_ADV && INET && !(BRIDGE=m && BATMAN_ADV=y)
+	default y
 	help
 	  This option enables the multicast optimisation which aims to
 	  reduce the air overhead while improving the reliability of
@@ -100,9 +89,33 @@
 
 config BATMAN_ADV_DEBUG
 	bool "B.A.T.M.A.N. debugging"
-	depends on BATMAN_ADV_DEBUGFS
+	depends on BATMAN_ADV
 	help
 	  This is an option for use by developers; most people should
 	  say N here. This enables compilation of support for
-	  outputting debugging information to the kernel log. The
-	  output is controlled via the module parameter debug.
+	  outputting debugging information to the debugfs log or tracing
+	  buffer. The output is controlled via the batadv netdev specific
+	  log_level setting.
+
+config BATMAN_ADV_SYSFS
+	bool "batman-adv sysfs entries"
+	depends on BATMAN_ADV
+	default y
+	help
+	  Say Y here if you want to enable batman-adv device configuration and
+	  status interface through sysfs attributes. It is replaced by the
+	  batadv generic netlink family but still used by various userspace
+	  tools and scripts.
+
+	  If unsure, say Y.
+
+config BATMAN_ADV_TRACING
+	bool "B.A.T.M.A.N. tracing support"
+	depends on BATMAN_ADV
+	depends on EVENT_TRACING
+	help
+	  This is an option for use by developers; most people should
+	  say N here. Select this option to gather traces like the debug
+	  messages using the generic tracing infrastructure of the kernel.
+	  BATMAN_ADV_DEBUG must also be selected to get trace events for
+	  batadv_dbg.
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index b97ba6f..fd63e11 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,20 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of version 2 of the GNU General Public
-# License as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
-#
 
 obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
 batman-adv-y += bat_algo.o
@@ -41,7 +28,10 @@
 batman-adv-y += routing.o
 batman-adv-y += send.o
 batman-adv-y += soft-interface.o
-batman-adv-y += sysfs.o
+batman-adv-$(CONFIG_BATMAN_ADV_SYSFS) += sysfs.o
+batman-adv-$(CONFIG_BATMAN_ADV_TRACING) += trace.o
 batman-adv-y += tp_meter.o
 batman-adv-y += translation-table.o
 batman-adv-y += tvlv.o
+
+CFLAGS_trace.o := -I$(src)
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index ea309ad..fa39eaa 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "main.h"
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 534b790..37898da 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BAT_ALGO_H_
@@ -21,12 +9,11 @@
 
 #include "main.h"
 
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/types.h>
 
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
-
 extern char batadv_routing_algo[];
 extern struct list_head batadv_hardif_list;
 
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 73bf6a9..5b0b20e 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "bat_iv_ogm.h"
@@ -35,6 +23,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/lockdep.h>
+#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/pkt_sched.h>
@@ -138,169 +127,6 @@
 }
 
 /**
- * batadv_iv_ogm_orig_free() - free the private resources allocated for this
- *  orig_node
- * @orig_node: the orig_node for which the resources have to be free'd
- */
-static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
-{
-	kfree(orig_node->bat_iv.bcast_own);
-	kfree(orig_node->bat_iv.bcast_own_sum);
-}
-
-/**
- * batadv_iv_ogm_orig_add_if() - change the private structures of the orig_node
- *  to include the new hard-interface
- * @orig_node: the orig_node that has to be changed
- * @max_if_num: the current amount of interfaces
- *
- * Return: 0 on success, a negative error code otherwise.
- */
-static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
-				     unsigned int max_if_num)
-{
-	void *data_ptr;
-	size_t old_size;
-	int ret = -ENOMEM;
-
-	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-
-	old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS;
-	data_ptr = kmalloc_array(max_if_num,
-				 BATADV_NUM_WORDS * sizeof(unsigned long),
-				 GFP_ATOMIC);
-	if (!data_ptr)
-		goto unlock;
-
-	memcpy(data_ptr, orig_node->bat_iv.bcast_own, old_size);
-	kfree(orig_node->bat_iv.bcast_own);
-	orig_node->bat_iv.bcast_own = data_ptr;
-
-	data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC);
-	if (!data_ptr)
-		goto unlock;
-
-	memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
-	       (max_if_num - 1) * sizeof(u8));
-	kfree(orig_node->bat_iv.bcast_own_sum);
-	orig_node->bat_iv.bcast_own_sum = data_ptr;
-
-	ret = 0;
-
-unlock:
-	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-
-	return ret;
-}
-
-/**
- * batadv_iv_ogm_drop_bcast_own_entry() - drop section of bcast_own
- * @orig_node: the orig_node that has to be changed
- * @max_if_num: the current amount of interfaces
- * @del_if_num: the index of the interface being removed
- */
-static void
-batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
-				   unsigned int max_if_num,
-				   unsigned int del_if_num)
-{
-	size_t chunk_size;
-	size_t if_offset;
-	void *data_ptr;
-
-	lockdep_assert_held(&orig_node->bat_iv.ogm_cnt_lock);
-
-	chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS;
-	data_ptr = kmalloc_array(max_if_num, chunk_size, GFP_ATOMIC);
-	if (!data_ptr)
-		/* use old buffer when new one could not be allocated */
-		data_ptr = orig_node->bat_iv.bcast_own;
-
-	/* copy first part */
-	memmove(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
-
-	/* copy second part */
-	if_offset = (del_if_num + 1) * chunk_size;
-	memmove((char *)data_ptr + del_if_num * chunk_size,
-		(uint8_t *)orig_node->bat_iv.bcast_own + if_offset,
-		(max_if_num - del_if_num) * chunk_size);
-
-	/* bcast_own was shrunk down in new buffer; free old one */
-	if (orig_node->bat_iv.bcast_own != data_ptr) {
-		kfree(orig_node->bat_iv.bcast_own);
-		orig_node->bat_iv.bcast_own = data_ptr;
-	}
-}
-
-/**
- * batadv_iv_ogm_drop_bcast_own_sum_entry() - drop section of bcast_own_sum
- * @orig_node: the orig_node that has to be changed
- * @max_if_num: the current amount of interfaces
- * @del_if_num: the index of the interface being removed
- */
-static void
-batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
-				       unsigned int max_if_num,
-				       unsigned int del_if_num)
-{
-	size_t if_offset;
-	void *data_ptr;
-
-	lockdep_assert_held(&orig_node->bat_iv.ogm_cnt_lock);
-
-	data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC);
-	if (!data_ptr)
-		/* use old buffer when new one could not be allocated */
-		data_ptr = orig_node->bat_iv.bcast_own_sum;
-
-	memmove(data_ptr, orig_node->bat_iv.bcast_own_sum,
-		del_if_num * sizeof(u8));
-
-	if_offset = (del_if_num + 1) * sizeof(u8);
-	memmove((char *)data_ptr + del_if_num * sizeof(u8),
-		orig_node->bat_iv.bcast_own_sum + if_offset,
-		(max_if_num - del_if_num) * sizeof(u8));
-
-	/* bcast_own_sum was shrunk down in new buffer; free old one */
-	if (orig_node->bat_iv.bcast_own_sum != data_ptr) {
-		kfree(orig_node->bat_iv.bcast_own_sum);
-		orig_node->bat_iv.bcast_own_sum = data_ptr;
-	}
-}
-
-/**
- * batadv_iv_ogm_orig_del_if() - change the private structures of the orig_node
- *  to exclude the removed interface
- * @orig_node: the orig_node that has to be changed
- * @max_if_num: the current amount of interfaces
- * @del_if_num: the index of the interface being removed
- *
- * Return: 0 on success, a negative error code otherwise.
- */
-static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
-				     unsigned int max_if_num,
-				     unsigned int del_if_num)
-{
-	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-
-	if (max_if_num == 0) {
-		kfree(orig_node->bat_iv.bcast_own);
-		kfree(orig_node->bat_iv.bcast_own_sum);
-		orig_node->bat_iv.bcast_own = NULL;
-		orig_node->bat_iv.bcast_own_sum = NULL;
-	} else {
-		batadv_iv_ogm_drop_bcast_own_entry(orig_node, max_if_num,
-						   del_if_num);
-		batadv_iv_ogm_drop_bcast_own_sum_entry(orig_node, max_if_num,
-						       del_if_num);
-	}
-
-	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-
-	return 0;
-}
-
-/**
  * batadv_iv_ogm_orig_get() - retrieve or create (if does not exist) an
  *  originator
  * @bat_priv: the bat priv with all the soft interface information
@@ -315,7 +141,6 @@
 {
 	struct batadv_orig_node *orig_node;
 	int hash_added;
-	size_t size;
 
 	orig_node = batadv_orig_hash_find(bat_priv, addr);
 	if (orig_node)
@@ -327,16 +152,6 @@
 
 	spin_lock_init(&orig_node->bat_iv.ogm_cnt_lock);
 
-	size = bat_priv->num_ifaces * sizeof(unsigned long) * BATADV_NUM_WORDS;
-	orig_node->bat_iv.bcast_own = kzalloc(size, GFP_ATOMIC);
-	if (!orig_node->bat_iv.bcast_own)
-		goto free_orig_node;
-
-	size = bat_priv->num_ifaces * sizeof(u8);
-	orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC);
-	if (!orig_node->bat_iv.bcast_own_sum)
-		goto free_orig_node;
-
 	kref_get(&orig_node->refcount);
 	hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
 				     batadv_choose_orig, orig_node,
@@ -347,8 +162,9 @@
 	return orig_node;
 
 free_orig_node_hash:
+	/* reference for batadv_hash_add */
 	batadv_orig_node_put(orig_node);
-free_orig_node:
+	/* reference from batadv_orig_node_new */
 	batadv_orig_node_put(orig_node);
 
 	return NULL;
@@ -379,14 +195,18 @@
 	unsigned char *ogm_buff;
 	u32 random_seqno;
 
+	mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
 	/* randomize initial seqno to avoid collision */
 	get_random_bytes(&random_seqno, sizeof(random_seqno));
 	atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno);
 
 	hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN;
 	ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC);
-	if (!ogm_buff)
+	if (!ogm_buff) {
+		mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
 		return -ENOMEM;
+	}
 
 	hard_iface->bat_iv.ogm_buff = ogm_buff;
 
@@ -398,35 +218,59 @@
 	batadv_ogm_packet->reserved = 0;
 	batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE;
 
+	mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
+
 	return 0;
 }
 
 static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
 {
+	mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
 	kfree(hard_iface->bat_iv.ogm_buff);
 	hard_iface->bat_iv.ogm_buff = NULL;
+
+	mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
 }
 
 static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_ogm_packet *batadv_ogm_packet;
-	unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
+	void *ogm_buff;
 
-	batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+	mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
+	ogm_buff = hard_iface->bat_iv.ogm_buff;
+	if (!ogm_buff)
+		goto unlock;
+
+	batadv_ogm_packet = ogm_buff;
 	ether_addr_copy(batadv_ogm_packet->orig,
 			hard_iface->net_dev->dev_addr);
 	ether_addr_copy(batadv_ogm_packet->prev_sender,
 			hard_iface->net_dev->dev_addr);
+
+unlock:
+	mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
 }
 
 static void
 batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_ogm_packet *batadv_ogm_packet;
-	unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
+	void *ogm_buff;
 
-	batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+	mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
+	ogm_buff = hard_iface->bat_iv.ogm_buff;
+	if (!ogm_buff)
+		goto unlock;
+
+	batadv_ogm_packet = ogm_buff;
 	batadv_ogm_packet->ttl = BATADV_TTL;
+
+unlock:
+	mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
 }
 
 /* when do we schedule our own ogm to be sent */
@@ -463,17 +307,23 @@
  * batadv_iv_ogm_aggr_packet() - checks if there is another OGM attached
  * @buff_pos: current position in the skb
  * @packet_len: total length of the skb
- * @tvlv_len: tvlv length of the previously considered OGM
+ * @ogm_packet: potential OGM in buffer
  *
  * Return: true if there is enough space for another OGM, false otherwise.
  */
-static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
-				      __be16 tvlv_len)
+static bool
+batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
+			  const struct batadv_ogm_packet *ogm_packet)
 {
 	int next_buff_pos = 0;
 
-	next_buff_pos += buff_pos + BATADV_OGM_HLEN;
-	next_buff_pos += ntohs(tvlv_len);
+	/* check if there is enough space for the header */
+	next_buff_pos += buff_pos + sizeof(*ogm_packet);
+	if (next_buff_pos > packet_len)
+		return false;
+
+	/* check if there is enough space for the optional TVLV */
+	next_buff_pos += ntohs(ogm_packet->tvlv_len);
 
 	return (next_buff_pos <= packet_len) &&
 	       (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES);
@@ -501,7 +351,7 @@
 
 	/* adjust all flags and log packets */
 	while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len,
-					 batadv_ogm_packet->tvlv_len)) {
+					 batadv_ogm_packet)) {
 		/* we might have aggregated direct link packets with an
 		 * ordinary base packet
 		 */
@@ -893,32 +743,40 @@
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
 	struct hlist_head *head;
 	struct batadv_orig_node *orig_node;
+	struct batadv_orig_ifinfo *orig_ifinfo;
 	unsigned long *word;
 	u32 i;
-	size_t word_index;
 	u8 *w;
-	unsigned int if_num;
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
-			spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-			word_index = hard_iface->if_num * BATADV_NUM_WORDS;
-			word = &orig_node->bat_iv.bcast_own[word_index];
+			hlist_for_each_entry_rcu(orig_ifinfo,
+						 &orig_node->ifinfo_list,
+						 list) {
+				if (orig_ifinfo->if_outgoing != hard_iface)
+					continue;
 
-			batadv_bit_get_packet(bat_priv, word, 1, 0);
-			if_num = hard_iface->if_num;
-			w = &orig_node->bat_iv.bcast_own_sum[if_num];
-			*w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE);
-			spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+				spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+				word = orig_ifinfo->bat_iv.bcast_own;
+				batadv_bit_get_packet(bat_priv, word, 1, 0);
+				w = &orig_ifinfo->bat_iv.bcast_own_sum;
+				*w = bitmap_weight(word,
+						   BATADV_TQ_LOCAL_WINDOW_SIZE);
+				spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+			}
 		}
 		rcu_read_unlock();
 	}
 }
 
-static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
+/**
+ * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer
+ * @hard_iface: interface whose ogm buffer should be transmitted
+ */
+static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff;
@@ -929,9 +787,7 @@
 	u16 tvlv_len = 0;
 	unsigned long send_time;
 
-	if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
-	    hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
-		return;
+	lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex);
 
 	/* the interface gets activated here to avoid race conditions between
 	 * the moment of activating the interface in
@@ -999,6 +855,46 @@
 		batadv_hardif_put(primary_if);
 }
 
+static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
+{
+	if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+	    hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
+		return;
+
+	mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+	batadv_iv_ogm_schedule_buff(hard_iface);
+	mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
+}
+
+/**
+ * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface
+ * @orig_node: originator which rebroadcast the OGMs directly
+ * @if_outgoing: interface which transmitted the original OGM and received the
+ *  direct rebroadcast
+ *
+ * Return: Number of replied (rebroadcasted) OGMs which were transmitted by
+ *  an originator and directly (without intermediate hop) received by a specific
+ *  interface
+ */
+static u8 batadv_iv_orig_ifinfo_sum(struct batadv_orig_node *orig_node,
+				    struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_orig_ifinfo *orig_ifinfo;
+	u8 sum;
+
+	orig_ifinfo = batadv_orig_ifinfo_get(orig_node, if_outgoing);
+	if (!orig_ifinfo)
+		return 0;
+
+	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	sum = orig_ifinfo->bat_iv.bcast_own_sum;
+	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+	batadv_orig_ifinfo_put(orig_ifinfo);
+
+	return sum;
+}
+
 /**
  * batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an
  *  originator
@@ -1026,8 +922,6 @@
 	struct batadv_neigh_node *neigh_node = NULL;
 	struct batadv_neigh_node *tmp_neigh_node = NULL;
 	struct batadv_neigh_node *router = NULL;
-	struct batadv_orig_node *orig_node_tmp;
-	unsigned int if_num;
 	u8 sum_orig, sum_neigh;
 	u8 *neigh_addr;
 	u8 tq_avg;
@@ -1132,18 +1026,10 @@
 	 */
 	if (router_ifinfo &&
 	    neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg) {
-		orig_node_tmp = router->orig_node;
-		spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
-		if_num = router->if_incoming->if_num;
-		sum_orig = orig_node_tmp->bat_iv.bcast_own_sum[if_num];
-		spin_unlock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
-
-		orig_node_tmp = neigh_node->orig_node;
-		spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
-		if_num = neigh_node->if_incoming->if_num;
-		sum_neigh = orig_node_tmp->bat_iv.bcast_own_sum[if_num];
-		spin_unlock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
-
+		sum_orig = batadv_iv_orig_ifinfo_sum(router->orig_node,
+						     router->if_incoming);
+		sum_neigh = batadv_iv_orig_ifinfo_sum(neigh_node->orig_node,
+						      neigh_node->if_incoming);
 		if (sum_orig >= sum_neigh)
 			goto out;
 	}
@@ -1186,7 +1072,6 @@
 	u8 total_count;
 	u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
 	unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
-	unsigned int if_num;
 	unsigned int tq_asym_penalty, inv_asym_penalty;
 	unsigned int combined_tq;
 	unsigned int tq_iface_penalty;
@@ -1227,9 +1112,7 @@
 	orig_node->last_seen = jiffies;
 
 	/* find packet count of corresponding one hop neighbor */
-	spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
-	if_num = if_incoming->if_num;
-	orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
+	orig_eq_count = batadv_iv_orig_ifinfo_sum(orig_neigh_node, if_incoming);
 	neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
 	if (neigh_ifinfo) {
 		neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count;
@@ -1237,7 +1120,6 @@
 	} else {
 		neigh_rq_count = 0;
 	}
-	spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
 
 	/* pay attention to not get a value bigger than 100 % */
 	if (orig_eq_count > neigh_rq_count)
@@ -1622,6 +1504,49 @@
 }
 
 /**
+ * batadv_iv_ogm_process_reply() - Check OGM for direct reply and process it
+ * @ogm_packet: rebroadcast OGM packet to process
+ * @if_incoming: the interface where this packet was received
+ * @orig_node: originator which rebroadcast the OGMs
+ * @if_incoming_seqno: OGM sequence number when rebroadcast was received
+ */
+static void batadv_iv_ogm_process_reply(struct batadv_ogm_packet *ogm_packet,
+					struct batadv_hard_iface *if_incoming,
+					struct batadv_orig_node *orig_node,
+					u32 if_incoming_seqno)
+{
+	struct batadv_orig_ifinfo *orig_ifinfo;
+	s32 bit_pos;
+	u8 *weight;
+
+	/* neighbor has to indicate direct link and it has to
+	 * come via the corresponding interface
+	 */
+	if (!(ogm_packet->flags & BATADV_DIRECTLINK))
+		return;
+
+	if (!batadv_compare_eth(if_incoming->net_dev->dev_addr,
+				ogm_packet->orig))
+		return;
+
+	orig_ifinfo = batadv_orig_ifinfo_get(orig_node, if_incoming);
+	if (!orig_ifinfo)
+		return;
+
+	/* save packet seqno for bidirectional check */
+	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	bit_pos = if_incoming_seqno - 2;
+	bit_pos -= ntohl(ogm_packet->seqno);
+	batadv_set_bit(orig_ifinfo->bat_iv.bcast_own, bit_pos);
+	weight = &orig_ifinfo->bat_iv.bcast_own_sum;
+	*weight = bitmap_weight(orig_ifinfo->bat_iv.bcast_own,
+				BATADV_TQ_LOCAL_WINDOW_SIZE);
+	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+	batadv_orig_ifinfo_put(orig_ifinfo);
+}
+
+/**
  * batadv_iv_ogm_process() - process an incoming batman iv OGM
  * @skb: the skb containing the OGM
  * @ogm_offset: offset to the OGM which should be processed (for aggregates)
@@ -1705,37 +1630,13 @@
 	}
 
 	if (is_my_orig) {
-		unsigned long *word;
-		size_t offset;
-		s32 bit_pos;
-		unsigned int if_num;
-		u8 *weight;
-
 		orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
 							 ethhdr->h_source);
 		if (!orig_neigh_node)
 			return;
 
-		/* neighbor has to indicate direct link and it has to
-		 * come via the corresponding interface
-		 * save packet seqno for bidirectional check
-		 */
-		if (has_directlink_flag &&
-		    batadv_compare_eth(if_incoming->net_dev->dev_addr,
-				       ogm_packet->orig)) {
-			if_num = if_incoming->if_num;
-			offset = if_num * BATADV_NUM_WORDS;
-
-			spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
-			word = &orig_neigh_node->bat_iv.bcast_own[offset];
-			bit_pos = if_incoming_seqno - 2;
-			bit_pos -= ntohl(ogm_packet->seqno);
-			batadv_set_bit(word, bit_pos);
-			weight = &orig_neigh_node->bat_iv.bcast_own_sum[if_num];
-			*weight = bitmap_weight(word,
-						BATADV_TQ_LOCAL_WINDOW_SIZE);
-			spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
-		}
+		batadv_iv_ogm_process_reply(ogm_packet, if_incoming,
+					    orig_neigh_node, if_incoming_seqno);
 
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Drop packet: originator packet from myself (via neighbor)\n");
@@ -1852,7 +1753,7 @@
 
 	/* unpack the aggregated packets and process them one by one */
 	while (batadv_iv_ogm_aggr_packet(ogm_offset, skb_headlen(skb),
-					 ogm_packet->tvlv_len)) {
+					 ogm_packet)) {
 		batadv_iv_ogm_process(skb, ogm_offset, if_incoming);
 
 		ogm_offset += BATADV_OGM_HLEN;
@@ -2485,7 +2386,7 @@
 	return ret;
 }
 
-static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
+static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface)
 {
 	/* begin scheduling originator messages on that interface */
 	batadv_iv_ogm_schedule(hard_iface);
@@ -2720,13 +2621,14 @@
  * batadv_iv_gw_dump_entry() - Dump a gateway into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @bat_priv: The bat priv with all the soft interface information
  * @gw_node: Gateway to be dumped
  *
  * Return: Error code, or 0 on success
  */
-static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid,
+				   struct netlink_callback *cb,
 				   struct batadv_priv *bat_priv,
 				   struct batadv_gw_node *gw_node)
 {
@@ -2746,13 +2648,16 @@
 
 	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
 
-	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
-			  NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+	hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+			  &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_GATEWAYS);
 	if (!hdr) {
 		ret = -ENOBUFS;
 		goto out;
 	}
 
+	genl_dump_check_consistent(cb, hdr);
+
 	ret = -EMSGSIZE;
 
 	if (curr_gw == gw_node)
@@ -2803,13 +2708,15 @@
 	int idx_skip = cb->args[0];
 	int idx = 0;
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
+	spin_lock_bh(&bat_priv->gw.list_lock);
+	cb->seq = bat_priv->gw.generation << 1 | 1;
+
+	hlist_for_each_entry(gw_node, &bat_priv->gw.gateway_list, list) {
 		if (idx++ < idx_skip)
 			continue;
 
-		if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
-					    bat_priv, gw_node)) {
+		if (batadv_iv_gw_dump_entry(msg, portid, cb, bat_priv,
+					    gw_node)) {
 			idx_skip = idx - 1;
 			goto unlock;
 		}
@@ -2817,7 +2724,7 @@
 
 	idx_skip = idx;
 unlock:
-	rcu_read_unlock();
+	spin_unlock_bh(&bat_priv->gw.list_lock);
 
 	cb->args[0] = idx_skip;
 }
@@ -2825,8 +2732,8 @@
 static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.name = "BATMAN_IV",
 	.iface = {
-		.activate = batadv_iv_iface_activate,
 		.enable = batadv_iv_ogm_iface_enable,
+		.enabled = batadv_iv_iface_enabled,
 		.disable = batadv_iv_ogm_iface_disable,
 		.update_mac = batadv_iv_ogm_iface_update_mac,
 		.primary_set = batadv_iv_ogm_primary_iface_set,
@@ -2844,9 +2751,6 @@
 		.print = batadv_iv_ogm_orig_print,
 #endif
 		.dump = batadv_iv_ogm_orig_dump,
-		.free = batadv_iv_ogm_orig_free,
-		.add_if = batadv_iv_ogm_orig_add_if,
-		.del_if = batadv_iv_ogm_orig_del_if,
 	},
 	.gw = {
 		.init_sel_class = batadv_iv_init_sel_class,
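
One rule threads through all the bat_iv_ogm.c hunks above: hard_iface->bat_iv.ogm_buff (plus its length and seqno) may only be touched while holding the new ogm_buff_mutex, and every accessor re-checks the pointer for NULL because iface_disable() can free the buffer in between. Reduced to its skeleton (ogm_buff_mutex is the member added by the matching types.h change, not shown here):

    static void example_update_ogm_buff(struct batadv_hard_iface *hard_iface)
    {
            mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);

            /* re-check: iface_disable() frees the buffer under this mutex */
            if (!hard_iface->bat_iv.ogm_buff)
                    goto unlock;

            /* safe to read, rewrite or reallocate the OGM buffer here */

    unlock:
            mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
    }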
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 3dc6a7a..c7a9ba3 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BAT_IV_OGM_H_
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 6baec4e..64054ed 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "bat_v.h"
@@ -27,11 +15,14 @@
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
+#include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
@@ -51,8 +42,6 @@
 #include "netlink.h"
 #include "originator.h"
 
-struct sk_buff;
-
 static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -90,6 +79,7 @@
 
 static void batadv_v_iface_disable(struct batadv_hard_iface *hard_iface)
 {
+	batadv_v_ogm_iface_disable(hard_iface);
 	batadv_v_elp_iface_disable(hard_iface);
 }
 
@@ -915,13 +905,14 @@
  * batadv_v_gw_dump_entry() - Dump a gateway into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @bat_priv: The bat priv with all the soft interface information
  * @gw_node: Gateway to be dumped
  *
  * Return: Error code, or 0 on success
  */
-static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid,
+				  struct netlink_callback *cb,
 				  struct batadv_priv *bat_priv,
 				  struct batadv_gw_node *gw_node)
 {
@@ -941,13 +932,16 @@
 
 	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
 
-	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
-			  NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+	hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+			  &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_GATEWAYS);
 	if (!hdr) {
 		ret = -ENOBUFS;
 		goto out;
 	}
 
+	genl_dump_check_consistent(cb, hdr);
+
 	ret = -EMSGSIZE;
 
 	if (curr_gw == gw_node) {
@@ -1018,13 +1012,15 @@
 	int idx_skip = cb->args[0];
 	int idx = 0;
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
+	spin_lock_bh(&bat_priv->gw.list_lock);
+	cb->seq = bat_priv->gw.generation << 1 | 1;
+
+	hlist_for_each_entry(gw_node, &bat_priv->gw.gateway_list, list) {
 		if (idx++ < idx_skip)
 			continue;
 
-		if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
-					   bat_priv, gw_node)) {
+		if (batadv_v_gw_dump_entry(msg, portid, cb, bat_priv,
+					   gw_node)) {
 			idx_skip = idx - 1;
 			goto unlock;
 		}
@@ -1032,7 +1028,7 @@
 
 	idx_skip = idx;
 unlock:
-	rcu_read_unlock();
+	spin_unlock_bh(&bat_priv->gw.list_lock);
 
 	cb->args[0] = idx_skip;
 }
@@ -1086,6 +1082,12 @@
 	 */
 	atomic_set(&hard_iface->bat_v.throughput_override, 0);
 	atomic_set(&hard_iface->bat_v.elp_interval, 500);
+
+	hard_iface->bat_v.aggr_len = 0;
+	skb_queue_head_init(&hard_iface->bat_v.aggr_list);
+	spin_lock_init(&hard_iface->bat_v.aggr_list_lock);
+	INIT_DELAYED_WORK(&hard_iface->bat_v.aggr_wq,
+			  batadv_v_ogm_aggr_work);
 }
 
 /**
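
The interface-init hunk above wires up the three pieces every worker-drained skb queue needs: the queue itself, a lock guarding the byte accounting next to it, and the delayed work that flushes it. A generic sketch of that idiom — demo_iface and demo_flush_work are hypothetical names, not part of the patch:

	static void demo_flush_work(struct work_struct *work);

	struct demo_iface {
		unsigned int aggr_len;          /* bytes queued, guarded by lock */
		struct sk_buff_head aggr_list;  /* packets awaiting aggregation */
		spinlock_t lock;
		struct delayed_work flush_work;
	};

	static void demo_iface_init(struct demo_iface *di)
	{
		di->aggr_len = 0;
		skb_queue_head_init(&di->aggr_list); /* also inits the queue's own lock */
		spin_lock_init(&di->lock);
		INIT_DELAYED_WORK(&di->flush_work, demo_flush_work);
	}
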
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index ec4a2a5..37833db 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BAT_V_H_
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index e8090f0..2614a9c 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "bat_v_elp.h"
@@ -104,6 +92,11 @@
 
 		ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
 
+		if (!ret) {
+			/* free the TID stats immediately */
+			cfg80211_sinfo_release_content(&sinfo);
+		}
+
 		dev_put(real_netdev);
 		if (ret == -ENOENT) {
 			/* Node is not associated anymore! It would be
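
The bat_v_elp.c hunk above plugs a leak: cfg80211_get_station() allocates per-TID statistics inside the station_info it fills in, so every successful call must be paired with cfg80211_sinfo_release_content(). A hedged sketch of the pairing (error handling trimmed):

	struct station_info sinfo;
	int ret;

	ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
	if (!ret) {
		/* the caller copies sinfo.expected_throughput (or whatever
		 * else it needs) here, before the release below frees the
		 * allocated per-TID statistics */
		cfg80211_sinfo_release_content(&sinfo);
	}
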
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index e8c7b7f..1a29505 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BAT_V_ELP_H_
@@ -21,8 +9,8 @@
 
 #include "main.h"
 
-struct sk_buff;
-struct work_struct;
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
 
 int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
 void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 2948b41..8033f24 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "bat_v_ogm.h"
@@ -29,12 +17,15 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/random.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/types.h>
@@ -89,6 +80,20 @@
 }
 
 /**
+ * batadv_v_ogm_start_queue_timer() - restart the OGM aggregation timer
+ * @hard_iface: the interface to use to send the OGM
+ */
+static void batadv_v_ogm_start_queue_timer(struct batadv_hard_iface *hard_iface)
+{
+	unsigned int msecs = BATADV_MAX_AGGREGATION_MS * 1000;
+
+	/* msecs * [0.9, 1.1] */
+	msecs += prandom_u32() % (msecs / 5) - (msecs / 10);
+	queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.aggr_wq,
+			   msecs_to_jiffies(msecs / 1000));
+}
+
+/**
  * batadv_v_ogm_start_timer() - restart the OGM sending timer
  * @bat_priv: the bat priv with all the soft interface information
  */
@@ -128,14 +133,136 @@
 }
 
 /**
- * batadv_v_ogm_send() - periodic worker broadcasting the own OGM
- * @work: work queue item
+ * batadv_v_ogm_len() - OGMv2 packet length
+ * @skb: the OGM to check
+ *
+ * Return: Length of the given OGMv2 packet, including tvlv length, excluding
+ * ethernet header length.
  */
-static void batadv_v_ogm_send(struct work_struct *work)
+static unsigned int batadv_v_ogm_len(struct sk_buff *skb)
+{
+	struct batadv_ogm2_packet *ogm_packet;
+
+	ogm_packet = (struct batadv_ogm2_packet *)skb->data;
+	return BATADV_OGM2_HLEN + ntohs(ogm_packet->tvlv_len);
+}
+
+/**
+ * batadv_v_ogm_queue_left() - check if given OGM still fits aggregation queue
+ * @skb: the OGM to check
+ * @hard_iface: the interface to use to send the OGM
+ *
+ * Caller needs to hold the hard_iface->bat_v.aggr_list_lock.
+ *
+ * Return: True, if the given OGMv2 packet still fits, false otherwise.
+ */
+static bool batadv_v_ogm_queue_left(struct sk_buff *skb,
+				    struct batadv_hard_iface *hard_iface)
+{
+	unsigned int max = min_t(unsigned int, hard_iface->net_dev->mtu,
+				 BATADV_MAX_AGGREGATION_BYTES);
+	unsigned int ogm_len = batadv_v_ogm_len(skb);
+
+	lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock);
+
+	return hard_iface->bat_v.aggr_len + ogm_len <= max;
+}
+
+/**
+ * batadv_v_ogm_aggr_list_free() - free all elements in an aggregation queue
+ * @hard_iface: the interface holding the aggregation queue
+ *
+ * Empties the OGMv2 aggregation queue and frees all the skbs it contained.
+ *
+ * Caller needs to hold the hard_iface->bat_v.aggr_list_lock.
+ */
+static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface)
+{
+	struct sk_buff *skb;
+
+	lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock);
+
+	while ((skb = skb_dequeue(&hard_iface->bat_v.aggr_list)))
+		kfree_skb(skb);
+
+	hard_iface->bat_v.aggr_len = 0;
+}
+
+/**
+ * batadv_v_ogm_aggr_send() - flush & send aggregation queue
+ * @hard_iface: the interface with the aggregation queue to flush
+ *
+ * Aggregates all OGMv2 packets currently in the aggregation queue into a
+ * single OGMv2 packet and transmits this aggregate.
+ *
+ * The aggregation queue is empty after this call.
+ *
+ * Caller needs to hold the hard_iface->bat_v.aggr_list_lock.
+ */
+static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface)
+{
+	unsigned int aggr_len = hard_iface->bat_v.aggr_len;
+	struct sk_buff *skb_aggr;
+	unsigned int ogm_len;
+	struct sk_buff *skb;
+
+	lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock);
+
+	if (!aggr_len)
+		return;
+
+	skb_aggr = dev_alloc_skb(aggr_len + ETH_HLEN + NET_IP_ALIGN);
+	if (!skb_aggr) {
+		batadv_v_ogm_aggr_list_free(hard_iface);
+		return;
+	}
+
+	skb_reserve(skb_aggr, ETH_HLEN + NET_IP_ALIGN);
+	skb_reset_network_header(skb_aggr);
+
+	while ((skb = skb_dequeue(&hard_iface->bat_v.aggr_list))) {
+		hard_iface->bat_v.aggr_len -= batadv_v_ogm_len(skb);
+
+		ogm_len = batadv_v_ogm_len(skb);
+		skb_put_data(skb_aggr, skb->data, ogm_len);
+
+		consume_skb(skb);
+	}
+
+	batadv_v_ogm_send_to_if(skb_aggr, hard_iface);
+}
+
+/**
+ * batadv_v_ogm_queue_on_if() - queue a batman OGM on a given interface
+ * @skb: the OGM to queue
+ * @hard_iface: the interface to queue the OGM on
+ */
+static void batadv_v_ogm_queue_on_if(struct sk_buff *skb,
+				     struct batadv_hard_iface *hard_iface)
+{
+	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+
+	if (!atomic_read(&bat_priv->aggregated_ogms)) {
+		batadv_v_ogm_send_to_if(skb, hard_iface);
+		return;
+	}
+
+	spin_lock_bh(&hard_iface->bat_v.aggr_list_lock);
+	if (!batadv_v_ogm_queue_left(skb, hard_iface))
+		batadv_v_ogm_aggr_send(hard_iface);
+
+	hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb);
+	skb_queue_tail(&hard_iface->bat_v.aggr_list, skb);
+	spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock);
+}
+
+/**
+ * batadv_v_ogm_send_softif() - periodic worker broadcasting the own OGM
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv)
 {
 	struct batadv_hard_iface *hard_iface;
-	struct batadv_priv_bat_v *bat_v;
-	struct batadv_priv *bat_priv;
 	struct batadv_ogm2_packet *ogm_packet;
 	struct sk_buff *skb, *skb_tmp;
 	unsigned char *ogm_buff;
@@ -143,8 +270,7 @@
 	u16 tvlv_len = 0;
 	int ret;
 
-	bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
-	bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
+	lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex);
 
 	if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
 		goto out;
@@ -222,7 +348,7 @@
 			break;
 		}
 
-		batadv_v_ogm_send_to_if(skb_tmp, hard_iface);
+		batadv_v_ogm_queue_on_if(skb_tmp, hard_iface);
 		batadv_hardif_put(hard_iface);
 	}
 	rcu_read_unlock();
@@ -236,6 +362,44 @@
 }
 
 /**
+ * batadv_v_ogm_send() - periodic worker broadcasting the own OGM
+ * @work: work queue item
+ */
+static void batadv_v_ogm_send(struct work_struct *work)
+{
+	struct batadv_priv_bat_v *bat_v;
+	struct batadv_priv *bat_priv;
+
+	bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
+	bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
+
+	mutex_lock(&bat_priv->bat_v.ogm_buff_mutex);
+	batadv_v_ogm_send_softif(bat_priv);
+	mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex);
+}
+
+/**
+ * batadv_v_ogm_aggr_work() - OGM queue periodic task per interface
+ * @work: work queue item
+ *
+ * Emits aggregated OGM messages at regular intervals.
+ */
+void batadv_v_ogm_aggr_work(struct work_struct *work)
+{
+	struct batadv_hard_iface_bat_v *batv;
+	struct batadv_hard_iface *hard_iface;
+
+	batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work);
+	hard_iface = container_of(batv, struct batadv_hard_iface, bat_v);
+
+	spin_lock_bh(&hard_iface->bat_v.aggr_list_lock);
+	batadv_v_ogm_aggr_send(hard_iface);
+	spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock);
+
+	batadv_v_ogm_start_queue_timer(hard_iface);
+}
+
+/**
  * batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. V
  * @hard_iface: the interface to prepare
  *
@@ -247,12 +411,26 @@
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 
+	batadv_v_ogm_start_queue_timer(hard_iface);
 	batadv_v_ogm_start_timer(bat_priv);
 
 	return 0;
 }
 
 /**
+ * batadv_v_ogm_iface_disable() - release OGM interface private resources
+ * @hard_iface: interface for which the resources have to be released
+ */
+void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
+{
+	cancel_delayed_work_sync(&hard_iface->bat_v.aggr_wq);
+
+	spin_lock_bh(&hard_iface->bat_v.aggr_list_lock);
+	batadv_v_ogm_aggr_list_free(hard_iface);
+	spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock);
+}
+
+/**
  * batadv_v_ogm_primary_iface_set() - set a new primary interface
  * @primary_iface: the new primary interface
  */
@@ -261,11 +439,15 @@
 	struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface);
 	struct batadv_ogm2_packet *ogm_packet;
 
+	mutex_lock(&bat_priv->bat_v.ogm_buff_mutex);
 	if (!bat_priv->bat_v.ogm_buff)
-		return;
+		goto unlock;
 
 	ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff;
 	ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr);
+
+unlock:
+	mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex);
 }
 
 /**
@@ -394,7 +576,7 @@
 		   if_outgoing->net_dev->name, ntohl(ogm_forward->throughput),
 		   ogm_forward->ttl, if_incoming->net_dev->name);
 
-	batadv_v_ogm_send_to_if(skb, if_outgoing);
+	batadv_v_ogm_queue_on_if(skb, if_outgoing);
 
 out:
 	if (orig_ifinfo)
@@ -643,17 +825,23 @@
  * batadv_v_ogm_aggr_packet() - checks if there is another OGM aggregated
  * @buff_pos: current position in the skb
  * @packet_len: total length of the skb
- * @tvlv_len: tvlv length of the previously considered OGM
+ * @ogm2_packet: potential OGM2 in buffer
  *
  * Return: true if there is enough space for another OGM, false otherwise.
  */
-static bool batadv_v_ogm_aggr_packet(int buff_pos, int packet_len,
-				     __be16 tvlv_len)
+static bool
+batadv_v_ogm_aggr_packet(int buff_pos, int packet_len,
+			 const struct batadv_ogm2_packet *ogm2_packet)
 {
 	int next_buff_pos = 0;
 
-	next_buff_pos += buff_pos + BATADV_OGM2_HLEN;
-	next_buff_pos += ntohs(tvlv_len);
+	/* check if there is enough space for the header */
+	next_buff_pos += buff_pos + sizeof(*ogm2_packet);
+	if (next_buff_pos > packet_len)
+		return false;
+
+	/* check if there is enough space for the optional TVLV */
+	next_buff_pos += ntohs(ogm2_packet->tvlv_len);
 
 	return (next_buff_pos <= packet_len) &&
 	       (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES);
@@ -830,7 +1018,7 @@
 	ogm_packet = (struct batadv_ogm2_packet *)skb->data;
 
 	while (batadv_v_ogm_aggr_packet(ogm_offset, skb_headlen(skb),
-					ogm_packet->tvlv_len)) {
+					ogm_packet)) {
 		batadv_v_ogm_process(skb, ogm_offset, if_incoming);
 
 		ogm_offset += BATADV_OGM2_HLEN;
@@ -881,6 +1069,8 @@
 	atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno);
 	INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send);
 
+	mutex_init(&bat_priv->bat_v.ogm_buff_mutex);
+
 	return 0;
 }
 
@@ -892,7 +1082,11 @@
 {
 	cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq);
 
+	mutex_lock(&bat_priv->bat_v.ogm_buff_mutex);
+
 	kfree(bat_priv->bat_v.ogm_buff);
 	bat_priv->bat_v.ogm_buff = NULL;
 	bat_priv->bat_v.ogm_buff_len = 0;
+
+	mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex);
 }
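
batadv_v_ogm_start_queue_timer() above jitters the aggregation interval by roughly ±10% while doing the math in thousandths of a millisecond, so the modulo has enough resolution. A standalone, runnable illustration of that arithmetic — the base value 100 ms is an assumption here, standing in for BATADV_MAX_AGGREGATION_MS, and rand() stands in for prandom_u32():

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		const unsigned int base_ms = 100;      /* assumed base interval */
		unsigned int scaled = base_ms * 1000;  /* millisecond-thousandths */

		for (int i = 0; i < 5; i++) {
			/* a window of width scaled/5 shifted down by scaled/10
			 * yields a value in [0.9, 1.1) * base */
			unsigned int v = scaled + rand() % (scaled / 5) - scaled / 10;

			printf("%u ms (factor %.3f)\n", v / 1000,
			       (double)v / scaled);
		}
		return 0;
	}
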
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index e5be14c..bf16d04 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BAT_V_OGM_H_
@@ -21,13 +9,15 @@
 
 #include "main.h"
 
+#include <linux/skbuff.h>
 #include <linux/types.h>
-
-struct sk_buff;
+#include <linux/workqueue.h>
 
 int batadv_v_ogm_init(struct batadv_priv *bat_priv);
 void batadv_v_ogm_free(struct batadv_priv *bat_priv);
+void batadv_v_ogm_aggr_work(struct work_struct *work);
 int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface);
+void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface);
 struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
 					       const u8 *addr);
 void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface);
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index a296a4d..7f04a6a 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "bitarray.h"
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 48f6832..84ad2d2 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BITARRAY_H_
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 5f1aeed..663a53b 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "bridge_loop_avoidance.h"
@@ -59,7 +47,6 @@
 #include "netlink.h"
 #include "originator.h"
 #include "soft-interface.h"
-#include "sysfs.h"
 #include "translation-table.h"
 
 static const u8 batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05};
@@ -803,6 +790,8 @@
 				 const u8 *mac, const unsigned short vid)
 {
 	struct batadv_bla_claim search_claim, *claim;
+	struct batadv_bla_claim *claim_removed_entry;
+	struct hlist_node *claim_removed_node;
 
 	ether_addr_copy(search_claim.addr, mac);
 	search_claim.vid = vid;
@@ -813,10 +802,18 @@
 	batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__,
 		   mac, batadv_print_vid(vid));
 
-	batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
-			   batadv_choose_claim, claim);
-	batadv_claim_put(claim); /* reference from the hash is gone */
+	claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash,
+						batadv_compare_claim,
+						batadv_choose_claim, claim);
+	if (!claim_removed_node)
+		goto free_claim;
 
+	/* reference from the hash is gone */
+	claim_removed_entry = hlist_entry(claim_removed_node,
+					  struct batadv_bla_claim, hash_entry);
+	batadv_claim_put(claim_removed_entry);
+
+free_claim:
 	/* don't need the reference from hash_find() anymore */
 	batadv_claim_put(claim);
 }
@@ -2094,14 +2091,15 @@
  * to a netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @primary_if: primary interface
  * @claim: entry to dump
  *
  * Return: 0 or error code.
  */
 static int
-batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid,
+			    struct netlink_callback *cb,
 			    struct batadv_hard_iface *primary_if,
 			    struct batadv_bla_claim *claim)
 {
@@ -2111,13 +2109,16 @@
 	void *hdr;
 	int ret = -EINVAL;
 
-	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
-			  NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM);
+	hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+			  &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_BLA_CLAIM);
 	if (!hdr) {
 		ret = -ENOBUFS;
 		goto out;
 	}
 
+	genl_dump_check_consistent(cb, hdr);
+
 	is_own = batadv_compare_eth(claim->backbone_gw->orig,
 				    primary_addr);
 
@@ -2153,28 +2154,33 @@
  * to a netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @primary_if: primary interface
- * @head: bucket to dump
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
  * @idx_skip: How many entries to skip
  *
  * Return: always 0.
  */
 static int
-batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid,
+			     struct netlink_callback *cb,
 			     struct batadv_hard_iface *primary_if,
-			     struct hlist_head *head, int *idx_skip)
+			     struct batadv_hashtable *hash, unsigned int bucket,
+			     int *idx_skip)
 {
 	struct batadv_bla_claim *claim;
 	int idx = 0;
 	int ret = 0;
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(claim, head, hash_entry) {
+	spin_lock_bh(&hash->list_locks[bucket]);
+	cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+	hlist_for_each_entry(claim, &hash->table[bucket], hash_entry) {
 		if (idx++ < *idx_skip)
 			continue;
 
-		ret = batadv_bla_claim_dump_entry(msg, portid, seq,
+		ret = batadv_bla_claim_dump_entry(msg, portid, cb,
 						  primary_if, claim);
 		if (ret) {
 			*idx_skip = idx - 1;
@@ -2184,7 +2190,7 @@
 
 	*idx_skip = 0;
 unlock:
-	rcu_read_unlock();
+	spin_unlock_bh(&hash->list_locks[bucket]);
 	return ret;
 }
 
@@ -2204,7 +2210,6 @@
 	struct batadv_hashtable *hash;
 	struct batadv_priv *bat_priv;
 	int bucket = cb->args[0];
-	struct hlist_head *head;
 	int idx = cb->args[1];
 	int ifindex;
 	int ret = 0;
@@ -2230,11 +2235,8 @@
 	}
 
 	while (bucket < hash->size) {
-		head = &hash->table[bucket];
-
-		if (batadv_bla_claim_dump_bucket(msg, portid,
-						 cb->nlh->nlmsg_seq,
-						 primary_if, head, &idx))
+		if (batadv_bla_claim_dump_bucket(msg, portid, cb, primary_if,
+						 hash, bucket, &idx))
 			break;
 		bucket++;
 	}
@@ -2325,14 +2327,15 @@
  *  netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @primary_if: primary interface
  * @backbone_gw: entry to dump
  *
  * Return: 0 or error code.
  */
 static int
-batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid,
+			       struct netlink_callback *cb,
 			       struct batadv_hard_iface *primary_if,
 			       struct batadv_bla_backbone_gw *backbone_gw)
 {
@@ -2343,13 +2346,16 @@
 	void *hdr;
 	int ret = -EINVAL;
 
-	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
-			  NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE);
+	hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+			  &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_BLA_BACKBONE);
 	if (!hdr) {
 		ret = -ENOBUFS;
 		goto out;
 	}
 
+	genl_dump_check_consistent(cb, hdr);
+
 	is_own = batadv_compare_eth(backbone_gw->orig, primary_addr);
 
 	spin_lock_bh(&backbone_gw->crc_lock);
@@ -2386,28 +2392,33 @@
  *  a netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @primary_if: primary interface
- * @head: bucket to dump
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
  * @idx_skip: How many entries to skip
  *
  * Return: always 0.
  */
 static int
-batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid,
+				struct netlink_callback *cb,
 				struct batadv_hard_iface *primary_if,
-				struct hlist_head *head, int *idx_skip)
+				struct batadv_hashtable *hash,
+				unsigned int bucket, int *idx_skip)
 {
 	struct batadv_bla_backbone_gw *backbone_gw;
 	int idx = 0;
 	int ret = 0;
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
+	spin_lock_bh(&hash->list_locks[bucket]);
+	cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+	hlist_for_each_entry(backbone_gw, &hash->table[bucket], hash_entry) {
 		if (idx++ < *idx_skip)
 			continue;
 
-		ret = batadv_bla_backbone_dump_entry(msg, portid, seq,
+		ret = batadv_bla_backbone_dump_entry(msg, portid, cb,
 						     primary_if, backbone_gw);
 		if (ret) {
 			*idx_skip = idx - 1;
@@ -2417,7 +2428,7 @@
 
 	*idx_skip = 0;
 unlock:
-	rcu_read_unlock();
+	spin_unlock_bh(&hash->list_locks[bucket]);
 	return ret;
 }
 
@@ -2437,7 +2448,6 @@
 	struct batadv_hashtable *hash;
 	struct batadv_priv *bat_priv;
 	int bucket = cb->args[0];
-	struct hlist_head *head;
 	int idx = cb->args[1];
 	int ifindex;
 	int ret = 0;
@@ -2463,11 +2473,8 @@
 	}
 
 	while (bucket < hash->size) {
-		head = &hash->table[bucket];
-
-		if (batadv_bla_backbone_dump_bucket(msg, portid,
-						    cb->nlh->nlmsg_seq,
-						    primary_if, head, &idx))
+		if (batadv_bla_backbone_dump_bucket(msg, portid, cb, primary_if,
+						    hash, bucket, &idx))
 			break;
 		bucket++;
 	}
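
The claim-removal fix earlier in this file is worth spelling out: batadv_hash_remove() returns the hlist_node it actually unlinked, and only that node's enclosing entry may lose the hash's reference; dropping the reference on the lookup result instead could underflow a different entry's refcount if the hash changed in between. A condensed sketch of the corrected flow, using the types from the hunk above:

	claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash,
						batadv_compare_claim,
						batadv_choose_claim, claim);
	if (claim_removed_node) {
		/* hlist_entry() is container_of(): map the unlinked node back
		 * to its enclosing claim before dropping the hash's reference */
		claim_removed_entry = hlist_entry(claim_removed_node,
						  struct batadv_bla_claim,
						  hash_entry);
		batadv_claim_put(claim_removed_entry);
	}

	/* the reference taken by the earlier hash_find() lookup is dropped
	 * unconditionally */
	batadv_claim_put(claim);
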
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 71f95a3..02b24a8 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BLA_H_
@@ -22,14 +10,13 @@
 #include "main.h"
 
 #include <linux/compiler.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 
-struct net_device;
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
-
 /**
  * batadv_bla_is_loopdetect_mac() - check if the mac address is from a loop
  *  detect frame sent by bridge loop avoidance
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 3cb8237..38c4d8e 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,32 +1,21 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "debugfs.h"
 #include "main.h"
 
+#include <asm/current.h>
 #include <linux/dcache.h>
 #include <linux/debugfs.h>
-#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/fs.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
+#include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <linux/stddef.h>
@@ -47,8 +36,24 @@
 
 static struct dentry *batadv_debugfs;
 
+/**
+ * batadv_debugfs_deprecated() - Log use of deprecated batadv debugfs access
+ * @file: file which was accessed
+ * @alt: explanation what can be used as alternative
+ */
+void batadv_debugfs_deprecated(struct file *file, const char *alt)
+{
+	struct dentry *dentry = file_dentry(file);
+	const char *name = dentry->d_name.name;
+
+	pr_warn_ratelimited(DEPRECATED "%s (pid %d) Use of debugfs file \"%s\".\n%s",
+			    current->comm, task_pid_nr(current), name, alt);
+}
+
 static int batadv_algorithms_open(struct inode *inode, struct file *file)
 {
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_ROUTING_ALGOS instead\n");
 	return single_open(file, batadv_algo_seq_print_text, NULL);
 }
 
@@ -56,6 +61,8 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_NEIGHBORS instead\n");
 	return single_open(file, batadv_hardif_neigh_seq_print_text, net_dev);
 }
 
@@ -63,6 +70,8 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_ORIGINATORS instead\n");
 	return single_open(file, batadv_orig_seq_print_text, net_dev);
 }
 
@@ -79,6 +88,8 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_HARDIFS instead\n");
 	return single_open(file, batadv_orig_hardif_seq_print_text, net_dev);
 }
 
@@ -86,6 +97,8 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_GATEWAYS instead\n");
 	return single_open(file, batadv_gw_client_seq_print_text, net_dev);
 }
 
@@ -93,6 +106,8 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_TRANSTABLE_GLOBAL instead\n");
 	return single_open(file, batadv_tt_global_seq_print_text, net_dev);
 }
 
@@ -101,6 +116,8 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_BLA_CLAIM instead\n");
 	return single_open(file, batadv_bla_claim_table_seq_print_text,
 			   net_dev);
 }
@@ -110,6 +127,8 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_BLA_BACKBONE instead\n");
 	return single_open(file, batadv_bla_backbone_table_seq_print_text,
 			   net_dev);
 }
@@ -128,6 +147,8 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_DAT_CACHE instead\n");
 	return single_open(file, batadv_dat_cache_seq_print_text, net_dev);
 }
 #endif
@@ -136,6 +157,8 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_TRANSTABLE_LOCAL instead\n");
 	return single_open(file, batadv_tt_local_seq_print_text, net_dev);
 }
 
@@ -149,6 +172,7 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file, "");
 	return single_open(file, batadv_nc_nodes_seq_print_text, net_dev);
 }
 #endif
@@ -165,6 +189,8 @@
 {
 	struct net_device *net_dev = (struct net_device *)inode->i_private;
 
+	batadv_debugfs_deprecated(file,
+				  "Use genl command BATADV_CMD_GET_MCAST_FLAGS instead\n");
 	return single_open(file, batadv_mcast_flags_seq_print_text, net_dev);
 }
 #endif
@@ -266,31 +292,13 @@
 void batadv_debugfs_init(void)
 {
 	struct batadv_debuginfo **bat_debug;
-	struct dentry *file;
 
 	batadv_debugfs = debugfs_create_dir(BATADV_DEBUGFS_SUBDIR, NULL);
-	if (batadv_debugfs == ERR_PTR(-ENODEV))
-		batadv_debugfs = NULL;
 
-	if (!batadv_debugfs)
-		goto err;
-
-	for (bat_debug = batadv_general_debuginfos; *bat_debug; ++bat_debug) {
-		file = debugfs_create_file(((*bat_debug)->attr).name,
-					   S_IFREG | ((*bat_debug)->attr).mode,
-					   batadv_debugfs, NULL,
-					   &(*bat_debug)->fops);
-		if (!file) {
-			pr_err("Can't add general debugfs file: %s\n",
-			       ((*bat_debug)->attr).name);
-			goto err;
-		}
-	}
-
-	return;
-err:
-	debugfs_remove_recursive(batadv_debugfs);
-	batadv_debugfs = NULL;
+	for (bat_debug = batadv_general_debuginfos; *bat_debug; ++bat_debug)
+		debugfs_create_file(((*bat_debug)->attr).name,
+				    S_IFREG | ((*bat_debug)->attr).mode,
+				    batadv_debugfs, NULL, &(*bat_debug)->fops);
 }
 
 /**
@@ -306,42 +314,23 @@
  * batadv_debugfs_add_hardif() - creates the base directory for a hard interface
  *  in debugfs.
  * @hard_iface: hard interface which should be added.
- *
- * Return: 0 on success or negative error number in case of failure
  */
-int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
+void batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
 {
 	struct net *net = dev_net(hard_iface->net_dev);
 	struct batadv_debuginfo **bat_debug;
-	struct dentry *file;
-
-	if (!batadv_debugfs)
-		goto out;
 
 	if (net != &init_net)
-		return 0;
+		return;
 
 	hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name,
 						   batadv_debugfs);
-	if (!hard_iface->debug_dir)
-		goto out;
 
-	for (bat_debug = batadv_hardif_debuginfos; *bat_debug; ++bat_debug) {
-		file = debugfs_create_file(((*bat_debug)->attr).name,
-					   S_IFREG | ((*bat_debug)->attr).mode,
-					   hard_iface->debug_dir,
-					   hard_iface->net_dev,
-					   &(*bat_debug)->fops);
-		if (!file)
-			goto rem_attr;
-	}
-
-	return 0;
-rem_attr:
-	debugfs_remove_recursive(hard_iface->debug_dir);
-	hard_iface->debug_dir = NULL;
-out:
-	return -ENOMEM;
+	for (bat_debug = batadv_hardif_debuginfos; *bat_debug; ++bat_debug)
+		debugfs_create_file(((*bat_debug)->attr).name,
+				    S_IFREG | ((*bat_debug)->attr).mode,
+				    hard_iface->debug_dir, hard_iface->net_dev,
+				    &(*bat_debug)->fops);
 }
 
 /**
@@ -352,15 +341,12 @@
 {
 	const char *name = hard_iface->net_dev->name;
 	struct dentry *dir;
-	struct dentry *d;
 
 	dir = hard_iface->debug_dir;
 	if (!dir)
 		return;
 
-	d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
-	if (!d)
-		pr_err("Can't rename debugfs dir to %s\n", name);
+	debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
 }
 
 /**
@@ -392,44 +378,29 @@
 	struct batadv_priv *bat_priv = netdev_priv(dev);
 	struct batadv_debuginfo **bat_debug;
 	struct net *net = dev_net(dev);
-	struct dentry *file;
-
-	if (!batadv_debugfs)
-		goto out;
 
 	if (net != &init_net)
 		return 0;
 
 	bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs);
-	if (!bat_priv->debug_dir)
-		goto out;
 
-	if (batadv_socket_setup(bat_priv) < 0)
-		goto rem_attr;
+	batadv_socket_setup(bat_priv);
 
 	if (batadv_debug_log_setup(bat_priv) < 0)
 		goto rem_attr;
 
-	for (bat_debug = batadv_mesh_debuginfos; *bat_debug; ++bat_debug) {
-		file = debugfs_create_file(((*bat_debug)->attr).name,
-					   S_IFREG | ((*bat_debug)->attr).mode,
-					   bat_priv->debug_dir,
-					   dev, &(*bat_debug)->fops);
-		if (!file) {
-			batadv_err(dev, "Can't add debugfs file: %s/%s\n",
-				   dev->name, ((*bat_debug)->attr).name);
-			goto rem_attr;
-		}
-	}
+	for (bat_debug = batadv_mesh_debuginfos; *bat_debug; ++bat_debug)
+		debugfs_create_file(((*bat_debug)->attr).name,
+				    S_IFREG | ((*bat_debug)->attr).mode,
+				    bat_priv->debug_dir, dev,
+				    &(*bat_debug)->fops);
 
-	if (batadv_nc_init_debugfs(bat_priv) < 0)
-		goto rem_attr;
+	batadv_nc_init_debugfs(bat_priv);
 
 	return 0;
 rem_attr:
 	debugfs_remove_recursive(bat_priv->debug_dir);
 	bat_priv->debug_dir = NULL;
-out:
 	return -ENOMEM;
 }
 
@@ -442,15 +413,12 @@
 	struct batadv_priv *bat_priv = netdev_priv(dev);
 	const char *name = dev->name;
 	struct dentry *dir;
-	struct dentry *d;
 
 	dir = bat_priv->debug_dir;
 	if (!dir)
 		return;
 
-	d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
-	if (!d)
-		pr_err("Can't rename debugfs dir to %s\n", name);
+	debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
 }
 
 /**
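
The debugfs.c conversion above follows the kernel-wide rule that debugfs creation failures are not errors worth handling: the helpers return ERR_PTR values that later debugfs calls recognize and turn into no-ops. A minimal sketch of the resulting idiom — example_dir, example_fops, and the file names are hypothetical:

	static const struct file_operations example_fops;
	static struct dentry *example_dir;

	static void example_debugfs_init(void)
	{
		example_dir = debugfs_create_dir("example", NULL);

		/* no NULL/IS_ERR check: if example_dir is an ERR_PTR, the
		 * call below degrades to a no-op instead of crashing */
		debugfs_create_file("state", 0444, example_dir, NULL,
				    &example_fops);
	}
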
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 08a592f..1c5afd3 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_DEBUGFS_H_
@@ -21,23 +9,29 @@
 
 #include "main.h"
 
-struct net_device;
+#include <linux/fs.h>
+#include <linux/netdevice.h>
 
 #define BATADV_DEBUGFS_SUBDIR "batman_adv"
 
 #if IS_ENABLED(CONFIG_BATMAN_ADV_DEBUGFS)
 
+void batadv_debugfs_deprecated(struct file *file, const char *alt);
 void batadv_debugfs_init(void);
 void batadv_debugfs_destroy(void);
 int batadv_debugfs_add_meshif(struct net_device *dev);
 void batadv_debugfs_rename_meshif(struct net_device *dev);
 void batadv_debugfs_del_meshif(struct net_device *dev);
-int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface);
+void batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface);
 void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface);
 void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface);
 
 #else
 
+static inline void batadv_debugfs_deprecated(struct file *file, const char *alt)
+{
+}
+
 static inline void batadv_debugfs_init(void)
 {
 }
@@ -60,9 +54,8 @@
 }
 
 static inline
-int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
+void batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
 {
-	return 0;
 }
 
 static inline
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index a60bacf..b0af3a1 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,24 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "distributed-arp-table.h"
 #include "main.h"
 
+#include <asm/unaligned.h>
 #include <linux/atomic.h>
 #include <linux/bitops.h>
 #include <linux/byteorder/generic.h>
@@ -29,6 +18,7 @@
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
 #include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
@@ -42,6 +32,7 @@
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
+#include <linux/udp.h>
 #include <linux/workqueue.h>
 #include <net/arp.h>
 #include <net/genetlink.h>
@@ -60,6 +51,49 @@
 #include "translation-table.h"
 #include "tvlv.h"
 
+enum batadv_bootpop {
+	BATADV_BOOTREPLY	= 2,
+};
+
+enum batadv_boothtype {
+	BATADV_HTYPE_ETHERNET	= 1,
+};
+
+enum batadv_dhcpoptioncode {
+	BATADV_DHCP_OPT_PAD		= 0,
+	BATADV_DHCP_OPT_MSG_TYPE	= 53,
+	BATADV_DHCP_OPT_END		= 255,
+};
+
+enum batadv_dhcptype {
+	BATADV_DHCPACK		= 5,
+};
+
+/* { 99, 130, 83, 99 } */
+#define BATADV_DHCP_MAGIC 1669485411
+
+struct batadv_dhcp_packet {
+	__u8 op;
+	__u8 htype;
+	__u8 hlen;
+	__u8 hops;
+	__be32 xid;
+	__be16 secs;
+	__be16 flags;
+	__be32 ciaddr;
+	__be32 yiaddr;
+	__be32 siaddr;
+	__be32 giaddr;
+	__u8 chaddr[16];
+	__u8 sname[64];
+	__u8 file[128];
+	__be32 magic;
+	__u8 options[0];
+};
+
+#define BATADV_DHCP_YIADDR_LEN sizeof(((struct batadv_dhcp_packet *)0)->yiaddr)
+#define BATADV_DHCP_CHADDR_LEN sizeof(((struct batadv_dhcp_packet *)0)->chaddr)
+
 static void batadv_dat_purge(struct work_struct *work);
 
 /**
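
BATADV_DHCP_MAGIC above is the RFC 2131 magic cookie { 99, 130, 83, 99 } expressed as a host-order constant, which is why the parsing code compares it through htonl(). A standalone, runnable check of that equivalence:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const uint8_t cookie[4] = { 99, 130, 83, 99 }; /* RFC 2131 */
		uint32_t host = ((uint32_t)cookie[0] << 24) |
				((uint32_t)cookie[1] << 16) |
				((uint32_t)cookie[2] << 8) |
				 (uint32_t)cookie[3];

		/* prints 1669485411 == BATADV_DHCP_MAGIC */
		printf("%u\n", host);
		return 0;
	}
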
@@ -621,7 +655,7 @@
 }
 
 /**
- * batadv_dat_send_data() - send a payload to the selected candidates
+ * batadv_dat_forward_data() - copy and send payload to the selected candidates
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: payload to send
  * @ip: the DHT key
@@ -634,9 +668,9 @@
  * Return: true if the packet is sent to at least one candidate, false
  * otherwise.
  */
-static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
-				 struct sk_buff *skb, __be32 ip,
-				 unsigned short vid, int packet_subtype)
+static bool batadv_dat_forward_data(struct batadv_priv *bat_priv,
+				    struct sk_buff *skb, __be32 ip,
+				    unsigned short vid, int packet_subtype)
 {
 	int i;
 	bool ret = false;
@@ -863,23 +897,27 @@
  *  netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @dat_entry: entry to dump
  *
  * Return: 0 or error code.
  */
 static int
-batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid,
+			    struct netlink_callback *cb,
 			    struct batadv_dat_entry *dat_entry)
 {
 	int msecs;
 	void *hdr;
 
-	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
-			  NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+	hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+			  &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_DAT_CACHE);
 	if (!hdr)
 		return -ENOBUFS;
 
+	genl_dump_check_consistent(cb, hdr);
+
 	msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
 
 	if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
@@ -901,27 +939,31 @@
  *  a netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
- * @seq: Sequence number of netlink message
- * @head: bucket to dump
+ * @cb: Control block containing additional options
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
  * @idx_skip: How many entries to skip
  *
  * Return: 0 or error code.
  */
 static int
-batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
-			     struct hlist_head *head, int *idx_skip)
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid,
+			     struct netlink_callback *cb,
+			     struct batadv_hashtable *hash, unsigned int bucket,
+			     int *idx_skip)
 {
 	struct batadv_dat_entry *dat_entry;
 	int idx = 0;
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+	spin_lock_bh(&hash->list_locks[bucket]);
+	cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+	hlist_for_each_entry(dat_entry, &hash->table[bucket], hash_entry) {
 		if (idx < *idx_skip)
 			goto skip;
 
-		if (batadv_dat_cache_dump_entry(msg, portid, seq,
-						dat_entry)) {
-			rcu_read_unlock();
+		if (batadv_dat_cache_dump_entry(msg, portid, cb, dat_entry)) {
+			spin_unlock_bh(&hash->list_locks[bucket]);
 			*idx_skip = idx;
 
 			return -EMSGSIZE;
@@ -930,7 +972,7 @@
 skip:
 		idx++;
 	}
-	rcu_read_unlock();
+	spin_unlock_bh(&hash->list_locks[bucket]);
 
 	return 0;
 }
@@ -951,7 +993,6 @@
 	struct batadv_hashtable *hash;
 	struct batadv_priv *bat_priv;
 	int bucket = cb->args[0];
-	struct hlist_head *head;
 	int idx = cb->args[1];
 	int ifindex;
 	int ret = 0;
@@ -977,10 +1018,7 @@
 	}
 
 	while (bucket < hash->size) {
-		head = &hash->table[bucket];
-
-		if (batadv_dat_cache_dump_bucket(msg, portid,
-						 cb->nlh->nlmsg_seq, head,
+		if (batadv_dat_cache_dump_bucket(msg, portid, cb, hash, bucket,
 						 &idx))
 			break;
 
@@ -1227,8 +1265,8 @@
 		ret = true;
 	} else {
 		/* Send the request to the DHT */
-		ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid,
-					   BATADV_P_DAT_DHT_GET);
+		ret = batadv_dat_forward_data(bat_priv, skb, ip_dst, vid,
+					      BATADV_P_DAT_DHT_GET);
 	}
 out:
 	if (dat_entry)
@@ -1342,8 +1380,10 @@
 	/* Send the ARP reply to the candidates for both the IP addresses that
 	 * the node obtained from the ARP reply
 	 */
-	batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT);
-	batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
+	batadv_dat_forward_data(bat_priv, skb, ip_src, vid,
+				BATADV_P_DAT_DHT_PUT);
+	batadv_dat_forward_data(bat_priv, skb, ip_dst, vid,
+				BATADV_P_DAT_DHT_PUT);
 }
 
 /**
@@ -1394,7 +1434,6 @@
 			   hw_src, &ip_src, hw_dst, &ip_dst,
 			   dat_entry->mac_addr,	&dat_entry->ip);
 		dropped = true;
-		goto out;
 	}
 
 	/* Update our internal cache with both the IP addresses the node got
@@ -1403,6 +1442,9 @@
 	batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
 	batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid);
 
+	if (dropped)
+		goto out;
+
 	/* If BLA is enabled, only forward ARP replies if we have claimed the
 	 * source of the ARP reply or if no one else of the same backbone has
 	 * already claimed that client. This prevents that different gateways
@@ -1436,6 +1478,363 @@
 }
 
 /**
+ * batadv_dat_check_dhcp_ipudp() - check skb for IP+UDP headers valid for DHCP
+ * @skb: the packet to check
+ * @ip_src: a buffer to store the IPv4 source address in
+ *
+ * Checks whether the given skb has an IP and UDP header valid for a DHCP
+ * message from a DHCP server and, if so, stores the IPv4 source address in
+ * the provided buffer.
+ *
+ * Return: True if valid, false otherwise.
+ */
+static bool
+batadv_dat_check_dhcp_ipudp(struct sk_buff *skb, __be32 *ip_src)
+{
+	unsigned int offset = skb_network_offset(skb);
+	struct udphdr *udphdr, _udphdr;
+	struct iphdr *iphdr, _iphdr;
+
+	iphdr = skb_header_pointer(skb, offset, sizeof(_iphdr), &_iphdr);
+	if (!iphdr || iphdr->version != 4 || iphdr->ihl * 4 < sizeof(_iphdr))
+		return false;
+
+	if (iphdr->protocol != IPPROTO_UDP)
+		return false;
+
+	offset += iphdr->ihl * 4;
+	skb_set_transport_header(skb, offset);
+
+	udphdr = skb_header_pointer(skb, offset, sizeof(_udphdr), &_udphdr);
+	if (!udphdr || udphdr->source != htons(67))
+		return false;
+
+	*ip_src = get_unaligned(&iphdr->saddr);
+
+	return true;
+}
+
+/**
+ * batadv_dat_check_dhcp() - examine packet for valid DHCP message
+ * @skb: the packet to check
+ * @proto: ethernet protocol hint (behind a potential vlan)
+ * @ip_src: a buffer to store the IPv4 source address in
+ *
+ * Checks whether the given skb is a valid DHCP packet and, if so, stores the
+ * IPv4 source address in the provided buffer.
+ *
+ * Caller needs to ensure that the skb network header is set correctly.
+ *
+ * Return: If skb is a valid DHCP packet, then returns its op code
+ * (e.g. BOOTREPLY vs. BOOTREQUEST). Otherwise returns -EINVAL.
+ */
+static int
+batadv_dat_check_dhcp(struct sk_buff *skb, __be16 proto, __be32 *ip_src)
+{
+	__be32 *magic, _magic;
+	unsigned int offset;
+	struct {
+		__u8 op;
+		__u8 htype;
+		__u8 hlen;
+		__u8 hops;
+	} *dhcp_h, _dhcp_h;
+
+	if (proto != htons(ETH_P_IP))
+		return -EINVAL;
+
+	if (!batadv_dat_check_dhcp_ipudp(skb, ip_src))
+		return -EINVAL;
+
+	offset = skb_transport_offset(skb) + sizeof(struct udphdr);
+	if (skb->len < offset + sizeof(struct batadv_dhcp_packet))
+		return -EINVAL;
+
+	dhcp_h = skb_header_pointer(skb, offset, sizeof(_dhcp_h), &_dhcp_h);
+	if (!dhcp_h || dhcp_h->htype != BATADV_HTYPE_ETHERNET ||
+	    dhcp_h->hlen != ETH_ALEN)
+		return -EINVAL;
+
+	offset += offsetof(struct batadv_dhcp_packet, magic);
+
+	magic = skb_header_pointer(skb, offset, sizeof(_magic), &_magic);
+	if (!magic || get_unaligned(magic) != htonl(BATADV_DHCP_MAGIC))
+		return -EINVAL;
+
+	return dhcp_h->op;
+}
+
+/**
+ * batadv_dat_get_dhcp_message_type() - get message type of a DHCP packet
+ * @skb: the DHCP packet to parse
+ *
+ * Iterates over the DHCP options of the given DHCP packet to find a
+ * DHCP Message Type option and parse it.
+ *
+ * Caller needs to ensure that the given skb is a valid DHCP packet and
+ * that the skb transport header is set correctly.
+ *
+ * Return: The DHCP message type value, if found. -EINVAL otherwise.
+ */
+static int batadv_dat_get_dhcp_message_type(struct sk_buff *skb)
+{
+	unsigned int offset = skb_transport_offset(skb) + sizeof(struct udphdr);
+	u8 *type, _type;
+	struct {
+		u8 type;
+		u8 len;
+	} *tl, _tl;
+
+	offset += sizeof(struct batadv_dhcp_packet);
+
+	while ((tl = skb_header_pointer(skb, offset, sizeof(_tl), &_tl))) {
+		if (tl->type == BATADV_DHCP_OPT_MSG_TYPE)
+			break;
+
+		if (tl->type == BATADV_DHCP_OPT_END)
+			break;
+
+		if (tl->type == BATADV_DHCP_OPT_PAD)
+			offset++;
+		else
+			offset += tl->len + sizeof(_tl);
+	}
+
+	/* Option Overload Code not supported */
+	if (!tl || tl->type != BATADV_DHCP_OPT_MSG_TYPE ||
+	    tl->len != sizeof(_type))
+		return -EINVAL;
+
+	offset += sizeof(_tl);
+
+	type = skb_header_pointer(skb, offset, sizeof(_type), &_type);
+	if (!type)
+		return -EINVAL;
+
+	return *type;
+}
+
+/**
+ * batadv_dat_dhcp_get_yiaddr() - get yiaddr from a DHCP packet
+ * @skb: the DHCP packet to parse
+ * @buf: a buffer to store the yiaddr in
+ *
+ * Caller needs to ensure that the given skb is a valid DHCP packet and
+ * that the skb transport header is set correctly.
+ *
+ * Return: True on success, false otherwise.
+ */
+static bool batadv_dat_dhcp_get_yiaddr(struct sk_buff *skb, __be32 *buf)
+{
+	unsigned int offset = skb_transport_offset(skb) + sizeof(struct udphdr);
+	__be32 *yiaddr;
+
+	offset += offsetof(struct batadv_dhcp_packet, yiaddr);
+	yiaddr = skb_header_pointer(skb, offset, BATADV_DHCP_YIADDR_LEN, buf);
+
+	if (!yiaddr)
+		return false;
+
+	if (yiaddr != buf)
+		*buf = get_unaligned(yiaddr);
+
+	return true;
+}
+
+/**
+ * batadv_dat_get_dhcp_chaddr() - get chaddr from a DHCP packet
+ * @skb: the DHCP packet to parse
+ * @buf: a buffer to store the chaddr in
+ *
+ * Caller needs to ensure that the given skb is a valid DHCP packet and
+ * that the skb transport header is set correctly.
+ *
+ * Return: True on success, false otherwise
+ */
+static bool batadv_dat_get_dhcp_chaddr(struct sk_buff *skb, u8 *buf)
+{
+	unsigned int offset = skb_transport_offset(skb) + sizeof(struct udphdr);
+	u8 *chaddr;
+
+	offset += offsetof(struct batadv_dhcp_packet, chaddr);
+	chaddr = skb_header_pointer(skb, offset, BATADV_DHCP_CHADDR_LEN, buf);
+
+	if (!chaddr)
+		return false;
+
+	if (chaddr != buf)
+		memcpy(buf, chaddr, BATADV_DHCP_CHADDR_LEN);
+
+	return true;
+}
+
+/**
+ * batadv_dat_put_dhcp() - puts addresses from a DHCP packet into the DHT and
+ *  DAT cache
+ * @bat_priv: the bat priv with all the soft interface information
+ * @chaddr: the DHCP client MAC address
+ * @yiaddr: the DHCP client IP address
+ * @hw_dst: the DHCP server MAC address
+ * @ip_dst: the DHCP server IP address
+ * @vid: VLAN identifier
+ *
+ * Adds given MAC/IP pairs to the local DAT cache and propagates them further
+ * into the DHT.
+ *
+ * For the DHT propagation, client MAC + IP will appear as the ARP Reply
+ * transmitter (and hw_dst/ip_dst as the target).
+ */
+static void batadv_dat_put_dhcp(struct batadv_priv *bat_priv, u8 *chaddr,
+				__be32 yiaddr, u8 *hw_dst, __be32 ip_dst,
+				unsigned short vid)
+{
+	struct sk_buff *skb;
+
+	skb = batadv_dat_arp_create_reply(bat_priv, yiaddr, ip_dst, chaddr,
+					  hw_dst, vid);
+	if (!skb)
+		return;
+
+	skb_set_network_header(skb, ETH_HLEN);
+
+	batadv_dat_entry_add(bat_priv, yiaddr, chaddr, vid);
+	batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid);
+
+	batadv_dat_forward_data(bat_priv, skb, yiaddr, vid,
+				BATADV_P_DAT_DHT_PUT);
+	batadv_dat_forward_data(bat_priv, skb, ip_dst, vid,
+				BATADV_P_DAT_DHT_PUT);
+
+	consume_skb(skb);
+
+	batadv_dbg(BATADV_DBG_DAT, bat_priv,
+		   "Snooped from outgoing DHCPACK (server address): %pI4, %pM (vid: %i)\n",
+		   &ip_dst, hw_dst, batadv_print_vid(vid));
+	batadv_dbg(BATADV_DBG_DAT, bat_priv,
+		   "Snooped from outgoing DHCPACK (client address): %pI4, %pM (vid: %i)\n",
+		   &yiaddr, chaddr, batadv_print_vid(vid));
+}
+
+/**
+ * batadv_dat_check_dhcp_ack() - examine packet for valid DHCP message
+ * @skb: the packet to check
+ * @proto: ethernet protocol hint (behind a potential vlan)
+ * @ip_src: a buffer to store the IPv4 source address in
+ * @chaddr: a buffer to store the DHCP Client Hardware Address in
+ * @yiaddr: a buffer to store the DHCP Your IP Address in
+ *
+ * Checks whether the given skb is a valid DHCPACK and, if so, stores the
+ * IPv4 server source address (ip_src), client MAC address (chaddr) and client
+ * IPv4 address (yiaddr) in the provided buffers.
+ *
+ * Caller needs to ensure that the skb network header is set correctly.
+ *
+ * Return: True if the skb is a valid DHCPACK. False otherwise.
+ */
+static bool
+batadv_dat_check_dhcp_ack(struct sk_buff *skb, __be16 proto, __be32 *ip_src,
+			  u8 *chaddr, __be32 *yiaddr)
+{
+	int type;
+
+	type = batadv_dat_check_dhcp(skb, proto, ip_src);
+	if (type != BATADV_BOOTREPLY)
+		return false;
+
+	type = batadv_dat_get_dhcp_message_type(skb);
+	if (type != BATADV_DHCPACK)
+		return false;
+
+	if (!batadv_dat_dhcp_get_yiaddr(skb, yiaddr))
+		return false;
+
+	if (!batadv_dat_get_dhcp_chaddr(skb, chaddr))
+		return false;
+
+	return true;
+}
+
+/**
+ * batadv_dat_snoop_outgoing_dhcp_ack() - snoop DHCPACK and fill DAT with it
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the packet to snoop
+ * @proto: ethernet protocol hint (behind a potential vlan)
+ * @vid: VLAN identifier
+ *
+ * This function first checks whether the given skb is a valid DHCPACK. If
+ * so, its source MAC and IP as well as its DHCP Client Hardware Address and
+ * DHCP Your IP Address fields are added to the local DAT cache and
+ * propagated into the DHT.
+ *
+ * Caller needs to ensure that the skb mac and network headers are set
+ * correctly.
+ */
+void batadv_dat_snoop_outgoing_dhcp_ack(struct batadv_priv *bat_priv,
+					struct sk_buff *skb,
+					__be16 proto,
+					unsigned short vid)
+{
+	u8 chaddr[BATADV_DHCP_CHADDR_LEN];
+	__be32 ip_src, yiaddr;
+
+	if (!atomic_read(&bat_priv->distributed_arp_table))
+		return;
+
+	if (!batadv_dat_check_dhcp_ack(skb, proto, &ip_src, chaddr, &yiaddr))
+		return;
+
+	batadv_dat_put_dhcp(bat_priv, chaddr, yiaddr, eth_hdr(skb)->h_source,
+			    ip_src, vid);
+}
+
+/**
+ * batadv_dat_snoop_incoming_dhcp_ack() - snoop DHCPACK and fill DAT cache
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the packet to snoop
+ * @hdr_size: header size, up to the tail of the batman-adv header
+ *
+ * This function first checks whether the given skb is a valid DHCPACK. If
+ * so, its source MAC and IP as well as its DHCP Client Hardware Address and
+ * DHCP Your IP Address fields are added to the local DAT cache.
+ */
+void batadv_dat_snoop_incoming_dhcp_ack(struct batadv_priv *bat_priv,
+					struct sk_buff *skb, int hdr_size)
+{
+	u8 chaddr[BATADV_DHCP_CHADDR_LEN];
+	struct ethhdr *ethhdr;
+	__be32 ip_src, yiaddr;
+	unsigned short vid;
+	__be16 proto;
+	u8 *hw_src;
+
+	if (!atomic_read(&bat_priv->distributed_arp_table))
+		return;
+
+	if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN)))
+		return;
+
+	ethhdr = (struct ethhdr *)(skb->data + hdr_size);
+	skb_set_network_header(skb, hdr_size + ETH_HLEN);
+	proto = ethhdr->h_proto;
+
+	if (!batadv_dat_check_dhcp_ack(skb, proto, &ip_src, chaddr, &yiaddr))
+		return;
+
+	hw_src = ethhdr->h_source;
+	vid = batadv_dat_get_vid(skb, &hdr_size);
+
+	batadv_dat_entry_add(bat_priv, yiaddr, chaddr, vid);
+	batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
+
+	batadv_dbg(BATADV_DBG_DAT, bat_priv,
+		   "Snooped from incoming DHCPACK (server address): %pI4, %pM (vid: %i)\n",
+		   &ip_src, hw_src, batadv_print_vid(vid));
+	batadv_dbg(BATADV_DBG_DAT, bat_priv,
+		   "Snooped from incoming DHCPACK (client address): %pI4, %pM (vid: %i)\n",
+		   &yiaddr, chaddr, batadv_print_vid(vid));
+}
+
+/**
  * batadv_dat_drop_broadcast_packet() - check if an ARP request has to be
  *  dropped (because the node has already obtained the reply via DAT) or not
  * @bat_priv: the bat priv with all the soft interface information
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index a045960..67c7729 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_
@@ -23,15 +11,14 @@
 
 #include <linux/compiler.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/types.h>
 #include <uapi/linux/batadv_packet.h>
 
 #include "originator.h"
 
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
-
 #ifdef CONFIG_BATMAN_ADV_DAT
 
 /* BATADV_DAT_ADDR_MAX - maximum address value in the DHT space */
@@ -46,6 +33,12 @@
 					 struct sk_buff *skb);
 bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
 					 struct sk_buff *skb, int hdr_size);
+void batadv_dat_snoop_outgoing_dhcp_ack(struct batadv_priv *bat_priv,
+					struct sk_buff *skb,
+					__be16 proto,
+					unsigned short vid);
+void batadv_dat_snoop_incoming_dhcp_ack(struct batadv_priv *bat_priv,
+					struct sk_buff *skb, int hdr_size);
 bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
 				      struct batadv_forw_packet *forw_packet);
 
@@ -140,6 +133,19 @@
 	return false;
 }
 
+static inline void
+batadv_dat_snoop_outgoing_dhcp_ack(struct batadv_priv *bat_priv,
+				   struct sk_buff *skb, __be16 proto,
+				   unsigned short vid)
+{
+}
+
+static inline void
+batadv_dat_snoop_incoming_dhcp_ack(struct batadv_priv *bat_priv,
+				   struct sk_buff *skb, int hdr_size)
+{
+}
+
 static inline bool
 batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
 				 struct batadv_forw_packet *forw_packet)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 5b71a28..385fccd 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "fragmentation.h"
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 944512e..abfe8c6 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_FRAGMENTATION_H_
@@ -23,11 +11,10 @@
 
 #include <linux/compiler.h>
 #include <linux/list.h>
+#include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 
-struct sk_buff;
-
 void batadv_frag_purge_orig(struct batadv_orig_node *orig,
 			    bool (*check_cb)(struct batadv_frag_table_entry *));
 bool batadv_frag_skb_fwd(struct sk_buff *skb,
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 140c61a..47df4c6 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "gateway_client.h"
@@ -47,14 +35,12 @@
 #include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
-#include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
 #include "netlink.h"
 #include "originator.h"
 #include "routing.h"
 #include "soft-interface.h"
-#include "sysfs.h"
 #include "translation-table.h"
 
 /* These are the offsets of the "hw type" and "hw address length" in the dhcp
@@ -377,6 +363,7 @@
 
 	kref_get(&gw_node->refcount);
 	hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list);
+	bat_priv->gw.generation++;
 
 	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 		   "Found new gateway %pM -> gw bandwidth: %u.%u/%u.%u MBit\n",
@@ -472,6 +459,7 @@
 		if (!hlist_unhashed(&gw_node->list)) {
 			hlist_del_init_rcu(&gw_node->list);
 			batadv_gw_node_put(gw_node);
+			bat_priv->gw.generation++;
 		}
 		spin_unlock_bh(&bat_priv->gw.list_lock);
 
@@ -518,6 +506,7 @@
 				  &bat_priv->gw.gateway_list, list) {
 		hlist_del_init_rcu(&gw_node->list);
 		batadv_gw_node_put(gw_node);
+		bat_priv->gw.generation++;
 	}
 	spin_unlock_bh(&bat_priv->gw.list_lock);
 }
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index f0b86fc..0be8e71 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
@@ -21,12 +9,11 @@
 
 #include "main.h"
 
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/types.h>
-
-struct batadv_tvlv_gateway_data;
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
+#include <uapi/linux/batadv_packet.h>
 
 void batadv_gw_check_client_stop(struct batadv_priv *bat_priv);
 void batadv_gw_reselect(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 936c107..fc55750 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "gateway_common.h"
@@ -23,11 +11,13 @@
 #include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
+#include <linux/limits.h>
 #include <linux/math64.h>
 #include <linux/netdevice.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "gateway_client.h"
 #include "log.h"
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 80afb27..211b14b 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_GATEWAY_COMMON_H_
@@ -21,16 +9,9 @@
 
 #include "main.h"
 
+#include <linux/netdevice.h>
 #include <linux/types.h>
 
-struct net_device;
-
-enum batadv_gw_modes {
-	BATADV_GW_MODE_OFF,
-	BATADV_GW_MODE_CLIENT,
-	BATADV_GW_MODE_SERVER,
-};
-
 /**
  * enum batadv_bandwidth_units - bandwidth unit types
  */
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 2f0d42f..afb5228 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,26 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "hard-interface.h"
 #include "main.h"
 
 #include <linux/atomic.h>
-#include <linux/bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/gfp.h>
@@ -29,7 +16,9 @@
 #include <linux/if_ether.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
+#include <linux/limits.h>
 #include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
@@ -179,8 +168,10 @@
 	parent_dev = __dev_get_by_index((struct net *)parent_net,
 					dev_get_iflink(net_dev));
 	/* if we got a NULL parent_dev there is something broken.. */
-	if (WARN(!parent_dev, "Cannot find parent device"))
+	if (!parent_dev) {
+		pr_err("Cannot find parent device\n");
 		return false;
+	}
 
 	if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net))
 		return false;
@@ -763,11 +754,6 @@
 	hard_iface->soft_iface = soft_iface;
 	bat_priv = netdev_priv(hard_iface->soft_iface);
 
-	if (bat_priv->num_ifaces >= UINT_MAX) {
-		ret = -ENOSPC;
-		goto err_dev;
-	}
-
 	ret = netdev_master_upper_dev_link(hard_iface->net_dev,
 					   soft_iface, NULL, NULL, NULL);
 	if (ret)
@@ -777,16 +763,7 @@
 	if (ret < 0)
 		goto err_upper;
 
-	hard_iface->if_num = bat_priv->num_ifaces;
-	bat_priv->num_ifaces++;
 	hard_iface->if_status = BATADV_IF_INACTIVE;
-	ret = batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces);
-	if (ret < 0) {
-		bat_priv->algo_ops->iface.disable(hard_iface);
-		bat_priv->num_ifaces--;
-		hard_iface->if_status = BATADV_IF_NOT_IN_USE;
-		goto err_upper;
-	}
 
 	kref_get(&hard_iface->refcount);
 	hard_iface->batman_adv_ptype.type = ethertype;
@@ -820,6 +797,9 @@
 
 	batadv_hardif_recalc_extra_skbroom(soft_iface);
 
+	if (bat_priv->algo_ops->iface.enabled)
+		bat_priv->algo_ops->iface.enabled(hard_iface);
+
 out:
 	return 0;
 
@@ -834,6 +814,33 @@
 }
 
 /**
+ * batadv_hardif_cnt() - get number of interfaces enslaved to soft interface
+ * @soft_iface: soft interface to check
+ *
+ * This function only uses RCU for locking - the result can therefore be
+ * off when another function is modifying the list at the same time. The
+ * caller can use the rtnl_lock to make sure that the count is accurate.
+ *
+ * Return: number of connected/enslaved hard interfaces
+ */
+static size_t batadv_hardif_cnt(const struct net_device *soft_iface)
+{
+	struct batadv_hard_iface *hard_iface;
+	size_t count = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+		if (hard_iface->soft_iface != soft_iface)
+			continue;
+
+		count++;
+	}
+	rcu_read_unlock();
+
+	return count;
+}
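
Because batadv_hardif_cnt() walks the list under RCU only, a concurrent enslave/release can make the result momentarily stale, as the comment notes. A hedged sketch of the suggested remedy: a hypothetical caller in the same file takes rtnl_lock, the writer-side lock for this list, around the count.

#include <linux/rtnetlink.h>

/* Hypothetical caller that needs an exact count: rtnl_lock excludes
 * everyone who may add or remove entries, so the RCU walk inside
 * batadv_hardif_cnt() cannot race with a writer.
 */
static size_t example_exact_hardif_cnt(const struct net_device *soft_iface)
{
	size_t count;

	rtnl_lock();
	count = batadv_hardif_cnt(soft_iface);
	rtnl_unlock();

	return count;
}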
+
+/**
  * batadv_hardif_disable_interface() - Remove hard interface from soft interface
  * @hard_iface: hard interface to be removed
  * @autodel: whether to delete soft interface when it doesn't contain any other
@@ -855,9 +862,6 @@
 	dev_remove_pack(&hard_iface->batman_adv_ptype);
 	batadv_hardif_put(hard_iface);
 
-	bat_priv->num_ifaces--;
-	batadv_orig_hash_del_if(hard_iface, bat_priv->num_ifaces);
-
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (hard_iface == primary_if) {
 		struct batadv_hard_iface *new_if;
@@ -881,7 +885,7 @@
 	batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface);
 
 	/* nobody uses this interface anymore */
-	if (bat_priv->num_ifaces == 0) {
+	if (batadv_hardif_cnt(hard_iface->soft_iface) <= 1) {
 		batadv_gw_check_client_stop(bat_priv);
 
 		if (autodel == BATADV_IF_CLEANUP_AUTO)
@@ -917,18 +921,16 @@
 	if (ret)
 		goto free_if;
 
-	hard_iface->if_num = 0;
 	hard_iface->net_dev = net_dev;
 	hard_iface->soft_iface = NULL;
 	hard_iface->if_status = BATADV_IF_NOT_IN_USE;
 
-	ret = batadv_debugfs_add_hardif(hard_iface);
-	if (ret)
-		goto free_sysfs;
+	batadv_debugfs_add_hardif(hard_iface);
 
 	INIT_LIST_HEAD(&hard_iface->list);
 	INIT_HLIST_HEAD(&hard_iface->neigh_list);
 
+	mutex_init(&hard_iface->bat_iv.ogm_buff_mutex);
 	spin_lock_init(&hard_iface->neigh_list_lock);
 	kref_init(&hard_iface->refcount);
 
@@ -942,11 +944,10 @@
 	batadv_check_known_mac_addr(hard_iface->net_dev);
 	kref_get(&hard_iface->refcount);
 	list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
+	batadv_hardif_generation++;
 
 	return hard_iface;
 
-free_sysfs:
-	batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
 free_if:
 	kfree(hard_iface);
 release_dev:
@@ -984,6 +985,7 @@
 	list_for_each_entry_safe(hard_iface, hard_iface_tmp,
 				 &batadv_hardif_list, list) {
 		list_del_rcu(&hard_iface->list);
+		batadv_hardif_generation++;
 		batadv_hardif_remove_interface(hard_iface);
 	}
 	rtnl_unlock();
@@ -1045,6 +1047,7 @@
 	case NETDEV_UNREGISTER:
 	case NETDEV_PRE_TYPE_CHANGE:
 		list_del_rcu(&hard_iface->list);
+		batadv_hardif_generation++;
 
 		batadv_hardif_remove_interface(hard_iface);
 		break;
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index d1c0f61..bbb8a6f 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_HARD_INTERFACE_H_
@@ -23,13 +11,12 @@
 
 #include <linux/compiler.h>
 #include <linux/kref.h>
+#include <linux/netdevice.h>
 #include <linux/notifier.h>
 #include <linux/rcupdate.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
-
-struct net_device;
-struct net;
+#include <net/net_namespace.h>
 
 /**
  * enum batadv_hard_if_state - State of a hard interface
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 7b49e40..a9d4e17 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "hash.h"
@@ -32,6 +20,8 @@
 		INIT_HLIST_HEAD(&hash->table[i]);
 		spin_lock_init(&hash->list_locks[i]);
 	}
+
+	atomic_set(&hash->generation, 0);
 }
 
 /**
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 9490a7c..57877f0 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_HASH_H_
@@ -21,15 +9,15 @@
 
 #include "main.h"
 
+#include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
 #include <linux/rculist.h>
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 
-struct lock_class_key;
-
 /* callback to a compare function. Should compare the keys of two
  * elements
  *
@@ -58,6 +46,9 @@
 
 	/** @size: size of hashtable */
 	u32 size;
+
+	/** @generation: current (generation) sequence number */
+	atomic_t generation;
 };
 
 /* allocates and clears the hash */
@@ -112,6 +103,7 @@
 
 	/* no duplicate found in list, add new element */
 	hlist_add_head_rcu(data_node, head);
+	atomic_inc(&hash->generation);
 
 	ret = 0;
 
@@ -154,6 +146,7 @@
 
 		data_save = node;
 		hlist_del_rcu(node);
+		atomic_inc(&hash->generation);
 		break;
 	}
 	spin_unlock_bh(&hash->list_locks[index]);
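
The generation counter added here is bumped on every insert and removal, giving lock-free readers a cheap way to notice that the table changed while they were walking it. A minimal sketch of a consumer, assuming the walk itself is already RCU-safe (the dump function is hypothetical):

/* Hypothetical dumper: snapshot the generation, walk the table, and
 * restart if the snapshot no longer matches, i.e. an entry was added
 * or removed while we were emitting.
 */
static void example_dump(struct batadv_hashtable *hash)
{
	int gen;

restart:
	gen = atomic_read(&hash->generation);

	/* ... walk hash->table[] under RCU and emit every entry ... */

	if (gen != atomic_read(&hash->generation))
		goto restart;	/* table changed under us, re-walk */
}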
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 55c358a..0a70b66 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "icmp_socket.h"
@@ -47,6 +35,7 @@
 #include <linux/wait.h>
 #include <uapi/linux/batadv_packet.h>
 
+#include "debugfs.h"
 #include "hard-interface.h"
 #include "log.h"
 #include "originator.h"
@@ -74,7 +63,9 @@
 	if (!try_module_get(THIS_MODULE))
 		return -EBUSY;
 
-	nonseekable_open(inode, file);
+	batadv_debugfs_deprecated(file, "");
+
+	stream_open(inode, file);
 
 	socket_client = kmalloc(sizeof(*socket_client), GFP_KERNEL);
 	if (!socket_client) {
@@ -144,7 +135,7 @@
 	if (!buf || count < sizeof(struct batadv_icmp_packet))
 		return -EINVAL;
 
-	if (!access_ok(VERIFY_WRITE, buf, count))
+	if (!access_ok(buf, count))
 		return -EFAULT;
 
 	error = wait_event_interruptible(socket_client->queue_wait,
@@ -323,25 +314,11 @@
 /**
  * batadv_socket_setup() - Create debugfs "socket" file
  * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: 0 on success or negative error number in case of failure
  */
-int batadv_socket_setup(struct batadv_priv *bat_priv)
+void batadv_socket_setup(struct batadv_priv *bat_priv)
 {
-	struct dentry *d;
-
-	if (!bat_priv->debug_dir)
-		goto err;
-
-	d = debugfs_create_file(BATADV_ICMP_SOCKET, 0600, bat_priv->debug_dir,
-				bat_priv, &batadv_fops);
-	if (!d)
-		goto err;
-
-	return 0;
-
-err:
-	return -ENOMEM;
+	debugfs_create_file(BATADV_ICMP_SOCKET, 0600, bat_priv->debug_dir,
+			    bat_priv, &batadv_fops);
 }
 
 /**
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 958be22..27fafff 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_
@@ -22,12 +10,11 @@
 #include "main.h"
 
 #include <linux/types.h>
-
-struct batadv_icmp_header;
+#include <uapi/linux/batadv_packet.h>
 
 #define BATADV_ICMP_SOCKET "socket"
 
-int batadv_socket_setup(struct batadv_priv *bat_priv);
+void batadv_socket_setup(struct batadv_priv *bat_priv);
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 853773e..11941cf 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "log.h"
@@ -40,6 +28,11 @@
 #include <linux/wait.h>
 #include <stdarg.h>
 
+#include "debugfs.h"
+#include "trace.h"
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
 #define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1)
 
 static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
@@ -89,33 +82,15 @@
 	return 0;
 }
 
-/**
- * batadv_debug_log() - Add debug log entry
- * @bat_priv: the bat priv with all the soft interface information
- * @fmt: format string
- *
- * Return: 0 on success or negative error number in case of failure
- */
-int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
-{
-	va_list args;
-	char tmp_log_buf[256];
-
-	va_start(args, fmt);
-	vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
-	batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s",
-			  jiffies_to_msecs(jiffies), tmp_log_buf);
-	va_end(args);
-
-	return 0;
-}
-
 static int batadv_log_open(struct inode *inode, struct file *file)
 {
 	if (!try_module_get(THIS_MODULE))
 		return -EBUSY;
 
-	nonseekable_open(inode, file);
+	batadv_debugfs_deprecated(file,
+				  "Use tracepoint batadv:batadv_dbg instead\n");
+
+	stream_open(inode, file);
 	file->private_data = inode->i_private;
 	return 0;
 }
@@ -149,7 +124,7 @@
 	if (count == 0)
 		return 0;
 
-	if (!access_ok(VERIFY_WRITE, buf, count))
+	if (!access_ok(buf, count))
 		return -EFAULT;
 
 	error = wait_event_interruptible(debug_log->queue_wait,
@@ -215,27 +190,16 @@
  */
 int batadv_debug_log_setup(struct batadv_priv *bat_priv)
 {
-	struct dentry *d;
-
-	if (!bat_priv->debug_dir)
-		goto err;
-
 	bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC);
 	if (!bat_priv->debug_log)
-		goto err;
+		return -ENOMEM;
 
 	spin_lock_init(&bat_priv->debug_log->lock);
 	init_waitqueue_head(&bat_priv->debug_log->queue_wait);
 
-	d = debugfs_create_file("log", 0400, bat_priv->debug_dir, bat_priv,
-				&batadv_log_fops);
-	if (!d)
-		goto err;
-
+	debugfs_create_file("log", 0400, bat_priv->debug_dir, bat_priv,
+			    &batadv_log_fops);
 	return 0;
-
-err:
-	return -ENOMEM;
 }
 
 /**
@@ -247,3 +211,34 @@
 	kfree(bat_priv->debug_log);
 	bat_priv->debug_log = NULL;
 }
+
+#endif /* CONFIG_BATMAN_ADV_DEBUGFS */
+
+/**
+ * batadv_debug_log() - Add debug log entry
+ * @bat_priv: the bat priv with all the soft interface information
+ * @fmt: format string
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+	batadv_fdebug_log(bat_priv->debug_log, "[%10u] %pV",
+			  jiffies_to_msecs(jiffies), &vaf);
+#endif
+
+	trace_batadv_dbg(bat_priv, &vaf);
+
+	va_end(args);
+
+	return 0;
+}
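
Hoisting batadv_debug_log() out of the DEBUGFS-only block works because a single struct va_format can feed several printk-style sinks: the %pV specifier va_copy()s the captured argument list on every formatting pass, so the debugfs ring buffer and the batadv_dbg tracepoint both see intact arguments. A minimal sketch of the same pattern (my_log() is a placeholder):

#include <linux/printk.h>
#include <stdarg.h>

static void my_log(const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	/* each sink formats independently; %pV re-copies the va_list */
	pr_debug("sink A: %pV", &vaf);
	pr_info("sink B: %pV", &vaf);

	va_end(args);
}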
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 35f4f39..741cfa3 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_LOG_H_
@@ -21,6 +9,7 @@
 
 #include "main.h"
 
+#include <linux/atomic.h>
 #include <linux/bitops.h>
 #include <linux/compiler.h>
 #include <linux/printk.h>
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 69c0d85..4a89177 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "main.h"
@@ -22,6 +10,7 @@
 #include <linux/build_bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/crc32c.h>
+#include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/genetlink.h>
 #include <linux/gfp.h>
@@ -31,6 +20,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/kernel.h>
+#include <linux/kobject.h>
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/module.h>
@@ -40,6 +30,7 @@
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
 #include <linux/skbuff.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
@@ -74,6 +65,7 @@
  * list traversals just rcu-locked
  */
 struct list_head batadv_hardif_list;
+unsigned int batadv_hardif_generation;
 static int (*batadv_rx_handler[256])(struct sk_buff *skb,
 				     struct batadv_hard_iface *recv_if);
 
@@ -83,6 +75,22 @@
 
 static void batadv_recv_handler_init(void);
 
+#define BATADV_UEV_TYPE_VAR	"BATTYPE="
+#define BATADV_UEV_ACTION_VAR	"BATACTION="
+#define BATADV_UEV_DATA_VAR	"BATDATA="
+
+static char *batadv_uev_action_str[] = {
+	"add",
+	"del",
+	"change",
+	"loopdetect",
+};
+
+static char *batadv_uev_type_str[] = {
+	"gw",
+	"bla",
+};
+
 static int __init batadv_init(void)
 {
 	int ret;
@@ -160,6 +168,7 @@
 	spin_lock_init(&bat_priv->tt.commit_lock);
 	spin_lock_init(&bat_priv->gw.list_lock);
 #ifdef CONFIG_BATMAN_ADV_MCAST
+	spin_lock_init(&bat_priv->mcast.mla_lock);
 	spin_lock_init(&bat_priv->mcast.want_lists_lock);
 #endif
 	spin_lock_init(&bat_priv->tvlv.container_list_lock);
@@ -186,6 +195,8 @@
 	INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
 	INIT_HLIST_HEAD(&bat_priv->tp_list);
 
+	bat_priv->gw.generation = 0;
+
 	ret = batadv_v_mesh_init(bat_priv);
 	if (ret < 0)
 		goto err;
@@ -675,6 +686,60 @@
 	return ap_isolation_enabled;
 }
 
+/**
+ * batadv_throw_uevent() - Send an uevent with batman-adv specific env data
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: subsystem type of event. Stored in uevent's BATTYPE
+ * @action: action type of event. Stored in uevent's BATACTION
+ * @data: string with additional information to the event (ignored for
+ *  BATADV_UEV_DEL). Stored in uevent's BATDATA
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
+			enum batadv_uev_action action, const char *data)
+{
+	int ret = -ENOMEM;
+	struct kobject *bat_kobj;
+	char *uevent_env[4] = { NULL, NULL, NULL, NULL };
+
+	bat_kobj = &bat_priv->soft_iface->dev.kobj;
+
+	uevent_env[0] = kasprintf(GFP_ATOMIC,
+				  "%s%s", BATADV_UEV_TYPE_VAR,
+				  batadv_uev_type_str[type]);
+	if (!uevent_env[0])
+		goto out;
+
+	uevent_env[1] = kasprintf(GFP_ATOMIC,
+				  "%s%s", BATADV_UEV_ACTION_VAR,
+				  batadv_uev_action_str[action]);
+	if (!uevent_env[1])
+		goto out;
+
+	/* If the event is DEL, ignore the data field */
+	if (action != BATADV_UEV_DEL) {
+		uevent_env[2] = kasprintf(GFP_ATOMIC,
+					  "%s%s", BATADV_UEV_DATA_VAR, data);
+		if (!uevent_env[2])
+			goto out;
+	}
+
+	ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env);
+out:
+	kfree(uevent_env[0]);
+	kfree(uevent_env[1]);
+	kfree(uevent_env[2]);
+
+	if (ret)
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n",
+			   batadv_uev_type_str[type],
+			   batadv_uev_action_str[action],
+			   (action == BATADV_UEV_DEL ? "NULL" : data), ret);
+	return ret;
+}
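
With the helper in place, emitting, say, a gateway-change event is a one-liner, and udev rules can then match on the injected variables (SUBSYSTEM=="net", ENV{BATTYPE}=="gw", ...). A hedged usage sketch (the caller and the MAC string are purely illustrative):

/* Hypothetical caller, e.g. after a new gateway was selected: fires a
 * KOBJ_CHANGE uevent on the soft interface carrying BATTYPE=gw,
 * BATACTION=change, BATDATA=<selected gateway>.
 */
static void example_announce_gw(struct batadv_priv *bat_priv)
{
	batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_CHANGE,
			    "00:11:22:33:44:55");
}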
+
 module_init(batadv_init);
 module_exit(batadv_exit);
 
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 3ccc75e..6967f2e 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_MAIN_H_
@@ -25,7 +13,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.3"
+#define BATADV_SOURCE_VERSION "2019.4"
 #endif
 
 /* B.A.T.M.A.N. parameters */
@@ -217,20 +205,19 @@
 
 /* Kernel headers */
 
+#include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <linux/etherdevice.h>
 #include <linux/if_vlan.h>
 #include <linux/jiffies.h>
+#include <linux/netdevice.h>
 #include <linux/percpu.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/types.h>
 #include <uapi/linux/batadv_packet.h>
 
 #include "types.h"
-
-struct net_device;
-struct packet_type;
-struct seq_file;
-struct sk_buff;
+#include "main.h"
 
 /**
  * batadv_print_vid() - return printable version of vid information
@@ -247,6 +235,7 @@
 }
 
 extern struct list_head batadv_hardif_list;
+extern unsigned int batadv_hardif_generation;
 
 extern unsigned char batadv_broadcast_addr[];
 extern struct workqueue_struct *batadv_event_workqueue;
@@ -393,5 +382,7 @@
 
 unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len);
 bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid);
+int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
+			enum batadv_uev_action action, const char *data);
 
 #endif /* _NET_BATMAN_ADV_MAIN_H_ */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 86725d7..1d5bdf3 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2019  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "multicast.h"
@@ -32,6 +20,7 @@
 #include <linux/igmp.h>
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/inetdevice.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/jiffies.h>
@@ -66,6 +55,7 @@
 #include "hash.h"
 #include "log.h"
 #include "netlink.h"
+#include "send.h"
 #include "soft-interface.h"
 #include "translation-table.h"
 #include "tvlv.h"
@@ -109,87 +99,236 @@
 }
 
 /**
- * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
- * @addr: the MAC address to check
+ * batadv_mcast_mla_rtr_flags_softif_get_ipv4() - get mcast router flags from
+ *  node for IPv4
+ * @dev: the interface to check
  *
- * Return: True, if MAC address is one reserved for IPv4 multicast, false
- * otherwise.
+ * Checks the presence of an IPv4 multicast router on this node.
+ *
+ * Caller needs to hold rcu read lock.
+ *
+ * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR4 otherwise.
  */
-static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv4(struct net_device *dev)
 {
-	static const u8 prefix[] = {0x01, 0x00, 0x5E};
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 
-	return memcmp(prefix, addr, sizeof(prefix)) == 0;
+	if (in_dev && IN_DEV_MFORWARD(in_dev))
+		return BATADV_NO_FLAGS;
+	else
+		return BATADV_MCAST_WANT_NO_RTR4;
 }
 
 /**
- * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
- * @addr: the MAC address to check
+ * batadv_mcast_mla_rtr_flags_softif_get_ipv6() - get mcast router flags from
+ *  node for IPv6
+ * @dev: the interface to check
  *
- * Return: True, if MAC address is one reserved for IPv6 multicast, false
- * otherwise.
+ * Checks the presence of an IPv6 multicast router on this node.
+ *
+ * Caller needs to hold rcu read lock.
+ *
+ * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR6 otherwise.
  */
-static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+#if IS_ENABLED(CONFIG_IPV6_MROUTE)
+static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
 {
-	static const u8 prefix[] = {0x33, 0x33};
+	struct inet6_dev *in6_dev = __in6_dev_get(dev);
 
-	return memcmp(prefix, addr, sizeof(prefix)) == 0;
+	if (in6_dev && in6_dev->cnf.mc_forwarding)
+		return BATADV_NO_FLAGS;
+	else
+		return BATADV_MCAST_WANT_NO_RTR6;
 }
+#else
+static inline u8
+batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
+{
+	return BATADV_MCAST_WANT_NO_RTR6;
+}
+#endif
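
The #if IS_ENABLED(...) split above is the usual kernel idiom for probing an optional subsystem: the real query is compiled only when the feature is available, and an inline stub returns the conservative default otherwise, so call sites stay free of #ifdefs. A generic sketch of the shape (CONFIG_MY_FEATURE and both function bodies are placeholders):

#include <linux/kconfig.h>
#include <linux/netdevice.h>

#if IS_ENABLED(CONFIG_MY_FEATURE)
static int my_feature_present(struct net_device *dev)
{
	/* the real probe of the optional subsystem would go here */
	return !!(dev->flags & IFF_MULTICAST);
}
#else
static inline int my_feature_present(struct net_device *dev)
{
	return 0;	/* feature compiled out: conservative default */
}
#endif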
 
 /**
- * batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * batadv_mcast_mla_rtr_flags_softif_get() - get mcast router flags from node
  * @bat_priv: the bat priv with all the soft interface information
- * @dev: the device to collect multicast addresses from
- * @mcast_list: a list to put found addresses into
+ * @bridge: bridge interface on top of the soft_iface if present,
+ *  otherwise pass NULL
  *
- * Collects multicast addresses of multicast listeners residing
- * on this kernel on the given soft interface, dev, in
- * the given mcast_list. In general, multicast listeners provided by
- * your multicast receiving applications run directly on this node.
+ * Checks the presence of IPv4 and IPv6 multicast routers on this
+ * node.
  *
- * If there is a bridge interface on top of dev, collects from that one
- * instead. Just like with IP addresses and routes, multicast listeners
- * will(/should) register to the bridge interface instead of an
- * enslaved bat0.
- *
- * Return: -ENOMEM on memory allocation error or the number of
- * items added to the mcast_list otherwise.
+ * Return:
+ *	BATADV_NO_FLAGS: Both an IPv4 and an IPv6 multicast router are present
+ *	BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present
+ *	BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present
+ *	The former two OR'd: no multicast router is present
  */
-static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
-				       struct net_device *dev,
-				       struct hlist_head *mcast_list)
+static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv,
+						struct net_device *bridge)
 {
-	bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
-	bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
-	struct net_device *bridge = batadv_mcast_get_bridge(dev);
-	struct netdev_hw_addr *mc_list_entry;
-	struct batadv_hw_addr *new;
-	int ret = 0;
+	struct net_device *dev = bridge ? bridge : bat_priv->soft_iface;
+	u8 flags = BATADV_NO_FLAGS;
 
-	netif_addr_lock_bh(bridge ? bridge : dev);
-	netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
-		if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
-			continue;
+	rcu_read_lock();
 
-		if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
-			continue;
+	flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv4(dev);
+	flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv6(dev);
 
-		new = kmalloc(sizeof(*new), GFP_ATOMIC);
-		if (!new) {
-			ret = -ENOMEM;
-			break;
-		}
+	rcu_read_unlock();
 
-		ether_addr_copy(new->addr, mc_list_entry->addr);
-		hlist_add_head(&new->list, mcast_list);
-		ret++;
+	return flags;
+}
+
+/**
+ * batadv_mcast_mla_rtr_flags_bridge_get() - get mcast router flags from bridge
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bridge: bridge interface on top of the soft_iface if present,
+ *  otherwise pass NULL
+ *
+ * Checks the presence of IPv4 and IPv6 multicast routers behind a bridge.
+ *
+ * Return:
+ *	BATADV_NO_FLAGS: Both an IPv4 and an IPv6 multicast router are present
+ *	BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present
+ *	BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present
+ *	The former two OR'd: no multicast router is present
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
+						struct net_device *bridge)
+{
+	struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+	struct net_device *dev = bat_priv->soft_iface;
+	struct br_ip_list *br_ip_entry, *tmp;
+	u8 flags = BATADV_MCAST_WANT_NO_RTR6;
+	int ret;
+
+	if (!bridge)
+		return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
+
+	/* TODO: ask the bridge if a multicast router is present (the bridge
+	 * is capable of performing proper RFC4286 multicast router
+	 * discovery) instead of searching for a ff02::2 listener here
+	 */
+	ret = br_multicast_list_adjacent(dev, &bridge_mcast_list);
+	if (ret < 0)
+		return BATADV_NO_FLAGS;
+
+	list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) {
+		/* the bridge snooping does not maintain IPv4 link-local
+		 * addresses - therefore we won't find any IPv4 multicast router
+		 * address here, only IPv6 ones
+		 */
+		if (br_ip_entry->addr.proto == htons(ETH_P_IPV6) &&
+		    ipv6_addr_is_ll_all_routers(&br_ip_entry->addr.u.ip6))
+			flags &= ~BATADV_MCAST_WANT_NO_RTR6;
+
+		list_del(&br_ip_entry->list);
+		kfree(br_ip_entry);
 	}
-	netif_addr_unlock_bh(bridge ? bridge : dev);
 
+	return flags;
+}
+#else
+static inline u8
+batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
+				      struct net_device *bridge)
+{
 	if (bridge)
-		dev_put(bridge);
+		return BATADV_NO_FLAGS;
+	else
+		return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
+}
+#endif
 
-	return ret;
+/**
+ * batadv_mcast_mla_rtr_flags_get() - get multicast router flags
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bridge: bridge interface on top of the soft_iface if present,
+ *  otherwise pass NULL
+ *
+ * Checks the presence of IPv4 and IPv6 multicast routers on this
+ * node or behind its bridge.
+ *
+ * Return:
+ *	BATADV_NO_FLAGS: Both an IPv4 and an IPv6 multicast router are present
+ *	BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present
+ *	BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present
+ *	The former two OR'd: no multicast router is present
+ */
+static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv,
+					 struct net_device *bridge)
+{
+	u8 flags = BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
+
+	flags &= batadv_mcast_mla_rtr_flags_softif_get(bat_priv, bridge);
+	flags &= batadv_mcast_mla_rtr_flags_bridge_get(bat_priv, bridge);
+
+	return flags;
+}
+
+/**
+ * batadv_mcast_mla_flags_get() - get the new multicast flags
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: A set of flags for the current/next TVLV, querier and
+ * bridge state.
+ */
+static struct batadv_mcast_mla_flags
+batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv)
+{
+	struct net_device *dev = bat_priv->soft_iface;
+	struct batadv_mcast_querier_state *qr4, *qr6;
+	struct batadv_mcast_mla_flags mla_flags;
+	struct net_device *bridge;
+
+	bridge = batadv_mcast_get_bridge(dev);
+
+	memset(&mla_flags, 0, sizeof(mla_flags));
+	mla_flags.enabled = 1;
+	mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv,
+							       bridge);
+
+	if (!bridge)
+		return mla_flags;
+
+	dev_put(bridge);
+
+	mla_flags.bridged = 1;
+	qr4 = &mla_flags.querier_ipv4;
+	qr6 = &mla_flags.querier_ipv6;
+
+	if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING))
+		pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
+
+	qr4->exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP);
+	qr4->shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP);
+
+	qr6->exists = br_multicast_has_querier_anywhere(dev, ETH_P_IPV6);
+	qr6->shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IPV6);
+
+	mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_UNSNOOPABLES;
+
+	/* 1) If no querier exists at all, then multicast listeners on
+	 *    our local TT clients behind the bridge will keep silent.
+	 * 2) If the selected querier is on one of our local TT clients,
+	 *    behind the bridge, then this querier might shadow multicast
+	 *    listeners on our local TT clients, behind this bridge.
+	 *
+	 * In both cases, we signal to the other batman nodes that we
+	 * need all multicast traffic of the corresponding protocol.
+	 */
+	if (!qr4->exists || qr4->shadowing) {
+		mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_IPV4;
+		mla_flags.tvlv_flags &= ~BATADV_MCAST_WANT_NO_RTR4;
+	}
+
+	if (!qr6->exists || qr6->shadowing) {
+		mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_IPV6;
+		mla_flags.tvlv_flags &= ~BATADV_MCAST_WANT_NO_RTR6;
+	}
+
+	return mla_flags;
 }
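
The flag arithmetic in these two helpers deserves a second look: batadv_mcast_mla_rtr_flags_get() starts from "no router anywhere" and AND-combines the probe results, so a probe can only clear a WANT_NO_RTR bit, never set one; batadv_mcast_mla_flags_get() then revokes such a bit again whenever a missing or shadowed querier forces WANT_ALL traffic anyway. A compilable toy example of the AND-combination, using illustrative bit values (the real BATADV_MCAST_* flags live in uapi/linux/batadv_packet.h):

#include <stdint.h>
#include <stdio.h>

#define WANT_NO_RTR4 0x01	/* illustrative values, not the real bits */
#define WANT_NO_RTR6 0x02

int main(void)
{
	/* each probe reports the router types it did NOT see */
	uint8_t softif = WANT_NO_RTR4;			/* saw an IPv6 router */
	uint8_t bridge = WANT_NO_RTR4 | WANT_NO_RTR6;	/* saw none */
	uint8_t flags;

	/* a WANT_NO_RTRx bit survives only if no probe saw that router */
	flags = (WANT_NO_RTR4 | WANT_NO_RTR6) & softif & bridge;

	printf("flags=0x%x\n", flags);	/* 0x1: only the IPv4 router is missing */
	return 0;
}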
 
 /**
@@ -213,6 +352,201 @@
 }
 
 /**
+ * batadv_mcast_mla_softif_get_ipv4() - get softif IPv4 multicast listeners
+ * @dev: the device to collect multicast addresses from
+ * @mcast_list: a list to put found addresses into
+ * @flags: flags indicating the new multicast state
+ *
+ * Collects multicast addresses of IPv4 multicast listeners residing
+ * on this kernel on the given soft interface, dev, in
+ * the given mcast_list. In general, multicast listeners provided by
+ * your multicast receiving applications run directly on this node.
+ *
+ * Return: -ENOMEM on memory allocation error or the number of
+ * items added to the mcast_list otherwise.
+ */
+static int
+batadv_mcast_mla_softif_get_ipv4(struct net_device *dev,
+				 struct hlist_head *mcast_list,
+				 struct batadv_mcast_mla_flags *flags)
+{
+	struct batadv_hw_addr *new;
+	struct in_device *in_dev;
+	u8 mcast_addr[ETH_ALEN];
+	struct ip_mc_list *pmc;
+	int ret = 0;
+
+	if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_IPV4)
+		return 0;
+
+	rcu_read_lock();
+
+	in_dev = __in_dev_get_rcu(dev);
+	if (!in_dev) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	for (pmc = rcu_dereference(in_dev->mc_list); pmc;
+	     pmc = rcu_dereference(pmc->next_rcu)) {
+		if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
+		    ipv4_is_local_multicast(pmc->multiaddr))
+			continue;
+
+		if (!(flags->tvlv_flags & BATADV_MCAST_WANT_NO_RTR4) &&
+		    !ipv4_is_local_multicast(pmc->multiaddr))
+			continue;
+
+		ip_eth_mc_map(pmc->multiaddr, mcast_addr);
+
+		if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
+			continue;
+
+		new = kmalloc(sizeof(*new), GFP_ATOMIC);
+		if (!new) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		ether_addr_copy(new->addr, mcast_addr);
+		hlist_add_head(&new->list, mcast_list);
+		ret++;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/**
+ * batadv_mcast_mla_softif_get_ipv6() - get softif IPv6 multicast listeners
+ * @dev: the device to collect multicast addresses from
+ * @mcast_list: a list to put found addresses into
+ * @flags: flags indicating the new multicast state
+ *
+ * Collects multicast addresses of IPv6 multicast listeners residing
+ * on this kernel on the given soft interface, dev, in
+ * the given mcast_list. In general, multicast listeners provided by
+ * your multicast receiving applications run directly on this node.
+ *
+ * Return: -ENOMEM on memory allocation error or the number of
+ * items added to the mcast_list otherwise.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static int
+batadv_mcast_mla_softif_get_ipv6(struct net_device *dev,
+				 struct hlist_head *mcast_list,
+				 struct batadv_mcast_mla_flags *flags)
+{
+	struct batadv_hw_addr *new;
+	struct inet6_dev *in6_dev;
+	u8 mcast_addr[ETH_ALEN];
+	struct ifmcaddr6 *pmc6;
+	int ret = 0;
+
+	if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_IPV6)
+		return 0;
+
+	rcu_read_lock();
+
+	in6_dev = __in6_dev_get(dev);
+	if (!in6_dev) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	read_lock_bh(&in6_dev->lock);
+	for (pmc6 = in6_dev->mc_list; pmc6; pmc6 = pmc6->next) {
+		if (IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) <
+		    IPV6_ADDR_SCOPE_LINKLOCAL)
+			continue;
+
+		if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
+		    ipv6_addr_is_ll_all_nodes(&pmc6->mca_addr))
+			continue;
+
+		if (!(flags->tvlv_flags & BATADV_MCAST_WANT_NO_RTR6) &&
+		    IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) >
+		    IPV6_ADDR_SCOPE_LINKLOCAL)
+			continue;
+
+		ipv6_eth_mc_map(&pmc6->mca_addr, mcast_addr);
+
+		if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
+			continue;
+
+		new = kmalloc(sizeof(*new), GFP_ATOMIC);
+		if (!new) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		ether_addr_copy(new->addr, mcast_addr);
+		hlist_add_head(&new->list, mcast_list);
+		ret++;
+	}
+	read_unlock_bh(&in6_dev->lock);
+	rcu_read_unlock();
+
+	return ret;
+}
+#else
+static inline int
+batadv_mcast_mla_softif_get_ipv6(struct net_device *dev,
+				 struct hlist_head *mcast_list,
+				 struct batadv_mcast_mla_flags *flags)
+{
+	return 0;
+}
+#endif
+
+/**
+ * batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @dev: the device to collect multicast addresses from
+ * @mcast_list: a list to put found addresses into
+ * @flags: flags indicating the new multicast state
+ *
+ * Collects the addresses of multicast listeners registered on this
+ * kernel for the given soft interface, dev, into the given
+ * mcast_list. In general, these are the multicast listeners of
+ * receiving applications running directly on this node.
+ *
+ * If there is a bridge interface on top of dev, collects from that one
+ * instead. Just like with IP addresses and routes, multicast listeners
+ * will (or at least should) register on the bridge interface instead
+ * of on an enslaved bat0.
+ *
+ * Return: -ENOMEM on memory allocation error or the number of
+ * items added to the mcast_list otherwise.
+ */
+static int
+batadv_mcast_mla_softif_get(struct net_device *dev,
+			    struct hlist_head *mcast_list,
+			    struct batadv_mcast_mla_flags *flags)
+{
+	struct net_device *bridge = batadv_mcast_get_bridge(dev);
+	int ret4, ret6 = 0;
+
+	if (bridge)
+		dev = bridge;
+
+	ret4 = batadv_mcast_mla_softif_get_ipv4(dev, mcast_list, flags);
+	if (ret4 < 0)
+		goto out;
+
+	ret6 = batadv_mcast_mla_softif_get_ipv6(dev, mcast_list, flags);
+	if (ret6 < 0) {
+		ret4 = 0;
+		goto out;
+	}
+
+out:
+	if (bridge)
+		dev_put(bridge);
+
+	return ret4 + ret6;
+}
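
The combined getter above folds both per-protocol results into a single
value: a non-negative listener count on success, or one negative errno.
Note how a late IPv6 failure zeroes ret4 so only the error code is
returned. A sketch of just that convention, with hypothetical
collect4()/collect6() helpers in place of the real collectors:

/* illustrative sketch only; collect4()/collect6() are stand-ins */
#include <errno.h>
#include <stdio.h>

static int collect4(int fail) { return fail ? -ENOMEM : 2; }
static int collect6(int fail) { return fail ? -ENOMEM : 3; }

static int softif_get(int fail4, int fail6)
{
	int ret4, ret6 = 0;

	ret4 = collect4(fail4);
	if (ret4 < 0)
		goto out;

	ret6 = collect6(fail6);
	if (ret6 < 0) {
		ret4 = 0;	/* report only the error code */
		goto out;
	}

out:
	return ret4 + ret6;	/* listener count, or -ENOMEM */
}

int main(void)
{
	printf("%d %d %d\n", softif_get(0, 0),	/* 5 listeners */
	       softif_get(1, 0),		/* -ENOMEM */
	       softif_get(0, 1));		/* -ENOMEM */
	return 0;
}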
+
+/**
  * batadv_mcast_mla_br_addr_cpy() - copy a bridge multicast address
  * @dst: destination to write to - a multicast MAC address
  * @src: source to read from - a multicast IP address
@@ -238,9 +572,9 @@
 
 /**
  * batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
- * @bat_priv: the bat priv with all the soft interface information
  * @dev: a bridge slave whose bridge to collect multicast addresses from
  * @mcast_list: a list to put found addresses into
+ * @flags: flags indicating the new multicast state
  *
  * Collects multicast addresses of multicast listeners residing
  * on foreign, non-mesh devices which we gave access to our mesh via
@@ -250,14 +584,13 @@
  * Return: -ENOMEM on memory allocation error or the number of
  * items added to the mcast_list otherwise.
  */
-static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
-				       struct net_device *dev,
-				       struct hlist_head *mcast_list)
+static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+				       struct hlist_head *mcast_list,
+				       struct batadv_mcast_mla_flags *flags)
 {
 	struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
-	bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
-	bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
 	struct br_ip_list *br_ip_entry, *tmp;
+	u8 tvlv_flags = flags->tvlv_flags;
 	struct batadv_hw_addr *new;
 	u8 mcast_addr[ETH_ALEN];
 	int ret;
@@ -270,11 +603,34 @@
 		goto out;
 
 	list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
-		if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
-			continue;
+		if (br_ip_entry->addr.proto == htons(ETH_P_IP)) {
+			if (tvlv_flags & BATADV_MCAST_WANT_ALL_IPV4)
+				continue;
 
-		if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
-			continue;
+			if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
+			    ipv4_is_local_multicast(br_ip_entry->addr.u.ip4))
+				continue;
+
+			if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR4) &&
+			    !ipv4_is_local_multicast(br_ip_entry->addr.u.ip4))
+				continue;
+		}
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (br_ip_entry->addr.proto == htons(ETH_P_IPV6)) {
+			if (tvlv_flags & BATADV_MCAST_WANT_ALL_IPV6)
+				continue;
+
+			if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
+			    ipv6_addr_is_ll_all_nodes(&br_ip_entry->addr.u.ip6))
+				continue;
+
+			if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR6) &&
+			    IPV6_ADDR_MC_SCOPE(&br_ip_entry->addr.u.ip6) >
+			    IPV6_ADDR_SCOPE_LINKLOCAL)
+				continue;
+		}
+#endif
 
 		batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
 		if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
@@ -325,8 +681,6 @@
  * translation table except the ones listed in the given mcast_list.
  *
  * If mcast_list is NULL then all are retracted.
- *
- * Do not call outside of the mcast worker! (or cancel mcast worker first)
  */
 static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
 					struct hlist_head *mcast_list)
@@ -334,8 +688,6 @@
 	struct batadv_hw_addr *mcast_entry;
 	struct hlist_node *tmp;
 
-	WARN_ON(delayed_work_pending(&bat_priv->mcast.work));
-
 	hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list,
 				  list) {
 		if (mcast_list &&
@@ -359,8 +711,6 @@
  *
  * Adds multicast listener announcements from the given mcast_list to the
  * translation table if they have not been added yet.
- *
- * Do not call outside of the mcast worker! (or cancel mcast worker first)
  */
 static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
 				    struct hlist_head *mcast_list)
@@ -368,8 +718,6 @@
 	struct batadv_hw_addr *mcast_entry;
 	struct hlist_node *tmp;
 
-	WARN_ON(delayed_work_pending(&bat_priv->mcast.work));
-
 	if (!mcast_list)
 		return;
 
@@ -389,27 +737,6 @@
 }
 
 /**
- * batadv_mcast_has_bridge() - check whether the soft-iface is bridged
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Checks whether there is a bridge on top of our soft interface.
- *
- * Return: true if there is a bridge, false otherwise.
- */
-static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
-{
-	struct net_device *upper = bat_priv->soft_iface;
-
-	rcu_read_lock();
-	do {
-		upper = netdev_master_upper_dev_get_rcu(upper);
-	} while (upper && !(upper->priv_flags & IFF_EBRIDGE));
-	rcu_read_unlock();
-
-	return upper;
-}
-
-/**
  * batadv_mcast_querier_log() - debug output regarding the querier status on
  *  link
  * @bat_priv: the bat priv with all the soft interface information
@@ -443,7 +770,7 @@
 		batadv_info(bat_priv->soft_iface,
 			    "%s Querier disappeared - multicast optimizations disabled\n",
 			    str_proto);
-	else if (!bat_priv->mcast.bridged && !new_state->exists)
+	else if (!bat_priv->mcast.mla_flags.bridged && !new_state->exists)
 		batadv_info(bat_priv->soft_iface,
 			    "No %s Querier present - multicast optimizations disabled\n",
 			    str_proto);
@@ -465,9 +792,7 @@
  * batadv_mcast_bridge_log() - debug output for topology changes in bridged
  *  setups
  * @bat_priv: the bat priv with all the soft interface information
- * @bridged: a flag about whether the soft interface is currently bridged or not
- * @querier_ipv4: (maybe) new status of a potential, selected IGMP querier
- * @querier_ipv6: (maybe) new status of a potential, selected MLD querier
+ * @new_flags: flags indicating the new multicast state
  *
  * If no bridges are ever used on this node, then this function does nothing.
  *
@@ -480,126 +805,86 @@
  * multicast flags this node is going to set.
  */
 static void
-batadv_mcast_bridge_log(struct batadv_priv *bat_priv, bool bridged,
-			struct batadv_mcast_querier_state *querier_ipv4,
-			struct batadv_mcast_querier_state *querier_ipv6)
+batadv_mcast_bridge_log(struct batadv_priv *bat_priv,
+			struct batadv_mcast_mla_flags *new_flags)
 {
-	if (!bat_priv->mcast.bridged && bridged)
+	struct batadv_mcast_mla_flags *old_flags = &bat_priv->mcast.mla_flags;
+
+	if (!old_flags->bridged && new_flags->bridged)
 		batadv_dbg(BATADV_DBG_MCAST, bat_priv,
 			   "Bridge added: Setting Unsnoopables(U)-flag\n");
-	else if (bat_priv->mcast.bridged && !bridged)
+	else if (old_flags->bridged && !new_flags->bridged)
 		batadv_dbg(BATADV_DBG_MCAST, bat_priv,
 			   "Bridge removed: Unsetting Unsnoopables(U)-flag\n");
 
-	if (bridged) {
+	if (new_flags->bridged) {
 		batadv_mcast_querier_log(bat_priv, "IGMP",
-					 &bat_priv->mcast.querier_ipv4,
-					 querier_ipv4);
+					 &old_flags->querier_ipv4,
+					 &new_flags->querier_ipv4);
 		batadv_mcast_querier_log(bat_priv, "MLD",
-					 &bat_priv->mcast.querier_ipv6,
-					 querier_ipv6);
+					 &old_flags->querier_ipv6,
+					 &new_flags->querier_ipv6);
 	}
 }
 
 /**
  * batadv_mcast_flags_logs() - output debug information about mcast flag changes
  * @bat_priv: the bat priv with all the soft interface information
- * @flags: flags indicating the new multicast state
+ * @flags: TVLV flags indicating the new multicast state
  *
- * Whenever the multicast flags this nodes announces changes (@mcast_flags vs.
- * bat_priv->mcast.flags), this notifies userspace via the 'mcast' log level.
+ * Whenever the multicast TVLV flags this node announces change, this
+ * notifies userspace via the 'mcast' log level.
  */
 static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
 {
-	u8 old_flags = bat_priv->mcast.flags;
-	char str_old_flags[] = "[...]";
+	bool old_enabled = bat_priv->mcast.mla_flags.enabled;
+	u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags;
+	char str_old_flags[] = "[.... . ]";
 
-	sprintf(str_old_flags, "[%c%c%c]",
+	sprintf(str_old_flags, "[%c%c%c%s%s]",
 		(old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
 		(old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
-		(old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+		(old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
+		!(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
+		!(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
 
 	batadv_dbg(BATADV_DBG_MCAST, bat_priv,
-		   "Changing multicast flags from '%s' to '[%c%c%c]'\n",
-		   bat_priv->mcast.enabled ? str_old_flags : "<undefined>",
+		   "Changing multicast flags from '%s' to '[%c%c%c%s%s]'\n",
+		   old_enabled ? str_old_flags : "<undefined>",
 		   (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
 		   (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
-		   (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+		   (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
+		   !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
+		   !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
 }
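
The new format string grows the old three-character flag field by two
two-character router columns, e.g. "[U..R4R6]". A standalone sketch of
the same formatting; the flag bit values below are mocked up locally
rather than taken from the uapi header:

/* illustrative sketch only; flag values are local mock-ups */
#include <stdio.h>

#define WANT_ALL_UNSNOOPABLES	(1 << 0)
#define WANT_ALL_IPV4		(1 << 1)
#define WANT_ALL_IPV6		(1 << 2)
#define WANT_NO_RTR4		(1 << 3)
#define WANT_NO_RTR6		(1 << 4)

static void print_flags(unsigned char flags)
{
	char str[] = "[.... . ]";	/* sized for the 9-char result */

	sprintf(str, "[%c%c%c%s%s]",
		(flags & WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
		(flags & WANT_ALL_IPV4) ? '4' : '.',
		(flags & WANT_ALL_IPV6) ? '6' : '.',
		!(flags & WANT_NO_RTR4) ? "R4" : ". ",
		!(flags & WANT_NO_RTR6) ? "R6" : ". ");
	puts(str);
}

int main(void)
{
	print_flags(WANT_ALL_UNSNOOPABLES);		/* [U..R4R6] */
	print_flags(WANT_ALL_IPV4 | WANT_NO_RTR4);	/* [.4.. R6] */
	return 0;
}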
 
 /**
- * batadv_mcast_mla_tvlv_update() - update multicast tvlv
+ * batadv_mcast_mla_flags_update() - update multicast flags
  * @bat_priv: the bat priv with all the soft interface information
+ * @flags: flags indicating the new multicast state
  *
  * Updates the own multicast tvlv with our current multicast related settings,
  * capabilities and inabilities.
- *
- * Return: false if we want all IPv4 && IPv6 multicast traffic and true
- * otherwise.
  */
-static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
+static void
+batadv_mcast_mla_flags_update(struct batadv_priv *bat_priv,
+			      struct batadv_mcast_mla_flags *flags)
 {
 	struct batadv_tvlv_mcast_data mcast_data;
-	struct batadv_mcast_querier_state querier4 = {false, false};
-	struct batadv_mcast_querier_state querier6 = {false, false};
-	struct net_device *dev = bat_priv->soft_iface;
-	bool bridged;
 
-	mcast_data.flags = BATADV_NO_FLAGS;
+	if (!memcmp(flags, &bat_priv->mcast.mla_flags, sizeof(*flags)))
+		return;
+
+	batadv_mcast_bridge_log(bat_priv, flags);
+	batadv_mcast_flags_log(bat_priv, flags->tvlv_flags);
+
+	mcast_data.flags = flags->tvlv_flags;
 	memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved));
 
-	bridged = batadv_mcast_has_bridge(bat_priv);
-	if (!bridged)
-		goto update;
+	batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 2,
+				       &mcast_data, sizeof(mcast_data));
 
-	if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING))
-		pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
-
-	querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP);
-	querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP);
-
-	querier6.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IPV6);
-	querier6.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IPV6);
-
-	mcast_data.flags |= BATADV_MCAST_WANT_ALL_UNSNOOPABLES;
-
-	/* 1) If no querier exists at all, then multicast listeners on
-	 *    our local TT clients behind the bridge will keep silent.
-	 * 2) If the selected querier is on one of our local TT clients,
-	 *    behind the bridge, then this querier might shadow multicast
-	 *    listeners on our local TT clients, behind this bridge.
-	 *
-	 * In both cases, we will signalize other batman nodes that
-	 * we need all multicast traffic of the according protocol.
-	 */
-	if (!querier4.exists || querier4.shadowing)
-		mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV4;
-
-	if (!querier6.exists || querier6.shadowing)
-		mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV6;
-
-update:
-	batadv_mcast_bridge_log(bat_priv, bridged, &querier4, &querier6);
-
-	bat_priv->mcast.querier_ipv4.exists = querier4.exists;
-	bat_priv->mcast.querier_ipv4.shadowing = querier4.shadowing;
-
-	bat_priv->mcast.querier_ipv6.exists = querier6.exists;
-	bat_priv->mcast.querier_ipv6.shadowing = querier6.shadowing;
-
-	bat_priv->mcast.bridged = bridged;
-
-	if (!bat_priv->mcast.enabled ||
-	    mcast_data.flags != bat_priv->mcast.flags) {
-		batadv_mcast_flags_log(bat_priv, mcast_data.flags);
-		batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 2,
-					       &mcast_data, sizeof(mcast_data));
-		bat_priv->mcast.flags = mcast_data.flags;
-		bat_priv->mcast.enabled = true;
-	}
-
-	return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 &&
-		 mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6);
+	bat_priv->mcast.mla_flags = *flags;
 }
 
 /**
@@ -618,22 +903,24 @@
 {
 	struct net_device *soft_iface = bat_priv->soft_iface;
 	struct hlist_head mcast_list = HLIST_HEAD_INIT;
+	struct batadv_mcast_mla_flags flags;
 	int ret;
 
-	if (!batadv_mcast_mla_tvlv_update(bat_priv))
-		goto update;
+	flags = batadv_mcast_mla_flags_get(bat_priv);
 
-	ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
+	ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list, &flags);
 	if (ret < 0)
 		goto out;
 
-	ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
+	ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list, &flags);
 	if (ret < 0)
 		goto out;
 
-update:
+	spin_lock(&bat_priv->mcast.mla_lock);
 	batadv_mcast_mla_tt_retract(bat_priv, &mcast_list);
 	batadv_mcast_mla_tt_add(bat_priv, &mcast_list);
+	batadv_mcast_mla_flags_update(bat_priv, &flags);
+	spin_unlock(&bat_priv->mcast.mla_lock);
 
 out:
 	batadv_mcast_mla_list_free(&mcast_list);
@@ -674,7 +961,7 @@
  */
 static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb)
 {
-	if (ip_mc_check_igmp(skb, NULL) < 0)
+	if (ip_mc_check_igmp(skb) < 0)
 		return false;
 
 	switch (igmp_hdr(skb)->type) {
@@ -693,6 +980,7 @@
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the IPv4 packet to check
  * @is_unsnoopable: stores whether the destination is snoopable
+ * @is_routable: stores the protocol family if the destination is routable
  *
  * Checks whether the given IPv4 packet has the potential to be forwarded with a
  * mode more optimal than classic flooding.
@@ -702,7 +990,8 @@
  */
 static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
 					     struct sk_buff *skb,
-					     bool *is_unsnoopable)
+					     bool *is_unsnoopable,
+					     int *is_routable)
 {
 	struct iphdr *iphdr;
 
@@ -715,16 +1004,13 @@
 
 	iphdr = ip_hdr(skb);
 
-	/* TODO: Implement Multicast Router Discovery (RFC4286),
-	 * then allow scope > link local, too
-	 */
-	if (!ipv4_is_local_multicast(iphdr->daddr))
-		return -EINVAL;
-
 	/* link-local multicast listeners behind a bridge are
 	 * not snoopable (see RFC4541, section 2.1.2.2)
 	 */
-	*is_unsnoopable = true;
+	if (ipv4_is_local_multicast(iphdr->daddr))
+		*is_unsnoopable = true;
+	else
+		*is_routable = ETH_P_IP;
 
 	return 0;
 }
@@ -741,7 +1027,7 @@
  */
 static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb)
 {
-	if (ipv6_mc_check_mld(skb, NULL) < 0)
+	if (ipv6_mc_check_mld(skb) < 0)
 		return false;
 
 	switch (icmp6_hdr(skb)->icmp6_type) {
@@ -759,6 +1045,7 @@
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the IPv6 packet to check
  * @is_unsnoopable: stores whether the destination is snoopable
+ * @is_routable: stores the protocol family if the destination is routable
  *
  * Checks whether the given IPv6 packet has the potential to be forwarded with a
  * mode more optimal than classic flooding.
@@ -767,7 +1054,8 @@
  */
 static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
 					     struct sk_buff *skb,
-					     bool *is_unsnoopable)
+					     bool *is_unsnoopable,
+					     int *is_routable)
 {
 	struct ipv6hdr *ip6hdr;
 
@@ -780,10 +1068,7 @@
 
 	ip6hdr = ipv6_hdr(skb);
 
-	/* TODO: Implement Multicast Router Discovery (RFC4286),
-	 * then allow scope > link local, too
-	 */
-	if (IPV6_ADDR_MC_SCOPE(&ip6hdr->daddr) != IPV6_ADDR_SCOPE_LINKLOCAL)
+	if (IPV6_ADDR_MC_SCOPE(&ip6hdr->daddr) < IPV6_ADDR_SCOPE_LINKLOCAL)
 		return -EINVAL;
 
 	/* link-local-all-nodes multicast listeners behind a bridge are
@@ -791,6 +1076,8 @@
 	 */
 	if (ipv6_addr_is_ll_all_nodes(&ip6hdr->daddr))
 		*is_unsnoopable = true;
+	else if (IPV6_ADDR_MC_SCOPE(&ip6hdr->daddr) > IPV6_ADDR_SCOPE_LINKLOCAL)
+		*is_routable = ETH_P_IPV6;
 
 	return 0;
 }
@@ -800,6 +1087,7 @@
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the multicast frame to check
  * @is_unsnoopable: stores whether the destination is snoopable
+ * @is_routable: stores the protocol family if the destination is routable
  *
  * Checks whether the given multicast ethernet frame has the potential to be
  * forwarded with a mode more optimal than classic flooding.
@@ -808,7 +1096,8 @@
  */
 static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
 					struct sk_buff *skb,
-					bool *is_unsnoopable)
+					bool *is_unsnoopable,
+					int *is_routable)
 {
 	struct ethhdr *ethhdr = eth_hdr(skb);
 
@@ -818,13 +1107,15 @@
 	switch (ntohs(ethhdr->h_proto)) {
 	case ETH_P_IP:
 		return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb,
-							 is_unsnoopable);
+							 is_unsnoopable,
+							 is_routable);
 	case ETH_P_IPV6:
 		if (!IS_ENABLED(CONFIG_IPV6))
 			return -EINVAL;
 
 		return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb,
-							 is_unsnoopable);
+							 is_unsnoopable,
+							 is_routable);
 	default:
 		return -EINVAL;
 	}
@@ -855,6 +1146,29 @@
 }
 
 /**
+ * batadv_mcast_forw_rtr_count() - count nodes with a multicast router
+ * @bat_priv: the bat priv with all the soft interface information
+ * @protocol: the ethernet protocol type to count multicast routers for
+ *
+ * Return: the number of nodes which want all routable IPv4 multicast traffic
+ * if the protocol is ETH_P_IP or the number of nodes which want all routable
+ * IPv6 multicast traffic if the protocol is ETH_P_IPV6. Otherwise returns 0.
+ */
+static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv,
+				       int protocol)
+{
+	switch (protocol) {
+	case ETH_P_IP:
+		return atomic_read(&bat_priv->mcast.num_want_all_rtr4);
+	case ETH_P_IPV6:
+		return atomic_read(&bat_priv->mcast.num_want_all_rtr6);
+	default:
+		return 0;
+	}
+}
+
+/**
  * batadv_mcast_forw_tt_node_get() - get a multicast tt node
  * @bat_priv: the bat priv with all the soft interface information
  * @ethhdr: the ether header containing the multicast destination
@@ -976,6 +1290,84 @@
 }
 
 /**
+ * batadv_mcast_forw_rtr4_node_get() - get a node with an ipv4 mcast router flag
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: an orig_node which has the BATADV_MCAST_WANT_NO_RTR4 flag unset,
+ * with its refcount increased, or NULL if there is none.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_rtr4_node_get(struct batadv_priv *bat_priv)
+{
+	struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tmp_orig_node,
+				 &bat_priv->mcast.want_all_rtr4_list,
+				 mcast_want_all_rtr4_node) {
+		if (!kref_get_unless_zero(&tmp_orig_node->refcount))
+			continue;
+
+		orig_node = tmp_orig_node;
+		break;
+	}
+	rcu_read_unlock();
+
+	return orig_node;
+}
+
+/**
+ * batadv_mcast_forw_rtr6_node_get() - get a node with an ipv6 mcast router flag
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: an orig_node which has the BATADV_MCAST_WANT_NO_RTR6 flag unset,
+ * with its refcount increased, or NULL if there is none.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_rtr6_node_get(struct batadv_priv *bat_priv)
+{
+	struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tmp_orig_node,
+				 &bat_priv->mcast.want_all_rtr6_list,
+				 mcast_want_all_rtr6_node) {
+		if (!kref_get_unless_zero(&tmp_orig_node->refcount))
+			continue;
+
+		orig_node = tmp_orig_node;
+		break;
+	}
+	rcu_read_unlock();
+
+	return orig_node;
+}
+
+/**
+ * batadv_mcast_forw_rtr_node_get() - get a node with an ipv4/ipv6 router flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ethhdr: an ethernet header to determine the protocol family from
+ *
+ * Return: an orig_node which has the BATADV_MCAST_WANT_NO_RTR4 or
+ * BATADV_MCAST_WANT_NO_RTR6 flag unset, depending on the protocol in the
+ * provided ethhdr, with its refcount increased, or NULL if there is none.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv,
+			       struct ethhdr *ethhdr)
+{
+	switch (ntohs(ethhdr->h_proto)) {
+	case ETH_P_IP:
+		return batadv_mcast_forw_rtr4_node_get(bat_priv);
+	case ETH_P_IPV6:
+		return batadv_mcast_forw_rtr6_node_get(bat_priv);
+	default:
+		/* we shouldn't be here... */
+		return NULL;
+	}
+}
+
+/**
  * batadv_mcast_forw_mode() - check on how to forward a multicast packet
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: The multicast packet to check
@@ -991,9 +1383,13 @@
 {
 	int ret, tt_count, ip_count, unsnoop_count, total_count;
 	bool is_unsnoopable = false;
+	unsigned int mcast_fanout;
 	struct ethhdr *ethhdr;
+	int is_routable = 0;
+	int rtr_count = 0;
 
-	ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable);
+	ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable,
+					   &is_routable);
 	if (ret == -ENOMEM)
 		return BATADV_FORW_NONE;
 	else if (ret < 0)
@@ -1006,8 +1402,9 @@
 	ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr);
 	unsnoop_count = !is_unsnoopable ? 0 :
 			atomic_read(&bat_priv->mcast.num_want_all_unsnoopables);
+	rtr_count = batadv_mcast_forw_rtr_count(bat_priv, is_routable);
 
-	total_count = tt_count + ip_count + unsnoop_count;
+	total_count = tt_count + ip_count + unsnoop_count + rtr_count;
 
 	switch (total_count) {
 	case 1:
@@ -1017,6 +1414,9 @@
 			*orig = batadv_mcast_forw_ip_node_get(bat_priv, ethhdr);
 		else if (unsnoop_count)
 			*orig = batadv_mcast_forw_unsnoop_node_get(bat_priv);
+		else if (rtr_count)
+			*orig = batadv_mcast_forw_rtr_node_get(bat_priv,
+							       ethhdr);
 
 		if (*orig)
 			return BATADV_FORW_SINGLE;
@@ -1025,8 +1425,314 @@
 	case 0:
 		return BATADV_FORW_NONE;
 	default:
-		return BATADV_FORW_ALL;
+		mcast_fanout = atomic_read(&bat_priv->multicast_fanout);
+
+		if (!unsnoop_count && total_count <= mcast_fanout)
+			return BATADV_FORW_SOME;
 	}
+
+	return BATADV_FORW_ALL;
+}
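
The rewritten decision now has four outcomes instead of three: with more
than one interested node the packet can still be unicast to each of them
(BATADV_FORW_SOME), provided nothing unsnoopable is involved and the
node count stays within the tunable multicast_fanout. A reduced,
runnable sketch of that decision table; the enum names are local
stand-ins for the BATADV_FORW_* values:

/* illustrative sketch only; FWD_* are stand-ins for BATADV_FORW_* */
#include <stdio.h>

enum fwd_mode { FWD_NONE, FWD_SINGLE, FWD_SOME, FWD_ALL };

static enum fwd_mode forw_mode(int tt, int ip, int unsnoop, int rtr,
			       int fanout)
{
	int total = tt + ip + unsnoop + rtr;

	switch (total) {
	case 1:
		return FWD_SINGLE;	/* one unicast to the one node */
	case 0:
		return FWD_NONE;	/* nobody asked for this packet */
	default:
		/* unsnoopable traffic must always be flooded */
		if (!unsnoop && total <= fanout)
			return FWD_SOME;
	}

	return FWD_ALL;
}

int main(void)
{
	printf("%d\n", forw_mode(3, 0, 0, 1, 16));	/* 2: FWD_SOME */
	printf("%d\n", forw_mode(3, 0, 1, 0, 16));	/* 3: FWD_ALL */
	return 0;
}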
+
+/**
+ * batadv_mcast_forw_tt() - forwards a packet to multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any multicast
+ * listener registered in the translation table. A transmission is performed
+ * via a batman-adv unicast packet for each such destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS
+ * otherwise.
+ */
+static int
+batadv_mcast_forw_tt(struct batadv_priv *bat_priv, struct sk_buff *skb,
+		     unsigned short vid)
+{
+	struct batadv_tt_orig_list_entry *orig_entry;
+	struct batadv_tt_global_entry *tt_global;
+	const u8 *addr = eth_hdr(skb)->h_dest;
+	int ret = NET_XMIT_SUCCESS;
+	struct sk_buff *newskb;
+
+	tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
+	if (!tt_global)
+		goto out;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) {
+		newskb = skb_copy(skb, GFP_ATOMIC);
+		if (!newskb) {
+			ret = NET_XMIT_DROP;
+			break;
+		}
+
+		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
+					orig_entry->orig_node, vid);
+	}
+	rcu_read_unlock();
+
+	batadv_tt_global_entry_put(tt_global);
+
+out:
+	return ret;
+}
+
+/**
+ * batadv_mcast_forw_want_all_ipv4() - forward to nodes with want-all-ipv4
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any node with a
+ * BATADV_MCAST_WANT_ALL_IPV4 flag set. A transmission is performed via a
+ * batman-adv unicast packet for each such destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS
+ * otherwise.
+ */
+static int
+batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv,
+				struct sk_buff *skb, unsigned short vid)
+{
+	struct batadv_orig_node *orig_node;
+	int ret = NET_XMIT_SUCCESS;
+	struct sk_buff *newskb;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node,
+				 &bat_priv->mcast.want_all_ipv4_list,
+				 mcast_want_all_ipv4_node) {
+		newskb = skb_copy(skb, GFP_ATOMIC);
+		if (!newskb) {
+			ret = NET_XMIT_DROP;
+			break;
+		}
+
+		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
+					orig_node, vid);
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * batadv_mcast_forw_want_all_ipv6() - forward to nodes with want-all-ipv6
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any node with a
+ * BATADV_MCAST_WANT_ALL_IPV6 flag set. A transmission is performed via a
+ * batman-adv unicast packet for each such destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS
+ * otherwise.
+ */
+static int
+batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv,
+				struct sk_buff *skb, unsigned short vid)
+{
+	struct batadv_orig_node *orig_node;
+	int ret = NET_XMIT_SUCCESS;
+	struct sk_buff *newskb;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node,
+				 &bat_priv->mcast.want_all_ipv6_list,
+				 mcast_want_all_ipv6_node) {
+		newskb = skb_copy(skb, GFP_ATOMIC);
+		if (!newskb) {
+			ret = NET_XMIT_DROP;
+			break;
+		}
+
+		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
+					orig_node, vid);
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * batadv_mcast_forw_want_all() - forward packet to nodes in a want-all list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any node with a
+ * BATADV_MCAST_WANT_ALL_IPV4 or BATADV_MCAST_WANT_ALL_IPV6 flag set. A
+ * transmission is performed via a batman-adv unicast packet for each such
+ * destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family
+ * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise.
+ */
+static int
+batadv_mcast_forw_want_all(struct batadv_priv *bat_priv,
+			   struct sk_buff *skb, unsigned short vid)
+{
+	switch (ntohs(eth_hdr(skb)->h_proto)) {
+	case ETH_P_IP:
+		return batadv_mcast_forw_want_all_ipv4(bat_priv, skb, vid);
+	case ETH_P_IPV6:
+		return batadv_mcast_forw_want_all_ipv6(bat_priv, skb, vid);
+	default:
+		/* we shouldn't be here... */
+		return NET_XMIT_DROP;
+	}
+}
+
+/**
+ * batadv_mcast_forw_want_all_rtr4() - forward to nodes with want-all-rtr4
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any node with a
+ * BATADV_MCAST_WANT_NO_RTR4 flag unset. A transmission is performed via a
+ * batman-adv unicast packet for each such destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS
+ * otherwise.
+ */
+static int
+batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv,
+				struct sk_buff *skb, unsigned short vid)
+{
+	struct batadv_orig_node *orig_node;
+	int ret = NET_XMIT_SUCCESS;
+	struct sk_buff *newskb;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node,
+				 &bat_priv->mcast.want_all_rtr4_list,
+				 mcast_want_all_rtr4_node) {
+		newskb = skb_copy(skb, GFP_ATOMIC);
+		if (!newskb) {
+			ret = NET_XMIT_DROP;
+			break;
+		}
+
+		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
+					orig_node, vid);
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * batadv_mcast_forw_want_all_rtr6() - forward to nodes with want-all-rtr6
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any node with a
+ * BATADV_MCAST_WANT_NO_RTR6 flag unset. A transmission is performed via a
+ * batman-adv unicast packet for each such destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS
+ * otherwise.
+ */
+static int
+batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv,
+				struct sk_buff *skb, unsigned short vid)
+{
+	struct batadv_orig_node *orig_node;
+	int ret = NET_XMIT_SUCCESS;
+	struct sk_buff *newskb;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node,
+				 &bat_priv->mcast.want_all_rtr6_list,
+				 mcast_want_all_rtr6_node) {
+		newskb = skb_copy(skb, GFP_ATOMIC);
+		if (!newskb) {
+			ret = NET_XMIT_DROP;
+			break;
+		}
+
+		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
+					orig_node, vid);
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * batadv_mcast_forw_want_rtr() - forward packet to nodes in a want-all-rtr list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any node with a
+ * BATADV_MCAST_WANT_NO_RTR4 or BATADV_MCAST_WANT_NO_RTR6 flag unset. A
+ * transmission is performed via a batman-adv unicast packet for each such
+ * destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family
+ * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise.
+ */
+static int
+batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv,
+			   struct sk_buff *skb, unsigned short vid)
+{
+	switch (ntohs(eth_hdr(skb)->h_proto)) {
+	case ETH_P_IP:
+		return batadv_mcast_forw_want_all_rtr4(bat_priv, skb, vid);
+	case ETH_P_IPV6:
+		return batadv_mcast_forw_want_all_rtr6(bat_priv, skb, vid);
+	default:
+		/* we shouldn't be here... */
+		return NET_XMIT_DROP;
+	}
+}
+
+/**
+ * batadv_mcast_forw_send() - send packet to any detected multicast recipient
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any node that signaled
+ * interest in it, that is either via the translation table or the according
+ * want-all flags. A transmission is performed via a batman-adv unicast packet
+ * for each such destination node.
+ *
+ * The given skb is consumed/freed.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family
+ * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise.
+ */
+int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
+			   unsigned short vid)
+{
+	int ret;
+
+	ret = batadv_mcast_forw_tt(bat_priv, skb, vid);
+	if (ret != NET_XMIT_SUCCESS) {
+		kfree_skb(skb);
+		return ret;
+	}
+
+	ret = batadv_mcast_forw_want_all(bat_priv, skb, vid);
+	if (ret != NET_XMIT_SUCCESS) {
+		kfree_skb(skb);
+		return ret;
+	}
+
+	ret = batadv_mcast_forw_want_rtr(bat_priv, skb, vid);
+	if (ret != NET_XMIT_SUCCESS) {
+		kfree_skb(skb);
+		return ret;
+	}
+
+	consume_skb(skb);
+	return ret;
 }
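
batadv_mcast_forw_send() owns the skb it is handed: every stage above
transmits skb_copy()s, and the original buffer is freed exactly once,
via kfree_skb() on the first failing stage or consume_skb() once all
copies are out. A minimal sketch of that ownership contract, with the
three stages stubbed out:

/* illustrative sketch only; stages and struct skb are stand-ins */
#include <stdlib.h>

struct skb { int len; };

static int forw_tt(struct skb *skb)       { (void)skb; return 0; }
static int forw_want_all(struct skb *skb) { (void)skb; return 0; }
static int forw_want_rtr(struct skb *skb) { (void)skb; return 0; }

static int forw_send(struct skb *skb)
{
	int ret;

	ret = forw_tt(skb);
	if (ret)
		goto drop;

	ret = forw_want_all(skb);
	if (ret)
		goto drop;

	ret = forw_want_rtr(skb);
	if (ret)
		goto drop;

	free(skb);	/* consume_skb(): all copies are out */
	return ret;

drop:
	free(skb);	/* kfree_skb(): give up on first failure */
	return ret;
}

int main(void)
{
	struct skb *skb = calloc(1, sizeof(*skb));

	return skb ? forw_send(skb) : 1;
}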
 
 /**
@@ -1165,6 +1871,127 @@
 }
 
 /**
+ * batadv_mcast_want_rtr4_update() - update want-all-rtr4 counter and list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node whose multicast state might have changed
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * If the BATADV_MCAST_WANT_NO_RTR4 flag of this originator, orig, has
+ * toggled, then this method updates the counter and list accordingly.
+ *
+ * Caller needs to hold orig->mcast_handler_lock.
+ */
+static void batadv_mcast_want_rtr4_update(struct batadv_priv *bat_priv,
+					  struct batadv_orig_node *orig,
+					  u8 mcast_flags)
+{
+	struct hlist_node *node = &orig->mcast_want_all_rtr4_node;
+	struct hlist_head *head = &bat_priv->mcast.want_all_rtr4_list;
+
+	lockdep_assert_held(&orig->mcast_handler_lock);
+
+	/* switched from flag set to unset */
+	if (!(mcast_flags & BATADV_MCAST_WANT_NO_RTR4) &&
+	    orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR4) {
+		atomic_inc(&bat_priv->mcast.num_want_all_rtr4);
+
+		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+		/* flag checks above + mcast_handler_lock prevents this */
+		WARN_ON(!hlist_unhashed(node));
+
+		hlist_add_head_rcu(node, head);
+		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+	/* switched from flag unset to set */
+	} else if (mcast_flags & BATADV_MCAST_WANT_NO_RTR4 &&
+		   !(orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR4)) {
+		atomic_dec(&bat_priv->mcast.num_want_all_rtr4);
+
+		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+		/* flag checks above + mcast_handler_lock prevents this */
+		WARN_ON(hlist_unhashed(node));
+
+		hlist_del_init_rcu(node);
+		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+	}
+}
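
Both want_rtr*_update() helpers follow the same edge-triggered pattern:
act only when the flag actually toggles, so the counter and the RCU list
stay in lockstep. A sketch of the pattern with the locking and hlist
machinery reduced to a counter and a membership bit; unlike the real
code, which leaves orig->mcast_flags to its caller, the stored flag is
updated inside the helper here:

/* illustrative sketch only; locking and the RCU list are elided */
#include <stdio.h>

struct orig { int has_no_rtr4; int listed; };

static int num_want_all_rtr4;

static void want_rtr4_update(struct orig *o, int new_has_no_rtr4)
{
	if (!new_has_no_rtr4 && o->has_no_rtr4) {
		/* set -> unset: node now wants routable traffic */
		num_want_all_rtr4++;
		o->listed = 1;
	} else if (new_has_no_rtr4 && !o->has_no_rtr4) {
		/* unset -> set: node no longer wants it */
		num_want_all_rtr4--;
		o->listed = 0;
	}

	o->has_no_rtr4 = new_has_no_rtr4;
}

int main(void)
{
	struct orig o = { .has_no_rtr4 = 1 };

	want_rtr4_update(&o, 0);
	printf("%d %d\n", num_want_all_rtr4, o.listed);	/* 1 1 */
	want_rtr4_update(&o, 1);
	printf("%d %d\n", num_want_all_rtr4, o.listed);	/* 0 0 */
	return 0;
}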
+
+/**
+ * batadv_mcast_want_rtr6_update() - update want-all-rtr6 counter and list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node whose multicast state might have changed
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * If the BATADV_MCAST_WANT_NO_RTR6 flag of this originator, orig, has
+ * toggled, then this method updates the counter and list accordingly.
+ *
+ * Caller needs to hold orig->mcast_handler_lock.
+ */
+static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv,
+					  struct batadv_orig_node *orig,
+					  u8 mcast_flags)
+{
+	struct hlist_node *node = &orig->mcast_want_all_rtr6_node;
+	struct hlist_head *head = &bat_priv->mcast.want_all_rtr6_list;
+
+	lockdep_assert_held(&orig->mcast_handler_lock);
+
+	/* switched from flag set to unset */
+	if (!(mcast_flags & BATADV_MCAST_WANT_NO_RTR6) &&
+	    orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR6) {
+		atomic_inc(&bat_priv->mcast.num_want_all_rtr6);
+
+		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+		/* flag checks above + mcast_handler_lock prevents this */
+		WARN_ON(!hlist_unhashed(node));
+
+		hlist_add_head_rcu(node, head);
+		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+	/* switched from flag unset to set */
+	} else if (mcast_flags & BATADV_MCAST_WANT_NO_RTR6 &&
+		   !(orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR6)) {
+		atomic_dec(&bat_priv->mcast.num_want_all_rtr6);
+
+		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+		/* flag checks above + mcast_handler_lock prevents this */
+		WARN_ON(hlist_unhashed(node));
+
+		hlist_del_init_rcu(node);
+		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+	}
+}
+
+/**
+ * batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV
+ * @enabled: whether the originator has multicast TVLV support enabled
+ * @tvlv_value: tvlv buffer containing the multicast flags
+ * @tvlv_value_len: tvlv buffer length
+ *
+ * Return: multicast flags for the given tvlv buffer
+ */
+static u8
+batadv_mcast_tvlv_flags_get(bool enabled, void *tvlv_value, u16 tvlv_value_len)
+{
+	u8 mcast_flags = BATADV_NO_FLAGS;
+
+	if (enabled && tvlv_value && tvlv_value_len >= sizeof(mcast_flags))
+		mcast_flags = *(u8 *)tvlv_value;
+
+	if (!enabled) {
+		mcast_flags |= BATADV_MCAST_WANT_ALL_IPV4;
+		mcast_flags |= BATADV_MCAST_WANT_ALL_IPV6;
+	}
+
+	/* remove redundant flags to avoid sending duplicate packets later */
+	if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4)
+		mcast_flags |= BATADV_MCAST_WANT_NO_RTR4;
+
+	if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6)
+		mcast_flags |= BATADV_MCAST_WANT_NO_RTR6;
+
+	return mcast_flags;
+}
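
The normalization happens in two steps: a node without multicast TVLV
support is treated as wanting all IPv4 and IPv6 traffic, and each
want-all bit then forces the matching WANT_NO_RTR bit on, since a node
that already receives everything must not additionally be sent routable
copies. A self-contained sketch, again with locally mocked flag values:

/* illustrative sketch only; flag values are local mock-ups */
#include <stdio.h>

#define WANT_ALL_IPV4	(1 << 1)
#define WANT_ALL_IPV6	(1 << 2)
#define WANT_NO_RTR4	(1 << 3)
#define WANT_NO_RTR6	(1 << 4)

static unsigned char tvlv_flags_get(int enabled, unsigned char wire)
{
	unsigned char flags = enabled ? wire
				      : WANT_ALL_IPV4 | WANT_ALL_IPV6;

	/* a want-all node never needs extra routable copies */
	if (flags & WANT_ALL_IPV4)
		flags |= WANT_NO_RTR4;
	if (flags & WANT_ALL_IPV6)
		flags |= WANT_NO_RTR6;

	return flags;
}

int main(void)
{
	printf("0x%02x\n", tvlv_flags_get(0, 0));		/* 0x1e */
	printf("0x%02x\n", tvlv_flags_get(1, WANT_ALL_IPV4));	/* 0x0a */
	return 0;
}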
+
+/**
  * batadv_mcast_tvlv_ogm_handler() - process incoming multicast tvlv container
  * @bat_priv: the bat priv with all the soft interface information
  * @orig: the orig_node of the ogm
@@ -1179,16 +2006,10 @@
 					  u16 tvlv_value_len)
 {
 	bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
-	u8 mcast_flags = BATADV_NO_FLAGS;
+	u8 mcast_flags;
 
-	if (orig_mcast_enabled && tvlv_value &&
-	    tvlv_value_len >= sizeof(mcast_flags))
-		mcast_flags = *(u8 *)tvlv_value;
-
-	if (!orig_mcast_enabled) {
-		mcast_flags |= BATADV_MCAST_WANT_ALL_IPV4;
-		mcast_flags |= BATADV_MCAST_WANT_ALL_IPV6;
-	}
+	mcast_flags = batadv_mcast_tvlv_flags_get(orig_mcast_enabled,
+						  tvlv_value, tvlv_value_len);
 
 	spin_lock_bh(&orig->mcast_handler_lock);
 
@@ -1205,6 +2026,8 @@
 	batadv_mcast_want_unsnoop_update(bat_priv, orig, mcast_flags);
 	batadv_mcast_want_ipv4_update(bat_priv, orig, mcast_flags);
 	batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags);
+	batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags);
+	batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags);
 
 	orig->mcast_flags = mcast_flags;
 	spin_unlock_bh(&orig->mcast_handler_lock);
@@ -1237,15 +2060,16 @@
 static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv,
 					    struct seq_file *seq)
 {
-	u8 flags = bat_priv->mcast.flags;
+	struct batadv_mcast_mla_flags *mla_flags = &bat_priv->mcast.mla_flags;
 	char querier4, querier6, shadowing4, shadowing6;
-	bool bridged = bat_priv->mcast.bridged;
+	bool bridged = mla_flags->bridged;
+	u8 flags = mla_flags->tvlv_flags;
 
 	if (bridged) {
-		querier4 = bat_priv->mcast.querier_ipv4.exists ? '.' : '4';
-		querier6 = bat_priv->mcast.querier_ipv6.exists ? '.' : '6';
-		shadowing4 = bat_priv->mcast.querier_ipv4.shadowing ? '4' : '.';
-		shadowing6 = bat_priv->mcast.querier_ipv6.shadowing ? '6' : '.';
+		querier4 = mla_flags->querier_ipv4.exists ? '.' : '4';
+		querier6 = mla_flags->querier_ipv6.exists ? '.' : '6';
+		shadowing4 = mla_flags->querier_ipv4.shadowing ? '4' : '.';
+		shadowing6 = mla_flags->querier_ipv6.shadowing ? '6' : '.';
 	} else {
 		querier4 = '?';
 		querier6 = '?';
@@ -1253,10 +2077,12 @@
 		shadowing6 = '?';
 	}
 
-	seq_printf(seq, "Multicast flags (own flags: [%c%c%c])\n",
+	seq_printf(seq, "Multicast flags (own flags: [%c%c%c%s%s])\n",
 		   (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
 		   (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
-		   (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+		   (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
+		   !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
+		   !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
 	seq_printf(seq, "* Bridged [U]\t\t\t\t%c\n", bridged ? 'U' : '.');
 	seq_printf(seq, "* No IGMP/MLD Querier [4/6]:\t\t%c/%c\n",
 		   querier4, querier6);
@@ -1310,13 +2136,17 @@
 
 			flags = orig_node->mcast_flags;
 
-			seq_printf(seq, "%pM [%c%c%c]\n", orig_node->orig,
+			seq_printf(seq, "%pM [%c%c%c%s%s]\n", orig_node->orig,
 				   (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)
 				   ? 'U' : '.',
 				   (flags & BATADV_MCAST_WANT_ALL_IPV4)
 				   ? '4' : '.',
 				   (flags & BATADV_MCAST_WANT_ALL_IPV6)
-				   ? '6' : '.');
+				   ? '6' : '.',
+				   !(flags & BATADV_MCAST_WANT_NO_RTR4)
+				   ? "R4" : ". ",
+				   !(flags & BATADV_MCAST_WANT_NO_RTR6)
+				   ? "R6" : ". ");
 		}
 		rcu_read_unlock();
 	}
@@ -1337,19 +2167,19 @@
 int batadv_mcast_mesh_info_put(struct sk_buff *msg,
 			       struct batadv_priv *bat_priv)
 {
-	u32 flags = bat_priv->mcast.flags;
+	u32 flags = bat_priv->mcast.mla_flags.tvlv_flags;
 	u32 flags_priv = BATADV_NO_FLAGS;
 
-	if (bat_priv->mcast.bridged) {
+	if (bat_priv->mcast.mla_flags.bridged) {
 		flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
 
-		if (bat_priv->mcast.querier_ipv4.exists)
+		if (bat_priv->mcast.mla_flags.querier_ipv4.exists)
 			flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
-		if (bat_priv->mcast.querier_ipv6.exists)
+		if (bat_priv->mcast.mla_flags.querier_ipv6.exists)
 			flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
-		if (bat_priv->mcast.querier_ipv4.shadowing)
+		if (bat_priv->mcast.mla_flags.querier_ipv4.shadowing)
 			flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
-		if (bat_priv->mcast.querier_ipv6.shadowing)
+		if (bat_priv->mcast.mla_flags.querier_ipv6.shadowing)
 			flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
 	}
 
@@ -1365,22 +2195,26 @@
  *  to a netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @orig_node: originator to dump the multicast flags of
  *
  * Return: 0 or error code.
  */
 static int
-batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid,
+			      struct netlink_callback *cb,
 			      struct batadv_orig_node *orig_node)
 {
 	void *hdr;
 
-	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
-			  NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+	hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+			  &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_MCAST_FLAGS);
 	if (!hdr)
 		return -ENOBUFS;
 
+	genl_dump_check_consistent(cb, hdr);
+
 	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
 		    orig_node->orig)) {
 		genlmsg_cancel(msg, hdr);
@@ -1405,21 +2239,26 @@
  *  table to a netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
- * @seq: Sequence number of netlink message
- * @head: bucket to dump
+ * @cb: Control block containing additional options
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
  * @idx_skip: How many entries to skip
  *
  * Return: 0 or error code.
  */
 static int
-batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
-			       struct hlist_head *head, long *idx_skip)
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid,
+			       struct netlink_callback *cb,
+			       struct batadv_hashtable *hash,
+			       unsigned int bucket, long *idx_skip)
 {
 	struct batadv_orig_node *orig_node;
 	long idx = 0;
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+	spin_lock_bh(&hash->list_locks[bucket]);
+	cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+	hlist_for_each_entry(orig_node, &hash->table[bucket], hash_entry) {
 		if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
 			      &orig_node->capa_initialized))
 			continue;
@@ -1427,9 +2266,8 @@
 		if (idx < *idx_skip)
 			goto skip;
 
-		if (batadv_mcast_flags_dump_entry(msg, portid, seq,
-						  orig_node)) {
-			rcu_read_unlock();
+		if (batadv_mcast_flags_dump_entry(msg, portid, cb, orig_node)) {
+			spin_unlock_bh(&hash->list_locks[bucket]);
 			*idx_skip = idx;
 
 			return -EMSGSIZE;
@@ -1438,7 +2276,7 @@
 skip:
 		idx++;
 	}
-	rcu_read_unlock();
+	spin_unlock_bh(&hash->list_locks[bucket]);
 
 	return 0;
 }
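
Switching the dumper from RCU to the bucket spinlock lets it seed
cb->seq from the originator hash's generation counter, so that
genl_dump_check_consistent() can tag messages with NLM_F_DUMP_INTR when
the table changed mid-dump. The "<< 1 | 1" keeps the sequence nonzero,
because a zero sequence disables the consistency check. A toy
illustration:

/* illustrative sketch only */
#include <stdio.h>

static unsigned int dump_seq(unsigned int generation)
{
	return generation << 1 | 1;	/* never 0, even for generation 0 */
}

int main(void)
{
	unsigned int before = dump_seq(41);
	unsigned int after = dump_seq(42);	/* table changed mid-dump */

	printf("%u -> %u (%s)\n", before, after,
	       before == after ? "consistent" : "flag NLM_F_DUMP_INTR");
	return 0;
}
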
@@ -1447,7 +2285,7 @@
  * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
  * @msg: buffer for the message
  * @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @bat_priv: the bat priv with all the soft interface information
  * @bucket: current bucket to dump
  * @idx: index in current bucket to the next entry to dump
@@ -1455,19 +2293,17 @@
  * Return: 0 or error code.
  */
 static int
-__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid,
+			  struct netlink_callback *cb,
 			  struct batadv_priv *bat_priv, long *bucket, long *idx)
 {
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
 	long bucket_tmp = *bucket;
-	struct hlist_head *head;
 	long idx_tmp = *idx;
 
 	while (bucket_tmp < hash->size) {
-		head = &hash->table[bucket_tmp];
-
-		if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
-						   &idx_tmp))
+		if (batadv_mcast_flags_dump_bucket(msg, portid, cb, hash,
+						   bucket_tmp, &idx_tmp))
 			break;
 
 		bucket_tmp++;
@@ -1550,8 +2386,7 @@
 		return ret;
 
 	bat_priv = netdev_priv(primary_if->soft_iface);
-	ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
-					bat_priv, bucket, idx);
+	ret = __batadv_mcast_flags_dump(msg, portid, cb, bat_priv, bucket, idx);
 
 	batadv_hardif_put(primary_if);
 	return ret;
@@ -1585,6 +2420,10 @@
 	batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS);
 	batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS);
 	batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS);
+	batadv_mcast_want_rtr4_update(bat_priv, orig,
+				      BATADV_MCAST_WANT_NO_RTR4);
+	batadv_mcast_want_rtr6_update(bat_priv, orig,
+				      BATADV_MCAST_WANT_NO_RTR6);
 
 	spin_unlock_bh(&orig->mcast_handler_lock);
 }
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3b04ab1..5d9e2bb 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2019  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_MULTICAST_H_
@@ -21,9 +9,9 @@
 
 #include "main.h"
 
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
 
 /**
  * enum batadv_forw_mode - the way a packet should be forwarded as
@@ -36,6 +24,13 @@
 	BATADV_FORW_ALL,
 
 	/**
+	 * @BATADV_FORW_SOME: forward the packet to some nodes (currently via
+	 *  a multicast-to-unicast conversion and the BATMAN unicast routing
+	 *  protocol)
+	 */
+	BATADV_FORW_SOME,
+
+	/**
 	 * @BATADV_FORW_SINGLE: forward the packet to a single node (currently
 	 *  via the BATMAN unicast routing protocol)
 	 */
@@ -51,6 +46,9 @@
 batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		       struct batadv_orig_node **mcast_single_orig);
 
+int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
+			   unsigned short vid);
+
 void batadv_mcast_init(struct batadv_priv *bat_priv);
 
 int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
@@ -73,6 +71,14 @@
 	return BATADV_FORW_ALL;
 }
 
+static inline int
+batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
+		       unsigned short vid)
+{
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
 static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
 {
 	return 0;
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 0d9459b..7e052d6 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,43 +1,37 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2019  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "netlink.h"
 #include "main.h"
 
 #include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/cache.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/genetlink.h>
 #include <linux/gfp.h>
 #include <linux/if_ether.h>
+#include <linux/if_vlan.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/limits.h>
+#include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/printk.h>
-#include <linux/rculist.h>
-#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <net/genetlink.h>
+#include <net/net_namespace.h>
 #include <net/netlink.h>
 #include <net/sock.h>
 #include <uapi/linux/batadv_packet.h>
@@ -47,8 +41,11 @@
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
+#include "gateway_common.h"
 #include "hard-interface.h"
+#include "log.h"
 #include "multicast.h"
+#include "network-coding.h"
 #include "originator.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
@@ -58,10 +55,38 @@
 
 /* multicast groups */
 enum batadv_netlink_multicast_groups {
+	BATADV_NL_MCGRP_CONFIG,
 	BATADV_NL_MCGRP_TPMETER,
 };
 
+/**
+ * enum batadv_genl_ops_flags - flags for genl_ops's internal_flags
+ */
+enum batadv_genl_ops_flags {
+	/**
+	 * @BATADV_FLAG_NEED_MESH: request requires valid soft interface in
+	 *  attribute BATADV_ATTR_MESH_IFINDEX and expects a pointer to it to be
+	 *  saved in info->user_ptr[0]
+	 */
+	BATADV_FLAG_NEED_MESH = BIT(0),
+
+	/**
+	 * @BATADV_FLAG_NEED_HARDIF: request requires valid hard interface in
+	 *  attribute BATADV_ATTR_HARD_IFINDEX and expects a pointer to it to be
+	 *  saved in info->user_ptr[1]
+	 */
+	BATADV_FLAG_NEED_HARDIF = BIT(1),
+
+	/**
+	 * @BATADV_FLAG_NEED_VLAN: request requires valid vlan in
+	 *  attribute BATADV_ATTR_VLANID and expects a pointer to it to be
+	 *  saved in info->user_ptr[1]
+	 */
+	BATADV_FLAG_NEED_VLAN = BIT(2),
+};
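
These internal_flags are meant to be tested bit-wise in the family's
pre_doit hook, which resolves each requested object once and stashes it
in info->user_ptr[] for the command handlers. A sketch of the intended
dispatch; the hook name and the printf placeholders are stand-ins, not
the real resolver code:

/* illustrative sketch only; pre_doit() is a stand-in hook */
#include <stdio.h>

#define BIT(n)		(1U << (n))
#define NEED_MESH	BIT(0)
#define NEED_HARDIF	BIT(1)
#define NEED_VLAN	BIT(2)

static int pre_doit(unsigned int internal_flags)
{
	if (internal_flags & NEED_MESH)
		printf("resolve soft interface -> user_ptr[0]\n");
	if (internal_flags & NEED_HARDIF)
		printf("resolve hard interface -> user_ptr[1]\n");
	if (internal_flags & NEED_VLAN)
		printf("resolve vlan -> user_ptr[1]\n");

	return 0;
}

int main(void)
{
	return pre_doit(NEED_MESH | NEED_HARDIF);
}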
+
 static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
+	[BATADV_NL_MCGRP_CONFIG] = { .name = BATADV_NL_MCAST_GROUP_CONFIG },
 	[BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
 };
 
@@ -104,6 +129,27 @@
 	[BATADV_ATTR_DAT_CACHE_VID]		= { .type = NLA_U16 },
 	[BATADV_ATTR_MCAST_FLAGS]		= { .type = NLA_U32 },
 	[BATADV_ATTR_MCAST_FLAGS_PRIV]		= { .type = NLA_U32 },
+	[BATADV_ATTR_VLANID]			= { .type = NLA_U16 },
+	[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_AP_ISOLATION_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_ISOLATION_MARK]		= { .type = NLA_U32 },
+	[BATADV_ATTR_ISOLATION_MASK]		= { .type = NLA_U32 },
+	[BATADV_ATTR_BONDING_ENABLED]		= { .type = NLA_U8 },
+	[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_FRAGMENTATION_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_GW_BANDWIDTH_DOWN]		= { .type = NLA_U32 },
+	[BATADV_ATTR_GW_BANDWIDTH_UP]		= { .type = NLA_U32 },
+	[BATADV_ATTR_GW_MODE]			= { .type = NLA_U8 },
+	[BATADV_ATTR_GW_SEL_CLASS]		= { .type = NLA_U32 },
+	[BATADV_ATTR_HOP_PENALTY]		= { .type = NLA_U8 },
+	[BATADV_ATTR_LOG_LEVEL]			= { .type = NLA_U32 },
+	[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_MULTICAST_FANOUT]		= { .type = NLA_U32 },
+	[BATADV_ATTR_NETWORK_CODING_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_ORIG_INTERVAL]		= { .type = NLA_U32 },
+	[BATADV_ATTR_ELP_INTERVAL]		= { .type = NLA_U32 },
+	[BATADV_ATTR_THROUGHPUT_OVERRIDE]	= { .type = NLA_U32 },
 };
 
 /**
@@ -118,24 +164,79 @@
 {
 	struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype);
 
-	return attr ? nla_get_u32(attr) : 0;
+	return (attr && nla_len(attr) == sizeof(u32)) ? nla_get_u32(attr) : 0;
 }
 
 /**
- * batadv_netlink_mesh_info_put() - fill in generic information about mesh
- *  interface
- * @msg: netlink message to be sent back
- * @soft_iface: interface for which the data should be taken
+ * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
  *
- * Return: 0 on success, < 0 on error
+ * Return: 0 on success or negative error number in case of failure
  */
-static int
-batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
+static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg,
+						 struct batadv_priv *bat_priv)
 {
-	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+	struct batadv_softif_vlan *vlan;
+	u8 ap_isolation;
+
+	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+	if (!vlan)
+		return 0;
+
+	ap_isolation = atomic_read(&vlan->ap_isolation);
+	batadv_softif_vlan_put(vlan);
+
+	return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED,
+			  !!ap_isolation);
+}
+
+/**
+ * batadv_netlink_set_mesh_ap_isolation() - Set ap_isolation from genl msg
+ * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr,
+						struct batadv_priv *bat_priv)
+{
+	struct batadv_softif_vlan *vlan;
+
+	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+	if (!vlan)
+		return -ENOENT;
+
+	atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr));
+	batadv_softif_vlan_put(vlan);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_mesh_fill() - Fill message with mesh attributes
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @cmd: type of message to generate
+ * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_mesh_fill(struct sk_buff *msg,
+				    struct batadv_priv *bat_priv,
+				    enum batadv_nl_commands cmd,
+				    u32 portid, u32 seq, int flags)
+{
+	struct net_device *soft_iface = bat_priv->soft_iface;
 	struct batadv_hard_iface *primary_if = NULL;
 	struct net_device *hard_iface;
-	int ret = -ENOBUFS;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
+	if (!hdr)
+		return -ENOBUFS;
 
 	if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) ||
 	    nla_put_string(msg, BATADV_ATTR_ALGO_NAME,
@@ -146,16 +247,16 @@
 		    soft_iface->dev_addr) ||
 	    nla_put_u8(msg, BATADV_ATTR_TT_TTVN,
 		       (u8)atomic_read(&bat_priv->tt.vn)))
-		goto out;
+		goto nla_put_failure;
 
 #ifdef CONFIG_BATMAN_ADV_BLA
 	if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
 			ntohs(bat_priv->bla.claim_dest.group)))
-		goto out;
+		goto nla_put_failure;
 #endif
 
 	if (batadv_mcast_mesh_info_put(msg, bat_priv))
-		goto out;
+		goto nla_put_failure;
 
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
@@ -167,77 +268,355 @@
 				   hard_iface->name) ||
 		    nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
 			    hard_iface->dev_addr))
-			goto out;
+			goto nla_put_failure;
 	}
 
-	ret = 0;
+	if (nla_put_u8(msg, BATADV_ATTR_AGGREGATED_OGMS_ENABLED,
+		       !!atomic_read(&bat_priv->aggregated_ogms)))
+		goto nla_put_failure;
 
- out:
+	if (batadv_netlink_mesh_fill_ap_isolation(msg, bat_priv))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MARK,
+			bat_priv->isolation_mark))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MASK,
+			bat_priv->isolation_mark_mask))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, BATADV_ATTR_BONDING_ENABLED,
+		       !!atomic_read(&bat_priv->bonding)))
+		goto nla_put_failure;
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+	if (nla_put_u8(msg, BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED,
+		       !!atomic_read(&bat_priv->bridge_loop_avoidance)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_BLA */
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+	if (nla_put_u8(msg, BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED,
+		       !!atomic_read(&bat_priv->distributed_arp_table)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
+	if (nla_put_u8(msg, BATADV_ATTR_FRAGMENTATION_ENABLED,
+		       !!atomic_read(&bat_priv->fragmentation)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_DOWN,
+			atomic_read(&bat_priv->gw.bandwidth_down)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_UP,
+			atomic_read(&bat_priv->gw.bandwidth_up)))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, BATADV_ATTR_GW_MODE,
+		       atomic_read(&bat_priv->gw.mode)))
+		goto nla_put_failure;
+
+	if (bat_priv->algo_ops->gw.get_best_gw_node &&
+	    bat_priv->algo_ops->gw.is_eligible) {
+		/* GW selection class is not available if the routing algorithm
+		 * in use does not implement the GW API
+		 */
+		if (nla_put_u32(msg, BATADV_ATTR_GW_SEL_CLASS,
+				atomic_read(&bat_priv->gw.sel_class)))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY,
+		       atomic_read(&bat_priv->hop_penalty)))
+		goto nla_put_failure;
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+	if (nla_put_u32(msg, BATADV_ATTR_LOG_LEVEL,
+			atomic_read(&bat_priv->log_level)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+	if (nla_put_u8(msg, BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED,
+		       !atomic_read(&bat_priv->multicast_mode)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_MULTICAST_FANOUT,
+			atomic_read(&bat_priv->multicast_fanout)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
+#ifdef CONFIG_BATMAN_ADV_NC
+	if (nla_put_u8(msg, BATADV_ATTR_NETWORK_CODING_ENABLED,
+		       !!atomic_read(&bat_priv->network_coding)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_NC */
+
+	if (nla_put_u32(msg, BATADV_ATTR_ORIG_INTERVAL,
+			atomic_read(&bat_priv->orig_interval)))
+		goto nla_put_failure;
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
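
The fill function follows the stock genetlink skeleton: genlmsg_put() reserves the header, any failing nla_put_*() unwinds through genlmsg_cancel(), and only a completely built message is sealed with genlmsg_end(). Stripped to a single attribute, the shape looks like this (example_fill() is illustrative):

static int example_fill(struct sk_buff *msg, u32 portid, u32 seq, int flags)
{
	void *hdr;

	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
			  flags, BATADV_CMD_GET_MESH);
	if (!hdr)
		return -ENOBUFS;

	/* real fills chain many of these puts */
	if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);		/* finalize the length fields */
	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);	/* trim msg back to before hdr */
	return -EMSGSIZE;
}
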
+
+/**
+ * batadv_netlink_notify_mesh() - send softif attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
+{
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_SET_MESH,
+				       0, 0, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	genlmsg_multicast_netns(&batadv_netlink_family,
+				dev_net(bat_priv->soft_iface), msg, 0,
+				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_get_mesh() - Get softif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_GET_MESH,
+				       info->snd_portid, info->snd_seq, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	ret = genlmsg_reply(msg, info);
+
 	return ret;
 }
 
 /**
- * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO
- *  netlink request
- * @skb: received netlink message
+ * batadv_netlink_set_mesh() - Set softif attributes
+ * @skb: Netlink message with request data
  * @info: receiver information
  *
- * Return: 0 on success, < 0 on error
+ * Return: 0 on success or negative error number in case of failure
  */
-static int
-batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
+static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 {
-	struct net *net = genl_info_net(info);
-	struct net_device *soft_iface;
-	struct sk_buff *msg = NULL;
-	void *msg_head;
-	int ifindex;
-	int ret;
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct nlattr *attr;
 
-	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
-		return -EINVAL;
+	if (info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED];
 
-	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
-	if (!ifindex)
-		return -EINVAL;
-
-	soft_iface = dev_get_by_index(net, ifindex);
-	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
-		ret = -ENODEV;
-		goto out;
+		atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr));
 	}
 
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg) {
-		ret = -ENOMEM;
-		goto out;
+	if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];
+
+		batadv_netlink_set_mesh_ap_isolation(attr, bat_priv);
 	}
 
-	msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
-			       &batadv_netlink_family, 0,
-			       BATADV_CMD_GET_MESH_INFO);
-	if (!msg_head) {
-		ret = -ENOBUFS;
-		goto out;
+	if (info->attrs[BATADV_ATTR_ISOLATION_MARK]) {
+		attr = info->attrs[BATADV_ATTR_ISOLATION_MARK];
+
+		bat_priv->isolation_mark = nla_get_u32(attr);
 	}
 
-	ret = batadv_netlink_mesh_info_put(msg, soft_iface);
+	if (info->attrs[BATADV_ATTR_ISOLATION_MASK]) {
+		attr = info->attrs[BATADV_ATTR_ISOLATION_MASK];
 
- out:
-	if (soft_iface)
-		dev_put(soft_iface);
-
-	if (ret) {
-		if (msg)
-			nlmsg_free(msg);
-		return ret;
+		bat_priv->isolation_mark_mask = nla_get_u32(attr);
 	}
 
-	genlmsg_end(msg, msg_head);
-	return genlmsg_reply(msg, info);
+	if (info->attrs[BATADV_ATTR_BONDING_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_BONDING_ENABLED];
+
+		atomic_set(&bat_priv->bonding, !!nla_get_u8(attr));
+	}
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+	if (info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED];
+
+		atomic_set(&bat_priv->bridge_loop_avoidance,
+			   !!nla_get_u8(attr));
+		batadv_bla_status_update(bat_priv->soft_iface);
+	}
+#endif /* CONFIG_BATMAN_ADV_BLA */
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+	if (info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED];
+
+		atomic_set(&bat_priv->distributed_arp_table,
+			   !!nla_get_u8(attr));
+		batadv_dat_status_update(bat_priv->soft_iface);
+	}
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
+	if (info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
+
+		atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
+		batadv_update_min_mtu(bat_priv->soft_iface);
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
+		attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN];
+
+		atomic_set(&bat_priv->gw.bandwidth_down, nla_get_u32(attr));
+		batadv_gw_tvlv_container_update(bat_priv);
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]) {
+		attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP];
+
+		atomic_set(&bat_priv->gw.bandwidth_up, nla_get_u32(attr));
+		batadv_gw_tvlv_container_update(bat_priv);
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_MODE]) {
+		u8 gw_mode;
+
+		attr = info->attrs[BATADV_ATTR_GW_MODE];
+		gw_mode = nla_get_u8(attr);
+
+		if (gw_mode <= BATADV_GW_MODE_SERVER) {
+			/* Invoking batadv_gw_reselect() is not enough to really
+			 * de-select the current GW. It will only instruct the
+			 * gateway client code to perform a re-election the next
+			 * time that this is needed.
+			 *
+			 * When gw client mode is being switched off, the
+			 * current GW must be de-selected explicitly; otherwise
+			 * no GW_ADD uevent is thrown on client mode
+			 * re-activation. This operation is performed in
+			 * batadv_gw_check_client_stop().
+			 */
+			batadv_gw_reselect(bat_priv);
+
+			/* always call batadv_gw_check_client_stop() before
+			 * changing the gateway state
+			 */
+			batadv_gw_check_client_stop(bat_priv);
+			atomic_set(&bat_priv->gw.mode, gw_mode);
+			batadv_gw_tvlv_container_update(bat_priv);
+		}
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_SEL_CLASS] &&
+	    bat_priv->algo_ops->gw.get_best_gw_node &&
+	    bat_priv->algo_ops->gw.is_eligible) {
+		/* setting the GW selection class is allowed only if the routing
+		 * algorithm in use implements the GW API
+		 */
+
+		u32 sel_class_max = 0xffffffffu;
+		u32 sel_class;
+
+		attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS];
+		sel_class = nla_get_u32(attr);
+
+		if (!bat_priv->algo_ops->gw.store_sel_class)
+			sel_class_max = BATADV_TQ_MAX_VALUE;
+
+		if (sel_class >= 1 && sel_class <= sel_class_max) {
+			atomic_set(&bat_priv->gw.sel_class, sel_class);
+			batadv_gw_reselect(bat_priv);
+		}
+	}
+
+	if (info->attrs[BATADV_ATTR_HOP_PENALTY]) {
+		attr = info->attrs[BATADV_ATTR_HOP_PENALTY];
+
+		atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr));
+	}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+	if (info->attrs[BATADV_ATTR_LOG_LEVEL]) {
+		attr = info->attrs[BATADV_ATTR_LOG_LEVEL];
+
+		atomic_set(&bat_priv->log_level,
+			   nla_get_u32(attr) & BATADV_DBG_ALL);
+	}
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+	if (info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED];
+
+		atomic_set(&bat_priv->multicast_mode, !nla_get_u8(attr));
+	}
+
+	if (info->attrs[BATADV_ATTR_MULTICAST_FANOUT]) {
+		attr = info->attrs[BATADV_ATTR_MULTICAST_FANOUT];
+
+		atomic_set(&bat_priv->multicast_fanout, nla_get_u32(attr));
+	}
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
+#ifdef CONFIG_BATMAN_ADV_NC
+	if (info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED];
+
+		atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr));
+		batadv_nc_status_update(bat_priv->soft_iface);
+	}
+#endif /* CONFIG_BATMAN_ADV_NC */
+
+	if (info->attrs[BATADV_ATTR_ORIG_INTERVAL]) {
+		u32 orig_interval;
+
+		attr = info->attrs[BATADV_ATTR_ORIG_INTERVAL];
+		orig_interval = nla_get_u32(attr);
+
+		orig_interval = min_t(u32, orig_interval, INT_MAX);
+		orig_interval = max_t(u32, orig_interval, 2 * BATADV_JITTER);
+
+		atomic_set(&bat_priv->orig_interval, orig_interval);
+	}
+
+	batadv_netlink_notify_mesh(bat_priv);
+
+	return 0;
 }
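
The orig_interval handler clamps the requested value into [2 * BATADV_JITTER, INT_MAX] milliseconds; with BATADV_JITTER defined as 20 in main.h, a request of 5 ms is raised to 40 ms, and anything above INT_MAX is capped so the result still fits an atomic_t. The same arithmetic in isolation, assuming that jitter value:

	u32 interval = nla_get_u32(attr);

	interval = min_t(u32, interval, INT_MAX);	    /* fits atomic_t */
	interval = max_t(u32, interval, 2 * BATADV_JITTER); /* >= 40 ms */
	atomic_set(&bat_priv->orig_interval, interval);
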
 
 /**
@@ -329,40 +708,24 @@
 static int
 batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
 {
-	struct net *net = genl_info_net(info);
-	struct net_device *soft_iface;
-	struct batadv_priv *bat_priv;
+	struct batadv_priv *bat_priv = info->user_ptr[0];
 	struct sk_buff *msg = NULL;
 	u32 test_length;
 	void *msg_head;
-	int ifindex;
 	u32 cookie;
 	u8 *dst;
 	int ret;
 
-	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
-		return -EINVAL;
-
 	if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
 		return -EINVAL;
 
 	if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME])
 		return -EINVAL;
 
-	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
-	if (!ifindex)
-		return -EINVAL;
-
 	dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
 
 	test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]);
 
-	soft_iface = dev_get_by_index(net, ifindex);
-	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
-		ret = -ENODEV;
-		goto out;
-	}
-
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg) {
 		ret = -ENOMEM;
@@ -377,15 +740,11 @@
 		goto out;
 	}
 
-	bat_priv = netdev_priv(soft_iface);
 	batadv_tp_start(bat_priv, dst, test_length, &cookie);
 
 	ret = batadv_netlink_tp_meter_put(msg, cookie);
 
  out:
-	if (soft_iface)
-		dev_put(soft_iface);
-
 	if (ret) {
 		if (msg)
 			nlmsg_free(msg);
@@ -406,61 +765,53 @@
 static int
 batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info)
 {
-	struct net *net = genl_info_net(info);
-	struct net_device *soft_iface;
-	struct batadv_priv *bat_priv;
-	int ifindex;
+	struct batadv_priv *bat_priv = info->user_ptr[0];
 	u8 *dst;
 	int ret = 0;
 
-	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
-		return -EINVAL;
-
 	if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
 		return -EINVAL;
 
-	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
-	if (!ifindex)
-		return -EINVAL;
-
 	dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
 
-	soft_iface = dev_get_by_index(net, ifindex);
-	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
-		ret = -ENODEV;
-		goto out;
-	}
-
-	bat_priv = netdev_priv(soft_iface);
 	batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL);
 
-out:
-	if (soft_iface)
-		dev_put(soft_iface);
-
 	return ret;
 }
 
 /**
- * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message
+ * batadv_netlink_hardif_fill() - Fill message with hardif attributes
  * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: hard interface which was modified
+ * @cmd: type of message to generate
  * @portid: Port making netlink request
- * @seq: Sequence number of netlink message
- * @hard_iface: Hard interface to dump
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
+ * @cb: Control block containing additional options
  *
- * Return: error code, or 0 on success
+ * Return: 0 on success or negative error number in case of failure
  */
-static int
-batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
-				 struct batadv_hard_iface *hard_iface)
+static int batadv_netlink_hardif_fill(struct sk_buff *msg,
+				      struct batadv_priv *bat_priv,
+				      struct batadv_hard_iface *hard_iface,
+				      enum batadv_nl_commands cmd,
+				      u32 portid, u32 seq, int flags,
+				      struct netlink_callback *cb)
 {
 	struct net_device *net_dev = hard_iface->net_dev;
 	void *hdr;
 
-	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
-			  BATADV_CMD_GET_HARDIFS);
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
 	if (!hdr)
-		return -EMSGSIZE;
+		return -ENOBUFS;
+
+	if (cb)
+		genl_dump_check_consistent(cb, hdr);
+
+	if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
+			bat_priv->soft_iface->ifindex))
+		goto nla_put_failure;
 
 	if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
 			net_dev->ifindex) ||
@@ -475,30 +826,139 @@
 			goto nla_put_failure;
 	}
 
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+	if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL,
+			atomic_read(&hard_iface->bat_v.elp_interval)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT_OVERRIDE,
+			atomic_read(&hard_iface->bat_v.throughput_override)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
 	genlmsg_end(msg, hdr);
 	return 0;
 
- nla_put_failure:
+nla_put_failure:
 	genlmsg_cancel(msg, hdr);
 	return -EMSGSIZE;
 }
 
 /**
- * batadv_netlink_dump_hardifs() - Dump all hard interface into a messages
+ * batadv_netlink_notify_hardif() - send hardif attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: hard interface which was modified
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+				 struct batadv_hard_iface *hard_iface)
+{
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+					 BATADV_CMD_SET_HARDIF, 0, 0, 0, NULL);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	genlmsg_multicast_netns(&batadv_netlink_family,
+				dev_net(bat_priv->soft_iface), msg, 0,
+				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_get_hardif() - Get hardif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_hardif(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+					 BATADV_CMD_GET_HARDIF,
+					 info->snd_portid, info->snd_seq, 0,
+					 NULL);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	ret = genlmsg_reply(msg, info);
+
+	return ret;
+}
+
+/**
+ * batadv_netlink_set_hardif() - Set hardif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_hardif(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+	struct nlattr *attr;
+
+	if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) {
+		attr = info->attrs[BATADV_ATTR_ELP_INTERVAL];
+
+		atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr));
+	}
+
+	if (info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]) {
+		attr = info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE];
+
+		atomic_set(&hard_iface->bat_v.throughput_override,
+			   nla_get_u32(attr));
+	}
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
+	batadv_netlink_notify_hardif(bat_priv, hard_iface);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_dump_hardif() - Dump all hard interfaces into a message
  * @msg: Netlink message to dump into
  * @cb: Parameters from query
  *
  * Return: error code, or length of reply message on success
  */
 static int
-batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
+batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(cb->skb->sk);
 	struct net_device *soft_iface;
 	struct batadv_hard_iface *hard_iface;
+	struct batadv_priv *bat_priv;
 	int ifindex;
 	int portid = NETLINK_CB(cb->skb).portid;
-	int seq = cb->nlh->nlmsg_seq;
 	int skip = cb->args[0];
 	int i = 0;
 
@@ -516,23 +976,28 @@
 		return -ENODEV;
 	}
 
-	rcu_read_lock();
+	bat_priv = netdev_priv(soft_iface);
 
-	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+	rtnl_lock();
+	cb->seq = batadv_hardif_generation << 1 | 1;
+
+	list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
 		if (hard_iface->soft_iface != soft_iface)
 			continue;
 
 		if (i++ < skip)
 			continue;
 
-		if (batadv_netlink_dump_hardif_entry(msg, portid, seq,
-						     hard_iface)) {
+		if (batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+					       BATADV_CMD_GET_HARDIF,
+					       portid, cb->nlh->nlmsg_seq,
+					       NLM_F_MULTI, cb)) {
 			i--;
 			break;
 		}
 	}
 
-	rcu_read_unlock();
+	rtnl_unlock();
 
 	dev_put(soft_iface);
 
@@ -541,92 +1006,462 @@
 	return msg->len;
 }
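
Two things survive between the passes of this dump: cb->args[0] records how many entries earlier passes already emitted, so the loop can skip them on resume, and cb->seq is derived from batadv_hardif_generation with the low bit forced on so it is never zero, letting genl_dump_check_consistent() mark the reply as interrupted if the interface list changes mid-dump. The resume logic on its own (example_fill_one() stands in for the real fill call):

static int example_dump_pass(struct sk_buff *msg, struct netlink_callback *cb)
{
	struct batadv_hard_iface *hard_iface;
	int skip = cb->args[0];	/* entries sent by earlier passes */
	int i = 0;

	list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
		if (i++ < skip)
			continue;	/* already delivered */

		if (example_fill_one(msg, hard_iface)) {
			i--;		/* out of room: retry next pass */
			break;
		}
	}

	cb->args[0] = i;	/* resume point for the next pass */
	return msg->len;	/* non-zero: netlink calls us again */
}
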
 
+/**
+ * batadv_netlink_vlan_fill() - Fill message with vlan attributes
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: vlan which was modified
+ * @cmd: type of message to generate
+ * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_vlan_fill(struct sk_buff *msg,
+				    struct batadv_priv *bat_priv,
+				    struct batadv_softif_vlan *vlan,
+				    enum batadv_nl_commands cmd,
+				    u32 portid, u32 seq, int flags)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
+			bat_priv->soft_iface->ifindex))
+		goto nla_put_failure;
+
+	if (nla_put_u16(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED,
+		       !!atomic_read(&vlan->ap_isolation)))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_netlink_notify_vlan() - send vlan attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: vlan which was modified
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+			       struct batadv_softif_vlan *vlan)
+{
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan,
+				       BATADV_CMD_SET_VLAN, 0, 0, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	genlmsg_multicast_netns(&batadv_netlink_family,
+				dev_net(bat_priv->soft_iface), msg, 0,
+				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_get_vlan() - Get vlan attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_softif_vlan *vlan = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, BATADV_CMD_GET_VLAN,
+				       info->snd_portid, info->snd_seq, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	ret = genlmsg_reply(msg, info);
+
+	return ret;
+}
+
+/**
+ * batadv_netlink_set_vlan() - Set vlan attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_softif_vlan *vlan = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct nlattr *attr;
+
+	if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];
+
+		atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr));
+	}
+
+	batadv_netlink_notify_vlan(bat_priv, vlan);
+
+	return 0;
+}
+
+/**
+ * batadv_get_softif_from_info() - Retrieve soft interface from genl attributes
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to soft interface (with increased refcnt) on success, error
+ *  pointer on error
+ */
+static struct net_device *
+batadv_get_softif_from_info(struct net *net, struct genl_info *info)
+{
+	struct net_device *soft_iface;
+	int ifindex;
+
+	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+		return ERR_PTR(-EINVAL);
+
+	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface)
+		return ERR_PTR(-ENODEV);
+
+	if (!batadv_softif_is_valid(soft_iface))
+		goto err_put_softif;
+
+	return soft_iface;
+
+err_put_softif:
+	dev_put(soft_iface);
+
+	return ERR_PTR(-EINVAL);
+}
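
Callers of this helper get either a referenced net_device or an encoded errno, so they branch with IS_ERR()/PTR_ERR() and must balance the hidden dev_get_by_index() with dev_put(), exactly as batadv_pre_doit() does further down. In outline (example_use_softif() is hypothetical):

static int example_use_softif(struct net *net, struct genl_info *info)
{
	struct net_device *soft_iface;

	soft_iface = batadv_get_softif_from_info(net, info);
	if (IS_ERR(soft_iface))
		return PTR_ERR(soft_iface);	/* -EINVAL or -ENODEV */

	/* ... work with netdev_priv(soft_iface) ... */

	dev_put(soft_iface);	/* balance dev_get_by_index() */
	return 0;
}
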
+
+/**
+ * batadv_get_hardif_from_info() - Retrieve hardif from genl attributes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to hard interface (with increased refcnt) on success, error
+ *  pointer on error
+ */
+static struct batadv_hard_iface *
+batadv_get_hardif_from_info(struct batadv_priv *bat_priv, struct net *net,
+			    struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface;
+	struct net_device *hard_dev;
+	unsigned int hardif_index;
+
+	if (!info->attrs[BATADV_ATTR_HARD_IFINDEX])
+		return ERR_PTR(-EINVAL);
+
+	hardif_index = nla_get_u32(info->attrs[BATADV_ATTR_HARD_IFINDEX]);
+
+	hard_dev = dev_get_by_index(net, hardif_index);
+	if (!hard_dev)
+		return ERR_PTR(-ENODEV);
+
+	hard_iface = batadv_hardif_get_by_netdev(hard_dev);
+	if (!hard_iface)
+		goto err_put_harddev;
+
+	if (hard_iface->soft_iface != bat_priv->soft_iface)
+		goto err_put_hardif;
+
+	/* hard_dev is referenced by hard_iface and not needed here */
+	dev_put(hard_dev);
+
+	return hard_iface;
+
+err_put_hardif:
+	batadv_hardif_put(hard_iface);
+err_put_harddev:
+	dev_put(hard_dev);
+
+	return ERR_PTR(-EINVAL);
+}
+
+/**
+ * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to vlan on success (with increased refcnt), error pointer
+ *  on error
+ */
+static struct batadv_softif_vlan *
+batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net,
+			  struct genl_info *info)
+{
+	struct batadv_softif_vlan *vlan;
+	u16 vid;
+
+	if (!info->attrs[BATADV_ATTR_VLANID])
+		return ERR_PTR(-EINVAL);
+
+	vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]);
+
+	vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG);
+	if (!vlan)
+		return ERR_PTR(-ENOENT);
+
+	return vlan;
+}
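
batman-adv keys its VLAN objects on the vid with BATADV_VLAN_HAS_TAG ORed in, which keeps a tagged vid distinct from the untagged pseudo-VLAN that the mesh-wide ap_isolation code earlier in this patch looks up with BATADV_NO_FLAGS. Side by side, as a sketch (each successful get still needs a matching put):

	struct batadv_softif_vlan *tagged, *untagged;

	/* 802.1Q VLAN 42: the tag bit is part of the stored vid */
	tagged = batadv_softif_vlan_get(bat_priv, 42 | BATADV_VLAN_HAS_TAG);

	/* untagged traffic is looked up with no flags at all */
	untagged = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
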
+
+/**
+ * batadv_pre_doit() - Prepare batman-adv genl doit request
+ * @ops: requested netlink operation
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
+			   struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct batadv_hard_iface *hard_iface;
+	struct batadv_priv *bat_priv = NULL;
+	struct batadv_softif_vlan *vlan;
+	struct net_device *soft_iface;
+	u8 user_ptr1_flags;
+	u8 mesh_dep_flags;
+	int ret;
+
+	user_ptr1_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;
+	if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1))
+		return -EINVAL;
+
+	mesh_dep_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;
+	if (WARN_ON((ops->internal_flags & mesh_dep_flags) &&
+		    (~ops->internal_flags & BATADV_FLAG_NEED_MESH)))
+		return -EINVAL;
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_MESH) {
+		soft_iface = batadv_get_softif_from_info(net, info);
+		if (IS_ERR(soft_iface))
+			return PTR_ERR(soft_iface);
+
+		bat_priv = netdev_priv(soft_iface);
+		info->user_ptr[0] = bat_priv;
+	}
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF) {
+		hard_iface = batadv_get_hardif_from_info(bat_priv, net, info);
+		if (IS_ERR(hard_iface)) {
+			ret = PTR_ERR(hard_iface);
+			goto err_put_softif;
+		}
+
+		info->user_ptr[1] = hard_iface;
+	}
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_VLAN) {
+		vlan = batadv_get_vlan_from_info(bat_priv, net, info);
+		if (IS_ERR(vlan)) {
+			ret = PTR_ERR(vlan);
+			goto err_put_softif;
+		}
+
+		info->user_ptr[1] = vlan;
+	}
+
+	return 0;
+
+err_put_softif:
+	if (bat_priv)
+		dev_put(bat_priv->soft_iface);
+
+	return ret;
+}
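
The two WARN_ON() checks encode the user_ptr[] contract: NEED_HARDIF and NEED_VLAN both park their object in info->user_ptr[1], so at most one of those bits may be set per op, and each of them implies NEED_MESH because both lookups dereference bat_priv. hweight8() simply counts set bits, so a broken op table is caught on the first request:

	u8 bad = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;

	/* two bits set, so hweight8(bad) == 2 and the WARN_ON() fires:
	 * both objects would otherwise collide in info->user_ptr[1]
	 */
	WARN_ON(hweight8(bad) > 1);
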
+
+/**
+ * batadv_post_doit() - End batman-adv genl doit request
+ * @ops: requested netlink operation
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ */
+static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
+			     struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface;
+	struct batadv_softif_vlan *vlan;
+	struct batadv_priv *bat_priv;
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF &&
+	    info->user_ptr[1]) {
+		hard_iface = info->user_ptr[1];
+
+		batadv_hardif_put(hard_iface);
+	}
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) {
+		vlan = info->user_ptr[1];
+		batadv_softif_vlan_put(vlan);
+	}
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) {
+		bat_priv = info->user_ptr[0];
+		dev_put(bat_priv->soft_iface);
+	}
+}
+
 static const struct genl_ops batadv_netlink_ops[] = {
 	{
-		.cmd = BATADV_CMD_GET_MESH_INFO,
-		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
-		.doit = batadv_netlink_get_mesh_info,
+		.cmd = BATADV_CMD_GET_MESH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		/* can be retrieved by unprivileged users */
+		.doit = batadv_netlink_get_mesh,
+		.internal_flags = BATADV_FLAG_NEED_MESH,
 	},
 	{
 		.cmd = BATADV_CMD_TP_METER,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.doit = batadv_netlink_tp_meter_start,
+		.internal_flags = BATADV_FLAG_NEED_MESH,
 	},
 	{
 		.cmd = BATADV_CMD_TP_METER_CANCEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.doit = batadv_netlink_tp_meter_cancel,
+		.internal_flags = BATADV_FLAG_NEED_MESH,
 	},
 	{
 		.cmd = BATADV_CMD_GET_ROUTING_ALGOS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.dumpit = batadv_algo_dump,
 	},
 	{
-		.cmd = BATADV_CMD_GET_HARDIFS,
-		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
-		.dumpit = batadv_netlink_dump_hardifs,
+		.cmd = BATADV_CMD_GET_HARDIF,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		/* can be retrieved by unprivileged users */
+		.dumpit = batadv_netlink_dump_hardif,
+		.doit = batadv_netlink_get_hardif,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_HARDIF,
 	},
 	{
 		.cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.dumpit = batadv_tt_local_dump,
 	},
 	{
 		.cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.dumpit = batadv_tt_global_dump,
 	},
 	{
 		.cmd = BATADV_CMD_GET_ORIGINATORS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.dumpit = batadv_orig_dump,
 	},
 	{
 		.cmd = BATADV_CMD_GET_NEIGHBORS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.dumpit = batadv_hardif_neigh_dump,
 	},
 	{
 		.cmd = BATADV_CMD_GET_GATEWAYS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.dumpit = batadv_gw_dump,
 	},
 	{
 		.cmd = BATADV_CMD_GET_BLA_CLAIM,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.dumpit = batadv_bla_claim_dump,
 	},
 	{
 		.cmd = BATADV_CMD_GET_BLA_BACKBONE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.dumpit = batadv_bla_backbone_dump,
 	},
 	{
 		.cmd = BATADV_CMD_GET_DAT_CACHE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.dumpit = batadv_dat_cache_dump,
 	},
 	{
 		.cmd = BATADV_CMD_GET_MCAST_FLAGS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = batadv_netlink_policy,
 		.dumpit = batadv_mcast_flags_dump,
 	},
-
+	{
+		.cmd = BATADV_CMD_SET_MESH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_ADMIN_PERM,
+		.doit = batadv_netlink_set_mesh,
+		.internal_flags = BATADV_FLAG_NEED_MESH,
+	},
+	{
+		.cmd = BATADV_CMD_SET_HARDIF,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_ADMIN_PERM,
+		.doit = batadv_netlink_set_hardif,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_HARDIF,
+	},
+	{
+		.cmd = BATADV_CMD_GET_VLAN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		/* can be retrieved by unprivileged users */
+		.doit = batadv_netlink_get_vlan,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_VLAN,
+	},
+	{
+		.cmd = BATADV_CMD_SET_VLAN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_ADMIN_PERM,
+		.doit = batadv_netlink_set_vlan,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_VLAN,
+	},
 };
 
 struct genl_family batadv_netlink_family __ro_after_init = {
@@ -634,7 +1469,10 @@
 	.name = BATADV_NL_NAME,
 	.version = 1,
 	.maxattr = BATADV_ATTR_MAX,
+	.policy = batadv_netlink_policy,
 	.netnsok = true,
+	.pre_doit = batadv_pre_doit,
+	.post_doit = batadv_post_doit,
 	.module = THIS_MODULE,
 	.ops = batadv_netlink_ops,
 	.n_ops = ARRAY_SIZE(batadv_netlink_ops),
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 571d9a5..ddc674e 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2019  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_NETLINK_H_
@@ -21,11 +9,10 @@
 
 #include "main.h"
 
+#include <linux/netlink.h>
 #include <linux/types.h>
 #include <net/genetlink.h>
 
-struct nlmsghdr;
-
 void batadv_netlink_register(void);
 void batadv_netlink_unregister(void);
 int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype);
@@ -34,6 +21,12 @@
 				  u8 result, u32 test_time, u64 total_bytes,
 				  u32 cookie);
 
+int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv);
+int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+				 struct batadv_hard_iface *hard_iface);
+int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+			       struct batadv_softif_vlan *vlan);
+
 extern struct genl_family batadv_netlink_family;
 
 #endif /* _NET_BATMAN_ADV_NETLINK_H_ */
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 34caf12..5806093 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2019  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "network-coding.h"
@@ -1963,34 +1951,19 @@
 /**
  * batadv_nc_init_debugfs() - create nc folder and related files in debugfs
  * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: 0 on success or negative error number in case of failure
  */
-int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
+void batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
 {
-	struct dentry *nc_dir, *file;
+	struct dentry *nc_dir;
 
 	nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir);
-	if (!nc_dir)
-		goto out;
 
-	file = debugfs_create_u8("min_tq", 0644, nc_dir, &bat_priv->nc.min_tq);
-	if (!file)
-		goto out;
+	debugfs_create_u8("min_tq", 0644, nc_dir, &bat_priv->nc.min_tq);
 
-	file = debugfs_create_u32("max_fwd_delay", 0644, nc_dir,
-				  &bat_priv->nc.max_fwd_delay);
-	if (!file)
-		goto out;
+	debugfs_create_u32("max_fwd_delay", 0644, nc_dir,
+			   &bat_priv->nc.max_fwd_delay);
 
-	file = debugfs_create_u32("max_buffer_time", 0644, nc_dir,
-				  &bat_priv->nc.max_buffer_time);
-	if (!file)
-		goto out;
-
-	return 0;
-
-out:
-	return -ENOMEM;
+	debugfs_create_u32("max_buffer_time", 0644, nc_dir,
+			   &bat_priv->nc.max_buffer_time);
 }
 #endif
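
The rewrite relies on the post-v5.0 debugfs convention: debugfs_create_dir() hands back an ERR_PTR on failure, and the debugfs_create_u8()/debugfs_create_u32() helpers accept such a parent and quietly do nothing, so the old out:/-ENOMEM unwinding had become dead weight. A sketch of the convention (parent and min_tq are placeholders):

	struct dentry *nc_dir;

	nc_dir = debugfs_create_dir("nc", parent);

	/* even if nc_dir is an ERR_PTR, this degrades to a no-op
	 * instead of an oops, so no error check is needed
	 */
	debugfs_create_u8("min_tq", 0644, nc_dir, &min_tq);
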
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 65c3468..753fa49 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2019  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_
@@ -21,12 +9,11 @@
 
 #include "main.h"
 
+#include <linux/netdevice.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/types.h>
-
-struct batadv_ogm_packet;
-struct net_device;
-struct seq_file;
-struct sk_buff;
+#include <uapi/linux/batadv_packet.h>
 
 #ifdef CONFIG_BATMAN_ADV_NC
 
@@ -52,7 +39,7 @@
 void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
 					 struct sk_buff *skb);
 int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset);
-int batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
+void batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
 
 #else /* ifdef CONFIG_BATMAN_ADV_NC */
 
@@ -123,9 +110,8 @@
 	return 0;
 }
 
-static inline int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
+static inline void batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
 {
-	return 0;
 }
 
 #endif /* ifdef CONFIG_BATMAN_ADV_NC */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 1d295da..3861348 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "originator.h"
@@ -39,6 +27,7 @@
 #include <linux/stddef.h>
 #include <linux/workqueue.h>
 #include <net/sock.h>
+#include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
@@ -904,9 +893,6 @@
 
 	batadv_frag_purge_orig(orig_node, NULL);
 
-	if (orig_node->bat_priv->algo_ops->orig.free)
-		orig_node->bat_priv->algo_ops->orig.free(orig_node);
-
 	kfree(orig_node->tt_buff);
 	kfree(orig_node);
 }
@@ -1058,7 +1044,8 @@
 	orig_node->bcast_seqno_reset = reset_time;
 
 #ifdef CONFIG_BATMAN_ADV_MCAST
-	orig_node->mcast_flags = BATADV_NO_FLAGS;
+	orig_node->mcast_flags = BATADV_MCAST_WANT_NO_RTR4;
+	orig_node->mcast_flags |= BATADV_MCAST_WANT_NO_RTR6;
 	INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node);
 	INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node);
 	INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node);
@@ -1555,107 +1542,3 @@
 
 	return ret;
 }
-
-/**
- * batadv_orig_hash_add_if() - Add interface to originators in orig_hash
- * @hard_iface: hard interface to add (already slave of the soft interface)
- * @max_if_num: new number of interfaces
- *
- * Return: 0 on success or negative error number in case of failure
- */
-int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
-			    unsigned int max_if_num)
-{
-	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
-	struct batadv_algo_ops *bao = bat_priv->algo_ops;
-	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_head *head;
-	struct batadv_orig_node *orig_node;
-	u32 i;
-	int ret;
-
-	/* resize all orig nodes because orig_node->bcast_own(_sum) depend on
-	 * if_num
-	 */
-	for (i = 0; i < hash->size; i++) {
-		head = &hash->table[i];
-
-		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
-			ret = 0;
-			if (bao->orig.add_if)
-				ret = bao->orig.add_if(orig_node, max_if_num);
-			if (ret == -ENOMEM)
-				goto err;
-		}
-		rcu_read_unlock();
-	}
-
-	return 0;
-
-err:
-	rcu_read_unlock();
-	return -ENOMEM;
-}
-
-/**
- * batadv_orig_hash_del_if() - Remove interface from originators in orig_hash
- * @hard_iface: hard interface to remove (still slave of the soft interface)
- * @max_if_num: new number of interfaces
- *
- * Return: 0 on success or negative error number in case of failure
- */
-int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
-			    unsigned int max_if_num)
-{
-	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
-	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_head *head;
-	struct batadv_hard_iface *hard_iface_tmp;
-	struct batadv_orig_node *orig_node;
-	struct batadv_algo_ops *bao = bat_priv->algo_ops;
-	u32 i;
-	int ret;
-
-	/* resize all orig nodes because orig_node->bcast_own(_sum) depend on
-	 * if_num
-	 */
-	for (i = 0; i < hash->size; i++) {
-		head = &hash->table[i];
-
-		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
-			ret = 0;
-			if (bao->orig.del_if)
-				ret = bao->orig.del_if(orig_node, max_if_num,
-						       hard_iface->if_num);
-			if (ret == -ENOMEM)
-				goto err;
-		}
-		rcu_read_unlock();
-	}
-
-	/* renumber remaining batman interfaces _inside_ of orig_hash_lock */
-	rcu_read_lock();
-	list_for_each_entry_rcu(hard_iface_tmp, &batadv_hardif_list, list) {
-		if (hard_iface_tmp->if_status == BATADV_IF_NOT_IN_USE)
-			continue;
-
-		if (hard_iface == hard_iface_tmp)
-			continue;
-
-		if (hard_iface->soft_iface != hard_iface_tmp->soft_iface)
-			continue;
-
-		if (hard_iface_tmp->if_num > hard_iface->if_num)
-			hard_iface_tmp->if_num--;
-	}
-	rcu_read_unlock();
-
-	hard_iface->if_num = -1;
-	return 0;
-
-err:
-	rcu_read_unlock();
-	return -ENOMEM;
-}
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 3b3f59b..512a1f9 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_ORIGINATOR_H_
@@ -24,12 +12,11 @@
 #include <linux/compiler.h>
 #include <linux/if_ether.h>
 #include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/types.h>
 
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
-
 bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
 int batadv_originator_init(struct batadv_priv *bat_priv);
 void batadv_originator_free(struct batadv_priv *bat_priv);
@@ -72,10 +59,6 @@
 int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
-int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
-			    unsigned int max_if_num);
-int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
-			    unsigned int max_if_num);
 struct batadv_orig_node_vlan *
 batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
 			  unsigned short vid);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index cc3ed93..f0f8648 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "routing.h"
@@ -1043,6 +1031,8 @@
 							hdr_size))
 			goto rx_success;
 
+		batadv_dat_snoop_incoming_dhcp_ack(bat_priv, skb, hdr_size);
+
 		batadv_interface_rx(recv_if->soft_iface, skb, hdr_size,
 				    orig_node);
 
@@ -1278,6 +1268,8 @@
 	if (batadv_dat_snoop_incoming_arp_reply(bat_priv, skb, hdr_size))
 		goto rx_success;
 
+	batadv_dat_snoop_incoming_dhcp_ack(bat_priv, skb, hdr_size);
+
 	/* broadcast for me */
 	batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, orig_node);
 
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index db54c2d..c20feac 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_ROUTING_H_
@@ -21,10 +9,9 @@
 
 #include "main.h"
 
+#include <linux/skbuff.h>
 #include <linux/types.h>
 
-struct sk_buff;
-
 bool batadv_check_management_packet(struct sk_buff *skb,
 				    struct batadv_hard_iface *hard_iface,
 				    int header_len);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 4a35f5c..3ce5f7b 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "send.h"
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 64cce07..5fc0fd1 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_SEND_H_
@@ -22,12 +10,11 @@
 #include "main.h"
 
 #include <linux/compiler.h>
+#include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <uapi/linux/batadv_packet.h>
 
-struct sk_buff;
-
 void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
 			     bool dropped);
 struct batadv_forw_packet *
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 626ddca..5ee8e9a 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "soft-interface.h"
@@ -36,6 +24,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/percpu.h>
 #include <linux/printk.h>
 #include <linux/random.h>
@@ -50,13 +39,13 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
 #include "debugfs.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
-#include "gateway_common.h"
 #include "hard-interface.h"
 #include "multicast.h"
 #include "network-coding.h"
@@ -209,9 +198,10 @@
 	unsigned short vid;
 	u32 seqno;
 	int gw_mode;
-	enum batadv_forw_mode forw_mode;
+	enum batadv_forw_mode forw_mode = BATADV_FORW_SINGLE;
 	struct batadv_orig_node *mcast_single_orig = NULL;
 	int network_offset = ETH_HLEN;
+	__be16 proto;
 
 	if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
 		goto dropped;
@@ -221,14 +211,21 @@
 
 	netif_trans_update(soft_iface);
 	vid = batadv_get_vid(skb, 0);
+
+	skb_reset_mac_header(skb);
 	ethhdr = eth_hdr(skb);
 
-	switch (ntohs(ethhdr->h_proto)) {
+	proto = ethhdr->h_proto;
+
+	switch (ntohs(proto)) {
 	case ETH_P_8021Q:
+		if (!pskb_may_pull(skb, sizeof(*vhdr)))
+			goto dropped;
 		vhdr = vlan_eth_hdr(skb);
+		proto = vhdr->h_vlan_encapsulated_proto;
 
 		/* drop batman-in-batman packets to prevent loops */
-		if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN)) {
+		if (proto != htons(ETH_P_BATMAN)) {
 			network_offset += VLAN_HLEN;
 			break;
 		}
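
The new pskb_may_pull() guard matters because only the plain Ethernet header can be assumed linear at this point; the four extra 802.1Q bytes must be pulled into the linear area before h_vlan_encapsulated_proto can be read safely. The guard in isolation:

	/* make sure the full struct vlan_ethhdr is linear */
	if (!pskb_may_pull(skb, sizeof(struct vlan_ethhdr)))
		goto dropped;	/* runt frame */

	vhdr = vlan_eth_hdr(skb);
	proto = vhdr->h_vlan_encapsulated_proto;
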
@@ -256,6 +253,9 @@
 			goto dropped;
 	}
 
+	/* Snoop address candidates from DHCPACKs for early DAT filling */
+	batadv_dat_snoop_outgoing_dhcp_ack(bat_priv, skb, proto, vid);
+
 	/* don't accept stp packets. STP does not help in meshes.
 	 * better use the bridge loop avoidance ...
 	 *
@@ -306,7 +306,8 @@
 			if (forw_mode == BATADV_FORW_NONE)
 				goto dropped;
 
-			if (forw_mode == BATADV_FORW_SINGLE)
+			if (forw_mode == BATADV_FORW_SINGLE ||
+			    forw_mode == BATADV_FORW_SOME)
 				do_bcast = false;
 		}
 	}
@@ -366,6 +367,8 @@
 			ret = batadv_send_skb_unicast(bat_priv, skb,
 						      BATADV_UNICAST, 0,
 						      mcast_single_orig, vid);
+		} else if (forw_mode == BATADV_FORW_SOME) {
+			ret = batadv_mcast_forw_send(bat_priv, skb, vid);
 		} else {
 			if (batadv_dat_snoop_outgoing_arp_request(bat_priv,
 								  skb))
@@ -433,7 +436,7 @@
 	/* clean the netfilter state now that the batman-adv header has been
 	 * removed
 	 */
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
 		goto dropped;
@@ -737,36 +740,6 @@
 	return 0;
 }
 
-/* batman-adv network devices have devices nesting below it and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key batadv_netdev_xmit_lock_key;
-static struct lock_class_key batadv_netdev_addr_lock_key;
-
-/**
- * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue
- * @dev: device which owns the tx queue
- * @txq: tx queue to modify
- * @_unused: always NULL
- */
-static void batadv_set_lockdep_class_one(struct net_device *dev,
-					 struct netdev_queue *txq,
-					 void *_unused)
-{
-	lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key);
-}
-
-/**
- * batadv_set_lockdep_class() - Set txq and addr_list lockdep class
- * @dev: network device to modify
- */
-static void batadv_set_lockdep_class(struct net_device *dev)
-{
-	lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key);
-	netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL);
-}
-
 /**
  * batadv_softif_init_late() - late stage initialization of soft interface
  * @dev: registered network device to modify
@@ -780,8 +753,6 @@
 	int ret;
 	size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM;
 
-	batadv_set_lockdep_class(dev);
-
 	bat_priv = netdev_priv(dev);
 	bat_priv->soft_iface = dev;
 
@@ -801,12 +772,8 @@
 	atomic_set(&bat_priv->distributed_arp_table, 1);
 #endif
 #ifdef CONFIG_BATMAN_ADV_MCAST
-	bat_priv->mcast.querier_ipv4.exists = false;
-	bat_priv->mcast.querier_ipv4.shadowing = false;
-	bat_priv->mcast.querier_ipv6.exists = false;
-	bat_priv->mcast.querier_ipv6.shadowing = false;
-	bat_priv->mcast.flags = BATADV_NO_FLAGS;
 	atomic_set(&bat_priv->multicast_mode, 1);
+	atomic_set(&bat_priv->multicast_fanout, 16);
 	atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0);
 	atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0);
 	atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
@@ -844,7 +811,6 @@
 	atomic_set(&bat_priv->frag_seqno, random_seqno);
 
 	bat_priv->primary_if = NULL;
-	bat_priv->num_ifaces = 0;
 
 	batadv_nc_init_bat_priv(bat_priv);
 
@@ -945,10 +911,10 @@
 static void batadv_get_drvinfo(struct net_device *dev,
 			       struct ethtool_drvinfo *info)
 {
-	strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
-	strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
-	strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
-	strlcpy(info->bus_info, "batman", sizeof(info->bus_info));
+	strscpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
+	strscpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
+	strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
+	strscpy(info->bus_info, "batman", sizeof(info->bus_info));
 }
 
 /* Inspired by drivers/net/ethernet/dlink/sundance.c:1702
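
The strlcpy()-to-strscpy() conversions in this patch are more than cosmetic: strscpy() reports truncation directly and never reads the source beyond what it needs, whereas strlcpy() always walks the entire source string to compute its return value. A minimal standalone sketch of the difference:

	char buf[4];

	/* strlcpy() returns strlen(src), here 5; the caller must compare
	 * against sizeof(buf) to notice the truncation
	 */
	strlcpy(buf, "hello", sizeof(buf));		/* buf = "hel" */

	/* strscpy() returns the number of characters copied (excluding
	 * the NUL), or -E2BIG when the result was truncated
	 */
	if (strscpy(buf, "hello", sizeof(buf)) == -E2BIG)
		pr_debug("truncated\n");		/* buf = "hel" */
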
@@ -1062,6 +1028,7 @@
 	dev->needs_free_netdev = true;
 	dev->priv_destructor = batadv_softif_free;
 	dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
+	dev->features |= NETIF_F_LLTX;
 	dev->priv_flags |= IFF_NO_QUEUE;
 
 	/* can't call min_mtu, because the needed variables
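
The NETIF_F_LLTX addition reads as the counterpart to the lockdep-class removal earlier in this file: with LLTX the core never takes the device's TX queue lock for batadv, so there is no nested xmit-lock chain left to annotate and the soft interface serializes its own transmit path. A reduced sketch of the setup, under that reading:

	static void demo_softif_setup(struct net_device *dev)
	{
		/* the core skips HARD_TX_LOCK for NETIF_F_LLTX devices */
		dev->features |= NETIF_F_LLTX;
		/* and a stacked virtual device needs no qdisc queue */
		dev->priv_flags |= IFF_NO_QUEUE;
	}
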
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index daf87f0..29139ad 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_
@@ -21,13 +9,12 @@
 
 #include "main.h"
 
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
 #include <linux/types.h>
+#include <net/net_namespace.h>
 #include <net/rtnetlink.h>
 
-struct net_device;
-struct net;
-struct sk_buff;
-
 int batadv_skb_head_push(struct sk_buff *skb, unsigned int len);
 void batadv_interface_rx(struct net_device *soft_iface,
 			 struct sk_buff *skb, int hdr_size,
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 09427fc..e5bbc28 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,24 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "sysfs.h"
 #include "main.h"
 
+#include <asm/current.h>
 #include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <linux/device.h>
@@ -29,17 +18,20 @@
 #include <linux/kernel.h>
 #include <linux/kobject.h>
 #include <linux/kref.h>
+#include <linux/limits.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/rtnetlink.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/stringify.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
@@ -47,9 +39,20 @@
 #include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "soft-interface.h"
 
+/**
+ * batadv_sysfs_deprecated() - Log use of deprecated batadv sysfs access
+ * @attr: attribute which was accessed
+ */
+static void batadv_sysfs_deprecated(struct attribute *attr)
+{
+	pr_warn_ratelimited(DEPRECATED "%s (pid %d) Use of sysfs file \"%s\".\nUse batadv genl family instead\n",
+			    current->comm, task_pid_nr(current), attr->name);
+}
+
 static struct net_device *batadv_kobj_to_netdev(struct kobject *obj)
 {
 	struct device *dev = container_of(obj->parent, struct device, kobj);
@@ -112,22 +115,6 @@
 	return vlan;
 }
 
-#define BATADV_UEV_TYPE_VAR	"BATTYPE="
-#define BATADV_UEV_ACTION_VAR	"BATACTION="
-#define BATADV_UEV_DATA_VAR	"BATDATA="
-
-static char *batadv_uev_action_str[] = {
-	"add",
-	"del",
-	"change",
-	"loopdetect",
-};
-
-static char *batadv_uev_type_str[] = {
-	"gw",
-	"bla",
-};
-
 /* Use this, if you have customized show and store functions for vlan attrs */
 #define BATADV_ATTR_VLAN(_name, _mode, _show, _store)	\
 struct batadv_attribute batadv_attr_vlan_##_name = {	\
@@ -153,9 +140,15 @@
 {									\
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);	\
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);		\
+	ssize_t length;							\
 									\
-	return __batadv_store_bool_attr(buff, count, _post_func, attr,	\
-					&bat_priv->_name, net_dev);	\
+	batadv_sysfs_deprecated(attr);					\
+	length = __batadv_store_bool_attr(buff, count, _post_func, attr,\
+					  &bat_priv->_name, net_dev);	\
+									\
+	batadv_netlink_notify_mesh(bat_priv);				\
+									\
+	return length;							\
 }
 
 #define BATADV_ATTR_SIF_SHOW_BOOL(_name)				\
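
For readability, here is approximately what BATADV_ATTR_SIF_STORE_BOOL now expands to for a hypothetical attribute named foo (reconstructed from the macro body above; the signature is assumed and post_func stands in for the macro's _post_func argument):

	static ssize_t batadv_store_foo(struct kobject *kobj,
					struct attribute *attr, char *buff,
					size_t count)
	{
		struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
		struct batadv_priv *bat_priv = netdev_priv(net_dev);
		ssize_t length;

		/* every sysfs write now logs a ratelimited deprecation hint */
		batadv_sysfs_deprecated(attr);
		length = __batadv_store_bool_attr(buff, count, post_func, attr,
						  &bat_priv->foo, net_dev);

		/* ... and mirrors the change to netlink listeners */
		batadv_netlink_notify_mesh(bat_priv);

		return length;
	}
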
@@ -164,6 +157,7 @@
 {									\
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);	\
 									\
+	batadv_sysfs_deprecated(attr);					\
 	return sprintf(buff, "%s\n",					\
 		       atomic_read(&bat_priv->_name) == 0 ?		\
 		       "disabled" : "enabled");				\
@@ -185,11 +179,17 @@
 {									\
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);	\
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);		\
+	ssize_t length;							\
 									\
-	return __batadv_store_uint_attr(buff, count, _min, _max,	\
-					_post_func, attr,		\
-					&bat_priv->_var, net_dev,	\
-					NULL);	\
+	batadv_sysfs_deprecated(attr);					\
+	length = __batadv_store_uint_attr(buff, count, _min, _max,	\
+					  _post_func, attr,		\
+					  &bat_priv->_var, net_dev,	\
+					  NULL);			\
+									\
+	batadv_netlink_notify_mesh(bat_priv);				\
+									\
+	return length;							\
 }
 
 #define BATADV_ATTR_SIF_SHOW_UINT(_name, _var)				\
@@ -198,6 +198,7 @@
 {									\
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);	\
 									\
+	batadv_sysfs_deprecated(attr);					\
 	return sprintf(buff, "%i\n", atomic_read(&bat_priv->_var));	\
 }									\
 
@@ -222,6 +223,12 @@
 					      attr, &vlan->_name,	\
 					      bat_priv->soft_iface);	\
 									\
+	batadv_sysfs_deprecated(attr);					\
+	if (vlan->vid)							\
+		batadv_netlink_notify_vlan(bat_priv, vlan);		\
+	else								\
+		batadv_netlink_notify_mesh(bat_priv);			\
+									\
 	batadv_softif_vlan_put(vlan);					\
 	return res;							\
 }
@@ -237,6 +244,7 @@
 			     atomic_read(&vlan->_name) == 0 ?		\
 			     "disabled" : "enabled");			\
 									\
+	batadv_sysfs_deprecated(attr);					\
 	batadv_softif_vlan_put(vlan);					\
 	return res;							\
 }
@@ -255,8 +263,10 @@
 {									\
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);	\
 	struct batadv_hard_iface *hard_iface;				\
+	struct batadv_priv *bat_priv;					\
 	ssize_t length;							\
 									\
+	batadv_sysfs_deprecated(attr);					\
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);		\
 	if (!hard_iface)						\
 		return 0;						\
@@ -267,6 +277,11 @@
 					  hard_iface->soft_iface,	\
 					  net_dev);			\
 									\
+	if (hard_iface->soft_iface) {					\
+		bat_priv = netdev_priv(hard_iface->soft_iface);		\
+		batadv_netlink_notify_hardif(bat_priv, hard_iface);	\
+	}								\
+									\
 	batadv_hardif_put(hard_iface);				\
 	return length;							\
 }
@@ -279,6 +294,7 @@
 	struct batadv_hard_iface *hard_iface;				\
 	ssize_t length;							\
 									\
+	batadv_sysfs_deprecated(attr);					\
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);		\
 	if (!hard_iface)						\
 		return 0;						\
@@ -423,6 +439,7 @@
 {
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 
+	batadv_sysfs_deprecated(attr);
 	return sprintf(buff, "%s\n", bat_priv->algo_ops->name);
 }
 
@@ -439,6 +456,8 @@
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 	int bytes_written;
 
+	batadv_sysfs_deprecated(attr);
+
 	/* GW mode is not available if the routing algorithm in use does not
 	 * implement the GW API
 	 */
@@ -473,6 +492,8 @@
 	char *curr_gw_mode_str;
 	int gw_mode_tmp = -1;
 
+	batadv_sysfs_deprecated(attr);
+
 	/* toggling GW mode is allowed only if the routing algorithm in use
 	 * provides the GW API
 	 */
@@ -536,6 +557,9 @@
 	batadv_gw_check_client_stop(bat_priv);
 	atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp);
 	batadv_gw_tvlv_container_update(bat_priv);
+
+	batadv_netlink_notify_mesh(bat_priv);
+
 	return count;
 }
 
@@ -544,6 +568,8 @@
 {
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 
+	batadv_sysfs_deprecated(attr);
+
 	/* GW selection class is not available if the routing algorithm in use
 	 * does not implement the GW API
 	 */
@@ -562,6 +588,9 @@
 					 size_t count)
 {
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+	ssize_t length;
+
+	batadv_sysfs_deprecated(attr);
 
 	/* setting the GW selection class is allowed only if the routing
 	 * algorithm in use implements the GW API
@@ -577,10 +606,14 @@
 		return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff,
 							      count);
 
-	return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
-					batadv_post_gw_reselect, attr,
-					&bat_priv->gw.sel_class,
-					bat_priv->soft_iface, NULL);
+	length = __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
+					  batadv_post_gw_reselect, attr,
+					  &bat_priv->gw.sel_class,
+					  bat_priv->soft_iface, NULL);
+
+	batadv_netlink_notify_mesh(bat_priv);
+
+	return length;
 }
 
 static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
@@ -589,6 +622,8 @@
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 	u32 down, up;
 
+	batadv_sysfs_deprecated(attr);
+
 	down = atomic_read(&bat_priv->gw.bandwidth_down);
 	up = atomic_read(&bat_priv->gw.bandwidth_up);
 
@@ -600,12 +635,20 @@
 				      struct attribute *attr, char *buff,
 				      size_t count)
 {
+	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
+	ssize_t length;
+
+	batadv_sysfs_deprecated(attr);
 
 	if (buff[count - 1] == '\n')
 		buff[count - 1] = '\0';
 
-	return batadv_gw_bandwidth_set(net_dev, buff, count);
+	length = batadv_gw_bandwidth_set(net_dev, buff, count);
+
+	batadv_netlink_notify_mesh(bat_priv);
+
+	return length;
 }
 
 /**
@@ -622,6 +665,7 @@
 {
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 
+	batadv_sysfs_deprecated(attr);
 	return sprintf(buff, "%#.8x/%#.8x\n", bat_priv->isolation_mark,
 		       bat_priv->isolation_mark_mask);
 }
@@ -645,6 +689,8 @@
 	u32 mark, mask;
 	char *mask_ptr;
 
+	batadv_sysfs_deprecated(attr);
+
 	/* parse the mask if it has been specified, otherwise assume the mask is
 	 * the biggest possible
 	 */
@@ -673,6 +719,8 @@
 		    "New skb mark for extended isolation: %#.8x/%#.8x\n",
 		    bat_priv->isolation_mark, bat_priv->isolation_mark_mask);
 
+	batadv_netlink_notify_mesh(bat_priv);
+
 	return count;
 }
 
@@ -898,6 +946,8 @@
 	ssize_t length;
 	const char *ifname;
 
+	batadv_sysfs_deprecated(attr);
+
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);
 	if (!hard_iface)
 		return 0;
@@ -1002,6 +1052,8 @@
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
 	struct batadv_store_mesh_work *store_work;
 
+	batadv_sysfs_deprecated(attr);
+
 	if (buff[count - 1] == '\n')
 		buff[count - 1] = '\0';
 
@@ -1018,7 +1070,7 @@
 	dev_hold(net_dev);
 	INIT_WORK(&store_work->work, batadv_store_mesh_iface_work);
 	store_work->net_dev = net_dev;
-	strlcpy(store_work->soft_iface_name, buff,
+	strscpy(store_work->soft_iface_name, buff,
 		sizeof(store_work->soft_iface_name));
 
 	queue_work(batadv_event_workqueue, &store_work->work);
@@ -1033,6 +1085,8 @@
 	struct batadv_hard_iface *hard_iface;
 	ssize_t length;
 
+	batadv_sysfs_deprecated(attr);
+
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);
 	if (!hard_iface)
 		return 0;
@@ -1079,10 +1133,13 @@
 {
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
 	struct batadv_hard_iface *hard_iface;
+	struct batadv_priv *bat_priv;
 	u32 tp_override;
 	u32 old_tp_override;
 	bool ret;
 
+	batadv_sysfs_deprecated(attr);
+
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);
 	if (!hard_iface)
 		return -EINVAL;
@@ -1107,6 +1164,11 @@
 
 	atomic_set(&hard_iface->bat_v.throughput_override, tp_override);
 
+	if (hard_iface->soft_iface) {
+		bat_priv = netdev_priv(hard_iface->soft_iface);
+		batadv_netlink_notify_hardif(bat_priv, hard_iface);
+	}
+
 out:
 	batadv_hardif_put(hard_iface);
 	return count;
@@ -1120,6 +1182,8 @@
 	struct batadv_hard_iface *hard_iface;
 	u32 tp_override;
 
+	batadv_sysfs_deprecated(attr);
+
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);
 	if (!hard_iface)
 		return -EINVAL;
@@ -1205,57 +1269,3 @@
 	kobject_put(*hardif_obj);
 	*hardif_obj = NULL;
 }
-
-/**
- * batadv_throw_uevent() - Send an uevent with batman-adv specific env data
- * @bat_priv: the bat priv with all the soft interface information
- * @type: subsystem type of event. Stored in uevent's BATTYPE
- * @action: action type of event. Stored in uevent's BATACTION
- * @data: string with additional information to the event (ignored for
- *  BATADV_UEV_DEL). Stored in uevent's BATDATA
- *
- * Return: 0 on success or negative error number in case of failure
- */
-int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
-			enum batadv_uev_action action, const char *data)
-{
-	int ret = -ENOMEM;
-	struct kobject *bat_kobj;
-	char *uevent_env[4] = { NULL, NULL, NULL, NULL };
-
-	bat_kobj = &bat_priv->soft_iface->dev.kobj;
-
-	uevent_env[0] = kasprintf(GFP_ATOMIC,
-				  "%s%s", BATADV_UEV_TYPE_VAR,
-				  batadv_uev_type_str[type]);
-	if (!uevent_env[0])
-		goto out;
-
-	uevent_env[1] = kasprintf(GFP_ATOMIC,
-				  "%s%s", BATADV_UEV_ACTION_VAR,
-				  batadv_uev_action_str[action]);
-	if (!uevent_env[1])
-		goto out;
-
-	/* If the event is DEL, ignore the data field */
-	if (action != BATADV_UEV_DEL) {
-		uevent_env[2] = kasprintf(GFP_ATOMIC,
-					  "%s%s", BATADV_UEV_DATA_VAR, data);
-		if (!uevent_env[2])
-			goto out;
-	}
-
-	ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env);
-out:
-	kfree(uevent_env[0]);
-	kfree(uevent_env[1]);
-	kfree(uevent_env[2]);
-
-	if (ret)
-		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-			   "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n",
-			   batadv_uev_type_str[type],
-			   batadv_uev_action_str[action],
-			   (action == BATADV_UEV_DEL ? "NULL" : data), ret);
-	return ret;
-}
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index c1e3fb6..5e46609 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_SYSFS_H_
@@ -21,12 +9,11 @@
 
 #include "main.h"
 
+#include <linux/kobject.h>
+#include <linux/netdevice.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 
-struct kobject;
-struct net_device;
-
 #define BATADV_SYSFS_IF_MESH_SUBDIR "mesh"
 #define BATADV_SYSFS_IF_BAT_SUBDIR "batman_adv"
 /**
@@ -57,6 +44,8 @@
 			 char *buf, size_t count);
 };
 
+#ifdef CONFIG_BATMAN_ADV_SYSFS
+
 int batadv_sysfs_add_meshif(struct net_device *dev);
 void batadv_sysfs_del_meshif(struct net_device *dev);
 int batadv_sysfs_add_hardif(struct kobject **hardif_obj,
@@ -66,7 +55,39 @@
 			  struct batadv_softif_vlan *vlan);
 void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv,
 			   struct batadv_softif_vlan *vlan);
-int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
-			enum batadv_uev_action action, const char *data);
+
+#else
+
+static inline int batadv_sysfs_add_meshif(struct net_device *dev)
+{
+	return 0;
+}
+
+static inline void batadv_sysfs_del_meshif(struct net_device *dev)
+{
+}
+
+static inline int batadv_sysfs_add_hardif(struct kobject **hardif_obj,
+					  struct net_device *dev)
+{
+	return 0;
+}
+
+static inline void batadv_sysfs_del_hardif(struct kobject **hardif_obj)
+{
+}
+
+static inline int batadv_sysfs_add_vlan(struct net_device *dev,
+					struct batadv_softif_vlan *vlan)
+{
+	return 0;
+}
+
+static inline void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv,
+					 struct batadv_softif_vlan *vlan)
+{
+}
+
+#endif
 
 #endif /* _NET_BATMAN_ADV_SYSFS_H_ */
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 11520de..dd6a9a4 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2019  B.A.T.M.A.N. contributors:
  *
  * Edo Monticelli, Antonio Quartulli
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "tp_meter.h"
@@ -33,6 +21,7 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/kthread.h>
+#include <linux/limits.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/param.h>
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index 68e6009..78d310d 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2019  B.A.T.M.A.N. contributors:
  *
  * Edo Monticelli, Antonio Quartulli
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_TP_METER_H_
@@ -21,10 +9,9 @@
 
 #include "main.h"
 
+#include <linux/skbuff.h>
 #include <linux/types.h>
 
-struct sk_buff;
-
 void batadv_tp_meter_init(void);
 void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
 		     u32 test_length, u32 *cookie);
diff --git a/net/batman-adv/trace.c b/net/batman-adv/trace.c
new file mode 100644
index 0000000..3cedd2c
--- /dev/null
+++ b/net/batman-adv/trace.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
+ *
+ * Sven Eckelmann
+ */
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
new file mode 100644
index 0000000..d8f7645
--- /dev/null
+++ b/net/batman-adv/trace.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
+ *
+ * Sven Eckelmann
+ */
+
+#if !defined(_NET_BATMAN_ADV_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _NET_BATMAN_ADV_TRACE_H_
+
+#include "main.h"
+
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM batadv
+
+/* provide dummy function when tracing is disabled */
+#if !defined(CONFIG_BATMAN_ADV_TRACING)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, ...) \
+	static inline void trace_ ## name(proto) {}
+
+#endif /* CONFIG_BATMAN_ADV_TRACING */
+
+#define BATADV_MAX_MSG_LEN	256
+
+TRACE_EVENT(batadv_dbg,
+
+	    TP_PROTO(struct batadv_priv *bat_priv,
+		     struct va_format *vaf),
+
+	    TP_ARGS(bat_priv, vaf),
+
+	    TP_STRUCT__entry(
+		    __string(device, bat_priv->soft_iface->name)
+		    __string(driver, KBUILD_MODNAME)
+		    __dynamic_array(char, msg, BATADV_MAX_MSG_LEN)
+	    ),
+
+	    TP_fast_assign(
+		    __assign_str(device, bat_priv->soft_iface->name);
+		    __assign_str(driver, KBUILD_MODNAME);
+		    WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
+					   BATADV_MAX_MSG_LEN,
+					   vaf->fmt,
+					   *vaf->va) >= BATADV_MAX_MSG_LEN);
+	    ),
+
+	    TP_printk(
+		    "%s %s %s",
+		    __get_str(driver),
+		    __get_str(device),
+		    __get_str(msg)
+	    )
+);
+
+#endif /* _NET_BATMAN_ADV_TRACE_H_ || TRACE_HEADER_MULTI_READ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
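
The batadv_dbg tracepoint is fed a struct va_format, the usual idiom for forwarding printf-style arguments into a trace ring. A hedged sketch of a caller (mirroring how vaf-style tracepoints are normally driven, not a verbatim copy of the batman-adv logging code):

	void demo_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
	{
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		/* TP_fast_assign() above formats at most BATADV_MAX_MSG_LEN
		 * bytes of this into the trace buffer
		 */
		trace_batadv_dbg(bat_priv, &vaf);
		va_end(args);
	}
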
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index d21624c..8a482c5 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "translation-table.h"
@@ -205,7 +193,7 @@
  * Return: a pointer to the corresponding tt_global_entry struct if the client
  * is found, NULL otherwise.
  */
-static struct batadv_tt_global_entry *
+struct batadv_tt_global_entry *
 batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
 			   unsigned short vid)
 {
@@ -300,8 +288,7 @@
  *  possibly release it
  * @tt_global_entry: tt_global_entry to be free'd
  */
-static void
-batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
+void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
 {
 	kref_put(&tt_global_entry->common.refcount,
 		 batadv_tt_global_entry_release);
@@ -616,14 +603,26 @@
 				  struct batadv_tt_global_entry *tt_global,
 				  const char *message)
 {
+	struct batadv_tt_global_entry *tt_removed_entry;
+	struct hlist_node *tt_removed_node;
+
 	batadv_dbg(BATADV_DBG_TT, bat_priv,
 		   "Deleting global tt entry %pM (vid: %d): %s\n",
 		   tt_global->common.addr,
 		   batadv_print_vid(tt_global->common.vid), message);
 
-	batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
-			   batadv_choose_tt, &tt_global->common);
-	batadv_tt_global_entry_put(tt_global);
+	tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash,
+					     batadv_compare_tt,
+					     batadv_choose_tt,
+					     &tt_global->common);
+	if (!tt_removed_node)
+		return;
+
+	/* drop reference of removed hash entry */
+	tt_removed_entry = hlist_entry(tt_removed_node,
+				       struct batadv_tt_global_entry,
+				       common.hash_entry);
+	batadv_tt_global_entry_put(tt_removed_entry);
 }
 
 /**
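
The refactor above is subtle but deliberate: batadv_hash_remove() returns the hlist_node it actually unlinked, and the reference is dropped on the entry recovered from that node rather than on the pointer the caller started with. If two deleters race, only the one whose removal succeeded performs the put, closing a double-put window. The generic shape of the pattern, with demo_-prefixed names as stand-ins:

	struct hlist_node *node;
	struct demo_entry *removed;

	node = demo_hash_remove(hash, key);
	if (!node)
		return;		/* somebody else already removed and put it */

	/* hlist_entry() is container_of() for hash list nodes */
	removed = hlist_entry(node, struct demo_entry, hash_entry);
	demo_entry_put(removed);	/* drop exactly one reference */
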
@@ -1145,14 +1144,15 @@
  * batadv_tt_local_dump_entry() - Dump one TT local entry into a message
  * @msg :Netlink message to dump into
  * @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @bat_priv: The bat priv with all the soft interface information
  * @common: tt local & tt global common data
  *
  * Return: Error code, or 0 on success
  */
 static int
-batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid,
+			   struct netlink_callback *cb,
 			   struct batadv_priv *bat_priv,
 			   struct batadv_tt_common_entry *common)
 {
@@ -1173,12 +1173,14 @@
 
 	batadv_softif_vlan_put(vlan);
 
-	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
-			  NLM_F_MULTI,
+	hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+			  &batadv_netlink_family, NLM_F_MULTI,
 			  BATADV_CMD_GET_TRANSTABLE_LOCAL);
 	if (!hdr)
 		return -ENOBUFS;
 
+	genl_dump_check_consistent(cb, hdr);
+
 	if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
 	    nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
 	    nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
@@ -1201,34 +1203,39 @@
  * batadv_tt_local_dump_bucket() - Dump one TT local bucket into a message
  * @msg: Netlink message to dump into
  * @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
  * @bat_priv: The bat priv with all the soft interface information
- * @head: Pointer to the list containing the local tt entries
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
  * @idx_s: Number of entries to skip
  *
  * Return: Error code, or 0 on success
  */
 static int
-batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid,
+			    struct netlink_callback *cb,
 			    struct batadv_priv *bat_priv,
-			    struct hlist_head *head, int *idx_s)
+			    struct batadv_hashtable *hash, unsigned int bucket,
+			    int *idx_s)
 {
 	struct batadv_tt_common_entry *common;
 	int idx = 0;
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(common, head, hash_entry) {
+	spin_lock_bh(&hash->list_locks[bucket]);
+	cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+	hlist_for_each_entry(common, &hash->table[bucket], hash_entry) {
 		if (idx++ < *idx_s)
 			continue;
 
-		if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv,
+		if (batadv_tt_local_dump_entry(msg, portid, cb, bat_priv,
 					       common)) {
-			rcu_read_unlock();
+			spin_unlock_bh(&hash->list_locks[bucket]);
 			*idx_s = idx - 1;
 			return -EMSGSIZE;
 		}
 	}
-	rcu_read_unlock();
+	spin_unlock_bh(&hash->list_locks[bucket]);
 
 	*idx_s = 0;
 	return 0;
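
Two things changed in tandem in this dump path: the bucket walk now holds the bucket spinlock instead of RCU, and cb->seq is derived from the hash's generation counter so genl_dump_check_consistent() can flag dumps that raced with writers. The "<< 1 | 1" keeps the sequence number non-zero; a zero cb->seq would disarm netlink's consistency check. A hypothetical helper showing the handshake (demo_-prefixed names are stand-ins):

	static void demo_dump_bucket(struct sk_buff *msg,
				     struct netlink_callback *cb,
				     struct batadv_hashtable *hash,
				     unsigned int bucket)
	{
		struct batadv_tt_common_entry *common;

		spin_lock_bh(&hash->list_locks[bucket]);
		/* writers bump hash->generation on every insert/delete */
		cb->seq = atomic_read(&hash->generation) << 1 | 1;

		hlist_for_each_entry(common, &hash->table[bucket], hash_entry)
			/* each message calls genl_dump_check_consistent() */
			demo_emit_entry(msg, cb, common);

		spin_unlock_bh(&hash->list_locks[bucket]);
	}
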
@@ -1248,7 +1255,6 @@
 	struct batadv_priv *bat_priv;
 	struct batadv_hard_iface *primary_if = NULL;
 	struct batadv_hashtable *hash;
-	struct hlist_head *head;
 	int ret;
 	int ifindex;
 	int bucket = cb->args[0];
@@ -1276,10 +1282,8 @@
 	hash = bat_priv->tt.local_hash;
 
 	while (bucket < hash->size) {
-		head = &hash->table[bucket];
-
-		if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq,
-						bat_priv, head, &idx))
+		if (batadv_tt_local_dump_bucket(msg, portid, cb, bat_priv,
+						hash, bucket, &idx))
 			break;
 
 		bucket++;
@@ -1332,9 +1336,10 @@
 			   unsigned short vid, const char *message,
 			   bool roaming)
 {
+	struct batadv_tt_local_entry *tt_removed_entry;
 	struct batadv_tt_local_entry *tt_local_entry;
 	u16 flags, curr_flags = BATADV_NO_FLAGS;
-	void *tt_entry_exists;
+	struct hlist_node *tt_removed_node;
 
 	tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
 	if (!tt_local_entry)
@@ -1363,15 +1368,18 @@
 	 */
 	batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL);
 
-	tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash,
+	tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash,
 					     batadv_compare_tt,
 					     batadv_choose_tt,
 					     &tt_local_entry->common);
-	if (!tt_entry_exists)
+	if (!tt_removed_node)
 		goto out;
 
-	/* extra call to free the local tt entry */
-	batadv_tt_local_entry_put(tt_local_entry);
+	/* drop reference of removed hash entry */
+	tt_removed_entry = hlist_entry(tt_removed_node,
+				       struct batadv_tt_local_entry,
+				       common.hash_entry);
+	batadv_tt_local_entry_put(tt_removed_entry);
 
 out:
 	if (tt_local_entry)
@@ -3805,6 +3813,8 @@
  */
 void batadv_tt_free(struct batadv_priv *bat_priv)
 {
+	batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_ROAM, 1);
+
 	batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_TT, 1);
 	batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_TT, 1);
 
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 01b6c8e..4a98860 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
@@ -21,13 +9,12 @@
 
 #include "main.h"
 
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/types.h>
 
-struct netlink_callback;
-struct net_device;
-struct seq_file;
-struct sk_buff;
-
 int batadv_tt_init(struct batadv_priv *bat_priv);
 bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
 			 unsigned short vid, int ifindex, u32 mark);
@@ -41,6 +28,10 @@
 void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
 			       struct batadv_orig_node *orig_node,
 			       s32 match_vid, const char *message);
+struct batadv_tt_global_entry *
+batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
+			   unsigned short vid);
+void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry);
 int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
 				const u8 *addr, unsigned short vid);
 struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
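
batadv_tt_global_hash_find() and batadv_tt_global_entry_put() are promoted to the public header so other compilation units (the multicast-to-unicast code being the plausible consumer) can resolve global clients. The paired _put() implies the find returns a held reference, so a hedged caller looks like:

	struct batadv_tt_global_entry *tt;

	tt = batadv_tt_global_hash_find(bat_priv, addr, vid);
	if (!tt)
		return;		/* client unknown in the mesh */

	/* ... inspect the entry, e.g. its originator list ... */

	batadv_tt_global_entry_put(tt);	/* balance the find's reference */
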
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 40e69c9..aae63f0 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,19 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "main.h"
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index ef5867f..3698500 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_TVLV_H_
@@ -22,8 +10,7 @@
 #include "main.h"
 
 #include <linux/types.h>
-
-struct batadv_ogm_packet;
+#include <uapi/linux/batadv_packet.h>
 
 void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
 				    u8 type, u8 version,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 343d304..4d7f1ba 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,19 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_TYPES_H_
@@ -26,20 +14,23 @@
 #include <linux/average.h>
 #include <linux/bitops.h>
 #include <linux/compiler.h>
+#include <linux/if.h>
 #include <linux/if_ether.h>
 #include <linux/kref.h>
+#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/sched.h> /* for linux/wait.h */
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/spinlock.h>
+#include <linux/timer.h>
 #include <linux/types.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
-struct seq_file;
-
 #ifdef CONFIG_BATMAN_ADV_DAT
 
 /**
@@ -91,6 +82,9 @@
 
 	/** @ogm_seqno: OGM sequence number - used to identify each OGM */
 	atomic_t ogm_seqno;
+
+	/** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */
+	struct mutex ogm_buff_mutex;
 };
 
 /**
@@ -127,6 +121,18 @@
 	/** @elp_wq: workqueue used to schedule ELP transmissions */
 	struct delayed_work elp_wq;
 
+	/** @aggr_wq: workqueue used to transmit queued OGM packets */
+	struct delayed_work aggr_wq;
+
+	/** @aggr_list: queue of OGM packets waiting to be aggregated */
+	struct sk_buff_head aggr_list;
+
+	/** @aggr_len: size of the OGM aggregate (excluding ethernet header) */
+	unsigned int aggr_len;
+
+	/** @aggr_list_lock: protects aggr_list */
+	spinlock_t aggr_list_lock;
+
 	/**
 	 * @throughput_override: throughput override to disable link
 	 *  auto-detection
@@ -167,9 +173,6 @@
 	/** @list: list node for batadv_hardif_list */
 	struct list_head list;
 
-	/** @if_num: identificator of the interface */
-	unsigned int if_num;
-
 	/** @if_status: status of the interface for batman-adv */
 	char if_status;
 
@@ -233,6 +236,20 @@
 };
 
 /**
+ * struct batadv_orig_ifinfo_bat_iv - B.A.T.M.A.N. IV private orig_ifinfo members
+ */
+struct batadv_orig_ifinfo_bat_iv {
+	/**
+	 * @bcast_own: bitfield which counts the number of our OGMs this
+	 * orig_node rebroadcasted "back" to us (relative to last_real_seqno)
+	 */
+	DECLARE_BITMAP(bcast_own, BATADV_TQ_LOCAL_WINDOW_SIZE);
+
+	/** @bcast_own_sum: sum of bcast_own */
+	u8 bcast_own_sum;
+};
+
+/**
  * struct batadv_orig_ifinfo - originator info per outgoing interface
  */
 struct batadv_orig_ifinfo {
@@ -257,6 +274,9 @@
 	/** @batman_seqno_reset: time when the batman seqno window was reset */
 	unsigned long batman_seqno_reset;
 
+	/** @bat_iv: B.A.T.M.A.N. IV private structure */
+	struct batadv_orig_ifinfo_bat_iv bat_iv;
+
 	/** @refcount: number of contexts the object is used */
 	struct kref refcount;
 
@@ -339,19 +359,10 @@
  */
 struct batadv_orig_bat_iv {
 	/**
-	 * @bcast_own: set of bitfields (one per hard-interface) where each one
-	 * counts the number of our OGMs this orig_node rebroadcasted "back" to
-	 * us  (relative to last_real_seqno). Every bitfield is
-	 * BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
-	 */
-	unsigned long *bcast_own;
-
-	/** @bcast_own_sum: sum of bcast_own */
-	u8 *bcast_own_sum;
-
-	/**
-	 * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
-	 * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
+	 * @ogm_cnt_lock: lock protecting &batadv_orig_ifinfo_bat_iv.bcast_own,
+	 * &batadv_orig_ifinfo_bat_iv.bcast_own_sum,
+	 * &batadv_neigh_ifinfo_bat_iv.real_bits and
+	 * &batadv_neigh_ifinfo_bat_iv.real_packet_count
 	 */
 	spinlock_t ogm_cnt_lock;
 };
@@ -409,6 +420,17 @@
 	 *  list
 	 */
 	struct hlist_node mcast_want_all_ipv6_node;
+
+	/**
+	 * @mcast_want_all_rtr4_node: a list node for the mcast.want_all_rtr4
+	 *  list
+	 */
+	struct hlist_node mcast_want_all_rtr4_node;
+
+	/**
+	 * @mcast_want_all_rtr6_node: a list node for the mcast.want_all_rtr6
+	 *  list
+	 */
+	struct hlist_node mcast_want_all_rtr6_node;
 #endif
 
 	/** @capabilities: announced capabilities of this originator */
@@ -1091,12 +1113,15 @@
 	/** @gateway_list: list of available gateway nodes */
 	struct hlist_head gateway_list;
 
-	/** @list_lock: lock protecting gateway_list & curr_gw */
+	/** @list_lock: lock protecting gateway_list, curr_gw, generation */
 	spinlock_t list_lock;
 
 	/** @curr_gw: pointer to currently selected gateway node */
 	struct batadv_gw_node __rcu *curr_gw;
 
+	/** @generation: current (generation) sequence number */
+	unsigned int generation;
+
 	/**
 	 * @mode: gateway operation: off, client or server (see batadv_gw_modes)
 	 */
@@ -1173,6 +1198,26 @@
 };
 
 /**
+ * struct batadv_mcast_mla_flags - flags for the querier, bridge and tvlv state
+ */
+struct batadv_mcast_mla_flags {
+	/** @querier_ipv4: the current state of an IGMP querier in the mesh */
+	struct batadv_mcast_querier_state querier_ipv4;
+
+	/** @querier_ipv6: the current state of an MLD querier in the mesh */
+	struct batadv_mcast_querier_state querier_ipv6;
+
+	/** @enabled: whether the multicast tvlv is currently enabled */
+	unsigned char enabled:1;
+
+	/** @bridged: whether the soft interface has a bridge on top */
+	unsigned char bridged:1;
+
+	/** @tvlv_flags: the flags we have last sent in our mcast tvlv */
+	u8 tvlv_flags;
+};
+
+/**
  * struct batadv_priv_mcast - per mesh interface mcast data
  */
 struct batadv_priv_mcast {
@@ -1200,20 +1245,27 @@
 	 */
 	struct hlist_head want_all_ipv6_list;
 
-	/** @querier_ipv4: the current state of an IGMP querier in the mesh */
-	struct batadv_mcast_querier_state querier_ipv4;
+	/**
+	 * @want_all_rtr4_list: a list of orig_nodes wanting all routable IPv4
+	 *  multicast traffic
+	 */
+	struct hlist_head want_all_rtr4_list;
 
-	/** @querier_ipv6: the current state of an MLD querier in the mesh */
-	struct batadv_mcast_querier_state querier_ipv6;
+	/**
+	 * @want_all_rtr6_list: a list of orig_nodes wanting all routable IPv6
+	 *  multicast traffic
+	 */
+	struct hlist_head want_all_rtr6_list;
 
-	/** @flags: the flags we have last sent in our mcast tvlv */
-	u8 flags;
+	/**
+	 * @mla_flags: flags for the querier, bridge and tvlv state
+	 */
+	struct batadv_mcast_mla_flags mla_flags;
 
-	/** @enabled: whether the multicast tvlv is currently enabled */
-	unsigned char enabled:1;
-
-	/** @bridged: whether the soft interface has a bridge on top */
-	unsigned char bridged:1;
+	/**
+	 * @mla_lock: a lock protecting mla_list and mla_flags
+	 */
+	spinlock_t mla_lock;
 
 	/**
 	 * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP
@@ -1227,6 +1279,12 @@
 	/** @num_want_all_ipv6: counter for items in want_all_ipv6_list */
 	atomic_t num_want_all_ipv6;
 
+	/** @num_want_all_rtr4: counter for items in want_all_rtr4_list */
+	atomic_t num_want_all_rtr4;
+
+	/** @num_want_all_rtr6: counter for items in want_all_rtr6_list */
+	atomic_t num_want_all_rtr6;
+
 	/**
 	 * @want_lists_lock: lock for protecting modifications to mcasts
 	 *  want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked)
@@ -1485,6 +1543,9 @@
 	/** @ogm_seqno: OGM sequence number - used to identify each OGM */
 	atomic_t ogm_seqno;
 
+	/** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */
+	struct mutex ogm_buff_mutex;
+
 	/** @ogm_wq: workqueue used to schedule OGM transmissions */
 	struct delayed_work ogm_wq;
 };
@@ -1557,6 +1618,12 @@
 	 *  node's sender/originating side
 	 */
 	atomic_t multicast_mode;
+
+	/**
+	 * @multicast_fanout: Maximum number of packet copies to generate for a
+	 *  multicast-to-unicast conversion
+	 */
+	atomic_t multicast_fanout;
 #endif
 
 	/** @orig_interval: OGM broadcast interval in milliseconds */
@@ -1597,9 +1664,6 @@
 	/** @batman_queue_left: number of remaining OGM packet slots */
 	atomic_t batman_queue_left;
 
-	/** @num_ifaces: number of interfaces assigned to this mesh interface */
-	unsigned int num_ifaces;
-
 	/** @mesh_obj: kobject for sysfs mesh subdirectory */
 	struct kobject *mesh_obj;
 
@@ -2125,6 +2189,9 @@
 	/** @enable: init routing info when hard-interface is enabled */
 	int (*enable)(struct batadv_hard_iface *hard_iface);
 
+	/** @enabled: notification when hard-interface was enabled (optional) */
+	void (*enabled)(struct batadv_hard_iface *hard_iface);
+
 	/** @disable: de-init routing info when hard-interface is disabled */
 	void (*disable)(struct batadv_hard_iface *hard_iface);
 
@@ -2179,28 +2246,6 @@
  * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
  */
 struct batadv_algo_orig_ops {
-	/**
-	 * @free: free the resources allocated by the routing algorithm for an
-	 *  orig_node object (optional)
-	 */
-	void (*free)(struct batadv_orig_node *orig_node);
-
-	/**
-	 * @add_if: ask the routing algorithm to apply the needed changes to the
-	 *  orig_node due to a new hard-interface being added into the mesh
-	 *  (optional)
-	 */
-	int (*add_if)(struct batadv_orig_node *orig_node,
-		      unsigned int max_if_num);
-
-	/**
-	 * @del_if: ask the routing algorithm to apply the needed changes to the
-	 *  orig_node due to an hard-interface being removed from the mesh
-	 *  (optional)
-	 */
-	int (*del_if)(struct batadv_orig_node *orig_node,
-		      unsigned int max_if_num, unsigned int del_if_num);
-
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 	/** @print: print the originator table (optional) */
 	void (*print)(struct batadv_priv *priv, struct seq_file *seq,
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 4e2576f..4febc82 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -1,14 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
    Copyright (c) 2013-2014 Intel Corp.
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 and
-   only version 2 as published by the Free Software Foundation.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
 */
 
 #include <linux/if_arp.h>
@@ -167,30 +160,25 @@
 						  struct in6_addr *daddr,
 						  struct sk_buff *skb)
 {
-	struct lowpan_peer *peer;
-	struct in6_addr *nexthop;
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 	int count = atomic_read(&dev->peer_count);
+	const struct in6_addr *nexthop;
+	struct lowpan_peer *peer;
+	struct neighbour *neigh;
 
 	BT_DBG("peers %d addr %pI6c rt %p", count, daddr, rt);
 
-	/* If we have multiple 6lowpan peers, then check where we should
-	 * send the packet. If only one peer exists, then we can send the
-	 * packet right away.
-	 */
-	if (count == 1) {
-		rcu_read_lock();
-		peer = list_first_or_null_rcu(&dev->peers, struct lowpan_peer,
-					      list);
-		rcu_read_unlock();
-		return peer;
-	}
-
 	if (!rt) {
-		nexthop = &lowpan_cb(skb)->gw;
-
-		if (ipv6_addr_any(nexthop))
-			return NULL;
+		if (ipv6_addr_any(&lowpan_cb(skb)->gw)) {
+			/* There is neither route nor gateway,
+			 * probably the destination is a direct peer.
+			 */
+			nexthop = daddr;
+		} else {
+			/* There is a known gateway
+			 */
+			nexthop = &lowpan_cb(skb)->gw;
+		}
 	} else {
 		nexthop = rt6_nexthop(rt, daddr);
 
@@ -216,6 +204,20 @@
 		}
 	}
 
+	/* use the neighbour cache for matching addresses assigned by SLAAC
+	 */
+	neigh = __ipv6_neigh_lookup(dev->netdev, nexthop);
+	if (neigh) {
+		list_for_each_entry_rcu(peer, &dev->peers, list) {
+			if (!memcmp(neigh->ha, peer->lladdr, ETH_ALEN)) {
+				neigh_release(neigh);
+				rcu_read_unlock();
+				return peer;
+			}
+		}
+		neigh_release(neigh);
+	}
+
 	rcu_read_unlock();
 
 	return NULL;
@@ -467,7 +469,7 @@
 	iv.iov_len = skb->len;
 
 	memset(&msg, 0, sizeof(msg));
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, skb->len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, skb->len);
 
 	err = l2cap_chan_send(chan, &msg, skb->len);
 	if (err > 0) {
@@ -569,19 +571,11 @@
 	return err < 0 ? NET_XMIT_DROP : err;
 }
 
-static int bt_dev_init(struct net_device *dev)
-{
-	netdev_lockdep_set_classes(dev);
-
-	return 0;
-}
-
 static const struct net_device_ops netdev_ops = {
-	.ndo_init		= bt_dev_init,
 	.ndo_start_xmit		= bt_xmit,
 };
 
-static struct header_ops header_ops = {
+static const struct header_ops header_ops = {
 	.create	= header_create,
 };
 
@@ -607,7 +601,7 @@
 	int err;
 
 	rtnl_lock();
-	err = dev_open(netdev);
+	err = dev_open(netdev, NULL);
 	if (err < 0)
 		BT_INFO("iface %s cannot be opened (%d)", netdev->name, err);
 	rtnl_unlock();
@@ -1108,8 +1102,8 @@
 	return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(lowpan_enable_fops, lowpan_enable_get,
-			lowpan_enable_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(lowpan_enable_fops, lowpan_enable_get,
+			 lowpan_enable_set, "%llu\n");
 
 static ssize_t lowpan_control_write(struct file *fp,
 				    const char __user *user_buffer,
@@ -1278,9 +1272,10 @@
 
 static int __init bt_6lowpan_init(void)
 {
-	lowpan_enable_debugfs = debugfs_create_file("6lowpan_enable", 0644,
-						    bt_debugfs, NULL,
-						    &lowpan_enable_fops);
+	lowpan_enable_debugfs = debugfs_create_file_unsafe("6lowpan_enable",
+							   0644, bt_debugfs,
+							   NULL,
+							   &lowpan_enable_fops);
 	lowpan_control_debugfs = debugfs_create_file("6lowpan_control", 0644,
 						     bt_debugfs, NULL,
 						     &lowpan_control_fops);
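
DEFINE_DEBUGFS_ATTRIBUTE() plus debugfs_create_file_unsafe() is the documented replacement for DEFINE_SIMPLE_ATTRIBUTE() on debugfs: the generated helpers pin the file with debugfs_file_get()/debugfs_file_put() themselves, so the VFS proxy wrapper that plain debugfs_create_file() would install becomes unnecessary. The generic pattern, with demo_-prefixed names as stand-ins and parent assumed to be an existing dentry:

	static u64 demo_val;

	static int demo_get(void *data, u64 *val)
	{
		*val = *(u64 *)data;
		return 0;
	}

	static int demo_set(void *data, u64 val)
	{
		*(u64 *)data = val;
		return 0;
	}

	DEFINE_DEBUGFS_ATTRIBUTE(demo_fops, demo_get, demo_set, "%llu\n");

	/* "unsafe" only means "no proxy ops": removal protection is
	 * handled inside the attribute helpers instead
	 */
	debugfs_create_file_unsafe("demo", 0644, parent, &demo_val, &demo_fops);
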
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index db82a40..3803135 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Bluetooth subsystem configuration
 #
@@ -9,7 +10,8 @@
 	select CRC16
 	select CRYPTO
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_AES
+	select CRYPTO_LIB_AES
+	imply CRYPTO_AES
 	select CRYPTO_CMAC
 	select CRYPTO_ECB
 	select CRYPTO_SHA256
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 51c2cf2..26526be 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -1,15 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
    Copyright (c) 2010,2011 Code Aurora Forum.  All rights reserved.
    Copyright (c) 2011,2012 Intel Corp.
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 and
-   only version 2 as published by the Free Software Foundation.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
 */
 
 #include <net/bluetooth/bluetooth.h>
@@ -63,7 +56,7 @@
 
 	memset(&msg, 0, sizeof(msg));
 
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, total_len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, total_len);
 
 	l2cap_chan_send(chan, &msg, total_len);
 
@@ -174,7 +167,7 @@
 			num_ctrl++;
 	}
 
-	len = num_ctrl * sizeof(struct a2mp_cl) + sizeof(*rsp);
+	len = struct_size(rsp, cl, num_ctrl);
 	rsp = kmalloc(len, GFP_ATOMIC);
 	if (!rsp) {
 		read_unlock(&hci_dev_list_lock);
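
struct_size(rsp, cl, num_ctrl) computes the size of a structure with a trailing flexible array, sizeof(*rsp) + num_ctrl * sizeof(rsp->cl[0]), but saturates instead of wrapping on overflow, so an absurd controller count cannot yield an undersized allocation. Shape of the idiom, on a made-up struct:

	struct demo_cl {
		u8 id;
		u8 type;
		u8 status;
	};

	struct demo_rsp {
		u8 status;
		struct demo_cl cl[];	/* flexible array member */
	} *rsp;

	/* overflow-checked: never silently wraps to a small size */
	rsp = kmalloc(struct_size(rsp, cl, num_ctrl), GFP_ATOMIC);
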
diff --git a/net/bluetooth/a2mp.h b/net/bluetooth/a2mp.h
index a4ff3ea..0029d51 100644
--- a/net/bluetooth/a2mp.h
+++ b/net/bluetooth/a2mp.h
@@ -1,15 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
    Copyright (c) 2010,2011 Code Aurora Forum.  All rights reserved.
    Copyright (c) 2011,2012 Intel Corp.
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 and
-   only version 2 as published by the Free Software Foundation.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
 */
 
 #ifndef __A2MP_H
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index deacc52..5f508c5 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -154,15 +154,25 @@
 }
 EXPORT_SYMBOL(bt_sock_unlink);
 
-void bt_accept_enqueue(struct sock *parent, struct sock *sk)
+void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
 {
 	BT_DBG("parent %p, sk %p", parent, sk);
 
 	sock_hold(sk);
-	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+
+	if (bh)
+		bh_lock_sock_nested(sk);
+	else
+		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+
 	list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
 	bt_sk(sk)->parent = parent;
-	release_sock(sk);
+
+	if (bh)
+		bh_unlock_sock(sk);
+	else
+		release_sock(sk);
+
 	parent->sk_ack_backlog++;
 }
 EXPORT_SYMBOL(bt_accept_enqueue);
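
The new bool selects the locking primitive by caller context: lock_sock() can sleep and is only legal in process context, while bh_lock_sock_nested() is a plain spinlock acquisition that is safe from a bottom half. Call-site sketch (the context attributions are assumptions based on how the Bluetooth paths usually run):

	/* process context, e.g. an RFCOMM or SCO accept path: may sleep */
	bt_accept_enqueue(parent, sk, false);

	/* softirq/bottom-half context, e.g. the L2CAP receive path:
	 * must not sleep, so take the BH spinlock variant
	 */
	bt_accept_enqueue(parent, sk, true);
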
@@ -450,7 +460,7 @@
 	if (sk->sk_state == BT_LISTEN)
 		return bt_accept_poll(sk);
 
-	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
 		mask |= EPOLLERR |
 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
@@ -460,7 +470,7 @@
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= EPOLLHUP;
 
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	if (sk->sk_state == BT_CLOSED)
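
bt_sock_poll() probes these queues without taking their locks, so the plain head check is swapped for a READ_ONCE-based one that cannot tear against a concurrent enqueue/dequeue. The lockless helper is, roughly:

	static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
	{
		/* an empty queue's head points back at itself */
		return READ_ONCE(list->next) == (const struct sk_buff *)list;
	}
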
@@ -511,14 +521,6 @@
 		err = put_user(amount, (int __user *) arg);
 		break;
 
-	case SIOCGSTAMP:
-		err = sock_get_timestamp(sk, (struct timeval __user *) arg);
-		break;
-
-	case SIOCGSTAMPNS:
-		err = sock_get_timestampns(sk, (struct timespec __user *) arg);
-		break;
-
 	default:
 		err = -ENOIOCTLCMD;
 		break;
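
Dropping SIOCGSTAMP/SIOCGSTAMPNS here leans on the generic socket layer: since the SIOCGSTAMP rework these ioctls are served through the proto_ops->gettstamp hook, which protocols wire to the common sock_gettstamp() helper (an assumption about where the handling moved, consistent with the protocol code no longer needing it). Sketch:

	static const struct proto_ops demo_bt_ops = {
		.family	   = PF_BLUETOOTH,
		.ioctl	   = bt_sock_ioctl,
		/* timestamp ioctls handled by the core from here on */
		.gettstamp = sock_gettstamp,
	};
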
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index 78bec8d..9c711f0 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -1,14 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
    Copyright (c) 2011,2012 Intel Corp.
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 and
-   only version 2 as published by the Free Software Foundation.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
 */
 
 #include <net/bluetooth/bluetooth.h>
@@ -161,7 +154,6 @@
 	}
 
 	shash->tfm = tfm;
-	shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	ret = crypto_shash_digest(shash, plaintext, psize, output);
 
diff --git a/net/bluetooth/amp.h b/net/bluetooth/amp.h
index 8848f81..832764d 100644
--- a/net/bluetooth/amp.h
+++ b/net/bluetooth/amp.h
@@ -1,14 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
    Copyright (c) 2011,2012 Intel Corp.
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 and
-   only version 2 as published by the Free Software Foundation.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
 */
 
 #ifndef __AMP_H
diff --git a/net/bluetooth/bnep/Kconfig b/net/bluetooth/bnep/Kconfig
index 9b70317..aac02b5 100644
--- a/net/bluetooth/bnep/Kconfig
+++ b/net/bluetooth/bnep/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config BT_BNEP
 	tristate "BNEP protocol support"
 	depends on BT_BREDR
diff --git a/net/bluetooth/bnep/Makefile b/net/bluetooth/bnep/Makefile
index c7821e7..8af9d56 100644
--- a/net/bluetooth/bnep/Makefile
+++ b/net/bluetooth/bnep/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux Bluetooth BNEP layer.
 #
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index 40854c9..24f18b1 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -1,18 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
   BNEP protocol definition for Linux Bluetooth stack (BlueZ).
   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
 
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License, version 2, as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 
 #ifndef _BNEP_H
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 7b39658..43c2841 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -489,9 +489,6 @@
 
 	add_wait_queue(sk_sleep(sk), &wait);
 	while (1) {
-		/* Ensure session->terminate is updated */
-		smp_mb__before_atomic();
-
 		if (atomic_read(&s->terminate))
 			break;
 		/* RX */
@@ -512,6 +509,10 @@
 				break;
 		netif_wake_queue(dev);
 
+		/*
+		 * wait_woken() performs the necessary memory barriers
+		 * for us; see the header comment for this primitive.
+		 */
 		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 	}
 	remove_wait_queue(sk_sleep(sk), &wait);
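
The explicit smp_mb__before_atomic() can go because wait_woken() and its wake-side counterpart already order the waiter's condition read against the waker's write. The pairing, reduced to its essentials:

	/* waiter (this kthread): the barrier lives inside wait_woken() */
	add_wait_queue(sk_sleep(sk), &wait);
	while (!atomic_read(&s->terminate))
		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
	remove_wait_queue(sk_sleep(sk), &wait);

	/* waker (session teardown): woken_wake_function() orders this
	 * store against the wakeup
	 */
	atomic_inc(&s->terminate);
	wake_up_interruptible(sk_sleep(sk));
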
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 00deacd..cfd83c5 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -49,18 +49,17 @@
 	return 0;
 }
 
-static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int do_bnep_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp)
 {
 	struct bnep_connlist_req cl;
 	struct bnep_connadd_req  ca;
 	struct bnep_conndel_req  cd;
 	struct bnep_conninfo ci;
 	struct socket *nsock;
-	void __user *argp = (void __user *)arg;
 	__u32 supp_feat = BIT(BNEP_SETUP_RESPONSE);
 	int err;
 
-	BT_DBG("cmd %x arg %lx", cmd, arg);
+	BT_DBG("cmd %x arg %p", cmd, argp);
 
 	switch (cmd) {
 	case BNEPCONNADD:
@@ -134,16 +133,22 @@
 	return 0;
 }
 
+static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return do_bnep_sock_ioctl(sock, cmd, (void __user *)arg);
+}
+
 #ifdef CONFIG_COMPAT
 static int bnep_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
+	void __user *argp = compat_ptr(arg);
 	if (cmd == BNEPGETCONNLIST) {
 		struct bnep_connlist_req cl;
+		unsigned __user *p = argp;
 		u32 uci;
 		int err;
 
-		if (get_user(cl.cnum, (u32 __user *) arg) ||
-				get_user(uci, (u32 __user *) (arg + 4)))
+		if (get_user(cl.cnum, p) || get_user(uci, p + 1))
 			return -EFAULT;
 
 		cl.ci = compat_ptr(uci);
@@ -153,13 +158,13 @@
 
 		err = bnep_get_connlist(&cl);
 
-		if (!err && put_user(cl.cnum, (u32 __user *) arg))
+		if (!err && put_user(cl.cnum, p))
 			err = -EFAULT;
 
 		return err;
 	}
 
-	return bnep_sock_ioctl(sock, cmd, arg);
+	return do_bnep_sock_ioctl(sock, cmd, argp);
 }
 #endif
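
The refactoring above is the standard native/compat ioctl split; a self-contained sketch of the shape, with hypothetical foo_* names standing in for the bnep ones: both entry points funnel into one helper that only ever sees a __user pointer, the native path casting the long and the compat path translating it with compat_ptr().

#include <linux/compat.h>
#include <linux/net.h>

static int do_foo_ioctl(struct socket *sock, unsigned int cmd,
			void __user *argp)
{
	/* ... copy_from_user(argp) / copy_to_user(argp) as needed ... */
	return 0;
}

static int foo_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	return do_foo_ioctl(sock, cmd, (void __user *)arg);
}

#ifdef CONFIG_COMPAT
static int foo_compat_ioctl(struct socket *sock, unsigned int cmd,
			    unsigned long arg)
{
	return do_foo_ioctl(sock, cmd, compat_ptr(arg));
}
#endif
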
 
diff --git a/net/bluetooth/cmtp/Kconfig b/net/bluetooth/cmtp/Kconfig
index 939da0f..c833778 100644
--- a/net/bluetooth/cmtp/Kconfig
+++ b/net/bluetooth/cmtp/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config BT_CMTP
 	tristate "CMTP protocol support"
 	depends on BT_BREDR && ISDN_CAPI
diff --git a/net/bluetooth/cmtp/Makefile b/net/bluetooth/cmtp/Makefile
index 890a9a5..b2262ca 100644
--- a/net/bluetooth/cmtp/Makefile
+++ b/net/bluetooth/cmtp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux Bluetooth CMTP layer
 #
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 7f26a5a..07cfa32 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -288,9 +288,6 @@
 
 	add_wait_queue(sk_sleep(sk), &wait);
 	while (1) {
-		/* Ensure session->terminate is updated */
-		smp_mb__before_atomic();
-
 		if (atomic_read(&session->terminate))
 			break;
 		if (sk->sk_state != BT_CONNECTED)
@@ -306,6 +303,10 @@
 
 		cmtp_process_transmit(session);
 
+		/*
+		 * wait_woken() performs the necessary memory barriers
+		 * for us; see the header comment for this primitive.
+		 */
 		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 	}
 	remove_wait_queue(sk_sleep(sk), &wait);
@@ -431,9 +432,10 @@
 		/* Stop session thread */
 		atomic_inc(&session->terminate);
 
-		/* Ensure session->terminate is updated */
-		smp_mb__after_atomic();
-
+		/*
+		 * See the comment preceding the call to wait_woken()
+		 * in cmtp_session().
+		 */
 		wake_up_interruptible(sk_sleep(session->sock->sk));
 	} else
 		err = -ENOENT;
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index e08f28f..defdd48 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -63,17 +63,16 @@
 	return 0;
 }
 
-static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int do_cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp)
 {
 	struct cmtp_connadd_req ca;
 	struct cmtp_conndel_req cd;
 	struct cmtp_connlist_req cl;
 	struct cmtp_conninfo ci;
 	struct socket *nsock;
-	void __user *argp = (void __user *)arg;
 	int err;
 
-	BT_DBG("cmd %x arg %lx", cmd, arg);
+	BT_DBG("cmd %x arg %p", cmd, argp);
 
 	switch (cmd) {
 	case CMTPCONNADD:
@@ -137,16 +136,22 @@
 	return -EINVAL;
 }
 
+static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return do_cmtp_sock_ioctl(sock, cmd, (void __user *)arg);
+}
+
 #ifdef CONFIG_COMPAT
 static int cmtp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
+	void __user *argp = compat_ptr(arg);
 	if (cmd == CMTPGETCONNLIST) {
 		struct cmtp_connlist_req cl;
+		u32 __user *p = argp;
 		u32 uci;
 		int err;
 
-		if (get_user(cl.cnum, (u32 __user *) arg) ||
-				get_user(uci, (u32 __user *) (arg + 4)))
+		if (get_user(cl.cnum, p) || get_user(uci, p + 1))
 			return -EFAULT;
 
 		cl.ci = compat_ptr(uci);
@@ -156,13 +161,13 @@
 
 		err = cmtp_get_connlist(&cl);
 
-		if (!err && put_user(cl.cnum, (u32 __user *) arg))
+		if (!err && put_user(cl.cnum, p))
 			err = -EFAULT;
 
 		return err;
 	}
 
-	return cmtp_sock_ioctl(sock, cmd, arg);
+	return do_cmtp_sock_ioctl(sock, cmd, argp);
 }
 #endif
 
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index bd4978c..ad5b0ac 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -520,6 +520,9 @@
 	set_bit(HCI_CONN_POWER_SAVE, &conn->flags);
 	conn->disc_timeout = HCI_DISCONN_TIMEOUT;
 
+	/* Set Default Authenticated payload timeout to 30s */
+	conn->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT;
+
 	if (conn->role == HCI_ROLE_MASTER)
 		conn->out = true;
 
@@ -912,7 +915,7 @@
 				    sizeof(cp), &cp);
 		}
 
-		__hci_req_enable_ext_advertising(req);
+		__hci_req_enable_ext_advertising(req, 0x00);
 	} else {
 		struct hci_cp_le_set_adv_param cp;
 
@@ -1392,8 +1395,16 @@
 		return 0;
 
 encrypt:
-	if (test_bit(HCI_CONN_ENCRYPT, &conn->flags))
+	if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) {
+		/* Ensure that the encryption key size has been read,
+		 * otherwise stall the upper layer responses.
+		 */
+		if (!conn->enc_key_size)
+			return 0;
+
+		/* Nothing else needed, all requirements are met */
 		return 1;
+	}
 
 	hci_conn_encrypt(conn);
 	return 0;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 74b29c7..04bc793 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -30,6 +30,7 @@
 #include <linux/rfkill.h>
 #include <linux/debugfs.h>
 #include <linux/crypto.h>
+#include <linux/property.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -1355,6 +1356,32 @@
 	return err;
 }
 
+/**
+ * hci_dev_get_bd_addr_from_property - Get the Bluetooth Device Address
+ *				       (BD_ADDR) for a HCI device from
+ *				       a firmware node property.
+ * @hdev:	The HCI device
+ *
+ * Search the firmware node for 'local-bd-address'.
+ *
+ * All-zero BD addresses are rejected, because such a property may exist
+ * in the firmware tables without ever having been filled in by the
+ * firmware. For example, the DTS could define 'local-bd-address' with an
+ * all-zero value.
+ */
+static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev)
+{
+	struct fwnode_handle *fwnode = dev_fwnode(hdev->dev.parent);
+	bdaddr_t ba;
+	int ret;
+
+	ret = fwnode_property_read_u8_array(fwnode, "local-bd-address",
+					    (u8 *)&ba, sizeof(ba));
+	if (ret < 0 || !bacmp(&ba, BDADDR_ANY))
+		return;
+
+	bacpy(&hdev->public_addr, &ba);
+}
+
 static int hci_dev_do_open(struct hci_dev *hdev)
 {
 	int ret = 0;
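
For orientation, a two-line sketch (driver callback name assumed, not from this patch) of how a transport driver opts into the lookup added above: it sets HCI_QUIRK_USE_BDADDR_PROPERTY and provides a set_bdaddr callback, and the hunk below then resolves 'local-bd-address' during hci_dev_do_open() before programming the controller.

	/* in a hypothetical transport driver's probe() */
	set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
	hdev->set_bdaddr = foo_set_bdaddr;	/* assumed driver callback */
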
@@ -1422,6 +1449,20 @@
 		if (hdev->setup)
 			ret = hdev->setup(hdev);
 
+		if (ret)
+			goto setup_failed;
+
+		if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) {
+			if (!bacmp(&hdev->public_addr, BDADDR_ANY))
+				hci_dev_get_bd_addr_from_property(hdev);
+
+			if (bacmp(&hdev->public_addr, BDADDR_ANY) &&
+			    hdev->set_bdaddr)
+				ret = hdev->set_bdaddr(hdev,
+						       &hdev->public_addr);
+		}
+
+setup_failed:
 		/* The transport driver can set these quirks before
 		 * creating the HCI device or in its setup callback.
 		 *
@@ -2578,6 +2619,9 @@
 		bt_dev_err(hdev, "command tx timeout");
 	}
 
+	if (hdev->cmd_timeout)
+		hdev->cmd_timeout(hdev);
+
 	atomic_set(&hdev->cmd_cnt, 1);
 	queue_work(hdev->workqueue, &hdev->cmd_work);
 }
@@ -2783,7 +2827,7 @@
 		memset(adv_instance->scan_rsp_data, 0,
 		       sizeof(adv_instance->scan_rsp_data));
 	} else {
-		if (hdev->adv_instance_cnt >= HCI_MAX_ADV_INSTANCES ||
+		if (hdev->adv_instance_cnt >= hdev->le_num_of_adv_sets ||
 		    instance < 1 || instance > HCI_MAX_ADV_INSTANCES)
 			return -EOVERFLOW;
 
@@ -2839,6 +2883,20 @@
 	return NULL;
 }
 
+struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk(
+				struct list_head *bdaddr_list, bdaddr_t *bdaddr,
+				u8 type)
+{
+	struct bdaddr_list_with_irk *b;
+
+	list_for_each_entry(b, bdaddr_list, list) {
+		if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type)
+			return b;
+	}
+
+	return NULL;
+}
+
 void hci_bdaddr_list_clear(struct list_head *bdaddr_list)
 {
 	struct bdaddr_list *b, *n;
@@ -2871,6 +2929,35 @@
 	return 0;
 }
 
+int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr,
+					u8 type, u8 *peer_irk, u8 *local_irk)
+{
+	struct bdaddr_list_with_irk *entry;
+
+	if (!bacmp(bdaddr, BDADDR_ANY))
+		return -EBADF;
+
+	if (hci_bdaddr_list_lookup(list, bdaddr, type))
+		return -EEXIST;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	bacpy(&entry->bdaddr, bdaddr);
+	entry->bdaddr_type = type;
+
+	if (peer_irk)
+		memcpy(entry->peer_irk, peer_irk, 16);
+
+	if (local_irk)
+		memcpy(entry->local_irk, local_irk, 16);
+
+	list_add(&entry->list, list);
+
+	return 0;
+}
+
 int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type)
 {
 	struct bdaddr_list *entry;
@@ -2890,6 +2977,26 @@
 	return 0;
 }
 
+int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr,
+							u8 type)
+{
+	struct bdaddr_list_with_irk *entry;
+
+	if (!bacmp(bdaddr, BDADDR_ANY)) {
+		hci_bdaddr_list_clear(list);
+		return 0;
+	}
+
+	entry = hci_bdaddr_list_lookup_with_irk(list, bdaddr, type);
+	if (!entry)
+		return -ENOENT;
+
+	list_del(&entry->list);
+	kfree(entry);
+
+	return 0;
+}
+
 /* This function requires the caller holds hdev->lock */
 struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
 					       bdaddr_t *addr, u8 addr_type)
@@ -3084,13 +3191,18 @@
 	hdev->le_max_tx_time = 0x0148;
 	hdev->le_max_rx_len = 0x001b;
 	hdev->le_max_rx_time = 0x0148;
+	hdev->le_max_key_size = SMP_MAX_ENC_KEY_SIZE;
+	hdev->le_min_key_size = SMP_MIN_ENC_KEY_SIZE;
 	hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M;
 	hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M;
+	hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES;
 
 	hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT;
 	hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT;
 	hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE;
 	hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE;
+	hdev->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT;
+	hdev->min_enc_key_size = HCI_MIN_ENC_KEY_SIZE;
 
 	mutex_init(&hdev->lock);
 	mutex_init(&hdev->req_lock);
@@ -3336,7 +3448,7 @@
 /* Reset HCI device */
 int hci_reset_dev(struct hci_dev *hdev)
 {
-	const u8 hw_err[] = { HCI_EV_HARDWARE_ERROR, 0x01, 0x00 };
+	static const u8 hw_err[] = { HCI_EV_HARDWARE_ERROR, 0x01, 0x00 };
 	struct sk_buff *skb;
 
 	skb = bt_skb_alloc(3, GFP_ATOMIC);
@@ -4272,6 +4384,9 @@
 		return;
 	}
 
+	/* If we reach this point, this event matches the last command sent */
+	hci_dev_clear_flag(hdev, HCI_CMD_PENDING);
+
 	/* If the command succeeded and there's still more commands in
 	 * this request the request is not yet complete.
 	 */
@@ -4382,6 +4497,8 @@
 
 		hdev->sent_cmd = skb_clone(skb, GFP_KERNEL);
 		if (hdev->sent_cmd) {
+			if (hci_req_status_pend(hdev))
+				hci_dev_set_flag(hdev, HCI_CMD_PENDING);
 			atomic_dec(&hdev->cmd_cnt);
 			hci_send_frame(hdev, skb);
 			if (test_bit(HCI_RESET, &hdev->flags))
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 51f5b1e..402e2cc 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -433,6 +433,35 @@
 	return 0;
 }
 
+static int min_encrypt_key_size_set(void *data, u64 val)
+{
+	struct hci_dev *hdev = data;
+
+	if (val < 1 || val > 16)
+		return -EINVAL;
+
+	hci_dev_lock(hdev);
+	hdev->min_enc_key_size = val;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+static int min_encrypt_key_size_get(void *data, u64 *val)
+{
+	struct hci_dev *hdev = data;
+
+	hci_dev_lock(hdev);
+	*val = hdev->min_enc_key_size;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(min_encrypt_key_size_fops,
+			min_encrypt_key_size_get,
+			min_encrypt_key_size_set, "%llu\n");
+
 static int auto_accept_delay_get(void *data, u64 *val)
 {
 	struct hci_dev *hdev = data;
@@ -545,6 +574,8 @@
 	if (lmp_ssp_capable(hdev)) {
 		debugfs_create_file("ssp_debug_mode", 0444, hdev->debugfs,
 				    hdev, &ssp_debug_mode_fops);
+		debugfs_create_file("min_encrypt_key_size", 0644, hdev->debugfs,
+				    hdev, &min_encrypt_key_size_fops);
 		debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs,
 				    hdev, &auto_accept_delay_fops);
 	}
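
For reference, DEFINE_SIMPLE_ATTRIBUTE(fops, get, set, fmt) from include/linux/fs.h roughly expands to the following for the attribute above (simplified; the real macro also validates the format string and fills in the remaining file_operations with simple_attr_read()/simple_attr_write()):

static int min_encrypt_key_size_fops_open(struct inode *inode,
					  struct file *file)
{
	return simple_attr_open(inode, file,
				min_encrypt_key_size_get,
				min_encrypt_key_size_set, "%llu\n");
}
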
@@ -941,6 +972,35 @@
 DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get,
 			adv_max_interval_set, "%llu\n");
 
+static int auth_payload_timeout_set(void *data, u64 val)
+{
+	struct hci_dev *hdev = data;
+
+	if (val < 0x0001 || val > 0xffff)
+		return -EINVAL;
+
+	hci_dev_lock(hdev);
+	hdev->auth_payload_timeout = val;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+static int auth_payload_timeout_get(void *data, u64 *val)
+{
+	struct hci_dev *hdev = data;
+
+	hci_dev_lock(hdev);
+	*val = hdev->auth_payload_timeout;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(auth_payload_timeout_fops,
+			auth_payload_timeout_get,
+			auth_payload_timeout_set, "%llu\n");
+
 DEFINE_QUIRK_ATTRIBUTE(quirk_strict_duplicate_filter,
 		       HCI_QUIRK_STRICT_DUPLICATE_FILTER);
 DEFINE_QUIRK_ATTRIBUTE(quirk_simultaneous_discovery,
@@ -994,6 +1054,8 @@
 			    &adv_max_interval_fops);
 	debugfs_create_u16("discov_interleaved_timeout", 0644, hdev->debugfs,
 			   &hdev->discov_interleaved_timeout);
+	debugfs_create_file("auth_payload_timeout", 0644, hdev->debugfs, hdev,
+			    &auth_payload_timeout_fops);
 
 	debugfs_create_file("quirk_strict_duplicate_filter", 0644,
 			    hdev->debugfs, hdev,
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f12555f..c1d3a30 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -579,6 +579,51 @@
 		memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
 }
 
+static void hci_cc_read_auth_payload_timeout(struct hci_dev *hdev,
+					     struct sk_buff *skb)
+{
+	struct hci_rp_read_auth_payload_to *rp = (void *)skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+	if (conn)
+		conn->auth_payload_timeout = __le16_to_cpu(rp->timeout);
+
+	hci_dev_unlock(hdev);
+}
+
+static void hci_cc_write_auth_payload_timeout(struct hci_dev *hdev,
+					      struct sk_buff *skb)
+{
+	struct hci_rp_write_auth_payload_to *rp = (void *)skb->data;
+	struct hci_conn *conn;
+	void *sent;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_AUTH_PAYLOAD_TO);
+	if (!sent)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+	if (conn)
+		conn->auth_payload_timeout = get_unaligned_le16(sent + 2);
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cc_read_local_features(struct hci_dev *hdev,
 				       struct sk_buff *skb)
 {
@@ -1454,6 +1499,45 @@
 	hdev->le_def_tx_time = le16_to_cpu(sent->tx_time);
 }
 
+static void hci_cc_le_add_to_resolv_list(struct hci_dev *hdev,
+					 struct sk_buff *skb)
+{
+	struct hci_cp_le_add_to_resolv_list *sent;
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_RESOLV_LIST);
+	if (!sent)
+		return;
+
+	hci_bdaddr_list_add_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
+				sent->bdaddr_type, sent->peer_irk,
+				sent->local_irk);
+}
+
+static void hci_cc_le_del_from_resolv_list(struct hci_dev *hdev,
+					  struct sk_buff *skb)
+{
+	struct hci_cp_le_del_from_resolv_list *sent;
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_RESOLV_LIST);
+	if (!sent)
+		return;
+
+	hci_bdaddr_list_del_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
+			    sent->bdaddr_type);
+}
+
 static void hci_cc_le_clear_resolv_list(struct hci_dev *hdev,
 				       struct sk_buff *skb)
 {
@@ -2936,6 +3020,25 @@
 		goto unlock;
 	}
 
+	/* Set the default Authenticated Payload Timeout after
+	 * an LE Link is established. As per Core Spec v5.0, Vol 2, Part B
+	 * Section 3.3, the HCI command WRITE_AUTH_PAYLOAD_TIMEOUT should be
+	 * sent when the link is active and encryption is enabled; the conn
+	 * type can be either LE or ACL, and the controller must support LMP
+	 * Ping. The link must also use AES-CCM encryption.
+	 */
+	if (test_bit(HCI_CONN_ENCRYPT, &conn->flags) &&
+	    test_bit(HCI_CONN_AES_CCM, &conn->flags) &&
+	    ((conn->type == ACL_LINK && lmp_ping_capable(hdev)) ||
+	     (conn->type == LE_LINK && (hdev->le_features[0] & HCI_LE_PING)))) {
+		struct hci_cp_write_auth_payload_to cp;
+
+		cp.handle = cpu_to_le16(conn->handle);
+		cp.timeout = cpu_to_le16(hdev->auth_payload_timeout);
+		hci_send_cmd(conn->hdev, HCI_OP_WRITE_AUTH_PAYLOAD_TO,
+			     sizeof(cp), &cp);
+	}
+
 notify:
 	if (conn->state == BT_CONFIG) {
 		if (!ev->status)
@@ -3131,6 +3234,14 @@
 		hci_cc_write_sc_support(hdev, skb);
 		break;
 
+	case HCI_OP_READ_AUTH_PAYLOAD_TO:
+		hci_cc_read_auth_payload_timeout(hdev, skb);
+		break;
+
+	case HCI_OP_WRITE_AUTH_PAYLOAD_TO:
+		hci_cc_write_auth_payload_timeout(hdev, skb);
+		break;
+
 	case HCI_OP_READ_LOCAL_VERSION:
 		hci_cc_read_local_version(hdev, skb);
 		break;
@@ -3279,6 +3390,14 @@
 		hci_cc_le_write_def_data_len(hdev, skb);
 		break;
 
+	case HCI_OP_LE_ADD_TO_RESOLV_LIST:
+		hci_cc_le_add_to_resolv_list(hdev, skb);
+		break;
+
+	case HCI_OP_LE_DEL_FROM_RESOLV_LIST:
+		hci_cc_le_del_from_resolv_list(hdev, skb);
+		break;
+
 	case HCI_OP_LE_CLEAR_RESOLV_LIST:
 		hci_cc_le_clear_resolv_list(hdev, skb);
 		break;
@@ -3357,6 +3476,12 @@
 	hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
 			     req_complete_skb);
 
+	if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) {
+		bt_dev_err(hdev,
+			   "unexpected event for opcode 0x%4.4x", *opcode);
+		return;
+	}
+
 	if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q))
 		queue_work(hdev->workqueue, &hdev->cmd_work);
 }
@@ -3464,6 +3589,12 @@
 		hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete,
 				     req_complete_skb);
 
+	if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) {
+		bt_dev_err(hdev,
+			   "unexpected event for opcode 0x%4.4x", *opcode);
+		return;
+	}
+
 	if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q))
 		queue_work(hdev->workqueue, &hdev->cmd_work);
 }
@@ -3509,8 +3640,8 @@
 		return;
 	}
 
-	if (skb->len < sizeof(*ev) || skb->len < sizeof(*ev) +
-	    ev->num_hndl * sizeof(struct hci_comp_pkts_info)) {
+	if (skb->len < sizeof(*ev) ||
+	    skb->len < struct_size(ev, handles, ev->num_hndl)) {
 		BT_DBG("%s bad parameters", hdev->name);
 		return;
 	}
@@ -3597,8 +3728,8 @@
 		return;
 	}
 
-	if (skb->len < sizeof(*ev) || skb->len < sizeof(*ev) +
-	    ev->num_hndl * sizeof(struct hci_comp_blocks_info)) {
+	if (skb->len < sizeof(*ev) ||
+	    skb->len < struct_size(ev, handles, ev->num_hndl)) {
 		BT_DBG("%s bad parameters", hdev->name);
 		return;
 	}
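
Both length checks above move from open-coded arithmetic to struct_size() from include/linux/overflow.h: for a struct ending in a flexible array it yields sizeof(*ev) + count * sizeof(ev->handles[0]), but saturates instead of wrapping on overflow, so a large attacker-supplied num_hndl cannot produce an undersized bound. A minimal sketch with a hypothetical event layout:

#include <linux/overflow.h>
#include <linux/skbuff.h>

struct foo_ev {				/* illustrative, not the real event */
	__u8 num_hndl;
	__le16 handles[];
};

/* overflow-safe equivalent of
 * sizeof(*ev) + ev->num_hndl * sizeof(ev->handles[0]) */
static bool foo_ev_len_ok(const struct sk_buff *skb, const struct foo_ev *ev)
{
	return skb->len >= struct_size(ev, handles, ev->num_hndl);
}
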
@@ -4890,31 +5021,27 @@
 	hci_debugfs_create_conn(conn);
 	hci_conn_add_sysfs(conn);
 
-	if (!status) {
-		/* The remote features procedure is defined for master
-		 * role only. So only in case of an initiated connection
-		 * request the remote features.
-		 *
-		 * If the local controller supports slave-initiated features
-		 * exchange, then requesting the remote features in slave
-		 * role is possible. Otherwise just transition into the
-		 * connected state without requesting the remote features.
-		 */
-		if (conn->out ||
-		    (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) {
-			struct hci_cp_le_read_remote_features cp;
+	/* The remote features procedure is defined for master
+	 * role only. So only in case of an initiated connection
+	 * request the remote features.
+	 *
+	 * If the local controller supports slave-initiated features
+	 * exchange, then requesting the remote features in slave
+	 * role is possible. Otherwise just transition into the
+	 * connected state without requesting the remote features.
+	 */
+	if (conn->out ||
+	    (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) {
+		struct hci_cp_le_read_remote_features cp;
 
-			cp.handle = __cpu_to_le16(conn->handle);
+		cp.handle = __cpu_to_le16(conn->handle);
 
-			hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES,
-				     sizeof(cp), &cp);
+		hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES,
+			     sizeof(cp), &cp);
 
-			hci_conn_hold(conn);
-		} else {
-			conn->state = BT_CONNECTED;
-			hci_connect_cfm(conn, status);
-		}
+		hci_conn_hold(conn);
 	} else {
+		conn->state = BT_CONNECTED;
 		hci_connect_cfm(conn, status);
 	}
 
@@ -5390,7 +5517,7 @@
 					   ev->data, ev->length);
 		}
 
-		ptr += sizeof(*ev) + ev->length + 1;
+		ptr += sizeof(*ev) + ev->length;
 	}
 
 	hci_dev_unlock(hdev);
@@ -5668,6 +5795,12 @@
 		return true;
 	}
 
+	/* Check if request ended in Command Status - no way to retrieve
+	 * any extra parameters in this case.
+	 */
+	if (hdr->evt == HCI_EV_CMD_STATUS)
+		return false;
+
 	if (hdr->evt != HCI_EV_CMD_COMPLETE) {
 		bt_dev_err(hdev, "last event is not cmd complete (0x%2.2x)",
 			   hdr->evt);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index e8c9ef1..7f6a581 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -46,6 +46,11 @@
 	skb_queue_purge(&req->cmd_q);
 }
 
+bool hci_req_status_pend(struct hci_dev *hdev)
+{
+	return hdev->req_status == HCI_REQ_PEND;
+}
+
 static int req_run(struct hci_request *req, hci_req_complete_t complete,
 		   hci_req_complete_skb_t complete_skb)
 {
@@ -1049,6 +1054,7 @@
 	struct hci_cp_le_set_adv_param cp;
 	u8 own_addr_type, enable = 0x01;
 	bool connectable;
+	u16 adv_min_interval, adv_max_interval;
 	u32 flags;
 
 	flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance);
@@ -1082,16 +1088,30 @@
 		return;
 
 	memset(&cp, 0, sizeof(cp));
-	cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval);
-	cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval);
 
-	if (connectable)
+	if (connectable) {
 		cp.type = LE_ADV_IND;
-	else if (get_cur_adv_instance_scan_rsp_len(hdev))
-		cp.type = LE_ADV_SCAN_IND;
-	else
-		cp.type = LE_ADV_NONCONN_IND;
 
+		adv_min_interval = hdev->le_adv_min_interval;
+		adv_max_interval = hdev->le_adv_max_interval;
+	} else {
+		if (get_cur_adv_instance_scan_rsp_len(hdev))
+			cp.type = LE_ADV_SCAN_IND;
+		else
+			cp.type = LE_ADV_NONCONN_IND;
+
+		if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE) ||
+		    hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) {
+			adv_min_interval = DISCOV_LE_FAST_ADV_INT_MIN;
+			adv_max_interval = DISCOV_LE_FAST_ADV_INT_MAX;
+		} else {
+			adv_min_interval = hdev->le_adv_min_interval;
+			adv_max_interval = hdev->le_adv_max_interval;
+		}
+	}
+
+	cp.min_interval = cpu_to_le16(adv_min_interval);
+	cp.max_interval = cpu_to_le16(adv_max_interval);
 	cp.own_address_type = own_addr_type;
 	cp.channel_map = hdev->le_adv_channel_map;
 
@@ -1556,7 +1576,7 @@
 	connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
 		      mgmt_get_connectable(hdev);
 
-	 if (!is_advertising_allowed(hdev, connectable))
+	if (!is_advertising_allowed(hdev, connectable))
 		return -EPERM;
 
 	/* Set require_privacy to true only when non-connectable
@@ -1596,7 +1616,7 @@
 	cp.own_addr_type = own_addr_type;
 	cp.channel_map = hdev->le_adv_channel_map;
 	cp.tx_power = 127;
-	cp.handle = 0;
+	cp.handle = instance;
 
 	if (flags & MGMT_ADV_FLAG_SEC_2M) {
 		cp.primary_phy = HCI_ADV_PHY_1M;
@@ -1638,11 +1658,21 @@
 	return 0;
 }
 
-void __hci_req_enable_ext_advertising(struct hci_request *req)
+int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance)
 {
+	struct hci_dev *hdev = req->hdev;
 	struct hci_cp_le_set_ext_adv_enable *cp;
 	struct hci_cp_ext_adv_set *adv_set;
 	u8 data[sizeof(*cp) + sizeof(*adv_set) * 1];
+	struct adv_info *adv_instance;
+
+	if (instance > 0) {
+		adv_instance = hci_find_adv_instance(hdev, instance);
+		if (!adv_instance)
+			return -EINVAL;
+	} else {
+		adv_instance = NULL;
+	}
 
 	cp = (void *) data;
 	adv_set = (void *) cp->data;
@@ -1654,11 +1684,23 @@
 
 	memset(adv_set, 0, sizeof(*adv_set));
 
-	adv_set->handle = 0;
+	adv_set->handle = instance;
+
+	/* Set duration per instance since controller is responsible for
+	 * scheduling it.
+	 */
+	if (adv_instance && adv_instance->duration) {
+		u16 duration = adv_instance->duration * MSEC_PER_SEC;
+
+		/* Time = N * 10 ms */
+		adv_set->duration = cpu_to_le16(duration / 10);
+	}
 
 	hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE,
 		    sizeof(*cp) + sizeof(*adv_set) * cp->num_of_sets,
 		    data);
+
+	return 0;
 }
 
 int __hci_req_start_ext_adv(struct hci_request *req, u8 instance)
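
The duration field programmed above counts 10 ms units (Time = N * 10 ms), so a worked example under an assumed configuration: an instance with adv_instance->duration of 2 seconds gives

	u16 duration = 2 * MSEC_PER_SEC;		/* 2000 ms, assumed 2 s */
	adv_set->duration = cpu_to_le16(duration / 10);	/* N = 200 */
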
@@ -1674,7 +1716,7 @@
 		return err;
 
 	__hci_req_update_scan_rsp_data(req, instance);
-	__hci_req_enable_ext_advertising(req);
+	__hci_req_enable_ext_advertising(req, instance);
 
 	return 0;
 }
@@ -1718,10 +1760,13 @@
 		adv_instance->remaining_time =
 				adv_instance->remaining_time - timeout;
 
-	hdev->adv_instance_timeout = timeout;
-	queue_delayed_work(hdev->req_workqueue,
+	/* Only use work for scheduling instances with legacy advertising */
+	if (!ext_adv_capable(hdev)) {
+		hdev->adv_instance_timeout = timeout;
+		queue_delayed_work(hdev->req_workqueue,
 			   &hdev->adv_instance_expire,
 			   msecs_to_jiffies(timeout * 1000));
+	}
 
 	/* If we're just re-scheduling the same instance again then do not
 	 * execute any HCI commands. This happens when a single instance is
@@ -2739,7 +2784,8 @@
 				if (!ext_adv_capable(hdev))
 					__hci_req_enable_advertising(req);
 				else if (!err)
-					__hci_req_enable_ext_advertising(req);
+					__hci_req_enable_ext_advertising(req,
+									 0x00);
 			}
 		} else if (!list_empty(&hdev->adv_instances)) {
 			struct adv_info *adv_instance;
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 692cc8b..a7019fb 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -37,6 +37,7 @@
 
 void hci_req_init(struct hci_request *req, struct hci_dev *hdev);
 void hci_req_purge(struct hci_request *req);
+bool hci_req_status_pend(struct hci_dev *hdev);
 int hci_req_run(struct hci_request *req, hci_req_complete_t complete);
 int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete);
 void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
@@ -82,7 +83,7 @@
 
 int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance);
 int __hci_req_start_ext_adv(struct hci_request *req, u8 instance);
-void __hci_req_enable_ext_advertising(struct hci_request *req);
+int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance);
 void __hci_req_clear_ext_adv_sets(struct hci_request *req);
 int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
 			   bool use_rpa, struct adv_info *adv_instance,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1506e16..d32077b 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -831,8 +831,6 @@
 	if (!sk)
 		return 0;
 
-	hdev = hci_pi(sk)->hdev;
-
 	switch (hci_pi(sk)->channel) {
 	case HCI_CHANNEL_MONITOR:
 		atomic_dec(&monitor_promisc);
@@ -854,6 +852,7 @@
 
 	bt_sock_unlink(&hci_sk_list, sk);
 
+	hdev = hci_pi(sk)->hdev;
 	if (hdev) {
 		if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
 			/* When releasing a user channel exclusive access,
@@ -1383,9 +1382,9 @@
 
 	if (mask & HCI_CMSG_TSTAMP) {
 #ifdef CONFIG_COMPAT
-		struct compat_timeval ctv;
+		struct old_timeval32 ctv;
 #endif
-		struct timeval tv;
+		struct __kernel_old_timeval tv;
 		void *data;
 		int len;
 
diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig
index bc8610b..14100f3 100644
--- a/net/bluetooth/hidp/Kconfig
+++ b/net/bluetooth/hidp/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config BT_HIDP
 	tristate "HIDP protocol support"
 	depends on BT_BREDR && INPUT
diff --git a/net/bluetooth/hidp/Makefile b/net/bluetooth/hidp/Makefile
index a9ee115..f41b0aa 100644
--- a/net/bluetooth/hidp/Makefile
+++ b/net/bluetooth/hidp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux Bluetooth HIDP layer
 #
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 253975c..bef84b9 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -101,6 +101,7 @@
 {
 	struct sk_buff *skb;
 	struct sock *sk = sock->sk;
+	int ret;
 
 	BT_DBG("session %p data %p size %d", session, data, size);
 
@@ -114,13 +115,17 @@
 	}
 
 	skb_put_u8(skb, hdr);
-	if (data && size > 0)
+	if (data && size > 0) {
 		skb_put_data(skb, data, size);
+		ret = size;
+	} else {
+		ret = 0;
+	}
 
 	skb_queue_tail(transmit, skb);
 	wake_up_interruptible(sk_sleep(sk));
 
-	return 0;
+	return ret;
 }
 
 static int hidp_send_ctrl_message(struct hidp_session *session,
@@ -262,7 +267,7 @@
 	set_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
 	data[0] = report_number;
 	ret = hidp_send_ctrl_message(session, report_type, data, 1);
-	if (ret)
+	if (ret < 0)
 		goto err;
 
 	/* Wait for the return of the report. The returned report
@@ -338,7 +343,7 @@
 	data[0] = reportnum;
 	set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
 	ret = hidp_send_ctrl_message(session, report_type, data, count);
-	if (ret)
+	if (ret < 0)
 		goto err;
 
 	/* Wait for the ACK from the device. */
@@ -649,7 +654,7 @@
 }
 
 static int hidp_setup_input(struct hidp_session *session,
-				struct hidp_connadd_req *req)
+				const struct hidp_connadd_req *req)
 {
 	struct input_dev *input;
 	int i;
@@ -748,7 +753,7 @@
 /* This function sets up the hid device. It does not add it
    to the HID system. That is done in hidp_add_connection(). */
 static int hidp_setup_hid(struct hidp_session *session,
-				struct hidp_connadd_req *req)
+				const struct hidp_connadd_req *req)
 {
 	struct hid_device *hid;
 	int err;
@@ -775,7 +780,7 @@
 	hid->version = req->version;
 	hid->country = req->country;
 
-	strncpy(hid->name, req->name, sizeof(hid->name));
+	strscpy(hid->name, req->name, sizeof(hid->name));
 
 	snprintf(hid->phys, sizeof(hid->phys), "%pMR",
 		 &l2cap_pi(session->ctrl_sock->sk)->chan->src);
@@ -807,7 +812,7 @@
 
 /* initialize session devices */
 static int hidp_session_dev_init(struct hidp_session *session,
-				 struct hidp_connadd_req *req)
+				 const struct hidp_connadd_req *req)
 {
 	int ret;
 
@@ -906,7 +911,7 @@
 static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
 			    struct socket *ctrl_sock,
 			    struct socket *intr_sock,
-			    struct hidp_connadd_req *req,
+			    const struct hidp_connadd_req *req,
 			    struct l2cap_conn *conn)
 {
 	struct hidp_session *session;
@@ -1074,6 +1079,10 @@
 static void hidp_session_terminate(struct hidp_session *session)
 {
 	atomic_inc(&session->terminate);
+	/*
+	 * See the comment preceding the call to wait_woken()
+	 * in hidp_session_run().
+	 */
 	wake_up_interruptible(&hidp_session_wq);
 }
 
@@ -1193,8 +1202,6 @@
 		 *    thread is woken up by ->sk_state_changed().
 		 */
 
-		/* Ensure session->terminate is updated */
-		smp_mb__before_atomic();
 		if (atomic_read(&session->terminate))
 			break;
 
@@ -1228,14 +1235,15 @@
 		hidp_process_transmit(session, &session->ctrl_transmit,
 				      session->ctrl_sock);
 
+		/*
+		 * wait_woken() performs the necessary memory barriers
+		 * for us; see the header comment for this primitive.
+		 */
 		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 	}
 	remove_wait_queue(&hidp_session_wq, &wait);
 
 	atomic_inc(&session->terminate);
-
-	/* Ensure session->terminate is updated */
-	smp_mb__after_atomic();
 }
 
 static int hidp_session_wake_function(wait_queue_entry_t *wait,
@@ -1335,7 +1343,7 @@
 	return 0;
 }
 
-int hidp_connection_add(struct hidp_connadd_req *req,
+int hidp_connection_add(const struct hidp_connadd_req *req,
 			struct socket *ctrl_sock,
 			struct socket *intr_sock)
 {
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 8798492..6ef88d0 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -122,7 +122,7 @@
 	struct hidp_conninfo __user *ci;
 };
 
-int hidp_connection_add(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
+int hidp_connection_add(const struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
 int hidp_connection_del(struct hidp_conndel_req *req);
 int hidp_get_connlist(struct hidp_connlist_req *req);
 int hidp_get_conninfo(struct hidp_conninfo *ci);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 1eaac01..03be6a4 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -46,9 +46,8 @@
 	return 0;
 }
 
-static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int do_hidp_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp)
 {
-	void __user *argp = (void __user *) arg;
 	struct hidp_connadd_req ca;
 	struct hidp_conndel_req cd;
 	struct hidp_connlist_req cl;
@@ -57,7 +56,7 @@
 	struct socket *isock;
 	int err;
 
-	BT_DBG("cmd %x arg %lx", cmd, arg);
+	BT_DBG("cmd %x arg %p", cmd, argp);
 
 	switch (cmd) {
 	case HIDPCONNADD:
@@ -76,6 +75,7 @@
 			sockfd_put(csock);
 			return err;
 		}
+		ca.name[sizeof(ca.name) - 1] = 0;
 
 		err = hidp_connection_add(&ca, csock, isock);
 		if (!err && copy_to_user(argp, &ca, sizeof(ca)))
@@ -122,6 +122,11 @@
 	return -EINVAL;
 }
 
+static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return do_hidp_sock_ioctl(sock, cmd, (void __user *)arg);
+}
+
 #ifdef CONFIG_COMPAT
 struct compat_hidp_connadd_req {
 	int   ctrl_sock;	/* Connected control socket */
@@ -141,13 +146,15 @@
 
 static int hidp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
+	void __user *argp = compat_ptr(arg);
+	int err;
+
 	if (cmd == HIDPGETCONNLIST) {
 		struct hidp_connlist_req cl;
+		u32 __user *p = argp;
 		u32 uci;
-		int err;
 
-		if (get_user(cl.cnum, (u32 __user *) arg) ||
-				get_user(uci, (u32 __user *) (arg + 4)))
+		if (get_user(cl.cnum, p) || get_user(uci, p + 1))
 			return -EFAULT;
 
 		cl.ci = compat_ptr(uci);
@@ -157,39 +164,55 @@
 
 		err = hidp_get_connlist(&cl);
 
-		if (!err && put_user(cl.cnum, (u32 __user *) arg))
+		if (!err && put_user(cl.cnum, p))
 			err = -EFAULT;
 
 		return err;
 	} else if (cmd == HIDPCONNADD) {
-		struct compat_hidp_connadd_req ca;
-		struct hidp_connadd_req __user *uca;
+		struct compat_hidp_connadd_req ca32;
+		struct hidp_connadd_req ca;
+		struct socket *csock;
+		struct socket *isock;
 
-		uca = compat_alloc_user_space(sizeof(*uca));
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
 
-		if (copy_from_user(&ca, (void __user *) arg, sizeof(ca)))
+		if (copy_from_user(&ca32, (void __user *) arg, sizeof(ca32)))
 			return -EFAULT;
 
-		if (put_user(ca.ctrl_sock, &uca->ctrl_sock) ||
-				put_user(ca.intr_sock, &uca->intr_sock) ||
-				put_user(ca.parser, &uca->parser) ||
-				put_user(ca.rd_size, &uca->rd_size) ||
-				put_user(compat_ptr(ca.rd_data), &uca->rd_data) ||
-				put_user(ca.country, &uca->country) ||
-				put_user(ca.subclass, &uca->subclass) ||
-				put_user(ca.vendor, &uca->vendor) ||
-				put_user(ca.product, &uca->product) ||
-				put_user(ca.version, &uca->version) ||
-				put_user(ca.flags, &uca->flags) ||
-				put_user(ca.idle_to, &uca->idle_to) ||
-				copy_to_user(&uca->name[0], &ca.name[0], 128))
-			return -EFAULT;
+		ca.ctrl_sock = ca32.ctrl_sock;
+		ca.intr_sock = ca32.intr_sock;
+		ca.parser = ca32.parser;
+		ca.rd_size = ca32.rd_size;
+		ca.rd_data = compat_ptr(ca32.rd_data);
+		ca.country = ca32.country;
+		ca.subclass = ca32.subclass;
+		ca.vendor = ca32.vendor;
+		ca.product = ca32.product;
+		ca.version = ca32.version;
+		ca.flags = ca32.flags;
+		ca.idle_to = ca32.idle_to;
+		ca32.name[sizeof(ca32.name) - 1] = '\0';
+		memcpy(ca.name, ca32.name, 128);
 
-		arg = (unsigned long) uca;
+		csock = sockfd_lookup(ca.ctrl_sock, &err);
+		if (!csock)
+			return err;
 
-		/* Fall through. We don't actually write back any _changes_
-		   to the structure anyway, so there's no need to copy back
-		   into the original compat version */
+		isock = sockfd_lookup(ca.intr_sock, &err);
+		if (!isock) {
+			sockfd_put(csock);
+			return err;
+		}
+
+		err = hidp_connection_add(&ca, csock, isock);
+		if (!err && copy_to_user(argp, &ca32, sizeof(ca32)))
+			err = -EFAULT;
+
+		sockfd_put(csock);
+		sockfd_put(isock);
+
+		return err;
 	}
 
 	return hidp_sock_ioctl(sock, cmd, arg);
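
The HIDPCONNADD branch above adopts the preferred replacement for compat_alloc_user_space(): rather than building a shadow native struct in user memory, the 32-bit layout is copied in and widened field by field in kernel memory, then handed to the shared helper. A condensed sketch of the idiom with hypothetical foo_* types:

#include <linux/compat.h>
#include <linux/uaccess.h>

struct foo_req32 {			/* 32-bit user layout, illustrative */
	compat_uptr_t data;
	u32 len;
};

struct foo_req {			/* native layout, illustrative */
	void __user *data;
	u32 len;
};

static int foo_compat_connadd(void __user *argp)
{
	struct foo_req32 r32;
	struct foo_req r;

	if (copy_from_user(&r32, argp, sizeof(r32)))
		return -EFAULT;

	r.data = compat_ptr(r32.data);	/* widen the 32-bit pointer */
	r.len  = r32.len;

	return do_foo_connadd(&r);	/* assumed shared helper */
}
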
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index d17a473..da7fdbd 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -51,9 +51,6 @@
 static LIST_HEAD(chan_list);
 static DEFINE_RWLOCK(chan_list_lock);
 
-static u16 le_max_credits = L2CAP_LE_MAX_CREDITS;
-static u16 le_default_mps = L2CAP_LE_DEFAULT_MPS;
-
 static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 				       u8 code, u8 ident, u16 dlen, void *data);
 static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len,
@@ -171,11 +168,18 @@
 	return c;
 }
 
-static struct l2cap_chan *__l2cap_global_chan_by_addr(__le16 psm, bdaddr_t *src)
+static struct l2cap_chan *__l2cap_global_chan_by_addr(__le16 psm, bdaddr_t *src,
+						      u8 src_type)
 {
 	struct l2cap_chan *c;
 
 	list_for_each_entry(c, &chan_list, global_l) {
+		if (src_type == BDADDR_BREDR && c->src_type != BDADDR_BREDR)
+			continue;
+
+		if (src_type != BDADDR_BREDR && c->src_type == BDADDR_BREDR)
+			continue;
+
 		if (c->sport == psm && !bacmp(&c->src, src))
 			return c;
 	}
@@ -188,7 +192,7 @@
 
 	write_lock(&chan_list_lock);
 
-	if (psm && __l2cap_global_chan_by_addr(psm, src)) {
+	if (psm && __l2cap_global_chan_by_addr(psm, src, chan->src_type)) {
 		err = -EADDRINUSE;
 		goto done;
 	}
@@ -212,7 +216,8 @@
 
 		err = -EINVAL;
 		for (p = start; p <= end; p += incr)
-			if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src)) {
+			if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src,
+							 chan->src_type)) {
 				chan->psm   = cpu_to_le16(p);
 				chan->sport = cpu_to_le16(p);
 				err = 0;
@@ -513,14 +518,16 @@
 }
 EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults);
 
-static void l2cap_le_flowctl_init(struct l2cap_chan *chan)
+static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits)
 {
 	chan->sdu = NULL;
 	chan->sdu_last_frag = NULL;
 	chan->sdu_len = 0;
-	chan->tx_credits = 0;
-	chan->rx_credits = le_max_credits;
-	chan->mps = min_t(u16, chan->imtu, le_default_mps);
+	chan->tx_credits = tx_credits;
+	/* Derive MPS from connection MTU to stop HCI fragmentation */
+	chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE);
+	/* Give enough credits for a full packet */
+	chan->rx_credits = (chan->imtu / chan->mps) + 1;
 
 	skb_queue_head_init(&chan->tx_q);
 }
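
The rewritten initializer derives both values from the link instead of the removed module-wide le_max_credits/le_default_mps knobs. A worked example with assumed numbers — an LE link whose ACL payload (chan->conn->mtu) is 247 bytes and a channel with an imtu of 672:

	/* L2CAP_HDR_SIZE == 4; all input values assumed for illustration */
	chan->mps        = min_t(u16, 672, 247 - 4);	/* 243 */
	chan->rx_credits = 672 / 243 + 1;		/* 2 + 1 = 3 */

Three credits cover one full SDU: 2 bytes of SDU length plus 672 bytes of payload is 674 bytes, i.e. three K-frames of at most 243 bytes each.
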
@@ -681,9 +688,9 @@
 	u16 result;
 
 	if (test_bit(FLAG_DEFER_SETUP, &chan->flags))
-		result = L2CAP_CR_AUTHORIZATION;
+		result = L2CAP_CR_LE_AUTHORIZATION;
 	else
-		result = L2CAP_CR_BAD_PSM;
+		result = L2CAP_CR_LE_BAD_PSM;
 
 	l2cap_state_change(chan, BT_DISCONN);
 
@@ -1282,6 +1289,8 @@
 	if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags))
 		return;
 
+	l2cap_le_flowctl_init(chan, 0);
+
 	req.psm     = chan->psm;
 	req.scid    = cpu_to_le16(chan->scid);
 	req.mtu     = cpu_to_le16(chan->imtu);
@@ -1340,6 +1349,21 @@
 		       sizeof(req), &req);
 }
 
+static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
+{
+	/* The minimum encryption key size needs to be enforced by the
+	 * host stack before establishing any L2CAP connections. The
+	 * specification in theory allows a minimum of 1, but to align
+	 * BR/EDR and LE transports, a minimum of 7 is chosen.
+	 *
+	 * This check might also be called for unencrypted connections
+	 * that have no key size requirements. Ensure that the link is
+	 * actually encrypted before enforcing a key size.
+	 */
+	return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
+		hcon->enc_key_size >= hcon->hdev->min_enc_key_size);
+}
+
 static void l2cap_do_start(struct l2cap_chan *chan)
 {
 	struct l2cap_conn *conn = chan->conn;
@@ -1357,9 +1381,14 @@
 	if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
 		return;
 
-	if (l2cap_chan_check_security(chan, true) &&
-	    __l2cap_no_conn_pending(chan))
+	if (!l2cap_chan_check_security(chan, true) ||
+	    !__l2cap_no_conn_pending(chan))
+		return;
+
+	if (l2cap_check_enc_key_size(conn->hcon))
 		l2cap_start_connection(chan);
+	else
+		__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
 }
 
 static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
@@ -1438,7 +1467,10 @@
 				continue;
 			}
 
-			l2cap_start_connection(chan);
+			if (l2cap_check_enc_key_size(conn->hcon))
+				l2cap_start_connection(chan);
+			else
+				l2cap_chan_close(chan, ECONNREFUSED);
 
 		} else if (chan->state == BT_CONNECT2) {
 			struct l2cap_conn_rsp rsp;
@@ -3336,16 +3368,22 @@
 
 	while (len >= L2CAP_CONF_OPT_SIZE) {
 		len -= l2cap_get_conf_opt(&req, &type, &olen, &val);
+		if (len < 0)
+			break;
 
 		hint  = type & L2CAP_CONF_HINT;
 		type &= L2CAP_CONF_MASK;
 
 		switch (type) {
 		case L2CAP_CONF_MTU:
+			if (olen != 2)
+				break;
 			mtu = val;
 			break;
 
 		case L2CAP_CONF_FLUSH_TO:
+			if (olen != 2)
+				break;
 			chan->flush_to = val;
 			break;
 
@@ -3353,26 +3391,30 @@
 			break;
 
 		case L2CAP_CONF_RFC:
-			if (olen == sizeof(rfc))
-				memcpy(&rfc, (void *) val, olen);
+			if (olen != sizeof(rfc))
+				break;
+			memcpy(&rfc, (void *) val, olen);
 			break;
 
 		case L2CAP_CONF_FCS:
+			if (olen != 1)
+				break;
 			if (val == L2CAP_FCS_NONE)
 				set_bit(CONF_RECV_NO_FCS, &chan->conf_state);
 			break;
 
 		case L2CAP_CONF_EFS:
-			if (olen == sizeof(efs)) {
-				remote_efs = 1;
-				memcpy(&efs, (void *) val, olen);
-			}
+			if (olen != sizeof(efs))
+				break;
+			remote_efs = 1;
+			memcpy(&efs, (void *) val, olen);
 			break;
 
 		case L2CAP_CONF_EWS:
+			if (olen != 2)
+				break;
 			if (!(chan->conn->local_fixed_chan & L2CAP_FC_A2MP))
 				return -ECONNREFUSED;
-
 			set_bit(FLAG_EXT_CTRL, &chan->flags);
 			set_bit(CONF_EWS_RECV, &chan->conf_state);
 			chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW;
@@ -3382,7 +3424,6 @@
 		default:
 			if (hint)
 				break;
-
 			result = L2CAP_CONF_UNKNOWN;
 			*((u8 *) ptr++) = type;
 			break;
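
Every option parser in l2cap_core.c now converges on the same defensive shape; a condensed sketch (locals elided) of the pattern these hunks introduce: a truncated option drives the remaining length negative and aborts the walk, and each option's payload length is validated before its value is consumed.

	while (len >= L2CAP_CONF_OPT_SIZE) {
		len -= l2cap_get_conf_opt(&req, &type, &olen, &val);
		if (len < 0)			/* option ran past the buffer */
			break;

		switch (type & L2CAP_CONF_MASK) {
		case L2CAP_CONF_MTU:
			if (olen != 2)		/* reject malformed lengths */
				break;
			mtu = val;
			break;
		/* ... every other option checks olen the same way ... */
		}
	}
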
@@ -3547,58 +3588,65 @@
 
 	while (len >= L2CAP_CONF_OPT_SIZE) {
 		len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val);
+		if (len < 0)
+			break;
 
 		switch (type) {
 		case L2CAP_CONF_MTU:
+			if (olen != 2)
+				break;
 			if (val < L2CAP_DEFAULT_MIN_MTU) {
 				*result = L2CAP_CONF_UNACCEPT;
 				chan->imtu = L2CAP_DEFAULT_MIN_MTU;
 			} else
 				chan->imtu = val;
-			l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr);
+			l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu,
+					   endptr - ptr);
 			break;
 
 		case L2CAP_CONF_FLUSH_TO:
+			if (olen != 2)
+				break;
 			chan->flush_to = val;
-			l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO,
-					   2, chan->flush_to, endptr - ptr);
+			l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2,
+					   chan->flush_to, endptr - ptr);
 			break;
 
 		case L2CAP_CONF_RFC:
-			if (olen == sizeof(rfc))
-				memcpy(&rfc, (void *)val, olen);
-
+			if (olen != sizeof(rfc))
+				break;
+			memcpy(&rfc, (void *)val, olen);
 			if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state) &&
 			    rfc.mode != chan->mode)
 				return -ECONNREFUSED;
-
 			chan->fcs = 0;
-
-			l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
-					   sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
+			l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
+					   (unsigned long) &rfc, endptr - ptr);
 			break;
 
 		case L2CAP_CONF_EWS:
+			if (olen != 2)
+				break;
 			chan->ack_win = min_t(u16, val, chan->ack_win);
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2,
 					   chan->tx_win, endptr - ptr);
 			break;
 
 		case L2CAP_CONF_EFS:
-			if (olen == sizeof(efs)) {
-				memcpy(&efs, (void *)val, olen);
-
-				if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
-				    efs.stype != L2CAP_SERV_NOTRAFIC &&
-				    efs.stype != chan->local_stype)
-					return -ECONNREFUSED;
-
-				l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
-						   (unsigned long) &efs, endptr - ptr);
-			}
+			if (olen != sizeof(efs))
+				break;
+			memcpy(&efs, (void *)val, olen);
+			if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+			    efs.stype != L2CAP_SERV_NOTRAFIC &&
+			    efs.stype != chan->local_stype)
+				return -ECONNREFUSED;
+			l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+					   (unsigned long) &efs, endptr - ptr);
 			break;
 
 		case L2CAP_CONF_FCS:
+			if (olen != 1)
+				break;
 			if (*result == L2CAP_CONF_PENDING)
 				if (val == L2CAP_FCS_NONE)
 					set_bit(CONF_RECV_NO_FCS,
@@ -3669,7 +3717,7 @@
 	rsp.mtu     = cpu_to_le16(chan->imtu);
 	rsp.mps     = cpu_to_le16(chan->mps);
 	rsp.credits = cpu_to_le16(chan->rx_credits);
-	rsp.result  = cpu_to_le16(L2CAP_CR_SUCCESS);
+	rsp.result  = cpu_to_le16(L2CAP_CR_LE_SUCCESS);
 
 	l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp),
 		       &rsp);
@@ -3727,13 +3775,18 @@
 
 	while (len >= L2CAP_CONF_OPT_SIZE) {
 		len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val);
+		if (len < 0)
+			break;
 
 		switch (type) {
 		case L2CAP_CONF_RFC:
-			if (olen == sizeof(rfc))
-				memcpy(&rfc, (void *)val, olen);
+			if (olen != sizeof(rfc))
+				break;
+			memcpy(&rfc, (void *)val, olen);
 			break;
 		case L2CAP_CONF_EWS:
+			if (olen != 2)
+				break;
 			txwin_ext = val;
 			break;
 		}
@@ -3815,9 +3868,17 @@
 
 	result = L2CAP_CR_NO_MEM;
 
-	/* Check if we already have channel with that dcid */
-	if (__l2cap_get_chan_by_dcid(conn, scid))
+	/* Check for valid dynamic CID range (as per Erratum 3253) */
+	if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_DYN_END) {
+		result = L2CAP_CR_INVALID_SCID;
 		goto response;
+	}
+
+	/* Check if we already have channel with that dcid */
+	if (__l2cap_get_chan_by_dcid(conn, scid)) {
+		result = L2CAP_CR_SCID_IN_USE;
+		goto response;
+	}
 
 	chan = pchan->ops->new_connection(pchan);
 	if (!chan)
@@ -4235,6 +4296,7 @@
 				goto done;
 			break;
 		}
+		/* fall through */
 
 	default:
 		l2cap_chan_set_err(chan, ECONNRESET);
@@ -4340,6 +4402,12 @@
 
 	l2cap_chan_lock(chan);
 
+	if (chan->state != BT_DISCONN) {
+		l2cap_chan_unlock(chan);
+		mutex_unlock(&conn->chan_lock);
+		return 0;
+	}
+
 	l2cap_chan_hold(chan);
 	l2cap_chan_del(chan, 0);
 
@@ -5279,7 +5347,7 @@
 	credits = __le16_to_cpu(rsp->credits);
 	result  = __le16_to_cpu(rsp->result);
 
-	if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23 ||
+	if (result == L2CAP_CR_LE_SUCCESS && (mtu < 23 || mps < 23 ||
 					   dcid < L2CAP_CID_DYN_START ||
 					   dcid > L2CAP_CID_LE_DYN_END))
 		return -EPROTO;
@@ -5300,7 +5368,7 @@
 	l2cap_chan_lock(chan);
 
 	switch (result) {
-	case L2CAP_CR_SUCCESS:
+	case L2CAP_CR_LE_SUCCESS:
 		if (__l2cap_get_chan_by_dcid(conn, dcid)) {
 			err = -EBADSLT;
 			break;
@@ -5314,8 +5382,8 @@
 		l2cap_chan_ready(chan);
 		break;
 
-	case L2CAP_CR_AUTHENTICATION:
-	case L2CAP_CR_ENCRYPTION:
+	case L2CAP_CR_LE_AUTHENTICATION:
+	case L2CAP_CR_LE_ENCRYPTION:
 		/* If we already have MITM protection we can't do
 		 * anything.
 		 */
@@ -5458,7 +5526,7 @@
 	pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src,
 					 &conn->hcon->dst, LE_LINK);
 	if (!pchan) {
-		result = L2CAP_CR_BAD_PSM;
+		result = L2CAP_CR_LE_BAD_PSM;
 		chan = NULL;
 		goto response;
 	}
@@ -5468,33 +5536,31 @@
 
 	if (!smp_sufficient_security(conn->hcon, pchan->sec_level,
 				     SMP_ALLOW_STK)) {
-		result = L2CAP_CR_AUTHENTICATION;
+		result = L2CAP_CR_LE_AUTHENTICATION;
 		chan = NULL;
 		goto response_unlock;
 	}
 
 	/* Check for valid dynamic CID range */
 	if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) {
-		result = L2CAP_CR_INVALID_SCID;
+		result = L2CAP_CR_LE_INVALID_SCID;
 		chan = NULL;
 		goto response_unlock;
 	}
 
 	/* Check if we already have channel with that dcid */
 	if (__l2cap_get_chan_by_dcid(conn, scid)) {
-		result = L2CAP_CR_SCID_IN_USE;
+		result = L2CAP_CR_LE_SCID_IN_USE;
 		chan = NULL;
 		goto response_unlock;
 	}
 
 	chan = pchan->ops->new_connection(pchan);
 	if (!chan) {
-		result = L2CAP_CR_NO_MEM;
+		result = L2CAP_CR_LE_NO_MEM;
 		goto response_unlock;
 	}
 
-	l2cap_le_flowctl_init(chan);
-
 	bacpy(&chan->src, &conn->hcon->src);
 	bacpy(&chan->dst, &conn->hcon->dst);
 	chan->src_type = bdaddr_src_type(conn->hcon);
@@ -5503,9 +5569,11 @@
 	chan->dcid = scid;
 	chan->omtu = mtu;
 	chan->remote_mps = mps;
-	chan->tx_credits = __le16_to_cpu(req->credits);
 
 	__l2cap_chan_add(conn, chan);
+
+	l2cap_le_flowctl_init(chan, __le16_to_cpu(req->credits));
+
 	dcid = chan->scid;
 	credits = chan->rx_credits;
 
@@ -5524,7 +5592,7 @@
 		chan->ops->defer(chan);
 	} else {
 		l2cap_chan_ready(chan);
-		result = L2CAP_CR_SUCCESS;
+		result = L2CAP_CR_LE_SUCCESS;
 	}
 
 response_unlock:
@@ -6699,13 +6767,10 @@
 	struct l2cap_le_credits pkt;
 	u16 return_credits;
 
-	/* We return more credits to the sender only after the amount of
-	 * credits falls below half of the initial amount.
-	 */
-	if (chan->rx_credits >= (le_max_credits + 1) / 2)
-		return;
+	return_credits = ((chan->imtu / chan->mps) + 1) - chan->rx_credits;
 
-	return_credits = le_max_credits - chan->rx_credits;
+	if (!return_credits)
+		return;
 
 	BT_DBG("chan %p returning %u credits to sender", chan, return_credits);
 
@@ -6719,6 +6784,21 @@
 	l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt);
 }
 
+static int l2cap_le_recv(struct l2cap_chan *chan, struct sk_buff *skb)
+{
+	int err;
+
+	BT_DBG("SDU reassemble complete: chan %p skb->len %u", chan, skb->len);
+
+	/* Wait for recv to confirm reception before updating the credits */
+	err = chan->ops->recv(chan, skb);
+
+	/* Update credits whenever an SDU is received */
+	l2cap_chan_le_send_credits(chan);
+
+	return err;
+}
+
 static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
 {
 	int err;
@@ -6737,7 +6817,11 @@
 	chan->rx_credits--;
 	BT_DBG("rx_credits %u -> %u", chan->rx_credits + 1, chan->rx_credits);
 
-	l2cap_chan_le_send_credits(chan);
+	/* Update if the remote has run out of credits; this should only
+	 * happen if the remote is not using the entire MPS.
+	 */
+	if (!chan->rx_credits)
+		l2cap_chan_le_send_credits(chan);
 
 	err = 0;
 
@@ -6763,12 +6847,22 @@
 		}
 
 		if (skb->len == sdu_len)
-			return chan->ops->recv(chan, skb);
+			return l2cap_le_recv(chan, skb);
 
 		chan->sdu = skb;
 		chan->sdu_len = sdu_len;
 		chan->sdu_last_frag = skb;
 
+		/* Detect if remote is not able to use the selected MPS */
+		if (skb->len + L2CAP_SDULEN_SIZE < chan->mps) {
+			u16 mps_len = skb->len + L2CAP_SDULEN_SIZE;
+
+			/* Adjust the number of credits */
+			BT_DBG("chan->mps %u -> %u", chan->mps, mps_len);
+			chan->mps = mps_len;
+			l2cap_chan_le_send_credits(chan);
+		}
+
 		return 0;
 	}
 
@@ -6785,7 +6879,7 @@
 	skb = NULL;
 
 	if (chan->sdu->len == chan->sdu_len) {
-		err = chan->ops->recv(chan, chan->sdu);
+		err = l2cap_le_recv(chan, chan->sdu);
 		if (!err) {
 			chan->sdu = NULL;
 			chan->sdu_last_frag = NULL;
@@ -7102,7 +7196,6 @@
 	case L2CAP_MODE_BASIC:
 		break;
 	case L2CAP_MODE_LE_FLOWCTL:
-		l2cap_le_flowctl_init(chan);
 		break;
 	case L2CAP_MODE_ERTM:
 	case L2CAP_MODE_STREAMING:
@@ -7434,7 +7527,7 @@
 		}
 
 		if (chan->state == BT_CONNECT) {
-			if (!status)
+			if (!status && l2cap_check_enc_key_size(hcon))
 				l2cap_start_connection(chan);
 			else
 				__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
@@ -7443,7 +7536,7 @@
 			struct l2cap_conn_rsp rsp;
 			__u16 res, stat;
 
-			if (!status) {
+			if (!status && l2cap_check_enc_key_size(hcon)) {
 				if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
 					res = L2CAP_CR_PEND;
 					stat = L2CAP_CS_AUTHOR_PEND;
@@ -7615,17 +7708,7 @@
 	return 0;
 }
 
-static int l2cap_debugfs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, l2cap_debugfs_show, inode->i_private);
-}
-
-static const struct file_operations l2cap_debugfs_fops = {
-	.open		= l2cap_debugfs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(l2cap_debugfs);
 
 static struct dentry *l2cap_debugfs;
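
DEFINE_SHOW_ATTRIBUTE(l2cap_debugfs) generates essentially the boilerplate deleted above (plus an .owner = THIS_MODULE the open-coded version lacked); roughly, assuming the existing l2cap_debugfs_show():

static int l2cap_debugfs_open(struct inode *inode, struct file *file)
{
	return single_open(file, l2cap_debugfs_show, inode->i_private);
}

static const struct file_operations l2cap_debugfs_fops = {
	.owner		= THIS_MODULE,
	.open		= l2cap_debugfs_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
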
 
@@ -7645,11 +7728,6 @@
 	l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs,
 					    NULL, &l2cap_debugfs_fops);
 
-	debugfs_create_u16("l2cap_le_max_credits", 0644, bt_debugfs,
-			   &le_max_credits);
-	debugfs_create_u16("l2cap_le_default_mps", 0644, bt_debugfs,
-			   &le_default_mps);
-
 	return 0;
 }
 
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 686bdc6..a7be8b5 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -791,10 +791,13 @@
 
 		conn = chan->conn;
 
-		/*change security for LE channels */
+		/* change security for LE channels */
 		if (chan->scid == L2CAP_CID_ATT) {
-			if (smp_conn_security(conn->hcon, sec.level))
+			if (smp_conn_security(conn->hcon, sec.level)) {
+				err = -EINVAL;
 				break;
+			}
+
 			set_bit(FLAG_PENDING_SECURITY, &chan->flags);
 			sk->sk_state = BT_CONFIG;
 			chan->state = BT_CONFIG;
@@ -1252,7 +1255,7 @@
 
 	l2cap_sock_init(sk, parent);
 
-	bt_accept_enqueue(parent, sk);
+	bt_accept_enqueue(parent, sk, false);
 
 	release_sock(parent);
 
@@ -1655,6 +1658,7 @@
 	.recvmsg	= l2cap_sock_recvmsg,
 	.poll		= bt_sock_poll,
 	.ioctl		= bt_sock_ioctl,
+	.gettstamp	= sock_gettstamp,
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
 	.shutdown	= l2cap_sock_shutdown,
diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c
index 6d59a50..f468476 100644
--- a/net/bluetooth/leds.c
+++ b/net/bluetooth/leds.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2015, Heiner Kallweit <hkallweit1@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <net/bluetooth/bluetooth.h>
diff --git a/net/bluetooth/leds.h b/net/bluetooth/leds.h
index 08725a2f..bb5e092 100644
--- a/net/bluetooth/leds.h
+++ b/net/bluetooth/leds.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2015, Heiner Kallweit <hkallweit1@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #if IS_ENABLED(CONFIG_BT_LEDS)
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ccce954..acb7c6d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -474,7 +474,6 @@
 {
 	struct mgmt_rp_read_ext_index_list *rp;
 	struct hci_dev *d;
-	size_t rp_len;
 	u16 count;
 	int err;
 
@@ -488,8 +487,7 @@
 			count++;
 	}
 
-	rp_len = sizeof(*rp) + (sizeof(rp->entry[0]) * count);
-	rp = kmalloc(rp_len, GFP_ATOMIC);
+	rp = kmalloc(struct_size(rp, entry, count), GFP_ATOMIC);
 	if (!rp) {
 		read_unlock(&hci_dev_list_lock);
 		return -ENOMEM;
@@ -525,7 +523,6 @@
 	}
 
 	rp->num_controllers = cpu_to_le16(count);
-	rp_len = sizeof(*rp) + (sizeof(rp->entry[0]) * count);
 
 	read_unlock(&hci_dev_list_lock);
 
@@ -538,7 +535,8 @@
 	hci_sock_clear_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS);
 
 	err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE,
-				MGMT_OP_READ_EXT_INDEX_LIST, 0, rp, rp_len);
+				MGMT_OP_READ_EXT_INDEX_LIST, 0, rp,
+				struct_size(rp, entry, count));
 
 	kfree(rp);
 
@@ -551,7 +549,8 @@
 	    !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED))
 		return false;
 
-	if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) &&
+	if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
+	     test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) &&
 	    !bacmp(&hdev->public_addr, BDADDR_ANY))
 		return false;
 
@@ -566,7 +565,8 @@
 	    !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED))
 		options |= MGMT_OPTION_EXTERNAL_CONFIG;
 
-	if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) &&
+	if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
+	     test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) &&
 	    !bacmp(&hdev->public_addr, BDADDR_ANY))
 		options |= MGMT_OPTION_PUBLIC_ADDRESS;
 
@@ -2301,8 +2301,7 @@
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
 
-	expected_len = sizeof(*cp) + key_count *
-					sizeof(struct mgmt_link_key_info);
+	expected_len = struct_size(cp, keys, key_count);
 	if (expected_len != len) {
 		bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes",
 			   expected_len, len);
@@ -2589,7 +2588,6 @@
 {
 	struct mgmt_rp_get_connections *rp;
 	struct hci_conn *c;
-	size_t rp_len;
 	int err;
 	u16 i;
 
@@ -2609,8 +2607,7 @@
 			i++;
 	}
 
-	rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info));
-	rp = kmalloc(rp_len, GFP_KERNEL);
+	rp = kmalloc(struct_size(rp, addr, i), GFP_KERNEL);
 	if (!rp) {
 		err = -ENOMEM;
 		goto unlock;
@@ -2630,10 +2627,8 @@
 	rp->conn_count = cpu_to_le16(i);
 
 	/* Recalculate length in case of filtered SCO connections, etc */
-	rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info));
-
 	err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, 0, rp,
-				rp_len);
+				struct_size(rp, addr, i));
 
 	kfree(rp);
 
@@ -5030,7 +5025,7 @@
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
 
-	expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info);
+	expected_len = struct_size(cp, irks, irk_count);
 	if (expected_len != len) {
 		bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes",
 			   expected_len, len);
@@ -5112,8 +5107,7 @@
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
 
-	expected_len = sizeof(*cp) + key_count *
-					sizeof(struct mgmt_ltk_info);
+	expected_len = struct_size(cp, keys, key_count);
 	if (expected_len != len) {
 		bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes",
 			   expected_len, len);
@@ -5847,8 +5841,7 @@
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
 
-	expected_len = sizeof(*cp) + param_count *
-					sizeof(struct mgmt_conn_param);
+	expected_len = struct_size(cp, params, param_count);
 	if (expected_len != len) {
 		bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes",
 			   expected_len, len);
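Every mgmt.c hunk above applies the same transformation: open-coded `sizeof(*rp) + count * sizeof(rp->entry[0])` arithmetic becomes struct_size() from <linux/overflow.h>, which computes the identical size for a struct ending in a flexible array member but checks the multiplication and addition for overflow, saturating to SIZE_MAX so the subsequent kmalloc() fails cleanly instead of returning a too-short buffer. A minimal sketch (struct example_rp is a made-up stand-in):

#include <linux/overflow.h>
#include <linux/slab.h>

struct example_rp {
	__le16 num_entries;
	struct mgmt_addr_info entry[];	/* flexible array member */
};

static struct example_rp *example_alloc(u16 count)
{
	struct example_rp *rp;

	/* same bytes as sizeof(*rp) + count * sizeof(rp->entry[0]);
	 * struct_size() only inspects the type, it never dereferences rp */
	rp = kmalloc(struct_size(rp, entry, count), GFP_KERNEL);
	return rp;
}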
diff --git a/net/bluetooth/rfcomm/Kconfig b/net/bluetooth/rfcomm/Kconfig
index 335df75..9b9953e 100644
--- a/net/bluetooth/rfcomm/Kconfig
+++ b/net/bluetooth/rfcomm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config BT_RFCOMM
 	tristate "RFCOMM protocol support"
 	depends on BT_BREDR
diff --git a/net/bluetooth/rfcomm/Makefile b/net/bluetooth/rfcomm/Makefile
index fe07988..593e5c4 100644
--- a/net/bluetooth/rfcomm/Makefile
+++ b/net/bluetooth/rfcomm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux Bluetooth RFCOMM layer.
 #
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index b98225d..3a9e9d9 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -483,6 +483,7 @@
 		/* if closing a dlc in a session that hasn't been started,
 		 * just close and unlink the dlc
 		 */
+		/* fall through */
 
 	default:
 		rfcomm_dlc_clear_timer(d);
@@ -2166,17 +2167,7 @@
 	return 0;
 }
 
-static int rfcomm_dlc_debugfs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, rfcomm_dlc_debugfs_show, inode->i_private);
-}
-
-static const struct file_operations rfcomm_dlc_debugfs_fops = {
-	.open		= rfcomm_dlc_debugfs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(rfcomm_dlc_debugfs);
 
 static struct dentry *rfcomm_dlc_debugfs;
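DEFINE_SHOW_ATTRIBUTE(name), from <linux/seq_file.h>, regenerates exactly the kind of boilerplate deleted above: given a name##_show() function it emits name##_open() and name##_fops. Roughly, the expansion for this case:

static int rfcomm_dlc_debugfs_open(struct inode *inode, struct file *file)
{
	return single_open(file, rfcomm_dlc_debugfs_show, inode->i_private);
}

static const struct file_operations rfcomm_dlc_debugfs_fops = {
	.owner		= THIS_MODULE,
	.open		= rfcomm_dlc_debugfs_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

Besides shrinking the source, the conversion picks up .owner = THIS_MODULE, which the hand-rolled versions here had dropped.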
 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index d606e92..90bb53a 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -988,7 +988,7 @@
 	rfcomm_pi(sk)->channel = channel;
 
 	sk->sk_state = BT_CONFIG;
-	bt_accept_enqueue(parent, sk);
+	bt_accept_enqueue(parent, sk, true);
 
 	/* Accept connection and return socket DLC */
 	*d = rfcomm_pi(sk)->dlc;
@@ -1020,17 +1020,7 @@
 	return 0;
 }
 
-static int rfcomm_sock_debugfs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, rfcomm_sock_debugfs_show, inode->i_private);
-}
-
-static const struct file_operations rfcomm_sock_debugfs_fops = {
-	.open		= rfcomm_sock_debugfs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(rfcomm_sock_debugfs);
 
 static struct dentry *rfcomm_sock_debugfs;
 
@@ -1049,6 +1039,7 @@
 	.setsockopt	= rfcomm_sock_setsockopt,
 	.getsockopt	= rfcomm_sock_getsockopt,
 	.ioctl		= rfcomm_sock_ioctl,
+	.gettstamp	= sock_gettstamp,
 	.poll		= bt_sock_poll,
 	.socketpair	= sock_no_socketpair,
 	.mmap		= sock_no_mmap
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 5e44d84..0c7d31c 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -839,18 +839,6 @@
 		BT_DBG("TIOCMIWAIT");
 		break;
 
-	case TIOCGSERIAL:
-		BT_ERR("TIOCGSERIAL is not supported");
-		return -ENOIOCTLCMD;
-
-	case TIOCSSERIAL:
-		BT_ERR("TIOCSSERIAL is not supported");
-		return -ENOIOCTLCMD;
-
-	case TIOCSERGSTRUCT:
-		BT_ERR("TIOCSERGSTRUCT is not supported");
-		return -ENOIOCTLCMD;
-
 	case TIOCSERGETLSR:
 		BT_ERR("TIOCSERGETLSR is not supported");
 		return -ENOIOCTLCMD;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 8f0f927..b91d6b4 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -193,7 +193,7 @@
 	conn->sk = sk;
 
 	if (parent)
-		bt_accept_enqueue(parent, sk);
+		bt_accept_enqueue(parent, sk, true);
 }
 
 static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
@@ -523,12 +523,12 @@
 	struct sock *sk = sock->sk;
 	int err = 0;
 
-	BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
-
 	if (!addr || addr_len < sizeof(struct sockaddr_sco) ||
 	    addr->sa_family != AF_BLUETOOTH)
 		return -EINVAL;
 
+	BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
+
 	lock_sock(sk);
 
 	if (sk->sk_state != BT_OPEN) {
@@ -1173,17 +1173,7 @@
 	return 0;
 }
 
-static int sco_debugfs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sco_debugfs_show, inode->i_private);
-}
-
-static const struct file_operations sco_debugfs_fops = {
-	.open		= sco_debugfs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(sco_debugfs);
 
 static struct dentry *sco_debugfs;
 
@@ -1200,6 +1190,7 @@
 	.recvmsg	= sco_sock_recvmsg,
 	.poll		= bt_sock_poll,
 	.ioctl		= bt_sock_ioctl,
+	.gettstamp	= sock_gettstamp,
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
 	.shutdown	= sco_sock_shutdown,
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 73f7211..26e8cfa 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -23,6 +23,7 @@
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/crypto.h>
+#include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <crypto/b128ops.h>
 #include <crypto/hash.h>
@@ -88,10 +89,6 @@
 	u8			local_rand[16];
 	bool			debug_key;
 
-	u8			min_key_size;
-	u8			max_key_size;
-
-	struct crypto_cipher	*tfm_aes;
 	struct crypto_shash	*tfm_cmac;
 	struct crypto_kpp	*tfm_ecdh;
 };
@@ -130,7 +127,6 @@
 	u8			dhkey[32];
 	u8			mackey[16];
 
-	struct crypto_cipher	*tfm_aes;
 	struct crypto_shash	*tfm_cmac;
 	struct crypto_kpp	*tfm_ecdh;
 };
@@ -186,7 +182,6 @@
 	}
 
 	desc->tfm = tfm;
-	desc->flags = 0;
 
 	/* Swap key and message from LSB to MSB */
 	swap_buf(k, tmp, 16);
@@ -381,22 +376,18 @@
  * s1 and ah.
  */
 
-static int smp_e(struct crypto_cipher *tfm, const u8 *k, u8 *r)
+static int smp_e(const u8 *k, u8 *r)
 {
+	struct crypto_aes_ctx ctx;
 	uint8_t tmp[16], data[16];
 	int err;
 
 	SMP_DBG("k %16phN r %16phN", k, r);
 
-	if (!tfm) {
-		BT_ERR("tfm %p", tfm);
-		return -EINVAL;
-	}
-
 	/* The most significant octet of key corresponds to k[0] */
 	swap_buf(k, tmp, 16);
 
-	err = crypto_cipher_setkey(tfm, tmp, 16);
+	err = aes_expandkey(&ctx, tmp, 16);
 	if (err) {
 		BT_ERR("cipher setkey failed: %d", err);
 		return err;
@@ -405,17 +396,18 @@
 	/* Most significant octet of plaintextData corresponds to data[0] */
 	swap_buf(r, data, 16);
 
-	crypto_cipher_encrypt_one(tfm, data, data);
+	aes_encrypt(&ctx, data, data);
 
 	/* Most significant octet of encryptedData corresponds to data[0] */
 	swap_buf(data, r, 16);
 
 	SMP_DBG("r %16phN", r);
 
+	memzero_explicit(&ctx, sizeof(ctx));
 	return err;
 }
 
-static int smp_c1(struct crypto_cipher *tfm_aes, const u8 k[16],
+static int smp_c1(const u8 k[16],
 		  const u8 r[16], const u8 preq[7], const u8 pres[7], u8 _iat,
 		  const bdaddr_t *ia, u8 _rat, const bdaddr_t *ra, u8 res[16])
 {
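The smp_e() rewrite above replaces the heap-allocated crypto_cipher transform with the synchronous AES library from <crypto/aes.h>: the expanded key schedule lives on the stack and is wiped with memzero_explicit() after the single-block operation, so there is no tfm to allocate, thread through callers, or free — which is why every smp_* helper below loses its tfm_aes parameter. The core pattern as a standalone sketch:

#include <crypto/aes.h>
#include <linux/string.h>

/* One-block AES-128 encrypt via the library interface, mirroring what
 * smp_e() does after its byte-order swaps. */
static int one_block_encrypt(const u8 key[16], u8 block[16])
{
	struct crypto_aes_ctx ctx;
	int err;

	err = aes_expandkey(&ctx, key, 16);
	if (err)
		return err;

	aes_encrypt(&ctx, block, block);	/* in-place */

	/* don't leave key-schedule material behind on the stack */
	memzero_explicit(&ctx, sizeof(ctx));
	return 0;
}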
@@ -440,7 +432,7 @@
 	u128_xor((u128 *) res, (u128 *) r, (u128 *) p1);
 
 	/* res = e(k, res) */
-	err = smp_e(tfm_aes, k, res);
+	err = smp_e(k, res);
 	if (err) {
 		BT_ERR("Encrypt data error");
 		return err;
@@ -457,14 +449,14 @@
 	u128_xor((u128 *) res, (u128 *) res, (u128 *) p2);
 
 	/* res = e(k, res) */
-	err = smp_e(tfm_aes, k, res);
+	err = smp_e(k, res);
 	if (err)
 		BT_ERR("Encrypt data error");
 
 	return err;
 }
 
-static int smp_s1(struct crypto_cipher *tfm_aes, const u8 k[16],
+static int smp_s1(const u8 k[16],
 		  const u8 r1[16], const u8 r2[16], u8 _r[16])
 {
 	int err;
@@ -473,15 +465,14 @@
 	memcpy(_r, r2, 8);
 	memcpy(_r + 8, r1, 8);
 
-	err = smp_e(tfm_aes, k, _r);
+	err = smp_e(k, _r);
 	if (err)
 		BT_ERR("Encrypt data error");
 
 	return err;
 }
 
-static int smp_ah(struct crypto_cipher *tfm, const u8 irk[16],
-		  const u8 r[3], u8 res[3])
+static int smp_ah(const u8 irk[16], const u8 r[3], u8 res[3])
 {
 	u8 _res[16];
 	int err;
@@ -490,7 +481,7 @@
 	memcpy(_res, r, 3);
 	memset(_res + 3, 0, 13);
 
-	err = smp_e(tfm, irk, _res);
+	err = smp_e(irk, _res);
 	if (err) {
 		BT_ERR("Encrypt error");
 		return err;
@@ -522,7 +513,7 @@
 
 	BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk);
 
-	err = smp_ah(smp->tfm_aes, irk, &bdaddr->b[3], hash);
+	err = smp_ah(irk, &bdaddr->b[3], hash);
 	if (err)
 		return false;
 
@@ -545,7 +536,7 @@
 	rpa->b[5] &= 0x3f;	/* Clear two most significant bits */
 	rpa->b[5] |= 0x40;	/* Set second most significant bit */
 
-	err = smp_ah(smp->tfm_aes, irk, &rpa->b[3], rpa->b);
+	err = smp_ah(irk, &rpa->b[3], rpa->b);
 	if (err < 0)
 		return err;
 
@@ -625,7 +616,7 @@
 
 	memset(&msg, 0, sizeof(msg));
 
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iv, 2, 1 + len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, iv, 2, 1 + len);
 
 	l2cap_chan_send(chan, &msg, 1 + len);
 
@@ -720,7 +711,7 @@
 	if (rsp == NULL) {
 		req->io_capability = conn->hcon->io_capability;
 		req->oob_flag = oob_flag;
-		req->max_key_size = SMP_DEV(hdev)->max_key_size;
+		req->max_key_size = hdev->le_max_key_size;
 		req->init_key_dist = local_dist;
 		req->resp_key_dist = remote_dist;
 		req->auth_req = (authreq & AUTH_REQ_MASK(hdev));
@@ -731,7 +722,7 @@
 
 	rsp->io_capability = conn->hcon->io_capability;
 	rsp->oob_flag = oob_flag;
-	rsp->max_key_size = SMP_DEV(hdev)->max_key_size;
+	rsp->max_key_size = hdev->le_max_key_size;
 	rsp->init_key_dist = req->init_key_dist & remote_dist;
 	rsp->resp_key_dist = req->resp_key_dist & local_dist;
 	rsp->auth_req = (authreq & AUTH_REQ_MASK(hdev));
@@ -745,7 +736,7 @@
 	struct hci_dev *hdev = conn->hcon->hdev;
 	struct smp_chan *smp = chan->data;
 
-	if (max_key_size > SMP_DEV(hdev)->max_key_size ||
+	if (max_key_size > hdev->le_max_key_size ||
 	    max_key_size < SMP_MIN_ENC_KEY_SIZE)
 		return SMP_ENC_KEY_SIZE;
 
@@ -772,7 +763,6 @@
 	kzfree(smp->slave_csrk);
 	kzfree(smp->link_key);
 
-	crypto_free_cipher(smp->tfm_aes);
 	crypto_free_shash(smp->tfm_cmac);
 	crypto_free_kpp(smp->tfm_ecdh);
 
@@ -961,7 +951,7 @@
 
 	BT_DBG("conn %p", conn);
 
-	ret = smp_c1(smp->tfm_aes, smp->tk, smp->prnd, smp->preq, smp->prsp,
+	ret = smp_c1(smp->tk, smp->prnd, smp->preq, smp->prsp,
 		     conn->hcon->init_addr_type, &conn->hcon->init_addr,
 		     conn->hcon->resp_addr_type, &conn->hcon->resp_addr,
 		     cp.confirm_val);
@@ -987,12 +977,9 @@
 	u8 confirm[16];
 	int ret;
 
-	if (IS_ERR_OR_NULL(smp->tfm_aes))
-		return SMP_UNSPECIFIED;
-
 	BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
 
-	ret = smp_c1(smp->tfm_aes, smp->tk, smp->rrnd, smp->preq, smp->prsp,
+	ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp,
 		     hcon->init_addr_type, &hcon->init_addr,
 		     hcon->resp_addr_type, &hcon->resp_addr, confirm);
 	if (ret)
@@ -1009,7 +996,7 @@
 		__le64 rand = 0;
 		__le16 ediv = 0;
 
-		smp_s1(smp->tfm_aes, smp->tk, smp->rrnd, smp->prnd, stk);
+		smp_s1(smp->tk, smp->rrnd, smp->prnd, stk);
 
 		if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags))
 			return SMP_UNSPECIFIED;
@@ -1025,7 +1012,7 @@
 		smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
 			     smp->prnd);
 
-		smp_s1(smp->tfm_aes, smp->tk, smp->prnd, smp->rrnd, stk);
+		smp_s1(smp->tk, smp->prnd, smp->rrnd, stk);
 
 		if (hcon->pending_sec_level == BT_SECURITY_HIGH)
 			auth = 1;
@@ -1393,16 +1380,10 @@
 	if (!smp)
 		return NULL;
 
-	smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(smp->tfm_aes)) {
-		BT_ERR("Unable to create AES crypto context");
-		goto zfree_smp;
-	}
-
 	smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(smp->tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		goto free_cipher;
+		goto zfree_smp;
 	}
 
 	smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
@@ -1424,8 +1405,6 @@
 
 free_shash:
 	crypto_free_shash(smp->tfm_cmac);
-free_cipher:
-	crypto_free_cipher(smp->tfm_aes);
 zfree_smp:
 	kzfree(smp);
 	return NULL;
@@ -2583,6 +2562,19 @@
 		goto distribute;
 	}
 
+	/* Drop IRK if the peer is using an identity address during pairing
+	 * but is providing a different address as identity information.
+	 *
+	 * Microsoft Surface Precision Mouse is known to have this bug.
+	 */
+	if (hci_is_identity_address(&hcon->dst, hcon->dst_type) &&
+	    (bacmp(&info->bdaddr, &hcon->dst) ||
+	     info->addr_type != hcon->dst_type)) {
+		bt_dev_err(hcon->hdev,
+			   "ignoring IRK with invalid identity address");
+		goto distribute;
+	}
+
 	bacpy(&smp->id_addr, &info->bdaddr);
 	smp->id_addr_type = info->addr_type;
 
@@ -3223,7 +3215,6 @@
 {
 	struct l2cap_chan *chan;
 	struct smp_dev *smp;
-	struct crypto_cipher *tfm_aes;
 	struct crypto_shash *tfm_cmac;
 	struct crypto_kpp *tfm_ecdh;
 
@@ -3236,17 +3227,9 @@
 	if (!smp)
 		return ERR_PTR(-ENOMEM);
 
-	tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(tfm_aes)) {
-		BT_ERR("Unable to create AES crypto context");
-		kzfree(smp);
-		return ERR_CAST(tfm_aes);
-	}
-
 	tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		crypto_free_cipher(tfm_aes);
 		kzfree(smp);
 		return ERR_CAST(tfm_cmac);
 	}
@@ -3255,23 +3238,18 @@
 	if (IS_ERR(tfm_ecdh)) {
 		BT_ERR("Unable to create ECDH crypto context");
 		crypto_free_shash(tfm_cmac);
-		crypto_free_cipher(tfm_aes);
 		kzfree(smp);
 		return ERR_CAST(tfm_ecdh);
 	}
 
 	smp->local_oob = false;
-	smp->tfm_aes = tfm_aes;
 	smp->tfm_cmac = tfm_cmac;
 	smp->tfm_ecdh = tfm_ecdh;
-	smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
-	smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
 
 create_chan:
 	chan = l2cap_chan_create();
 	if (!chan) {
 		if (smp) {
-			crypto_free_cipher(smp->tfm_aes);
 			crypto_free_shash(smp->tfm_cmac);
 			crypto_free_kpp(smp->tfm_ecdh);
 			kzfree(smp);
@@ -3319,7 +3297,6 @@
 	smp = chan->data;
 	if (smp) {
 		chan->data = NULL;
-		crypto_free_cipher(smp->tfm_aes);
 		crypto_free_shash(smp->tfm_cmac);
 		crypto_free_kpp(smp->tfm_ecdh);
 		kzfree(smp);
@@ -3391,7 +3368,7 @@
 	struct hci_dev *hdev = file->private_data;
 	char buf[4];
 
-	snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->min_key_size);
+	snprintf(buf, sizeof(buf), "%2u\n", hdev->le_min_key_size);
 
 	return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
 }
@@ -3412,11 +3389,11 @@
 
 	sscanf(buf, "%hhu", &key_size);
 
-	if (key_size > SMP_DEV(hdev)->max_key_size ||
+	if (key_size > hdev->le_max_key_size ||
 	    key_size < SMP_MIN_ENC_KEY_SIZE)
 		return -EINVAL;
 
-	SMP_DEV(hdev)->min_key_size = key_size;
+	hdev->le_min_key_size = key_size;
 
 	return count;
 }
@@ -3435,7 +3412,7 @@
 	struct hci_dev *hdev = file->private_data;
 	char buf[4];
 
-	snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->max_key_size);
+	snprintf(buf, sizeof(buf), "%2u\n", hdev->le_max_key_size);
 
 	return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
 }
@@ -3457,10 +3434,10 @@
 	sscanf(buf, "%hhu", &key_size);
 
 	if (key_size > SMP_MAX_ENC_KEY_SIZE ||
-	    key_size < SMP_DEV(hdev)->min_key_size)
+	    key_size < hdev->le_min_key_size)
 		return -EINVAL;
 
-	SMP_DEV(hdev)->max_key_size = key_size;
+	hdev->le_max_key_size = key_size;
 
 	return count;
 }
@@ -3575,7 +3552,7 @@
 	return 0;
 }
 
-static int __init test_ah(struct crypto_cipher *tfm_aes)
+static int __init test_ah(void)
 {
 	const u8 irk[16] = {
 			0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34,
@@ -3585,7 +3562,7 @@
 	u8 res[3];
 	int err;
 
-	err = smp_ah(tfm_aes, irk, r, res);
+	err = smp_ah(irk, r, res);
 	if (err)
 		return err;
 
@@ -3595,7 +3572,7 @@
 	return 0;
 }
 
-static int __init test_c1(struct crypto_cipher *tfm_aes)
+static int __init test_c1(void)
 {
 	const u8 k[16] = {
 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3615,7 +3592,7 @@
 	u8 res[16];
 	int err;
 
-	err = smp_c1(tfm_aes, k, r, preq, pres, _iat, &ia, _rat, &ra, res);
+	err = smp_c1(k, r, preq, pres, _iat, &ia, _rat, &ra, res);
 	if (err)
 		return err;
 
@@ -3625,7 +3602,7 @@
 	return 0;
 }
 
-static int __init test_s1(struct crypto_cipher *tfm_aes)
+static int __init test_s1(void)
 {
 	const u8 k[16] = {
 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3640,7 +3617,7 @@
 	u8 res[16];
 	int err;
 
-	err = smp_s1(tfm_aes, k, r1, r2, res);
+	err = smp_s1(k, r1, r2, res);
 	if (err)
 		return err;
 
@@ -3821,8 +3798,7 @@
 	.llseek		= default_llseek,
 };
 
-static int __init run_selftests(struct crypto_cipher *tfm_aes,
-				struct crypto_shash *tfm_cmac,
+static int __init run_selftests(struct crypto_shash *tfm_cmac,
 				struct crypto_kpp *tfm_ecdh)
 {
 	ktime_t calltime, delta, rettime;
@@ -3837,19 +3813,19 @@
 		goto done;
 	}
 
-	err = test_ah(tfm_aes);
+	err = test_ah();
 	if (err) {
 		BT_ERR("smp_ah test failed");
 		goto done;
 	}
 
-	err = test_c1(tfm_aes);
+	err = test_c1();
 	if (err) {
 		BT_ERR("smp_c1 test failed");
 		goto done;
 	}
 
-	err = test_s1(tfm_aes);
+	err = test_s1();
 	if (err) {
 		BT_ERR("smp_s1 test failed");
 		goto done;
@@ -3906,21 +3882,13 @@
 
 int __init bt_selftest_smp(void)
 {
-	struct crypto_cipher *tfm_aes;
 	struct crypto_shash *tfm_cmac;
 	struct crypto_kpp *tfm_ecdh;
 	int err;
 
-	tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(tfm_aes)) {
-		BT_ERR("Unable to create AES crypto context");
-		return PTR_ERR(tfm_aes);
-	}
-
-	tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, CRYPTO_ALG_ASYNC);
+	tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		crypto_free_cipher(tfm_aes);
 		return PTR_ERR(tfm_cmac);
 	}
 
@@ -3928,14 +3896,12 @@
 	if (IS_ERR(tfm_ecdh)) {
 		BT_ERR("Unable to create ECDH crypto context");
 		crypto_free_shash(tfm_cmac);
-		crypto_free_cipher(tfm_aes);
 		return PTR_ERR(tfm_ecdh);
 	}
 
-	err = run_selftests(tfm_aes, tfm_cmac, tfm_ecdh);
+	err = run_selftests(tfm_cmac, tfm_ecdh);
 
 	crypto_free_shash(tfm_cmac);
-	crypto_free_cipher(tfm_aes);
 	crypto_free_kpp(tfm_ecdh);
 
 	return err;
diff --git a/net/bpf/Makefile b/net/bpf/Makefile
index 27b2992..1c0a98d 100644
--- a/net/bpf/Makefile
+++ b/net/bpf/Makefile
@@ -1 +1,2 @@
-obj-y	:= test_run.o
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_BPF_SYSCALL)	:= test_run.o
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f407883..1153bbc 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <linux/bpf.h>
 #include <linux/slab.h>
@@ -10,50 +7,68 @@
 #include <linux/etherdevice.h>
 #include <linux/filter.h>
 #include <linux/sched/signal.h>
+#include <net/bpf_sk_storage.h>
+#include <net/sock.h>
+#include <net/tcp.h>
 
-static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
-					    struct bpf_cgroup_storage *storage)
+#define CREATE_TRACE_POINTS
+#include <trace/events/bpf_test_run.h>
+
+static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+			u32 *retval, u32 *time)
 {
-	u32 ret;
-
-	preempt_disable();
-	rcu_read_lock();
-	bpf_cgroup_storage_set(storage);
-	ret = BPF_PROG_RUN(prog, ctx);
-	rcu_read_unlock();
-	preempt_enable();
-
-	return ret;
-}
-
-static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
-{
-	struct bpf_cgroup_storage *storage = NULL;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+	enum bpf_cgroup_storage_type stype;
 	u64 time_start, time_spent = 0;
-	u32 ret = 0, i;
+	int ret = 0;
+	u32 i;
 
-	storage = bpf_cgroup_storage_alloc(prog);
-	if (IS_ERR(storage))
-		return PTR_ERR(storage);
+	for_each_cgroup_storage_type(stype) {
+		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+		if (IS_ERR(storage[stype])) {
+			storage[stype] = NULL;
+			for_each_cgroup_storage_type(stype)
+				bpf_cgroup_storage_free(storage[stype]);
+			return -ENOMEM;
+		}
+	}
 
 	if (!repeat)
 		repeat = 1;
+
+	rcu_read_lock();
+	preempt_disable();
 	time_start = ktime_get_ns();
 	for (i = 0; i < repeat; i++) {
-		ret = bpf_test_run_one(prog, ctx, storage);
+		bpf_cgroup_storage_set(storage);
+		*retval = BPF_PROG_RUN(prog, ctx);
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
 		if (need_resched()) {
-			if (signal_pending(current))
-				break;
 			time_spent += ktime_get_ns() - time_start;
+			preempt_enable();
+			rcu_read_unlock();
+
 			cond_resched();
+
+			rcu_read_lock();
+			preempt_disable();
 			time_start = ktime_get_ns();
 		}
 	}
 	time_spent += ktime_get_ns() - time_start;
+	preempt_enable();
+	rcu_read_unlock();
+
 	do_div(time_spent, repeat);
 	*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
 
-	bpf_cgroup_storage_free(storage);
+	for_each_cgroup_storage_type(stype)
+		bpf_cgroup_storage_free(storage[stype]);
 
 	return ret;
 }
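The rewritten bpf_test_run() above folds three concerns into one loop: the program executes under rcu_read_lock()/preempt_disable(), mirroring the protection it gets on a real datapath; a pending signal aborts the whole run with -EINTR; and large repeat counts stay preemptible because both protections are dropped around cond_resched(), with the duration clock paused so rescheduling gaps are not billed to the program. The bare pattern, with illustrative names (timed_run and run_one are not kernel API):

static int timed_run(u32 repeat, u32 (*run_one)(void *), void *ctx,
		     u32 *retval, u32 *duration)
{
	u64 time_start, time_spent = 0;
	int ret = 0;
	u32 i;

	rcu_read_lock();
	preempt_disable();
	time_start = ktime_get_ns();
	for (i = 0; i < repeat; i++) {
		*retval = run_one(ctx);

		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		if (need_resched()) {
			/* stop the clock, become preemptible, resume */
			time_spent += ktime_get_ns() - time_start;
			preempt_enable();
			rcu_read_unlock();

			cond_resched();

			rcu_read_lock();
			preempt_disable();
			time_start = ktime_get_ns();
		}
	}
	time_spent += ktime_get_ns() - time_start;
	preempt_enable();
	rcu_read_unlock();

	do_div(time_spent, repeat);
	*duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
	return ret;
}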
@@ -64,8 +79,18 @@
 {
 	void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
 	int err = -EFAULT;
+	u32 copy_size = size;
 
-	if (data_out && copy_to_user(data_out, data, size))
+	/* Clamp the copy if the user has provided a size hint, but copy the
+	 * full buffer if not, to retain the old behaviour.
+	 */
+	if (kattr->test.data_size_out &&
+	    copy_size > kattr->test.data_size_out) {
+		copy_size = kattr->test.data_size_out;
+		err = -ENOSPC;
+	}
+
+	if (data_out && copy_to_user(data_out, data, copy_size))
 		goto out;
 	if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
 		goto out;
@@ -73,8 +98,10 @@
 		goto out;
 	if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
 		goto out;
-	err = 0;
+	if (err != -ENOSPC)
+		err = 0;
 out:
+	trace_bpf_test_finish(&err);
 	return err;
 }
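Seen from userspace, data_size_out — previously written only by the kernel — now doubles as a capacity hint on input. A hedged sketch of a BPF_PROG_TEST_RUN invocation relying on the clamp (prog_fd, pkt and pkt_len assumed to exist; error handling trimmed):

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>

static int test_run_clamped(int prog_fd, void *pkt, __u32 pkt_len)
{
	union bpf_attr attr;
	char out[256];

	memset(&attr, 0, sizeof(attr));
	attr.test.prog_fd = prog_fd;
	attr.test.data_in = (unsigned long)pkt;
	attr.test.data_size_in = pkt_len;
	attr.test.data_out = (unsigned long)out;
	attr.test.data_size_out = sizeof(out);	/* capacity hint */

	if (syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr)) < 0 &&
	    errno == ENOSPC) {
		/* out[] holds a truncated copy; the kernel rewrote
		 * attr.test.data_size_out to the full size it needed */
	}
	return attr.test.retval;
}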
 
@@ -98,15 +125,130 @@
 	return data;
 }
 
+static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
+{
+	void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
+	void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
+	u32 size = kattr->test.ctx_size_in;
+	void *data;
+	int err;
+
+	if (!data_in && !data_out)
+		return NULL;
+
+	data = kzalloc(max_size, GFP_USER);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	if (data_in) {
+		err = bpf_check_uarg_tail_zero(data_in, max_size, size);
+		if (err) {
+			kfree(data);
+			return ERR_PTR(err);
+		}
+
+		size = min_t(u32, max_size, size);
+		if (copy_from_user(data, data_in, size)) {
+			kfree(data);
+			return ERR_PTR(-EFAULT);
+		}
+	}
+	return data;
+}
+
+static int bpf_ctx_finish(const union bpf_attr *kattr,
+			  union bpf_attr __user *uattr, const void *data,
+			  u32 size)
+{
+	void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
+	int err = -EFAULT;
+	u32 copy_size = size;
+
+	if (!data || !data_out)
+		return 0;
+
+	if (copy_size > kattr->test.ctx_size_out) {
+		copy_size = kattr->test.ctx_size_out;
+		err = -ENOSPC;
+	}
+
+	if (copy_to_user(data_out, data, copy_size))
+		goto out;
+	if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size)))
+		goto out;
+	if (err != -ENOSPC)
+		err = 0;
+out:
+	return err;
+}
+
+/**
+ * range_is_zero - test whether buffer is initialized
+ * @buf: buffer to check
+ * @from: check from this position
+ * @to: check up until (excluding) this position
+ *
+ * This function returns true if there is no non-zero byte
+ * in the buf in the range [from, to).
+ */
+static inline bool range_is_zero(void *buf, size_t from, size_t to)
+{
+	return !memchr_inv((u8 *)buf + from, 0, to - from);
+}
+
+static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
+{
+	struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
+
+	if (!__skb)
+		return 0;
+
+	/* make sure the fields we don't use are zeroed */
+	if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority)))
+		return -EINVAL;
+
+	/* priority is allowed */
+
+	if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) +
+			   FIELD_SIZEOF(struct __sk_buff, priority),
+			   offsetof(struct __sk_buff, cb)))
+		return -EINVAL;
+
+	/* cb is allowed */
+
+	if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) +
+			   FIELD_SIZEOF(struct __sk_buff, cb),
+			   sizeof(struct __sk_buff)))
+		return -EINVAL;
+
+	skb->priority = __skb->priority;
+	memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
+
+	return 0;
+}
+
+static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
+{
+	struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
+
+	if (!__skb)
+		return;
+
+	__skb->priority = skb->priority;
+	memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
+}
+
 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 			  union bpf_attr __user *uattr)
 {
 	bool is_l2 = false, is_direct_pkt_access = false;
 	u32 size = kattr->test.data_size_in;
 	u32 repeat = kattr->test.repeat;
+	struct __sk_buff *ctx = NULL;
 	u32 retval, duration;
 	int hh_len = ETH_HLEN;
 	struct sk_buff *skb;
+	struct sock *sk;
 	void *data;
 	int ret;
 
@@ -115,6 +257,12 @@
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
+	ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
+	if (IS_ERR(ctx)) {
+		kfree(data);
+		return PTR_ERR(ctx);
+	}
+
 	switch (prog->type) {
 	case BPF_PROG_TYPE_SCHED_CLS:
 	case BPF_PROG_TYPE_SCHED_ACT:
@@ -129,11 +277,23 @@
 		break;
 	}
 
+	sk = kzalloc(sizeof(struct sock), GFP_USER);
+	if (!sk) {
+		kfree(data);
+		kfree(ctx);
+		return -ENOMEM;
+	}
+	sock_net_set(sk, current->nsproxy->net_ns);
+	sock_init_data(NULL, sk);
+
 	skb = build_skb(data, 0);
 	if (!skb) {
 		kfree(data);
+		kfree(ctx);
+		kfree(sk);
 		return -ENOMEM;
 	}
+	skb->sk = sk;
 
 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 	__skb_put(skb, size);
@@ -144,25 +304,38 @@
 		__skb_push(skb, hh_len);
 	if (is_direct_pkt_access)
 		bpf_compute_data_pointers(skb);
-	retval = bpf_test_run(prog, skb, repeat, &duration);
+	ret = convert___skb_to_skb(skb, ctx);
+	if (ret)
+		goto out;
+	ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
+	if (ret)
+		goto out;
 	if (!is_l2) {
 		if (skb_headroom(skb) < hh_len) {
 			int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
 
 			if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
-				kfree_skb(skb);
-				return -ENOMEM;
+				ret = -ENOMEM;
+				goto out;
 			}
 		}
 		memset(__skb_push(skb, hh_len), 0, hh_len);
 	}
+	convert_skb_to___skb(skb, ctx);
 
 	size = skb->len;
 	/* bpf program can never convert linear skb to non-linear */
 	if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
 		size = skb_headlen(skb);
 	ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+	if (!ret)
+		ret = bpf_ctx_finish(kattr, uattr, ctx,
+				     sizeof(struct __sk_buff));
+out:
 	kfree_skb(skb);
+	bpf_sk_storage_free(sk);
+	kfree(sk);
+	kfree(ctx);
 	return ret;
 }
 
@@ -177,6 +350,9 @@
 	void *data;
 	int ret;
 
+	if (kattr->test.ctx_in || kattr->test.ctx_out)
+		return -EINVAL;
+
 	data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM + NET_IP_ALIGN, 0);
 	if (IS_ERR(data))
 		return PTR_ERR(data);
@@ -189,11 +365,124 @@
 	rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
 	xdp.rxq = &rxqueue->xdp_rxq;
 
-	retval = bpf_test_run(prog, &xdp, repeat, &duration);
+	ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration);
+	if (ret)
+		goto out;
 	if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
 	    xdp.data_end != xdp.data + size)
 		size = xdp.data_end - xdp.data;
 	ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
+out:
+	kfree(data);
+	return ret;
+}
+
+static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
+{
+	/* make sure the fields we don't use are zeroed */
+	if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags)))
+		return -EINVAL;
+
+	/* flags is allowed */
+
+	if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) +
+			   FIELD_SIZEOF(struct bpf_flow_keys, flags),
+			   sizeof(struct bpf_flow_keys)))
+		return -EINVAL;
+
+	return 0;
+}
+
+int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+				     const union bpf_attr *kattr,
+				     union bpf_attr __user *uattr)
+{
+	u32 size = kattr->test.data_size_in;
+	struct bpf_flow_dissector ctx = {};
+	u32 repeat = kattr->test.repeat;
+	struct bpf_flow_keys *user_ctx;
+	struct bpf_flow_keys flow_keys;
+	u64 time_start, time_spent = 0;
+	const struct ethhdr *eth;
+	unsigned int flags = 0;
+	u32 retval, duration;
+	void *data;
+	int ret;
+	u32 i;
+
+	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
+		return -EINVAL;
+
+	if (size < ETH_HLEN)
+		return -EINVAL;
+
+	data = bpf_test_init(kattr, size, 0, 0);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	eth = (struct ethhdr *)data;
+
+	if (!repeat)
+		repeat = 1;
+
+	user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys));
+	if (IS_ERR(user_ctx)) {
+		kfree(data);
+		return PTR_ERR(user_ctx);
+	}
+	if (user_ctx) {
+		ret = verify_user_bpf_flow_keys(user_ctx);
+		if (ret)
+			goto out;
+		flags = user_ctx->flags;
+	}
+
+	ctx.flow_keys = &flow_keys;
+	ctx.data = data;
+	ctx.data_end = (__u8 *)data + size;
+
+	rcu_read_lock();
+	preempt_disable();
+	time_start = ktime_get_ns();
+	for (i = 0; i < repeat; i++) {
+		retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
+					  size, flags);
+
+		if (signal_pending(current)) {
+			preempt_enable();
+			rcu_read_unlock();
+
+			ret = -EINTR;
+			goto out;
+		}
+
+		if (need_resched()) {
+			time_spent += ktime_get_ns() - time_start;
+			preempt_enable();
+			rcu_read_unlock();
+
+			cond_resched();
+
+			rcu_read_lock();
+			preempt_disable();
+			time_start = ktime_get_ns();
+		}
+	}
+	time_spent += ktime_get_ns() - time_start;
+	preempt_enable();
+	rcu_read_unlock();
+
+	do_div(time_spent, repeat);
+	duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+
+	ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
+			      retval, duration);
+	if (!ret)
+		ret = bpf_ctx_finish(kattr, uattr, user_ctx,
+				     sizeof(struct bpf_flow_keys));
+
+out:
+	kfree(user_ctx);
 	kfree(data);
 	return ret;
 }
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index e558b46..fed9290 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 menuconfig BPFILTER
 	bool "BPF based packet filtering framework (BPFILTER)"
 	depends on NET && BPF && INET
@@ -8,7 +9,7 @@
 if BPFILTER
 config BPFILTER_UMH
 	tristate "bpfilter kernel module with user mode helper"
-	depends on $(success,$(srctree)/scripts/cc-can-link.sh $(CC))
+	depends on CC_CAN_LINK
 	default m
 	help
 	  This builds bpfilter kernel module with embedded user mode helper
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index 0947ee7..aa945ab 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -5,7 +5,7 @@
 
 hostprogs-y := bpfilter_umh
 bpfilter_umh-objs := main.o
-KBUILD_HOSTCFLAGS += -I. -Itools/include/ -Itools/include/uapi
+KBUILD_HOSTCFLAGS += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi
 HOSTCC := $(CC)
 
 ifeq ($(CONFIG_BPFILTER_UMH), y)
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 94e88f5..c0f0990 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -13,39 +13,24 @@
 extern char bpfilter_umh_start;
 extern char bpfilter_umh_end;
 
-static struct umh_info info;
-/* since ip_getsockopt() can run in parallel, serialize access to umh */
-static DEFINE_MUTEX(bpfilter_lock);
-
-static void shutdown_umh(struct umh_info *info)
+static void shutdown_umh(void)
 {
 	struct task_struct *tsk;
 
-	if (!info->pid)
+	if (bpfilter_ops.stop)
 		return;
-	tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID);
+
+	tsk = get_pid_task(find_vpid(bpfilter_ops.info.pid), PIDTYPE_PID);
 	if (tsk) {
-		force_sig(SIGKILL, tsk);
+		send_sig(SIGKILL, tsk, 1);
 		put_task_struct(tsk);
 	}
-	fput(info->pipe_to_umh);
-	fput(info->pipe_from_umh);
-	info->pid = 0;
 }
 
 static void __stop_umh(void)
 {
-	if (IS_ENABLED(CONFIG_INET)) {
-		bpfilter_process_sockopt = NULL;
-		shutdown_umh(&info);
-	}
-}
-
-static void stop_umh(void)
-{
-	mutex_lock(&bpfilter_lock);
-	__stop_umh();
-	mutex_unlock(&bpfilter_lock);
+	if (IS_ENABLED(CONFIG_INET))
+		shutdown_umh();
 }
 
 static int __bpfilter_process_sockopt(struct sock *sk, int optname,
@@ -63,10 +48,10 @@
 	req.cmd = optname;
 	req.addr = (long __force __user)optval;
 	req.len = optlen;
-	mutex_lock(&bpfilter_lock);
-	if (!info.pid)
+	if (!bpfilter_ops.info.pid)
 		goto out;
-	n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos);
+	n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req),
+			   &pos);
 	if (n != sizeof(req)) {
 		pr_err("write fail %zd\n", n);
 		__stop_umh();
@@ -74,7 +59,8 @@
 		goto out;
 	}
 	pos = 0;
-	n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos);
+	n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply),
+			&pos);
 	if (n != sizeof(reply)) {
 		pr_err("read fail %zd\n", n);
 		__stop_umh();
@@ -83,36 +69,59 @@
 	}
 	ret = reply.status;
 out:
-	mutex_unlock(&bpfilter_lock);
 	return ret;
 }
 
-static int __init load_umh(void)
+static int start_umh(void)
 {
 	int err;
 
 	/* fork usermode process */
 	err = fork_usermode_blob(&bpfilter_umh_start,
 				 &bpfilter_umh_end - &bpfilter_umh_start,
-				 &info);
+				 &bpfilter_ops.info);
 	if (err)
 		return err;
-	pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
+	bpfilter_ops.stop = false;
+	pr_info("Loaded bpfilter_umh pid %d\n", bpfilter_ops.info.pid);
 
 	/* health check that usermode process started correctly */
 	if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) {
-		stop_umh();
+		shutdown_umh();
 		return -EFAULT;
 	}
-	if (IS_ENABLED(CONFIG_INET))
-		bpfilter_process_sockopt = &__bpfilter_process_sockopt;
 
 	return 0;
 }
 
+static int __init load_umh(void)
+{
+	int err;
+
+	mutex_lock(&bpfilter_ops.lock);
+	if (!bpfilter_ops.stop) {
+		err = -EFAULT;
+		goto out;
+	}
+	err = start_umh();
+	if (!err && IS_ENABLED(CONFIG_INET)) {
+		bpfilter_ops.sockopt = &__bpfilter_process_sockopt;
+		bpfilter_ops.start = &start_umh;
+	}
+out:
+	mutex_unlock(&bpfilter_ops.lock);
+	return err;
+}
+
 static void __exit fini_umh(void)
 {
-	stop_umh();
+	mutex_lock(&bpfilter_ops.lock);
+	if (IS_ENABLED(CONFIG_INET)) {
+		shutdown_umh();
+		bpfilter_ops.start = NULL;
+		bpfilter_ops.sockopt = NULL;
+	}
+	mutex_unlock(&bpfilter_ops.lock);
 }
 module_init(load_umh);
 module_exit(fini_umh);
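For orientation, the file-local umh_info and bpfilter_lock are replaced by a shared bpfilter_ops object (declared in include/linux/bpfilter.h) so the sockopt path can restart a dead helper via the start callback. Its approximate shape — treat the exact field layout as an assumption:

struct bpfilter_umh_ops {
	struct umh_info info;
	/* serializes access to the umh; ip_{set,get}sockopt() can run
	 * in parallel */
	struct mutex lock;
	int (*sockopt)(struct sock *sk, int optname, char __user *optval,
		       unsigned int optlen, bool is_set);
	int (*start)(void);
	bool stop;
};
extern struct bpfilter_umh_ops bpfilter_ops;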
diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S
index 40311d1..9ea6100 100644
--- a/net/bpfilter/bpfilter_umh_blob.S
+++ b/net/bpfilter/bpfilter_umh_blob.S
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-	.section .init.rodata, "a"
+	.section .rodata, "a"
 	.global bpfilter_umh_start
 bpfilter_umh_start:
 	.incbin "net/bpfilter/bpfilter_umh"
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
index 1317f10..77396a0 100644
--- a/net/bpfilter/main.c
+++ b/net/bpfilter/main.c
@@ -6,7 +6,7 @@
 #include <sys/socket.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include "include/uapi/linux/bpf.h"
+#include "../../include/uapi/linux/bpf.h"
 #include <asm/unistd.h>
 #include "msgfmt.h"
 
@@ -55,7 +55,7 @@
 
 int main(void)
 {
-	debug_fd = open("/dev/console", 00000002);
+	debug_fd = open("/dev/kmsg", 00000002);
 	dprintf(debug_fd, "Started bpfilter\n");
 	loop();
 	close(debug_fd);
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index aa0d3b2..e4fb050 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # 802.1d Ethernet Bridging
 #
@@ -17,7 +18,7 @@
 	  other third party bridge products.
 
 	  In order to use the Ethernet bridge, you'll need the bridge
-	  configuration tools; see <file:Documentation/networking/bridge.txt>
+	  configuration tools; see <file:Documentation/networking/bridge.rst>
 	  for location. Please read the Bridge mini-HOWTO for more
 	  information.
 
diff --git a/net/bridge/br.c b/net/bridge/br.c
index b0a0b82..8a8f9e5 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Generic parts
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -31,6 +27,8 @@
  */
 static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
 {
+	struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
+	struct netdev_notifier_pre_changeaddr_info *prechaddr_info;
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct net_bridge_port *p;
 	struct net_bridge *br;
@@ -38,10 +36,16 @@
 	bool changed_addr;
 	int err;
 
-	/* register of bridge completed, add sysfs entries */
-	if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
-		br_sysfs_addbr(dev);
-		return NOTIFY_DONE;
+	if (dev->priv_flags & IFF_EBRIDGE) {
+		err = br_vlan_bridge_event(dev, event, ptr);
+		if (err)
+			return notifier_from_errno(err);
+
+		if (event == NETDEV_REGISTER) {
+			/* register of bridge completed, add sysfs entries */
+			br_sysfs_addbr(dev);
+			return NOTIFY_DONE;
+		}
 	}
 
 	/* not a port of a bridge */
@@ -56,6 +60,17 @@
 		br_mtu_auto_adjust(br);
 		break;
 
+	case NETDEV_PRE_CHANGEADDR:
+		if (br->dev->addr_assign_type == NET_ADDR_SET)
+			break;
+		prechaddr_info = ptr;
+		err = dev_pre_changeaddr_notify(br->dev,
+						prechaddr_info->dev_addr,
+						extack);
+		if (err)
+			return notifier_from_errno(err);
+		break;
+
 	case NETDEV_CHANGEADDR:
 		spin_lock_bh(&br->lock);
 		br_fdb_changeaddr(p, dev->dev_addr);
@@ -113,6 +128,9 @@
 		break;
 	}
 
+	if (event != NETDEV_UNREGISTER)
+		br_vlan_port_event(p, event);
+
 	/* Events that may cause spanning tree to refresh */
 	if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
 			  event == NETDEV_CHANGE || event == NETDEV_DOWN))
@@ -151,7 +169,7 @@
 			break;
 		}
 		br_fdb_offloaded_set(br, p, fdb_info->addr,
-				     fdb_info->vid);
+				     fdb_info->vid, true);
 		break;
 	case SWITCHDEV_FDB_DEL_TO_BRIDGE:
 		fdb_info = ptr;
@@ -163,7 +181,7 @@
 	case SWITCHDEV_FDB_OFFLOADED:
 		fdb_info = ptr;
 		br_fdb_offloaded_set(br, p, fdb_info->addr,
-				     fdb_info->vid);
+				     fdb_info->vid, fdb_info->offloaded);
 		break;
 	}
 
@@ -175,6 +193,98 @@
 	.notifier_call = br_switchdev_event,
 };
 
+/* br_boolopt_toggle - change user-controlled boolean option
+ *
+ * @br: bridge device
+ * @opt: id of the option to change
+ * @on: new option value
+ * @extack: extack for error messages
+ *
+ * Changes the value of the respective boolean option to @on taking care of
+ * any internal option value mapping and configuration.
+ */
+int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
+		      struct netlink_ext_ack *extack)
+{
+	switch (opt) {
+	case BR_BOOLOPT_NO_LL_LEARN:
+		br_opt_toggle(br, BROPT_NO_LL_LEARN, on);
+		break;
+	default:
+		/* shouldn't be called with unsupported options */
+		WARN_ON(1);
+		break;
+	}
+
+	return 0;
+}
+
+int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
+{
+	switch (opt) {
+	case BR_BOOLOPT_NO_LL_LEARN:
+		return br_opt_get(br, BROPT_NO_LL_LEARN);
+	default:
+		/* shouldn't be called with unsupported options */
+		WARN_ON(1);
+		break;
+	}
+
+	return 0;
+}
+
+int br_boolopt_multi_toggle(struct net_bridge *br,
+			    struct br_boolopt_multi *bm,
+			    struct netlink_ext_ack *extack)
+{
+	unsigned long bitmap = bm->optmask;
+	int err = 0;
+	int opt_id;
+
+	for_each_set_bit(opt_id, &bitmap, BR_BOOLOPT_MAX) {
+		bool on = !!(bm->optval & BIT(opt_id));
+
+		err = br_boolopt_toggle(br, opt_id, on, extack);
+		if (err) {
+			br_debug(br, "boolopt multi-toggle error: option: %d current: %d new: %d error: %d\n",
+				 opt_id, br_boolopt_get(br, opt_id), on, err);
+			break;
+		}
+	}
+
+	return err;
+}
+
+void br_boolopt_multi_get(const struct net_bridge *br,
+			  struct br_boolopt_multi *bm)
+{
+	u32 optval = 0;
+	int opt_id;
+
+	for (opt_id = 0; opt_id < BR_BOOLOPT_MAX; opt_id++)
+		optval |= (br_boolopt_get(br, opt_id) << opt_id);
+
+	bm->optval = optval;
+	bm->optmask = GENMASK((BR_BOOLOPT_MAX - 1), 0);
+}
+
+/* private bridge options, controlled by the kernel */
+void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
+{
+	bool cur = !!br_opt_get(br, opt);
+
+	br_debug(br, "toggle option: %d state: %d -> %d\n",
+		 opt, cur, on);
+
+	if (cur == on)
+		return;
+
+	if (on)
+		set_bit(opt, &br->options);
+	else
+		clear_bit(opt, &br->options);
+}
+
 static void __net_exit br_net_exit(struct net *net)
 {
 	struct net_device *dev;
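The boolopt interface above is bitmap-shaped on purpose, so a single netlink attribute can carry several user-controlled options at once: optmask selects which option bits to touch and optval supplies their new values at the same bit positions. In-kernel usage, sketched (disable_ll_learning is a hypothetical caller):

static int disable_ll_learning(struct net_bridge *br,
			       struct netlink_ext_ack *extack)
{
	struct br_boolopt_multi bm = {
		.optval  = BIT(BR_BOOLOPT_NO_LL_LEARN),	/* new value: on */
		.optmask = BIT(BR_BOOLOPT_NO_LL_LEARN),	/* only touch this bit */
	};

	return br_boolopt_multi_toggle(br, &bm, extack);
}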
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index 2cf7716..3790856 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Handle bridge arp/nd proxy/suppress
  *
@@ -6,11 +7,6 @@
  *
  *  Authors:
  *	Roopa Prabhu <roopa@cumulusnetworks.com>
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -21,6 +17,7 @@
 #include <linux/if_vlan.h>
 #include <linux/inetdevice.h>
 #include <net/addrconf.h>
+#include <net/ipv6_stubs.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ip6_checksum.h>
 #endif
@@ -39,7 +36,7 @@
 		}
 	}
 
-	br->neigh_suppress_enabled = neigh_suppress;
+	br_opt_toggle(br, BROPT_NEIGH_SUPPRESS_ENABLED, neigh_suppress);
 }
 
 #if IS_ENABLED(CONFIG_INET)
@@ -130,7 +127,7 @@
 	u8 *arpptr, *sha;
 	__be32 sip, tip;
 
-	BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+	BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0;
 
 	if ((dev->flags & IFF_NOARP) ||
 	    !pskb_may_pull(skb, arp_hdr_len(dev)))
@@ -155,12 +152,12 @@
 	    ipv4_is_multicast(tip))
 		return;
 
-	if (br->neigh_suppress_enabled) {
+	if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
 		if (p && (p->flags & BR_NEIGH_SUPPRESS))
 			return;
 		if (ipv4_is_zeronet(sip) || sip == tip) {
 			/* prevent flooding to neigh suppress ports */
-			BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+			BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
 			return;
 		}
 	}
@@ -175,11 +172,12 @@
 			return;
 	}
 
-	if (br->neigh_suppress_enabled && br_is_local_ip(vlandev, tip)) {
+	if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
+	    br_is_local_ip(vlandev, tip)) {
 		/* its our local ip, so don't proxy reply
 		 * and don't forward to neigh suppress ports
 		 */
-		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
 		return;
 	}
 
@@ -213,8 +211,9 @@
 			/* If we have replied or as long as we know the
 			 * mac, indicate to arp replied
 			 */
-			if (replied || br->neigh_suppress_enabled)
-				BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+			if (replied ||
+			    br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED))
+				BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
 		}
 
 		neigh_release(n);
@@ -311,7 +310,7 @@
 	/* Neighbor Advertisement */
 	memset(na, 0, sizeof(*na) + na_olen);
 	na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
-	na->icmph.icmp6_router = 0; /* XXX: should be 1 ? */
+	na->icmph.icmp6_router = (n->flags & NTF_ROUTER) ? 1 : 0;
 	na->icmph.icmp6_override = 1;
 	na->icmph.icmp6_solicited = 1;
 	na->target = ns->target;
@@ -390,7 +389,7 @@
 	struct ipv6hdr *iphdr;
 	struct neighbour *n;
 
-	BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+	BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0;
 
 	if (p && (p->flags & BR_NEIGH_SUPPRESS))
 		return;
@@ -398,7 +397,7 @@
 	if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
 	    !msg->icmph.icmp6_solicited) {
 		/* prevent flooding to neigh suppress ports */
-		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
 		return;
 	}
 
@@ -411,7 +410,7 @@
 
 	if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) {
 		/* prevent flooding to neigh suppress ports */
-		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
 		return;
 	}
 
@@ -429,7 +428,7 @@
 		/* its our own ip, so don't proxy reply
 		 * and don't forward to arp suppress ports
 		 */
-		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
 		return;
 	}
 
@@ -460,8 +459,9 @@
 			 * mac, indicate to NEIGH_SUPPRESS ports that we
 			 * have replied
 			 */
-			if (replied || br->neigh_suppress_enabled)
-				BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+			if (replied ||
+			    br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED))
+				BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
 		}
 		neigh_release(n);
 	}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index e682a66..e804a30 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Device handling code
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -28,8 +24,6 @@
 const struct nf_br_ops __rcu *nf_br_ops __read_mostly;
 EXPORT_SYMBOL_GPL(nf_br_ops);
 
-static struct lock_class_key bridge_netdev_addr_lock_key;
-
 /* net device transmit always called with BH disabled */
 netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -56,6 +50,7 @@
 
 	br_switchdev_frame_unmark(skb);
 	BR_INPUT_SKB_CB(skb)->brdev = dev;
+	BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
 
 	skb_reset_mac_header(skb);
 	eth = eth_hdr(skb);
@@ -67,11 +62,11 @@
 	if (IS_ENABLED(CONFIG_INET) &&
 	    (eth->h_proto == htons(ETH_P_ARP) ||
 	     eth->h_proto == htons(ETH_P_RARP)) &&
-	    br->neigh_suppress_enabled) {
+	    br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
 		br_do_proxy_suppress_arp(skb, br, vid, NULL);
 	} else if (IS_ENABLED(CONFIG_IPV6) &&
 		   skb->protocol == htons(ETH_P_IPV6) &&
-		   br->neigh_suppress_enabled &&
+		   br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
 		   pskb_may_pull(skb, sizeof(struct ipv6hdr) +
 				 sizeof(struct nd_msg)) &&
 		   ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
@@ -111,11 +106,6 @@
 	return NETDEV_TX_OK;
 }
 
-static void br_set_lockdep_class(struct net_device *dev)
-{
-	lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
-}
-
 static int br_dev_init(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
@@ -131,9 +121,17 @@
 		return err;
 	}
 
+	err = br_mdb_hash_init(br);
+	if (err) {
+		free_percpu(br->stats);
+		br_fdb_hash_fini(br);
+		return err;
+	}
+
 	err = br_vlan_init(br);
 	if (err) {
 		free_percpu(br->stats);
+		br_mdb_hash_fini(br);
 		br_fdb_hash_fini(br);
 		return err;
 	}
@@ -142,9 +140,9 @@
 	if (err) {
 		free_percpu(br->stats);
 		br_vlan_flush(br);
+		br_mdb_hash_fini(br);
 		br_fdb_hash_fini(br);
 	}
-	br_set_lockdep_class(dev);
 
 	return err;
 }
@@ -156,6 +154,7 @@
 	br_multicast_dev_del(br);
 	br_multicast_uninit_stats(br);
 	br_vlan_flush(br);
+	br_mdb_hash_fini(br);
 	br_fdb_hash_fini(br);
 	free_percpu(br->stats);
 }
@@ -228,7 +227,7 @@
 	dev->mtu = new_mtu;
 
 	/* this flag will be cleared if the MTU was automatically adjusted */
-	br->mtu_set_by_user = true;
+	br_opt_toggle(br, BROPT_MTU_SET_BY_USER, true);
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	/* remember the MTU in the rtable for PMTU */
 	dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
@@ -344,7 +343,7 @@
 
 	p->np = NULL;
 
-	__netpoll_free_async(np);
+	__netpoll_free(np);
 }
 
 #endif
@@ -393,6 +392,7 @@
 	.ndo_fdb_add		 = br_fdb_add,
 	.ndo_fdb_del		 = br_fdb_delete,
 	.ndo_fdb_dump		 = br_fdb_dump,
+	.ndo_fdb_get		 = br_fdb_get,
 	.ndo_bridge_getlink	 = br_getlink,
 	.ndo_bridge_setlink	 = br_setlink,
 	.ndo_bridge_dellink	 = br_dellink,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 502f663..b1d3248 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Forwarding database
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -33,7 +29,6 @@
 	.key_offset = offsetof(struct net_bridge_fdb_entry, key),
 	.key_len = sizeof(struct net_bridge_fdb_key),
 	.automatic_shrinking = true,
-	.locks_mul = 1,
 };
 
 static struct kmem_cache *br_fdb_cache __read_mostly;
@@ -504,6 +499,7 @@
 		fdb->added_by_user = 0;
 		fdb->added_by_external_learn = 0;
 		fdb->offloaded = 0;
+		fdb->is_sticky = 0;
 		fdb->updated = fdb->used = jiffies;
 		if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
 						  &fdb->rhnode,
@@ -584,7 +580,7 @@
 			unsigned long now = jiffies;
 
 			/* fastpath: update of existing entry */
-			if (unlikely(source != fdb->dst)) {
+			if (unlikely(source != fdb->dst && !fdb->is_sticky)) {
 				fdb->dst = source;
 				fdb_modified = true;
 				/* Take over HW learned entry */
@@ -656,6 +652,8 @@
 		ndm->ndm_flags |= NTF_OFFLOADED;
 	if (fdb->added_by_external_learn)
 		ndm->ndm_flags |= NTF_EXT_LEARNED;
+	if (fdb->is_sticky)
+		ndm->ndm_flags |= NTF_STICKY;
 
 	if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
 		goto nla_put_failure;
@@ -770,10 +768,38 @@
 	return err;
 }
 
+int br_fdb_get(struct sk_buff *skb,
+	       struct nlattr *tb[],
+	       struct net_device *dev,
+	       const unsigned char *addr,
+	       u16 vid, u32 portid, u32 seq,
+	       struct netlink_ext_ack *extack)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	struct net_bridge_fdb_entry *f;
+	int err = 0;
+
+	rcu_read_lock();
+	f = br_fdb_find_rcu(br, addr, vid);
+	if (!f) {
+		NL_SET_ERR_MSG(extack, "Fdb entry not found");
+		err = -ENOENT;
+		goto errout;
+	}
+
+	err = fdb_fill_info(skb, br, f, portid, seq,
+			    RTM_NEWNEIGH, 0);
+errout:
+	rcu_read_unlock();
+	return err;
+}
+
 /* Update (create or replace) forwarding database entry */
 static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
-			 const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
+			 const u8 *addr, u16 state, u16 flags, u16 vid,
+			 u8 ndm_flags)
 {
+	u8 is_sticky = !!(ndm_flags & NTF_STICKY);
 	struct net_bridge_fdb_entry *fdb;
 	bool modified = false;
 
@@ -789,6 +815,9 @@
 		return -EINVAL;
 	}
 
+	if (is_sticky && (state & NUD_PERMANENT))
+		return -EINVAL;
+
 	fdb = br_fdb_find(br, addr, vid);
 	if (fdb == NULL) {
 		if (!(flags & NLM_F_CREATE))
@@ -832,6 +861,12 @@
 
 		modified = true;
 	}
+
+	if (is_sticky != fdb->is_sticky) {
+		fdb->is_sticky = is_sticky;
+		modified = true;
+	}
+
 	fdb->added_by_user = 1;
 
 	fdb->used = jiffies;
@@ -865,7 +900,7 @@
 	} else {
 		spin_lock_bh(&br->hash_lock);
 		err = fdb_add_entry(br, p, addr, ndm->ndm_state,
-				    nlh_flags, vid);
+				    nlh_flags, vid, ndm->ndm_flags);
 		spin_unlock_bh(&br->hash_lock);
 	}
 
@@ -875,7 +910,8 @@
 /* Add new permanent fdb entry with RTM_NEWNEIGH */
 int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	       struct net_device *dev,
-	       const unsigned char *addr, u16 vid, u16 nlh_flags)
+	       const unsigned char *addr, u16 vid, u16 nlh_flags,
+	       struct netlink_ext_ack *extack)
 {
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_port *p = NULL;
@@ -1088,6 +1124,8 @@
 			err = -ENOMEM;
 			goto err_unlock;
 		}
+		if (swdev_notify)
+			fdb->added_by_user = 1;
 		fdb->added_by_external_learn = 1;
 		fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
 	} else {
@@ -1107,6 +1145,9 @@
 			modified = true;
 		}
 
+		if (swdev_notify)
+			fdb->added_by_user = 1;
+
 		if (modified)
 			fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
 	}
@@ -1138,7 +1179,7 @@
 }
 
 void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
-			  const unsigned char *addr, u16 vid)
+			  const unsigned char *addr, u16 vid, bool offloaded)
 {
 	struct net_bridge_fdb_entry *fdb;
 
@@ -1146,7 +1187,27 @@
 
 	fdb = br_fdb_find(br, addr, vid);
 	if (fdb)
-		fdb->offloaded = 1;
+		fdb->offloaded = offloaded;
 
 	spin_unlock_bh(&br->hash_lock);
 }
+
+void br_fdb_clear_offload(const struct net_device *dev, u16 vid)
+{
+	struct net_bridge_fdb_entry *f;
+	struct net_bridge_port *p;
+
+	ASSERT_RTNL();
+
+	p = br_port_get_rtnl(dev);
+	if (!p)
+		return;
+
+	spin_lock_bh(&p->br->hash_lock);
+	hlist_for_each_entry(f, &p->br->fdb_list, fdb_node) {
+		if (f->dst == p && f->key.vlan_id == vid)
+			f->offloaded = 0;
+	}
+	spin_unlock_bh(&p->br->hash_lock);
+}
+EXPORT_SYMBOL_GPL(br_fdb_clear_offload);
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 5372e20..8663700 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Forwarding decision
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/err.h>
@@ -36,10 +32,10 @@
 
 int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	skb_push(skb, ETH_HLEN);
 	if (!is_skb_forwardable(skb->dev, skb))
 		goto drop;
 
-	skb_push(skb, ETH_HLEN);
 	br_drop_fake_rtable(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
@@ -65,6 +61,7 @@
 
 int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	skb->tstamp = 0;
 	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
 		       net, sk, skb, NULL, skb->dev,
 		       br_dev_queue_push_xmit);
@@ -97,12 +94,11 @@
 		net = dev_net(indev);
 	} else {
 		if (unlikely(netpoll_tx_running(to->br->dev))) {
-			if (!is_skb_forwardable(skb->dev, skb)) {
+			skb_push(skb, ETH_HLEN);
+			if (!is_skb_forwardable(skb->dev, skb))
 				kfree_skb(skb);
-			} else {
-				skb_push(skb, ETH_HLEN);
+			else
 				br_netpoll_send_skb(to, skb);
-			}
 			return;
 		}
 		br_hook = NF_BR_LOCAL_OUT;
@@ -173,6 +169,7 @@
 	struct net_bridge_port *prev, struct net_bridge_port *p,
 	struct sk_buff *skb, bool local_orig)
 {
+	u8 igmp_type = br_multicast_igmp_type(skb);
 	int err;
 
 	if (!should_deliver(p, skb))
@@ -184,8 +181,9 @@
 	err = deliver_clone(prev, skb, local_orig);
 	if (err)
 		return ERR_PTR(err);
-
 out:
+	br_multicast_count(p->br, p, skb, igmp_type, BR_MCAST_DIR_TX);
+
 	return p;
 }
 
@@ -193,7 +191,6 @@
 void br_flood(struct net_bridge *br, struct sk_buff *skb,
 	      enum br_pkt_type pkt_type, bool local_rcv, bool local_orig)
 {
-	u8 igmp_type = br_multicast_igmp_type(skb);
 	struct net_bridge_port *prev = NULL;
 	struct net_bridge_port *p;
 
@@ -226,9 +223,6 @@
 		prev = maybe_deliver(prev, p, skb, local_orig);
 		if (IS_ERR(prev))
 			goto out;
-		if (prev == p)
-			br_multicast_count(p->br, p, skb, igmp_type,
-					   BR_MCAST_DIR_TX);
 	}
 
 	if (!prev)
@@ -277,7 +271,6 @@
 			bool local_rcv, bool local_orig)
 {
 	struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
-	u8 igmp_type = br_multicast_igmp_type(skb);
 	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_port *prev = NULL;
 	struct net_bridge_port_group *p;
@@ -304,13 +297,9 @@
 		}
 
 		prev = maybe_deliver(prev, port, skb, local_orig);
-delivered:
 		if (IS_ERR(prev))
 			goto out;
-		if (prev == port)
-			br_multicast_count(port->br, port, skb, igmp_type,
-					   BR_MCAST_DIR_TX);
-
+delivered:
 		if ((unsigned long)lport >= (unsigned long)port)
 			p = rcu_dereference(p->next);
 		if ((unsigned long)rport >= (unsigned long)port)
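
The three hunks above consolidate TX multicast accounting: instead of each flood loop checking prev == p and counting, maybe_deliver() now bumps the counter at its out label, so a frame is counted once for every egress port it is delivered to. For reference, a sketch of the per-CPU counting pattern br_multicast_count() relies on (simplified; the real code also dispatches on IGMP/MLD message type and direction):

	#include <linux/u64_stats_sync.h>

	struct example_mcast_stats {
		u64 tx_total;
		struct u64_stats_sync syncp;	/* u64_stats_init() at alloc */
	};

	static void example_count_tx(struct example_mcast_stats __percpu *stats)
	{
		struct example_mcast_stats *pstats = this_cpu_ptr(stats);

		u64_stats_update_begin(&pstats->syncp);
		pstats->tx_total++;
		u64_stats_update_end(&pstats->syncp);
	}
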
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 0363f1b..4fe30b1 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Userspace interface
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -179,7 +175,7 @@
 	ASSERT_RTNL();
 
 	if (backup_dev) {
-		if (!br_port_exists(backup_dev))
+		if (!netif_is_bridge_port(backup_dev))
 			return -ENOENT;
 
 		backup_p = br_port_get_rtnl(backup_dev);
@@ -394,8 +390,7 @@
 	struct net_bridge_port *p;
 	unsigned long *inuse;
 
-	inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
-			GFP_KERNEL);
+	inuse = bitmap_zalloc(BR_MAX_PORTS, GFP_KERNEL);
 	if (!inuse)
 		return -ENOMEM;
 
@@ -404,7 +399,7 @@
 		set_bit(p->port_no, inuse);
 	}
 	index = find_first_zero_bit(inuse, BR_MAX_PORTS);
-	kfree(inuse);
+	bitmap_free(inuse);
 
 	return (index >= BR_MAX_PORTS) ? -EXFULL : index;
 }
@@ -509,14 +504,14 @@
 	ASSERT_RTNL();
 
 	/* if the bridge MTU was manually configured don't mess with it */
-	if (br->mtu_set_by_user)
+	if (br_opt_get(br, BROPT_MTU_SET_BY_USER))
 		return;
 
 	/* change to the minimum MTU and clear the flag which was set by
 	 * the bridge ndo_change_mtu callback
 	 */
 	dev_set_mtu(br->dev, br_mtu_min(br));
-	br->mtu_set_by_user = false;
+	br_opt_toggle(br, BROPT_MTU_SET_BY_USER, false);
 }
 
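This is the first of many conversions in this update from individual bool/bitfield members of struct net_bridge (mtu_set_by_user, multicast_disabled, ...) to a single options bitmap queried through br_opt_get() and flipped through br_opt_toggle(). Roughly how the helpers look upstream (reproduced from br_private.h/br.c from memory, so treat as a sketch):

	static inline bool br_opt_get(const struct net_bridge *br,
				      enum net_bridge_opts opt)
	{
		return test_bit(opt, &br->options);
	}

	void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
	{
		if (!!br_opt_get(br, opt) == on)
			return;		/* no change */

		if (on)
			set_bit(opt, &br->options);
		else
			clear_bit(opt, &br->options);
	}
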
 static void br_set_gso_limits(struct net_bridge *br)
@@ -603,13 +598,15 @@
 	call_netdevice_notifiers(NETDEV_JOIN, dev);
 
 	err = dev_set_allmulti(dev, 1);
-	if (err)
-		goto put_back;
+	if (err) {
+		kfree(p);	/* kobject not yet init'd, manually free */
+		goto err1;
+	}
 
 	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
 				   SYSFS_BRIDGE_PORT_ATTR);
 	if (err)
-		goto err1;
+		goto err2;
 
 	err = br_sysfs_addif(p);
 	if (err)
@@ -651,7 +648,16 @@
 	if (br_fdb_insert(br, p, dev->dev_addr, 0))
 		netdev_err(dev, "failed insert local address bridge forwarding table\n");
 
-	err = nbp_vlan_init(p);
+	if (br->dev->addr_assign_type != NET_ADDR_SET) {
+		/* Ask for permission to use this MAC address now, even if we
+		 * don't end up choosing it below.
+		 */
+		err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack);
+		if (err)
+			goto err7;
+	}
+
+	err = nbp_vlan_init(p, extack);
 	if (err) {
 		netdev_err(dev, "failed to initialize vlan filtering on this port\n");
 		goto err7;
@@ -692,12 +698,9 @@
 	sysfs_remove_link(br->ifobj, p->dev->name);
 err2:
 	kobject_put(&p->kobj);
-	p = NULL; /* kobject_put frees */
-err1:
 	dev_set_allmulti(dev, -1);
-put_back:
+err1:
 	dev_put(dev);
-	kfree(p);
 	return err;
 }
 
@@ -742,3 +745,15 @@
 	if (mask & BR_NEIGH_SUPPRESS)
 		br_recalculate_neigh_suppress_enabled(br);
 }
+
+bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
+{
+	struct net_bridge_port *p;
+
+	p = br_port_get_rtnl_rcu(dev);
+	if (!p)
+		return false;
+
+	return p->flags & flag;
+}
+EXPORT_SYMBOL_GPL(br_port_flag_is_set);
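
br_port_flag_is_set() is exported so switchdev drivers can test bridge port flags (BR_FLOOD, BR_LEARNING, ...) without reaching into bridge internals. A hedged usage sketch; only br_port_flag_is_set() and the BR_* flags from include/linux/if_bridge.h are real, the surrounding driver function is hypothetical:

	/* hypothetical driver-side check, called under rtnl or rcu (the
	 * helper resolves the port via br_port_get_rtnl_rcu()) */
	static bool example_should_flood(const struct net_device *brport_dev)
	{
		return br_port_flag_is_set(brport_dev, BR_FLOOD);
	}
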
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7207427..09b1dd8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Handle incoming frames
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/slab.h>
@@ -16,6 +12,9 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/netfilter_bridge.h>
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+#include <net/netfilter/nf_queue.h>
+#endif
 #include <linux/neighbour.h>
 #include <net/arp.h>
 #include <linux/export.h>
@@ -23,10 +22,6 @@
 #include "br_private.h"
 #include "br_private_tunnel.h"
 
-/* Hook for brouter */
-br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
-EXPORT_SYMBOL(br_should_route_hook);
-
 static int
 br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
@@ -79,7 +74,6 @@
 	struct net_bridge_fdb_entry *dst = NULL;
 	struct net_bridge_mdb_entry *mdst;
 	bool local_rcv, mcast_hit = false;
-	const unsigned char *dest;
 	struct net_bridge *br;
 	u16 vid = 0;
 
@@ -97,10 +91,9 @@
 		br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
 
 	local_rcv = !!(br->dev->flags & IFF_PROMISC);
-	dest = eth_hdr(skb)->h_dest;
-	if (is_multicast_ether_addr(dest)) {
+	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
 		/* by definition the broadcast is also a multicast address */
-		if (is_broadcast_ether_addr(dest)) {
+		if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) {
 			pkt_type = BR_PKT_BROADCAST;
 			local_rcv = true;
 		} else {
@@ -122,7 +115,7 @@
 		br_do_proxy_suppress_arp(skb, br, vid, p);
 	} else if (IS_ENABLED(CONFIG_IPV6) &&
 		   skb->protocol == htons(ETH_P_IPV6) &&
-		   br->neigh_suppress_enabled &&
+		   br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
 		   pskb_may_pull(skb, sizeof(struct ipv6hdr) +
 				 sizeof(struct nd_msg)) &&
 		   ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
@@ -150,7 +143,7 @@
 		}
 		break;
 	case BR_PKT_UNICAST:
-		dst = br_fdb_find_rcu(br, dest, vid);
+		dst = br_fdb_find_rcu(br, eth_hdr(skb)->h_dest, vid);
 	default:
 		break;
 	}
@@ -188,20 +181,72 @@
 	u16 vid = 0;
 
 	/* check if vlan is allowed, to avoid spoofing */
-	if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
+	if ((p->flags & BR_LEARNING) &&
+	    !br_opt_get(p->br, BROPT_NO_LL_LEARN) &&
+	    br_should_learn(p, skb, &vid))
 		br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
 }
 
 /* note: already called with rcu_read_lock */
 static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
-
 	__br_handle_local_finish(skb);
 
-	BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
-	br_pass_frame_up(skb);
-	return 0;
+	/* return 1 to signal the okfn() was called so it's ok to use the skb */
+	return 1;
+}
+
+static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb)
+{
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+	struct nf_hook_entries *e = NULL;
+	struct nf_hook_state state;
+	unsigned int verdict, i;
+	struct net *net;
+	int ret;
+
+	net = dev_net(skb->dev);
+#ifdef CONFIG_JUMP_LABEL
+	if (!static_key_false(&nf_hooks_needed[NFPROTO_BRIDGE][NF_BR_PRE_ROUTING]))
+		goto frame_finish;
+#endif
+
+	e = rcu_dereference(net->nf.hooks_bridge[NF_BR_PRE_ROUTING]);
+	if (!e)
+		goto frame_finish;
+
+	nf_hook_state_init(&state, NF_BR_PRE_ROUTING,
+			   NFPROTO_BRIDGE, skb->dev, NULL, NULL,
+			   net, br_handle_frame_finish);
+
+	for (i = 0; i < e->num_hook_entries; i++) {
+		verdict = nf_hook_entry_hookfn(&e->hooks[i], skb, &state);
+		switch (verdict & NF_VERDICT_MASK) {
+		case NF_ACCEPT:
+			if (BR_INPUT_SKB_CB(skb)->br_netfilter_broute) {
+				*pskb = skb;
+				return RX_HANDLER_PASS;
+			}
+			break;
+		case NF_DROP:
+			kfree_skb(skb);
+			return RX_HANDLER_CONSUMED;
+		case NF_QUEUE:
+			ret = nf_queue(skb, &state, i, verdict);
+			if (ret == 1)
+				continue;
+			return RX_HANDLER_CONSUMED;
+		default: /* STOLEN */
+			return RX_HANDLER_CONSUMED;
+		}
+	}
+frame_finish:
+	net = dev_net(skb->dev);
+	br_handle_frame_finish(net, NULL, skb);
+#else
+	br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
+#endif
+	return RX_HANDLER_CONSUMED;
 }
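
nf_hook_bridge_pre() open-codes what the NF_HOOK() macro used to do for NF_BR_PRE_ROUTING so that the new broute escape hatch (RX_HANDLER_PASS) can be expressed; its loop mirrors the verdict handling of nf_hook_slow(). A minimal standalone sketch of those semantics (not the kernel's implementation; the types and names are made up):

	enum example_verdict { EX_ACCEPT, EX_DROP, EX_STOLEN };
	typedef enum example_verdict (*example_hook_fn)(struct sk_buff *skb);

	static int example_run_hooks(struct sk_buff *skb,
				     example_hook_fn *hooks, unsigned int n)
	{
		unsigned int i;

		for (i = 0; i < n; i++) {
			switch (hooks[i](skb)) {
			case EX_ACCEPT:
				continue;	/* try the next hook */
			case EX_DROP:
				kfree_skb(skb);
				return -1;	/* consumed */
			case EX_STOLEN:
				return -1;	/* hook owns the skb now */
			}
		}
		return 0;			/* all hooks accepted */
	}

NF_QUEUE adds one wrinkle on top of this: the skb is handed to nf_queue(), and iteration resumes at the next hook only if queueing failed (the ret == 1 case above).
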
 
 /*
@@ -213,7 +258,6 @@
 	struct net_bridge_port *p;
 	struct sk_buff *skb = *pskb;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
-	br_should_route_hook_t *rhook;
 
 	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
 		return RX_HANDLER_PASS;
@@ -225,6 +269,8 @@
 	if (!skb)
 		return RX_HANDLER_CONSUMED;
 
+	memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
+
 	p = br_port_get_rcu(skb->dev);
 	if (p->flags & BR_VLAN_TUNNEL) {
 		if (br_handle_ingress_vlan_tunnel(skb, p,
@@ -278,32 +324,28 @@
 				goto forward;
 		}
 
-		/* Deliver packet to local host only */
-		NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
-			NULL, skb, skb->dev, NULL, br_handle_local_finish);
-		return RX_HANDLER_CONSUMED;
+		/* NF_HOOK() returns 1 only when the okfn() was actually run,
+		 * i.e. the skb survived the hook. The else branch is taken
+		 * when nf_hook() returns < 0 (dropped/error) or 0
+		 * (stolen/queued), in which case the skb must not be
+		 * touched again here.
+		 */
+		if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
+			    dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+			    br_handle_local_finish) == 1) {
+			return RX_HANDLER_PASS;
+		} else {
+			return RX_HANDLER_CONSUMED;
+		}
 	}
 
 forward:
 	switch (p->state) {
 	case BR_STATE_FORWARDING:
-		rhook = rcu_dereference(br_should_route_hook);
-		if (rhook) {
-			if ((*rhook)(skb)) {
-				*pskb = skb;
-				return RX_HANDLER_PASS;
-			}
-			dest = eth_hdr(skb)->h_dest;
-		}
-		/* fall through */
 	case BR_STATE_LEARNING:
 		if (ether_addr_equal(p->br->dev->dev_addr, dest))
 			skb->pkt_type = PACKET_HOST;
 
-		NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
-			dev_net(skb->dev), NULL, skb, skb->dev, NULL,
-			br_handle_frame_finish);
-		break;
+		return nf_hook_bridge_pre(skb, pskb);
 	default:
 drop:
 		kfree_skb(skb);
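
The removed br_should_route_hook was ebtables' private hook for its broute table; that table now runs as an ordinary NF_BR_PRE_ROUTING hook and signals "route this frame instead of bridging it" through the skb control block, which nf_hook_bridge_pre() translates into RX_HANDLER_PASS. Roughly how the flag gets set (a sketch, not the exact code in net/bridge/netfilter/ebtable_broute.c; example_rules_say_route() is made up):

	static unsigned int example_broute(void *priv, struct sk_buff *skb,
					   const struct nf_hook_state *state)
	{
		/* historically, NF_DROP from the broute table meant "route" */
		if (example_rules_say_route(skb))
			BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1;

		return NF_ACCEPT;	/* bridge input path checks the flag */
	}
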
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 73b957f..ae22d78 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Ioctl handler
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/capability.h>
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 6d9f48b..da5ed4c 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -26,14 +26,14 @@
 	if (!br->multicast_router || hlist_empty(&br->router_list))
 		return 0;
 
-	nest = nla_nest_start(skb, MDBA_ROUTER);
+	nest = nla_nest_start_noflag(skb, MDBA_ROUTER);
 	if (nest == NULL)
 		return -EMSGSIZE;
 
 	hlist_for_each_entry_rcu(p, &br->router_list, rlist) {
 		if (!p)
 			continue;
-		port_nest = nla_nest_start(skb, MDBA_ROUTER_PORT);
+		port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT);
 		if (!port_nest)
 			goto fail;
 		if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) ||
@@ -60,6 +60,8 @@
 	e->flags = 0;
 	if (flags & MDB_PG_FLAGS_OFFLOAD)
 		e->flags |= MDB_FLAGS_OFFLOAD;
+	if (flags & MDB_PG_FLAGS_FAST_LEAVE)
+		e->flags |= MDB_FLAGS_FAST_LEAVE;
 }
 
 static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
@@ -75,85 +77,103 @@
 #endif
 }
 
+static int __mdb_fill_info(struct sk_buff *skb,
+			   struct net_bridge_mdb_entry *mp,
+			   struct net_bridge_port_group *p)
+{
+	struct timer_list *mtimer;
+	struct nlattr *nest_ent;
+	struct br_mdb_entry e;
+	u8 flags = 0;
+	int ifindex;
+
+	memset(&e, 0, sizeof(e));
+	if (p) {
+		ifindex = p->port->dev->ifindex;
+		mtimer = &p->timer;
+		flags = p->flags;
+	} else {
+		ifindex = mp->br->dev->ifindex;
+		mtimer = &mp->timer;
+	}
+
+	__mdb_entry_fill_flags(&e, flags);
+	e.ifindex = ifindex;
+	e.vid = mp->addr.vid;
+	if (mp->addr.proto == htons(ETH_P_IP))
+		e.addr.u.ip4 = mp->addr.u.ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+	if (mp->addr.proto == htons(ETH_P_IPV6))
+		e.addr.u.ip6 = mp->addr.u.ip6;
+#endif
+	e.addr.proto = mp->addr.proto;
+	nest_ent = nla_nest_start_noflag(skb,
+					 MDBA_MDB_ENTRY_INFO);
+	if (!nest_ent)
+		return -EMSGSIZE;
+
+	if (nla_put_nohdr(skb, sizeof(e), &e) ||
+	    nla_put_u32(skb,
+			MDBA_MDB_EATTR_TIMER,
+			br_timer_value(mtimer))) {
+		nla_nest_cancel(skb, nest_ent);
+		return -EMSGSIZE;
+	}
+	nla_nest_end(skb, nest_ent);
+
+	return 0;
+}
+
 static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 			    struct net_device *dev)
 {
+	int idx = 0, s_idx = cb->args[1], err = 0;
 	struct net_bridge *br = netdev_priv(dev);
-	struct net_bridge_mdb_htable *mdb;
+	struct net_bridge_mdb_entry *mp;
 	struct nlattr *nest, *nest2;
-	int i, err = 0;
-	int idx = 0, s_idx = cb->args[1];
 
-	if (br->multicast_disabled)
+	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
 		return 0;
 
-	mdb = rcu_dereference(br->mdb);
-	if (!mdb)
-		return 0;
-
-	nest = nla_nest_start(skb, MDBA_MDB);
+	nest = nla_nest_start_noflag(skb, MDBA_MDB);
 	if (nest == NULL)
 		return -EMSGSIZE;
 
-	for (i = 0; i < mdb->max; i++) {
-		struct net_bridge_mdb_entry *mp;
+	hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
 		struct net_bridge_port_group *p;
 		struct net_bridge_port_group __rcu **pp;
-		struct net_bridge_port *port;
 
-		hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
-			if (idx < s_idx)
-				goto skip;
+		if (idx < s_idx)
+			goto skip;
 
-			nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
-			if (nest2 == NULL) {
-				err = -EMSGSIZE;
+		nest2 = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
+		if (!nest2) {
+			err = -EMSGSIZE;
+			break;
+		}
+
+		if (mp->host_joined) {
+			err = __mdb_fill_info(skb, mp, NULL);
+			if (err) {
+				nla_nest_cancel(skb, nest2);
+				break;
+			}
+		}
+
+		for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+		      pp = &p->next) {
+			if (!p->port)
+				continue;
+
+			err = __mdb_fill_info(skb, mp, p);
+			if (err) {
+				nla_nest_cancel(skb, nest2);
 				goto out;
 			}
-
-			for (pp = &mp->ports;
-			     (p = rcu_dereference(*pp)) != NULL;
-			      pp = &p->next) {
-				struct nlattr *nest_ent;
-				struct br_mdb_entry e;
-
-				port = p->port;
-				if (!port)
-					continue;
-
-				memset(&e, 0, sizeof(e));
-				e.ifindex = port->dev->ifindex;
-				e.vid = p->addr.vid;
-				__mdb_entry_fill_flags(&e, p->flags);
-				if (p->addr.proto == htons(ETH_P_IP))
-					e.addr.u.ip4 = p->addr.u.ip4;
-#if IS_ENABLED(CONFIG_IPV6)
-				if (p->addr.proto == htons(ETH_P_IPV6))
-					e.addr.u.ip6 = p->addr.u.ip6;
-#endif
-				e.addr.proto = p->addr.proto;
-				nest_ent = nla_nest_start(skb,
-							  MDBA_MDB_ENTRY_INFO);
-				if (!nest_ent) {
-					nla_nest_cancel(skb, nest2);
-					err = -EMSGSIZE;
-					goto out;
-				}
-				if (nla_put_nohdr(skb, sizeof(e), &e) ||
-				    nla_put_u32(skb,
-						MDBA_MDB_EATTR_TIMER,
-						br_timer_value(&p->timer))) {
-					nla_nest_cancel(skb, nest_ent);
-					nla_nest_cancel(skb, nest2);
-					err = -EMSGSIZE;
-					goto out;
-				}
-				nla_nest_end(skb, nest_ent);
-			}
-			nla_nest_end(skb, nest2);
-		skip:
-			idx++;
 		}
+		nla_nest_end(skb, nest2);
+skip:
+		idx++;
 	}
 
 out:
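
__mdb_fill_info() factors the per-entry attribute emission out of the dump loop and follows the standard nest/cancel pattern; nla_nest_start_noflag() is the newer spelling of the old nla_nest_start(), which now additionally sets NLA_F_NESTED. The generic shape of the pattern (the EXAMPLE_ATTR_* IDs are hypothetical):

	static int example_fill(struct sk_buff *skb, u32 value)
	{
		struct nlattr *nest;

		nest = nla_nest_start_noflag(skb, EXAMPLE_ATTR_NEST);
		if (!nest)
			return -EMSGSIZE;

		if (nla_put_u32(skb, EXAMPLE_ATTR_VALUE, value)) {
			nla_nest_cancel(skb, nest);	/* roll back partial nest */
			return -EMSGSIZE;
		}

		nla_nest_end(skb, nest);
		return 0;
	}
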
@@ -162,6 +182,29 @@
 	return err;
 }
 
+static int br_mdb_valid_dump_req(const struct nlmsghdr *nlh,
+				 struct netlink_ext_ack *extack)
+{
+	struct br_port_msg *bpm;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for mdb dump request");
+		return -EINVAL;
+	}
+
+	bpm = nlmsg_data(nlh);
+	if (bpm->ifindex) {
+		NL_SET_ERR_MSG_MOD(extack, "Filtering by device index is not supported for mdb dump request");
+		return -EINVAL;
+	}
+	if (nlmsg_attrlen(nlh, sizeof(*bpm))) {
+		NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
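+
A word on br_mdb_valid_dump_req(): it implements the netlink strict-checking discipline for dump requests, rejecting a request whose header is short, whose filter field the dumper does not honour, or which carries trailing attributes, with the reason reported through extack. The wiring in the dump handler follows the usual pattern, as the br_mdb_dump() hunk below shows; schematically:

	/* pattern for strict-checking-aware dumpers (sketch) */
	static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
	{
		if (cb->strict_check) {
			int err = example_valid_dump_req(cb->nlh, cb->extack);

			if (err < 0)
				return err;	/* extack carries the reason */
		}
		/* ... iterate objects and fill skb ... */
		return skb->len;
	}
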
 static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net_device *dev;
@@ -169,12 +212,18 @@
 	struct nlmsghdr *nlh = NULL;
 	int idx = 0, s_idx;
 
+	if (cb->strict_check) {
+		int err = br_mdb_valid_dump_req(cb->nlh, cb->extack);
+
+		if (err < 0)
+			return err;
+	}
+
 	s_idx = cb->args[0];
 
 	rcu_read_lock();
 
-	/* In theory this could be wrapped to 0... */
-	cb->seq = net->dev_base_seq + br_mdb_rehash_seq;
+	cb->seq = net->dev_base_seq;
 
 	for_each_netdev_rcu(net, dev) {
 		if (dev->priv_flags & IFF_EBRIDGE) {
@@ -229,10 +278,10 @@
 	memset(bpm, 0, sizeof(*bpm));
 	bpm->family  = AF_BRIDGE;
 	bpm->ifindex = dev->ifindex;
-	nest = nla_nest_start(skb, MDBA_MDB);
+	nest = nla_nest_start_noflag(skb, MDBA_MDB);
 	if (nest == NULL)
 		goto cancel;
-	nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
+	nest2 = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
 	if (nest2 == NULL)
 		goto end;
 
@@ -267,7 +316,6 @@
 	struct br_mdb_complete_info *data = priv;
 	struct net_bridge_port_group __rcu **pp;
 	struct net_bridge_port_group *p;
-	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port *port = data->port;
 	struct net_bridge *br = port->br;
@@ -276,8 +324,7 @@
 		goto err;
 
 	spin_lock_bh(&br->multicast_lock);
-	mdb = mlock_dereference(br->mdb, br);
-	mp = br_mdb_ip_get(mdb, &data->ip);
+	mp = br_mdb_ip_get(br, &data->ip);
 	if (!mp)
 		goto out;
 	for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
@@ -314,7 +361,7 @@
 	mdb.obj.orig_dev = dev;
 	switch (type) {
 	case RTM_NEWMDB:
-		switchdev_port_obj_add(lower_dev, &mdb.obj);
+		switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
 		break;
 	case RTM_DELMDB:
 		switchdev_port_obj_del(lower_dev, &mdb.obj);
@@ -364,7 +411,7 @@
 			__mdb_entry_to_br_ip(entry, &complete_info->ip);
 			mdb.obj.complete_priv = complete_info;
 			mdb.obj.complete = br_mdb_complete;
-			if (switchdev_port_obj_add(port_dev, &mdb.obj))
+			if (switchdev_port_obj_add(port_dev, &mdb.obj, NULL))
 				kfree(complete_info);
 		}
 	} else if (p && port_dev && type == RTM_DELMDB) {
@@ -419,7 +466,7 @@
 	struct nlmsghdr *nlh;
 	struct nlattr *nest;
 
-	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
 	if (!nlh)
 		return -EMSGSIZE;
 
@@ -427,7 +474,7 @@
 	memset(bpm, 0, sizeof(*bpm));
 	bpm->family = AF_BRIDGE;
 	bpm->ifindex = dev->ifindex;
-	nest = nla_nest_start(skb, MDBA_ROUTER);
+	nest = nla_nest_start_noflag(skb, MDBA_ROUTER);
 	if (!nest)
 		goto cancel;
 
@@ -512,8 +559,8 @@
 	struct net_device *dev;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL,
-			  NULL);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
+				     MDBA_SET_ENTRY_MAX, NULL, NULL);
 	if (err < 0)
 		return err;
 
@@ -558,19 +605,30 @@
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group __rcu **pp;
-	struct net_bridge_mdb_htable *mdb;
 	unsigned long now = jiffies;
 	int err;
 
-	mdb = mlock_dereference(br->mdb, br);
-	mp = br_mdb_ip_get(mdb, group);
+	mp = br_mdb_ip_get(br, group);
 	if (!mp) {
-		mp = br_multicast_new_group(br, port, group);
+		mp = br_multicast_new_group(br, group);
 		err = PTR_ERR_OR_ZERO(mp);
 		if (err)
 			return err;
 	}
 
+	/* host join */
+	if (!port) {
+		/* don't allow any flags for host-joined groups */
+		if (state)
+			return -EINVAL;
+		if (mp->host_joined)
+			return -EEXIST;
+
+		br_multicast_host_join(mp, false);
+
+		return 0;
+	}
+
 	for (pp = &mp->ports;
 	     (p = mlock_dereference(*pp, br)) != NULL;
 	     pp = &p->next) {
@@ -595,19 +653,21 @@
 {
 	struct br_ip ip;
 	struct net_device *dev;
-	struct net_bridge_port *p;
+	struct net_bridge_port *p = NULL;
 	int ret;
 
-	if (!netif_running(br->dev) || br->multicast_disabled)
+	if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
 		return -EINVAL;
 
-	dev = __dev_get_by_index(net, entry->ifindex);
-	if (!dev)
-		return -ENODEV;
+	if (entry->ifindex != br->dev->ifindex) {
+		dev = __dev_get_by_index(net, entry->ifindex);
+		if (!dev)
+			return -ENODEV;
 
-	p = br_port_get_rtnl(dev);
-	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
-		return -EINVAL;
+		p = br_port_get_rtnl(dev);
+		if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+			return -EINVAL;
+	}
 
 	__mdb_entry_to_br_ip(entry, &ip);
 
@@ -622,9 +682,9 @@
 {
 	struct net *net = sock_net(skb->sk);
 	struct net_bridge_vlan_group *vg;
+	struct net_bridge_port *p = NULL;
 	struct net_device *dev, *pdev;
 	struct br_mdb_entry *entry;
-	struct net_bridge_port *p;
 	struct net_bridge_vlan *v;
 	struct net_bridge *br;
 	int err;
@@ -635,18 +695,22 @@
 
 	br = netdev_priv(dev);
 
+	if (entry->ifindex != br->dev->ifindex) {
+		pdev = __dev_get_by_index(net, entry->ifindex);
+		if (!pdev)
+			return -ENODEV;
+
+		p = br_port_get_rtnl(pdev);
+		if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+			return -EINVAL;
+		vg = nbp_vlan_group(p);
+	} else {
+		vg = br_vlan_group(br);
+	}
+
 	/* If vlan filtering is enabled and no VLAN is specified, install
 	 * the mdb entry on all VLANs configured on the port.
 	 */
-	pdev = __dev_get_by_index(net, entry->ifindex);
-	if (!pdev)
-		return -ENODEV;
-
-	p = br_port_get_rtnl(pdev);
-	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
-		return -EINVAL;
-
-	vg = nbp_vlan_group(p);
 	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
@@ -666,25 +730,31 @@
 
 static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
 {
-	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group __rcu **pp;
 	struct br_ip ip;
 	int err = -EINVAL;
 
-	if (!netif_running(br->dev) || br->multicast_disabled)
+	if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
 		return -EINVAL;
 
 	__mdb_entry_to_br_ip(entry, &ip);
 
 	spin_lock_bh(&br->multicast_lock);
-	mdb = mlock_dereference(br->mdb, br);
-
-	mp = br_mdb_ip_get(mdb, &ip);
+	mp = br_mdb_ip_get(br, &ip);
 	if (!mp)
 		goto unlock;
 
+	/* host leave */
+	if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) {
+		br_multicast_host_leave(mp, false);
+		err = 0;
+		if (!mp->ports && netif_running(br->dev))
+			mod_timer(&mp->timer, jiffies);
+		goto unlock;
+	}
+
 	for (pp = &mp->ports;
 	     (p = mlock_dereference(*pp, br)) != NULL;
 	     pp = &p->next) {
@@ -698,7 +768,7 @@
 		rcu_assign_pointer(*pp, p->next);
 		hlist_del_init(&p->mglist);
 		del_timer(&p->timer);
-		call_rcu_bh(&p->rcu, br_multicast_free_pg);
+		kfree_rcu(p, rcu);
 		err = 0;
 
 		if (!mp->ports && !mp->host_joined &&
@@ -717,9 +787,9 @@
 {
 	struct net *net = sock_net(skb->sk);
 	struct net_bridge_vlan_group *vg;
+	struct net_bridge_port *p = NULL;
 	struct net_device *dev, *pdev;
 	struct br_mdb_entry *entry;
-	struct net_bridge_port *p;
 	struct net_bridge_vlan *v;
 	struct net_bridge *br;
 	int err;
@@ -730,18 +800,22 @@
 
 	br = netdev_priv(dev);
 
+	if (entry->ifindex != br->dev->ifindex) {
+		pdev = __dev_get_by_index(net, entry->ifindex);
+		if (!pdev)
+			return -ENODEV;
+
+		p = br_port_get_rtnl(pdev);
+		if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+			return -EINVAL;
+		vg = nbp_vlan_group(p);
+	} else {
+		vg = br_vlan_group(br);
+	}
+
 	/* If vlan filtering is enabled and no VLAN is specified, delete
 	 * the mdb entry on all VLANs configured on the port.
 	 */
-	pdev = __dev_get_by_index(net, entry->ifindex);
-	if (!pdev)
-		return -ENODEV;
-
-	p = br_port_get_rtnl(pdev);
-	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
-		return -EINVAL;
-
-	vg = nbp_vlan_group(p);
 	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 6dec8e9..ad12fe3 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1,19 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Bridge multicast support.
  *
  * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  */
 
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/if_ether.h>
 #include <linux/igmp.h>
+#include <linux/in.h>
 #include <linux/jhash.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
@@ -29,6 +25,7 @@
 #include <net/ip.h>
 #include <net/switchdev.h>
 #if IS_ENABLED(CONFIG_IPV6)
+#include <linux/icmpv6.h>
 #include <net/ipv6.h>
 #include <net/mld.h>
 #include <net/ip6_checksum.h>
@@ -37,6 +34,13 @@
 
 #include "br_private.h"
 
+static const struct rhashtable_params br_mdb_rht_params = {
+	.head_offset = offsetof(struct net_bridge_mdb_entry, rhnode),
+	.key_offset = offsetof(struct net_bridge_mdb_entry, addr),
+	.key_len = sizeof(struct br_ip),
+	.automatic_shrinking = true,
+};
+
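+
These parameters replace the bridge's hand-rolled mdb hash table (with its manual rehashing and elasticity heuristics, removed further down) with a generic rhashtable keyed on struct br_ip: head_offset and key_offset locate the hash linkage and the key inside each entry, key_len sizes the bytewise key comparison, and automatic_shrinking lets the table shrink as groups expire. A minimal standalone use of the same API, with a hypothetical item type:

	struct example_item {
		u32 key;
		struct rhash_head node;
	};

	static const struct rhashtable_params example_params = {
		.head_offset = offsetof(struct example_item, node),
		.key_offset  = offsetof(struct example_item, key),
		.key_len     = sizeof(u32),
		.automatic_shrinking = true,
	};

	/* rhashtable_init(&tbl, &example_params);
	 * item = rhashtable_lookup_fast(&tbl, &key, example_params);
	 * rhashtable_remove_fast(&tbl, &item->node, example_params);
	 */
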
 static void br_multicast_start_querier(struct net_bridge *br,
 				       struct bridge_mcast_own_query *query);
 static void br_multicast_add_router(struct net_bridge *br,
@@ -54,116 +58,68 @@
 					 const struct in6_addr *group,
 					 __u16 vid, const unsigned char *src);
 #endif
-unsigned int br_mdb_rehash_seq;
 
-static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
+static struct net_bridge_mdb_entry *br_mdb_ip_get_rcu(struct net_bridge *br,
+						      struct br_ip *dst)
 {
-	if (a->proto != b->proto)
-		return 0;
-	if (a->vid != b->vid)
-		return 0;
-	switch (a->proto) {
-	case htons(ETH_P_IP):
-		return a->u.ip4 == b->u.ip4;
-#if IS_ENABLED(CONFIG_IPV6)
-	case htons(ETH_P_IPV6):
-		return ipv6_addr_equal(&a->u.ip6, &b->u.ip6);
-#endif
-	}
-	return 0;
+	return rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params);
 }
 
-static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip,
-				__u16 vid)
-{
-	return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1);
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
-				const struct in6_addr *ip,
-				__u16 vid)
-{
-	return jhash_2words(ipv6_addr_hash(ip), vid,
-			    mdb->secret) & (mdb->max - 1);
-}
-#endif
-
-static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
-			     struct br_ip *ip)
-{
-	switch (ip->proto) {
-	case htons(ETH_P_IP):
-		return __br_ip4_hash(mdb, ip->u.ip4, ip->vid);
-#if IS_ENABLED(CONFIG_IPV6)
-	case htons(ETH_P_IPV6):
-		return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid);
-#endif
-	}
-	return 0;
-}
-
-static struct net_bridge_mdb_entry *__br_mdb_ip_get(
-	struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash)
-{
-	struct net_bridge_mdb_entry *mp;
-
-	hlist_for_each_entry_rcu(mp, &mdb->mhash[hash], hlist[mdb->ver]) {
-		if (br_ip_equal(&mp->addr, dst))
-			return mp;
-	}
-
-	return NULL;
-}
-
-struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb,
+struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge *br,
 					   struct br_ip *dst)
 {
-	if (!mdb)
-		return NULL;
+	struct net_bridge_mdb_entry *ent;
 
-	return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
+	lockdep_assert_held_once(&br->multicast_lock);
+
+	rcu_read_lock();
+	ent = rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params);
+	rcu_read_unlock();
+
+	return ent;
 }
 
-static struct net_bridge_mdb_entry *br_mdb_ip4_get(
-	struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vid)
+static struct net_bridge_mdb_entry *br_mdb_ip4_get(struct net_bridge *br,
+						   __be32 dst, __u16 vid)
 {
 	struct br_ip br_dst;
 
+	memset(&br_dst, 0, sizeof(br_dst));
 	br_dst.u.ip4 = dst;
 	br_dst.proto = htons(ETH_P_IP);
 	br_dst.vid = vid;
 
-	return br_mdb_ip_get(mdb, &br_dst);
+	return br_mdb_ip_get(br, &br_dst);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static struct net_bridge_mdb_entry *br_mdb_ip6_get(
-	struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst,
-	__u16 vid)
+static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
+						   const struct in6_addr *dst,
+						   __u16 vid)
 {
 	struct br_ip br_dst;
 
+	memset(&br_dst, 0, sizeof(br_dst));
 	br_dst.u.ip6 = *dst;
 	br_dst.proto = htons(ETH_P_IPV6);
 	br_dst.vid = vid;
 
-	return br_mdb_ip_get(mdb, &br_dst);
+	return br_mdb_ip_get(br, &br_dst);
 }
 #endif
 
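Because struct br_ip is now compared bytewise as an rhashtable key, the new memset() calls matter: an on-stack key must be fully zeroed, or padding bytes and the unused half of the address union would make the lookup miss. The pattern every caller in this file now follows:

	struct br_ip key;

	memset(&key, 0, sizeof(key));	/* clear padding + unused union bytes */
	key.u.ip4 = dst;
	key.proto = htons(ETH_P_IP);
	key.vid = vid;
	mp = br_mdb_ip_get(br, &key);
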
 struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 					struct sk_buff *skb, u16 vid)
 {
-	struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb);
 	struct br_ip ip;
 
-	if (br->multicast_disabled)
+	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
 		return NULL;
 
 	if (BR_INPUT_SKB_CB(skb)->igmp)
 		return NULL;
 
+	memset(&ip, 0, sizeof(ip));
 	ip.proto = skb->protocol;
 	ip.vid = vid;
 
@@ -180,87 +136,28 @@
 		return NULL;
 	}
 
-	return br_mdb_ip_get(mdb, &ip);
-}
-
-static void br_mdb_free(struct rcu_head *head)
-{
-	struct net_bridge_mdb_htable *mdb =
-		container_of(head, struct net_bridge_mdb_htable, rcu);
-	struct net_bridge_mdb_htable *old = mdb->old;
-
-	mdb->old = NULL;
-	kfree(old->mhash);
-	kfree(old);
-}
-
-static int br_mdb_copy(struct net_bridge_mdb_htable *new,
-		       struct net_bridge_mdb_htable *old,
-		       int elasticity)
-{
-	struct net_bridge_mdb_entry *mp;
-	int maxlen;
-	int len;
-	int i;
-
-	for (i = 0; i < old->max; i++)
-		hlist_for_each_entry(mp, &old->mhash[i], hlist[old->ver])
-			hlist_add_head(&mp->hlist[new->ver],
-				       &new->mhash[br_ip_hash(new, &mp->addr)]);
-
-	if (!elasticity)
-		return 0;
-
-	maxlen = 0;
-	for (i = 0; i < new->max; i++) {
-		len = 0;
-		hlist_for_each_entry(mp, &new->mhash[i], hlist[new->ver])
-			len++;
-		if (len > maxlen)
-			maxlen = len;
-	}
-
-	return maxlen > elasticity ? -EINVAL : 0;
-}
-
-void br_multicast_free_pg(struct rcu_head *head)
-{
-	struct net_bridge_port_group *p =
-		container_of(head, struct net_bridge_port_group, rcu);
-
-	kfree(p);
-}
-
-static void br_multicast_free_group(struct rcu_head *head)
-{
-	struct net_bridge_mdb_entry *mp =
-		container_of(head, struct net_bridge_mdb_entry, rcu);
-
-	kfree(mp);
+	return br_mdb_ip_get_rcu(br, &ip);
 }
 
 static void br_multicast_group_expired(struct timer_list *t)
 {
 	struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
 	struct net_bridge *br = mp->br;
-	struct net_bridge_mdb_htable *mdb;
 
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) || timer_pending(&mp->timer))
 		goto out;
 
-	mp->host_joined = false;
-	br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
+	br_multicast_host_leave(mp, true);
 
 	if (mp->ports)
 		goto out;
 
-	mdb = mlock_dereference(br->mdb, br);
+	rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
+			       br_mdb_rht_params);
+	hlist_del_rcu(&mp->mdb_node);
 
-	hlist_del_rcu(&mp->hlist[mdb->ver]);
-	mdb->size--;
-
-	call_rcu_bh(&mp->rcu, br_multicast_free_group);
+	kfree_rcu(mp, rcu);
 
 out:
 	spin_unlock(&br->multicast_lock);
@@ -269,14 +166,11 @@
 static void br_multicast_del_pg(struct net_bridge *br,
 				struct net_bridge_port_group *pg)
 {
-	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group __rcu **pp;
 
-	mdb = mlock_dereference(br->mdb, br);
-
-	mp = br_mdb_ip_get(mdb, &pg->addr);
+	mp = br_mdb_ip_get(br, &pg->addr);
 	if (WARN_ON(!mp))
 		return;
 
@@ -291,7 +185,7 @@
 		del_timer(&p->timer);
 		br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
 			      p->flags);
-		call_rcu_bh(&p->rcu, br_multicast_free_pg);
+		kfree_rcu(p, rcu);
 
 		if (!mp->ports && !mp->host_joined &&
 		    netif_running(br->dev))
@@ -319,53 +213,6 @@
 	spin_unlock(&br->multicast_lock);
 }
 
-static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max,
-			 int elasticity)
-{
-	struct net_bridge_mdb_htable *old = rcu_dereference_protected(*mdbp, 1);
-	struct net_bridge_mdb_htable *mdb;
-	int err;
-
-	mdb = kmalloc(sizeof(*mdb), GFP_ATOMIC);
-	if (!mdb)
-		return -ENOMEM;
-
-	mdb->max = max;
-	mdb->old = old;
-
-	mdb->mhash = kcalloc(max, sizeof(*mdb->mhash), GFP_ATOMIC);
-	if (!mdb->mhash) {
-		kfree(mdb);
-		return -ENOMEM;
-	}
-
-	mdb->size = old ? old->size : 0;
-	mdb->ver = old ? old->ver ^ 1 : 0;
-
-	if (!old || elasticity)
-		get_random_bytes(&mdb->secret, sizeof(mdb->secret));
-	else
-		mdb->secret = old->secret;
-
-	if (!old)
-		goto out;
-
-	err = br_mdb_copy(mdb, old, elasticity);
-	if (err) {
-		kfree(mdb->mhash);
-		kfree(mdb);
-		return err;
-	}
-
-	br_mdb_rehash_seq++;
-	call_rcu_bh(&mdb->rcu, br_mdb_free);
-
-out:
-	rcu_assign_pointer(*mdbp, mdb);
-
-	return 0;
-}
-
 static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
 						    __be32 group,
 						    u8 *igmp_type)
@@ -411,7 +258,7 @@
 	iph->frag_off = htons(IP_DF);
 	iph->ttl = 1;
 	iph->protocol = IPPROTO_IGMP;
-	iph->saddr = br->multicast_query_use_ifaddr ?
+	iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
 		     inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
 	iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
 	((u8 *)&iph[1])[0] = IPOPT_RA;
@@ -503,11 +350,11 @@
 	if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
 			       &ip6h->saddr)) {
 		kfree_skb(skb);
-		br->has_ipv6_addr = 0;
+		br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, false);
 		return NULL;
 	}
 
-	br->has_ipv6_addr = 1;
+	br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
 	ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
 	hopopt = (u8 *)(ip6h + 1);
@@ -589,111 +436,19 @@
 	return NULL;
 }
 
-static struct net_bridge_mdb_entry *br_multicast_get_group(
-	struct net_bridge *br, struct net_bridge_port *port,
-	struct br_ip *group, int hash)
-{
-	struct net_bridge_mdb_htable *mdb;
-	struct net_bridge_mdb_entry *mp;
-	unsigned int count = 0;
-	unsigned int max;
-	int elasticity;
-	int err;
-
-	mdb = rcu_dereference_protected(br->mdb, 1);
-	hlist_for_each_entry(mp, &mdb->mhash[hash], hlist[mdb->ver]) {
-		count++;
-		if (unlikely(br_ip_equal(group, &mp->addr)))
-			return mp;
-	}
-
-	elasticity = 0;
-	max = mdb->max;
-
-	if (unlikely(count > br->hash_elasticity && count)) {
-		if (net_ratelimit())
-			br_info(br, "Multicast hash table "
-				"chain limit reached: %s\n",
-				port ? port->dev->name : br->dev->name);
-
-		elasticity = br->hash_elasticity;
-	}
-
-	if (mdb->size >= max) {
-		max *= 2;
-		if (unlikely(max > br->hash_max)) {
-			br_warn(br, "Multicast hash table maximum of %d "
-				"reached, disabling snooping: %s\n",
-				br->hash_max,
-				port ? port->dev->name : br->dev->name);
-			err = -E2BIG;
-disable:
-			br->multicast_disabled = 1;
-			goto err;
-		}
-	}
-
-	if (max > mdb->max || elasticity) {
-		if (mdb->old) {
-			if (net_ratelimit())
-				br_info(br, "Multicast hash table "
-					"on fire: %s\n",
-					port ? port->dev->name : br->dev->name);
-			err = -EEXIST;
-			goto err;
-		}
-
-		err = br_mdb_rehash(&br->mdb, max, elasticity);
-		if (err) {
-			br_warn(br, "Cannot rehash multicast "
-				"hash table, disabling snooping: %s, %d, %d\n",
-				port ? port->dev->name : br->dev->name,
-				mdb->size, err);
-			goto disable;
-		}
-
-		err = -EAGAIN;
-		goto err;
-	}
-
-	return NULL;
-
-err:
-	mp = ERR_PTR(err);
-	return mp;
-}
-
 struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
-						    struct net_bridge_port *p,
 						    struct br_ip *group)
 {
-	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
-	int hash;
 	int err;
 
-	mdb = rcu_dereference_protected(br->mdb, 1);
-	if (!mdb) {
-		err = br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0);
-		if (err)
-			return ERR_PTR(err);
-		goto rehash;
-	}
+	mp = br_mdb_ip_get(br, group);
+	if (mp)
+		return mp;
 
-	hash = br_ip_hash(mdb, group);
-	mp = br_multicast_get_group(br, p, group, hash);
-	switch (PTR_ERR(mp)) {
-	case 0:
-		break;
-
-	case -EAGAIN:
-rehash:
-		mdb = rcu_dereference_protected(br->mdb, 1);
-		hash = br_ip_hash(mdb, group);
-		break;
-
-	default:
-		goto out;
+	if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+		br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
+		return ERR_PTR(-E2BIG);
 	}
 
 	mp = kzalloc(sizeof(*mp), GFP_ATOMIC);
@@ -703,11 +458,15 @@
 	mp->br = br;
 	mp->addr = *group;
 	timer_setup(&mp->timer, br_multicast_group_expired, 0);
+	err = rhashtable_lookup_insert_fast(&br->mdb_hash_tbl, &mp->rhnode,
+					    br_mdb_rht_params);
+	if (err) {
+		kfree(mp);
+		mp = ERR_PTR(err);
+	} else {
+		hlist_add_head_rcu(&mp->mdb_node, &br->mdb_list);
+	}
 
-	hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
-	mdb->size++;
-
-out:
 	return mp;
 }
 
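br_multicast_new_group() now relies on rhashtable_lookup_insert_fast(), which atomically refuses to insert a duplicate key, so a concurrent-add race surfaces as a plain error instead of corrupting a chain; on success the entry is also linked into mdb_list, the list the dump code walks. The general idiom, sketched with hypothetical names:

	new = kzalloc(sizeof(*new), GFP_ATOMIC);
	if (!new)
		return ERR_PTR(-ENOMEM);
	new->key = *key;

	err = rhashtable_lookup_insert_fast(&tbl, &new->node, example_params);
	if (err) {			/* e.g. -EEXIST on a lost race */
		kfree(new);
		return ERR_PTR(err);
	}
	hlist_add_head_rcu(&new->list_node, &all_entries);
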
@@ -734,7 +493,7 @@
 	if (src)
 		memcpy(p->eth_addr, src, ETH_ALEN);
 	else
-		memset(p->eth_addr, 0xff, ETH_ALEN);
+		eth_broadcast_addr(p->eth_addr);
 
 	return p;
 }
@@ -752,6 +511,27 @@
 	return ether_addr_equal(src, p->eth_addr);
 }
 
+void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
+{
+	if (!mp->host_joined) {
+		mp->host_joined = true;
+		if (notify)
+			br_mdb_notify(mp->br->dev, NULL, &mp->addr,
+				      RTM_NEWMDB, 0);
+	}
+	mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
+}
+
+void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
+{
+	if (!mp->host_joined)
+		return;
+
+	mp->host_joined = false;
+	if (notify)
+		br_mdb_notify(mp->br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
+}
+
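+
br_multicast_host_join()/_leave() centralize the host-joined state of a group. The notify argument controls whether an RTM_NEWMDB/RTM_DELMDB event is emitted: the data path and timer expiry pass true, while the netlink add path in br_mdb.c passes false, apparently because that caller sends its own notification once the whole operation has succeeded. In short:

	br_multicast_host_join(mp, true);	/* data path: notify now */
	/* ... */
	br_multicast_host_join(mp, false);	/* netlink path: caller notifies */
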
 static int br_multicast_add_group(struct net_bridge *br,
 				  struct net_bridge_port *port,
 				  struct br_ip *group,
@@ -768,17 +548,13 @@
 	    (port && port->state == BR_STATE_DISABLED))
 		goto out;
 
-	mp = br_multicast_new_group(br, port, group);
+	mp = br_multicast_new_group(br, group);
 	err = PTR_ERR(mp);
 	if (IS_ERR(mp))
 		goto err;
 
 	if (!port) {
-		if (!mp->host_joined) {
-			mp->host_joined = true;
-			br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
-		}
-		mod_timer(&mp->timer, now + br->multicast_membership_interval);
+		br_multicast_host_join(mp, true);
 		goto out;
 	}
 
@@ -818,6 +594,7 @@
 	if (ipv4_is_local_multicast(group))
 		return 0;
 
+	memset(&br_group, 0, sizeof(br_group));
 	br_group.u.ip4 = group;
 	br_group.proto = htons(ETH_P_IP);
 	br_group.vid = vid;
@@ -837,6 +614,7 @@
 	if (ipv6_addr_is_ll_all_nodes(group))
 		return 0;
 
+	memset(&br_group, 0, sizeof(br_group));
 	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
 	br_group.vid = vid;
@@ -894,7 +672,7 @@
 					 struct bridge_mcast_own_query *query)
 {
 	spin_lock(&br->multicast_lock);
-	if (!netif_running(br->dev) || br->multicast_disabled)
+	if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
 		goto out;
 
 	br_multicast_start_querier(br, query);
@@ -965,8 +743,9 @@
 	struct br_ip br_group;
 	unsigned long time;
 
-	if (!netif_running(br->dev) || br->multicast_disabled ||
-	    !br->multicast_querier)
+	if (!netif_running(br->dev) ||
+	    !br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+	    !br_opt_get(br, BROPT_MULTICAST_QUERIER))
 		return;
 
 	memset(&br_group.u, 0, sizeof(br_group.u));
@@ -1036,7 +815,7 @@
 		.orig_dev = dev,
 		.id = SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
 		.flags = SWITCHDEV_F_DEFER,
-		.u.mc_disabled = value,
+		.u.mc_disabled = !value,
 	};
 
 	switchdev_port_attr_set(dev, &attr);
@@ -1054,7 +833,8 @@
 	timer_setup(&port->ip6_own_query.timer,
 		    br_ip6_multicast_port_query_expired, 0);
 #endif
-	br_mc_disabled_update(port->dev, port->br->multicast_disabled);
+	br_mc_disabled_update(port->dev,
+			      br_opt_get(port->br, BROPT_MULTICAST_ENABLED));
 
 	port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats);
 	if (!port->mcast_stats)
@@ -1091,7 +871,7 @@
 {
 	struct net_bridge *br = port->br;
 
-	if (br->multicast_disabled || !netif_running(br->dev))
+	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev))
 		return;
 
 	br_multicast_enable(&port->ip4_own_query);
@@ -1147,6 +927,7 @@
 	int type;
 	int err = 0;
 	__be32 group;
+	u16 nsrcs;
 
 	ih = igmpv3_report_hdr(skb);
 	num = ntohs(ih->ngrec);
@@ -1154,15 +935,16 @@
 
 	for (i = 0; i < num; i++) {
 		len += sizeof(*grec);
-		if (!pskb_may_pull(skb, len))
+		if (!ip_mc_may_pull(skb, len))
 			return -EINVAL;
 
 		grec = (void *)(skb->data + len - sizeof(*grec));
 		group = grec->grec_mca;
 		type = grec->grec_type;
+		nsrcs = ntohs(grec->grec_nsrcs);
 
-		len += ntohs(grec->grec_nsrcs) * 4;
-		if (!pskb_may_pull(skb, len))
+		len += nsrcs * 4;
+		if (!ip_mc_may_pull(skb, len))
 			return -EINVAL;
 
 		/* We treat this as an IGMPv2 report for now. */
@@ -1182,7 +964,7 @@
 		src = eth_hdr(skb)->h_source;
 		if ((type == IGMPV3_CHANGE_TO_INCLUDE ||
 		     type == IGMPV3_MODE_IS_INCLUDE) &&
-		    ntohs(grec->grec_nsrcs) == 0) {
+		    nsrcs == 0) {
 			br_ip4_multicast_leave_group(br, port, group, vid, src);
 		} else {
 			err = br_ip4_multicast_add_group(br, port, group, vid,
@@ -1201,15 +983,17 @@
 					struct sk_buff *skb,
 					u16 vid)
 {
+	unsigned int nsrcs_offset;
 	const unsigned char *src;
 	struct icmp6hdr *icmp6h;
 	struct mld2_grec *grec;
+	unsigned int grec_len;
 	int i;
 	int len;
 	int num;
 	int err = 0;
 
-	if (!pskb_may_pull(skb, sizeof(*icmp6h)))
+	if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h)))
 		return -EINVAL;
 
 	icmp6h = icmp6_hdr(skb);
@@ -1217,23 +1001,28 @@
 	len = skb_transport_offset(skb) + sizeof(*icmp6h);
 
 	for (i = 0; i < num; i++) {
-		__be16 *nsrcs, _nsrcs;
+		__be16 *_nsrcs, __nsrcs;
+		u16 nsrcs;
 
-		nsrcs = skb_header_pointer(skb,
-					   len + offsetof(struct mld2_grec,
-							  grec_nsrcs),
-					   sizeof(_nsrcs), &_nsrcs);
-		if (!nsrcs)
+		nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs);
+
+		if (skb_transport_offset(skb) + ipv6_transport_len(skb) <
+		    nsrcs_offset + sizeof(_nsrcs))
 			return -EINVAL;
 
-		if (!pskb_may_pull(skb,
-				   len + sizeof(*grec) +
-				   sizeof(struct in6_addr) * ntohs(*nsrcs)))
+		_nsrcs = skb_header_pointer(skb, nsrcs_offset,
+					    sizeof(__nsrcs), &__nsrcs);
+		if (!_nsrcs)
+			return -EINVAL;
+
+		nsrcs = ntohs(*_nsrcs);
+		grec_len = struct_size(grec, grec_src, nsrcs);
+
+		if (!ipv6_mc_may_pull(skb, len + grec_len))
 			return -EINVAL;
 
 		grec = (struct mld2_grec *)(skb->data + len);
-		len += sizeof(*grec) +
-		       sizeof(struct in6_addr) * ntohs(*nsrcs);
+		len += grec_len;
 
 		/* We treat these as MLDv1 reports for now. */
 		switch (grec->grec_type) {
@@ -1252,7 +1041,7 @@
 		src = eth_hdr(skb)->h_source;
 		if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE ||
 		     grec->grec_type == MLD2_MODE_IS_INCLUDE) &&
-		    ntohs(*nsrcs) == 0) {
+		    nsrcs == 0) {
 			br_ip6_multicast_leave_group(br, port, &grec->grec_mca,
 						     vid, src);
 		} else {
@@ -1420,14 +1209,7 @@
 		return;
 
 	br_multicast_update_query_timer(br, query, max_delay);
-
-	/* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules,
-	 * the arrival port for IGMP Queries where the source address
-	 * is 0.0.0.0 should not be added to router port list.
-	 */
-	if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) ||
-	    saddr->proto == htons(ETH_P_IPV6))
-		br_multicast_mark_router(br, port);
+	br_multicast_mark_router(br, port);
 }
 
 static void br_ip4_multicast_query(struct net_bridge *br,
@@ -1435,6 +1217,7 @@
 				   struct sk_buff *skb,
 				   u16 vid)
 {
+	unsigned int transport_len = ip_transport_len(skb);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct igmphdr *ih = igmp_hdr(skb);
 	struct net_bridge_mdb_entry *mp;
@@ -1444,7 +1227,6 @@
 	struct br_ip saddr;
 	unsigned long max_delay;
 	unsigned long now = jiffies;
-	unsigned int offset = skb_transport_offset(skb);
 	__be32 group;
 
 	spin_lock(&br->multicast_lock);
@@ -1454,14 +1236,14 @@
 
 	group = ih->group;
 
-	if (skb->len == offset + sizeof(*ih)) {
+	if (transport_len == sizeof(*ih)) {
 		max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
 
 		if (!max_delay) {
 			max_delay = 10 * HZ;
 			group = 0;
 		}
-	} else if (skb->len >= offset + sizeof(*ih3)) {
+	} else if (transport_len >= sizeof(*ih3)) {
 		ih3 = igmpv3_query_hdr(skb);
 		if (ih3->nsrcs)
 			goto out;
@@ -1481,7 +1263,7 @@
 		goto out;
 	}
 
-	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid);
+	mp = br_mdb_ip4_get(br, group, vid);
 	if (!mp)
 		goto out;
 
@@ -1512,7 +1294,7 @@
 				  struct sk_buff *skb,
 				  u16 vid)
 {
-	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	unsigned int transport_len = ipv6_transport_len(skb);
 	struct mld_msg *mld;
 	struct net_bridge_mdb_entry *mp;
 	struct mld2_query *mld2q;
@@ -1531,7 +1313,7 @@
 	    (port && port->state == BR_STATE_DISABLED))
 		goto out;
 
-	if (skb->len == offset + sizeof(*mld)) {
+	if (transport_len == sizeof(*mld)) {
 		if (!pskb_may_pull(skb, offset + sizeof(*mld))) {
 			err = -EINVAL;
 			goto out;
@@ -1556,7 +1338,7 @@
 
 	if (is_general_query) {
 		saddr.proto = htons(ETH_P_IPV6);
-		saddr.u.ip6 = ip6h->saddr;
+		saddr.u.ip6 = ipv6_hdr(skb)->saddr;
 
 		br_multicast_query_received(br, port, &br->ip6_other_query,
 					    &saddr, max_delay);
@@ -1565,7 +1347,7 @@
 		goto out;
 	}
 
-	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid);
+	mp = br_mdb_ip6_get(br, group, vid);
 	if (!mp)
 		goto out;
 
@@ -1599,7 +1381,6 @@
 			 struct bridge_mcast_own_query *own_query,
 			 const unsigned char *src)
 {
-	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
 	unsigned long now;
@@ -1610,8 +1391,7 @@
 	    (port && port->state == BR_STATE_DISABLED))
 		goto out;
 
-	mdb = mlock_dereference(br->mdb, br);
-	mp = br_mdb_ip_get(mdb, group);
+	mp = br_mdb_ip_get(br, group);
 	if (!mp)
 		goto out;
 
@@ -1624,12 +1404,15 @@
 			if (!br_port_group_equal(p, port, src))
 				continue;
 
+			if (p->flags & MDB_PG_FLAGS_PERMANENT)
+				break;
+
 			rcu_assign_pointer(*pp, p->next);
 			hlist_del_init(&p->mglist);
 			del_timer(&p->timer);
-			call_rcu_bh(&p->rcu, br_multicast_free_pg);
+			kfree_rcu(p, rcu);
 			br_mdb_notify(br->dev, port, group, RTM_DELMDB,
-				      p->flags);
+				      p->flags | MDB_PG_FLAGS_FAST_LEAVE);
 
 			if (!mp->ports && !mp->host_joined &&
 			    netif_running(br->dev))
@@ -1641,7 +1424,7 @@
 	if (timer_pending(&other_query->timer))
 		goto out;
 
-	if (br->multicast_querier) {
+	if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
 		__br_multicast_send_query(br, port, &mp->addr);
 
 		time = jiffies + br->multicast_last_member_count *
@@ -1714,6 +1497,7 @@
 
 	own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
 
+	memset(&br_group, 0, sizeof(br_group));
 	br_group.u.ip4 = group;
 	br_group.proto = htons(ETH_P_IP);
 	br_group.vid = vid;
@@ -1737,6 +1521,7 @@
 
 	own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
 
+	memset(&br_group, 0, sizeof(br_group));
 	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
 	br_group.vid = vid;
@@ -1753,7 +1538,7 @@
 	struct bridge_mcast_stats __percpu *stats;
 	struct bridge_mcast_stats *pstats;
 
-	if (!br->multicast_stats_enabled)
+	if (!br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED))
 		return;
 
 	if (p)
@@ -1794,17 +1579,29 @@
 	br_multicast_mark_router(br, port);
 }
 
+static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
+				    struct net_bridge_port *port,
+				    struct sk_buff *skb)
+{
+	if (ip_hdr(skb)->protocol != IPPROTO_IGMP ||
+	    igmp_hdr(skb)->type != IGMP_MRDISC_ADV)
+		return -ENOMSG;
+
+	br_multicast_mark_router(br, port);
+
+	return 0;
+}
+
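+
br_ip4_multicast_mrd_rcv() (and its IPv6 twin further down) teach the bridge to mark router ports from Multicast Router Discovery advertisements (RFC 4286), in addition to queries and PIM hello. For orientation, the constants involved, as I understand the UAPI headers:

	/* RFC 4286 Multicast Router Advertisement, as matched above:
	 *   IGMP_MRDISC_ADV   == 0x30  (include/uapi/linux/igmp.h)
	 *   ICMPV6_MRDISC_ADV == 151   (include/uapi/linux/icmpv6.h)
	 * sent to the all-snoopers groups 224.0.0.106 / ff02::6a
	 */
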
 static int br_multicast_ipv4_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
 				 struct sk_buff *skb,
 				 u16 vid)
 {
-	struct sk_buff *skb_trimmed = NULL;
 	const unsigned char *src;
 	struct igmphdr *ih;
 	int err;
 
-	err = ip_mc_check_igmp(skb, &skb_trimmed);
+	err = ip_mc_check_igmp(skb);
 
 	if (err == -ENOMSG) {
 		if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) {
@@ -1812,7 +1609,10 @@
 		} else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) {
 			if (ip_hdr(skb)->protocol == IPPROTO_PIM)
 				br_multicast_pim(br, port, skb);
+		} else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) {
+			br_ip4_multicast_mrd_rcv(br, port, skb);
 		}
+
 		return 0;
 	} else if (err < 0) {
 		br_multicast_err_count(br, port, skb->protocol);
@@ -1830,19 +1630,16 @@
 		err = br_ip4_multicast_add_group(br, port, ih->group, vid, src);
 		break;
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
-		err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid);
+		err = br_ip4_multicast_igmp3_report(br, port, skb, vid);
 		break;
 	case IGMP_HOST_MEMBERSHIP_QUERY:
-		br_ip4_multicast_query(br, port, skb_trimmed, vid);
+		br_ip4_multicast_query(br, port, skb, vid);
 		break;
 	case IGMP_HOST_LEAVE_MESSAGE:
 		br_ip4_multicast_leave_group(br, port, ih->group, vid, src);
 		break;
 	}
 
-	if (skb_trimmed && skb_trimmed != skb)
-		kfree_skb(skb_trimmed);
-
 	br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
 			   BR_MCAST_DIR_RX);
 
@@ -1850,21 +1647,51 @@
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
+static int br_ip6_multicast_mrd_rcv(struct net_bridge *br,
+				    struct net_bridge_port *port,
+				    struct sk_buff *skb)
+{
+	int ret;
+
+	if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
+		return -ENOMSG;
+
+	ret = ipv6_mc_check_icmpv6(skb);
+	if (ret < 0)
+		return ret;
+
+	if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
+		return -ENOMSG;
+
+	br_multicast_mark_router(br, port);
+
+	return 0;
+}
+
 static int br_multicast_ipv6_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
 				 struct sk_buff *skb,
 				 u16 vid)
 {
-	struct sk_buff *skb_trimmed = NULL;
 	const unsigned char *src;
 	struct mld_msg *mld;
 	int err;
 
-	err = ipv6_mc_check_mld(skb, &skb_trimmed);
+	err = ipv6_mc_check_mld(skb);
 
 	if (err == -ENOMSG) {
 		if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
 			BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
+
+		if (ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr)) {
+			err = br_ip6_multicast_mrd_rcv(br, port, skb);
+
+			if (err < 0 && err != -ENOMSG) {
+				br_multicast_err_count(br, port, skb->protocol);
+				return err;
+			}
+		}
+
 		return 0;
 	} else if (err < 0) {
 		br_multicast_err_count(br, port, skb->protocol);
@@ -1882,10 +1709,10 @@
 						 src);
 		break;
 	case ICMPV6_MLD2_REPORT:
-		err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid);
+		err = br_ip6_multicast_mld2_report(br, port, skb, vid);
 		break;
 	case ICMPV6_MGM_QUERY:
-		err = br_ip6_multicast_query(br, port, skb_trimmed, vid);
+		err = br_ip6_multicast_query(br, port, skb, vid);
 		break;
 	case ICMPV6_MGM_REDUCTION:
 		src = eth_hdr(skb)->h_source;
@@ -1893,9 +1720,6 @@
 		break;
 	}
 
-	if (skb_trimmed && skb_trimmed != skb)
-		kfree_skb(skb_trimmed);
-
 	br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
 			   BR_MCAST_DIR_RX);
 
@@ -1911,7 +1735,7 @@
 	BR_INPUT_SKB_CB(skb)->igmp = 0;
 	BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
 
-	if (br->multicast_disabled)
+	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
 		return 0;
 
 	switch (skb->protocol) {
@@ -1959,12 +1783,9 @@
 
 void br_multicast_init(struct net_bridge *br)
 {
-	br->hash_elasticity = 4;
-	br->hash_max = 512;
+	br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
 
 	br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
-	br->multicast_querier = 0;
-	br->multicast_query_use_ifaddr = 0;
 	br->multicast_last_member_count = 2;
 	br->multicast_startup_query_count = 2;
 
@@ -1983,7 +1804,8 @@
 	br->ip6_other_query.delay_time = 0;
 	br->ip6_querier.port = NULL;
 #endif
-	br->has_ipv6_addr = 1;
+	br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
+	br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
 
 	spin_lock_init(&br->multicast_lock);
 	timer_setup(&br->multicast_router_timer,
@@ -1998,6 +1820,69 @@
 	timer_setup(&br->ip6_own_query.timer,
 		    br_ip6_multicast_query_expired, 0);
 #endif
+	INIT_HLIST_HEAD(&br->mdb_list);
+}
+
+static void br_ip4_multicast_join_snoopers(struct net_bridge *br)
+{
+	struct in_device *in_dev = in_dev_get(br->dev);
+
+	if (!in_dev)
+		return;
+
+	__ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC);
+	in_dev_put(in_dev);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_join_snoopers(struct net_bridge *br)
+{
+	struct in6_addr addr;
+
+	ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a));
+	ipv6_dev_mc_inc(br->dev, &addr);
+}
+#else
+static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br)
+{
+}
+#endif
+
+static void br_multicast_join_snoopers(struct net_bridge *br)
+{
+	br_ip4_multicast_join_snoopers(br);
+	br_ip6_multicast_join_snoopers(br);
+}
+
+static void br_ip4_multicast_leave_snoopers(struct net_bridge *br)
+{
+	struct in_device *in_dev = in_dev_get(br->dev);
+
+	if (WARN_ON(!in_dev))
+		return;
+
+	__ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC);
+	in_dev_put(in_dev);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_leave_snoopers(struct net_bridge *br)
+{
+	struct in6_addr addr;
+
+	ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a));
+	ipv6_dev_mc_dec(br->dev, &addr);
+}
+#else
+static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br)
+{
+}
+#endif
+
+static void br_multicast_leave_snoopers(struct net_bridge *br)
+{
+	br_ip4_multicast_leave_snoopers(br);
+	br_ip6_multicast_leave_snoopers(br);
 }
 
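The join/leave-snoopers helpers make the bridge itself a member of the all-snoopers groups while snooping is enabled, so that the MRD advertisements handled earlier in this file are actually delivered to it. The addresses being joined, spelled out:

	struct in6_addr addr;

	/* ff02:0000:0000:0000:0000:0000:0000:006a, built 32 bits at a time */
	ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a));

	/* IPv4 counterpart: INADDR_ALLSNOOPERS_GROUP
	 * (0xe000006a == 224.0.0.106, include/uapi/linux/in.h) */
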
 static void __br_multicast_open(struct net_bridge *br,
@@ -2005,7 +1890,7 @@
 {
 	query->startup_sent = 0;
 
-	if (br->multicast_disabled)
+	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
 		return;
 
 	mod_timer(&query->timer, jiffies);
@@ -2013,6 +1898,9 @@
 
 void br_multicast_open(struct net_bridge *br)
 {
+	if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
+		br_multicast_join_snoopers(br);
+
 	__br_multicast_open(br, &br->ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
 	__br_multicast_open(br, &br->ip6_own_query);
@@ -2028,44 +1916,27 @@
 	del_timer_sync(&br->ip6_other_query.timer);
 	del_timer_sync(&br->ip6_own_query.timer);
 #endif
+
+	if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
+		br_multicast_leave_snoopers(br);
 }
 
 void br_multicast_dev_del(struct net_bridge *br)
 {
-	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
-	struct hlist_node *n;
-	u32 ver;
-	int i;
+	struct hlist_node *tmp;
 
 	spin_lock_bh(&br->multicast_lock);
-	mdb = mlock_dereference(br->mdb, br);
-	if (!mdb)
-		goto out;
-
-	br->mdb = NULL;
-
-	ver = mdb->ver;
-	for (i = 0; i < mdb->max; i++) {
-		hlist_for_each_entry_safe(mp, n, &mdb->mhash[i],
-					  hlist[ver]) {
-			del_timer(&mp->timer);
-			call_rcu_bh(&mp->rcu, br_multicast_free_group);
-		}
+	hlist_for_each_entry_safe(mp, tmp, &br->mdb_list, mdb_node) {
+		del_timer(&mp->timer);
+		rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
+				       br_mdb_rht_params);
+		hlist_del_rcu(&mp->mdb_node);
+		kfree_rcu(mp, rcu);
 	}
-
-	if (mdb->old) {
-		spin_unlock_bh(&br->multicast_lock);
-		rcu_barrier_bh();
-		spin_lock_bh(&br->multicast_lock);
-		WARN_ON(mdb->old);
-	}
-
-	mdb->old = mdb;
-	call_rcu_bh(&mdb->rcu, br_mdb_free);
-
-out:
 	spin_unlock_bh(&br->multicast_lock);
+
+	rcu_barrier();
 }
 
 int br_multicast_set_router(struct net_bridge *br, unsigned long val)
@@ -2159,7 +2030,8 @@
 
 	__br_multicast_open(br, query);
 
-	list_for_each_entry(port, &br->port_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &br->port_list, list) {
 		if (port->state == BR_STATE_DISABLED ||
 		    port->state == BR_STATE_BLOCKING)
 			continue;
@@ -2171,41 +2043,27 @@
 			br_multicast_enable(&port->ip6_own_query);
 #endif
 	}
+	rcu_read_unlock();
 }
 
 int br_multicast_toggle(struct net_bridge *br, unsigned long val)
 {
-	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_port *port;
-	int err = 0;
 
 	spin_lock_bh(&br->multicast_lock);
-	if (br->multicast_disabled == !val)
+	if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val)
 		goto unlock;
 
-	br_mc_disabled_update(br->dev, !val);
-	br->multicast_disabled = !val;
-	if (br->multicast_disabled)
+	br_mc_disabled_update(br->dev, val);
+	br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val);
+	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
+		br_multicast_leave_snoopers(br);
 		goto unlock;
+	}
 
 	if (!netif_running(br->dev))
 		goto unlock;
 
-	mdb = mlock_dereference(br->mdb, br);
-	if (mdb) {
-		if (mdb->old) {
-			err = -EEXIST;
-rollback:
-			br->multicast_disabled = !!val;
-			goto unlock;
-		}
-
-		err = br_mdb_rehash(&br->mdb, mdb->max,
-				    br->hash_elasticity);
-		if (err)
-			goto rollback;
-	}
-
 	br_multicast_open(br);
 	list_for_each_entry(port, &br->port_list, list)
 		__br_multicast_enable_port(port);
@@ -2213,14 +2071,14 @@
 unlock:
 	spin_unlock_bh(&br->multicast_lock);
 
-	return err;
+	return 0;
 }
 
 bool br_multicast_enabled(const struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
-	return !br->multicast_disabled;
+	return !!br_opt_get(br, BROPT_MULTICAST_ENABLED);
 }
 EXPORT_SYMBOL_GPL(br_multicast_enabled);
 
@@ -2243,10 +2101,10 @@
 	val = !!val;
 
 	spin_lock_bh(&br->multicast_lock);
-	if (br->multicast_querier == val)
+	if (br_opt_get(br, BROPT_MULTICAST_QUERIER) == val)
 		goto unlock;
 
-	br->multicast_querier = val;
+	br_opt_toggle(br, BROPT_MULTICAST_QUERIER, !!val);
 	if (!val)
 		goto unlock;
 
@@ -2270,45 +2128,6 @@
 	return 0;
 }
 
-int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
-{
-	int err = -EINVAL;
-	u32 old;
-	struct net_bridge_mdb_htable *mdb;
-
-	spin_lock_bh(&br->multicast_lock);
-	if (!is_power_of_2(val))
-		goto unlock;
-
-	mdb = mlock_dereference(br->mdb, br);
-	if (mdb && val < mdb->size)
-		goto unlock;
-
-	err = 0;
-
-	old = br->hash_max;
-	br->hash_max = val;
-
-	if (mdb) {
-		if (mdb->old) {
-			err = -EEXIST;
-rollback:
-			br->hash_max = old;
-			goto unlock;
-		}
-
-		err = br_mdb_rehash(&br->mdb, br->hash_max,
-				    br->hash_elasticity);
-		if (err)
-			goto rollback;
-	}
-
-unlock:
-	spin_unlock_bh(&br->multicast_lock);
-
-	return err;
-}
-
 int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
 {
 	/* Currently we support only version 2 and 3 */
@@ -2374,7 +2193,7 @@
 	int count = 0;
 
 	rcu_read_lock();
-	if (!br_ip_list || !br_port_exists(dev))
+	if (!br_ip_list || !netif_is_bridge_port(dev))
 		goto unlock;
 
 	port = br_port_get_rcu(dev);
@@ -2421,7 +2240,7 @@
 	bool ret = false;
 
 	rcu_read_lock();
-	if (!br_port_exists(dev))
+	if (!netif_is_bridge_port(dev))
 		goto unlock;
 
 	port = br_port_get_rcu(dev);
@@ -2457,7 +2276,7 @@
 	bool ret = false;
 
 	rcu_read_lock();
-	if (!br_port_exists(dev))
+	if (!netif_is_bridge_port(dev))
 		goto unlock;
 
 	port = br_port_get_rcu(dev);
@@ -2567,7 +2386,7 @@
 	struct bridge_mcast_stats __percpu *stats;
 
 	/* if multicast_disabled is true then igmp type can't be set */
-	if (!type || !br->multicast_stats_enabled)
+	if (!type || !br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED))
 		return;
 
 	if (p)
@@ -2645,3 +2464,13 @@
 	}
 	memcpy(dest, &tdst, sizeof(*dest));
 }
+
+int br_mdb_hash_init(struct net_bridge *br)
+{
+	return rhashtable_init(&br->mdb_hash_tbl, &br_mdb_rht_params);
+}
+
+void br_mdb_hash_fini(struct net_bridge *br)
+{
+	rhashtable_destroy(&br->mdb_hash_tbl);
+}
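[A sketch of the expected init/teardown ordering for these two helpers (the real callers live in br_device.c, outside this excerpt): the table is created before any multicast processing starts, and destroyed only after br_multicast_dev_del() has emptied it. Function names here are hypothetical:

static int example_bridge_init(struct net_bridge *br)
{
	return br_mdb_hash_init(br);	/* allocate the rhashtable */
}

static void example_bridge_uninit(struct net_bridge *br)
{
	br_multicast_dev_del(br);	/* flush entries, drain RCU */
	br_mdb_hash_fini(br);		/* then destroy the empty table */
}
]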
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 37278dc..af78001 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Handle firewalling
  *	Linux ethernet bridge
@@ -6,11 +7,6 @@
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *	Bart De Schuymer		<bdschuym@pandora.be>
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	Lennert dedicates this file to Kerstin Wurdinger.
  */
 
@@ -51,25 +47,22 @@
 
 struct brnf_net {
 	bool enabled;
-};
 
 #ifdef CONFIG_SYSCTL
-static struct ctl_table_header *brnf_sysctl_header;
-static int brnf_call_iptables __read_mostly = 1;
-static int brnf_call_ip6tables __read_mostly = 1;
-static int brnf_call_arptables __read_mostly = 1;
-static int brnf_filter_vlan_tagged __read_mostly;
-static int brnf_filter_pppoe_tagged __read_mostly;
-static int brnf_pass_vlan_indev __read_mostly;
-#else
-#define brnf_call_iptables 1
-#define brnf_call_ip6tables 1
-#define brnf_call_arptables 1
-#define brnf_filter_vlan_tagged 0
-#define brnf_filter_pppoe_tagged 0
-#define brnf_pass_vlan_indev 0
+	struct ctl_table_header *ctl_hdr;
 #endif
 
+	/* default value is 1 */
+	int call_iptables;
+	int call_ip6tables;
+	int call_arptables;
+
+	/* default value is 0 */
+	int filter_vlan_tagged;
+	int filter_pppoe_tagged;
+	int pass_vlan_indev;
+};
+
 #define IS_IP(skb) \
 	(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP))
 
@@ -89,17 +82,28 @@
 		return 0;
 }
 
-#define IS_VLAN_IP(skb) \
-	(vlan_proto(skb) == htons(ETH_P_IP) && \
-	 brnf_filter_vlan_tagged)
+static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net)
+{
+	struct brnf_net *brnet = net_generic(net, brnf_net_id);
 
-#define IS_VLAN_IPV6(skb) \
-	(vlan_proto(skb) == htons(ETH_P_IPV6) && \
-	 brnf_filter_vlan_tagged)
+	return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged;
+}
 
-#define IS_VLAN_ARP(skb) \
-	(vlan_proto(skb) == htons(ETH_P_ARP) &&	\
-	 brnf_filter_vlan_tagged)
+static inline bool is_vlan_ipv6(const struct sk_buff *skb,
+				const struct net *net)
+{
+	struct brnf_net *brnet = net_generic(net, brnf_net_id);
+
+	return vlan_proto(skb) == htons(ETH_P_IPV6) &&
+	       brnet->filter_vlan_tagged;
+}
+
+static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net)
+{
+	struct brnf_net *brnet = net_generic(net, brnf_net_id);
+
+	return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged;
+}
 
 static inline __be16 pppoe_proto(const struct sk_buff *skb)
 {
@@ -107,15 +111,23 @@
 			    sizeof(struct pppoe_hdr)));
 }
 
-#define IS_PPPOE_IP(skb) \
-	(skb->protocol == htons(ETH_P_PPP_SES) && \
-	 pppoe_proto(skb) == htons(PPP_IP) && \
-	 brnf_filter_pppoe_tagged)
+static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net)
+{
+	struct brnf_net *brnet = net_generic(net, brnf_net_id);
 
-#define IS_PPPOE_IPV6(skb) \
-	(skb->protocol == htons(ETH_P_PPP_SES) && \
-	 pppoe_proto(skb) == htons(PPP_IPV6) && \
-	 brnf_filter_pppoe_tagged)
+	return skb->protocol == htons(ETH_P_PPP_SES) &&
+	       pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged;
+}
+
+static inline bool is_pppoe_ipv6(const struct sk_buff *skb,
+				 const struct net *net)
+{
+	struct brnf_net *brnet = net_generic(net, brnf_net_id);
+
+	return skb->protocol == htons(ETH_P_PPP_SES) &&
+	       pppoe_proto(skb) == htons(PPP_IPV6) &&
+	       brnet->filter_pppoe_tagged;
+}
 
 /* largest possible L2 header, see br_nf_dev_queue_xmit() */
 #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
@@ -132,10 +144,7 @@
 
 static void nf_bridge_info_free(struct sk_buff *skb)
 {
-	if (skb->nf_bridge) {
-		nf_bridge_put(skb->nf_bridge);
-		skb->nf_bridge = NULL;
-	}
+	skb_ext_del(skb, SKB_EXT_BRIDGE_NF);
 }
 
 static inline struct net_device *bridge_parent(const struct net_device *dev)
@@ -148,19 +157,7 @@
 
 static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
 {
-	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
-
-	if (refcount_read(&nf_bridge->use) > 1) {
-		struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
-
-		if (tmp) {
-			memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
-			refcount_set(&tmp->use, 1);
-		}
-		nf_bridge_put(nf_bridge);
-		nf_bridge = tmp;
-	}
-	return nf_bridge;
+	return skb_ext_add(skb, SKB_EXT_BRIDGE_NF);
 }
 
 unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
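[With the nf_bridge state held in an skb extension, unsharing collapses to skb_ext_add(), which performs copy-on-write itself when the extension area is shared. The read-only counterpart used throughout this file, nf_bridge_info_get(), is in practice a thin wrapper; a sketch, assuming the v5.4 helper in include/linux/netfilter_bridge.h:

static inline struct nf_bridge_info *
nf_bridge_info_get(const struct sk_buff *skb)
{
	return skb_ext_find(skb, SKB_EXT_BRIDGE_NF);
}
]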
@@ -247,7 +244,9 @@
 
 void nf_bridge_update_protocol(struct sk_buff *skb)
 {
-	switch (skb->nf_bridge->orig_proto) {
+	const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+	switch (nf_bridge->orig_proto) {
 	case BRNF_PROTO_8021Q:
 		skb->protocol = htons(ETH_P_8021Q);
 		break;
@@ -278,7 +277,7 @@
 		struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 		int ret;
 
-		if (neigh->hh.hh_len) {
+		if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) {
 			neigh_hh_bridge(&neigh->hh, skb);
 			skb->dev = nf_bridge->physindev;
 			ret = br_handle_frame_finish(net, sk, skb);
@@ -425,12 +424,16 @@
 	return 0;
 }
 
-static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev)
+static struct net_device *brnf_get_logical_dev(struct sk_buff *skb,
+					       const struct net_device *dev,
+					       const struct net *net)
 {
 	struct net_device *vlan, *br;
+	struct brnf_net *brnet = net_generic(net, brnf_net_id);
 
 	br = bridge_parent(dev);
-	if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb))
+
+	if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb))
 		return br;
 
 	vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto,
@@ -440,7 +443,7 @@
 }
 
 /* Some common code for IPv4/IPv6 */
-struct net_device *setup_pre_routing(struct sk_buff *skb)
+struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net)
 {
 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 
@@ -451,7 +454,7 @@
 
 	nf_bridge->in_prerouting = 1;
 	nf_bridge->physindev = skb->dev;
-	skb->dev = brnf_get_logical_dev(skb, skb->dev);
+	skb->dev = brnf_get_logical_dev(skb, skb->dev, net);
 
 	if (skb->protocol == htons(ETH_P_8021Q))
 		nf_bridge->orig_proto = BRNF_PROTO_8021Q;
@@ -477,6 +480,7 @@
 	struct net_bridge_port *p;
 	struct net_bridge *br;
 	__u32 len = nf_bridge_encap_header_len(skb);
+	struct brnf_net *brnet;
 
 	if (unlikely(!pskb_may_pull(skb, len)))
 		return NF_DROP;
@@ -486,18 +490,26 @@
 		return NF_DROP;
 	br = p->br;
 
-	if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) {
-		if (!brnf_call_ip6tables && !br->nf_call_ip6tables)
+	brnet = net_generic(state->net, brnf_net_id);
+	if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
+	    is_pppoe_ipv6(skb, state->net)) {
+		if (!brnet->call_ip6tables &&
+		    !br_opt_get(br, BROPT_NF_CALL_IP6TABLES))
 			return NF_ACCEPT;
+		if (!ipv6_mod_enabled()) {
+			pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported.");
+			return NF_DROP;
+		}
 
 		nf_bridge_pull_encap_header_rcsum(skb);
 		return br_nf_pre_routing_ipv6(priv, skb, state);
 	}
 
-	if (!brnf_call_iptables && !br->nf_call_iptables)
+	if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES))
 		return NF_ACCEPT;
 
-	if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))
+	if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) &&
+	    !is_pppoe_ip(skb, state->net))
 		return NF_ACCEPT;
 
 	nf_bridge_pull_encap_header_rcsum(skb);
@@ -505,16 +517,16 @@
 	if (br_validate_ipv4(state->net, skb))
 		return NF_DROP;
 
-	nf_bridge_put(skb->nf_bridge);
 	if (!nf_bridge_alloc(skb))
 		return NF_DROP;
-	if (!setup_pre_routing(skb))
+	if (!setup_pre_routing(skb, state->net))
 		return NF_DROP;
 
 	nf_bridge = nf_bridge_info_get(skb);
 	nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
 
 	skb->protocol = htons(ETH_P_IP);
+	skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4;
 
 	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
 		skb->dev, NULL,
@@ -530,7 +542,7 @@
 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 	struct net_device *in;
 
-	if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
+	if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) {
 
 		if (skb->protocol == htons(ETH_P_IP))
 			nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
@@ -568,7 +580,8 @@
 	struct net_device *parent;
 	u_int8_t pf;
 
-	if (!skb->nf_bridge)
+	nf_bridge = nf_bridge_info_get(skb);
+	if (!nf_bridge)
 		return NF_ACCEPT;
 
 	/* Need exclusive nf_bridge_info since we might have multiple
@@ -584,9 +597,11 @@
 	if (!parent)
 		return NF_DROP;
 
-	if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
+	if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
+	    is_pppoe_ip(skb, state->net))
 		pf = NFPROTO_IPV4;
-	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
+	else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
+		 is_pppoe_ipv6(skb, state->net))
 		pf = NFPROTO_IPV6;
 	else
 		return NF_ACCEPT;
@@ -617,7 +632,7 @@
 		skb->protocol = htons(ETH_P_IPV6);
 
 	NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
-		brnf_get_logical_dev(skb, state->in),
+		brnf_get_logical_dev(skb, state->in, state->net),
 		parent,	br_nf_forward_finish);
 
 	return NF_STOLEN;
@@ -630,23 +645,25 @@
 	struct net_bridge_port *p;
 	struct net_bridge *br;
 	struct net_device **d = (struct net_device **)(skb->cb);
+	struct brnf_net *brnet;
 
 	p = br_port_get_rcu(state->out);
 	if (p == NULL)
 		return NF_ACCEPT;
 	br = p->br;
 
-	if (!brnf_call_arptables && !br->nf_call_arptables)
+	brnet = net_generic(state->net, brnf_net_id);
+	if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES))
 		return NF_ACCEPT;
 
 	if (!IS_ARP(skb)) {
-		if (!IS_VLAN_ARP(skb))
+		if (!is_vlan_arp(skb, state->net))
 			return NF_ACCEPT;
 		nf_bridge_pull_encap_header(skb);
 	}
 
 	if (arp_hdr(skb)->ar_pln != 4) {
-		if (IS_VLAN_ARP(skb))
+		if (is_vlan_arp(skb, state->net))
 			nf_bridge_push_encap_header(skb);
 		return NF_ACCEPT;
 	}
@@ -670,10 +687,8 @@
 		return 0;
 	}
 
-	if (data->vlan_tci) {
-		skb->vlan_tci = data->vlan_tci;
-		skb->vlan_proto = data->vlan_proto;
-	}
+	if (data->vlan_proto)
+		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
 
 	skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
 	__skb_push(skb, data->encap_size);
@@ -702,7 +717,9 @@
 
 static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 {
-	if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
+	const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+	if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
 		return PPPOE_SES_HLEN;
 	return 0;
 }
@@ -739,8 +756,13 @@
 
 		data = this_cpu_ptr(&brnf_frag_data_storage);
 
-		data->vlan_tci = skb->vlan_tci;
-		data->vlan_proto = skb->vlan_proto;
+		if (skb_vlan_tag_present(skb)) {
+			data->vlan_tci = skb->vlan_tci;
+			data->vlan_proto = skb->vlan_proto;
+		} else {
+			data->vlan_proto = 0;
+		}
+
 		data->encap_size = nf_bridge_encap_header_len(skb);
 		data->size = ETH_HLEN + data->encap_size;
 
@@ -801,9 +823,11 @@
 	if (!realoutdev)
 		return NF_DROP;
 
-	if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
+	if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
+	    is_pppoe_ip(skb, state->net))
 		pf = NFPROTO_IPV4;
-	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
+	else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
+		 is_pppoe_ipv6(skb, state->net))
 		pf = NFPROTO_IPV6;
 	else
 		return NF_ACCEPT;
@@ -835,8 +859,11 @@
 				   struct sk_buff *skb,
 				   const struct nf_hook_state *state)
 {
-	if (skb->nf_bridge && !skb->nf_bridge->in_prerouting &&
-	    !netif_is_l3_master(skb->dev)) {
+	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+	if (nf_bridge && !nf_bridge->in_prerouting &&
+	    !netif_is_l3_master(skb->dev) &&
+	    !netif_is_l3_slave(skb->dev)) {
 		state->okfn(state->net, state->sk, skb);
 		return NF_STOLEN;
 	}
@@ -873,7 +900,9 @@
 
 static int br_nf_dev_xmit(struct sk_buff *skb)
 {
-	if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
+	const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+	if (nf_bridge && nf_bridge->bridged_dnat) {
 		br_nf_pre_routing_finish_bridge_slow(skb);
 		return 1;
 	}
@@ -884,11 +913,6 @@
 	.br_dev_xmit_hook =	br_nf_dev_xmit,
 };
 
-void br_netfilter_enable(void)
-{
-}
-EXPORT_SYMBOL_GPL(br_netfilter_enable);
-
 /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
  * br_dev_queue_push_xmit is called afterwards */
 static const struct nf_hook_ops br_nf_ops[] = {
@@ -956,23 +980,6 @@
 	return NOTIFY_OK;
 }
 
-static void __net_exit brnf_exit_net(struct net *net)
-{
-	struct brnf_net *brnet = net_generic(net, brnf_net_id);
-
-	if (!brnet->enabled)
-		return;
-
-	nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
-	brnet->enabled = false;
-}
-
-static struct pernet_operations brnf_net_ops __read_mostly = {
-	.exit = brnf_exit_net,
-	.id   = &brnf_net_id,
-	.size = sizeof(struct brnf_net),
-};
-
 static struct notifier_block brnf_notifier __read_mostly = {
 	.notifier_call = brnf_device_event,
 };
@@ -1031,50 +1038,125 @@
 static struct ctl_table brnf_table[] = {
 	{
 		.procname	= "bridge-nf-call-arptables",
-		.data		= &brnf_call_arptables,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= brnf_sysctl_call_tables,
 	},
 	{
 		.procname	= "bridge-nf-call-iptables",
-		.data		= &brnf_call_iptables,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= brnf_sysctl_call_tables,
 	},
 	{
 		.procname	= "bridge-nf-call-ip6tables",
-		.data		= &brnf_call_ip6tables,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= brnf_sysctl_call_tables,
 	},
 	{
 		.procname	= "bridge-nf-filter-vlan-tagged",
-		.data		= &brnf_filter_vlan_tagged,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= brnf_sysctl_call_tables,
 	},
 	{
 		.procname	= "bridge-nf-filter-pppoe-tagged",
-		.data		= &brnf_filter_pppoe_tagged,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= brnf_sysctl_call_tables,
 	},
 	{
 		.procname	= "bridge-nf-pass-vlan-input-dev",
-		.data		= &brnf_pass_vlan_indev,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= brnf_sysctl_call_tables,
 	},
 	{ }
 };
+
+static inline void br_netfilter_sysctl_default(struct brnf_net *brnf)
+{
+	brnf->call_iptables = 1;
+	brnf->call_ip6tables = 1;
+	brnf->call_arptables = 1;
+	brnf->filter_vlan_tagged = 0;
+	brnf->filter_pppoe_tagged = 0;
+	brnf->pass_vlan_indev = 0;
+}
+
+static int br_netfilter_sysctl_init_net(struct net *net)
+{
+	struct ctl_table *table = brnf_table;
+	struct brnf_net *brnet;
+
+	if (!net_eq(net, &init_net)) {
+		table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL);
+		if (!table)
+			return -ENOMEM;
+	}
+
+	brnet = net_generic(net, brnf_net_id);
+	table[0].data = &brnet->call_arptables;
+	table[1].data = &brnet->call_iptables;
+	table[2].data = &brnet->call_ip6tables;
+	table[3].data = &brnet->filter_vlan_tagged;
+	table[4].data = &brnet->filter_pppoe_tagged;
+	table[5].data = &brnet->pass_vlan_indev;
+
+	br_netfilter_sysctl_default(brnet);
+
+	brnet->ctl_hdr = register_net_sysctl(net, "net/bridge", table);
+	if (!brnet->ctl_hdr) {
+		if (!net_eq(net, &init_net))
+			kfree(table);
+
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void br_netfilter_sysctl_exit_net(struct net *net,
+					 struct brnf_net *brnet)
+{
+	struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg;
+
+	unregister_net_sysctl_table(brnet->ctl_hdr);
+	if (!net_eq(net, &init_net))
+		kfree(table);
+}
+
+static int __net_init brnf_init_net(struct net *net)
+{
+	return br_netfilter_sysctl_init_net(net);
+}
 #endif
 
+static void __net_exit brnf_exit_net(struct net *net)
+{
+	struct brnf_net *brnet;
+
+	brnet = net_generic(net, brnf_net_id);
+	if (brnet->enabled) {
+		nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
+		brnet->enabled = false;
+	}
+
+#ifdef CONFIG_SYSCTL
+	br_netfilter_sysctl_exit_net(net, brnet);
+#endif
+}
+
+static struct pernet_operations brnf_net_ops __read_mostly = {
+#ifdef CONFIG_SYSCTL
+	.init = brnf_init_net,
+#endif
+	.exit = brnf_exit_net,
+	.id   = &brnf_net_id,
+	.size = sizeof(struct brnf_net),
+};
+
 static int __init br_netfilter_init(void)
 {
 	int ret;
@@ -1089,16 +1171,6 @@
 		return ret;
 	}
 
-#ifdef CONFIG_SYSCTL
-	brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
-	if (brnf_sysctl_header == NULL) {
-		printk(KERN_WARNING
-		       "br_netfilter: can't register to sysctl.\n");
-		unregister_netdevice_notifier(&brnf_notifier);
-		unregister_pernet_subsys(&brnf_net_ops);
-		return -ENOMEM;
-	}
-#endif
 	RCU_INIT_POINTER(nf_br_ops, &br_ops);
 	printk(KERN_NOTICE "Bridge firewalling registered\n");
 	return 0;
@@ -1109,9 +1181,6 @@
 	RCU_INIT_POINTER(nf_br_ops, NULL);
 	unregister_netdevice_notifier(&brnf_notifier);
 	unregister_pernet_subsys(&brnf_net_ops);
-#ifdef CONFIG_SYSCTL
-	unregister_net_sysctl_table(brnf_sysctl_header);
-#endif
 }
 
 module_init(br_netfilter_init);
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 96c072e..e4e0c83 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Handle firewalling
  *	Linux ethernet bridge
@@ -6,11 +7,6 @@
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *	Bart De Schuymer		<bdschuym@pandora.be>
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	Lennert dedicates this file to Kerstin Wurdinger.
  */
 
@@ -131,6 +127,7 @@
 					IPSTATS_MIB_INDISCARDS);
 			goto drop;
 		}
+		hdr = ipv6_hdr(skb);
 	}
 	if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb))
 		goto drop;
@@ -224,16 +221,18 @@
 	if (br_validate_ipv6(state->net, skb))
 		return NF_DROP;
 
-	nf_bridge_put(skb->nf_bridge);
-	if (!nf_bridge_alloc(skb))
+	nf_bridge = nf_bridge_alloc(skb);
+	if (!nf_bridge)
 		return NF_DROP;
-	if (!setup_pre_routing(skb))
+	if (!setup_pre_routing(skb, state->net))
 		return NF_DROP;
 
 	nf_bridge = nf_bridge_info_get(skb);
 	nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
 
 	skb->protocol = htons(ETH_P_IPV6);
+	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+
 	NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
 		skb->dev, NULL,
 		br_nf_pre_routing_finish_ipv6);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ec2b58a..a0a5448 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Bridge netlink control interface
  *
  *	Authors:
  *	Stephen Hemminger		<shemminger@osdl.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -102,7 +98,7 @@
 	size_t vinfo_sz = 0;
 
 	rcu_read_lock();
-	if (br_port_exists(dev)) {
+	if (netif_is_bridge_port(dev)) {
 		p = br_port_get_rcu(dev);
 		vg = nbp_vlan_group_rcu(p);
 	} else if (dev->priv_flags & IFF_EBRIDGE) {
@@ -413,9 +409,9 @@
 		goto nla_put_failure;
 
 	if (event == RTM_NEWLINK && port) {
-		struct nlattr *nest
-			= nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);
+		struct nlattr *nest;
 
+		nest = nla_nest_start(skb, IFLA_PROTINFO);
 		if (nest == NULL || br_port_fill_attrs(skb, port) < 0)
 			goto nla_put_failure;
 		nla_nest_end(skb, nest);
@@ -439,7 +435,7 @@
 			rcu_read_unlock();
 			goto done;
 		}
-		af = nla_nest_start(skb, IFLA_AF_SPEC);
+		af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
 		if (!af) {
 			rcu_read_unlock();
 			goto nla_put_failure;
@@ -525,7 +521,8 @@
 }
 
 static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
-			int cmd, struct bridge_vlan_info *vinfo, bool *changed)
+			int cmd, struct bridge_vlan_info *vinfo, bool *changed,
+			struct netlink_ext_ack *extack)
 {
 	bool curr_change;
 	int err = 0;
@@ -537,11 +534,11 @@
 			 * per-VLAN entry as well
 			 */
 			err = nbp_vlan_add(p, vinfo->vid, vinfo->flags,
-					   &curr_change);
+					   &curr_change, extack);
 		} else {
 			vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY;
 			err = br_vlan_add(br, vinfo->vid, vinfo->flags,
-					  &curr_change);
+					  &curr_change, extack);
 		}
 		if (curr_change)
 			*changed = true;
@@ -568,7 +565,8 @@
 				struct net_bridge_port *p, int cmd,
 				struct bridge_vlan_info *vinfo_curr,
 				struct bridge_vlan_info **vinfo_last,
-				bool *changed)
+				bool *changed,
+				struct netlink_ext_ack *extack)
 {
 	if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK)
 		return -EINVAL;
@@ -598,7 +596,8 @@
 		       sizeof(struct bridge_vlan_info));
 		for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) {
 			tmp_vinfo.vid = v;
-			err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed);
+			err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed,
+					   extack);
 			if (err)
 				break;
 		}
@@ -607,13 +606,14 @@
 		return err;
 	}
 
-	return br_vlan_info(br, p, cmd, vinfo_curr, changed);
+	return br_vlan_info(br, p, cmd, vinfo_curr, changed, extack);
 }
 
 static int br_afspec(struct net_bridge *br,
 		     struct net_bridge_port *p,
 		     struct nlattr *af_spec,
-		     int cmd, bool *changed)
+		     int cmd, bool *changed,
+		     struct netlink_ext_ack *extack)
 {
 	struct bridge_vlan_info *vinfo_curr = NULL;
 	struct bridge_vlan_info *vinfo_last = NULL;
@@ -643,7 +643,8 @@
 				return -EINVAL;
 			vinfo_curr = nla_data(attr);
 			err = br_process_vlan_info(br, p, cmd, vinfo_curr,
-						   &vinfo_last, changed);
+						   &vinfo_last, changed,
+						   extack);
 			if (err)
 				return err;
 			break;
@@ -850,7 +851,8 @@
 }
 
 /* Change state and parameters on port. */
-int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
+int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags,
+	       struct netlink_ext_ack *extack)
 {
 	struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
 	struct nlattr *tb[IFLA_BRPORT_MAX + 1];
@@ -874,8 +876,10 @@
 
 	if (p && protinfo) {
 		if (protinfo->nla_type & NLA_F_NESTED) {
-			err = nla_parse_nested(tb, IFLA_BRPORT_MAX, protinfo,
-					       br_port_policy, NULL);
+			err = nla_parse_nested_deprecated(tb, IFLA_BRPORT_MAX,
+							  protinfo,
+							  br_port_policy,
+							  NULL);
 			if (err)
 				return err;
 
@@ -897,7 +901,7 @@
 	}
 
 	if (afspec)
-		err = br_afspec(br, p, afspec, RTM_SETLINK, &changed);
+		err = br_afspec(br, p, afspec, RTM_SETLINK, &changed, extack);
 
 	if (changed)
 		br_ifinfo_notify(RTM_NEWLINK, br, p);
@@ -923,7 +927,7 @@
 	if (!p && !(dev->priv_flags & IFF_EBRIDGE))
 		return -EINVAL;
 
-	err = br_afspec(br, p, afspec, RTM_DELLINK, &changed);
+	err = br_afspec(br, p, afspec, RTM_DELLINK, &changed, NULL);
 	if (changed)
 		/* Send RTM_NEWLINK because userspace
 		 * expects RTM_NEWLINK for vlan dels
@@ -1034,6 +1038,9 @@
 	[IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 },
 	[IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
 	[IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 },
+	[IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 },
+	[IFLA_BR_MULTI_BOOLOPT] = { .type = NLA_EXACT_LEN,
+				    .len = sizeof(struct br_boolopt_multi) },
 };
 
 static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1102,7 +1109,7 @@
 	if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
 		__u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
 
-		err = __br_vlan_set_default_pvid(br, defpvid);
+		err = __br_vlan_set_default_pvid(br, defpvid, extack);
 		if (err)
 			return err;
 	}
@@ -1114,6 +1121,14 @@
 		if (err)
 			return err;
 	}
+
+	if (data[IFLA_BR_VLAN_STATS_PER_PORT]) {
+		__u8 per_port = nla_get_u8(data[IFLA_BR_VLAN_STATS_PER_PORT]);
+
+		err = br_vlan_set_stats_per_port(br, per_port);
+		if (err)
+			return err;
+	}
 #endif
 
 	if (data[IFLA_BR_GROUP_FWD_MASK]) {
@@ -1139,7 +1154,7 @@
 		spin_lock_bh(&br->lock);
 		memcpy(br->group_addr, new_addr, sizeof(br->group_addr));
 		spin_unlock_bh(&br->lock);
-		br->group_addr_set = true;
+		br_opt_toggle(br, BROPT_GROUP_ADDR_SET, true);
 		br_recalculate_fwd_mask(br);
 	}
 
@@ -1158,16 +1173,14 @@
 	if (data[IFLA_BR_MCAST_SNOOPING]) {
 		u8 mcast_snooping = nla_get_u8(data[IFLA_BR_MCAST_SNOOPING]);
 
-		err = br_multicast_toggle(br, mcast_snooping);
-		if (err)
-			return err;
+		br_multicast_toggle(br, mcast_snooping);
 	}
 
 	if (data[IFLA_BR_MCAST_QUERY_USE_IFADDR]) {
 		u8 val;
 
 		val = nla_get_u8(data[IFLA_BR_MCAST_QUERY_USE_IFADDR]);
-		br->multicast_query_use_ifaddr = !!val;
+		br_opt_toggle(br, BROPT_MULTICAST_QUERY_USE_IFADDR, !!val);
 	}
 
 	if (data[IFLA_BR_MCAST_QUERIER]) {
@@ -1178,19 +1191,12 @@
 			return err;
 	}
 
-	if (data[IFLA_BR_MCAST_HASH_ELASTICITY]) {
-		u32 val = nla_get_u32(data[IFLA_BR_MCAST_HASH_ELASTICITY]);
+	if (data[IFLA_BR_MCAST_HASH_ELASTICITY])
+		br_warn(br, "the hash_elasticity option has been deprecated and is always %u\n",
+			RHT_ELASTICITY);
 
-		br->hash_elasticity = val;
-	}
-
-	if (data[IFLA_BR_MCAST_HASH_MAX]) {
-		u32 hash_max = nla_get_u32(data[IFLA_BR_MCAST_HASH_MAX]);
-
-		err = br_multicast_set_hash_max(br, hash_max);
-		if (err)
-			return err;
-	}
+	if (data[IFLA_BR_MCAST_HASH_MAX])
+		br->hash_max = nla_get_u32(data[IFLA_BR_MCAST_HASH_MAX]);
 
 	if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) {
 		u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]);
@@ -1244,7 +1250,7 @@
 		__u8 mcast_stats;
 
 		mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]);
-		br->multicast_stats_enabled = !!mcast_stats;
+		br_opt_toggle(br, BROPT_MULTICAST_STATS_ENABLED, !!mcast_stats);
 	}
 
 	if (data[IFLA_BR_MCAST_IGMP_VERSION]) {
@@ -1271,22 +1277,31 @@
 	if (data[IFLA_BR_NF_CALL_IPTABLES]) {
 		u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IPTABLES]);
 
-		br->nf_call_iptables = val ? true : false;
+		br_opt_toggle(br, BROPT_NF_CALL_IPTABLES, !!val);
 	}
 
 	if (data[IFLA_BR_NF_CALL_IP6TABLES]) {
 		u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IP6TABLES]);
 
-		br->nf_call_ip6tables = val ? true : false;
+		br_opt_toggle(br, BROPT_NF_CALL_IP6TABLES, !!val);
 	}
 
 	if (data[IFLA_BR_NF_CALL_ARPTABLES]) {
 		u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_ARPTABLES]);
 
-		br->nf_call_arptables = val ? true : false;
+		br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val);
 	}
 #endif
 
+	if (data[IFLA_BR_MULTI_BOOLOPT]) {
+		struct br_boolopt_multi *bm;
+
+		bm = nla_data(data[IFLA_BR_MULTI_BOOLOPT]);
+		err = br_boolopt_multi_toggle(br, bm, extack);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
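[IFLA_BR_MULTI_BOOLOPT carries a struct br_boolopt_multi in which optmask selects the option bits to change and optval supplies their new values, so several boolean options can be toggled in one atomic attribute. A hedged userspace-side sketch, assuming the uapi BR_BOOLOPT_NO_LL_LEARN id from include/uapi/linux/if_bridge.h:

/* Enable no_linklocal_learn via a single netlink attribute. */
struct br_boolopt_multi bm = {
	.optmask = 1 << BR_BOOLOPT_NO_LL_LEARN,
	.optval  = 1 << BR_BOOLOPT_NO_LL_LEARN,
};
/* attach &bm as IFLA_BR_MULTI_BOOLOPT in the RTM_NEWLINK request */
]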
 
@@ -1327,6 +1342,7 @@
 	       nla_total_size(sizeof(__be16)) +	/* IFLA_BR_VLAN_PROTOCOL */
 	       nla_total_size(sizeof(u16)) +    /* IFLA_BR_VLAN_DEFAULT_PVID */
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_VLAN_STATS_ENABLED */
+	       nla_total_size(sizeof(u8)) +	/* IFLA_BR_VLAN_STATS_PER_PORT */
 #endif
 	       nla_total_size(sizeof(u16)) +    /* IFLA_BR_GROUP_FWD_MASK */
 	       nla_total_size(sizeof(struct ifla_bridge_id)) +   /* IFLA_BR_ROOT_ID */
@@ -1364,6 +1380,7 @@
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_NF_CALL_IP6TABLES */
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_NF_CALL_ARPTABLES */
 #endif
+	       nla_total_size(sizeof(struct br_boolopt_multi)) + /* IFLA_BR_MULTI_BOOLOPT */
 	       0;
 }
 
@@ -1377,6 +1394,7 @@
 	u32 stp_enabled = br->stp_enabled;
 	u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
 	u8 vlan_enabled = br_vlan_enabled(br->dev);
+	struct br_boolopt_multi bm;
 	u64 clockval;
 
 	clockval = br_timer_value(&br->hello_timer);
@@ -1393,6 +1411,7 @@
 	if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD))
 		return -EMSGSIZE;
 
+	br_boolopt_multi_get(br, &bm);
 	if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
 	    nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
 	    nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) ||
@@ -1410,25 +1429,30 @@
 	    nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE, br->topology_change) ||
 	    nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
 		       br->topology_change_detected) ||
-	    nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr))
+	    nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr) ||
+	    nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm))
 		return -EMSGSIZE;
 
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) ||
 	    nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid) ||
-	    nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br->vlan_stats_enabled))
+	    nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED,
+		       br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) ||
+	    nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT,
+		       br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)))
 		return -EMSGSIZE;
 #endif
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 	if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) ||
-	    nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING, !br->multicast_disabled) ||
+	    nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING,
+		       br_opt_get(br, BROPT_MULTICAST_ENABLED)) ||
 	    nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
-		       br->multicast_query_use_ifaddr) ||
-	    nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, br->multicast_querier) ||
+		       br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR)) ||
+	    nla_put_u8(skb, IFLA_BR_MCAST_QUERIER,
+		       br_opt_get(br, BROPT_MULTICAST_QUERIER)) ||
 	    nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED,
-		       br->multicast_stats_enabled) ||
-	    nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY,
-			br->hash_elasticity) ||
+		       br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) ||
+	    nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) ||
 	    nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
 	    nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
 			br->multicast_last_member_count) ||
@@ -1469,11 +1493,11 @@
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	if (nla_put_u8(skb, IFLA_BR_NF_CALL_IPTABLES,
-		       br->nf_call_iptables ? 1 : 0) ||
+		       br_opt_get(br, BROPT_NF_CALL_IPTABLES) ? 1 : 0) ||
 	    nla_put_u8(skb, IFLA_BR_NF_CALL_IP6TABLES,
-		       br->nf_call_ip6tables ? 1 : 0) ||
+		       br_opt_get(br, BROPT_NF_CALL_IP6TABLES) ? 1 : 0) ||
 	    nla_put_u8(skb, IFLA_BR_NF_CALL_ARPTABLES,
-		       br->nf_call_arptables ? 1 : 0))
+		       br_opt_get(br, BROPT_NF_CALL_ARPTABLES) ? 1 : 0))
 		return -EMSGSIZE;
 #endif
 
@@ -1543,7 +1567,7 @@
 		return -EINVAL;
 	}
 
-	nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
+	nest = nla_nest_start_noflag(skb, LINK_XSTATS_TYPE_BRIDGE);
 	if (!nest)
 		return -EMSGSIZE;
 
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index da8cb99..afee292 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Bridge per vlan tunnel port dst_metadata netlink control interface
  *
  *	Authors:
  *	Roopa Prabhu		<roopa@cumulusnetworks.com>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -97,7 +93,7 @@
 	__be32 tid = tunnel_id_to_key32(tunnel_id);
 	struct nlattr *tmap;
 
-	tmap = nla_nest_start(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO);
+	tmap = nla_nest_start_noflag(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO);
 	if (!tmap)
 		return -EMSGSIZE;
 	if (nla_put_u32(skb, IFLA_BRIDGE_VLAN_TUNNEL_ID,
@@ -230,8 +226,8 @@
 
 	memset(tinfo, 0, sizeof(*tinfo));
 
-	err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, attr,
-			       vlan_tunnel_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX,
+					  attr, vlan_tunnel_policy, NULL);
 	if (err < 0)
 		return err;
 
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 8e2d7cf..2cdfc5d 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Handle firewalling core
  *	Linux ethernet bridge
@@ -6,11 +7,6 @@
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *	Bart De Schuymer		<bdschuym@pandora.be>
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	Lennert dedicates this file to Kerstin Wurdinger.
  */
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 11ed202..ce2ab14 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #ifndef _BR_PRIVATE_H
@@ -31,6 +27,8 @@
 #define BR_PORT_BITS	10
 #define BR_MAX_PORTS	(1<<BR_PORT_BITS)
 
+#define BR_MULTICAST_DEFAULT_HASH_MAX 4096
+
 #define BR_VERSION	"2.3"
 
 /* Control of forwarding link local multicast */
@@ -54,14 +52,12 @@
 typedef struct mac_addr mac_addr;
 typedef __u16 port_id;
 
-struct bridge_id
-{
+struct bridge_id {
 	unsigned char	prio[2];
 	unsigned char	addr[ETH_ALEN];
 };
 
-struct mac_addr
-{
+struct mac_addr {
 	unsigned char	addr[ETH_ALEN];
 };
 
@@ -104,12 +100,19 @@
 	struct metadata_dst	*tunnel_dst;
 };
 
+/* private vlan flags */
+enum {
+	BR_VLFLAG_PER_PORT_STATS = BIT(0),
+	BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1),
+};
+
 /**
  * struct net_bridge_vlan - per-vlan entry
  *
  * @vnode: rhashtable member
  * @vid: VLAN id
  * @flags: bridge vlan flags
+ * @priv_flags: private (in-kernel) bridge vlan flags
  * @stats: per-cpu VLAN statistics
  * @br: if MASTER flag set, this points to a bridge struct
  * @port: if MASTER flag unset, this points to a port struct
@@ -129,6 +132,7 @@
 	struct rhash_head		tnode;
 	u16				vid;
 	u16				flags;
+	u16				priv_flags;
 	struct br_vlan_stats __percpu	*stats;
 	union {
 		struct net_bridge	*br;
@@ -181,6 +185,7 @@
 	struct hlist_node		fdb_node;
 	unsigned char			is_local:1,
 					is_static:1,
+					is_sticky:1,
 					added_by_user:1,
 					added_by_external_learn:1,
 					offloaded:1;
@@ -194,6 +199,7 @@
 
 #define MDB_PG_FLAGS_PERMANENT	BIT(0)
 #define MDB_PG_FLAGS_OFFLOAD	BIT(1)
+#define MDB_PG_FLAGS_FAST_LEAVE	BIT(2)
 
 struct net_bridge_port_group {
 	struct net_bridge_port		*port;
@@ -206,26 +212,15 @@
 	unsigned char			eth_addr[ETH_ALEN];
 };
 
-struct net_bridge_mdb_entry
-{
-	struct hlist_node		hlist[2];
+struct net_bridge_mdb_entry {
+	struct rhash_head		rhnode;
 	struct net_bridge		*br;
 	struct net_bridge_port_group __rcu *ports;
 	struct rcu_head			rcu;
 	struct timer_list		timer;
 	struct br_ip			addr;
 	bool				host_joined;
-};
-
-struct net_bridge_mdb_htable
-{
-	struct hlist_head		*mhash;
-	struct rcu_head			rcu;
-	struct net_bridge_mdb_htable	*old;
-	u32				size;
-	u32				max;
-	u32				secret;
-	u32				ver;
+	struct hlist_node		mdb_node;
 };
 
 struct net_bridge_port {
@@ -290,8 +285,6 @@
 #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
 #define br_promisc_port(p) ((p)->flags & BR_PROMISC)
 
-#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
-
 static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
 {
 	return rcu_dereference(dev->rx_handler_data);
@@ -299,26 +292,44 @@
 
 static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev)
 {
-	return br_port_exists(dev) ?
+	return netif_is_bridge_port(dev) ?
 		rtnl_dereference(dev->rx_handler_data) : NULL;
 }
 
 static inline struct net_bridge_port *br_port_get_rtnl_rcu(const struct net_device *dev)
 {
-	return br_port_exists(dev) ?
+	return netif_is_bridge_port(dev) ?
 		rcu_dereference_rtnl(dev->rx_handler_data) : NULL;
 }
 
+enum net_bridge_opts {
+	BROPT_VLAN_ENABLED,
+	BROPT_VLAN_STATS_ENABLED,
+	BROPT_NF_CALL_IPTABLES,
+	BROPT_NF_CALL_IP6TABLES,
+	BROPT_NF_CALL_ARPTABLES,
+	BROPT_GROUP_ADDR_SET,
+	BROPT_MULTICAST_ENABLED,
+	BROPT_MULTICAST_QUERIER,
+	BROPT_MULTICAST_QUERY_USE_IFADDR,
+	BROPT_MULTICAST_STATS_ENABLED,
+	BROPT_HAS_IPV6_ADDR,
+	BROPT_NEIGH_SUPPRESS_ENABLED,
+	BROPT_MTU_SET_BY_USER,
+	BROPT_VLAN_STATS_PER_PORT,
+	BROPT_NO_LL_LEARN,
+	BROPT_VLAN_BRIDGE_BINDING,
+};
+
 struct net_bridge {
 	spinlock_t			lock;
 	spinlock_t			hash_lock;
 	struct list_head		port_list;
 	struct net_device		*dev;
 	struct pcpu_sw_netstats		__percpu *stats;
+	unsigned long			options;
 	/* These fields are accessed on each packet */
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
-	u8				vlan_enabled;
-	u8				vlan_stats_enabled;
 	__be16				vlan_proto;
 	u16				default_pvid;
 	struct net_bridge_vlan_group	__rcu *vlgrp;
@@ -330,9 +341,6 @@
 		struct rtable		fake_rtable;
 		struct rt6_info		fake_rt6_info;
 	};
-	bool				nf_call_iptables;
-	bool				nf_call_ip6tables;
-	bool				nf_call_arptables;
 #endif
 	u16				group_fwd_mask;
 	u16				group_fwd_mask_required;
@@ -340,7 +348,6 @@
 	/* STP */
 	bridge_id			designated_root;
 	bridge_id			bridge_id;
-	u32				root_path_cost;
 	unsigned char			topology_change;
 	unsigned char			topology_change_detected;
 	u16				root_port;
@@ -352,9 +359,9 @@
 	unsigned long			bridge_hello_time;
 	unsigned long			bridge_forward_delay;
 	unsigned long			bridge_ageing_time;
+	u32				root_path_cost;
 
 	u8				group_addr[ETH_ALEN];
-	bool				group_addr_set;
 
 	enum {
 		BR_NO_STP, 		/* no spanning tree */
@@ -363,22 +370,18 @@
 	} stp_enabled;
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-	unsigned char			multicast_router;
 
-	u8				multicast_disabled:1;
-	u8				multicast_querier:1;
-	u8				multicast_query_use_ifaddr:1;
-	u8				has_ipv6_addr:1;
-	u8				multicast_stats_enabled:1;
-
-	u32				hash_elasticity;
 	u32				hash_max;
 
 	u32				multicast_last_member_count;
 	u32				multicast_startup_query_count;
 
 	u8				multicast_igmp_version;
-
+	u8				multicast_router;
+#if IS_ENABLED(CONFIG_IPV6)
+	u8				multicast_mld_version;
+#endif
+	spinlock_t			multicast_lock;
 	unsigned long			multicast_last_member_interval;
 	unsigned long			multicast_membership_interval;
 	unsigned long			multicast_querier_interval;
@@ -386,8 +389,9 @@
 	unsigned long			multicast_query_response_interval;
 	unsigned long			multicast_startup_query_interval;
 
-	spinlock_t			multicast_lock;
-	struct net_bridge_mdb_htable __rcu *mdb;
+	struct rhashtable		mdb_hash_tbl;
+
+	struct hlist_head		mdb_list;
 	struct hlist_head		router_list;
 
 	struct timer_list		multicast_router_timer;
@@ -399,7 +403,6 @@
 	struct bridge_mcast_other_query	ip6_other_query;
 	struct bridge_mcast_own_query	ip6_own_query;
 	struct bridge_mcast_querier	ip6_querier;
-	u8				multicast_mld_version;
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 #endif
 
@@ -413,24 +416,24 @@
 #ifdef CONFIG_NET_SWITCHDEV
 	int offload_fwd_mark;
 #endif
-	bool				neigh_suppress_enabled;
-	bool				mtu_set_by_user;
 	struct hlist_head		fdb_list;
 };
 
 struct br_input_skb_cb {
 	struct net_device *brdev;
 
+	u16 frag_max_size;
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-	int igmp;
-	int mrouters_only;
+	u8 igmp;
+	u8 mrouters_only:1;
 #endif
-
-	bool proxyarp_replied;
-	bool src_port_isolated;
-
+	u8 proxyarp_replied:1;
+	u8 src_port_isolated:1;
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
-	bool vlan_filtered;
+	u8 vlan_filtered:1;
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+	u8 br_netfilter_broute:1;
 #endif
 
 #ifdef CONFIG_NET_SWITCHDEV
@@ -492,6 +495,22 @@
 	return true;
 }
 
+static inline int br_opt_get(const struct net_bridge *br,
+			     enum net_bridge_opts opt)
+{
+	return test_bit(opt, &br->options);
+}
+
+int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
+		      struct netlink_ext_ack *extack);
+int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt);
+int br_boolopt_multi_toggle(struct net_bridge *br,
+			    struct br_boolopt_multi *bm,
+			    struct netlink_ext_ack *extack);
+void br_boolopt_multi_get(const struct net_bridge *br,
+			  struct br_boolopt_multi *bm);
+void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on);
+
 /* br_device.c */
 void br_dev_setup(struct net_device *dev);
 void br_dev_delete(struct net_device *dev, struct list_head *list);
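[br_opt_get() above is just test_bit() on br->options; its writer counterpart br_opt_toggle(), declared a few lines up, lives in br.c. A sketch of the toggle under the assumption that it matches the upstream v5.4 implementation — a plain set_bit()/clear_bit(), with serialization left to the callers' existing locks:

void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
{
	bool cur = !!br_opt_get(br, opt);

	if (cur == on)
		return;

	if (on)
		set_bit(opt, &br->options);
	else
		clear_bit(opt, &br->options);
}
]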
@@ -552,9 +571,13 @@
 int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 		  struct net_device *dev, const unsigned char *addr, u16 vid);
 int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
-	       const unsigned char *addr, u16 vid, u16 nlh_flags);
+	       const unsigned char *addr, u16 vid, u16 nlh_flags,
+	       struct netlink_ext_ack *extack);
 int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		struct net_device *dev, struct net_device *fdev, int *idx);
+int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
+	       const unsigned char *addr, u16 vid, u32 portid, u32 seq,
+	       struct netlink_ext_ack *extack);
 int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
 void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
@@ -564,7 +587,7 @@
 			      const unsigned char *addr, u16 vid,
 			      bool swdev_notify);
 void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
-			  const unsigned char *addr, u16 vid);
+			  const unsigned char *addr, u16 vid, bool offloaded);
 
 /* br_forward.c */
 enum br_pkt_type {
@@ -633,7 +656,6 @@
 
 /* br_multicast.c */
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-extern unsigned int br_mdb_rehash_seq;
 int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 		     struct sk_buff *skb, u16 vid);
 struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
@@ -658,17 +680,15 @@
 int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val);
 #endif
 struct net_bridge_mdb_entry *
-br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst);
+br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst);
 struct net_bridge_mdb_entry *
-br_multicast_new_group(struct net_bridge *br, struct net_bridge_port *port,
-		       struct br_ip *group);
-void br_multicast_free_pg(struct rcu_head *head);
+br_multicast_new_group(struct net_bridge *br, struct br_ip *group);
 struct net_bridge_port_group *
 br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
 			    struct net_bridge_port_group __rcu *next,
 			    unsigned char flags, const unsigned char *src);
-void br_mdb_init(void);
-void br_mdb_uninit(void);
+int br_mdb_hash_init(struct net_bridge *br);
+void br_mdb_hash_fini(struct net_bridge *br);
 void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
 		   struct br_ip *group, int type, u8 flags);
 void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
@@ -680,6 +700,10 @@
 void br_multicast_get_stats(const struct net_bridge *br,
 			    const struct net_bridge_port *p,
 			    struct br_mcast_stats *dest);
+void br_mdb_init(void);
+void br_mdb_uninit(void);
+void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify);
+void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
 
 #define mlock_dereference(X, br) \
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
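[The reworked br_mdb_ip_get() declared above reduces to a single rhashtable lookup keyed by struct br_ip. A sketch of the shape (the real body is in br_multicast.c and may differ in its locking annotations); rhashtable_lookup_fast() takes its own RCU read lock:

struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge *br,
					   struct br_ip *dst)
{
	return rhashtable_lookup_fast(&br->mdb_hash_tbl, dst,
				      br_mdb_rht_params);
}
]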
@@ -698,8 +722,8 @@
 {
 	bool own_querier_enabled;
 
-	if (br->multicast_querier) {
-		if (is_ipv6 && !br->has_ipv6_addr)
+	if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
+		if (is_ipv6 && !br_opt_get(br, BROPT_HAS_IPV6_ADDR))
 			own_querier_enabled = false;
 		else
 			own_querier_enabled = true;
@@ -805,6 +829,15 @@
 {
 }
 
+static inline int br_mdb_hash_init(struct net_bridge *br)
+{
+	return 0;
+}
+
+static inline void br_mdb_hash_fini(struct net_bridge *br)
+{
+}
+
 static inline void br_multicast_count(struct net_bridge *br,
 				      const struct net_bridge_port *p,
 				      const struct sk_buff *skb,
@@ -840,7 +873,7 @@
 			       struct net_bridge_vlan_group *vg,
 			       struct sk_buff *skb);
 int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
-		bool *changed);
+		bool *changed, struct netlink_ext_ack *extack);
 int br_vlan_delete(struct net_bridge *br, u16 vid);
 void br_vlan_flush(struct net_bridge *br);
 struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
@@ -850,17 +883,22 @@
 int __br_vlan_set_proto(struct net_bridge *br, __be16 proto);
 int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
 int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
+int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val);
 int br_vlan_init(struct net_bridge *br);
 int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
-int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
+int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid,
+			       struct netlink_ext_ack *extack);
 int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
-		 bool *changed);
+		 bool *changed, struct netlink_ext_ack *extack);
 int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
 void nbp_vlan_flush(struct net_bridge_port *port);
-int nbp_vlan_init(struct net_bridge_port *port);
+int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack);
 int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
 void br_vlan_get_stats(const struct net_bridge_vlan *v,
 		       struct br_vlan_stats *stats);
+void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
+int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
+			 void *ptr);
 
 static inline struct net_bridge_vlan_group *br_vlan_group(
 					const struct net_bridge *br)
@@ -894,7 +932,7 @@
 	int err = 0;
 
 	if (skb_vlan_tag_present(skb)) {
-		*vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK;
+		*vid = skb_vlan_tag_get_id(skb);
 	} else {
 		*vid = 0;
 		err = -EINVAL;
@@ -942,7 +980,7 @@
 }
 
 static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
-			      bool *changed)
+			      bool *changed, struct netlink_ext_ack *extack)
 {
 	*changed = false;
 	return -EOPNOTSUPP;
@@ -967,7 +1005,7 @@
 }
 
 static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
-			       bool *changed)
+			       bool *changed, struct netlink_ext_ack *extack)
 {
 	*changed = false;
 	return -EOPNOTSUPP;
@@ -988,7 +1026,8 @@
 	return NULL;
 }
 
-static inline int nbp_vlan_init(struct net_bridge_port *port)
+static inline int nbp_vlan_init(struct net_bridge_port *port,
+				struct netlink_ext_ack *extack)
 {
 	return 0;
 }
@@ -1043,6 +1082,17 @@
 				     struct br_vlan_stats *stats)
 {
 }
+
+static inline void br_vlan_port_event(struct net_bridge_port *p,
+				      unsigned long event)
+{
+}
+
+static inline int br_vlan_bridge_event(struct net_device *dev,
+				       unsigned long event, void *ptr)
+{
+	return 0;
+}
 #endif
 
 struct nf_br_ops {
@@ -1109,7 +1159,8 @@
 void br_netlink_fini(void);
 void br_ifinfo_notify(int event, const struct net_bridge *br,
 		      const struct net_bridge_port *port);
-int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
+int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags,
+	       struct netlink_ext_ack *extack);
 int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
 int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
 	       u32 filter_mask, int nlflags);
@@ -1144,7 +1195,8 @@
 			       unsigned long mask);
 void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
 			     int type);
-int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags);
+int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
+			       struct netlink_ext_ack *extack);
 int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
 
 static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
@@ -1176,7 +1228,8 @@
 }
 
 static inline int br_switchdev_port_vlan_add(struct net_device *dev,
-					     u16 vid, u16 flags)
+					     u16 vid, u16 flags,
+					     struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index 3f7543a..814cf13 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #ifndef _BR_PRIVATE_STP_H
diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h
index a259471..2bdef2e 100644
--- a/net/bridge/br_private_tunnel.h
+++ b/net/bridge/br_private_tunnel.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *	Bridge per vlan tunnels
  *
  *	Authors:
  *	Roopa Prabhu		<roopa@cumulusnetworks.com>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #ifndef _BR_PRIVATE_TUNNEL_H
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index b694196..1f1410f 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Spanning tree protocol; generic parts
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
 #include <linux/rculist.h>
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 1b75d6b..7796dd9 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Spanning tree protocol; BPDU handling
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -147,7 +143,6 @@
 void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
 		struct net_device *dev)
 {
-	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	struct net_bridge_port *p;
 	struct net_bridge *br;
 	const unsigned char *buf;
@@ -176,7 +171,7 @@
 	if (p->state == BR_STATE_DISABLED)
 		goto out;
 
-	if (!ether_addr_equal(dest, br->group_addr))
+	if (!ether_addr_equal(eth_hdr(skb)->h_dest, br->group_addr))
 		goto out;
 
 	if (p->flags & BR_BPDU_GUARD) {
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 808e2b9..d174d3a 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Spanning tree protocol; interface code
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -117,7 +113,8 @@
 	del_timer(&p->forward_delay_timer);
 	del_timer(&p->hold_timer);
 
-	br_fdb_delete_by_port(br, p, 0, 0);
+	if (!rcu_access_pointer(p->backup_port))
+		br_fdb_delete_by_port(br, p, 0, 0);
 	br_multicast_disable_port(p);
 
 	br_configuration_update(br);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index e7739de..27bf197 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Spanning tree protocol; timer-related code
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index d77f807..921310d 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -14,7 +14,7 @@
 
 	/* dev is yet to be added to the port list. */
 	list_for_each_entry(p, &br->port_list, list) {
-		if (switchdev_port_same_parent_id(dev, p->dev))
+		if (netdev_port_same_parent_id(dev, p->dev))
 			return p->offload_fwd_mark;
 	}
 
@@ -23,15 +23,12 @@
 
 int nbp_switchdev_mark_set(struct net_bridge_port *p)
 {
-	struct switchdev_attr attr = {
-		.orig_dev = p->dev,
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-	};
+	struct netdev_phys_item_id ppid = { };
 	int err;
 
 	ASSERT_RTNL();
 
-	err = switchdev_port_attr_get(p->dev, &attr);
+	err = dev_get_port_parent_id(p->dev, &ppid, true);
 	if (err) {
 		if (err == -EOPNOTSUPP)
 			return 0;
@@ -67,21 +64,25 @@
 {
 	struct switchdev_attr attr = {
 		.orig_dev = p->dev,
-		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
+		.id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
+		.u.brport_flags = mask,
+	};
+	struct switchdev_notifier_port_attr_info info = {
+		.attr = &attr,
 	};
 	int err;
 
 	if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD)
 		return 0;
 
-	err = switchdev_port_attr_get(p->dev, &attr);
+	/* We run from atomic context here */
+	err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev,
+				       &info.info, NULL);
+	err = notifier_to_errno(err);
 	if (err == -EOPNOTSUPP)
 		return 0;
-	if (err)
-		return err;
 
-	/* Check if specific bridge flag attribute offload is supported */
-	if (!(attr.u.brport_flags_support & mask)) {
+	if (err) {
 		br_warn(p->br, "bridge flag offload is not supported %u(%s)\n",
 			(unsigned int)p->port_no, p->dev->name);
 		return -EOPNOTSUPP;
@@ -90,6 +91,7 @@
 	attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS;
 	attr.flags = SWITCHDEV_F_DEFER;
 	attr.u.brport_flags = flags;
+
 	err = switchdev_port_attr_set(p->dev, &attr);
 	if (err) {
 		br_warn(p->br, "error setting offload flag on port %u(%s)\n",
@@ -103,7 +105,7 @@
 static void
 br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
 				u16 vid, struct net_device *dev,
-				bool added_by_user)
+				bool added_by_user, bool offloaded)
 {
 	struct switchdev_notifier_fdb_info info;
 	unsigned long notifier_type;
@@ -111,8 +113,9 @@
 	info.addr = mac;
 	info.vid = vid;
 	info.added_by_user = added_by_user;
+	info.offloaded = offloaded;
 	notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE;
-	call_switchdev_notifiers(notifier_type, dev, &info.info);
+	call_switchdev_notifiers(notifier_type, dev, &info.info, NULL);
 }
 
 void
@@ -126,18 +129,21 @@
 		br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
 						fdb->key.vlan_id,
 						fdb->dst->dev,
-						fdb->added_by_user);
+						fdb->added_by_user,
+						fdb->offloaded);
 		break;
 	case RTM_NEWNEIGH:
 		br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
 						fdb->key.vlan_id,
 						fdb->dst->dev,
-						fdb->added_by_user);
+						fdb->added_by_user,
+						fdb->offloaded);
 		break;
 	}
 }
 
-int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags)
+int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
+			       struct netlink_ext_ack *extack)
 {
 	struct switchdev_obj_port_vlan v = {
 		.obj.orig_dev = dev,
@@ -147,7 +153,7 @@
 		.vid_end = vid,
 	};
 
-	return switchdev_port_obj_add(dev, &v.obj);
+	return switchdev_port_obj_add(dev, &v.obj, extack);
 }
 
 int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
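
dev_get_port_parent_id() replaces the old SWITCHDEV_ATTR_ID_PORT_PARENT_ID
attribute lookup by querying the driver through the ndo_get_port_parent_id
netdev op, recursing through lower devices when the last argument is true.
A hedged sketch of the driver side; the foo_* names and the switch_id field
are hypothetical:

	static int foo_get_port_parent_id(struct net_device *dev,
					  struct netdev_phys_item_id *ppid)
	{
		struct foo_port *port = netdev_priv(dev);

		/* switch_id must fit in MAX_PHYS_ITEM_ID_LEN bytes */
		ppid->id_len = sizeof(port->switch_id);
		memcpy(ppid->id, &port->switch_id, ppid->id_len);
		return 0;
	}

	static const struct net_device_ops foo_netdev_ops = {
		.ndo_get_port_parent_id	= foo_get_port_parent_id,
	};
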
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 0318a69..9ab0f00 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Sysfs attributes of bridge
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Stephen Hemminger		<shemminger@osdl.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/capability.h>
@@ -303,7 +299,7 @@
 	ether_addr_copy(br->group_addr, new_addr);
 	spin_unlock_bh(&br->lock);
 
-	br->group_addr_set = true;
+	br_opt_toggle(br, BROPT_GROUP_ADDR_SET, true);
 	br_recalculate_fwd_mask(br);
 	netdev_state_change(br->dev);
 
@@ -328,6 +324,27 @@
 }
 static DEVICE_ATTR_WO(flush);
 
+static ssize_t no_linklocal_learn_show(struct device *d,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%d\n", br_boolopt_get(br, BR_BOOLOPT_NO_LL_LEARN));
+}
+
+static int set_no_linklocal_learn(struct net_bridge *br, unsigned long val)
+{
+	return br_boolopt_toggle(br, BR_BOOLOPT_NO_LL_LEARN, !!val, NULL);
+}
+
+static ssize_t no_linklocal_learn_store(struct device *d,
+					struct device_attribute *attr,
+					const char *buf, size_t len)
+{
+	return store_bridge_parm(d, buf, len, set_no_linklocal_learn);
+}
+static DEVICE_ATTR_RW(no_linklocal_learn);
+
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 static ssize_t multicast_router_show(struct device *d,
 				     struct device_attribute *attr, char *buf)
@@ -349,7 +366,7 @@
 				       char *buf)
 {
 	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%d\n", !br->multicast_disabled);
+	return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_ENABLED));
 }
 
 static ssize_t multicast_snooping_store(struct device *d,
@@ -365,12 +382,13 @@
 					       char *buf)
 {
 	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr);
+	return sprintf(buf, "%d\n",
+		       br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR));
 }
 
 static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val)
 {
-	br->multicast_query_use_ifaddr = !!val;
+	br_opt_toggle(br, BROPT_MULTICAST_QUERY_USE_IFADDR, !!val);
 	return 0;
 }
 
@@ -388,7 +406,7 @@
 				      char *buf)
 {
 	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%d\n", br->multicast_querier);
+	return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_QUERIER));
 }
 
 static ssize_t multicast_querier_store(struct device *d,
@@ -402,13 +420,13 @@
 static ssize_t hash_elasticity_show(struct device *d,
 				    struct device_attribute *attr, char *buf)
 {
-	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%u\n", br->hash_elasticity);
+	return sprintf(buf, "%u\n", RHT_ELASTICITY);
 }
 
 static int set_elasticity(struct net_bridge *br, unsigned long val)
 {
-	br->hash_elasticity = val;
+	br_warn(br, "the hash_elasticity option has been deprecated and is always %u\n",
+		RHT_ELASTICITY);
 	return 0;
 }
 
@@ -427,10 +445,16 @@
 	return sprintf(buf, "%u\n", br->hash_max);
 }
 
+static int set_hash_max(struct net_bridge *br, unsigned long val)
+{
+	br->hash_max = val;
+	return 0;
+}
+
 static ssize_t hash_max_store(struct device *d, struct device_attribute *attr,
 			      const char *buf, size_t len)
 {
-	return store_bridge_parm(d, buf, len, br_multicast_set_hash_max);
+	return store_bridge_parm(d, buf, len, set_hash_max);
 }
 static DEVICE_ATTR_RW(hash_max);
 
@@ -636,12 +660,13 @@
 {
 	struct net_bridge *br = to_bridge(d);
 
-	return sprintf(buf, "%u\n", br->multicast_stats_enabled);
+	return sprintf(buf, "%d\n",
+		       br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED));
 }
 
 static int set_stats_enabled(struct net_bridge *br, unsigned long val)
 {
-	br->multicast_stats_enabled = !!val;
+	br_opt_toggle(br, BROPT_MULTICAST_STATS_ENABLED, !!val);
 	return 0;
 }
 
@@ -678,12 +703,12 @@
 	struct device *d, struct device_attribute *attr, char *buf)
 {
 	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%u\n", br->nf_call_iptables);
+	return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IPTABLES));
 }
 
 static int set_nf_call_iptables(struct net_bridge *br, unsigned long val)
 {
-	br->nf_call_iptables = val ? true : false;
+	br_opt_toggle(br, BROPT_NF_CALL_IPTABLES, !!val);
 	return 0;
 }
 
@@ -699,12 +724,12 @@
 	struct device *d, struct device_attribute *attr, char *buf)
 {
 	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%u\n", br->nf_call_ip6tables);
+	return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IP6TABLES));
 }
 
 static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val)
 {
-	br->nf_call_ip6tables = val ? true : false;
+	br_opt_toggle(br, BROPT_NF_CALL_IP6TABLES, !!val);
 	return 0;
 }
 
@@ -720,12 +745,12 @@
 	struct device *d, struct device_attribute *attr, char *buf)
 {
 	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%u\n", br->nf_call_arptables);
+	return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_ARPTABLES));
 }
 
 static int set_nf_call_arptables(struct net_bridge *br, unsigned long val)
 {
-	br->nf_call_arptables = val ? true : false;
+	br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val);
 	return 0;
 }
 
@@ -743,7 +768,7 @@
 				   char *buf)
 {
 	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%d\n", br->vlan_enabled);
+	return sprintf(buf, "%d\n", br_opt_get(br, BROPT_VLAN_ENABLED));
 }
 
 static ssize_t vlan_filtering_store(struct device *d,
@@ -791,7 +816,7 @@
 				       char *buf)
 {
 	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%u\n", br->vlan_stats_enabled);
+	return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_ENABLED));
 }
 
 static ssize_t vlan_stats_enabled_store(struct device *d,
@@ -801,6 +826,22 @@
 	return store_bridge_parm(d, buf, len, br_vlan_set_stats);
 }
 static DEVICE_ATTR_RW(vlan_stats_enabled);
+
+static ssize_t vlan_stats_per_port_show(struct device *d,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_PER_PORT));
+}
+
+static ssize_t vlan_stats_per_port_store(struct device *d,
+					 struct device_attribute *attr,
+					 const char *buf, size_t len)
+{
+	return store_bridge_parm(d, buf, len, br_vlan_set_stats_per_port);
+}
+static DEVICE_ATTR_RW(vlan_stats_per_port);
 #endif
 
 static struct attribute *bridge_attrs[] = {
@@ -823,6 +864,7 @@
 	&dev_attr_gc_timer.attr,
 	&dev_attr_group_addr.attr,
 	&dev_attr_flush.attr,
+	&dev_attr_no_linklocal_learn.attr,
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 	&dev_attr_multicast_router.attr,
 	&dev_attr_multicast_snooping.attr,
@@ -854,6 +896,7 @@
 	&dev_attr_vlan_protocol.attr,
 	&dev_attr_default_pvid.attr,
 	&dev_attr_vlan_stats_enabled.attr,
+	&dev_attr_vlan_stats_per_port.attr,
 #endif
 	NULL
 };
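
The br_opt_get()/br_opt_toggle() conversions in this file consolidate the
bridge's boolean knobs into a single options bitmap instead of one-off struct
fields. A sketch of the helper shape these call sites imply (the real
definitions live in br_private.h):

	static inline bool br_opt_get(const struct net_bridge *br,
				      enum net_bridge_opts opt)
	{
		return test_bit(opt, &br->options);
	}

	static inline void br_opt_toggle(struct net_bridge *br,
					 enum net_bridge_opts opt, bool on)
	{
		if (on)
			set_bit(opt, &br->options);
		else
			clear_bit(opt, &br->options);
	}
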
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 7c87a2f..7a59cdd 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Sysfs attributes of bridge ports
  *	Linux ethernet bridge
  *
  *	Authors:
  *	Stephen Hemminger		<shemminger@osdl.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/capability.h>
@@ -320,9 +316,6 @@
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	if (!p->dev || !p->br)
-		goto out_unlock;
-
 	if (brport_attr->store_raw) {
 		char *buf_copy;
 
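
The deleted p->dev/p->br check was dead code: brport sysfs stores run with
RTNL held via the rtnl_trylock()/restart_syscall() pattern above, and a port
cannot be torn down while RTNL is held. A sketch of the pattern (the store
body is abbreviated):

	if (!rtnl_trylock())
		return restart_syscall();	/* retry rather than block in sysfs */

	/* ... mutate bridge/port state under RTNL ... */

	rtnl_unlock();
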
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 7df2690..bb98984 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
@@ -7,6 +8,8 @@
 #include "br_private.h"
 #include "br_private_tunnel.h"
 
+static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid);
+
 static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg,
 			      const void *ptr)
 {
@@ -21,7 +24,6 @@
 	.key_offset = offsetof(struct net_bridge_vlan, vid),
 	.key_len = sizeof(u16),
 	.nelem_hint = 3,
-	.locks_mul = 1,
 	.max_size = VLAN_N_VID,
 	.obj_cmpfn = br_vlan_cmp,
 	.automatic_shrinking = true,
@@ -80,16 +82,18 @@
 }
 
 static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
-			  u16 vid, u16 flags)
+			  struct net_bridge_vlan *v, u16 flags,
+			  struct netlink_ext_ack *extack)
 {
 	int err;
 
 	/* Try switchdev op first. In case it is not supported, fallback to
 	 * 8021q add.
 	 */
-	err = br_switchdev_port_vlan_add(dev, vid, flags);
+	err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
 	if (err == -EOPNOTSUPP)
-		return vlan_vid_add(dev, br->vlan_proto, vid);
+		return vlan_vid_add(dev, br->vlan_proto, v->vid);
+	v->priv_flags |= BR_VLFLAG_ADDED_BY_SWITCHDEV;
 	return err;
 }
 
@@ -121,25 +125,25 @@
 }
 
 static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
-			  u16 vid)
+			  const struct net_bridge_vlan *v)
 {
 	int err;
 
 	/* Try switchdev op first. In case it is not supported, fallback to
 	 * 8021q del.
 	 */
-	err = br_switchdev_port_vlan_del(dev, vid);
-	if (err == -EOPNOTSUPP) {
-		vlan_vid_del(dev, br->vlan_proto, vid);
-		return 0;
-	}
-	return err;
+	err = br_switchdev_port_vlan_del(dev, v->vid);
+	if (!(v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV))
+		vlan_vid_del(dev, br->vlan_proto, v->vid);
+	return err == -EOPNOTSUPP ? 0 : err;
 }
 
 /* Returns a master vlan; if it didn't exist it gets created. In all cases
  * a reference is taken to the master vlan before returning.
  */
-static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid)
+static struct net_bridge_vlan *
+br_vlan_get_master(struct net_bridge *br, u16 vid,
+		   struct netlink_ext_ack *extack)
 {
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_vlan *masterv;
@@ -150,7 +154,7 @@
 		bool changed;
 
 		/* missing global ctx, create it now */
-		if (br_vlan_add(br, vid, 0, &changed))
+		if (br_vlan_add(br, vid, 0, &changed, extack))
 			return NULL;
 		masterv = br_vlan_find(vg, vid);
 		if (WARN_ON(!masterv))
@@ -190,6 +194,19 @@
 	}
 }
 
+static void nbp_vlan_rcu_free(struct rcu_head *rcu)
+{
+	struct net_bridge_vlan *v;
+
+	v = container_of(rcu, struct net_bridge_vlan, rcu);
+	WARN_ON(br_vlan_is_master(v));
+	/* if we had per-port stats configured then free them here */
+	if (v->priv_flags & BR_VLFLAG_PER_PORT_STATS)
+		free_percpu(v->stats);
+	v->stats = NULL;
+	kfree(v);
+}
+
 /* This is the shared VLAN add function which works for both ports and bridge
  * devices. There are four possible calls to this function in terms of the
  * vlan entry type:
@@ -201,7 +218,8 @@
  * 4. same as 3 but with both master and brentry flags set so the entry
  *    will be used for filtering in both the port and the bridge
  */
-static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
+static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
+		      struct netlink_ext_ack *extack)
 {
 	struct net_bridge_vlan *masterv = NULL;
 	struct net_bridge_port *p = NULL;
@@ -226,7 +244,7 @@
 		 * This ensures tagged traffic enters the bridge when
 		 * promiscuous mode is disabled by br_manage_promisc().
 		 */
-		err = __vlan_vid_add(dev, br, v->vid, flags);
+		err = __vlan_vid_add(dev, br, v, flags, extack);
 		if (err)
 			goto out;
 
@@ -236,18 +254,27 @@
 
 			err = br_vlan_add(br, v->vid,
 					  flags | BRIDGE_VLAN_INFO_BRENTRY,
-					  &changed);
+					  &changed, extack);
 			if (err)
 				goto out_filt;
 		}
 
-		masterv = br_vlan_get_master(br, v->vid);
+		masterv = br_vlan_get_master(br, v->vid, extack);
 		if (!masterv)
 			goto out_filt;
 		v->brvlan = masterv;
-		v->stats = masterv->stats;
+		if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) {
+			v->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats);
+			if (!v->stats) {
+				err = -ENOMEM;
+				goto out_filt;
+			}
+			v->priv_flags |= BR_VLFLAG_PER_PORT_STATS;
+		} else {
+			v->stats = masterv->stats;
+		}
 	} else {
-		err = br_switchdev_port_vlan_add(dev, v->vid, flags);
+		err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
 		if (err && err != -EOPNOTSUPP)
 			goto out;
 	}
@@ -269,6 +296,9 @@
 
 	__vlan_add_list(v);
 	__vlan_add_flags(v, flags);
+
+	if (p)
+		nbp_vlan_set_vlan_dev_state(p, v->vid);
 out:
 	return err;
 
@@ -280,8 +310,12 @@
 
 out_filt:
 	if (p) {
-		__vlan_vid_del(dev, br, v->vid);
+		__vlan_vid_del(dev, br, v);
 		if (masterv) {
+			if (v->stats && masterv->stats != v->stats)
+				free_percpu(v->stats);
+			v->stats = NULL;
+
 			br_vlan_put_master(masterv);
 			v->brvlan = NULL;
 		}
@@ -309,7 +343,7 @@
 
 	__vlan_delete_pvid(vg, v->vid);
 	if (p) {
-		err = __vlan_vid_del(p->dev, p->br, v->vid);
+		err = __vlan_vid_del(p->dev, p->br, v);
 		if (err)
 			goto out;
 	} else {
@@ -329,7 +363,8 @@
 		rhashtable_remove_fast(&vg->vlan_hash, &v->vnode,
 				       br_vlan_rht_params);
 		__vlan_del_list(v);
-		kfree_rcu(v, rcu);
+		nbp_vlan_set_vlan_dev_state(p, v->vid);
+		call_rcu(&v->rcu, nbp_vlan_rcu_free);
 	}
 
 	br_vlan_put_master(masterv);
@@ -386,7 +421,7 @@
 			return NULL;
 		}
 	}
-	if (br->vlan_stats_enabled) {
+	if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
 		stats = this_cpu_ptr(v->stats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->tx_bytes += skb->len;
@@ -395,7 +430,7 @@
 	}
 
 	if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
-		skb->vlan_tci = 0;
+		__vlan_hwaccel_clear_tag(skb);
 
 	if (p && (p->flags & BR_VLAN_TUNNEL) &&
 	    br_handle_egress_vlan_tunnel(skb, v)) {
@@ -468,21 +503,21 @@
 			__vlan_hwaccel_put_tag(skb, br->vlan_proto, pvid);
 		else
 			/* Priority-tagged Frame.
-			 * At this point, We know that skb->vlan_tci had
-			 * VLAN_TAG_PRESENT bit and its VID field was 0x000.
+			 * At this point, we know that the VID field of
+			 * skb->vlan_tci was 0.
 			 * We update only VID field and preserve PCP field.
 			 */
 			skb->vlan_tci |= pvid;
 
 		/* if stats are disabled we can avoid the lookup */
-		if (!br->vlan_stats_enabled)
+		if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED))
 			return true;
 	}
 	v = br_vlan_find(vg, *vid);
 	if (!v || !br_vlan_should_use(v))
 		goto drop;
 
-	if (br->vlan_stats_enabled) {
+	if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
 		stats = this_cpu_ptr(v->stats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->rx_bytes += skb->len;
@@ -504,7 +539,7 @@
 	/* If VLAN filtering is disabled on the bridge, all packets are
 	 * permitted.
 	 */
-	if (!br->vlan_enabled) {
+	if (!br_opt_get(br, BROPT_VLAN_ENABLED)) {
 		BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
 		return true;
 	}
@@ -538,7 +573,7 @@
 	struct net_bridge *br = p->br;
 
 	/* If filtering was disabled at input, let it pass. */
-	if (!br->vlan_enabled)
+	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
 		return true;
 
 	vg = nbp_vlan_group_rcu(p);
@@ -565,11 +600,12 @@
 static int br_vlan_add_existing(struct net_bridge *br,
 				struct net_bridge_vlan_group *vg,
 				struct net_bridge_vlan *vlan,
-				u16 flags, bool *changed)
+				u16 flags, bool *changed,
+				struct netlink_ext_ack *extack)
 {
 	int err;
 
-	err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags);
+	err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags, extack);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
@@ -608,7 +644,8 @@
  * Must be called with vid in range from 1 to 4094 inclusive.
  * changed must be true only if the vlan was created or updated
  */
-int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed,
+		struct netlink_ext_ack *extack)
 {
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_vlan *vlan;
@@ -620,7 +657,8 @@
 	vg = br_vlan_group(br);
 	vlan = br_vlan_find(vg, vid);
 	if (vlan)
-		return br_vlan_add_existing(br, vg, vlan, flags, changed);
+		return br_vlan_add_existing(br, vg, vlan, flags, changed,
+					    extack);
 
 	vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
 	if (!vlan)
@@ -637,7 +675,7 @@
 	vlan->br = br;
 	if (flags & BRIDGE_VLAN_INFO_BRENTRY)
 		refcount_set(&vlan->refcnt, 1);
-	ret = __vlan_add(vlan, flags);
+	ret = __vlan_add(vlan, flags, extack);
 	if (ret) {
 		free_percpu(vlan->stats);
 		kfree(vlan);
@@ -695,11 +733,12 @@
 /* Must be protected by RTNL. */
 static void recalculate_group_addr(struct net_bridge *br)
 {
-	if (br->group_addr_set)
+	if (br_opt_get(br, BROPT_GROUP_ADDR_SET))
 		return;
 
 	spin_lock_bh(&br->lock);
-	if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) {
+	if (!br_opt_get(br, BROPT_VLAN_ENABLED) ||
+	    br->vlan_proto == htons(ETH_P_8021Q)) {
 		/* Bridge Group Address */
 		br->group_addr[5] = 0x00;
 	} else { /* vlan_enabled && ETH_P_8021AD */
@@ -712,7 +751,8 @@
 /* Must be protected by RTNL. */
 void br_recalculate_fwd_mask(struct net_bridge *br)
 {
-	if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q))
+	if (!br_opt_get(br, BROPT_VLAN_ENABLED) ||
+	    br->vlan_proto == htons(ETH_P_8021Q))
 		br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
 	else /* vlan_enabled && ETH_P_8021AD */
 		br->group_fwd_mask_required = BR_GROUPFWD_8021AD &
@@ -729,14 +769,14 @@
 	};
 	int err;
 
-	if (br->vlan_enabled == val)
+	if (br_opt_get(br, BROPT_VLAN_ENABLED) == !!val)
 		return 0;
 
 	err = switchdev_port_attr_set(br->dev, &attr);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
-	br->vlan_enabled = val;
+	br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val);
 	br_manage_promisc(br);
 	recalculate_group_addr(br);
 	br_recalculate_fwd_mask(br);
@@ -753,10 +793,20 @@
 {
 	struct net_bridge *br = netdev_priv(dev);
 
-	return !!br->vlan_enabled;
+	return br_opt_get(br, BROPT_VLAN_ENABLED);
 }
 EXPORT_SYMBOL_GPL(br_vlan_enabled);
 
+int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	*p_proto = ntohs(br->vlan_proto);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(br_vlan_get_proto);
+
 int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
 {
 	int err = 0;
@@ -819,7 +869,31 @@
 	switch (val) {
 	case 0:
 	case 1:
-		br->vlan_stats_enabled = val;
+		br_opt_toggle(br, BROPT_VLAN_STATS_ENABLED, !!val);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val)
+{
+	struct net_bridge_port *p;
+
+	/* allow changing the option only if there are no per-port vlans configured */
+	list_for_each_entry(p, &br->port_list, list) {
+		struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
+
+		if (vg->num_vlans)
+			return -EBUSY;
+	}
+
+	switch (val) {
+	case 0:
+	case 1:
+		br_opt_toggle(br, BROPT_VLAN_STATS_PER_PORT, !!val);
 		break;
 	default:
 		return -EINVAL;
@@ -862,7 +936,8 @@
 	br->default_pvid = 0;
 }
 
-int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
+int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid,
+			       struct netlink_ext_ack *extack)
 {
 	const struct net_bridge_vlan *pvent;
 	struct net_bridge_vlan_group *vg;
@@ -877,8 +952,7 @@
 		return 0;
 	}
 
-	changed = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
-			  GFP_KERNEL);
+	changed = bitmap_zalloc(BR_MAX_PORTS, GFP_KERNEL);
 	if (!changed)
 		return -ENOMEM;
 
@@ -895,7 +969,7 @@
 				  BRIDGE_VLAN_INFO_PVID |
 				  BRIDGE_VLAN_INFO_UNTAGGED |
 				  BRIDGE_VLAN_INFO_BRENTRY,
-				  &vlchange);
+				  &vlchange, extack);
 		if (err)
 			goto out;
 		br_vlan_delete(br, old_pvid);
@@ -915,7 +989,7 @@
 		err = nbp_vlan_add(p, pvid,
 				   BRIDGE_VLAN_INFO_PVID |
 				   BRIDGE_VLAN_INFO_UNTAGGED,
-				   &vlchange);
+				   &vlchange, extack);
 		if (err)
 			goto err_port;
 		nbp_vlan_delete(p, old_pvid);
@@ -925,7 +999,7 @@
 	br->default_pvid = pvid;
 
 out:
-	kfree(changed);
+	bitmap_free(changed);
 	return err;
 
 err_port:
@@ -937,7 +1011,7 @@
 			nbp_vlan_add(p, old_pvid,
 				     BRIDGE_VLAN_INFO_PVID |
 				     BRIDGE_VLAN_INFO_UNTAGGED,
-				     &vlchange);
+				     &vlchange, NULL);
 		nbp_vlan_delete(p, pvid);
 	}
 
@@ -947,7 +1021,7 @@
 				    BRIDGE_VLAN_INFO_PVID |
 				    BRIDGE_VLAN_INFO_UNTAGGED |
 				    BRIDGE_VLAN_INFO_BRENTRY,
-				    &vlchange);
+				    &vlchange, NULL);
 		br_vlan_delete(br, pvid);
 	}
 	goto out;
@@ -965,12 +1039,12 @@
 		goto out;
 
 	/* Only allow default pvid change when filtering is disabled */
-	if (br->vlan_enabled) {
+	if (br_opt_get(br, BROPT_VLAN_ENABLED)) {
 		pr_info_once("Please disable vlan filtering to change default_pvid\n");
 		err = -EPERM;
 		goto out;
 	}
-	err = __br_vlan_set_default_pvid(br, pvid);
+	err = __br_vlan_set_default_pvid(br, pvid, NULL);
 out:
 	return err;
 }
@@ -979,7 +1053,6 @@
 {
 	struct net_bridge_vlan_group *vg;
 	int ret = -ENOMEM;
-	bool changed;
 
 	vg = kzalloc(sizeof(*vg), GFP_KERNEL);
 	if (!vg)
@@ -994,17 +1067,10 @@
 	br->vlan_proto = htons(ETH_P_8021Q);
 	br->default_pvid = 1;
 	rcu_assign_pointer(br->vlgrp, vg);
-	ret = br_vlan_add(br, 1,
-			  BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
-			  BRIDGE_VLAN_INFO_BRENTRY, &changed);
-	if (ret)
-		goto err_vlan_add;
 
 out:
 	return ret;
 
-err_vlan_add:
-	vlan_tunnel_deinit(vg);
 err_tunnel_init:
 	rhashtable_destroy(&vg->vlan_hash);
 err_rhtbl:
@@ -1013,13 +1079,13 @@
 	goto out;
 }
 
-int nbp_vlan_init(struct net_bridge_port *p)
+int nbp_vlan_init(struct net_bridge_port *p, struct netlink_ext_ack *extack)
 {
 	struct switchdev_attr attr = {
 		.orig_dev = p->br->dev,
 		.id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
 		.flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
-		.u.vlan_filtering = p->br->vlan_enabled,
+		.u.vlan_filtering = br_opt_get(p->br, BROPT_VLAN_ENABLED),
 	};
 	struct net_bridge_vlan_group *vg;
 	int ret = -ENOMEM;
@@ -1046,7 +1112,7 @@
 		ret = nbp_vlan_add(p, p->br->default_pvid,
 				   BRIDGE_VLAN_INFO_PVID |
 				   BRIDGE_VLAN_INFO_UNTAGGED,
-				   &changed);
+				   &changed, extack);
 		if (ret)
 			goto err_vlan_add;
 	}
@@ -1071,7 +1137,7 @@
  * changed must be true only if the vlan was created or updated
  */
 int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
-		 bool *changed)
+		 bool *changed, struct netlink_ext_ack *extack)
 {
 	struct net_bridge_vlan *vlan;
 	int ret;
@@ -1082,7 +1148,7 @@
 	vlan = br_vlan_find(nbp_vlan_group(port), vid);
 	if (vlan) {
 		/* Pass the flags to the hardware bridge */
-		ret = br_switchdev_port_vlan_add(port->dev, vid, flags);
+		ret = br_switchdev_port_vlan_add(port->dev, vid, flags, extack);
 		if (ret && ret != -EOPNOTSUPP)
 			return ret;
 		*changed = __vlan_add_flags(vlan, flags);
@@ -1096,7 +1162,7 @@
 
 	vlan->vid = vid;
 	vlan->port = port;
-	ret = __vlan_add(vlan, flags);
+	ret = __vlan_add(vlan, flags, extack);
 	if (ret)
 		kfree(vlan);
 	else
@@ -1163,12 +1229,14 @@
 	}
 }
 
-int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
+static int __br_vlan_get_pvid(const struct net_device *dev,
+			      struct net_bridge_port *p, u16 *p_pvid)
 {
 	struct net_bridge_vlan_group *vg;
 
-	ASSERT_RTNL();
-	if (netif_is_bridge_master(dev))
+	if (p)
+		vg = nbp_vlan_group(p);
+	else if (netif_is_bridge_master(dev))
 		vg = br_vlan_group(netdev_priv(dev));
 	else
 		return -EINVAL;
@@ -1176,8 +1244,21 @@
 	*p_pvid = br_get_pvid(vg);
 	return 0;
 }
+
+int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
+{
+	ASSERT_RTNL();
+
+	return __br_vlan_get_pvid(dev, br_port_get_check_rtnl(dev), p_pvid);
+}
 EXPORT_SYMBOL_GPL(br_vlan_get_pvid);
 
+int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
+{
+	return __br_vlan_get_pvid(dev, br_port_get_check_rcu(dev), p_pvid);
+}
+EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
+
 int br_vlan_get_info(const struct net_device *dev, u16 vid,
 		     struct bridge_vlan_info *p_vinfo)
 {
@@ -1200,6 +1281,227 @@
 
 	p_vinfo->vid = vid;
 	p_vinfo->flags = v->flags;
+	if (vid == br_get_pvid(vg))
+		p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(br_vlan_get_info);
+
+static int br_vlan_is_bind_vlan_dev(const struct net_device *dev)
+{
+	return is_vlan_dev(dev) &&
+		!!(vlan_dev_priv(dev)->flags & VLAN_FLAG_BRIDGE_BINDING);
+}
+
+static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev,
+				       __always_unused void *data)
+{
+	return br_vlan_is_bind_vlan_dev(dev);
+}
+
+static bool br_vlan_has_upper_bind_vlan_dev(struct net_device *dev)
+{
+	int found;
+
+	rcu_read_lock();
+	found = netdev_walk_all_upper_dev_rcu(dev, br_vlan_is_bind_vlan_dev_fn,
+					      NULL);
+	rcu_read_unlock();
+
+	return !!found;
+}
+
+struct br_vlan_bind_walk_data {
+	u16 vid;
+	struct net_device *result;
+};
+
+static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev,
+					  void *data_in)
+{
+	struct br_vlan_bind_walk_data *data = data_in;
+	int found = 0;
+
+	if (br_vlan_is_bind_vlan_dev(dev) &&
+	    vlan_dev_priv(dev)->vlan_id == data->vid) {
+		data->result = dev;
+		found = 1;
+	}
+
+	return found;
+}
+
+static struct net_device *
+br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid)
+{
+	struct br_vlan_bind_walk_data data = {
+		.vid = vid,
+	};
+
+	rcu_read_lock();
+	netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn,
+				      &data);
+	rcu_read_unlock();
+
+	return data.result;
+}
+
+static bool br_vlan_is_dev_up(const struct net_device *dev)
+{
+	return !!(dev->flags & IFF_UP) && netif_oper_up(dev);
+}
+
+static void br_vlan_set_vlan_dev_state(const struct net_bridge *br,
+				       struct net_device *vlan_dev)
+{
+	u16 vid = vlan_dev_priv(vlan_dev)->vlan_id;
+	struct net_bridge_vlan_group *vg;
+	struct net_bridge_port *p;
+	bool has_carrier = false;
+
+	if (!netif_carrier_ok(br->dev)) {
+		netif_carrier_off(vlan_dev);
+		return;
+	}
+
+	list_for_each_entry(p, &br->port_list, list) {
+		vg = nbp_vlan_group(p);
+		if (br_vlan_find(vg, vid) && br_vlan_is_dev_up(p->dev)) {
+			has_carrier = true;
+			break;
+		}
+	}
+
+	if (has_carrier)
+		netif_carrier_on(vlan_dev);
+	else
+		netif_carrier_off(vlan_dev);
+}
+
+static void br_vlan_set_all_vlan_dev_state(struct net_bridge_port *p)
+{
+	struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
+	struct net_bridge_vlan *vlan;
+	struct net_device *vlan_dev;
+
+	list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+		vlan_dev = br_vlan_get_upper_bind_vlan_dev(p->br->dev,
+							   vlan->vid);
+		if (vlan_dev) {
+			if (br_vlan_is_dev_up(p->dev)) {
+				if (netif_carrier_ok(p->br->dev))
+					netif_carrier_on(vlan_dev);
+			} else {
+				br_vlan_set_vlan_dev_state(p->br, vlan_dev);
+			}
+		}
+	}
+}
+
+static void br_vlan_upper_change(struct net_device *dev,
+				 struct net_device *upper_dev,
+				 bool linking)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	if (!br_vlan_is_bind_vlan_dev(upper_dev))
+		return;
+
+	if (linking) {
+		br_vlan_set_vlan_dev_state(br, upper_dev);
+		br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, true);
+	} else {
+		br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING,
+			      br_vlan_has_upper_bind_vlan_dev(dev));
+	}
+}
+
+struct br_vlan_link_state_walk_data {
+	struct net_bridge *br;
+};
+
+static int br_vlan_link_state_change_fn(struct net_device *vlan_dev,
+					void *data_in)
+{
+	struct br_vlan_link_state_walk_data *data = data_in;
+
+	if (br_vlan_is_bind_vlan_dev(vlan_dev))
+		br_vlan_set_vlan_dev_state(data->br, vlan_dev);
+
+	return 0;
+}
+
+static void br_vlan_link_state_change(struct net_device *dev,
+				      struct net_bridge *br)
+{
+	struct br_vlan_link_state_walk_data data = {
+		.br = br
+	};
+
+	rcu_read_lock();
+	netdev_walk_all_upper_dev_rcu(dev, br_vlan_link_state_change_fn,
+				      &data);
+	rcu_read_unlock();
+}
+
+/* Must be protected by RTNL. */
+static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid)
+{
+	struct net_device *vlan_dev;
+
+	if (!br_opt_get(p->br, BROPT_VLAN_BRIDGE_BINDING))
+		return;
+
+	vlan_dev = br_vlan_get_upper_bind_vlan_dev(p->br->dev, vid);
+	if (vlan_dev)
+		br_vlan_set_vlan_dev_state(p->br, vlan_dev);
+}
+
+/* Must be protected by RTNL. */
+int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr)
+{
+	struct netdev_notifier_changeupper_info *info;
+	struct net_bridge *br = netdev_priv(dev);
+	bool changed;
+	int ret = 0;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		ret = br_vlan_add(br, br->default_pvid,
+				  BRIDGE_VLAN_INFO_PVID |
+				  BRIDGE_VLAN_INFO_UNTAGGED |
+				  BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL);
+		break;
+	case NETDEV_UNREGISTER:
+		br_vlan_delete(br, br->default_pvid);
+		break;
+	case NETDEV_CHANGEUPPER:
+		info = ptr;
+		br_vlan_upper_change(dev, info->upper_dev, info->linking);
+		break;
+
+	case NETDEV_CHANGE:
+	case NETDEV_UP:
+		if (!br_opt_get(br, BROPT_VLAN_BRIDGE_BINDING))
+			break;
+		br_vlan_link_state_change(dev, br);
+		break;
+	}
+
+	return ret;
+}
+
+/* Must be protected by RTNL. */
+void br_vlan_port_event(struct net_bridge_port *p, unsigned long event)
+{
+	if (!br_opt_get(p->br, BROPT_VLAN_BRIDGE_BINDING))
+		return;
+
+	switch (event) {
+	case NETDEV_CHANGE:
+	case NETDEV_DOWN:
+	case NETDEV_UP:
+		br_vlan_set_all_vlan_dev_state(p);
+		break;
+	}
+}
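
The new br_vlan_bridge_event()/br_vlan_port_event() machinery implements
VLAN_FLAG_BRIDGE_BINDING: an 802.1q upper device of the bridge with that flag
set takes its carrier from the bridge ports that are members of its VLAN. A
usage sketch, assuming an iproute2 recent enough to support the
bridge_binding keyword and an existing bridge br0:

	# br0.10 carrier follows the br0 ports that carry VLAN 10
	ip link add link br0 name br0.10 type vlan id 10 bridge_binding on
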
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 6d2c4ee..d13d208 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Bridge per vlan tunnel port dst_metadata handling code
  *
  *	Authors:
  *	Roopa Prabhu		<roopa@cumulusnetworks.com>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -34,7 +30,6 @@
 	.key_offset = offsetof(struct net_bridge_vlan, tinfo.tunnel_id),
 	.key_len = sizeof(__be64),
 	.nelem_hint = 3,
-	.locks_mul = 1,
 	.obj_cmpfn = br_vlan_tunid_cmp,
 	.automatic_shrinking = true,
 };
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 9a0159a..5040fe4 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Bridge netfilter configuration
 #
@@ -5,9 +6,15 @@
 menuconfig NF_TABLES_BRIDGE
 	depends on BRIDGE && NETFILTER && NF_TABLES
 	select NETFILTER_FAMILY_BRIDGE
-	bool "Ethernet Bridge nf_tables support"
+	tristate "Ethernet Bridge nf_tables support"
 
 if NF_TABLES_BRIDGE
+
+config NFT_BRIDGE_META
+	tristate "Netfilter nf_tables bridge meta support"
+	help
+	  Add support for bridge-specific meta keys.
+
 config NFT_BRIDGE_REJECT
 	tristate "Netfilter nf_tables bridge reject support"
 	depends on NFT_REJECT && NFT_REJECT_IPV4 && NFT_REJECT_IPV6
@@ -20,6 +27,20 @@
 
 endif # NF_TABLES_BRIDGE
 
+config NF_CONNTRACK_BRIDGE
+	tristate "IPv4/IPv6 bridge connection tracking support"
+	depends on NF_CONNTRACK
+	default n
+	help
+	  Connection tracking keeps a record of the packets that have passed
+	  through your machine, in order to figure out how they relate to
+	  each other as connections. This is used to enhance packet filtering via
+	  stateful policies. Enable this if you want native tracking from
+	  the bridge. This provides a replacement for the `br_netfilter'
+	  infrastructure.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 menuconfig BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
 	depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
@@ -113,7 +134,7 @@
 	  equivalent of the iptables limit match.
 
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 config BRIDGE_EBT_MARK
 	tristate "ebt: mark filter support"
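
With the NF_CONNTRACK_BRIDGE entry above, stateful filtering becomes possible
directly in the bridge family, without the legacy br_netfilter layer. A
hedged nftables sketch, assuming the module is available and a reasonably
recent nft binary:

	nft add table bridge filter
	nft add chain bridge filter forward '{ type filter hook forward priority 0; policy accept; }'
	nft add rule bridge filter forward ct state established,related accept
	nft add rule bridge filter forward ct state invalid drop
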
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 9b86886..8e2c575 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,8 +3,12 @@
 # Makefile for the netfilter modules for Link Layer filtering on a bridge.
 #
 
+obj-$(CONFIG_NFT_BRIDGE_META)  += nft_meta_bridge.o
 obj-$(CONFIG_NFT_BRIDGE_REJECT)  += nft_reject_bridge.o
 
+# connection tracking
+obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
+
 # packet logging
 obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o
 
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 5fc4aff..68c2519 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * 802_3
  *
@@ -10,7 +11,13 @@
 #include <linux/module.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_802_3.h>
+#include <linux/skbuff.h>
+#include <uapi/linux/netfilter_bridge/ebt_802_3.h>
+
+static struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
+{
+	return (struct ebt_802_3_hdr *)skb_mac_header(skb);
+}
 
 static bool
 ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par)
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 620e54f..96f7243 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_among
  *
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 2271422..0707cc0 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_arp
  *
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index db85230..d9e77e2 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_arpreply
  *
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index dfc86a0..12a4f4d 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_dnat
  *
@@ -19,9 +20,8 @@
 ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
-	struct net_device *dev;
 
-	if (!skb_make_writable(skb, 0))
+	if (skb_ensure_writable(skb, ETH_ALEN))
 		return EBT_DROP;
 
 	ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
@@ -32,10 +32,22 @@
 		else
 			skb->pkt_type = PACKET_MULTICAST;
 	} else {
-		if (xt_hooknum(par) != NF_BR_BROUTING)
-			dev = br_port_get_rcu(xt_in(par))->br->dev;
-		else
+		const struct net_device *dev;
+
+		switch (xt_hooknum(par)) {
+		case NF_BR_BROUTING:
 			dev = xt_in(par);
+			break;
+		case NF_BR_PRE_ROUTING:
+			dev = br_port_get_rcu(xt_in(par))->br->dev;
+			break;
+		default:
+			dev = NULL;
+			break;
+		}
+
+		if (!dev) /* NF_BR_LOCAL_OUT */
+			return info->target;
 
 		if (ether_addr_equal(info->mac, dev->dev_addr))
 			skb->pkt_type = PACKET_HOST;
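
Note the inverted polarity in the writable check above: skb_make_writable()
returned true on success, while skb_ensure_writable() returns 0 on success
and a negative errno on failure. The length argument is now meaningful,
requesting exactly the bytes about to be modified:

	/* 0 on success, -errno on failure; ETH_ALEN because only the
	 * destination MAC is rewritten (ebt_snat asks for ETH_ALEN * 2
	 * to reach the source MAC as well) */
	if (skb_ensure_writable(skb, ETH_ALEN))
		return EBT_DROP;
	ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
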
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index ffaa8ce..df37249 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_ip
  *
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 2a5a52a..f3225bc 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_ip6
  *
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 165b9d6..fa19955 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_limit
  *
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 707caea..e2eea1d 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_log
  *
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 19f0f95..21fd3d3 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_mark
  *
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index d98baef..81fb59d 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_mark_m
  *
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index da1c2fd..61bf8f4 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * ebt_nflog
  *
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 496a565..c9e3061 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_pkttype
  *
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index a7223ea..0cad62a 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_redirect
  *
@@ -20,7 +21,7 @@
 {
 	const struct ebt_redirect_info *info = par->targinfo;
 
-	if (!skb_make_writable(skb, 0))
+	if (skb_ensure_writable(skb, ETH_ALEN))
 		return EBT_DROP;
 
 	if (xt_hooknum(par) != NF_BR_BROUTING)
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 11cf9e9..27443bf 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_snat
  *
@@ -21,7 +22,7 @@
 {
 	const struct ebt_nat_info *info = par->targinfo;
 
-	if (!skb_make_writable(skb, 0))
+	if (skb_ensure_writable(skb, ETH_ALEN * 2))
 		return EBT_DROP;
 
 	ether_addr_copy(eth_hdr(skb)->h_source, info->mac);
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 46c1fe7..0d6d20c 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebt_stp
  *
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 98c221d..80ede37 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Description: EBTables 802.1Q match extension kernelspace module.
  * Authors: Nick Fedchik <nick@fedchik.org.ua>
  *          Bart De Schuymer <bdschuym@pandora.be>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/if_ether.h>
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 276b602..66e7af1 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebtable_broute
  *
@@ -15,6 +16,8 @@
 #include <linux/module.h>
 #include <linux/if_bridge.h>
 
+#include "../br_private.h"
+
 /* EBT_ACCEPT means the frame will be bridged
  * EBT_DROP means the frame will be routed
  */
@@ -48,30 +51,63 @@
 	.me		= THIS_MODULE,
 };
 
-static int ebt_broute(struct sk_buff *skb)
+static unsigned int ebt_broute(void *priv, struct sk_buff *skb,
+			       const struct nf_hook_state *s)
 {
+	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 	struct nf_hook_state state;
+	unsigned char *dest;
 	int ret;
 
+	if (!p || p->state != BR_STATE_FORWARDING)
+		return NF_ACCEPT;
+
 	nf_hook_state_init(&state, NF_BR_BROUTING,
-			   NFPROTO_BRIDGE, skb->dev, NULL, NULL,
-			   dev_net(skb->dev), NULL);
+			   NFPROTO_BRIDGE, s->in, NULL, NULL,
+			   s->net, NULL);
 
 	ret = ebt_do_table(skb, &state, state.net->xt.broute_table);
-	if (ret == NF_DROP)
-		return 1; /* route it */
-	return 0; /* bridge it */
+
+	if (ret != NF_DROP)
+		return ret;
+
+	/* DROP in ebtables -t broute means that the
+	 * skb should be routed, not bridged.
+	 * This is awkward, but can't be changed for compatibility
+	 * reasons.
+	 *
+	 * We map DROP to ACCEPT and set the ->br_netfilter_broute flag.
+	 */
+	BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1;
+
+	/* undo PACKET_HOST mangling done in br_input in case the dst
+	 * address matches the logical bridge but not the port.
+	 */
+	dest = eth_hdr(skb)->h_dest;
+	if (skb->pkt_type == PACKET_HOST &&
+	    !ether_addr_equal(skb->dev->dev_addr, dest) &&
+	     ether_addr_equal(p->br->dev->dev_addr, dest))
+		skb->pkt_type = PACKET_OTHERHOST;
+
+	return NF_ACCEPT;
 }
 
+static const struct nf_hook_ops ebt_ops_broute = {
+	.hook		= ebt_broute,
+	.pf		= NFPROTO_BRIDGE,
+	.hooknum	= NF_BR_PRE_ROUTING,
+	.priority	= NF_BR_PRI_FIRST,
+};
+
 static int __net_init broute_net_init(struct net *net)
 {
-	return ebt_register_table(net, &broute_table, NULL,
+	return ebt_register_table(net, &broute_table, &ebt_ops_broute,
 				  &net->xt.broute_table);
 }
 
 static void __net_exit broute_net_exit(struct net *net)
 {
-	ebt_unregister_table(net, net->xt.broute_table, NULL);
+	ebt_unregister_table(net, net->xt.broute_table, &ebt_ops_broute);
 }
 
 static struct pernet_operations broute_net_ops = {
@@ -81,21 +117,11 @@
 
 static int __init ebtable_broute_init(void)
 {
-	int ret;
-
-	ret = register_pernet_subsys(&broute_net_ops);
-	if (ret < 0)
-		return ret;
-	/* see br_input.c */
-	RCU_INIT_POINTER(br_should_route_hook,
-			   (br_should_route_hook_t *)ebt_broute);
-	return 0;
+	return register_pernet_subsys(&broute_net_ops);
 }
 
 static void __exit ebtable_broute_fini(void)
 {
-	RCU_INIT_POINTER(br_should_route_hook, NULL);
-	synchronize_net();
 	unregister_pernet_subsys(&broute_net_ops);
 }
 
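
broute is no longer a special hook pointer consulted from br_input; it is a
regular NF_BR_PRE_ROUTING hook registered at NF_BR_PRI_FIRST, with the legacy
"DROP means route" verdict translated into the br_netfilter_broute cb flag.
Userspace semantics are unchanged; a usage sketch with the ebtables tool:

	# frames for this destination are routed (handed to the IP stack)
	# instead of bridged; DROP in the broute table means "route"
	ebtables -t broute -A BROUTING -p IPv4 --ip-dst 192.0.2.1 -j DROP
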
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 550324c..78cb9b2 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebtable_filter
  *
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index c0fb3ca..0888936 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  ebtable_nat
  *
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 4918287..4096d8a 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  ebtables
  *
@@ -8,11 +9,6 @@
  *
  *  This code is strongly inspired by the iptables code which is
  *  Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kmod.h>
@@ -31,10 +27,6 @@
 /* needed for logical [in,out]-dev filtering */
 #include "../br_private.h"
 
-#define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\
-					 "report to author: "format, ## args)
-/* #define BUGPRINT(format, args...) */
-
 /* Each cpu has its own set of counters, so there is no need for write_lock in
  * the softirq
  * For reading or updating the counters, the user context needs to
@@ -229,7 +221,7 @@
 			return NF_DROP;
 		}
 
-		ADD_COUNTER(*(counter_base + i), 1, skb->len);
+		ADD_COUNTER(*(counter_base + i), skb->len, 1);
 
 		/* these should only watch: not modify, nor tell us
 		 * what to do with the packet
@@ -385,7 +377,7 @@
 	par->match     = match;
 	par->matchinfo = m->data;
 	ret = xt_check_match(par, m->match_size,
-	      e->ethproto, e->invflags & EBT_IPROTO);
+	      ntohs(e->ethproto), e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(match->me);
 		return ret;
@@ -422,7 +414,7 @@
 	par->target   = watcher;
 	par->targinfo = w->data;
 	ret = xt_check_target(par, w->watcher_size,
-	      e->ethproto, e->invflags & EBT_IPROTO);
+	      ntohs(e->ethproto), e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(watcher->me);
 		return ret;
@@ -466,8 +458,6 @@
 				/* we make userspace set this right,
 				 * so there is no misunderstanding
 				 */
-				BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set "
-					 "in distinguisher\n");
 				return -EINVAL;
 			}
 			if (i != NF_BR_NUMHOOKS)
@@ -485,18 +475,14 @@
 			offset += e->next_offset;
 		}
 	}
-	if (offset != limit) {
-		BUGPRINT("entries_size too small\n");
+	if (offset != limit)
 		return -EINVAL;
-	}
 
 	/* check if all valid hooks have a chain */
 	for (i = 0; i < NF_BR_NUMHOOKS; i++) {
 		if (!newinfo->hook_entry[i] &&
-		   (valid_hooks & (1 << i))) {
-			BUGPRINT("Valid hook without chain\n");
+		   (valid_hooks & (1 << i)))
 			return -EINVAL;
-		}
 	}
 	return 0;
 }
@@ -523,26 +509,20 @@
 		/* this checks if the previous chain has as many entries
 		 * as it said it has
 		 */
-		if (*n != *cnt) {
-			BUGPRINT("nentries does not equal the nr of entries "
-				 "in the chain\n");
+		if (*n != *cnt)
 			return -EINVAL;
-		}
+
 		if (((struct ebt_entries *)e)->policy != EBT_DROP &&
 		   ((struct ebt_entries *)e)->policy != EBT_ACCEPT) {
 			/* only RETURN from udc */
 			if (i != NF_BR_NUMHOOKS ||
-			   ((struct ebt_entries *)e)->policy != EBT_RETURN) {
-				BUGPRINT("bad policy\n");
+			   ((struct ebt_entries *)e)->policy != EBT_RETURN)
 				return -EINVAL;
-			}
 		}
 		if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */
 			(*udc_cnt)++;
-		if (((struct ebt_entries *)e)->counter_offset != *totalcnt) {
-			BUGPRINT("counter_offset != totalcnt");
+		if (((struct ebt_entries *)e)->counter_offset != *totalcnt)
 			return -EINVAL;
-		}
 		*n = ((struct ebt_entries *)e)->nentries;
 		*cnt = 0;
 		return 0;
@@ -550,15 +530,13 @@
 	/* a plain old entry, heh */
 	if (sizeof(struct ebt_entry) > e->watchers_offset ||
 	   e->watchers_offset > e->target_offset ||
-	   e->target_offset >= e->next_offset) {
-		BUGPRINT("entry offsets not in right order\n");
+	   e->target_offset >= e->next_offset)
 		return -EINVAL;
-	}
+
 	/* this is not checked anywhere else */
-	if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) {
-		BUGPRINT("target size too small\n");
+	if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target))
 		return -EINVAL;
-	}
+
 	(*cnt)++;
 	(*totalcnt)++;
 	return 0;
@@ -678,18 +656,15 @@
 	if (e->bitmask == 0)
 		return 0;
 
-	if (e->bitmask & ~EBT_F_MASK) {
-		BUGPRINT("Unknown flag for bitmask\n");
+	if (e->bitmask & ~EBT_F_MASK)
 		return -EINVAL;
-	}
-	if (e->invflags & ~EBT_INV_MASK) {
-		BUGPRINT("Unknown flag for inv bitmask\n");
+
+	if (e->invflags & ~EBT_INV_MASK)
 		return -EINVAL;
-	}
-	if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) {
-		BUGPRINT("NOPROTO & 802_3 not allowed\n");
+
+	if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3))
 		return -EINVAL;
-	}
+
 	/* what hook do we belong to? */
 	for (i = 0; i < NF_BR_NUMHOOKS; i++) {
 		if (!newinfo->hook_entry[i])
@@ -748,13 +723,11 @@
 	t->u.target = target;
 	if (t->u.target == &ebt_standard_target) {
 		if (gap < sizeof(struct ebt_standard_target)) {
-			BUGPRINT("Standard target size too big\n");
 			ret = -EFAULT;
 			goto cleanup_watchers;
 		}
 		if (((struct ebt_standard_target *)t)->verdict <
 		   -NUM_STANDARD_TARGETS) {
-			BUGPRINT("Invalid standard target\n");
 			ret = -EFAULT;
 			goto cleanup_watchers;
 		}
@@ -767,7 +740,7 @@
 	tgpar.target   = target;
 	tgpar.targinfo = t->data;
 	ret = xt_check_target(&tgpar, t->target_size,
-	      e->ethproto, e->invflags & EBT_IPROTO);
+	      ntohs(e->ethproto), e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(target->me);
 		goto cleanup_watchers;
@@ -813,10 +786,9 @@
 		if (strcmp(t->u.name, EBT_STANDARD_TARGET))
 			goto letscontinue;
 		if (e->target_offset + sizeof(struct ebt_standard_target) >
-		   e->next_offset) {
-			BUGPRINT("Standard target size too big\n");
+		   e->next_offset)
 			return -1;
-		}
+
 		verdict = ((struct ebt_standard_target *)t)->verdict;
 		if (verdict >= 0) { /* jump to another chain */
 			struct ebt_entries *hlp2 =
@@ -825,14 +797,12 @@
 				if (hlp2 == cl_s[i].cs.chaininfo)
 					break;
 			/* bad destination or loop */
-			if (i == udc_cnt) {
-				BUGPRINT("bad destination\n");
+			if (i == udc_cnt)
 				return -1;
-			}
-			if (cl_s[i].cs.n) {
-				BUGPRINT("loop\n");
+
+			if (cl_s[i].cs.n)
 				return -1;
-			}
+
 			if (cl_s[i].hookmask & (1 << hooknr))
 				goto letscontinue;
 			/* this can't be 0, so the loop test is correct */
@@ -865,24 +835,21 @@
 	i = 0;
 	while (i < NF_BR_NUMHOOKS && !newinfo->hook_entry[i])
 		i++;
-	if (i == NF_BR_NUMHOOKS) {
-		BUGPRINT("No valid hooks specified\n");
+	if (i == NF_BR_NUMHOOKS)
 		return -EINVAL;
-	}
-	if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) {
-		BUGPRINT("Chains don't start at beginning\n");
+
+	if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries)
 		return -EINVAL;
-	}
+
 	/* make sure chains are ordered after each other in same order
 	 * as their corresponding hooks
 	 */
 	for (j = i + 1; j < NF_BR_NUMHOOKS; j++) {
 		if (!newinfo->hook_entry[j])
 			continue;
-		if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) {
-			BUGPRINT("Hook order must be followed\n");
+		if (newinfo->hook_entry[j] <= newinfo->hook_entry[i])
 			return -EINVAL;
-		}
+
 		i = j;
 	}
 
@@ -900,15 +867,11 @@
 	if (ret != 0)
 		return ret;
 
-	if (i != j) {
-		BUGPRINT("nentries does not equal the nr of entries in the "
-			 "(last) chain\n");
+	if (i != j)
 		return -EINVAL;
-	}
-	if (k != newinfo->nentries) {
-		BUGPRINT("Total nentries is wrong\n");
+
+	if (k != newinfo->nentries)
 		return -EINVAL;
-	}
 
 	/* get the location of the udc, put them in an array
 	 * while we're at it, allocate the chainstack
@@ -942,7 +905,6 @@
 		   ebt_get_udc_positions, newinfo, &i, cl_s);
 		/* sanity check */
 		if (i != udc_cnt) {
-			BUGPRINT("i != udc_cnt\n");
 			vfree(cl_s);
 			return -EFAULT;
 		}
@@ -997,8 +959,8 @@
 			continue;
 		counter_base = COUNTER_BASE(oldcounters, nentries, cpu);
 		for (i = 0; i < nentries; i++)
-			ADD_COUNTER(counters[i], counter_base[i].pcnt,
-				    counter_base[i].bcnt);
+			ADD_COUNTER(counters[i], counter_base[i].bcnt,
+				    counter_base[i].pcnt);
 	}
 }
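
The swapped ADD_COUNTER() arguments here and below follow the (bytes, packets)
order of the shared x_tables macro, so bytes land in bcnt and packets in
pcnt. The macro shape, for reference (the real definition lives in the
x_tables headers):

	#define ADD_COUNTER(c, b, p) do {	\
		(c).bcnt += (b);		\
		(c).pcnt += (p);		\
	} while (0)
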
 
@@ -1042,7 +1004,6 @@
 		goto free_unlock;
 
 	if (repl->num_counters && repl->num_counters != t->private->nentries) {
-		BUGPRINT("Wrong nr. of counters requested\n");
 		ret = -EINVAL;
 		goto free_unlock;
 	}
@@ -1118,15 +1079,12 @@
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.entries_size) {
-		BUGPRINT("Wrong len argument\n");
+	if (len != sizeof(tmp) + tmp.entries_size)
 		return -EINVAL;
-	}
 
-	if (tmp.entries_size == 0) {
-		BUGPRINT("Entries_size never zero\n");
+	if (tmp.entries_size == 0)
 		return -EINVAL;
-	}
+
 	/* overflow check */
 	if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) /
 			NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter))
@@ -1137,21 +1095,22 @@
 	tmp.name[sizeof(tmp.name) - 1] = 0;
 
 	countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids;
-	newinfo = vmalloc(sizeof(*newinfo) + countersize);
+	newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT,
+			    PAGE_KERNEL);
 	if (!newinfo)
 		return -ENOMEM;
 
 	if (countersize)
 		memset(newinfo->counters, 0, countersize);
 
-	newinfo->entries = vmalloc(tmp.entries_size);
+	newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT,
+				     PAGE_KERNEL);
 	if (!newinfo->entries) {
 		ret = -ENOMEM;
 		goto free_newinfo;
 	}
 	if (copy_from_user(
 	   newinfo->entries, tmp.entries, tmp.entries_size) != 0) {
-		BUGPRINT("Couldn't copy entries from userspace\n");
 		ret = -EFAULT;
 		goto free_entries;
 	}
@@ -1192,10 +1151,8 @@
 
 	if (input_table == NULL || (repl = input_table->table) == NULL ||
 	    repl->entries == NULL || repl->entries_size == 0 ||
-	    repl->counters != NULL || input_table->private != NULL) {
-		BUGPRINT("Bad table data for ebt_register_table!!!\n");
+	    repl->counters != NULL || input_table->private != NULL)
 		return -EINVAL;
-	}
 
 	/* Don't add one table to multiple lists. */
 	table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL);
@@ -1233,13 +1190,10 @@
 				((char *)repl->hook_entry[i] - repl->entries);
 	}
 	ret = translate_table(net, repl->name, newinfo);
-	if (ret != 0) {
-		BUGPRINT("Translate_table failed\n");
+	if (ret != 0)
 		goto free_chainstack;
-	}
 
 	if (table->check && table->check(newinfo, table->valid_hooks)) {
-		BUGPRINT("The table doesn't like its own initial data, lol\n");
 		ret = -EINVAL;
 		goto free_chainstack;
 	}
@@ -1250,7 +1204,6 @@
 	list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) {
 		if (strcmp(t->name, table->name) == 0) {
 			ret = -EEXIST;
-			BUGPRINT("Table name already exists\n");
 			goto free_unlock;
 		}
 	}
@@ -1264,10 +1217,6 @@
 	mutex_unlock(&ebt_mutex);
 
 	WRITE_ONCE(*res, table);
-
-	if (!ops)
-		return 0;
-
 	ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
 	if (ret) {
 		__ebt_unregister_table(net, table);
@@ -1291,8 +1240,7 @@
 void ebt_unregister_table(struct net *net, struct ebt_table *table,
 			  const struct nf_hook_ops *ops)
 {
-	if (ops)
-		nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+	nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
 	__ebt_unregister_table(net, table);
 }
 
@@ -1318,7 +1266,6 @@
 		goto free_tmp;
 
 	if (num_counters != t->private->nentries) {
-		BUGPRINT("Wrong nr of counters\n");
 		ret = -EINVAL;
 		goto unlock_mutex;
 	}
@@ -1333,7 +1280,7 @@
 
 	/* we add to the counters of the first cpu */
 	for (i = 0; i < num_counters; i++)
-		ADD_COUNTER(t->private->counters[i], tmp[i].pcnt, tmp[i].bcnt);
+		ADD_COUNTER(t->private->counters[i], tmp[i].bcnt, tmp[i].pcnt);
 
 	write_unlock_bh(&t->lock);
 	ret = 0;
@@ -1445,10 +1392,8 @@
 	if (num_counters == 0)
 		return 0;
 
-	if (num_counters != nentries) {
-		BUGPRINT("Num_counters wrong\n");
+	if (num_counters != nentries)
 		return -EINVAL;
-	}
 
 	counterstmp = vmalloc(array_size(nentries, sizeof(*counterstmp)));
 	if (!counterstmp)
@@ -1494,15 +1439,11 @@
 	   (tmp.num_counters ? nentries * sizeof(struct ebt_counter) : 0))
 		return -EINVAL;
 
-	if (tmp.nentries != nentries) {
-		BUGPRINT("Nentries wrong\n");
+	if (tmp.nentries != nentries)
 		return -EINVAL;
-	}
 
-	if (tmp.entries_size != entries_size) {
-		BUGPRINT("Wrong size\n");
+	if (tmp.entries_size != entries_size)
 		return -EINVAL;
-	}
 
 	ret = copy_counters_to_user(t, oldcounters, tmp.counters,
 					tmp.num_counters, nentries);
@@ -1574,7 +1515,6 @@
 		}
 		mutex_unlock(&ebt_mutex);
 		if (copy_to_user(user, &tmp, *len) != 0) {
-			BUGPRINT("c2u Didn't work\n");
 			ret = -EFAULT;
 			break;
 		}
@@ -1830,20 +1770,28 @@
 	return 0;
 }
 
+static int ebt_compat_init_offsets(unsigned int number)
+{
+	if (number > INT_MAX)
+		return -EINVAL;
+
+	/* also count the base chain policies */
+	number += NF_BR_NUMHOOKS;
+
+	return xt_compat_init_offsets(NFPROTO_BRIDGE, number);
+}
 
 static int compat_table_info(const struct ebt_table_info *info,
 			     struct compat_ebt_replace *newinfo)
 {
 	unsigned int size = info->entries_size;
 	const void *entries = info->entries;
+	int ret;
 
 	newinfo->entries_size = size;
-	if (info->nentries) {
-		int ret = xt_compat_init_offsets(NFPROTO_BRIDGE,
-						 info->nentries);
-		if (ret)
-			return ret;
-	}
+	ret = ebt_compat_init_offsets(info->nentries);
+	if (ret)
+		return ret;
 
 	return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
 							entries, newinfo);
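
Aside: the new ebt_compat_init_offsets() helper rejects entry counts above
INT_MAX before adding the base chain policies, so a hostile nentries value
from userspace cannot wrap the unsigned counter handed to
xt_compat_init_offsets(). A standalone sketch of the wraparound it guards
against (illustrative values, not kernel code):

#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned int number = UINT_MAX - 2;	/* hostile nentries */

	/* without the 'number > INT_MAX' rejection, adding the six base
	 * chain policies (NF_BR_NUMHOOKS) silently wraps around
	 */
	number += 6;
	printf("%u\n", number);			/* prints 3 */
	return 0;
}
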
@@ -2083,7 +2031,8 @@
 		if (match_kern)
 			match_kern->match_size = ret;
 
-		if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+		/* rule should have no remaining data after target */
+		if (type == EBT_COMPAT_TARGET && size_left)
 			return -EINVAL;
 
 		match32 = (struct compat_ebt_entry_mwt *) buf;
@@ -2208,7 +2157,9 @@
 	if (ret < 0)
 		return ret;
 
-	WARN_ON(size_remaining);
+	if (size_remaining)
+		return -EINVAL;
+
 	return state->buf_kern_offset;
 }
 
@@ -2291,9 +2242,10 @@
 
 	xt_compat_lock(NFPROTO_BRIDGE);
 
-	ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
+	ret = ebt_compat_init_offsets(tmp.nentries);
 	if (ret < 0)
 		goto out_unlock;
+
 	ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
 	if (ret < 0)
 		goto out_unlock;
@@ -2315,8 +2267,10 @@
 	state.buf_kern_len = size64;
 
 	ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
-	if (WARN_ON(ret < 0))
+	if (WARN_ON(ret < 0)) {
+		vfree(entries_tmp);
 		goto out_unlock;
+	}
 
 	vfree(entries_tmp);
 	tmp.entries_size = size64;
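
Aside: the two ADD_COUNTER hunks above swap the last two arguments into
(bcnt, pcnt) order, matching the shared x_tables helper, which takes the
byte count before the packet count. Paraphrased, that macro is defined
along these lines:

/* paraphrased from include/linux/netfilter/x_tables.h */
#define ADD_COUNTER(c, b, p) do { (c).bcnt += (b); (c).pcnt += (p); } while (0)
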
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
new file mode 100644
index 0000000..8096732
--- /dev/null
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
+
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+#include "../br_private.h"
+
+/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
+ * has been linearized or cloned.
+ */
+static int nf_br_ip_fragment(struct net *net, struct sock *sk,
+			     struct sk_buff *skb,
+			     struct nf_bridge_frag_data *data,
+			     int (*output)(struct net *, struct sock *sk,
+					   const struct nf_bridge_frag_data *data,
+					   struct sk_buff *))
+{
+	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+	unsigned int hlen, ll_rs, mtu;
+	ktime_t tstamp = skb->tstamp;
+	struct ip_frag_state state;
+	struct iphdr *iph;
+	int err;
+
+	/* for offloaded checksums cleanup checksum before fragmentation */
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    (err = skb_checksum_help(skb)))
+		goto blackhole;
+
+	iph = ip_hdr(skb);
+
+	/*
+	 *	Setup starting values
+	 */
+
+	hlen = iph->ihl * 4;
+	frag_max_size -= hlen;
+	ll_rs = LL_RESERVED_SPACE(skb->dev);
+	mtu = skb->dev->mtu;
+
+	if (skb_has_frag_list(skb)) {
+		unsigned int first_len = skb_pagelen(skb);
+		struct ip_fraglist_iter iter;
+		struct sk_buff *frag;
+
+		if (first_len - hlen > mtu ||
+		    skb_headroom(skb) < ll_rs)
+			goto blackhole;
+
+		if (skb_cloned(skb))
+			goto slow_path;
+
+		skb_walk_frags(skb, frag) {
+			if (frag->len > mtu ||
+			    skb_headroom(frag) < hlen + ll_rs)
+				goto blackhole;
+
+			if (skb_shared(frag))
+				goto slow_path;
+		}
+
+		ip_fraglist_init(skb, iph, hlen, &iter);
+
+		for (;;) {
+			if (iter.frag)
+				ip_fraglist_prepare(skb, &iter);
+
+			skb->tstamp = tstamp;
+			err = output(net, sk, data, skb);
+			if (err || !iter.frag)
+				break;
+
+			skb = ip_fraglist_next(&iter);
+		}
+		return err;
+	}
+slow_path:
+	/* This is a linearized skbuff; the original geometry is lost for us.
+	 * It may also be a cloned skbuff: we could preserve the geometry for
+	 * the copies, but that is probably not worth the effort.
+	 */
+	ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);
+
+	while (state.left > 0) {
+		struct sk_buff *skb2;
+
+		skb2 = ip_frag_next(skb, &state);
+		if (IS_ERR(skb2)) {
+			err = PTR_ERR(skb2);
+			goto blackhole;
+		}
+
+		skb2->tstamp = tstamp;
+		err = output(net, sk, data, skb2);
+		if (err)
+			goto blackhole;
+	}
+	consume_skb(skb);
+	return err;
+
+blackhole:
+	kfree_skb(skb);
+	return 0;
+}
+
+/* ip_defrag() expects IPCB() in place. */
+static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
+			   size_t inet_skb_parm_size)
+{
+	memcpy(cb, skb->cb, sizeof(*cb));
+	memset(skb->cb, 0, inet_skb_parm_size);
+}
+
+static void br_skb_cb_restore(struct sk_buff *skb,
+			      const struct br_input_skb_cb *cb,
+			      u16 fragsz)
+{
+	memcpy(skb->cb, cb, sizeof(*cb));
+	BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
+}
+
+static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
+				     const struct nf_hook_state *state)
+{
+	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
+	enum ip_conntrack_info ctinfo;
+	struct br_input_skb_cb cb;
+	const struct nf_conn *ct;
+	int err;
+
+	if (!ip_is_fragment(ip_hdr(skb)))
+		return NF_ACCEPT;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct)
+		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+
+	br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
+	local_bh_disable();
+	err = ip_defrag(state->net, skb,
+			IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
+	local_bh_enable();
+	if (!err) {
+		br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
+		skb->ignore_df = 1;
+		return NF_ACCEPT;
+	}
+
+	return NF_STOLEN;
+}
+
+static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
+				     const struct nf_hook_state *state)
+{
+	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
+	enum ip_conntrack_info ctinfo;
+	struct br_input_skb_cb cb;
+	const struct nf_conn *ct;
+	int err;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct)
+		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+
+	br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));
+
+	err = nf_ipv6_br_defrag(state->net, skb,
+				IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
+	/* queued */
+	if (err == -EINPROGRESS)
+		return NF_STOLEN;
+
+	br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
+	return err == 0 ? NF_ACCEPT : NF_DROP;
+}
+
+static int nf_ct_br_ip_check(const struct sk_buff *skb)
+{
+	const struct iphdr *iph;
+	int nhoff, len;
+
+	nhoff = skb_network_offset(skb);
+	iph = ip_hdr(skb);
+	if (iph->ihl < 5 ||
+	    iph->version != 4)
+		return -1;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < nhoff + len ||
+	    len < (iph->ihl * 4))
+		return -1;
+
+	return 0;
+}
+
+static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
+{
+	const struct ipv6hdr *hdr;
+	int nhoff, len;
+
+	nhoff = skb_network_offset(skb);
+	hdr = ipv6_hdr(skb);
+	if (hdr->version != 6)
+		return -1;
+
+	len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
+	if (skb->len < len)
+		return -1;
+
+	return 0;
+}
+
+static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
+				     const struct nf_hook_state *state)
+{
+	struct nf_hook_state bridge_state = *state;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	u32 len;
+	int ret;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if ((ct && !nf_ct_is_template(ct)) ||
+	    ctinfo == IP_CT_UNTRACKED)
+		return NF_ACCEPT;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+			return NF_ACCEPT;
+
+		len = ntohs(ip_hdr(skb)->tot_len);
+		if (pskb_trim_rcsum(skb, len))
+			return NF_ACCEPT;
+
+		if (nf_ct_br_ip_check(skb))
+			return NF_ACCEPT;
+
+		bridge_state.pf = NFPROTO_IPV4;
+		ret = nf_ct_br_defrag4(skb, &bridge_state);
+		break;
+	case htons(ETH_P_IPV6):
+		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+			return NF_ACCEPT;
+
+		len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
+		if (pskb_trim_rcsum(skb, len))
+			return NF_ACCEPT;
+
+		if (nf_ct_br_ipv6_check(skb))
+			return NF_ACCEPT;
+
+		bridge_state.pf = NFPROTO_IPV6;
+		ret = nf_ct_br_defrag6(skb, &bridge_state);
+		break;
+	default:
+		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+		return NF_ACCEPT;
+	}
+
+	if (ret != NF_ACCEPT)
+		return ret;
+
+	return nf_conntrack_in(skb, &bridge_state);
+}
+
+static void nf_ct_bridge_frag_save(struct sk_buff *skb,
+				   struct nf_bridge_frag_data *data)
+{
+	if (skb_vlan_tag_present(skb)) {
+		data->vlan_present = true;
+		data->vlan_tci = skb->vlan_tci;
+		data->vlan_proto = skb->vlan_proto;
+	} else {
+		data->vlan_present = false;
+	}
+	skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
+}
+
+static unsigned int
+nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
+		    int (*output)(struct net *, struct sock *sk,
+				  const struct nf_bridge_frag_data *data,
+				  struct sk_buff *))
+{
+	struct nf_bridge_frag_data data;
+
+	if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
+		return NF_ACCEPT;
+
+	nf_ct_bridge_frag_save(skb, &data);
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
+		break;
+	case htons(ETH_P_IPV6):
+		nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return NF_DROP;
+	}
+
+	return NF_STOLEN;
+}
+
+/* Actually only slow path refragmentation needs this. */
+static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
+				     const struct nf_bridge_frag_data *data)
+{
+	int err;
+
+	err = skb_cow_head(skb, ETH_HLEN);
+	if (err) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+	if (data->vlan_present)
+		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
+	else if (skb_vlan_tag_present(skb))
+		__vlan_hwaccel_clear_tag(skb);
+
+	skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
+	skb_reset_mac_header(skb);
+
+	return 0;
+}
+
+static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
+				    const struct nf_bridge_frag_data *data,
+				    struct sk_buff *skb)
+{
+	int err;
+
+	err = nf_ct_bridge_frag_restore(skb, data);
+	if (err < 0)
+		return err;
+
+	return br_dev_queue_push_xmit(net, sk, skb);
+}
+
+static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	int protoff;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		return nf_conntrack_confirm(skb);
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		protoff = skb_network_offset(skb) + ip_hdrlen(skb);
+		break;
+	case htons(ETH_P_IPV6): {
+		unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+		__be16 frag_off;
+
+		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+					   &frag_off);
+		if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
+			return nf_conntrack_confirm(skb);
+		}
+		break;
+	default:
+		return NF_ACCEPT;
+	}
+	return nf_confirm(skb, protoff, ct, ctinfo);
+}
+
+static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
+				      const struct nf_hook_state *state)
+{
+	int ret;
+
+	ret = nf_ct_bridge_confirm(skb);
+	if (ret != NF_ACCEPT)
+		return ret;
+
+	return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
+}
+
+static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
+	{
+		.hook		= nf_ct_bridge_pre,
+		.pf		= NFPROTO_BRIDGE,
+		.hooknum	= NF_BR_PRE_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK,
+	},
+	{
+		.hook		= nf_ct_bridge_post,
+		.pf		= NFPROTO_BRIDGE,
+		.hooknum	= NF_BR_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
+	},
+};
+
+static struct nf_ct_bridge_info bridge_info = {
+	.ops		= nf_ct_bridge_hook_ops,
+	.ops_size	= ARRAY_SIZE(nf_ct_bridge_hook_ops),
+	.me		= THIS_MODULE,
+};
+
+static int __init nf_conntrack_l3proto_bridge_init(void)
+{
+	nf_ct_bridge_register(&bridge_info);
+
+	return 0;
+}
+
+static void __exit nf_conntrack_l3proto_bridge_fini(void)
+{
+	nf_ct_bridge_unregister(&bridge_info);
+}
+
+module_init(nf_conntrack_l3proto_bridge_init);
+module_exit(nf_conntrack_l3proto_bridge_fini);
+
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
index bd2b3c7..1ad61d1 100644
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
new file mode 100644
index 0000000..7c9e92b
--- /dev/null
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
+#include <linux/if_bridge.h>
+
+static const struct net_device *
+nft_meta_get_bridge(const struct net_device *dev)
+{
+	if (dev && netif_is_bridge_port(dev))
+		return netdev_master_upper_dev_get_rcu((struct net_device *)dev);
+
+	return NULL;
+}
+
+static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
+				     struct nft_regs *regs,
+				     const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
+	u32 *dest = &regs->data[priv->dreg];
+	const struct net_device *br_dev;
+
+	switch (priv->key) {
+	case NFT_META_BRI_IIFNAME:
+		br_dev = nft_meta_get_bridge(in);
+		break;
+	case NFT_META_BRI_OIFNAME:
+		br_dev = nft_meta_get_bridge(out);
+		break;
+	case NFT_META_BRI_IIFPVID: {
+		u16 p_pvid;
+
+		br_dev = nft_meta_get_bridge(in);
+		if (!br_dev || !br_vlan_enabled(br_dev))
+			goto err;
+
+		br_vlan_get_pvid_rcu(in, &p_pvid);
+		nft_reg_store16(dest, p_pvid);
+		return;
+	}
+	case NFT_META_BRI_IIFVPROTO: {
+		u16 p_proto;
+
+		br_dev = nft_meta_get_bridge(in);
+		if (!br_dev || !br_vlan_enabled(br_dev))
+			goto err;
+
+		br_vlan_get_proto(br_dev, &p_proto);
+		nft_reg_store16(dest, htons(p_proto));
+		return;
+	}
+	default:
+		return nft_meta_get_eval(expr, regs, pkt);
+	}
+
+	strncpy((char *)dest, br_dev ? br_dev->name : "", IFNAMSIZ);
+	return;
+err:
+	regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
+				    const struct nft_expr *expr,
+				    const struct nlattr * const tb[])
+{
+	struct nft_meta *priv = nft_expr_priv(expr);
+	unsigned int len;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+	switch (priv->key) {
+	case NFT_META_BRI_IIFNAME:
+	case NFT_META_BRI_OIFNAME:
+		len = IFNAMSIZ;
+		break;
+	case NFT_META_BRI_IIFPVID:
+	case NFT_META_BRI_IIFVPROTO:
+		len = sizeof(u16);
+		break;
+	default:
+		return nft_meta_get_init(ctx, expr, tb);
+	}
+
+	priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
+	return nft_validate_register_store(ctx, priv->dreg, NULL,
+					   NFT_DATA_VALUE, len);
+}
+
+static struct nft_expr_type nft_meta_bridge_type;
+static const struct nft_expr_ops nft_meta_bridge_get_ops = {
+	.type		= &nft_meta_bridge_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.eval		= nft_meta_bridge_get_eval,
+	.init		= nft_meta_bridge_get_init,
+	.dump		= nft_meta_get_dump,
+};
+
+static const struct nft_expr_ops nft_meta_bridge_set_ops = {
+	.type		= &nft_meta_bridge_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.eval		= nft_meta_set_eval,
+	.init		= nft_meta_set_init,
+	.destroy	= nft_meta_set_destroy,
+	.dump		= nft_meta_set_dump,
+	.validate	= nft_meta_set_validate,
+};
+
+static const struct nft_expr_ops *
+nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
+			   const struct nlattr * const tb[])
+{
+	if (tb[NFTA_META_KEY] == NULL)
+		return ERR_PTR(-EINVAL);
+
+	if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
+		return ERR_PTR(-EINVAL);
+
+	if (tb[NFTA_META_DREG])
+		return &nft_meta_bridge_get_ops;
+
+	if (tb[NFTA_META_SREG])
+		return &nft_meta_bridge_set_ops;
+
+	return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
+	.family         = NFPROTO_BRIDGE,
+	.name           = "meta",
+	.select_ops     = nft_meta_bridge_select_ops,
+	.policy         = nft_meta_policy,
+	.maxattr        = NFTA_META_MAX,
+	.owner          = THIS_MODULE,
+};
+
+static int __init nft_meta_bridge_module_init(void)
+{
+	return nft_register_expr(&nft_meta_bridge_type);
+}
+
+static void __exit nft_meta_bridge_module_exit(void)
+{
+	nft_unregister_expr(&nft_meta_bridge_type);
+}
+
+module_init(nft_meta_bridge_module_init);
+module_exit(nft_meta_bridge_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("wenxu <wenxu@ucloud.cn>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");
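
Aside: the two 16-bit keys (NFT_META_BRI_IIFPVID and NFT_META_BRI_IIFVPROTO)
store their result through nft_reg_store16(), which zeroes the whole 32-bit
register before writing the halfword, so no stale data leaks into the upper
bits. Paraphrased:

/* paraphrased from include/net/netfilter/nf_tables.h */
static inline void nft_reg_store16(u32 *dreg, u16 val)
{
	*dreg = 0;
	*(u16 *)dreg = val;
}

With a matching nftables userspace these keys should surface as the
meta ibrname/obrname/ibrpvid/ibrvproto selectors, though the exact keywords
depend on the nft release.
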
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 08cbed7..b325b56 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2014 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -125,13 +122,10 @@
 	if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len)))
 		return;
 
-	if (ip_hdr(oldskb)->protocol == IPPROTO_TCP ||
-	    ip_hdr(oldskb)->protocol == IPPROTO_UDP)
-		proto = ip_hdr(oldskb)->protocol;
-	else
-		proto = 0;
+	proto = ip_hdr(oldskb)->protocol;
 
 	if (!skb_csum_unnecessary(oldskb) &&
+	    nf_reject_verify_csum(proto) &&
 	    nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto))
 		return;
 
@@ -229,10 +223,14 @@
 	    pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h)))
 		return false;
 
+	ip6h = ipv6_hdr(skb);
 	thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
 	if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
 		return false;
 
+	if (!nf_reject_verify_csum(proto))
+		return true;
+
 	return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
 }
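
Aside: both reject paths now consult nf_reject_verify_csum() so that a
checksum is only validated for protocols that actually carry a full 16-bit
one's-complement checksum over the payload. Its logic is roughly the
following (paraphrased from the shared nf_reject header):

static inline bool nf_reject_verify_csum(__u8 proto)
{
	switch (proto) {
	/* Protocols with other integrity checks. */
	case IPPROTO_AH:
	case IPPROTO_ESP:
	case IPPROTO_SCTP:
	/* Protocols with partial checksums. */
	case IPPROTO_UDPLITE:
	case IPPROTO_DCCP:
	/* Protocols with optional checksums. */
	case IPPROTO_GRE:
		return false;
	}
	return true;
}
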
 
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
index d369495..eb83051 100644
--- a/net/caif/Kconfig
+++ b/net/caif/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # CAIF net configurations
 #
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 711d715..03c7cdd 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * CAIF Interface registration.
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  *
  * Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont
  *  and Sakari Ailus <sakari.ailus@nokia.com>
@@ -186,15 +186,19 @@
 		goto noxoff;
 
 	if (likely(!netif_queue_stopped(caifd->netdev))) {
+		struct Qdisc *sch;
+
 		/* If we run with a TX queue, check if the queue is too long*/
 		txq = netdev_get_tx_queue(skb->dev, 0);
-		qlen = qdisc_qlen(rcu_dereference_bh(txq->qdisc));
-
-		if (likely(qlen == 0))
+		sch = rcu_dereference_bh(txq->qdisc);
+		if (likely(qdisc_is_empty(sch)))
 			goto noxoff;
 
+		/* We can only check an explicit qdisc len value for !NOLOCK
+		 * qdiscs; otherwise always set flow off.
+		 */
 		high = (caifd->netdev->tx_queue_len * q_high) / 100;
-		if (likely(qlen < high))
+		if (!(sch->flags & TCQ_F_NOLOCK) && likely(sch->q.qlen < high))
 			goto noxoff;
 	}
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 416717c..ef14da5 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
@@ -953,7 +953,7 @@
 		mask |= EPOLLRDHUP;
 
 	/* readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
 		(sk->sk_shutdown & RCV_SHUTDOWN))
 		mask |= EPOLLIN | EPOLLRDNORM;
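
Aside: skb_queue_empty_lockless() is the variant that is safe to call
without holding the queue lock, which matters here because poll() runs
concurrently with queue updates. Paraphrased, it differs from
skb_queue_empty() only by the READ_ONCE() on the head pointer:

/* paraphrased from include/linux/skbuff.h */
static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
{
	return READ_ONCE(list->next) == (const struct sk_buff *)list;
}
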
 
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 1a082a9..76bd678 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * CAIF USB handler
  * Copyright (C) ST-Ericsson AB 2011
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 8f00bea..399239a 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index a1e85f0..2809cbd 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 7aae0b5..77f4284 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
@@ -26,7 +26,7 @@
 	cfsrvl_init(dbg, channel_id, dev_info, false);
 	dbg->layer.receive = cfdbgl_receive;
 	dbg->layer.transmit = cfdbgl_transmit;
-	snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id);
+	snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ, "dbg%d", channel_id);
 	return &dbg->layer;
 }
 
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 3bdddb3..eb6f8ef 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
@@ -33,8 +33,7 @@
 	cfsrvl_init(dgm, channel_id, dev_info, true);
 	dgm->layer.receive = cfdgml_receive;
 	dgm->layer.transmit = cfdgml_transmit;
-	snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id);
-	dgm->layer.name[CAIF_LAYER_NAME_SZ - 1] = '\0';
+	snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ, "dgm%d", channel_id);
 	return &dgm->layer;
 }
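
Aside: this change (repeated below for cfutill, cfveil and cfvidl, and
above for cfdbgl) leans on snprintf() always NUL-terminating within the
given size, so the full buffer size can be passed and the manual terminator
write becomes redundant. A quick userspace illustration:

#include <stdio.h>

int main(void)
{
	char name[8];

	/* snprintf() writes at most sizeof(name) - 1 characters plus the
	 * terminating NUL, truncating if needed -- no manual '\0' required.
	 */
	snprintf(name, sizeof(name), "dgm%d", 1234567);
	printf("%s\n", name);	/* prints "dgm1234" */
	return 0;
}
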
 
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index 434ba85..6651a8d 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * CAIF Framing Layer.
  *
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 510aa5a..4172b0d 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 38c2b7a..7796414 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
@@ -319,16 +319,12 @@
 		if (tmppkt == NULL)
 			return NULL;
 		tmp = pkt_to_skb(tmppkt);
-		skb_set_tail_pointer(tmp, dstlen);
-		tmp->len = dstlen;
-		memcpy(tmp->data, dst->data, dstlen);
+		skb_put_data(tmp, dst->data, dstlen);
 		cfpkt_destroy(dstpkt);
 		dst = tmp;
 	}
-	memcpy(skb_tail_pointer(dst), add->data, skb_headlen(add));
+	skb_put_data(dst, add->data, skb_headlen(add));
 	cfpkt_destroy(addpkt);
-	dst->tail += addlen;
-	dst->len += addlen;
 	return skb_to_pkt(dst);
 }
 
@@ -359,13 +355,11 @@
 	if (skb2 == NULL)
 		return NULL;
 
-	/* Reduce the length of the original packet */
-	skb_set_tail_pointer(skb, pos);
-	skb->len = pos;
+	skb_put_data(skb2, split, len2nd);
 
-	memcpy(skb2->data, split, len2nd);
-	skb2->tail += len2nd;
-	skb2->len += len2nd;
+	/* Reduce the length of the original packet */
+	skb_trim(skb, pos);
+
 	skb2->priority = skb->priority;
 	return skb_to_pkt(skb2);
 }
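
Aside: skb_put_data() wraps the open-coded tail manipulation the removed
lines did by hand. Paraphrased from skbuff.h, it reserves len bytes at the
tail (with overrun checking via skb_put()) and copies in one step:

/* paraphrased from include/linux/skbuff.h */
static inline void *skb_put_data(struct sk_buff *skb, const void *data,
				 unsigned int len)
{
	void *tmp = skb_put(skb, len);	/* extends skb->tail and skb->len */

	memcpy(tmp, data, len);
	return tmp;
}
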
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index b82440e..ce2767e 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
@@ -264,9 +264,6 @@
 		frontpkt = rearpkt;
 		rearpkt = NULL;
 
-		err = -ENOMEM;
-		if (frontpkt == NULL)
-			goto out;
 		err = -EPROTO;
 		if (cfpkt_add_head(frontpkt, head, 6) < 0)
 			goto out;
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index ce60f06..e11725a 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index a6e1154..d0a4d0a 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 1728fa4..b2e47ed 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
@@ -33,7 +33,7 @@
 	cfsrvl_init(util, channel_id, dev_info, true);
 	util->layer.receive = cfutill_receive;
 	util->layer.transmit = cfutill_transmit;
-	snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1");
+	snprintf(util->layer.name, CAIF_LAYER_NAME_SZ, "util1");
 	return &util->layer;
 }
 
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 2622245..db2274b 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
@@ -32,7 +32,7 @@
 	cfsrvl_init(vei, channel_id, dev_info, true);
 	vei->layer.receive = cfvei_receive;
 	vei->layer.transmit = cfvei_transmit;
-	snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id);
+	snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ, "vei%d", channel_id);
 	return &vei->layer;
 }
 
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index b3b110e..134bad4 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
@@ -29,7 +29,7 @@
 	cfsrvl_init(vid, channel_id, dev_info, false);
 	vid->layer.receive = cfvidl_receive;
 	vid->layer.transmit = cfvidl_transmit;
-	snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1");
+	snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ, "vid1");
 	return &vid->layer;
 }
 
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 13e2ae6..a566289 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Authors:	Sjur Brendeland
  *		Daniel Martensson
- * License terms: GNU General Public License (GPL) version 2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
diff --git a/net/can/Kconfig b/net/can/Kconfig
index a4399be..d770427 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Controller Area Network (CAN) network layer core configuration
 #
@@ -7,11 +8,12 @@
 	tristate "CAN bus subsystem support"
 	---help---
 	  Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
-	  communications protocol which was developed by Bosch in
-	  1991, mainly for automotive, but now widely used in marine
-	  (NMEA2000), industrial, and medical applications.
-	  More information on the CAN network protocol family PF_CAN
-	  is contained in <Documentation/networking/can.rst>.
+	  communications protocol. Development of the CAN bus started in
+	  1983 at Robert Bosch GmbH, and the protocol was officially
+	  released in 1986. The CAN bus was originally intended mainly for
+	  automotive use, but is now also widely used in marine (NMEA2000),
+	  industrial, and medical applications. More information on the CAN
+	  network protocol family PF_CAN is contained in
+	  <Documentation/networking/can.rst>.
 
 	  If you want CAN support you should say Y here and also to the
 	  specific driver for your controller(s) below.
@@ -51,6 +53,8 @@
 	  They can be modified with AND/OR/XOR/SET operations as configured
 	  by the netlink configuration interface known e.g. from iptables.
 
+source "net/can/j1939/Kconfig"
+
 source "drivers/net/can/Kconfig"
 
 endif
diff --git a/net/can/Makefile b/net/can/Makefile
index 1242bbb..08bd217 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -15,3 +15,5 @@
 
 obj-$(CONFIG_CAN_GW)	+= can-gw.o
 can-gw-y		:= gw.o
+
+obj-$(CONFIG_CAN_J1939)	+= j1939/
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 1684ba5..128d37a 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -1,5 +1,5 @@
-/*
- * af_can.c - Protocol family CAN core module
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/* af_can.c - Protocol family CAN core module
  *            (used by different CAN protocol modules)
  *
  * Copyright (c) 2002-2017 Volkswagen Group Electronic Research
@@ -58,6 +58,7 @@
 #include <linux/can.h>
 #include <linux/can/core.h>
 #include <linux/can/skb.h>
+#include <linux/can/can-ml.h>
 #include <linux/ratelimit.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -83,29 +84,14 @@
 
 static atomic_t skbcounter = ATOMIC_INIT(0);
 
-/*
- * af_can socket functions
- */
+/* af_can socket functions */
 
-int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	struct sock *sk = sock->sk;
-
-	switch (cmd) {
-
-	case SIOCGSTAMP:
-		return sock_get_timestamp(sk, (struct timeval __user *)arg);
-
-	default:
-		return -ENOIOCTLCMD;
-	}
-}
-EXPORT_SYMBOL(can_ioctl);
-
-static void can_sock_destruct(struct sock *sk)
+void can_sock_destruct(struct sock *sk)
 {
 	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_error_queue);
 }
+EXPORT_SYMBOL(can_sock_destruct);
 
 static const struct can_proto *can_get_proto(int protocol)
 {
@@ -145,14 +131,13 @@
 
 		err = request_module("can-proto-%d", protocol);
 
-		/*
-		 * In case of error we only print a message but don't
+		/* In case of error we only print a message but don't
 		 * return the error code immediately.  Below we will
 		 * return -EPROTONOSUPPORT
 		 */
 		if (err)
-			printk_ratelimited(KERN_ERR "can: request_module "
-			       "(can-proto-%d) failed.\n", protocol);
+			pr_err_ratelimited("can: request_module (can-proto-%d) failed.\n",
+					   protocol);
 
 		cp = can_get_proto(protocol);
 	}
@@ -193,9 +178,7 @@
 	return err;
 }
 
-/*
- * af_can tx path
- */
+/* af_can tx path */
 
 /**
  * can_send - transmit a CAN frame (optional with local loopback)
@@ -217,7 +200,7 @@
 {
 	struct sk_buff *newskb = NULL;
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
-	struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats;
+	struct can_pkg_stats *pkg_stats = dev_net(skb->dev)->can.pkg_stats;
 	int err = -EINVAL;
 
 	if (skb->len == CAN_MTU) {
@@ -228,11 +211,11 @@
 		skb->protocol = htons(ETH_P_CANFD);
 		if (unlikely(cfd->len > CANFD_MAX_DLEN))
 			goto inval_skb;
-	} else
+	} else {
 		goto inval_skb;
+	}
 
-	/*
-	 * Make sure the CAN frame can pass the selected CAN netdevice.
+	/* Make sure the CAN frame can pass the selected CAN netdevice.
 	 * As structs can_frame and canfd_frame are similar, we can provide
 	 * CAN FD frames to legacy CAN drivers as long as the length is <= 8
 	 */
@@ -263,8 +246,7 @@
 		/* indication for the CAN driver: do loopback */
 		skb->pkt_type = PACKET_LOOPBACK;
 
-		/*
-		 * The reference to the originating sock may be required
+		/* The reference to the originating sock may be required
 		 * by the receiving socket to check whether the frame is
 		 * its own. Example: can_raw sockopt CAN_RAW_RECV_OWN_MSGS
 		 * Therefore we have to ensure that skb->sk remains the
@@ -273,8 +255,7 @@
 		 */
 
 		if (!(skb->dev->flags & IFF_ECHO)) {
-			/*
-			 * If the interface is not capable to do loopback
+			/* If the interface is not capable to do loopback
 			 * itself, we do it here.
 			 */
 			newskb = skb_clone(skb, GFP_ATOMIC);
@@ -306,8 +287,8 @@
 		netif_rx_ni(newskb);
 
 	/* update statistics */
-	can_stats->tx_frames++;
-	can_stats->tx_frames_delta++;
+	pkg_stats->tx_frames++;
+	pkg_stats->tx_frames_delta++;
 
 	return 0;
 
@@ -317,17 +298,17 @@
 }
 EXPORT_SYMBOL(can_send);
 
-/*
- * af_can rx path
- */
+/* af_can rx path */
 
-static struct can_dev_rcv_lists *find_dev_rcv_lists(struct net *net,
-						struct net_device *dev)
+static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net,
+							struct net_device *dev)
 {
-	if (!dev)
-		return net->can.can_rx_alldev_list;
-	else
-		return (struct can_dev_rcv_lists *)dev->ml_priv;
+	if (dev) {
+		struct can_ml_priv *ml_priv = dev->ml_priv;
+
+		return &ml_priv->dev_rcv_lists;
+	} else {
+		return net->can.rx_alldev_list;
+	}
 }
 
 /**
@@ -354,7 +335,7 @@
 }
 
 /**
- * find_rcv_list - determine optimal filterlist inside device filter struct
+ * can_rcv_list_find - determine optimal filterlist inside device filter struct
  * @can_id: pointer to CAN identifier of a given can_filter
  * @mask: pointer to CAN mask of a given can_filter
  * @d: pointer to the device filter struct
@@ -380,8 +361,8 @@
 *  Consistency checked mask.
  *  Reduced can_id to have a preprocessed filter compare value.
  */
-static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
-					struct can_dev_rcv_lists *d)
+static struct hlist_head *can_rcv_list_find(canid_t *can_id, canid_t *mask,
+					    struct can_dev_rcv_lists *dev_rcv_lists)
 {
 	canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */
 
@@ -389,7 +370,7 @@
 	if (*mask & CAN_ERR_FLAG) {
 		/* clear CAN_ERR_FLAG in filter entry */
 		*mask &= CAN_ERR_MASK;
-		return &d->rx[RX_ERR];
+		return &dev_rcv_lists->rx[RX_ERR];
 	}
 
 	/* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */
@@ -405,27 +386,26 @@
 
 	/* inverse can_id/can_mask filter */
 	if (inv)
-		return &d->rx[RX_INV];
+		return &dev_rcv_lists->rx[RX_INV];
 
 	/* mask == 0 => no condition testing at receive time */
 	if (!(*mask))
-		return &d->rx[RX_ALL];
+		return &dev_rcv_lists->rx[RX_ALL];
 
 	/* extra filterlists for the subscription of a single non-RTR can_id */
 	if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) &&
 	    !(*can_id & CAN_RTR_FLAG)) {
-
 		if (*can_id & CAN_EFF_FLAG) {
 			if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS))
-				return &d->rx_eff[effhash(*can_id)];
+				return &dev_rcv_lists->rx_eff[effhash(*can_id)];
 		} else {
 			if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS))
-				return &d->rx_sff[*can_id];
+				return &dev_rcv_lists->rx_sff[*can_id];
 		}
 	}
 
 	/* default: filter via can_id/can_mask */
-	return &d->rx[RX_FIL];
+	return &dev_rcv_lists->rx[RX_FIL];
 }
 
 /**
@@ -462,10 +442,10 @@
 		    canid_t mask, void (*func)(struct sk_buff *, void *),
 		    void *data, char *ident, struct sock *sk)
 {
-	struct receiver *r;
-	struct hlist_head *rl;
-	struct can_dev_rcv_lists *d;
-	struct s_pstats *can_pstats = net->can.can_pstats;
+	struct receiver *rcv;
+	struct hlist_head *rcv_list;
+	struct can_dev_rcv_lists *dev_rcv_lists;
+	struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
 	int err = 0;
 
 	/* insert new receiver  (dev,canid,mask) -> (func,data) */
@@ -476,50 +456,42 @@
 	if (dev && !net_eq(net, dev_net(dev)))
 		return -ENODEV;
 
-	r = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
-	if (!r)
+	rcv = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
+	if (!rcv)
 		return -ENOMEM;
 
-	spin_lock(&net->can.can_rcvlists_lock);
+	spin_lock_bh(&net->can.rcvlists_lock);
 
-	d = find_dev_rcv_lists(net, dev);
-	if (d) {
-		rl = find_rcv_list(&can_id, &mask, d);
+	dev_rcv_lists = can_dev_rcv_lists_find(net, dev);
+	rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists);
 
-		r->can_id  = can_id;
-		r->mask    = mask;
-		r->matches = 0;
-		r->func    = func;
-		r->data    = data;
-		r->ident   = ident;
-		r->sk      = sk;
+	rcv->can_id = can_id;
+	rcv->mask = mask;
+	rcv->matches = 0;
+	rcv->func = func;
+	rcv->data = data;
+	rcv->ident = ident;
+	rcv->sk = sk;
 
-		hlist_add_head_rcu(&r->list, rl);
-		d->entries++;
+	hlist_add_head_rcu(&rcv->list, rcv_list);
+	dev_rcv_lists->entries++;
 
-		can_pstats->rcv_entries++;
-		if (can_pstats->rcv_entries_max < can_pstats->rcv_entries)
-			can_pstats->rcv_entries_max = can_pstats->rcv_entries;
-	} else {
-		kmem_cache_free(rcv_cache, r);
-		err = -ENODEV;
-	}
-
-	spin_unlock(&net->can.can_rcvlists_lock);
+	rcv_lists_stats->rcv_entries++;
+	rcv_lists_stats->rcv_entries_max = max(rcv_lists_stats->rcv_entries_max,
+					       rcv_lists_stats->rcv_entries);
+	spin_unlock_bh(&net->can.rcvlists_lock);
 
 	return err;
 }
 EXPORT_SYMBOL(can_rx_register);
 
-/*
- * can_rx_delete_receiver - rcu callback for single receiver entry removal
- */
+/* can_rx_delete_receiver - rcu callback for single receiver entry removal */
 static void can_rx_delete_receiver(struct rcu_head *rp)
 {
-	struct receiver *r = container_of(rp, struct receiver, rcu);
-	struct sock *sk = r->sk;
+	struct receiver *rcv = container_of(rp, struct receiver, rcu);
+	struct sock *sk = rcv->sk;
 
-	kmem_cache_free(rcv_cache, r);
+	kmem_cache_free(rcv_cache, rcv);
 	if (sk)
 		sock_put(sk);
 }
@@ -539,10 +511,10 @@
 		       canid_t mask, void (*func)(struct sk_buff *, void *),
 		       void *data)
 {
-	struct receiver *r = NULL;
-	struct hlist_head *rl;
-	struct s_pstats *can_pstats = net->can.can_pstats;
-	struct can_dev_rcv_lists *d;
+	struct receiver *rcv = NULL;
+	struct hlist_head *rcv_list;
+	struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
+	struct can_dev_rcv_lists *dev_rcv_lists;
 
 	if (dev && dev->type != ARPHRD_CAN)
 		return;
@@ -550,86 +522,69 @@
 	if (dev && !net_eq(net, dev_net(dev)))
 		return;
 
-	spin_lock(&net->can.can_rcvlists_lock);
+	spin_lock_bh(&net->can.rcvlists_lock);
 
-	d = find_dev_rcv_lists(net, dev);
-	if (!d) {
-		pr_err("BUG: receive list not found for "
-		       "dev %s, id %03X, mask %03X\n",
-		       DNAME(dev), can_id, mask);
-		goto out;
-	}
+	dev_rcv_lists = can_dev_rcv_lists_find(net, dev);
+	rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists);
 
-	rl = find_rcv_list(&can_id, &mask, d);
-
-	/*
-	 * Search the receiver list for the item to delete.  This should
+	/* Search the receiver list for the item to delete.  This should
 	 * exist, since no receiver may be unregistered that hasn't
 	 * been registered before.
 	 */
-
-	hlist_for_each_entry_rcu(r, rl, list) {
-		if (r->can_id == can_id && r->mask == mask &&
-		    r->func == func && r->data == data)
+	hlist_for_each_entry_rcu(rcv, rcv_list, list) {
+		if (rcv->can_id == can_id && rcv->mask == mask &&
+		    rcv->func == func && rcv->data == data)
 			break;
 	}
 
-	/*
-	 * Check for bugs in CAN protocol implementations using af_can.c:
-	 * 'r' will be NULL if no matching list item was found for removal.
+	/* Check for bugs in CAN protocol implementations using af_can.c:
+	 * 'rcv' will be NULL if no matching list item was found for removal.
 	 */
-
-	if (!r) {
-		WARN(1, "BUG: receive list entry not found for dev %s, "
-		     "id %03X, mask %03X\n", DNAME(dev), can_id, mask);
+	if (!rcv) {
+		WARN(1, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n",
+		     DNAME(dev), can_id, mask);
 		goto out;
 	}
 
-	hlist_del_rcu(&r->list);
-	d->entries--;
+	hlist_del_rcu(&rcv->list);
+	dev_rcv_lists->entries--;
 
-	if (can_pstats->rcv_entries > 0)
-		can_pstats->rcv_entries--;
-
-	/* remove device structure requested by NETDEV_UNREGISTER */
-	if (d->remove_on_zero_entries && !d->entries) {
-		kfree(d);
-		dev->ml_priv = NULL;
-	}
+	if (rcv_lists_stats->rcv_entries > 0)
+		rcv_lists_stats->rcv_entries--;
 
  out:
-	spin_unlock(&net->can.can_rcvlists_lock);
+	spin_unlock_bh(&net->can.rcvlists_lock);
 
 	/* schedule the receiver item for deletion */
-	if (r) {
-		if (r->sk)
-			sock_hold(r->sk);
-		call_rcu(&r->rcu, can_rx_delete_receiver);
+	if (rcv) {
+		if (rcv->sk)
+			sock_hold(rcv->sk);
+		call_rcu(&rcv->rcu, can_rx_delete_receiver);
 	}
 }
 EXPORT_SYMBOL(can_rx_unregister);
 
-static inline void deliver(struct sk_buff *skb, struct receiver *r)
+static inline void deliver(struct sk_buff *skb, struct receiver *rcv)
 {
-	r->func(skb, r->data);
-	r->matches++;
+	rcv->func(skb, rcv->data);
+	rcv->matches++;
 }
 
-static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
+static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb)
 {
-	struct receiver *r;
+	struct receiver *rcv;
 	int matches = 0;
 	struct can_frame *cf = (struct can_frame *)skb->data;
 	canid_t can_id = cf->can_id;
 
-	if (d->entries == 0)
+	if (dev_rcv_lists->entries == 0)
 		return 0;
 
 	if (can_id & CAN_ERR_FLAG) {
 		/* check for error message frame entries only */
-		hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) {
-			if (can_id & r->mask) {
-				deliver(skb, r);
+		hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ERR], list) {
+			if (can_id & rcv->mask) {
+				deliver(skb, rcv);
 				matches++;
 			}
 		}
@@ -637,23 +592,23 @@
 	}
 
 	/* check for unfiltered entries */
-	hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) {
-		deliver(skb, r);
+	hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ALL], list) {
+		deliver(skb, rcv);
 		matches++;
 	}
 
 	/* check for can_id/mask entries */
-	hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) {
-		if ((can_id & r->mask) == r->can_id) {
-			deliver(skb, r);
+	hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_FIL], list) {
+		if ((can_id & rcv->mask) == rcv->can_id) {
+			deliver(skb, rcv);
 			matches++;
 		}
 	}
 
 	/* check for inverted can_id/mask entries */
-	hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) {
-		if ((can_id & r->mask) != r->can_id) {
-			deliver(skb, r);
+	hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_INV], list) {
+		if ((can_id & rcv->mask) != rcv->can_id) {
+			deliver(skb, rcv);
 			matches++;
 		}
 	}
@@ -663,16 +618,16 @@
 		return matches;
 
 	if (can_id & CAN_EFF_FLAG) {
-		hlist_for_each_entry_rcu(r, &d->rx_eff[effhash(can_id)], list) {
-			if (r->can_id == can_id) {
-				deliver(skb, r);
+		hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_eff[effhash(can_id)], list) {
+			if (rcv->can_id == can_id) {
+				deliver(skb, rcv);
 				matches++;
 			}
 		}
 	} else {
 		can_id &= CAN_SFF_MASK;
-		hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) {
-			deliver(skb, r);
+		hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_sff[can_id], list) {
+			deliver(skb, rcv);
 			matches++;
 		}
 	}
@@ -682,14 +637,14 @@
 
 static void can_receive(struct sk_buff *skb, struct net_device *dev)
 {
-	struct can_dev_rcv_lists *d;
+	struct can_dev_rcv_lists *dev_rcv_lists;
 	struct net *net = dev_net(dev);
-	struct s_stats *can_stats = net->can.can_stats;
+	struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
 	int matches;
 
 	/* update statistics */
-	can_stats->rx_frames++;
-	can_stats->rx_frames_delta++;
+	pkg_stats->rx_frames++;
+	pkg_stats->rx_frames_delta++;
 
 	/* create non-zero unique skb identifier together with *skb */
 	while (!(can_skb_prv(skb)->skbcnt))
@@ -698,12 +653,11 @@
 	rcu_read_lock();
 
 	/* deliver the packet to sockets listening on all devices */
-	matches = can_rcv_filter(net->can.can_rx_alldev_list, skb);
+	matches = can_rcv_filter(net->can.rx_alldev_list, skb);
 
 	/* find receive list for this device */
-	d = find_dev_rcv_lists(net, dev);
-	if (d)
-		matches += can_rcv_filter(d, skb);
+	dev_rcv_lists = can_dev_rcv_lists_find(net, dev);
+	matches += can_rcv_filter(dev_rcv_lists, skb);
 
 	rcu_read_unlock();
 
@@ -711,8 +665,8 @@
 	consume_skb(skb);
 
 	if (matches > 0) {
-		can_stats->matches++;
-		can_stats->matches_delta++;
+		pkg_stats->matches++;
+		pkg_stats->matches_delta++;
 	}
 }
 
@@ -734,7 +688,7 @@
 }
 
 static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
-		   struct packet_type *pt, struct net_device *orig_dev)
+		     struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 
@@ -750,9 +704,7 @@
 	return NET_RX_SUCCESS;
 }
 
-/*
- * af_can protocol functions
- */
+/* af_can protocol functions */
 
 /**
  * can_proto_register - register CAN transport protocol
@@ -783,8 +735,9 @@
 	if (rcu_access_pointer(proto_tab[proto])) {
 		pr_err("can: protocol %d already registered\n", proto);
 		err = -EBUSY;
-	} else
+	} else {
 		RCU_INIT_POINTER(proto_tab[proto], cp);
+	}
 
 	mutex_unlock(&proto_tab_lock);
 
@@ -814,48 +767,19 @@
 }
 EXPORT_SYMBOL(can_proto_unregister);
 
-/*
- * af_can notifier to create/remove CAN netdevice specific structs
- */
+/* af_can notifier to create/remove CAN netdevice specific structs */
 static int can_notifier(struct notifier_block *nb, unsigned long msg,
 			void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct can_dev_rcv_lists *d;
 
 	if (dev->type != ARPHRD_CAN)
 		return NOTIFY_DONE;
 
 	switch (msg) {
-
 	case NETDEV_REGISTER:
-
-		/* create new dev_rcv_lists for this device */
-		d = kzalloc(sizeof(*d), GFP_KERNEL);
-		if (!d)
-			return NOTIFY_DONE;
-		BUG_ON(dev->ml_priv);
-		dev->ml_priv = d;
-
-		break;
-
-	case NETDEV_UNREGISTER:
-		spin_lock(&dev_net(dev)->can.can_rcvlists_lock);
-
-		d = dev->ml_priv;
-		if (d) {
-			if (d->entries)
-				d->remove_on_zero_entries = 1;
-			else {
-				kfree(d);
-				dev->ml_priv = NULL;
-			}
-		} else
-			pr_err("can: notifier: receive list not found for dev "
-			       "%s\n", dev->name);
-
-		spin_unlock(&dev_net(dev)->can.can_rcvlists_lock);
-
+		WARN(!dev->ml_priv,
+		     "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
 		break;
 	}
 
@@ -864,71 +788,54 @@
 
 static int can_pernet_init(struct net *net)
 {
-	spin_lock_init(&net->can.can_rcvlists_lock);
-	net->can.can_rx_alldev_list =
-		kzalloc(sizeof(struct can_dev_rcv_lists), GFP_KERNEL);
-	if (!net->can.can_rx_alldev_list)
+	spin_lock_init(&net->can.rcvlists_lock);
+	net->can.rx_alldev_list =
+		kzalloc(sizeof(*net->can.rx_alldev_list), GFP_KERNEL);
+	if (!net->can.rx_alldev_list)
 		goto out;
-	net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
-	if (!net->can.can_stats)
-		goto out_free_alldev_list;
-	net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL);
-	if (!net->can.can_pstats)
-		goto out_free_can_stats;
+	net->can.pkg_stats = kzalloc(sizeof(*net->can.pkg_stats), GFP_KERNEL);
+	if (!net->can.pkg_stats)
+		goto out_free_rx_alldev_list;
+	net->can.rcv_lists_stats = kzalloc(sizeof(*net->can.rcv_lists_stats), GFP_KERNEL);
+	if (!net->can.rcv_lists_stats)
+		goto out_free_pkg_stats;
 
 	if (IS_ENABLED(CONFIG_PROC_FS)) {
 		/* the statistics are updated every second (timer triggered) */
 		if (stats_timer) {
-			timer_setup(&net->can.can_stattimer, can_stat_update,
+			timer_setup(&net->can.stattimer, can_stat_update,
 				    0);
-			mod_timer(&net->can.can_stattimer,
+			mod_timer(&net->can.stattimer,
 				  round_jiffies(jiffies + HZ));
 		}
-		net->can.can_stats->jiffies_init = jiffies;
+		net->can.pkg_stats->jiffies_init = jiffies;
 		can_init_proc(net);
 	}
 
 	return 0;
 
- out_free_can_stats:
-	kfree(net->can.can_stats);
- out_free_alldev_list:
-	kfree(net->can.can_rx_alldev_list);
+ out_free_pkg_stats:
+	kfree(net->can.pkg_stats);
+ out_free_rx_alldev_list:
+	kfree(net->can.rx_alldev_list);
  out:
 	return -ENOMEM;
 }
 
 static void can_pernet_exit(struct net *net)
 {
-	struct net_device *dev;
-
 	if (IS_ENABLED(CONFIG_PROC_FS)) {
 		can_remove_proc(net);
 		if (stats_timer)
-			del_timer_sync(&net->can.can_stattimer);
+			del_timer_sync(&net->can.stattimer);
 	}
 
-	/* remove created dev_rcv_lists from still registered CAN devices */
-	rcu_read_lock();
-	for_each_netdev_rcu(net, dev) {
-		if (dev->type == ARPHRD_CAN && dev->ml_priv) {
-			struct can_dev_rcv_lists *d = dev->ml_priv;
-
-			BUG_ON(d->entries);
-			kfree(d);
-			dev->ml_priv = NULL;
-		}
-	}
-	rcu_read_unlock();
-
-	kfree(net->can.can_rx_alldev_list);
-	kfree(net->can.can_stats);
-	kfree(net->can.can_pstats);
+	kfree(net->can.rx_alldev_list);
+	kfree(net->can.pkg_stats);
+	kfree(net->can.rcv_lists_stats);
 }
 
-/*
- * af_can module init/exit functions
- */
+/* af_can module init/exit functions */
 
 static struct packet_type can_packet __read_mostly = {
 	.type = cpu_to_be16(ETH_P_CAN),
@@ -958,6 +865,8 @@
 
 static __init int can_init(void)
 {
+	int err;
+
 	/* check for correct padding to be able to use the structs similarly */
 	BUILD_BUG_ON(offsetof(struct can_frame, can_dlc) !=
 		     offsetof(struct canfd_frame, len) ||
@@ -971,15 +880,31 @@
 	if (!rcv_cache)
 		return -ENOMEM;
 
-	register_pernet_subsys(&can_pernet_ops);
+	err = register_pernet_subsys(&can_pernet_ops);
+	if (err)
+		goto out_pernet;
 
 	/* protocol register */
-	sock_register(&can_family_ops);
-	register_netdevice_notifier(&can_netdev_notifier);
+	err = sock_register(&can_family_ops);
+	if (err)
+		goto out_sock;
+	err = register_netdevice_notifier(&can_netdev_notifier);
+	if (err)
+		goto out_notifier;
+
 	dev_add_pack(&can_packet);
 	dev_add_pack(&canfd_packet);
 
 	return 0;
+
+out_notifier:
+	sock_unregister(PF_CAN);
+out_sock:
+	unregister_pernet_subsys(&can_pernet_ops);
+out_pernet:
+	kmem_cache_destroy(rcv_cache);
+
+	return err;
 }
 
 static __exit void can_exit(void)
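
Aside: the renamed can_rcv_list_find() keeps the old fast paths for
single-ID subscriptions: standard (SFF) ids index the rx_sff[] array
directly, while extended (EFF) ids are hashed into the rx_eff[] buckets.
The hash used for that is roughly the following (assuming the unchanged
effhash() helper elsewhere in this file):

/* roughly the EFF hash feeding the rx_eff[] buckets */
static unsigned int effhash(canid_t can_id)
{
	unsigned int hash;

	hash = can_id;
	hash ^= can_id >> CAN_EFF_RCV_HASH_BITS;
	hash ^= can_id >> (2 * CAN_EFF_RCV_HASH_BITS);

	return hash & ((1 << CAN_EFF_RCV_HASH_BITS) - 1);
}
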
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 9cb3719..7c2d916 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -1,5 +1,5 @@
-/*
- * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/* Copyright (c) 2002-2007 Volkswagen Group Electronic Research
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -53,32 +53,17 @@
 	canid_t can_id;
 	canid_t mask;
 	unsigned long matches;
-	void (*func)(struct sk_buff *, void *);
+	void (*func)(struct sk_buff *skb, void *data);
 	void *data;
 	char *ident;
 	struct sock *sk;
 	struct rcu_head rcu;
 };
 
-#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
-#define CAN_EFF_RCV_HASH_BITS 10
-#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS)
-
-enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX };
-
-/* per device receive filters linked at dev->ml_priv */
-struct can_dev_rcv_lists {
-	struct hlist_head rx[RX_MAX];
-	struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
-	struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
-	int remove_on_zero_entries;
-	int entries;
-};
-
 /* statistic structures */
 
 /* can be reset e.g. by can_init_stats() */
-struct s_stats {
+struct can_pkg_stats {
 	unsigned long jiffies_init;
 
 	unsigned long rx_frames;
@@ -103,7 +88,7 @@
 };
 
 /* persistent statistics */
-struct s_pstats {
+struct can_rcv_lists_stats {
 	unsigned long stats_reset;
 	unsigned long user_reset;
 	unsigned long rcv_entries;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 0af8f0d..c96fa0f 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 /*
  * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
  *
@@ -67,6 +68,9 @@
  */
 #define MAX_NFRAMES 256
 
+/* limit timers to 400 days for sending/timeouts */
+#define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60)
+
 /* use of last_frames[index].flags */
 #define RX_RECV    0x40 /* received data for this element */
 #define RX_THR     0x80 /* element not been sent due to throttle feature */
@@ -102,7 +106,6 @@
 	unsigned long frames_abs, frames_filtered;
 	struct bcm_timeval ival1, ival2;
 	struct hrtimer timer, thrtimer;
-	struct tasklet_struct tsklet, thrtsklet;
 	ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg;
 	int rx_ifindex;
 	int cfsiz;
@@ -140,6 +143,22 @@
 	return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC);
 }
 
+/* check limitations for timeval provided by user */
+static bool bcm_is_invalid_tv(struct bcm_msg_head *msg_head)
+{
+	if ((msg_head->ival1.tv_sec < 0) ||
+	    (msg_head->ival1.tv_sec > BCM_TIMER_SEC_MAX) ||
+	    (msg_head->ival1.tv_usec < 0) ||
+	    (msg_head->ival1.tv_usec >= USEC_PER_SEC) ||
+	    (msg_head->ival2.tv_sec < 0) ||
+	    (msg_head->ival2.tv_sec > BCM_TIMER_SEC_MAX) ||
+	    (msg_head->ival2.tv_usec < 0) ||
+	    (msg_head->ival2.tv_usec >= USEC_PER_SEC))
+		return true;
+
+	return false;
+}
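+/* Both SETTIMER paths below, bcm_tx_setup() and bcm_rx_setup(), reject
+ * out-of-range timevals with this helper before converting them to
+ * ktime.
+ */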
+
 #define CFSIZ(flags) ((flags & CAN_FD_FRAME) ? CANFD_MTU : CAN_MTU)
 #define OPSIZ sizeof(struct bcm_op)
 #define MHSIZ sizeof(struct bcm_msg_head)
@@ -351,25 +370,34 @@
 	}
 }
 
-static void bcm_tx_start_timer(struct bcm_op *op)
+static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt)
 {
+	ktime_t ival;
+
 	if (op->kt_ival1 && op->count)
-		hrtimer_start(&op->timer,
-			      ktime_add(ktime_get(), op->kt_ival1),
-			      HRTIMER_MODE_ABS);
+		ival = op->kt_ival1;
 	else if (op->kt_ival2)
-		hrtimer_start(&op->timer,
-			      ktime_add(ktime_get(), op->kt_ival2),
-			      HRTIMER_MODE_ABS);
+		ival = op->kt_ival2;
+	else
+		return false;
+
+	hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival));
+	return true;
 }
 
-static void bcm_tx_timeout_tsklet(unsigned long data)
+static void bcm_tx_start_timer(struct bcm_op *op)
 {
-	struct bcm_op *op = (struct bcm_op *)data;
+	if (bcm_tx_set_expiry(op, &op->timer))
+		hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT);
+}
+
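+/* Note: the timers now use HRTIMER_MODE_*_SOFT, i.e. the callbacks run
+ * in softirq context. That is why the per-op tasklets (tsklet and
+ * thrtsklet) are removed by this change and bcm_remove_op() can simply
+ * cancel the two hrtimers.
+ */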
+/* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */
+static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
+{
+	struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
 	struct bcm_msg_head msg_head;
 
 	if (op->kt_ival1 && (op->count > 0)) {
-
 		op->count--;
 		if (!op->count && (op->flags & TX_COUNTEVT)) {
 
@@ -386,22 +414,12 @@
 		}
 		bcm_can_tx(op);
 
-	} else if (op->kt_ival2)
+	} else if (op->kt_ival2) {
 		bcm_can_tx(op);
+	}
 
-	bcm_tx_start_timer(op);
-}
-
-/*
- * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions
- */
-static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
-{
-	struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
-
-	tasklet_schedule(&op->tsklet);
-
-	return HRTIMER_NORESTART;
+	return bcm_tx_set_expiry(op, &op->timer) ?
+		HRTIMER_RESTART : HRTIMER_NORESTART;
 }
 
 /*
@@ -467,7 +485,7 @@
 		/* do not send the saved data - only start throttle timer */
 		hrtimer_start(&op->thrtimer,
 			      ktime_add(op->kt_lastmsg, op->kt_ival2),
-			      HRTIMER_MODE_ABS);
+			      HRTIMER_MODE_ABS_SOFT);
 		return;
 	}
 
@@ -526,14 +544,21 @@
 		return;
 
 	if (op->kt_ival1)
-		hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL);
+		hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT);
 }
 
-static void bcm_rx_timeout_tsklet(unsigned long data)
+/* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */
+static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
 {
-	struct bcm_op *op = (struct bcm_op *)data;
+	struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
 	struct bcm_msg_head msg_head;
 
+	/* if the user wants to be informed when cyclic CAN messages come back */
+	if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
+		/* clear received CAN frames to indicate 'nothing received' */
+		memset(op->last_frames, 0, op->nframes * op->cfsiz);
+	}
+
 	/* create notification to user */
 	msg_head.opcode  = RX_TIMEOUT;
 	msg_head.flags   = op->flags;
@@ -544,25 +569,6 @@
 	msg_head.nframes = 0;
 
 	bcm_send_to_user(op, &msg_head, NULL, 0);
-}
-
-/*
- * bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out
- */
-static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
-{
-	struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
-
-	/* schedule before NET_RX_SOFTIRQ */
-	tasklet_hi_schedule(&op->tsklet);
-
-	/* no restart of the timer is done here! */
-
-	/* if user wants to be informed, when cyclic CAN-Messages come back */
-	if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
-		/* clear received CAN frames to indicate 'nothing received' */
-		memset(op->last_frames, 0, op->nframes * op->cfsiz);
-	}
 
 	return HRTIMER_NORESTART;
 }
@@ -570,14 +576,12 @@
 /*
  * bcm_rx_do_flush - helper for bcm_rx_thr_flush
  */
-static inline int bcm_rx_do_flush(struct bcm_op *op, int update,
-				  unsigned int index)
+static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index)
 {
 	struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
 
 	if ((op->last_frames) && (lcf->flags & RX_THR)) {
-		if (update)
-			bcm_rx_changed(op, lcf);
+		bcm_rx_changed(op, lcf);
 		return 1;
 	}
 	return 0;
@@ -585,11 +589,8 @@
 
 /*
  * bcm_rx_thr_flush - Check for throttled data and send it to the userspace
- *
- * update == 0 : just check if throttled data is available  (any irq context)
- * update == 1 : check and send throttled data to userspace (soft_irq context)
  */
-static int bcm_rx_thr_flush(struct bcm_op *op, int update)
+static int bcm_rx_thr_flush(struct bcm_op *op)
 {
 	int updated = 0;
 
@@ -598,24 +599,16 @@
 
 		/* for MUX filter we start at index 1 */
 		for (i = 1; i < op->nframes; i++)
-			updated += bcm_rx_do_flush(op, update, i);
+			updated += bcm_rx_do_flush(op, i);
 
 	} else {
 		/* for RX_FILTER_ID and simple filter */
-		updated += bcm_rx_do_flush(op, update, 0);
+		updated += bcm_rx_do_flush(op, 0);
 	}
 
 	return updated;
 }
 
-static void bcm_rx_thr_tsklet(unsigned long data)
-{
-	struct bcm_op *op = (struct bcm_op *)data;
-
-	/* push the changed data to the userspace */
-	bcm_rx_thr_flush(op, 1);
-}
-
 /*
  * bcm_rx_thr_handler - the time for blocked content updates is over now:
  *                      Check for throttled data and send it to the userspace
@@ -624,9 +617,7 @@
 {
 	struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer);
 
-	tasklet_schedule(&op->thrtsklet);
-
-	if (bcm_rx_thr_flush(op, 0)) {
+	if (bcm_rx_thr_flush(op)) {
 		hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2);
 		return HRTIMER_RESTART;
 	} else {
@@ -722,23 +713,8 @@
 
 static void bcm_remove_op(struct bcm_op *op)
 {
-	if (op->tsklet.func) {
-		while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) ||
-		       test_bit(TASKLET_STATE_RUN, &op->tsklet.state) ||
-		       hrtimer_active(&op->timer)) {
-			hrtimer_cancel(&op->timer);
-			tasklet_kill(&op->tsklet);
-		}
-	}
-
-	if (op->thrtsklet.func) {
-		while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) ||
-		       test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) ||
-		       hrtimer_active(&op->thrtimer)) {
-			hrtimer_cancel(&op->thrtimer);
-			tasklet_kill(&op->thrtsklet);
-		}
-	}
+	hrtimer_cancel(&op->timer);
+	hrtimer_cancel(&op->thrtimer);
 
 	if ((op->frames) && (op->frames != &op->sframe))
 		kfree(op->frames);
@@ -873,6 +849,10 @@
 	if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES)
 		return -EINVAL;
 
+	/* check timeval limitations */
+	if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head))
+		return -EINVAL;
+
 	/* check the given can_id */
 	op = bcm_find_op(&bo->tx_ops, msg_head, ifindex);
 	if (op) {
@@ -967,15 +947,13 @@
 		op->ifindex = ifindex;
 
 		/* initialize uninitialized (kzalloc) structure */
-		hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hrtimer_init(&op->timer, CLOCK_MONOTONIC,
+			     HRTIMER_MODE_REL_SOFT);
 		op->timer.function = bcm_tx_timeout_handler;
 
-		/* initialize tasklet for tx countevent notification */
-		tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet,
-			     (unsigned long) op);
-
 		/* currently unused in tx_ops */
-		hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC,
+			     HRTIMER_MODE_REL_SOFT);
 
 		/* add this bcm_op to the list of the tx_ops */
 		list_add(&op->list, &bo->tx_ops);
@@ -1053,6 +1031,10 @@
 	     (!(msg_head->can_id & CAN_RTR_FLAG))))
 		return -EINVAL;
 
+	/* check timeval limitations */
+	if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head))
+		return -EINVAL;
+
 	/* check the given can_id */
 	op = bcm_find_op(&bo->rx_ops, msg_head, ifindex);
 	if (op) {
@@ -1140,20 +1122,14 @@
 		op->rx_ifindex = ifindex;
 
 		/* initialize uninitialized (kzalloc) structure */
-		hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hrtimer_init(&op->timer, CLOCK_MONOTONIC,
+			     HRTIMER_MODE_REL_SOFT);
 		op->timer.function = bcm_rx_timeout_handler;
 
-		/* initialize tasklet for rx timeout notification */
-		tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet,
-			     (unsigned long) op);
-
-		hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC,
+			     HRTIMER_MODE_REL_SOFT);
 		op->thrtimer.function = bcm_rx_thr_handler;
 
-		/* initialize tasklet for rx throttle handling */
-		tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet,
-			     (unsigned long) op);
-
 		/* add this bcm_op to the list of the rx_ops */
 		list_add(&op->list, &bo->rx_ops);
 
@@ -1199,12 +1175,12 @@
 			 */
 			op->kt_lastmsg = 0;
 			hrtimer_cancel(&op->thrtimer);
-			bcm_rx_thr_flush(op, 1);
+			bcm_rx_thr_flush(op);
 		}
 
 		if ((op->flags & STARTTIMER) && op->kt_ival1)
 			hrtimer_start(&op->timer, op->kt_ival1,
-				      HRTIMER_MODE_REL);
+				      HRTIMER_MODE_REL_SOFT);
 	}
 
 	/* now we can register for can_ids, if we added a new bcm_op */
@@ -1318,7 +1294,7 @@
 		/* no bound device as default => check msg_name */
 		DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name);
 
-		if (msg->msg_namelen < sizeof(*addr))
+		if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex))
 			return -EINVAL;
 
 		if (addr->can_family != AF_CAN)
@@ -1560,7 +1536,7 @@
 	struct net *net = sock_net(sk);
 	int ret = 0;
 
-	if (len < sizeof(*addr))
+	if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex))
 		return -EINVAL;
 
 	lock_sock(sk);
@@ -1652,6 +1628,13 @@
 	return size;
 }
 
+static int bcm_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+				unsigned long arg)
+{
+	/* no ioctls for socket layer -> hand it down to NIC layer */
+	return -ENOIOCTLCMD;
+}
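+/* Used in bcm_ops below instead of the former can_ioctl(); timestamp
+ * ioctls are served by the generic sock_gettstamp() instead.
+ */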
+
 static const struct proto_ops bcm_ops = {
 	.family        = PF_CAN,
 	.release       = bcm_release,
@@ -1661,7 +1644,8 @@
 	.accept        = sock_no_accept,
 	.getname       = sock_no_getname,
 	.poll          = datagram_poll,
-	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
+	.ioctl         = bcm_sock_no_ioctlcmd,
+	.gettstamp     = sock_gettstamp,
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
 	.setsockopt    = sock_no_setsockopt,
diff --git a/net/can/gw.c b/net/can/gw.c
index faa3da8..65d60c9 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1,7 +1,7 @@
-/*
- * gw.c - CAN frame Gateway/Router/Bridge with netlink interface
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* gw.c - CAN frame Gateway/Router/Bridge with netlink interface
  *
- * Copyright (c) 2017 Volkswagen Group Electronic Research
+ * Copyright (c) 2019 Volkswagen Group Electronic Research
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -59,7 +59,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
-#define CAN_GW_VERSION "20170425"
+#define CAN_GW_VERSION "20190810"
 #define CAN_GW_NAME "can-gw"
 
 MODULE_DESCRIPTION("PF_CAN netlink gateway");
@@ -85,10 +85,10 @@
 /* structure that contains the (on-the-fly) CAN frame modifications */
 struct cf_mod {
 	struct {
-		struct can_frame and;
-		struct can_frame or;
-		struct can_frame xor;
-		struct can_frame set;
+		struct canfd_frame and;
+		struct canfd_frame or;
+		struct canfd_frame xor;
+		struct canfd_frame set;
 	} modframe;
 	struct {
 		u8 and;
@@ -96,7 +96,7 @@
 		u8 xor;
 		u8 set;
 	} modtype;
-	void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf,
+	void (*modfunc[MAX_MODFUNCTIONS])(struct canfd_frame *cf,
 					  struct cf_mod *mod);
 
 	/* CAN frame checksum calculation after CAN frame modifications */
@@ -105,15 +105,15 @@
 		struct cgw_csum_crc8 crc8;
 	} csum;
 	struct {
-		void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor);
-		void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8);
+		void (*xor)(struct canfd_frame *cf,
+			    struct cgw_csum_xor *xor);
+		void (*crc8)(struct canfd_frame *cf,
+			     struct cgw_csum_crc8 *crc8);
 	} csumfunc;
 	u32 uid;
 };
 
-
-/*
- * So far we just support CAN -> CAN routing and frame modifications.
+/* So far we just support CAN -> CAN routing and frame modifications.
  *
  * The internal can_can_gw structure contains data and attributes for
  * a CAN -> CAN gateway job.
@@ -151,39 +151,88 @@
 
 /* modification functions that are invoked in the hot path in can_can_gw_rcv */
 
-#define MODFUNC(func, op) static void func(struct can_frame *cf, \
+#define MODFUNC(func, op) static void func(struct canfd_frame *cf, \
 					   struct cf_mod *mod) { op ; }
 
 MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id)
-MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc)
+MODFUNC(mod_and_len, cf->len &= mod->modframe.and.len)
+MODFUNC(mod_and_flags, cf->flags &= mod->modframe.and.flags)
 MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data)
 MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id)
-MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc)
+MODFUNC(mod_or_len, cf->len |= mod->modframe.or.len)
+MODFUNC(mod_or_flags, cf->flags |= mod->modframe.or.flags)
 MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data)
 MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id)
-MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc)
+MODFUNC(mod_xor_len, cf->len ^= mod->modframe.xor.len)
+MODFUNC(mod_xor_flags, cf->flags ^= mod->modframe.xor.flags)
 MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data)
 MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id)
-MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc)
+MODFUNC(mod_set_len, cf->len = mod->modframe.set.len)
+MODFUNC(mod_set_flags, cf->flags = mod->modframe.set.flags)
 MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data)
 
-static inline void canframecpy(struct can_frame *dst, struct can_frame *src)
+static void mod_and_fddata(struct canfd_frame *cf, struct cf_mod *mod)
 {
-	/*
-	 * Copy the struct members separately to ensure that no uninitialized
+	int i;
+
+	for (i = 0; i < CANFD_MAX_DLEN; i += 8)
+		*(u64 *)(cf->data + i) &= *(u64 *)(mod->modframe.and.data + i);
+}
+
+static void mod_or_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	int i;
+
+	for (i = 0; i < CANFD_MAX_DLEN; i += 8)
+		*(u64 *)(cf->data + i) |= *(u64 *)(mod->modframe.or.data + i);
+}
+
+static void mod_xor_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	int i;
+
+	for (i = 0; i < CANFD_MAX_DLEN; i += 8)
+		*(u64 *)(cf->data + i) ^= *(u64 *)(mod->modframe.xor.data + i);
+}
+
+static void mod_set_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	memcpy(cf->data, mod->modframe.set.data, CANFD_MAX_DLEN);
+}
+
+static void canframecpy(struct canfd_frame *dst, struct can_frame *src)
+{
+	/* Copy the struct members separately to ensure that no uninitialized
 	 * data are copied in the 3 bytes hole of the struct. This is needed
 	 * to make easy compares of the data in the struct cf_mod.
 	 */
 
 	dst->can_id = src->can_id;
-	dst->can_dlc = src->can_dlc;
+	dst->len = src->can_dlc;
 	*(u64 *)dst->data = *(u64 *)src->data;
 }
 
-static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re)
+static void canfdframecpy(struct canfd_frame *dst, struct canfd_frame *src)
 {
-	/*
-	 * absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0]
+	/* Copy the struct members separately to ensure that no uninitialized
+	 * data are copied in the 2 bytes hole of the struct. This is needed
+	 * to make easy compares of the data in the struct cf_mod.
+	 */
+
+	dst->can_id = src->can_id;
+	dst->flags = src->flags;
+	dst->len = src->len;
+	memcpy(dst->data, src->data, CANFD_MAX_DLEN);
+}
+
+static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re, struct rtcanmsg *r)
+{
+	s8 dlen = CAN_MAX_DLEN;
+
+	if (r->flags & CGW_FLAGS_CAN_FD)
+		dlen = CANFD_MAX_DLEN;
+
+	/* absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0]
 	 * relative to received dlc -1 .. -8 :
 	 * e.g. for received dlc = 8
 	 * -1 => index = 7 (data[7])
@@ -191,27 +240,27 @@
 	 * -8 => index = 0 (data[0])
 	 */
 
-	if (fr > -9 && fr < 8 &&
-	    to > -9 && to < 8 &&
-	    re > -9 && re < 8)
+	if (fr >= -dlen && fr < dlen &&
+	    to >= -dlen && to < dlen &&
+	    re >= -dlen && re < dlen)
 		return 0;
 	else
 		return -EINVAL;
 }
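+/* Example: for Classic CAN, dlen is 8, so valid indices are -8..7,
+ * exactly the range the old literal bounds allowed. For CAN FD, dlen
+ * is 64, extending the range to -64..63.
+ */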
 
-static inline int calc_idx(int idx, int rx_dlc)
+static inline int calc_idx(int idx, int rx_len)
 {
 	if (idx < 0)
-		return rx_dlc + idx;
+		return rx_len + idx;
 	else
 		return idx;
 }
 
-static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor)
+static void cgw_csum_xor_rel(struct canfd_frame *cf, struct cgw_csum_xor *xor)
 {
-	int from = calc_idx(xor->from_idx, cf->can_dlc);
-	int to = calc_idx(xor->to_idx, cf->can_dlc);
-	int res = calc_idx(xor->result_idx, cf->can_dlc);
+	int from = calc_idx(xor->from_idx, cf->len);
+	int to = calc_idx(xor->to_idx, cf->len);
+	int res = calc_idx(xor->result_idx, cf->len);
 	u8 val = xor->init_xor_val;
 	int i;
 
@@ -229,7 +278,7 @@
 	cf->data[res] = val;
 }
 
-static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor)
+static void cgw_csum_xor_pos(struct canfd_frame *cf, struct cgw_csum_xor *xor)
 {
 	u8 val = xor->init_xor_val;
 	int i;
@@ -240,7 +289,7 @@
 	cf->data[xor->result_idx] = val;
 }
 
-static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor)
+static void cgw_csum_xor_neg(struct canfd_frame *cf, struct cgw_csum_xor *xor)
 {
 	u8 val = xor->init_xor_val;
 	int i;
@@ -251,11 +300,12 @@
 	cf->data[xor->result_idx] = val;
 }
 
-static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+static void cgw_csum_crc8_rel(struct canfd_frame *cf,
+			      struct cgw_csum_crc8 *crc8)
 {
-	int from = calc_idx(crc8->from_idx, cf->can_dlc);
-	int to = calc_idx(crc8->to_idx, cf->can_dlc);
-	int res = calc_idx(crc8->result_idx, cf->can_dlc);
+	int from = calc_idx(crc8->from_idx, cf->len);
+	int to = calc_idx(crc8->to_idx, cf->len);
+	int res = calc_idx(crc8->result_idx, cf->len);
 	u8 crc = crc8->init_crc_val;
 	int i;
 
@@ -264,96 +314,102 @@
 
 	if (from <= to) {
 		for (i = crc8->from_idx; i <= crc8->to_idx; i++)
-			crc = crc8->crctab[crc^cf->data[i]];
+			crc = crc8->crctab[crc ^ cf->data[i]];
 	} else {
 		for (i = crc8->from_idx; i >= crc8->to_idx; i--)
-			crc = crc8->crctab[crc^cf->data[i]];
+			crc = crc8->crctab[crc ^ cf->data[i]];
 	}
 
 	switch (crc8->profile) {
-
 	case CGW_CRC8PRF_1U8:
-		crc = crc8->crctab[crc^crc8->profile_data[0]];
+		crc = crc8->crctab[crc ^ crc8->profile_data[0]];
 		break;
 
 	case  CGW_CRC8PRF_16U8:
-		crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
+		crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]];
 		break;
 
 	case CGW_CRC8PRF_SFFID_XOR:
-		crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
+		crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^
 				   (cf->can_id >> 8 & 0xFF)];
 		break;
-
 	}
 
-	cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
+	cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val;
 }
 
-static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+static void cgw_csum_crc8_pos(struct canfd_frame *cf,
+			      struct cgw_csum_crc8 *crc8)
 {
 	u8 crc = crc8->init_crc_val;
 	int i;
 
 	for (i = crc8->from_idx; i <= crc8->to_idx; i++)
-		crc = crc8->crctab[crc^cf->data[i]];
+		crc = crc8->crctab[crc ^ cf->data[i]];
 
 	switch (crc8->profile) {
-
 	case CGW_CRC8PRF_1U8:
-		crc = crc8->crctab[crc^crc8->profile_data[0]];
+		crc = crc8->crctab[crc ^ crc8->profile_data[0]];
 		break;
 
 	case  CGW_CRC8PRF_16U8:
-		crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
+		crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]];
 		break;
 
 	case CGW_CRC8PRF_SFFID_XOR:
-		crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
+		crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^
 				   (cf->can_id >> 8 & 0xFF)];
 		break;
 	}
 
-	cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
+	cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val;
 }
 
-static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+static void cgw_csum_crc8_neg(struct canfd_frame *cf,
+			      struct cgw_csum_crc8 *crc8)
 {
 	u8 crc = crc8->init_crc_val;
 	int i;
 
 	for (i = crc8->from_idx; i >= crc8->to_idx; i--)
-		crc = crc8->crctab[crc^cf->data[i]];
+		crc = crc8->crctab[crc ^ cf->data[i]];
 
 	switch (crc8->profile) {
-
 	case CGW_CRC8PRF_1U8:
-		crc = crc8->crctab[crc^crc8->profile_data[0]];
+		crc = crc8->crctab[crc ^ crc8->profile_data[0]];
 		break;
 
 	case  CGW_CRC8PRF_16U8:
-		crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
+		crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]];
 		break;
 
 	case CGW_CRC8PRF_SFFID_XOR:
-		crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
+		crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^
 				   (cf->can_id >> 8 & 0xFF)];
 		break;
 	}
 
-	cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
+	cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val;
 }
 
 /* the receive & process & send function */
 static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 {
 	struct cgw_job *gwj = (struct cgw_job *)data;
-	struct can_frame *cf;
+	struct canfd_frame *cf;
 	struct sk_buff *nskb;
 	int modidx = 0;
 
-	/*
-	 * Do not handle CAN frames routed more than 'max_hops' times.
+	/* process strictly Classic CAN or CAN FD frames */
+	if (gwj->flags & CGW_FLAGS_CAN_FD) {
+		if (skb->len != CANFD_MTU)
+			return;
+	} else {
+		if (skb->len != CAN_MTU)
+			return;
+	}
+
+	/* Do not handle CAN frames routed more than 'max_hops' times.
 	 * In general we should never catch this delimiter which is intended
 	 * to cover a misconfiguration protection (e.g. circular CAN routes).
 	 *
@@ -384,8 +440,7 @@
 	    can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex)
 		return;
 
-	/*
-	 * clone the given skb, which has not been done in can_rcv()
+	/* clone the given skb, which has not been done in can_rcv()
 	 *
 	 * When there is at least one modification function activated,
 	 * we need to copy the skb as we want to modify skb->data.
@@ -410,14 +465,26 @@
 	nskb->dev = gwj->dst.dev;
 
 	/* pointer to modifiable CAN frame */
-	cf = (struct can_frame *)nskb->data;
+	cf = (struct canfd_frame *)nskb->data;
 
 	/* perform preprocessed modification functions if there are any */
 	while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx])
 		(*gwj->mod.modfunc[modidx++])(cf, &gwj->mod);
 
-	/* check for checksum updates when the CAN frame has been modified */
+	/* Has the CAN frame been modified? */
 	if (modidx) {
+		/* get available space for the processed CAN frame type */
+		int max_len = nskb->len - offsetof(struct canfd_frame, data);
+
+		/* dlc may have changed, make sure it fits into the CAN frame */
+		if (cf->len > max_len) {
+			/* delete frame due to misconfiguration */
+			gwj->deleted_frames++;
+			kfree_skb(nskb);
+			return;
+		}
+
+		/* check for checksum updates */
 		if (gwj->mod.csumfunc.crc8)
 			(*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
 
@@ -459,14 +526,12 @@
 		return NOTIFY_DONE;
 
 	if (msg == NETDEV_UNREGISTER) {
-
 		struct cgw_job *gwj = NULL;
 		struct hlist_node *nx;
 
 		ASSERT_RTNL();
 
 		hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
-
 			if (gwj->src.dev == dev || gwj->dst.dev == dev) {
 				hlist_del(&gwj->list);
 				cgw_unregister_filter(net, gwj);
@@ -481,7 +546,6 @@
 static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
 		       u32 pid, u32 seq, int flags)
 {
-	struct cgw_frame_mod mb;
 	struct rtcanmsg *rtcan;
 	struct nlmsghdr *nlh;
 
@@ -518,32 +582,66 @@
 			goto cancel;
 	}
 
-	if (gwj->mod.modtype.and) {
-		memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
-		mb.modtype = gwj->mod.modtype.and;
-		if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
-			goto cancel;
-	}
+	if (gwj->flags & CGW_FLAGS_CAN_FD) {
+		struct cgw_fdframe_mod mb;
 
-	if (gwj->mod.modtype.or) {
-		memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
-		mb.modtype = gwj->mod.modtype.or;
-		if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
-			goto cancel;
-	}
+		if (gwj->mod.modtype.and) {
+			memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.and;
+			if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
 
-	if (gwj->mod.modtype.xor) {
-		memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
-		mb.modtype = gwj->mod.modtype.xor;
-		if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
-			goto cancel;
-	}
+		if (gwj->mod.modtype.or) {
+			memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.or;
+			if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
 
-	if (gwj->mod.modtype.set) {
-		memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
-		mb.modtype = gwj->mod.modtype.set;
-		if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
-			goto cancel;
+		if (gwj->mod.modtype.xor) {
+			memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.xor;
+			if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
+
+		if (gwj->mod.modtype.set) {
+			memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.set;
+			if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
+	} else {
+		struct cgw_frame_mod mb;
+
+		if (gwj->mod.modtype.and) {
+			memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.and;
+			if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
+
+		if (gwj->mod.modtype.or) {
+			memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.or;
+			if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
+
+		if (gwj->mod.modtype.xor) {
+			memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.xor;
+			if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
+
+		if (gwj->mod.modtype.set) {
+			memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.set;
+			if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
 	}
 
 	if (gwj->mod.uid) {
@@ -564,7 +662,6 @@
 	}
 
 	if (gwj->gwtype == CGW_TYPE_CAN_CAN) {
-
 		if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) {
 			if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter),
 				    &gwj->ccgw.filter) < 0)
@@ -599,8 +696,9 @@
 		if (idx < s_idx)
 			goto cont;
 
-		if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid,
-		    cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0)
+		if (cgw_put_job(skb, gwj, RTM_NEWROUTE,
+				NETLINK_CB(cb->skb).portid,
+				cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0)
 			break;
 cont:
 		idx++;
@@ -612,7 +710,7 @@
 	return skb->len;
 }
 
-static const struct nla_policy cgw_policy[CGW_MAX+1] = {
+static const struct nla_policy cgw_policy[CGW_MAX + 1] = {
 	[CGW_MOD_AND]	= { .len = sizeof(struct cgw_frame_mod) },
 	[CGW_MOD_OR]	= { .len = sizeof(struct cgw_frame_mod) },
 	[CGW_MOD_XOR]	= { .len = sizeof(struct cgw_frame_mod) },
@@ -624,22 +722,26 @@
 	[CGW_FILTER]	= { .len = sizeof(struct can_filter) },
 	[CGW_LIM_HOPS]	= { .type = NLA_U8 },
 	[CGW_MOD_UID]	= { .type = NLA_U32 },
+	[CGW_FDMOD_AND]	= { .len = sizeof(struct cgw_fdframe_mod) },
+	[CGW_FDMOD_OR]	= { .len = sizeof(struct cgw_fdframe_mod) },
+	[CGW_FDMOD_XOR]	= { .len = sizeof(struct cgw_fdframe_mod) },
+	[CGW_FDMOD_SET]	= { .len = sizeof(struct cgw_fdframe_mod) },
 };
 
 /* check for common and gwtype specific attributes */
 static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 			  u8 gwtype, void *gwtypeattr, u8 *limhops)
 {
-	struct nlattr *tb[CGW_MAX+1];
-	struct cgw_frame_mod mb;
+	struct nlattr *tb[CGW_MAX + 1];
+	struct rtcanmsg *r = nlmsg_data(nlh);
 	int modidx = 0;
 	int err = 0;
 
 	/* initialize modification & checksum data space */
 	memset(mod, 0, sizeof(*mod));
 
-	err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX,
-			  cgw_policy, NULL);
+	err = nlmsg_parse_deprecated(nlh, sizeof(struct rtcanmsg), tb,
+				     CGW_MAX, cgw_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -651,87 +753,166 @@
 	}
 
 	/* check for AND/OR/XOR/SET modifications */
+	if (r->flags & CGW_FLAGS_CAN_FD) {
+		struct cgw_fdframe_mod mb;
 
-	if (tb[CGW_MOD_AND]) {
-		nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN);
+		if (tb[CGW_FDMOD_AND]) {
+			nla_memcpy(&mb, tb[CGW_FDMOD_AND], CGW_FDMODATTR_LEN);
 
-		canframecpy(&mod->modframe.and, &mb.cf);
-		mod->modtype.and = mb.modtype;
+			canfdframecpy(&mod->modframe.and, &mb.cf);
+			mod->modtype.and = mb.modtype;
 
-		if (mb.modtype & CGW_MOD_ID)
-			mod->modfunc[modidx++] = mod_and_id;
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_and_id;
 
-		if (mb.modtype & CGW_MOD_DLC)
-			mod->modfunc[modidx++] = mod_and_dlc;
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_and_len;
 
-		if (mb.modtype & CGW_MOD_DATA)
-			mod->modfunc[modidx++] = mod_and_data;
-	}
+			if (mb.modtype & CGW_MOD_FLAGS)
+				mod->modfunc[modidx++] = mod_and_flags;
 
-	if (tb[CGW_MOD_OR]) {
-		nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN);
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_and_fddata;
+		}
 
-		canframecpy(&mod->modframe.or, &mb.cf);
-		mod->modtype.or = mb.modtype;
+		if (tb[CGW_FDMOD_OR]) {
+			nla_memcpy(&mb, tb[CGW_FDMOD_OR], CGW_FDMODATTR_LEN);
 
-		if (mb.modtype & CGW_MOD_ID)
-			mod->modfunc[modidx++] = mod_or_id;
+			canfdframecpy(&mod->modframe.or, &mb.cf);
+			mod->modtype.or = mb.modtype;
 
-		if (mb.modtype & CGW_MOD_DLC)
-			mod->modfunc[modidx++] = mod_or_dlc;
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_or_id;
 
-		if (mb.modtype & CGW_MOD_DATA)
-			mod->modfunc[modidx++] = mod_or_data;
-	}
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_or_len;
 
-	if (tb[CGW_MOD_XOR]) {
-		nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN);
+			if (mb.modtype & CGW_MOD_FLAGS)
+				mod->modfunc[modidx++] = mod_or_flags;
 
-		canframecpy(&mod->modframe.xor, &mb.cf);
-		mod->modtype.xor = mb.modtype;
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_or_fddata;
+		}
 
-		if (mb.modtype & CGW_MOD_ID)
-			mod->modfunc[modidx++] = mod_xor_id;
+		if (tb[CGW_FDMOD_XOR]) {
+			nla_memcpy(&mb, tb[CGW_FDMOD_XOR], CGW_FDMODATTR_LEN);
 
-		if (mb.modtype & CGW_MOD_DLC)
-			mod->modfunc[modidx++] = mod_xor_dlc;
+			canfdframecpy(&mod->modframe.xor, &mb.cf);
+			mod->modtype.xor = mb.modtype;
 
-		if (mb.modtype & CGW_MOD_DATA)
-			mod->modfunc[modidx++] = mod_xor_data;
-	}
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_xor_id;
 
-	if (tb[CGW_MOD_SET]) {
-		nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN);
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_xor_len;
 
-		canframecpy(&mod->modframe.set, &mb.cf);
-		mod->modtype.set = mb.modtype;
+			if (mb.modtype & CGW_MOD_FLAGS)
+				mod->modfunc[modidx++] = mod_xor_flags;
 
-		if (mb.modtype & CGW_MOD_ID)
-			mod->modfunc[modidx++] = mod_set_id;
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_xor_fddata;
+		}
 
-		if (mb.modtype & CGW_MOD_DLC)
-			mod->modfunc[modidx++] = mod_set_dlc;
+		if (tb[CGW_FDMOD_SET]) {
+			nla_memcpy(&mb, tb[CGW_FDMOD_SET], CGW_FDMODATTR_LEN);
 
-		if (mb.modtype & CGW_MOD_DATA)
-			mod->modfunc[modidx++] = mod_set_data;
+			canfdframecpy(&mod->modframe.set, &mb.cf);
+			mod->modtype.set = mb.modtype;
+
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_set_id;
+
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_set_len;
+
+			if (mb.modtype & CGW_MOD_FLAGS)
+				mod->modfunc[modidx++] = mod_set_flags;
+
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_set_fddata;
+		}
+	} else {
+		struct cgw_frame_mod mb;
+
+		if (tb[CGW_MOD_AND]) {
+			nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN);
+
+			canframecpy(&mod->modframe.and, &mb.cf);
+			mod->modtype.and = mb.modtype;
+
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_and_id;
+
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_and_len;
+
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_and_data;
+		}
+
+		if (tb[CGW_MOD_OR]) {
+			nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN);
+
+			canframecpy(&mod->modframe.or, &mb.cf);
+			mod->modtype.or = mb.modtype;
+
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_or_id;
+
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_or_len;
+
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_or_data;
+		}
+
+		if (tb[CGW_MOD_XOR]) {
+			nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN);
+
+			canframecpy(&mod->modframe.xor, &mb.cf);
+			mod->modtype.xor = mb.modtype;
+
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_xor_id;
+
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_xor_len;
+
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_xor_data;
+		}
+
+		if (tb[CGW_MOD_SET]) {
+			nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN);
+
+			canframecpy(&mod->modframe.set, &mb.cf);
+			mod->modtype.set = mb.modtype;
+
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_set_id;
+
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_set_len;
+
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_set_data;
+		}
 	}
 
 	/* check for checksum operations after CAN frame modifications */
 	if (modidx) {
-
 		if (tb[CGW_CS_CRC8]) {
 			struct cgw_csum_crc8 *c = nla_data(tb[CGW_CS_CRC8]);
 
 			err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
-						 c->result_idx);
+						 c->result_idx, r);
 			if (err)
 				return err;
 
 			nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8],
 				   CGW_CS_CRC8_LEN);
 
-			/*
-			 * select dedicated processing function to reduce
+			/* select dedicated processing function to reduce
 			 * runtime operations in receive hot path.
 			 */
 			if (c->from_idx < 0 || c->to_idx < 0 ||
@@ -747,15 +928,14 @@
 			struct cgw_csum_xor *c = nla_data(tb[CGW_CS_XOR]);
 
 			err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
-						 c->result_idx);
+						 c->result_idx, r);
 			if (err)
 				return err;
 
 			nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR],
 				   CGW_CS_XOR_LEN);
 
-			/*
-			 * select dedicated processing function to reduce
+			/* select dedicated processing function to reduce
 			 * runtime operations in receive hot path.
 			 */
 			if (c->from_idx < 0 || c->to_idx < 0 ||
@@ -767,16 +947,14 @@
 				mod->csumfunc.xor = cgw_csum_xor_neg;
 		}
 
-		if (tb[CGW_MOD_UID]) {
+		if (tb[CGW_MOD_UID])
 			nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32));
-		}
 	}
 
 	if (gwtype == CGW_TYPE_CAN_CAN) {
-
 		/* check CGW_TYPE_CAN_CAN specific attributes */
-
 		struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr;
+
 		memset(ccgw, 0, sizeof(*ccgw));
 
 		/* check for can_filter in attributes */
@@ -837,12 +1015,10 @@
 		return err;
 
 	if (mod.uid) {
-
 		ASSERT_RTNL();
 
 		/* check for updating an existing job with identical uid */
 		hlist_for_each_entry(gwj, &net->can.cgw_list, list) {
-
 			if (gwj->mod.uid != mod.uid)
 				continue;
 
@@ -963,7 +1139,6 @@
 
 	/* remove only the first matching entry */
 	hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
-
 		if (gwj->flags != r->flags)
 			continue;
 
@@ -1022,32 +1197,50 @@
 	pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n",
 		max_hops);
 
-	register_pernet_subsys(&cangw_pernet_ops);
+	ret = register_pernet_subsys(&cangw_pernet_ops);
+	if (ret)
+		return ret;
+
+	ret = -ENOMEM;
 	cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
 				      0, 0, NULL);
-
 	if (!cgw_cache)
-		return -ENOMEM;
+		goto out_cache_create;
 
 	/* set notifier */
 	notifier.notifier_call = cgw_notifier;
-	register_netdevice_notifier(&notifier);
+	ret = register_netdevice_notifier(&notifier);
+	if (ret)
+		goto out_register_notifier;
 
 	ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE,
 				   NULL, cgw_dump_jobs, 0);
-	if (ret) {
-		unregister_netdevice_notifier(&notifier);
-		kmem_cache_destroy(cgw_cache);
-		return -ENOBUFS;
-	}
+	if (ret)
+		goto out_rtnl_register1;
 
-	/* Only the first call to rtnl_register_module can fail */
-	rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
-			     cgw_create_job, NULL, 0);
-	rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
-			     cgw_remove_job, NULL, 0);
+	ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
+				   cgw_create_job, NULL, 0);
+	if (ret)
+		goto out_rtnl_register2;
+	ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
+				   cgw_remove_job, NULL, 0);
+	if (ret)
+		goto out_rtnl_register3;
 
 	return 0;
+
+out_rtnl_register3:
+	rtnl_unregister(PF_CAN, RTM_NEWROUTE);
+out_rtnl_register2:
+	rtnl_unregister(PF_CAN, RTM_GETROUTE);
+out_rtnl_register1:
+	unregister_netdevice_notifier(&notifier);
+out_register_notifier:
+	kmem_cache_destroy(cgw_cache);
+out_cache_create:
+	unregister_pernet_subsys(&cangw_pernet_ops);
+
+	return ret;
 }
 
 static __exit void cgw_module_exit(void)
diff --git a/net/can/j1939/Kconfig b/net/can/j1939/Kconfig
new file mode 100644
index 0000000..2998298
--- /dev/null
+++ b/net/can/j1939/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# SAE J1939 network layer core configuration
+#
+
+config CAN_J1939
+	tristate "SAE J1939"
+	depends on CAN
+	help
+	  SAE J1939
+	  Say Y to have in-kernel support for the j1939 socket type. This
+	  allows communication according to SAE j1939.
+	  The relevant parts in the kernel are
+	  SAE j1939-21 (datalink & transport protocol)
+	  & SAE j1939-81 (network management).
diff --git a/net/can/j1939/Makefile b/net/can/j1939/Makefile
new file mode 100644
index 0000000..19181bd
--- /dev/null
+++ b/net/can/j1939/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_CAN_J1939) += can-j1939.o
+
+can-j1939-objs := \
+	address-claim.o \
+	bus.o \
+	main.o \
+	socket.o \
+	transport.o
diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c
new file mode 100644
index 0000000..f33c473
--- /dev/null
+++ b/net/can/j1939/address-claim.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Pieter Beyens <pieter.beyens@eia.be>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+/* J1939 Address Claiming.
+ * Address Claiming in the kernel
+ * - keeps track of the AC states of ECUs,
+ * - resolves NAME<=>SA taking into account the AC states of ECUs.
+ *
+ * All Address Claim msgs (including host-originated msgs) are processed
+ * in the receive path (a sent msg is always received again via CAN echo).
+ * As such, the processing of AC msgs is done in the order in which msgs
+ * are sent on the bus.
+ *
+ * This module doesn't send msgs itself (e.g. replies to Address Claims);
+ * that is the responsibility of a user space application or daemon.
+ */
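+/* Sketch of the flow, derived from the code below: an Address Claimed
+ * msg is a PGN 0x0ee00 frame whose 8 data bytes carry the 64-bit NAME.
+ * j1939_ac_process() maps NAME<=>SA only after the 250 ms timer started
+ * by j1939_ecu_timer_start() expires; when two ECUs claim the same SA,
+ * the one with the lower NAME wins and the other one is unmapped.
+ */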
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include "j1939-priv.h"
+
+static inline name_t j1939_skb_to_name(const struct sk_buff *skb)
+{
+	return le64_to_cpup((__le64 *)skb->data);
+}
+
+static inline bool j1939_ac_msg_is_request(struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	int req_pgn;
+
+	if (skb->len < 3 || skcb->addr.pgn != J1939_PGN_REQUEST)
+		return false;
+
+	req_pgn = skb->data[0] | (skb->data[1] << 8) | (skb->data[2] << 16);
+
+	return req_pgn == J1939_PGN_ADDRESS_CLAIMED;
+}
+
+static int j1939_ac_verify_outgoing(struct j1939_priv *priv,
+				    struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+
+	if (skb->len != 8) {
+		netdev_notice(priv->ndev, "tx address claim with dlc %i\n",
+			      skb->len);
+		return -EPROTO;
+	}
+
+	if (skcb->addr.src_name != j1939_skb_to_name(skb)) {
+		netdev_notice(priv->ndev, "tx address claim with different name\n");
+		return -EPROTO;
+	}
+
+	if (skcb->addr.sa == J1939_NO_ADDR) {
+		netdev_notice(priv->ndev, "tx address claim with broadcast sa\n");
+		return -EPROTO;
+	}
+
+	/* ac must always be a broadcast */
+	if (skcb->addr.dst_name || skcb->addr.da != J1939_NO_ADDR) {
+		netdev_notice(priv->ndev, "tx address claim with dest, not broadcast\n");
+		return -EPROTO;
+	}
+	return 0;
+}
+
+int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	int ret;
+	u8 addr;
+
+	/* network mgmt: address claiming msgs */
+	if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) {
+		struct j1939_ecu *ecu;
+
+		ret = j1939_ac_verify_outgoing(priv, skb);
+		/* return both when failure & when successful */
+		if (ret < 0)
+			return ret;
+		ecu = j1939_ecu_get_by_name(priv, skcb->addr.src_name);
+		if (!ecu)
+			return -ENODEV;
+
+		if (ecu->addr != skcb->addr.sa)
+			/* hold further traffic for ecu, remove from parent */
+			j1939_ecu_unmap(ecu);
+		j1939_ecu_put(ecu);
+	} else if (skcb->addr.src_name) {
+		/* assign source address */
+		addr = j1939_name_to_addr(priv, skcb->addr.src_name);
+		if (!j1939_address_is_unicast(addr) &&
+		    !j1939_ac_msg_is_request(skb)) {
+			netdev_notice(priv->ndev, "tx drop: invalid sa for name 0x%016llx\n",
+				      skcb->addr.src_name);
+			return -EADDRNOTAVAIL;
+		}
+		skcb->addr.sa = addr;
+	}
+
+	/* assign destination address */
+	if (skcb->addr.dst_name) {
+		addr = j1939_name_to_addr(priv, skcb->addr.dst_name);
+		if (!j1939_address_is_unicast(addr)) {
+			netdev_notice(priv->ndev, "tx drop: invalid da for name 0x%016llx\n",
+				      skcb->addr.dst_name);
+			return -EADDRNOTAVAIL;
+		}
+		skcb->addr.da = addr;
+	}
+	return 0;
+}
+
+static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_ecu *ecu, *prev;
+	name_t name;
+
+	if (skb->len != 8) {
+		netdev_notice(priv->ndev, "rx address claim with wrong dlc %i\n",
+			      skb->len);
+		return;
+	}
+
+	name = j1939_skb_to_name(skb);
+	skcb->addr.src_name = name;
+	if (!name) {
+		netdev_notice(priv->ndev, "rx address claim without name\n");
+		return;
+	}
+
+	if (!j1939_address_is_valid(skcb->addr.sa)) {
+		netdev_notice(priv->ndev, "rx address claim with broadcast sa\n");
+		return;
+	}
+
+	write_lock_bh(&priv->lock);
+
+	/* A few words on the ECU ref counting:
+	 *
+	 * First we get an ECU handle, either with
+	 * j1939_ecu_get_by_name_locked() (increments the ref counter)
+	 * or j1939_ecu_create_locked() (initializes an ECU object
+	 * with a ref counter of 1).
+	 *
+	 * j1939_ecu_unmap_locked() will decrement the ref counter,
+	 * but only if the ECU was mapped before. So "ecu" still
+	 * belongs to us.
+	 *
+	 * j1939_ecu_timer_start() will increment the ref counter
+	 * before it starts the timer, so we can put the ecu when
+	 * leaving this function.
+	 */
+	ecu = j1939_ecu_get_by_name_locked(priv, name);
+	if (!ecu && j1939_address_is_unicast(skcb->addr.sa))
+		ecu = j1939_ecu_create_locked(priv, name);
+
+	if (IS_ERR_OR_NULL(ecu))
+		goto out_unlock_bh;
+
+	/* cancel pending (previous) address claim */
+	j1939_ecu_timer_cancel(ecu);
+
+	if (j1939_address_is_idle(skcb->addr.sa)) {
+		j1939_ecu_unmap_locked(ecu);
+		goto out_ecu_put;
+	}
+
+	/* save new addr */
+	if (ecu->addr != skcb->addr.sa)
+		j1939_ecu_unmap_locked(ecu);
+	ecu->addr = skcb->addr.sa;
+
+	prev = j1939_ecu_get_by_addr_locked(priv, skcb->addr.sa);
+	if (prev) {
+		if (ecu->name > prev->name) {
+			j1939_ecu_unmap_locked(ecu);
+			j1939_ecu_put(prev);
+			goto out_ecu_put;
+		} else {
+			/* kick prev if less or equal */
+			j1939_ecu_unmap_locked(prev);
+			j1939_ecu_put(prev);
+		}
+	}
+
+	j1939_ecu_timer_start(ecu);
+ out_ecu_put:
+	j1939_ecu_put(ecu);
+ out_unlock_bh:
+	write_unlock_bh(&priv->lock);
+}
+
+void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_ecu *ecu;
+
+	/* network mgmt */
+	if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) {
+		j1939_ac_process(priv, skb);
+	} else if (j1939_address_is_unicast(skcb->addr.sa)) {
+		/* assign source name */
+		ecu = j1939_ecu_get_by_addr(priv, skcb->addr.sa);
+		if (ecu) {
+			skcb->addr.src_name = ecu->name;
+			j1939_ecu_put(ecu);
+		}
+	}
+
+	/* assign destination name */
+	ecu = j1939_ecu_get_by_addr(priv, skcb->addr.da);
+	if (ecu) {
+		skcb->addr.dst_name = ecu->name;
+		j1939_ecu_put(ecu);
+	}
+}
diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c
new file mode 100644
index 0000000..4866879
--- /dev/null
+++ b/net/can/j1939/bus.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+/* bus for j1939 remote devices
+ * Since the switch to rtnetlink, no real bus is used.
+ */
+
+#include <net/sock.h>
+
+#include "j1939-priv.h"
+
+static void __j1939_ecu_release(struct kref *kref)
+{
+	struct j1939_ecu *ecu = container_of(kref, struct j1939_ecu, kref);
+	struct j1939_priv *priv = ecu->priv;
+
+	list_del(&ecu->list);
+	kfree(ecu);
+	j1939_priv_put(priv);
+}
+
+void j1939_ecu_put(struct j1939_ecu *ecu)
+{
+	kref_put(&ecu->kref, __j1939_ecu_release);
+}
+
+static void j1939_ecu_get(struct j1939_ecu *ecu)
+{
+	kref_get(&ecu->kref);
+}
+
+static bool j1939_ecu_is_mapped_locked(struct j1939_ecu *ecu)
+{
+	struct j1939_priv *priv = ecu->priv;
+
+	lockdep_assert_held(&priv->lock);
+
+	return j1939_ecu_find_by_addr_locked(priv, ecu->addr) == ecu;
+}
+
+/* ECU device interface */
+/* map ECU to a bus address space */
+static void j1939_ecu_map_locked(struct j1939_ecu *ecu)
+{
+	struct j1939_priv *priv = ecu->priv;
+	struct j1939_addr_ent *ent;
+
+	lockdep_assert_held(&priv->lock);
+
+	if (!j1939_address_is_unicast(ecu->addr))
+		return;
+
+	ent = &priv->ents[ecu->addr];
+
+	if (ent->ecu) {
+		netdev_warn(priv->ndev, "Trying to map already mapped ECU, addr: 0x%02x, name: 0x%016llx. Skip it.\n",
+			    ecu->addr, ecu->name);
+		return;
+	}
+
+	j1939_ecu_get(ecu);
+	ent->ecu = ecu;
+	ent->nusers += ecu->nusers;
+}
+
+/* unmap ECU from a bus address space */
+void j1939_ecu_unmap_locked(struct j1939_ecu *ecu)
+{
+	struct j1939_priv *priv = ecu->priv;
+	struct j1939_addr_ent *ent;
+
+	lockdep_assert_held(&priv->lock);
+
+	if (!j1939_address_is_unicast(ecu->addr))
+		return;
+
+	if (!j1939_ecu_is_mapped_locked(ecu))
+		return;
+
+	ent = &priv->ents[ecu->addr];
+	ent->ecu = NULL;
+	ent->nusers -= ecu->nusers;
+	j1939_ecu_put(ecu);
+}
+
+void j1939_ecu_unmap(struct j1939_ecu *ecu)
+{
+	write_lock_bh(&ecu->priv->lock);
+	j1939_ecu_unmap_locked(ecu);
+	write_unlock_bh(&ecu->priv->lock);
+}
+
+void j1939_ecu_unmap_all(struct j1939_priv *priv)
+{
+	int i;
+
+	write_lock_bh(&priv->lock);
+	for (i = 0; i < ARRAY_SIZE(priv->ents); i++)
+		if (priv->ents[i].ecu)
+			j1939_ecu_unmap_locked(priv->ents[i].ecu);
+	write_unlock_bh(&priv->lock);
+}
+
+void j1939_ecu_timer_start(struct j1939_ecu *ecu)
+{
+	/* The ECU is held here and released in the
+	 * j1939_ecu_timer_handler() or j1939_ecu_timer_cancel().
+	 */
+	j1939_ecu_get(ecu);
+
+	/* Schedule timer in 250 msec to commit address change. */
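+	/* The 250 ms delay matches the address claim contention window of
+	 * SAE J1939-81: a claimed address may only be used once no
+	 * competing claim showed up within that time.
+	 */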
+	hrtimer_start(&ecu->ac_timer, ms_to_ktime(250),
+		      HRTIMER_MODE_REL_SOFT);
+}
+
+void j1939_ecu_timer_cancel(struct j1939_ecu *ecu)
+{
+	if (hrtimer_cancel(&ecu->ac_timer))
+		j1939_ecu_put(ecu);
+}
+
+static enum hrtimer_restart j1939_ecu_timer_handler(struct hrtimer *hrtimer)
+{
+	struct j1939_ecu *ecu =
+		container_of(hrtimer, struct j1939_ecu, ac_timer);
+	struct j1939_priv *priv = ecu->priv;
+
+	write_lock_bh(&priv->lock);
+	/* TODO: can we test if ecu->addr is unicast before starting
+	 * the timer?
+	 */
+	j1939_ecu_map_locked(ecu);
+
+	/* The corresponding j1939_ecu_get() is in
+	 * j1939_ecu_timer_start().
+	 */
+	j1939_ecu_put(ecu);
+	write_unlock_bh(&priv->lock);
+
+	return HRTIMER_NORESTART;
+}
+
+struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name)
+{
+	struct j1939_ecu *ecu;
+
+	lockdep_assert_held(&priv->lock);
+
+	ecu = kzalloc(sizeof(*ecu), gfp_any());
+	if (!ecu)
+		return ERR_PTR(-ENOMEM);
+	kref_init(&ecu->kref);
+	ecu->addr = J1939_IDLE_ADDR;
+	ecu->name = name;
+
+	hrtimer_init(&ecu->ac_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+	ecu->ac_timer.function = j1939_ecu_timer_handler;
+	INIT_LIST_HEAD(&ecu->list);
+
+	j1939_priv_get(priv);
+	ecu->priv = priv;
+	list_add_tail(&ecu->list, &priv->ecus);
+
+	return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv,
+						u8 addr)
+{
+	lockdep_assert_held(&priv->lock);
+
+	return priv->ents[addr].ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv, u8 addr)
+{
+	struct j1939_ecu *ecu;
+
+	lockdep_assert_held(&priv->lock);
+
+	if (!j1939_address_is_unicast(addr))
+		return NULL;
+
+	ecu = j1939_ecu_find_by_addr_locked(priv, addr);
+	if (ecu)
+		j1939_ecu_get(ecu);
+
+	return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr)
+{
+	struct j1939_ecu *ecu;
+
+	read_lock_bh(&priv->lock);
+	ecu = j1939_ecu_get_by_addr_locked(priv, addr);
+	read_unlock_bh(&priv->lock);
+
+	return ecu;
+}
+
+/* get pointer to ecu without increasing ref counter */
+static struct j1939_ecu *j1939_ecu_find_by_name_locked(struct j1939_priv *priv,
+						       name_t name)
+{
+	struct j1939_ecu *ecu;
+
+	lockdep_assert_held(&priv->lock);
+
+	list_for_each_entry(ecu, &priv->ecus, list) {
+		if (ecu->name == name)
+			return ecu;
+	}
+
+	return NULL;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv,
+					       name_t name)
+{
+	struct j1939_ecu *ecu;
+
+	lockdep_assert_held(&priv->lock);
+
+	if (!name)
+		return NULL;
+
+	ecu = j1939_ecu_find_by_name_locked(priv, name);
+	if (ecu)
+		j1939_ecu_get(ecu);
+
+	return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name)
+{
+	struct j1939_ecu *ecu;
+
+	read_lock_bh(&priv->lock);
+	ecu = j1939_ecu_get_by_name_locked(priv, name);
+	read_unlock_bh(&priv->lock);
+
+	return ecu;
+}
+
+u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name)
+{
+	struct j1939_ecu *ecu;
+	int addr = J1939_IDLE_ADDR;
+
+	if (!name)
+		return J1939_NO_ADDR;
+
+	read_lock_bh(&priv->lock);
+	ecu = j1939_ecu_find_by_name_locked(priv, name);
+	if (ecu && j1939_ecu_is_mapped_locked(ecu))
+		/* ecu's SA is registered */
+		addr = ecu->addr;
+
+	read_unlock_bh(&priv->lock);
+
+	return addr;
+}
+
+/* TX addr/name accounting
+ * Transport protocol needs to know if a SA is local or not
+ * These functions originate from userspace manipulating sockets,
+ * so locking is straightforward
+ */
+
+int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa)
+{
+	struct j1939_ecu *ecu;
+	int err = 0;
+
+	write_lock_bh(&priv->lock);
+
+	if (j1939_address_is_unicast(sa))
+		priv->ents[sa].nusers++;
+
+	if (!name)
+		goto done;
+
+	ecu = j1939_ecu_get_by_name_locked(priv, name);
+	if (!ecu)
+		ecu = j1939_ecu_create_locked(priv, name);
+	err = PTR_ERR_OR_ZERO(ecu);
+	if (err)
+		goto done;
+
+	ecu->nusers++;
+	/* TODO: do we care if ecu->addr != sa? */
+	if (j1939_ecu_is_mapped_locked(ecu))
+		/* ecu's sa is active already */
+		priv->ents[ecu->addr].nusers++;
+
+ done:
+	write_unlock_bh(&priv->lock);
+
+	return err;
+}
+
+void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa)
+{
+	struct j1939_ecu *ecu;
+
+	write_lock_bh(&priv->lock);
+
+	if (j1939_address_is_unicast(sa))
+		priv->ents[sa].nusers--;
+
+	if (!name)
+		goto done;
+
+	ecu = j1939_ecu_find_by_name_locked(priv, name);
+	if (WARN_ON_ONCE(!ecu))
+		goto done;
+
+	ecu->nusers--;
+	/* TODO: do we care if ecu->addr != sa? */
+	if (j1939_ecu_is_mapped_locked(ecu))
+		/* ecu's sa is active already */
+		priv->ents[ecu->addr].nusers--;
+	j1939_ecu_put(ecu);
+
+ done:
+	write_unlock_bh(&priv->lock);
+}
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
new file mode 100644
index 0000000..12369b6
--- /dev/null
+++ b/net/can/j1939/j1939-priv.h
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+#ifndef _J1939_PRIV_H_
+#define _J1939_PRIV_H_
+
+#include <linux/can/j1939.h>
+#include <net/sock.h>
+
+/* Timeout to receive the abort signal over loopback. In case the CAN
+ * bus is open, the timeout should be triggered.
+ */
+#define J1939_XTP_ABORT_TIMEOUT_MS 500
+#define J1939_SIMPLE_ECHO_TIMEOUT_MS (10 * 1000)
+
+struct j1939_session;
+enum j1939_sk_errqueue_type {
+	J1939_ERRQUEUE_ACK,
+	J1939_ERRQUEUE_SCHED,
+	J1939_ERRQUEUE_ABORT,
+};
+
+/* j1939 devices */
+struct j1939_ecu {
+	struct list_head list;
+	name_t name;
+	u8 addr;
+
+	/* indicates that this ecu successfully claimed @sa as its address */
+	struct hrtimer ac_timer;
+	struct kref kref;
+	struct j1939_priv *priv;
+
+	/* count users, to help transport protocol decide for interaction */
+	int nusers;
+};
+
+struct j1939_priv {
+	struct list_head ecus;
+	/* local list entry in priv
+	 * These allow irq (& softirq) context lookups on j1939 devices.
+	 * This approach (separate lists) is used because the two
+	 * alternatives are either harder or plain wrong:
+	 * 1) using the pure kobject methods involves mutexes, which are not
+	 *    allowed in irq context.
+	 * 2) duplicating data structures would require a lot of
+	 *    synchronization code.
+	 */
+
+	/* segments need a lock to protect the above list */
+	rwlock_t lock;
+
+	struct net_device *ndev;
+
+	/* list of 256 ecu ptrs, that cache the claimed addresses.
+	 * also protected by the above lock
+	 */
+	struct j1939_addr_ent {
+		struct j1939_ecu *ecu;
+		/* count users, to help transport protocol */
+		int nusers;
+	} ents[256];
+
+	struct kref kref;
+
+	/* List of active sessions to prevent start of conflicting
+	 * one.
+	 *
+	 * Do not start two sessions of same type, addresses and
+	 * direction.
+	 */
+	struct list_head active_session_list;
+
+	/* protects active_session_list */
+	spinlock_t active_session_list_lock;
+
+	unsigned int tp_max_packet_size;
+
+	/* lock for j1939_socks list */
+	spinlock_t j1939_socks_lock;
+	struct list_head j1939_socks;
+
+	struct kref rx_kref;
+};
+
+void j1939_ecu_put(struct j1939_ecu *ecu);
+
+/* keep the cache of what is local */
+int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa);
+void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa);
+
+static inline bool j1939_address_is_unicast(u8 addr)
+{
+	return addr <= J1939_MAX_UNICAST_ADDR;
+}
+
+static inline bool j1939_address_is_idle(u8 addr)
+{
+	return addr == J1939_IDLE_ADDR;
+}
+
+static inline bool j1939_address_is_valid(u8 addr)
+{
+	return addr != J1939_NO_ADDR;
+}
+
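+/* Per SAE J1939-21: a PGN whose PDU format byte (bits 8..15) is below
+ * 0xf0 is "PDU1" and carries a destination address in its low byte;
+ * 0xf0 and above is "PDU2", where the low byte is a group extension
+ * and the PGN is always broadcast.
+ */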
+static inline bool j1939_pgn_is_pdu1(pgn_t pgn)
+{
+	/* ignore dp & res bits for this */
+	return (pgn & 0xff00) < 0xf000;
+}
+
+/* utility to correctly unmap an ECU */
+void j1939_ecu_unmap_locked(struct j1939_ecu *ecu);
+void j1939_ecu_unmap(struct j1939_ecu *ecu);
+
+u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name);
+struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv,
+						u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv,
+					       u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name);
+struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv,
+					       name_t name);
+
+enum j1939_transfer_type {
+	J1939_TP,
+	J1939_ETP,
+	J1939_SIMPLE,
+};
+
+struct j1939_addr {
+	name_t src_name;
+	name_t dst_name;
+	pgn_t pgn;
+
+	u8 sa;
+	u8 da;
+
+	u8 type;
+};
+
+/* control buffer of the sk_buff */
+struct j1939_sk_buff_cb {
+	/* Offset in bytes within one ETP session */
+	u32 offset;
+
+	/* for tx, MSG_SYN will be used to sync on sockets */
+	u32 msg_flags;
+	u32 tskey;
+
+	struct j1939_addr addr;
+
+	/* Flags for quick lookups during skb processing.
+	 * These are set in the receive path only.
+	 */
+#define J1939_ECU_LOCAL_SRC BIT(0)
+#define J1939_ECU_LOCAL_DST BIT(1)
+	u8 flags;
+
+	priority_t priority;
+};
+
+static inline
+struct j1939_sk_buff_cb *j1939_skb_to_cb(const struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct j1939_sk_buff_cb) > sizeof(skb->cb));
+
+	return (struct j1939_sk_buff_cb *)skb->cb;
+}
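j1939_skb_to_cb() uses the common kernel idiom of stashing per-packet state in
the 48-byte skb->cb scratch area; the BUILD_BUG_ON turns any future size
overrun into a compile-time error. The same idiom, reduced to a minimal sketch
with hypothetical names:

	struct my_cb {
		u32 cookie;	/* any per-skb private state */
	};

	static inline struct my_cb *my_skb_to_cb(struct sk_buff *skb)
	{
		BUILD_BUG_ON(sizeof(struct my_cb) > sizeof(skb->cb));
		return (struct my_cb *)skb->cb;
	}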
+
+int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb);
+bool j1939_sk_recv_match(struct j1939_priv *priv,
+			 struct j1939_sk_buff_cb *skcb);
+void j1939_sk_send_loop_abort(struct sock *sk, int err);
+void j1939_sk_errqueue(struct j1939_session *session,
+		       enum j1939_sk_errqueue_type type);
+void j1939_sk_queue_activate_next(struct j1939_session *session);
+
+/* stack entries */
+struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
+				    struct sk_buff *skb, size_t size);
+int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb);
+int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb);
+
+/* network management */
+struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name);
+
+void j1939_ecu_timer_start(struct j1939_ecu *ecu);
+void j1939_ecu_timer_cancel(struct j1939_ecu *ecu);
+void j1939_ecu_unmap_all(struct j1939_priv *priv);
+
+struct j1939_priv *j1939_netdev_start(struct net_device *ndev);
+void j1939_netdev_stop(struct j1939_priv *priv);
+
+void j1939_priv_put(struct j1939_priv *priv);
+void j1939_priv_get(struct j1939_priv *priv);
+
+/* notify/alert all j1939 sockets bound to ifindex */
+void j1939_sk_netdev_event_netdown(struct j1939_priv *priv);
+int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk);
+void j1939_tp_init(struct j1939_priv *priv);
+
+/* decrement pending skb for a j1939 socket */
+void j1939_sock_pending_del(struct sock *sk);
+
+enum j1939_session_state {
+	J1939_SESSION_NEW,
+	J1939_SESSION_ACTIVE,
+	/* waiting for abort signal on the bus */
+	J1939_SESSION_WAITING_ABORT,
+	J1939_SESSION_ACTIVE_MAX,
+	J1939_SESSION_DONE,
+};
+
+struct j1939_session {
+	struct j1939_priv *priv;
+	struct list_head active_session_list_entry;
+	struct list_head sk_session_queue_entry;
+	struct kref kref;
+	struct sock *sk;
+
+	/* ifindex, src, dst, pgn define the session block;
+	 * they are _never_ modified after insertion in the list.
+	 * this decreases locking problems a _lot_
+	 */
+	struct j1939_sk_buff_cb skcb;
+	struct sk_buff_head skb_queue;
+
+	/* all tx related stuff (last_txcmd, pkt.tx)
+	 * is protected (modified only) by the txtimer hrtimer.
+	 * 'total' & 'block' are never changed;
+	 * last_cmd, last & block are protected by ->lock.
+	 * this means tx may still run after a cts that should have
+	 * stopped it is received, but this time discrepancy cannot be
+	 * avoided anyhow
+	 */
+	u8 last_cmd, last_txcmd;
+	bool transmission;
+	bool extd;
+	/* Total message size, number of bytes */
+	unsigned int total_message_size;
+	/* Total number of bytes queued from the socket to the session */
+	unsigned int total_queued_size;
+	unsigned int tx_retry;
+
+	int err;
+	u32 tskey;
+	enum j1939_session_state state;
+
+	/* Packet counters for a (extended) transfer session. Each packet
+	 * carries at most 7 bytes of payload.
+	 */
+	struct {
+		/* total - total number of packets for this session */
+		unsigned int total;
+		/* last - last packet of a transfer block, after which
+		 * the responder should send ETP.CM_CTS and the
+		 * originator ETP.CM_DPO
+		 */
+		unsigned int last;
+		/* tx - number of packets sent by the originator node.
+		 * this counter can be set back if the responder node
+		 * didn't receive all packets sent by the originator.
+		 */
+		unsigned int tx;
+		unsigned int tx_acked;
+		/* rx - number of packets received */
+		unsigned int rx;
+		/* block - amount of packets expected in one block */
+		unsigned int block;
+		/* dpo - ETP.CM_DPO, Data Packet Offset */
+		unsigned int dpo;
+	} pkt;
+	struct hrtimer txtimer, rxtimer;
+};
+
+struct j1939_sock {
+	struct sock sk; /* must be first, so it can be skipped by memset */
+	struct j1939_priv *priv;
+	struct list_head list;
+
+#define J1939_SOCK_BOUND BIT(0)
+#define J1939_SOCK_CONNECTED BIT(1)
+#define J1939_SOCK_PROMISC BIT(2)
+#define J1939_SOCK_ERRQUEUE BIT(3)
+	int state;
+
+	int ifindex;
+	struct j1939_addr addr;
+	struct j1939_filter *filters;
+	int nfilters;
+	pgn_t pgn_rx_filter;
+
+	/* j1939 may emit equal PGNs (but different CAN-ids) out of order
+	 * when the transport protocol kicks in.
+	 * To allow emitting in order, keep a 'pending' nr. of packets
+	 */
+	atomic_t skb_pending;
+	wait_queue_head_t waitq;
+
+	/* lock for the sk_session_queue list */
+	spinlock_t sk_session_queue_lock;
+	struct list_head sk_session_queue;
+};
+
+static inline struct j1939_sock *j1939_sk(const struct sock *sk)
+{
+	return container_of(sk, struct j1939_sock, sk);
+}
+
+void j1939_session_get(struct j1939_session *session);
+void j1939_session_put(struct j1939_session *session);
+void j1939_session_skb_queue(struct j1939_session *session,
+			     struct sk_buff *skb);
+int j1939_session_activate(struct j1939_session *session);
+void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec);
+void j1939_session_timers_cancel(struct j1939_session *session);
+
+#define J1939_MAX_TP_PACKET_SIZE (7 * 0xff)
+#define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff)
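For scale: J1939_MAX_TP_PACKET_SIZE works out to 7 * 255 = 1785 bytes (255
data packets of 7 payload bytes each, the classic TP ceiling), while
J1939_MAX_ETP_PACKET_SIZE is 7 * 16777215 = 117440505 bytes, roughly 112 MiB
per extended session.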
+
+#define J1939_REGULAR 0
+#define J1939_EXTENDED 1
+
+/* CAN protocol */
+extern const struct can_proto j1939_can_proto;
+
+#endif /* _J1939_PRIV_H_ */
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
new file mode 100644
index 0000000..137054b
--- /dev/null
+++ b/net/can/j1939/main.c
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Pieter Beyens <pieter.beyens@eia.be>
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2018 Protonic,
+//                         Robin van der Gracht <robin@protonic.nl>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+/* Core of can-j1939 that links j1939 to CAN. */
+
+#include <linux/can/can-ml.h>
+#include <linux/can/core.h>
+#include <linux/can/skb.h>
+#include <linux/if_arp.h>
+#include <linux/module.h>
+
+#include "j1939-priv.h"
+
+MODULE_DESCRIPTION("PF_CAN SAE J1939");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("EIA Electronics (Kurt Van Dijck & Pieter Beyens)");
+MODULE_ALIAS("can-proto-" __stringify(CAN_J1939));
+
+/* LOWLEVEL CAN interface */
+
+/* CAN_HDR: #bytes before can_frame data part */
+#define J1939_CAN_HDR (offsetof(struct can_frame, data))
+
+/* CAN_FTR: #bytes beyond data part */
+#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \
+		 sizeof(((struct can_frame *)0)->data))
+
+/* lowest layer */
+static void j1939_can_recv(struct sk_buff *iskb, void *data)
+{
+	struct j1939_priv *priv = data;
+	struct sk_buff *skb;
+	struct j1939_sk_buff_cb *skcb, *iskcb;
+	struct can_frame *cf;
+
+	/* create a copy of the skb
+	 * j1939 only delivers the real data bytes,
+	 * the header goes into sockaddr.
+	 * j1939 must not modify the incoming skb in such a way
+	 */
+	skb = skb_clone(iskb, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	j1939_priv_get(priv);
+	can_skb_set_owner(skb, iskb->sk);
+
+	/* get a pointer to the header of the skb
+	 * the skb payload pointer is advanced, so that the next access
+	 * to skb->data returns the actual payload
+	 */
+	cf = (void *)skb->data;
+	skb_pull(skb, J1939_CAN_HDR);
+
+	/* fix length, set to dlc, with 8 maximum */
+	skb_trim(skb, min_t(uint8_t, cf->can_dlc, 8));
+
+	/* set addr */
+	skcb = j1939_skb_to_cb(skb);
+	memset(skcb, 0, sizeof(*skcb));
+
+	iskcb = j1939_skb_to_cb(iskb);
+	skcb->tskey = iskcb->tskey;
+	skcb->priority = (cf->can_id >> 26) & 0x7;
+	skcb->addr.sa = cf->can_id;
+	skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX;
+	/* set default message type */
+	skcb->addr.type = J1939_TP;
+	if (j1939_pgn_is_pdu1(skcb->addr.pgn)) {
+		/* Type 1: with destination address */
+		skcb->addr.da = skcb->addr.pgn;
+		/* normalize pgn: strip dst address */
+		skcb->addr.pgn &= 0x3ff00;
+	} else {
+		/* set broadcast address */
+		skcb->addr.da = J1939_NO_ADDR;
+	}
+
+	/* update localflags */
+	read_lock_bh(&priv->lock);
+	if (j1939_address_is_unicast(skcb->addr.sa) &&
+	    priv->ents[skcb->addr.sa].nusers)
+		skcb->flags |= J1939_ECU_LOCAL_SRC;
+	if (j1939_address_is_unicast(skcb->addr.da) &&
+	    priv->ents[skcb->addr.da].nusers)
+		skcb->flags |= J1939_ECU_LOCAL_DST;
+	read_unlock_bh(&priv->lock);
+
+	/* deliver into the j1939 stack ... */
+	j1939_ac_recv(priv, skb);
+
+	if (j1939_tp_recv(priv, skb))
+		/* this means the transport layer processed the message */
+		goto done;
+
+	j1939_simple_recv(priv, skb);
+	j1939_sk_recv(priv, skb);
+ done:
+	j1939_priv_put(priv);
+	kfree_skb(skb);
+}
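The bit-slicing above follows the 29-bit J1939 identifier layout: priority in
bits 26..28, EDP/DP next, PF in bits 16..23, PS in bits 8..15 and the source
address in bits 0..7. A worked decode for the illustrative can_id 0x18ef0201:

	/* prio = (0x18ef0201 >> 26) & 0x7          -> 6
	 * pgn  = (0x18ef0201 >> 8) & J1939_PGN_MAX -> 0x0ef02
	 * PF 0xef < 0xf0, so PDU1: da becomes the low byte (0x02),
	 * pgn is normalized to 0x0ef00, and sa = 0x01
	 */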
+
+/* NETDEV MANAGEMENT */
+
+/* values for can_rx_(un)register */
+#define J1939_CAN_ID CAN_EFF_FLAG
+#define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG)
+
+static DEFINE_SPINLOCK(j1939_netdev_lock);
+
+static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
+{
+	struct j1939_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return NULL;
+
+	rwlock_init(&priv->lock);
+	INIT_LIST_HEAD(&priv->ecus);
+	priv->ndev = ndev;
+	kref_init(&priv->kref);
+	kref_init(&priv->rx_kref);
+	dev_hold(ndev);
+
+	netdev_dbg(priv->ndev, "%s : 0x%p\n", __func__, priv);
+
+	return priv;
+}
+
+static inline void j1939_priv_set(struct net_device *ndev,
+				  struct j1939_priv *priv)
+{
+	struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+
+	can_ml_priv->j1939_priv = priv;
+}
+
+static void __j1939_priv_release(struct kref *kref)
+{
+	struct j1939_priv *priv = container_of(kref, struct j1939_priv, kref);
+	struct net_device *ndev = priv->ndev;
+
+	netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv);
+
+	WARN_ON_ONCE(!list_empty(&priv->active_session_list));
+	WARN_ON_ONCE(!list_empty(&priv->ecus));
+	WARN_ON_ONCE(!list_empty(&priv->j1939_socks));
+
+	dev_put(ndev);
+	kfree(priv);
+}
+
+void j1939_priv_put(struct j1939_priv *priv)
+{
+	kref_put(&priv->kref, __j1939_priv_release);
+}
+
+void j1939_priv_get(struct j1939_priv *priv)
+{
+	kref_get(&priv->kref);
+}
+
+static int j1939_can_rx_register(struct j1939_priv *priv)
+{
+	struct net_device *ndev = priv->ndev;
+	int ret;
+
+	j1939_priv_get(priv);
+	ret = can_rx_register(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK,
+			      j1939_can_recv, priv, "j1939", NULL);
+	if (ret < 0) {
+		j1939_priv_put(priv);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void j1939_can_rx_unregister(struct j1939_priv *priv)
+{
+	struct net_device *ndev = priv->ndev;
+
+	can_rx_unregister(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK,
+			  j1939_can_recv, priv);
+
+	j1939_priv_put(priv);
+}
+
+static void __j1939_rx_release(struct kref *kref)
+	__releases(&j1939_netdev_lock)
+{
+	struct j1939_priv *priv = container_of(kref, struct j1939_priv,
+					       rx_kref);
+
+	j1939_can_rx_unregister(priv);
+	j1939_ecu_unmap_all(priv);
+	j1939_priv_set(priv->ndev, NULL);
+	spin_unlock(&j1939_netdev_lock);
+}
+
+/* get pointer to priv without increasing ref counter */
+static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev)
+{
+	struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+
+	if (!can_ml_priv)
+		return NULL;
+
+	return can_ml_priv->j1939_priv;
+}
+
+static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev)
+{
+	struct j1939_priv *priv;
+
+	lockdep_assert_held(&j1939_netdev_lock);
+
+	if (ndev->type != ARPHRD_CAN)
+		return NULL;
+
+	priv = j1939_ndev_to_priv(ndev);
+	if (priv)
+		j1939_priv_get(priv);
+
+	return priv;
+}
+
+static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev)
+{
+	struct j1939_priv *priv;
+
+	spin_lock(&j1939_netdev_lock);
+	priv = j1939_priv_get_by_ndev_locked(ndev);
+	spin_unlock(&j1939_netdev_lock);
+
+	return priv;
+}
+
+struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
+{
+	struct j1939_priv *priv, *priv_new;
+	int ret;
+
+	priv = j1939_priv_get_by_ndev(ndev);
+	if (priv) {
+		kref_get(&priv->rx_kref);
+		return priv;
+	}
+
+	priv = j1939_priv_create(ndev);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+
+	j1939_tp_init(priv);
+	spin_lock_init(&priv->j1939_socks_lock);
+	INIT_LIST_HEAD(&priv->j1939_socks);
+
+	spin_lock(&j1939_netdev_lock);
+	priv_new = j1939_priv_get_by_ndev_locked(ndev);
+	if (priv_new) {
+		/* Someone was faster than us, use their priv and roll
+		 * back ours.
+		 */
+		spin_unlock(&j1939_netdev_lock);
+		dev_put(ndev);
+		kfree(priv);
+		kref_get(&priv_new->rx_kref);
+		return priv_new;
+	}
+	j1939_priv_set(ndev, priv);
+	spin_unlock(&j1939_netdev_lock);
+
+	ret = j1939_can_rx_register(priv);
+	if (ret < 0)
+		goto out_priv_put;
+
+	return priv;
+
+ out_priv_put:
+	j1939_priv_set(ndev, NULL);
+	dev_put(ndev);
+	kfree(priv);
+
+	return ERR_PTR(ret);
+}
+
+void j1939_netdev_stop(struct j1939_priv *priv)
+{
+	kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock);
+	j1939_priv_put(priv);
+}
+
+int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	int ret, dlc;
+	canid_t canid;
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct can_frame *cf;
+
+	/* apply sanity checks */
+	if (j1939_pgn_is_pdu1(skcb->addr.pgn))
+		skcb->addr.pgn &= J1939_PGN_PDU1_MAX;
+	else
+		skcb->addr.pgn &= J1939_PGN_MAX;
+
+	if (skcb->priority > 7)
+		skcb->priority = 6;
+
+	ret = j1939_ac_fixup(priv, skb);
+	if (unlikely(ret))
+		goto failed;
+	dlc = skb->len;
+
+	/* re-claim the CAN_HDR from the SKB */
+	cf = skb_push(skb, J1939_CAN_HDR);
+
+	/* make it a full can frame again */
+	skb_put(skb, J1939_CAN_FTR + (8 - dlc));
+
+	canid = CAN_EFF_FLAG |
+		(skcb->priority << 26) |
+		(skcb->addr.pgn << 8) |
+		skcb->addr.sa;
+	if (j1939_pgn_is_pdu1(skcb->addr.pgn))
+		canid |= skcb->addr.da << 8;
+
+	cf->can_id = canid;
+	cf->can_dlc = dlc;
+
+	return can_send(skb, 1);
+
+ failed:
+	kfree_skb(skb);
+	return ret;
+}
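Encoding mirrors the receive path: for priority 6, PDU1 PGN 0xef00,
destination 0x02 and source 0x01, the identifier built above is
CAN_EFF_FLAG | (6 << 26) | (0xef00 << 8) | (0x02 << 8) | 0x01 = 0x98ef0201,
the same frame the decode in j1939_can_recv() takes apart.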
+
+static int j1939_netdev_notify(struct notifier_block *nb,
+			       unsigned long msg, void *data)
+{
+	struct net_device *ndev = netdev_notifier_info_to_dev(data);
+	struct j1939_priv *priv;
+
+	priv = j1939_priv_get_by_ndev(ndev);
+	if (!priv)
+		goto notify_done;
+
+	if (ndev->type != ARPHRD_CAN)
+		goto notify_put;
+
+	switch (msg) {
+	case NETDEV_DOWN:
+		j1939_cancel_active_session(priv, NULL);
+		j1939_sk_netdev_event_netdown(priv);
+		j1939_ecu_unmap_all(priv);
+		break;
+	}
+
+notify_put:
+	j1939_priv_put(priv);
+
+notify_done:
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block j1939_netdev_notifier = {
+	.notifier_call = j1939_netdev_notify,
+};
+
+/* MODULE interface */
+static __init int j1939_module_init(void)
+{
+	int ret;
+
+	pr_info("can: SAE J1939\n");
+
+	ret = register_netdevice_notifier(&j1939_netdev_notifier);
+	if (ret)
+		goto fail_notifier;
+
+	ret = can_proto_register(&j1939_can_proto);
+	if (ret < 0) {
+		pr_err("can: registration of j1939 protocol failed\n");
+		goto fail_sk;
+	}
+
+	return 0;
+
+ fail_sk:
+	unregister_netdevice_notifier(&j1939_netdev_notifier);
+ fail_notifier:
+	return ret;
+}
+
+static __exit void j1939_module_exit(void)
+{
+	can_proto_unregister(&j1939_can_proto);
+
+	unregister_netdevice_notifier(&j1939_netdev_notifier);
+}
+
+module_init(j1939_module_init);
+module_exit(j1939_module_exit);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
new file mode 100644
index 0000000..de09b0a
--- /dev/null
+++ b/net/can/j1939/socket.c
@@ -0,0 +1,1219 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Pieter Beyens <pieter.beyens@eia.be>
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2018 Protonic,
+//                         Robin van der Gracht <robin@protonic.nl>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/can/core.h>
+#include <linux/can/skb.h>
+#include <linux/errqueue.h>
+#include <linux/if_arp.h>
+
+#include "j1939-priv.h"
+
+#define J1939_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.j1939)
+
+/* conversion function between struct sock::sk_priority from linux and
+ * j1939 priority field
+ */
+static inline priority_t j1939_prio(u32 sk_priority)
+{
+	sk_priority = min(sk_priority, 7U);
+
+	return 7 - sk_priority;
+}
+
+static inline u32 j1939_to_sk_priority(priority_t prio)
+{
+	return 7 - prio;
+}
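Note the direction flip this pair implements: on the bus, J1939 priority 0
wins arbitration, while Linux sk_priority grows with importance. So
j1939_prio(0) = 7 (least urgent), j1939_prio(6) = 1, and any sk_priority of 7
or more clamps to j1939_prio() = 0, the highest bus priority.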
+
+/* function to see if pgn is to be evaluated */
+static inline bool j1939_pgn_is_valid(pgn_t pgn)
+{
+	return pgn <= J1939_PGN_MAX;
+}
+
+/* test function to avoid non-zero DA placeholder for pdu1 pgn's */
+static inline bool j1939_pgn_is_clean_pdu(pgn_t pgn)
+{
+	if (j1939_pgn_is_pdu1(pgn))
+		return !(pgn & 0xff);
+	else
+		return true;
+}
+
+static inline void j1939_sock_pending_add(struct sock *sk)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+
+	atomic_inc(&jsk->skb_pending);
+}
+
+static int j1939_sock_pending_get(struct sock *sk)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+
+	return atomic_read(&jsk->skb_pending);
+}
+
+void j1939_sock_pending_del(struct sock *sk)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+
+	/* atomic_dec_return returns the new value */
+	if (!atomic_dec_return(&jsk->skb_pending))
+		wake_up(&jsk->waitq);	/* no pending SKB's */
+}
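These three helpers form a small completion counter: every skb queued on
behalf of the socket bumps skb_pending, every skb the stack finishes drops it,
and the transition to zero wakes a close() that may be parked in
j1939_sk_release() further down, which waits with:

	wait_event_interruptible(jsk->waitq,
				 !j1939_sock_pending_get(&jsk->sk));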
+
+static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
+{
+	jsk->state |= J1939_SOCK_BOUND;
+	j1939_priv_get(priv);
+
+	spin_lock_bh(&priv->j1939_socks_lock);
+	list_add_tail(&jsk->list, &priv->j1939_socks);
+	spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
+{
+	spin_lock_bh(&priv->j1939_socks_lock);
+	list_del_init(&jsk->list);
+	spin_unlock_bh(&priv->j1939_socks_lock);
+
+	j1939_priv_put(priv);
+	jsk->state &= ~J1939_SOCK_BOUND;
+}
+
+static bool j1939_sk_queue_session(struct j1939_session *session)
+{
+	struct j1939_sock *jsk = j1939_sk(session->sk);
+	bool empty;
+
+	spin_lock_bh(&jsk->sk_session_queue_lock);
+	empty = list_empty(&jsk->sk_session_queue);
+	j1939_session_get(session);
+	list_add_tail(&session->sk_session_queue_entry, &jsk->sk_session_queue);
+	spin_unlock_bh(&jsk->sk_session_queue_lock);
+	j1939_sock_pending_add(&jsk->sk);
+
+	return empty;
+}
+
+static struct
+j1939_session *j1939_sk_get_incomplete_session(struct j1939_sock *jsk)
+{
+	struct j1939_session *session = NULL;
+
+	spin_lock_bh(&jsk->sk_session_queue_lock);
+	if (!list_empty(&jsk->sk_session_queue)) {
+		session = list_last_entry(&jsk->sk_session_queue,
+					  struct j1939_session,
+					  sk_session_queue_entry);
+		if (session->total_queued_size == session->total_message_size)
+			session = NULL;
+		else
+			j1939_session_get(session);
+	}
+	spin_unlock_bh(&jsk->sk_session_queue_lock);
+
+	return session;
+}
+
+static void j1939_sk_queue_drop_all(struct j1939_priv *priv,
+				    struct j1939_sock *jsk, int err)
+{
+	struct j1939_session *session, *tmp;
+
+	netdev_dbg(priv->ndev, "%s: err: %i\n", __func__, err);
+	spin_lock_bh(&jsk->sk_session_queue_lock);
+	list_for_each_entry_safe(session, tmp, &jsk->sk_session_queue,
+				 sk_session_queue_entry) {
+		list_del_init(&session->sk_session_queue_entry);
+		session->err = err;
+		j1939_session_put(session);
+	}
+	spin_unlock_bh(&jsk->sk_session_queue_lock);
+}
+
+static void j1939_sk_queue_activate_next_locked(struct j1939_session *session)
+{
+	struct j1939_sock *jsk;
+	struct j1939_session *first;
+	int err;
+
+	/* RX sessions don't have a socket (yet) */
+	if (!session->sk)
+		return;
+
+	jsk = j1939_sk(session->sk);
+	lockdep_assert_held(&jsk->sk_session_queue_lock);
+
+	err = session->err;
+
+	first = list_first_entry_or_null(&jsk->sk_session_queue,
+					 struct j1939_session,
+					 sk_session_queue_entry);
+
+	/* Someone else has already activated the next session */
+	if (first != session)
+		return;
+
+activate_next:
+	list_del_init(&first->sk_session_queue_entry);
+	j1939_session_put(first);
+	first = list_first_entry_or_null(&jsk->sk_session_queue,
+					 struct j1939_session,
+					 sk_session_queue_entry);
+	if (!first)
+		return;
+
+	if (WARN_ON_ONCE(j1939_session_activate(first))) {
+		first->err = -EBUSY;
+		goto activate_next;
+	} else {
+		/* Give the receiver some time (arbitrarily chosen) to recover */
+		int time_ms = 0;
+
+		if (err)
+			time_ms = 10 + prandom_u32_max(16);
+
+		j1939_tp_schedule_txtimer(first, time_ms);
+	}
+}
+
+void j1939_sk_queue_activate_next(struct j1939_session *session)
+{
+	struct j1939_sock *jsk;
+
+	if (!session->sk)
+		return;
+
+	jsk = j1939_sk(session->sk);
+
+	spin_lock_bh(&jsk->sk_session_queue_lock);
+	j1939_sk_queue_activate_next_locked(session);
+	spin_unlock_bh(&jsk->sk_session_queue_lock);
+}
+
+static bool j1939_sk_match_dst(struct j1939_sock *jsk,
+			       const struct j1939_sk_buff_cb *skcb)
+{
+	if ((jsk->state & J1939_SOCK_PROMISC))
+		return true;
+
+	/* Destination address filter */
+	if (jsk->addr.src_name && skcb->addr.dst_name) {
+		if (jsk->addr.src_name != skcb->addr.dst_name)
+			return false;
+	} else {
+		/* receive (on all sockets):
+		 * - all packets that match our bind() address
+		 * - all broadcasts on a socket if SO_BROADCAST
+		 *   is set
+		 */
+		if (j1939_address_is_unicast(skcb->addr.da)) {
+			if (jsk->addr.sa != skcb->addr.da)
+				return false;
+		} else if (!sock_flag(&jsk->sk, SOCK_BROADCAST)) {
+			/* receiving broadcast without SO_BROADCAST
+			 * flag is not allowed
+			 */
+			return false;
+		}
+	}
+
+	/* Source address filter */
+	if (jsk->state & J1939_SOCK_CONNECTED) {
+		/* receive (on all sockets):
+		 * - all packets that match our connect() name or address
+		 */
+		if (jsk->addr.dst_name && skcb->addr.src_name) {
+			if (jsk->addr.dst_name != skcb->addr.src_name)
+				return false;
+		} else {
+			if (jsk->addr.da != skcb->addr.sa)
+				return false;
+		}
+	}
+
+	/* PGN filter */
+	if (j1939_pgn_is_valid(jsk->pgn_rx_filter) &&
+	    jsk->pgn_rx_filter != skcb->addr.pgn)
+		return false;
+
+	return true;
+}
+
+/* matches skb control buffer (addr) with a j1939 filter */
+static bool j1939_sk_match_filter(struct j1939_sock *jsk,
+				  const struct j1939_sk_buff_cb *skcb)
+{
+	const struct j1939_filter *f = jsk->filters;
+	int nfilter = jsk->nfilters;
+
+	if (!nfilter)
+		/* receive all when no filters are assigned */
+		return true;
+
+	for (; nfilter; ++f, --nfilter) {
+		if ((skcb->addr.pgn & f->pgn_mask) != f->pgn)
+			continue;
+		if ((skcb->addr.sa & f->addr_mask) != f->addr)
+			continue;
+		if ((skcb->addr.src_name & f->name_mask) != f->name)
+			continue;
+		return true;
+	}
+	return false;
+}
+
+static bool j1939_sk_recv_match_one(struct j1939_sock *jsk,
+				    const struct j1939_sk_buff_cb *skcb)
+{
+	if (!(jsk->state & J1939_SOCK_BOUND))
+		return false;
+
+	if (!j1939_sk_match_dst(jsk, skcb))
+		return false;
+
+	if (!j1939_sk_match_filter(jsk, skcb))
+		return false;
+
+	return true;
+}
+
+static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb)
+{
+	const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb);
+	struct j1939_sk_buff_cb *skcb;
+	struct sk_buff *skb;
+
+	if (oskb->sk == &jsk->sk)
+		return;
+
+	if (!j1939_sk_recv_match_one(jsk, oskcb))
+		return;
+
+	skb = skb_clone(oskb, GFP_ATOMIC);
+	if (!skb) {
+		pr_warn("skb clone failed\n");
+		return;
+	}
+	can_skb_set_owner(skb, oskb->sk);
+
+	skcb = j1939_skb_to_cb(skb);
+	skcb->msg_flags &= ~(MSG_DONTROUTE);
+	if (skb->sk)
+		skcb->msg_flags |= MSG_DONTROUTE;
+
+	if (sock_queue_rcv_skb(&jsk->sk, skb) < 0)
+		kfree_skb(skb);
+}
+
+bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
+{
+	struct j1939_sock *jsk;
+	bool match = false;
+
+	spin_lock_bh(&priv->j1939_socks_lock);
+	list_for_each_entry(jsk, &priv->j1939_socks, list) {
+		match = j1939_sk_recv_match_one(jsk, skcb);
+		if (match)
+			break;
+	}
+	spin_unlock_bh(&priv->j1939_socks_lock);
+
+	return match;
+}
+
+void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sock *jsk;
+
+	spin_lock_bh(&priv->j1939_socks_lock);
+	list_for_each_entry(jsk, &priv->j1939_socks, list) {
+		j1939_sk_recv_one(jsk, skb);
+	}
+	spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static void j1939_sk_sock_destruct(struct sock *sk)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+
+	/* This function will be called by the generic networking code when
+	 * the socket is ultimately closed (sk->sk_destruct).
+	 *
+	 * The race between
+	 * - processing a received CAN frame
+	 *   (can_receive -> j1939_can_recv)
+	 *   and accessing j1939_priv
+	 * ... and ...
+	 * - closing a socket
+	 *   (j1939_can_rx_unregister -> can_rx_unregister)
+	 *   and calling the final j1939_priv_put()
+	 *
+	 * is avoided by calling the final j1939_priv_put() from this
+	 * RCU deferred cleanup call.
+	 */
+	if (jsk->priv) {
+		j1939_priv_put(jsk->priv);
+		jsk->priv = NULL;
+	}
+
+	/* call generic CAN sock destruct */
+	can_sock_destruct(sk);
+}
+
+static int j1939_sk_init(struct sock *sk)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+
+	/* Ensure that "sk" is first member in "struct j1939_sock", so that we
+	 * can skip it during memset().
+	 */
+	BUILD_BUG_ON(offsetof(struct j1939_sock, sk) != 0);
+	memset((void *)jsk + sizeof(jsk->sk), 0x0,
+	       sizeof(*jsk) - sizeof(jsk->sk));
+
+	INIT_LIST_HEAD(&jsk->list);
+	init_waitqueue_head(&jsk->waitq);
+	jsk->sk.sk_priority = j1939_to_sk_priority(6);
+	jsk->sk.sk_reuse = 1; /* per default */
+	jsk->addr.sa = J1939_NO_ADDR;
+	jsk->addr.da = J1939_NO_ADDR;
+	jsk->addr.pgn = J1939_NO_PGN;
+	jsk->pgn_rx_filter = J1939_NO_PGN;
+	atomic_set(&jsk->skb_pending, 0);
+	spin_lock_init(&jsk->sk_session_queue_lock);
+	INIT_LIST_HEAD(&jsk->sk_session_queue);
+	sk->sk_destruct = j1939_sk_sock_destruct;
+
+	return 0;
+}
+
+static int j1939_sk_sanity_check(struct sockaddr_can *addr, int len)
+{
+	if (!addr)
+		return -EDESTADDRREQ;
+	if (len < J1939_MIN_NAMELEN)
+		return -EINVAL;
+	if (addr->can_family != AF_CAN)
+		return -EINVAL;
+	if (!addr->can_ifindex)
+		return -ENODEV;
+	if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
+	    !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct j1939_sock *jsk = j1939_sk(sock->sk);
+	struct j1939_priv *priv = jsk->priv;
+	struct sock *sk = sock->sk;
+	struct net *net = sock_net(sk);
+	int ret = 0;
+
+	ret = j1939_sk_sanity_check(addr, len);
+	if (ret)
+		return ret;
+
+	lock_sock(sock->sk);
+
+	/* Already bound to an interface? */
+	if (jsk->state & J1939_SOCK_BOUND) {
+		/* A re-bind() to a different interface is not
+		 * supported.
+		 */
+		if (jsk->ifindex != addr->can_ifindex) {
+			ret = -EINVAL;
+			goto out_release_sock;
+		}
+
+		/* drop old references */
+		j1939_jsk_del(priv, jsk);
+		j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa);
+	} else {
+		struct net_device *ndev;
+
+		ndev = dev_get_by_index(net, addr->can_ifindex);
+		if (!ndev) {
+			ret = -ENODEV;
+			goto out_release_sock;
+		}
+
+		if (ndev->type != ARPHRD_CAN) {
+			dev_put(ndev);
+			ret = -ENODEV;
+			goto out_release_sock;
+		}
+
+		priv = j1939_netdev_start(ndev);
+		dev_put(ndev);
+		if (IS_ERR(priv)) {
+			ret = PTR_ERR(priv);
+			goto out_release_sock;
+		}
+
+		jsk->ifindex = addr->can_ifindex;
+
+		/* the corresponding j1939_priv_put() is called via
+		 * sk->sk_destruct, which points to j1939_sk_sock_destruct()
+		 */
+		j1939_priv_get(priv);
+		jsk->priv = priv;
+	}
+
+	/* set the default receive pgn filter */
+	if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+		jsk->pgn_rx_filter = addr->can_addr.j1939.pgn;
+	jsk->addr.src_name = addr->can_addr.j1939.name;
+	jsk->addr.sa = addr->can_addr.j1939.addr;
+
+	/* get new references */
+	ret = j1939_local_ecu_get(priv, jsk->addr.src_name, jsk->addr.sa);
+	if (ret) {
+		j1939_netdev_stop(priv);
+		goto out_release_sock;
+	}
+
+	j1939_jsk_add(priv, jsk);
+
+ out_release_sock: /* fall through */
+	release_sock(sock->sk);
+
+	return ret;
+}
+
+static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr,
+			    int len, int flags)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct j1939_sock *jsk = j1939_sk(sock->sk);
+	int ret = 0;
+
+	ret = j1939_sk_sanity_check(addr, len);
+	if (ret)
+		return ret;
+
+	lock_sock(sock->sk);
+
+	/* bind() before connect() is mandatory */
+	if (!(jsk->state & J1939_SOCK_BOUND)) {
+		ret = -EINVAL;
+		goto out_release_sock;
+	}
+
+	/* A connect() to a different interface is not supported. */
+	if (jsk->ifindex != addr->can_ifindex) {
+		ret = -EINVAL;
+		goto out_release_sock;
+	}
+
+	if (!addr->can_addr.j1939.name &&
+	    addr->can_addr.j1939.addr == J1939_NO_ADDR &&
+	    !sock_flag(&jsk->sk, SOCK_BROADCAST)) {
+		/* broadcast, but SO_BROADCAST not set */
+		ret = -EACCES;
+		goto out_release_sock;
+	}
+
+	jsk->addr.dst_name = addr->can_addr.j1939.name;
+	jsk->addr.da = addr->can_addr.j1939.addr;
+
+	if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+		jsk->addr.pgn = addr->can_addr.j1939.pgn;
+
+	jsk->state |= J1939_SOCK_CONNECTED;
+
+ out_release_sock: /* fall through */
+	release_sock(sock->sk);
+
+	return ret;
+}
+
+static void j1939_sk_sock2sockaddr_can(struct sockaddr_can *addr,
+				       const struct j1939_sock *jsk, int peer)
+{
+	addr->can_family = AF_CAN;
+	addr->can_ifindex = jsk->ifindex;
+	addr->can_addr.j1939.pgn = jsk->addr.pgn;
+	if (peer) {
+		addr->can_addr.j1939.name = jsk->addr.dst_name;
+		addr->can_addr.j1939.addr = jsk->addr.da;
+	} else {
+		addr->can_addr.j1939.name = jsk->addr.src_name;
+		addr->can_addr.j1939.addr = jsk->addr.sa;
+	}
+}
+
+static int j1939_sk_getname(struct socket *sock, struct sockaddr *uaddr,
+			    int peer)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct sock *sk = sock->sk;
+	struct j1939_sock *jsk = j1939_sk(sk);
+	int ret = 0;
+
+	lock_sock(sk);
+
+	if (peer && !(jsk->state & J1939_SOCK_CONNECTED)) {
+		ret = -EADDRNOTAVAIL;
+		goto failure;
+	}
+
+	j1939_sk_sock2sockaddr_can(addr, jsk, peer);
+	ret = J1939_MIN_NAMELEN;
+
+ failure:
+	release_sock(sk);
+
+	return ret;
+}
+
+static int j1939_sk_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct j1939_sock *jsk;
+
+	if (!sk)
+		return 0;
+
+	lock_sock(sk);
+	jsk = j1939_sk(sk);
+
+	if (jsk->state & J1939_SOCK_BOUND) {
+		struct j1939_priv *priv = jsk->priv;
+
+		if (wait_event_interruptible(jsk->waitq,
+					     !j1939_sock_pending_get(&jsk->sk))) {
+			j1939_cancel_active_session(priv, sk);
+			j1939_sk_queue_drop_all(priv, jsk, ESHUTDOWN);
+		}
+
+		j1939_jsk_del(priv, jsk);
+
+		j1939_local_ecu_put(priv, jsk->addr.src_name,
+				    jsk->addr.sa);
+
+		j1939_netdev_stop(priv);
+	}
+
+	kfree(jsk->filters);
+	sock_orphan(sk);
+	sock->sk = NULL;
+
+	release_sock(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval,
+				    unsigned int optlen, int flag)
+{
+	int tmp;
+
+	if (optlen != sizeof(tmp))
+		return -EINVAL;
+	if (copy_from_user(&tmp, optval, optlen))
+		return -EFAULT;
+	lock_sock(&jsk->sk);
+	if (tmp)
+		jsk->state |= flag;
+	else
+		jsk->state &= ~flag;
+	release_sock(&jsk->sk);
+	return tmp;
+}
+
+static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct j1939_sock *jsk = j1939_sk(sk);
+	int tmp, count = 0, ret = 0;
+	struct j1939_filter *filters = NULL, *ofilters;
+
+	if (level != SOL_CAN_J1939)
+		return -EINVAL;
+
+	switch (optname) {
+	case SO_J1939_FILTER:
+		if (optval) {
+			struct j1939_filter *f;
+			int c;
+
+			if (optlen % sizeof(*filters) != 0)
+				return -EINVAL;
+
+			if (optlen > J1939_FILTER_MAX *
+			    sizeof(struct j1939_filter))
+				return -EINVAL;
+
+			count = optlen / sizeof(*filters);
+			filters = memdup_user(optval, optlen);
+			if (IS_ERR(filters))
+				return PTR_ERR(filters);
+
+			for (f = filters, c = count; c; f++, c--) {
+				f->name &= f->name_mask;
+				f->pgn &= f->pgn_mask;
+				f->addr &= f->addr_mask;
+			}
+		}
+
+		lock_sock(&jsk->sk);
+		ofilters = jsk->filters;
+		jsk->filters = filters;
+		jsk->nfilters = count;
+		release_sock(&jsk->sk);
+		kfree(ofilters);
+		return 0;
+	case SO_J1939_PROMISC:
+		return j1939_sk_setsockopt_flag(jsk, optval, optlen,
+						J1939_SOCK_PROMISC);
+	case SO_J1939_ERRQUEUE:
+		ret = j1939_sk_setsockopt_flag(jsk, optval, optlen,
+					       J1939_SOCK_ERRQUEUE);
+		if (ret < 0)
+			return ret;
+
+		if (!(jsk->state & J1939_SOCK_ERRQUEUE))
+			skb_queue_purge(&sk->sk_error_queue);
+		return ret;
+	case SO_J1939_SEND_PRIO:
+		if (optlen != sizeof(tmp))
+			return -EINVAL;
+		if (copy_from_user(&tmp, optval, optlen))
+			return -EFAULT;
+		if (tmp < 0 || tmp > 7)
+			return -EDOM;
+		if (tmp < 2 && !capable(CAP_NET_ADMIN))
+			return -EPERM;
+		lock_sock(&jsk->sk);
+		jsk->sk.sk_priority = j1939_to_sk_priority(tmp);
+		release_sock(&jsk->sk);
+		return 0;
+	default:
+		return -ENOPROTOOPT;
+	}
+}
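From userspace, SO_J1939_FILTER takes an array of struct j1939_filter (up to
J1939_FILTER_MAX entries). A minimal sketch, assuming the uapi header
<linux/can/j1939.h> and an already-created socket s, that accepts only PDU1
PGN 0xef00 from source address 0x80:

	struct j1939_filter filt = {
		.pgn = 0x0ef00,
		.pgn_mask = J1939_PGN_PDU1_MAX,
		.addr = 0x80,
		.addr_mask = 0xff,
	};

	setsockopt(s, SOL_CAN_J1939, SO_J1939_FILTER, &filt, sizeof(filt));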
+
+static int j1939_sk_getsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct j1939_sock *jsk = j1939_sk(sk);
+	int ret, ulen;
+	/* set defaults for using 'int' properties */
+	int tmp = 0;
+	int len = sizeof(tmp);
+	void *val = &tmp;
+
+	if (level != SOL_CAN_J1939)
+		return -EINVAL;
+	if (get_user(ulen, optlen))
+		return -EFAULT;
+	if (ulen < 0)
+		return -EINVAL;
+
+	lock_sock(&jsk->sk);
+	switch (optname) {
+	case SO_J1939_PROMISC:
+		tmp = (jsk->state & J1939_SOCK_PROMISC) ? 1 : 0;
+		break;
+	case SO_J1939_ERRQUEUE:
+		tmp = (jsk->state & J1939_SOCK_ERRQUEUE) ? 1 : 0;
+		break;
+	case SO_J1939_SEND_PRIO:
+		tmp = j1939_prio(jsk->sk.sk_priority);
+		break;
+	default:
+		ret = -ENOPROTOOPT;
+		goto no_copy;
+	}
+
+	/* copy to user, based on 'len' & 'val'.
+	 * most sockopts are 'int' properties and leave 'len' & 'val'
+	 * unchanged, modifying only 'tmp' instead
+	 */
+	if (len > ulen)
+		ret = -EFAULT;
+	else if (put_user(len, optlen))
+		ret = -EFAULT;
+	else if (copy_to_user(optval, val, len))
+		ret = -EFAULT;
+	else
+		ret = 0;
+ no_copy:
+	release_sock(&jsk->sk);
+	return ret;
+}
+
+static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
+			    size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	struct j1939_sk_buff_cb *skcb;
+	int ret = 0;
+
+	if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE))
+		return -EINVAL;
+
+	if (flags & MSG_ERRQUEUE)
+		return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939,
+					  SCM_J1939_ERRQUEUE);
+
+	skb = skb_recv_datagram(sk, flags, 0, &ret);
+	if (!skb)
+		return ret;
+
+	if (size < skb->len)
+		msg->msg_flags |= MSG_TRUNC;
+	else
+		size = skb->len;
+
+	ret = memcpy_to_msg(msg, skb->data, size);
+	if (ret < 0) {
+		skb_free_datagram(sk, skb);
+		return ret;
+	}
+
+	skcb = j1939_skb_to_cb(skb);
+	if (j1939_address_is_valid(skcb->addr.da))
+		put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_ADDR,
+			 sizeof(skcb->addr.da), &skcb->addr.da);
+
+	if (skcb->addr.dst_name)
+		put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_NAME,
+			 sizeof(skcb->addr.dst_name), &skcb->addr.dst_name);
+
+	put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_PRIO,
+		 sizeof(skcb->priority), &skcb->priority);
+
+	if (msg->msg_name) {
+		struct sockaddr_can *paddr = msg->msg_name;
+
+		msg->msg_namelen = J1939_MIN_NAMELEN;
+		memset(msg->msg_name, 0, msg->msg_namelen);
+		paddr->can_family = AF_CAN;
+		paddr->can_ifindex = skb->skb_iif;
+		paddr->can_addr.j1939.name = skcb->addr.src_name;
+		paddr->can_addr.j1939.addr = skcb->addr.sa;
+		paddr->can_addr.j1939.pgn = skcb->addr.pgn;
+	}
+
+	sock_recv_ts_and_drops(msg, sk, skb);
+	msg->msg_flags |= skcb->msg_flags;
+	skb_free_datagram(sk, skb);
+
+	return size;
+}
+
+static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev,
+					  struct sock *sk,
+					  struct msghdr *msg, size_t size,
+					  int *errcode)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+	struct j1939_sk_buff_cb *skcb;
+	struct sk_buff *skb;
+	int ret;
+
+	skb = sock_alloc_send_skb(sk,
+				  size +
+				  sizeof(struct can_frame) -
+				  sizeof(((struct can_frame *)NULL)->data) +
+				  sizeof(struct can_skb_priv),
+				  msg->msg_flags & MSG_DONTWAIT, &ret);
+	if (!skb)
+		goto failure;
+
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = ndev->ifindex;
+	can_skb_prv(skb)->skbcnt = 0;
+	skb_reserve(skb, offsetof(struct can_frame, data));
+
+	ret = memcpy_from_msg(skb_put(skb, size), msg, size);
+	if (ret < 0)
+		goto free_skb;
+
+	skb->dev = ndev;
+
+	skcb = j1939_skb_to_cb(skb);
+	memset(skcb, 0, sizeof(*skcb));
+	skcb->addr = jsk->addr;
+	skcb->priority = j1939_prio(sk->sk_priority);
+
+	if (msg->msg_name) {
+		struct sockaddr_can *addr = msg->msg_name;
+
+		if (addr->can_addr.j1939.name ||
+		    addr->can_addr.j1939.addr != J1939_NO_ADDR) {
+			skcb->addr.dst_name = addr->can_addr.j1939.name;
+			skcb->addr.da = addr->can_addr.j1939.addr;
+		}
+		if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+			skcb->addr.pgn = addr->can_addr.j1939.pgn;
+	}
+
+	*errcode = ret;
+	return skb;
+
+free_skb:
+	kfree_skb(skb);
+failure:
+	*errcode = ret;
+	return NULL;
+}
+
+static size_t j1939_sk_opt_stats_get_size(void)
+{
+	return
+		nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
+		0;
+}
+
+static struct sk_buff *
+j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
+{
+	struct sk_buff *stats;
+	u32 size;
+
+	stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC);
+	if (!stats)
+		return NULL;
+
+	if (session->skcb.addr.type == J1939_SIMPLE)
+		size = session->total_message_size;
+	else
+		size = min(session->pkt.tx_acked * 7,
+			   session->total_message_size);
+
+	nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+
+	return stats;
+}
+
+void j1939_sk_errqueue(struct j1939_session *session,
+		       enum j1939_sk_errqueue_type type)
+{
+	struct j1939_priv *priv = session->priv;
+	struct sock *sk = session->sk;
+	struct j1939_sock *jsk;
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb;
+	char *state = "UNK";
+	int err;
+
+	/* currently we have no sk for the RX session */
+	if (!sk)
+		return;
+
+	jsk = j1939_sk(sk);
+
+	if (!(jsk->state & J1939_SOCK_ERRQUEUE))
+		return;
+
+	skb = j1939_sk_get_timestamping_opt_stats(session);
+	if (!skb)
+		return;
+
+	skb->tstamp = ktime_get_real();
+
+	BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	switch (type) {
+	case J1939_ERRQUEUE_ACK:
+		if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) {
+			kfree_skb(skb);
+			return;
+		}
+
+		serr->ee.ee_errno = ENOMSG;
+		serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+		serr->ee.ee_info = SCM_TSTAMP_ACK;
+		state = "ACK";
+		break;
+	case J1939_ERRQUEUE_SCHED:
+		if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) {
+			kfree_skb(skb);
+			return;
+		}
+
+		serr->ee.ee_errno = ENOMSG;
+		serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+		serr->ee.ee_info = SCM_TSTAMP_SCHED;
+		state = "SCH";
+		break;
+	case J1939_ERRQUEUE_ABORT:
+		serr->ee.ee_errno = session->err;
+		serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+		serr->ee.ee_info = J1939_EE_INFO_TX_ABORT;
+		state = "ABT";
+		break;
+	default:
+		netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
+	}
+
+	serr->opt_stats = true;
+	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+		serr->ee.ee_data = session->tskey;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
+		   __func__, session, session->tskey, state);
+	err = sock_queue_err_skb(sk, skb);
+
+	if (err)
+		kfree_skb(skb);
+}
+
+void j1939_sk_send_loop_abort(struct sock *sk, int err)
+{
+	sk->sk_err = err;
+
+	sk->sk_error_report(sk);
+}
+
+static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk,
+			      struct msghdr *msg, size_t size)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+	struct j1939_session *session = j1939_sk_get_incomplete_session(jsk);
+	struct sk_buff *skb;
+	size_t segment_size, todo_size;
+	int ret = 0;
+
+	if (session &&
+	    session->total_message_size != session->total_queued_size + size) {
+		j1939_session_put(session);
+		return -EIO;
+	}
+
+	todo_size = size;
+
+	while (todo_size) {
+		struct j1939_sk_buff_cb *skcb;
+
+		segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE,
+				     todo_size);
+
+		/* Allocate skb for one segment */
+		skb = j1939_sk_alloc_skb(priv->ndev, sk, msg, segment_size,
+					 &ret);
+		if (ret)
+			break;
+
+		skcb = j1939_skb_to_cb(skb);
+
+		if (!session) {
+			/* at this point the size should be the full
+			 * size of the session
+			 */
+			skcb->offset = 0;
+			session = j1939_tp_send(priv, skb, size);
+			if (IS_ERR(session)) {
+				ret = PTR_ERR(session);
+				goto kfree_skb;
+			}
+			if (j1939_sk_queue_session(session)) {
+				/* try to activate the session if we are
+				 * first in the queue
+				 */
+				if (!j1939_session_activate(session)) {
+					j1939_tp_schedule_txtimer(session, 0);
+				} else {
+					ret = -EBUSY;
+					session->err = ret;
+					j1939_sk_queue_drop_all(priv, jsk,
+								EBUSY);
+					break;
+				}
+			}
+		} else {
+			skcb->offset = session->total_queued_size;
+			j1939_session_skb_queue(session, skb);
+		}
+
+		todo_size -= segment_size;
+		session->total_queued_size += segment_size;
+	}
+
+	switch (ret) {
+	case 0: /* OK */
+		if (todo_size)
+			netdev_warn(priv->ndev,
+				    "no error found and not completely queued?! %zu\n",
+				    todo_size);
+		ret = size;
+		break;
+	case -ERESTARTSYS:
+		ret = -EINTR;
+		/* fall through */
+	case -EAGAIN: /* OK */
+		if (todo_size != size)
+			ret = size - todo_size;
+		break;
+	default: /* ERROR */
+		break;
+	}
+
+	if (session)
+		j1939_session_put(session);
+
+	return ret;
+
+ kfree_skb:
+	kfree_skb(skb);
+	return ret;
+}
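Concretely: a single 4000-byte sendmsg() builds one session covering the full
size and queues three skbs of 1785, 1785 and 430 bytes at offsets 0, 1785 and
3570. The first segment creates (and, if the socket queue was empty,
activates) the session; the later ones just extend its skb_queue until
total_queued_size reaches the full size.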
+
+static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg,
+			    size_t size)
+{
+	struct sock *sk = sock->sk;
+	struct j1939_sock *jsk = j1939_sk(sk);
+	struct j1939_priv *priv;
+	int ifindex;
+	int ret;
+
+	lock_sock(sock->sk);
+	/* various socket state tests */
+	if (!(jsk->state & J1939_SOCK_BOUND)) {
+		ret = -EBADFD;
+		goto sendmsg_done;
+	}
+
+	priv = jsk->priv;
+	ifindex = jsk->ifindex;
+
+	if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR) {
+		/* no source address assigned yet */
+		ret = -EBADFD;
+		goto sendmsg_done;
+	}
+
+	/* deal with provided destination address info */
+	if (msg->msg_name) {
+		struct sockaddr_can *addr = msg->msg_name;
+
+		if (msg->msg_namelen < J1939_MIN_NAMELEN) {
+			ret = -EINVAL;
+			goto sendmsg_done;
+		}
+
+		if (addr->can_family != AF_CAN) {
+			ret = -EINVAL;
+			goto sendmsg_done;
+		}
+
+		if (addr->can_ifindex && addr->can_ifindex != ifindex) {
+			ret = -EBADFD;
+			goto sendmsg_done;
+		}
+
+		if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
+		    !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) {
+			ret = -EINVAL;
+			goto sendmsg_done;
+		}
+
+		if (!addr->can_addr.j1939.name &&
+		    addr->can_addr.j1939.addr == J1939_NO_ADDR &&
+		    !sock_flag(sk, SOCK_BROADCAST)) {
+			/* broadcast, but SO_BROADCAST not set */
+			ret = -EACCES;
+			goto sendmsg_done;
+		}
+	} else {
+		if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR &&
+		    !sock_flag(sk, SOCK_BROADCAST)) {
+			/* broadcast, but SO_BROADCAST not set */
+			ret = -EACCES;
+			goto sendmsg_done;
+		}
+	}
+
+	ret = j1939_sk_send_loop(priv, sk, msg, size);
+
+sendmsg_done:
+	release_sock(sock->sk);
+
+	return ret;
+}
+
+void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
+{
+	struct j1939_sock *jsk;
+	int error_code = ENETDOWN;
+
+	spin_lock_bh(&priv->j1939_socks_lock);
+	list_for_each_entry(jsk, &priv->j1939_socks, list) {
+		jsk->sk.sk_err = error_code;
+		if (!sock_flag(&jsk->sk, SOCK_DEAD))
+			jsk->sk.sk_error_report(&jsk->sk);
+
+		j1939_sk_queue_drop_all(priv, jsk, error_code);
+	}
+	spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+				unsigned long arg)
+{
+	/* no ioctls for socket layer -> hand it down to NIC layer */
+	return -ENOIOCTLCMD;
+}
+
+static const struct proto_ops j1939_ops = {
+	.family = PF_CAN,
+	.release = j1939_sk_release,
+	.bind = j1939_sk_bind,
+	.connect = j1939_sk_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = j1939_sk_getname,
+	.poll = datagram_poll,
+	.ioctl = j1939_sk_no_ioctlcmd,
+	.listen = sock_no_listen,
+	.shutdown = sock_no_shutdown,
+	.setsockopt = j1939_sk_setsockopt,
+	.getsockopt = j1939_sk_getsockopt,
+	.sendmsg = j1939_sk_sendmsg,
+	.recvmsg = j1939_sk_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static struct proto j1939_proto __read_mostly = {
+	.name = "CAN_J1939",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof(struct j1939_sock),
+	.init = j1939_sk_init,
+};
+
+const struct can_proto j1939_can_proto = {
+	.type = SOCK_DGRAM,
+	.protocol = CAN_J1939,
+	.ops = &j1939_ops,
+	.prot = &j1939_proto,
+};
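Taken together, j1939_ops and j1939_proto expose the stack through the
ordinary datagram API. A minimal userspace sketch (interface name, buffer and
error handling assumed, using the <linux/can/j1939.h> uapi definitions):

	int s = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
	struct sockaddr_can addr = {
		.can_family = AF_CAN,
		.can_ifindex = if_nametoindex("can0"),
	};
	struct sockaddr_can dst;

	addr.can_addr.j1939.name = J1939_NO_NAME;
	addr.can_addr.j1939.addr = 0x20;	/* our source address */
	addr.can_addr.j1939.pgn = J1939_NO_PGN;
	bind(s, (struct sockaddr *)&addr, sizeof(addr));

	dst = addr;
	dst.can_addr.j1939.addr = 0x30;		/* peer address */
	dst.can_addr.j1939.pgn = 0x0ef00;
	/* payloads up to 8 bytes leave as a single frame; anything
	 * larger goes through the (E)TP code in transport.c below
	 */
	sendto(s, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));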
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
new file mode 100644
index 0000000..9f99af5
--- /dev/null
+++ b/net/can/j1939/transport.c
@@ -0,0 +1,2063 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2018 Protonic,
+//                         Robin van der Gracht <robin@protonic.nl>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+#include <linux/can/skb.h>
+
+#include "j1939-priv.h"
+
+#define J1939_XTP_TX_RETRY_LIMIT 100
+
+#define J1939_ETP_PGN_CTL 0xc800
+#define J1939_ETP_PGN_DAT 0xc700
+#define J1939_TP_PGN_CTL 0xec00
+#define J1939_TP_PGN_DAT 0xeb00
+
+#define J1939_TP_CMD_RTS 0x10
+#define J1939_TP_CMD_CTS 0x11
+#define J1939_TP_CMD_EOMA 0x13
+#define J1939_TP_CMD_BAM 0x20
+#define J1939_TP_CMD_ABORT 0xff
+
+#define J1939_ETP_CMD_RTS 0x14
+#define J1939_ETP_CMD_CTS 0x15
+#define J1939_ETP_CMD_DPO 0x16
+#define J1939_ETP_CMD_EOMA 0x17
+#define J1939_ETP_CMD_ABORT 0xff
+
+enum j1939_xtp_abort {
+	J1939_XTP_NO_ABORT = 0,
+	J1939_XTP_ABORT_BUSY = 1,
+	/* Already in one or more connection managed sessions and
+	 * cannot support another.
+	 *
+	 * EALREADY:
+	 * Operation already in progress
+	 */
+
+	J1939_XTP_ABORT_RESOURCE = 2,
+	/* System resources were needed for another task so this
+	 * connection managed session was terminated.
+	 *
+	 * EMSGSIZE:
+	 * The socket type requires that message be sent atomically,
+	 * and the size of the message to be sent made this
+	 * impossible.
+	 */
+
+	J1939_XTP_ABORT_TIMEOUT = 3,
+	/* A timeout occurred and this is the connection abort to
+	 * close the session.
+	 *
+	 * EHOSTUNREACH:
+	 * The destination host cannot be reached (probably because
+	 * the host is down or a remote router cannot reach it).
+	 */
+
+	J1939_XTP_ABORT_GENERIC = 4,
+	/* CTS messages received when data transfer is in progress
+	 *
+	 * EBADMSG:
+	 * Not a data message
+	 */
+
+	J1939_XTP_ABORT_FAULT = 5,
+	/* Maximal retransmit request limit reached
+	 *
+	 * ENOTRECOVERABLE:
+	 * State not recoverable
+	 */
+
+	J1939_XTP_ABORT_UNEXPECTED_DATA = 6,
+	/* Unexpected data transfer packet
+	 *
+	 * ENOTCONN:
+	 * Transport endpoint is not connected
+	 */
+
+	J1939_XTP_ABORT_BAD_SEQ = 7,
+	/* Bad sequence number (and software is not able to recover)
+	 *
+	 * EILSEQ:
+	 * Illegal byte sequence
+	 */
+
+	J1939_XTP_ABORT_DUP_SEQ = 8,
+	/* Duplicate sequence number (and software is not able to
+	 * recover)
+	 */
+
+	J1939_XTP_ABORT_EDPO_UNEXPECTED = 9,
+	/* Unexpected EDPO packet (ETP) or Message size > 1785 bytes
+	 * (TP)
+	 */
+
+	J1939_XTP_ABORT_BAD_EDPO_PGN = 10,
+	/* Unexpected EDPO PGN (PGN in EDPO is bad) */
+
+	J1939_XTP_ABORT_EDPO_OUTOF_CTS = 11,
+	/* EDPO number of packets is greater than CTS */
+
+	J1939_XTP_ABORT_BAD_EDPO_OFFSET = 12,
+	/* Bad EDPO offset */
+
+	J1939_XTP_ABORT_OTHER_DEPRECATED = 13,
+	/* Deprecated. Use 250 instead (Any other reason)  */
+
+	J1939_XTP_ABORT_ECTS_UNXPECTED_PGN = 14,
+	/* Unexpected ECTS PGN (PGN in ECTS is bad) */
+
+	J1939_XTP_ABORT_ECTS_TOO_BIG = 15,
+	/* ECTS requested packets exceeds message size */
+
+	J1939_XTP_ABORT_OTHER = 250,
+	/* Any other reason (if a Connection Abort reason is
+	 * identified that is not listed in the table use code 250)
+	 */
+};
+
+static unsigned int j1939_tp_block = 255;
+static unsigned int j1939_tp_packet_delay;
+static unsigned int j1939_tp_padding = 1;
+
+/* helpers */
+static const char *j1939_xtp_abort_to_str(enum j1939_xtp_abort abort)
+{
+	switch (abort) {
+	case J1939_XTP_ABORT_BUSY:
+		return "Already in one or more connection managed sessions and cannot support another.";
+	case J1939_XTP_ABORT_RESOURCE:
+		return "System resources were needed for another task so this connection managed session was terminated.";
+	case J1939_XTP_ABORT_TIMEOUT:
+		return "A timeout occurred and this is the connection abort to close the session.";
+	case J1939_XTP_ABORT_GENERIC:
+		return "CTS messages received when data transfer is in progress";
+	case J1939_XTP_ABORT_FAULT:
+		return "Maximal retransmit request limit reached";
+	case J1939_XTP_ABORT_UNEXPECTED_DATA:
+		return "Unexpected data transfer packet";
+	case J1939_XTP_ABORT_BAD_SEQ:
+		return "Bad sequence number (and software is not able to recover)";
+	case J1939_XTP_ABORT_DUP_SEQ:
+		return "Duplicate sequence number (and software is not able to recover)";
+	case J1939_XTP_ABORT_EDPO_UNEXPECTED:
+		return "Unexpected EDPO packet (ETP) or Message size > 1785 bytes (TP)";
+	case J1939_XTP_ABORT_BAD_EDPO_PGN:
+		return "Unexpected EDPO PGN (PGN in EDPO is bad)";
+	case J1939_XTP_ABORT_EDPO_OUTOF_CTS:
+		return "EDPO number of packets is greater than CTS";
+	case J1939_XTP_ABORT_BAD_EDPO_OFFSET:
+		return "Bad EDPO offset";
+	case J1939_XTP_ABORT_OTHER_DEPRECATED:
+		return "Deprecated. Use 250 instead (Any other reason)";
+	case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN:
+		return "Unexpected ECTS PGN (PGN in ECTS is bad)";
+	case J1939_XTP_ABORT_ECTS_TOO_BIG:
+		return "ECTS requested packets exceeds message size";
+	case J1939_XTP_ABORT_OTHER:
+		return "Any other reason (if a Connection Abort reason is identified that is not listed in the table use code 250)";
+	default:
+		return "<unknown>";
+	}
+}
+
+static int j1939_xtp_abort_to_errno(struct j1939_priv *priv,
+				    enum j1939_xtp_abort abort)
+{
+	int err;
+
+	switch (abort) {
+	case J1939_XTP_NO_ABORT:
+		WARN_ON_ONCE(abort == J1939_XTP_NO_ABORT);
+		err = 0;
+		break;
+	case J1939_XTP_ABORT_BUSY:
+		err = EALREADY;
+		break;
+	case J1939_XTP_ABORT_RESOURCE:
+		err = EMSGSIZE;
+		break;
+	case J1939_XTP_ABORT_TIMEOUT:
+		err = EHOSTUNREACH;
+		break;
+	case J1939_XTP_ABORT_GENERIC:
+		err = EBADMSG;
+		break;
+	case J1939_XTP_ABORT_FAULT:
+		err = ENOTRECOVERABLE;
+		break;
+	case J1939_XTP_ABORT_UNEXPECTED_DATA:
+		err = ENOTCONN;
+		break;
+	case J1939_XTP_ABORT_BAD_SEQ:
+		err = EILSEQ;
+		break;
+	case J1939_XTP_ABORT_DUP_SEQ:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_EDPO_UNEXPECTED:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_BAD_EDPO_PGN:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_EDPO_OUTOF_CTS:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_BAD_EDPO_OFFSET:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_OTHER_DEPRECATED:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_ECTS_TOO_BIG:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_OTHER:
+		err = EPROTO;
+		break;
+	default:
+		netdev_warn(priv->ndev, "Unknown abort code %i\n", abort);
+		err = EPROTO;
+	}
+
+	return err;
+}
+
+static inline void j1939_session_list_lock(struct j1939_priv *priv)
+{
+	spin_lock_bh(&priv->active_session_list_lock);
+}
+
+static inline void j1939_session_list_unlock(struct j1939_priv *priv)
+{
+	spin_unlock_bh(&priv->active_session_list_lock);
+}
+
+void j1939_session_get(struct j1939_session *session)
+{
+	kref_get(&session->kref);
+}
+
+/* session completion functions */
+static void __j1939_session_drop(struct j1939_session *session)
+{
+	if (!session->transmission)
+		return;
+
+	j1939_sock_pending_del(session->sk);
+	sock_put(session->sk);
+}
+
+static void j1939_session_destroy(struct j1939_session *session)
+{
+	if (session->err)
+		j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
+	else
+		j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK);
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry));
+	WARN_ON_ONCE(!list_empty(&session->active_session_list_entry));
+
+	skb_queue_purge(&session->skb_queue);
+	__j1939_session_drop(session);
+	j1939_priv_put(session->priv);
+	kfree(session);
+}
+
+static void __j1939_session_release(struct kref *kref)
+{
+	struct j1939_session *session = container_of(kref, struct j1939_session,
+						     kref);
+
+	j1939_session_destroy(session);
+}
+
+void j1939_session_put(struct j1939_session *session)
+{
+	kref_put(&session->kref, __j1939_session_release);
+}
+
+static void j1939_session_txtimer_cancel(struct j1939_session *session)
+{
+	if (hrtimer_cancel(&session->txtimer))
+		j1939_session_put(session);
+}
+
+static void j1939_session_rxtimer_cancel(struct j1939_session *session)
+{
+	if (hrtimer_cancel(&session->rxtimer))
+		j1939_session_put(session);
+}
+
+void j1939_session_timers_cancel(struct j1939_session *session)
+{
+	j1939_session_txtimer_cancel(session);
+	j1939_session_rxtimer_cancel(session);
+}
+
+static inline bool j1939_cb_is_broadcast(const struct j1939_sk_buff_cb *skcb)
+{
+	return (!skcb->addr.dst_name && (skcb->addr.da == 0xff));
+}
+
+static void j1939_session_skb_drop_old(struct j1939_session *session)
+{
+	struct sk_buff *do_skb;
+	struct j1939_sk_buff_cb *do_skcb;
+	unsigned int offset_start;
+	unsigned long flags;
+
+	if (skb_queue_len(&session->skb_queue) < 2)
+		return;
+
+	offset_start = session->pkt.tx_acked * 7;
+
+	spin_lock_irqsave(&session->skb_queue.lock, flags);
+	do_skb = skb_peek(&session->skb_queue);
+	do_skcb = j1939_skb_to_cb(do_skb);
+
+	if ((do_skcb->offset + do_skb->len) < offset_start) {
+		__skb_unlink(do_skb, &session->skb_queue);
+		kfree_skb(do_skb);
+	}
+	spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+}
+
+void j1939_session_skb_queue(struct j1939_session *session,
+			     struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_priv *priv = session->priv;
+
+	j1939_ac_fixup(priv, skb);
+
+	if (j1939_address_is_unicast(skcb->addr.da) &&
+	    priv->ents[skcb->addr.da].nusers)
+		skcb->flags |= J1939_ECU_LOCAL_DST;
+
+	skcb->flags |= J1939_ECU_LOCAL_SRC;
+
+	skb_queue_tail(&session->skb_queue, skb);
+}
+
+static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	struct sk_buff *skb = NULL;
+	struct sk_buff *do_skb;
+	struct j1939_sk_buff_cb *do_skcb;
+	unsigned int offset_start;
+	unsigned long flags;
+
+	offset_start = session->pkt.dpo * 7;
+
+	spin_lock_irqsave(&session->skb_queue.lock, flags);
+	skb_queue_walk(&session->skb_queue, do_skb) {
+		do_skcb = j1939_skb_to_cb(do_skb);
+
+		if (offset_start >= do_skcb->offset &&
+		    offset_start < (do_skcb->offset + do_skb->len)) {
+			skb = do_skb;
+		}
+	}
+	spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+
+	if (!skb)
+		netdev_dbg(priv->ndev, "%s: 0x%p: no skb found for start: %i, queue size: %i\n",
+			   __func__, session, offset_start,
+			   skb_queue_len(&session->skb_queue));
+
+	return skb;
+}
+
+/* see if we are the receiver
+ * returns 0 for broadcasts, even though we do receive them
+ */
+static inline int j1939_tp_im_receiver(const struct j1939_sk_buff_cb *skcb)
+{
+	return skcb->flags & J1939_ECU_LOCAL_DST;
+}
+
+/* see if we are the sender */
+static inline int j1939_tp_im_transmitter(const struct j1939_sk_buff_cb *skcb)
+{
+	return skcb->flags & J1939_ECU_LOCAL_SRC;
+}
+
+/* see if we are involved as either receiver or transmitter */
+static int j1939_tp_im_involved(const struct j1939_sk_buff_cb *skcb, bool swap)
+{
+	if (swap)
+		return j1939_tp_im_receiver(skcb);
+	else
+		return j1939_tp_im_transmitter(skcb);
+}
+
+static int j1939_tp_im_involved_anydir(struct j1939_sk_buff_cb *skcb)
+{
+	return skcb->flags & (J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST);
+}
+
+/* extract pgn from flow-ctl message */
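+/* the PGN is stored little-endian in bytes 5..7 of the control frame,
+ * e.g. dat[5..7] = { 0x10, 0xec, 0x00 } yields PGN 0x00ec10; for PDU1
+ * format PGNs the low byte is a destination address, not part of the
+ * PGN, and is masked out (0x00ec10 -> 0x00ec00)
+ */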
+static inline pgn_t j1939_xtp_ctl_to_pgn(const u8 *dat)
+{
+	pgn_t pgn;
+
+	pgn = (dat[7] << 16) | (dat[6] << 8) | (dat[5] << 0);
+	if (j1939_pgn_is_pdu1(pgn))
+		pgn &= 0xffff00;
+	return pgn;
+}
+
+static inline unsigned int j1939_tp_ctl_to_size(const u8 *dat)
+{
+	return (dat[2] << 8) + (dat[1] << 0);
+}
+
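+/* ETP CTS/DPO frames carry a 24 bit packet number in bytes 2..4, while
+ * ETP RTS/EOMA frames carry a 32 bit total size in bytes 1..4, both
+ * little-endian
+ */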
+static inline unsigned int j1939_etp_ctl_to_packet(const u8 *dat)
+{
+	return (dat[4] << 16) | (dat[3] << 8) | (dat[2] << 0);
+}
+
+static inline unsigned int j1939_etp_ctl_to_size(const u8 *dat)
+{
+	return (dat[4] << 24) | (dat[3] << 16) |
+		(dat[2] << 8) | (dat[1] << 0);
+}
+
+/* find existing session:
+ * reverse: swap cb's src & dst
+ * there is no problem with matching broadcasts, since
+ * broadcasts (no dst, no da) would never call this
+ * with reverse == true
+ */
+static bool j1939_session_match(struct j1939_addr *se_addr,
+				struct j1939_addr *sk_addr, bool reverse)
+{
+	if (se_addr->type != sk_addr->type)
+		return false;
+
+	if (reverse) {
+		if (se_addr->src_name) {
+			if (se_addr->src_name != sk_addr->dst_name)
+				return false;
+		} else if (se_addr->sa != sk_addr->da) {
+			return false;
+		}
+
+		if (se_addr->dst_name) {
+			if (se_addr->dst_name != sk_addr->src_name)
+				return false;
+		} else if (se_addr->da != sk_addr->sa) {
+			return false;
+		}
+	} else {
+		if (se_addr->src_name) {
+			if (se_addr->src_name != sk_addr->src_name)
+				return false;
+		} else if (se_addr->sa != sk_addr->sa) {
+			return false;
+		}
+
+		if (se_addr->dst_name) {
+			if (se_addr->dst_name != sk_addr->dst_name)
+				return false;
+		} else if (se_addr->da != sk_addr->da) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static struct
+j1939_session *j1939_session_get_by_addr_locked(struct j1939_priv *priv,
+						struct list_head *root,
+						struct j1939_addr *addr,
+						bool reverse, bool transmitter)
+{
+	struct j1939_session *session;
+
+	lockdep_assert_held(&priv->active_session_list_lock);
+
+	list_for_each_entry(session, root, active_session_list_entry) {
+		j1939_session_get(session);
+		if (j1939_session_match(&session->skcb.addr, addr, reverse) &&
+		    session->transmission == transmitter)
+			return session;
+		j1939_session_put(session);
+	}
+
+	return NULL;
+}
+
+static struct
+j1939_session *j1939_session_get_simple(struct j1939_priv *priv,
+					struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+
+	lockdep_assert_held(&priv->active_session_list_lock);
+
+	list_for_each_entry(session, &priv->active_session_list,
+			    active_session_list_entry) {
+		j1939_session_get(session);
+		if (session->skcb.addr.type == J1939_SIMPLE &&
+		    session->tskey == skcb->tskey && session->sk == skb->sk)
+			return session;
+		j1939_session_put(session);
+	}
+
+	return NULL;
+}
+
+static struct
+j1939_session *j1939_session_get_by_addr(struct j1939_priv *priv,
+					 struct j1939_addr *addr,
+					 bool reverse, bool transmitter)
+{
+	struct j1939_session *session;
+
+	j1939_session_list_lock(priv);
+	session = j1939_session_get_by_addr_locked(priv,
+						   &priv->active_session_list,
+						   addr, reverse, transmitter);
+	j1939_session_list_unlock(priv);
+
+	return session;
+}
+
+static void j1939_skbcb_swap(struct j1939_sk_buff_cb *skcb)
+{
+	u8 tmp = 0;
+
+	swap(skcb->addr.dst_name, skcb->addr.src_name);
+	swap(skcb->addr.da, skcb->addr.sa);
+
+	/* swap SRC and DST flags, leave other untouched */
+	if (skcb->flags & J1939_ECU_LOCAL_SRC)
+		tmp |= J1939_ECU_LOCAL_DST;
+	if (skcb->flags & J1939_ECU_LOCAL_DST)
+		tmp |= J1939_ECU_LOCAL_SRC;
+	skcb->flags &= ~(J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST);
+	skcb->flags |= tmp;
+}
+
+static struct
+sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
+			     const struct j1939_sk_buff_cb *re_skcb,
+			     bool ctl,
+			     bool swap_src_dst)
+{
+	struct sk_buff *skb;
+	struct j1939_sk_buff_cb *skcb;
+
+	skb = alloc_skb(sizeof(struct can_frame) + sizeof(struct can_skb_priv),
+			GFP_ATOMIC);
+	if (unlikely(!skb))
+		return ERR_PTR(-ENOMEM);
+
+	skb->dev = priv->ndev;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+	/* reserve CAN header */
+	skb_reserve(skb, offsetof(struct can_frame, data));
+
+	memcpy(skb->cb, re_skcb, sizeof(skb->cb));
+	skcb = j1939_skb_to_cb(skb);
+	if (swap_src_dst)
+		j1939_skbcb_swap(skcb);
+
+	if (ctl) {
+		if (skcb->addr.type == J1939_ETP)
+			skcb->addr.pgn = J1939_ETP_PGN_CTL;
+		else
+			skcb->addr.pgn = J1939_TP_PGN_CTL;
+	} else {
+		if (skcb->addr.type == J1939_ETP)
+			skcb->addr.pgn = J1939_ETP_PGN_DAT;
+		else
+			skcb->addr.pgn = J1939_TP_PGN_DAT;
+	}
+
+	return skb;
+}
+
+/* TP transmit packet functions */
+static int j1939_tp_tx_dat(struct j1939_session *session,
+			   const u8 *dat, int len)
+{
+	struct j1939_priv *priv = session->priv;
+	struct sk_buff *skb;
+
+	skb = j1939_tp_tx_dat_new(priv, &session->skcb,
+				  false, false);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	skb_put_data(skb, dat, len);
+	if (j1939_tp_padding && len < 8)
+		memset(skb_put(skb, 8 - len), 0xff, 8 - len);
+
+	return j1939_send_one(priv, skb);
+}
+
+static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv,
+			       const struct j1939_sk_buff_cb *re_skcb,
+			       bool swap_src_dst, pgn_t pgn, const u8 *dat)
+{
+	struct sk_buff *skb;
+	u8 *skdat;
+
+	if (!j1939_tp_im_involved(re_skcb, swap_src_dst))
+		return 0;
+
+	skb = j1939_tp_tx_dat_new(priv, re_skcb, true, swap_src_dst);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	skdat = skb_put(skb, 8);
+	memcpy(skdat, dat, 5);
+	skdat[5] = (pgn >> 0);
+	skdat[6] = (pgn >> 8);
+	skdat[7] = (pgn >> 16);
+
+	return j1939_send_one(priv, skb);
+}
+
+static inline int j1939_tp_tx_ctl(struct j1939_session *session,
+				  bool swap_src_dst, const u8 *dat)
+{
+	struct j1939_priv *priv = session->priv;
+
+	return j1939_xtp_do_tx_ctl(priv, &session->skcb,
+				   swap_src_dst,
+				   session->skcb.addr.pgn, dat);
+}
+
+static int j1939_xtp_tx_abort(struct j1939_priv *priv,
+			      const struct j1939_sk_buff_cb *re_skcb,
+			      bool swap_src_dst,
+			      enum j1939_xtp_abort err,
+			      pgn_t pgn)
+{
+	u8 dat[5];
+
+	if (!j1939_tp_im_involved(re_skcb, swap_src_dst))
+		return 0;
+
+	memset(dat, 0xff, sizeof(dat));
+	dat[0] = J1939_TP_CMD_ABORT;
+	dat[1] = err;
+	return j1939_xtp_do_tx_ctl(priv, re_skcb, swap_src_dst, pgn, dat);
+}
+
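+/* an armed timer holds an extra session reference; it is dropped again by
+ * the timer handler, or by the *_timer_cancel() helpers when they stop a
+ * pending timer
+ */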
+void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec)
+{
+	j1939_session_get(session);
+	hrtimer_start(&session->txtimer, ms_to_ktime(msec),
+		      HRTIMER_MODE_REL_SOFT);
+}
+
+static inline void j1939_tp_set_rxtimeout(struct j1939_session *session,
+					  int msec)
+{
+	j1939_session_rxtimer_cancel(session);
+	j1939_session_get(session);
+	hrtimer_start(&session->rxtimer, ms_to_ktime(msec),
+		      HRTIMER_MODE_REL_SOFT);
+}
+
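+/* send the opening control frame of a transfer: TP.CM_RTS for destination
+ * specific TP, TP.CM_BAM for broadcast, or ETP.CM_RTS for extended
+ * transfers
+ */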
+static int j1939_session_tx_rts(struct j1939_session *session)
+{
+	u8 dat[8];
+	int ret;
+
+	memset(dat, 0xff, sizeof(dat));
+
+	dat[1] = (session->total_message_size >> 0);
+	dat[2] = (session->total_message_size >> 8);
+	dat[3] = session->pkt.total;
+
+	if (session->skcb.addr.type == J1939_ETP) {
+		dat[0] = J1939_ETP_CMD_RTS;
+		dat[1] = (session->total_message_size >> 0);
+		dat[2] = (session->total_message_size >> 8);
+		dat[3] = (session->total_message_size >> 16);
+		dat[4] = (session->total_message_size >> 24);
+	} else if (j1939_cb_is_broadcast(&session->skcb)) {
+		dat[0] = J1939_TP_CMD_BAM;
+		/* fake cts for broadcast */
+		session->pkt.tx = 0;
+	} else {
+		dat[0] = J1939_TP_CMD_RTS;
+		dat[4] = dat[3];
+	}
+
+	if (dat[0] == session->last_txcmd)
+		/* done already */
+		return 0;
+
+	ret = j1939_tp_tx_ctl(session, false, dat);
+	if (ret < 0)
+		return ret;
+
+	session->last_txcmd = dat[0];
+	if (dat[0] == J1939_TP_CMD_BAM)
+		j1939_tp_schedule_txtimer(session, 50);
+
+	j1939_tp_set_rxtimeout(session, 1250);
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	return 0;
+}
+
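+/* ETP only: announce the Data Packet Offset for the window that was just
+ * CTS'd; the one byte sequence number in each data frame is relative to
+ * this offset (see j1939_session_tx_dat())
+ */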
+static int j1939_session_tx_dpo(struct j1939_session *session)
+{
+	unsigned int pkt;
+	u8 dat[8];
+	int ret;
+
+	memset(dat, 0xff, sizeof(dat));
+
+	dat[0] = J1939_ETP_CMD_DPO;
+	session->pkt.dpo = session->pkt.tx_acked;
+	pkt = session->pkt.dpo;
+	dat[1] = session->pkt.last - session->pkt.tx_acked;
+	dat[2] = (pkt >> 0);
+	dat[3] = (pkt >> 8);
+	dat[4] = (pkt >> 16);
+
+	ret = j1939_tp_tx_ctl(session, false, dat);
+	if (ret < 0)
+		return ret;
+
+	session->last_txcmd = dat[0];
+	j1939_tp_set_rxtimeout(session, 1250);
+	session->pkt.tx = session->pkt.tx_acked;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	return 0;
+}
+
+static int j1939_session_tx_dat(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	struct j1939_sk_buff_cb *skcb;
+	int offset, pkt_done, pkt_end;
+	unsigned int len, pdelay;
+	struct sk_buff *se_skb;
+	const u8 *tpdat;
+	int ret = 0;
+	u8 dat[8];
+
+	se_skb = j1939_session_skb_find(session);
+	if (!se_skb)
+		return -ENOBUFS;
+
+	skcb = j1939_skb_to_cb(se_skb);
+	tpdat = se_skb->data;
+	pkt_done = 0;
+	if (session->skcb.addr.type != J1939_ETP &&
+	    j1939_cb_is_broadcast(&session->skcb))
+		pkt_end = session->pkt.total;
+	else
+		pkt_end = session->pkt.last;
+
+	while (session->pkt.tx < pkt_end) {
+		dat[0] = session->pkt.tx - session->pkt.dpo + 1;
+		offset = (session->pkt.tx * 7) - skcb->offset;
+		len = se_skb->len - offset;
+		if (len > 7)
+			len = 7;
+
+		memcpy(&dat[1], &tpdat[offset], len);
+		ret = j1939_tp_tx_dat(session, dat, len + 1);
+		if (ret < 0) {
+			/* ENOBUFS == CAN interface TX queue is full */
+			if (ret != -ENOBUFS)
+				netdev_alert(priv->ndev,
+					     "%s: 0x%p: queue data error: %i\n",
+					     __func__, session, ret);
+			break;
+		}
+
+		session->last_txcmd = 0xff;
+		pkt_done++;
+		session->pkt.tx++;
+		pdelay = j1939_cb_is_broadcast(&session->skcb) ? 50 :
+			j1939_tp_packet_delay;
+
+		if (session->pkt.tx < session->pkt.total && pdelay) {
+			j1939_tp_schedule_txtimer(session, pdelay);
+			break;
+		}
+	}
+
+	if (pkt_done)
+		j1939_tp_set_rxtimeout(session, 250);
+
+	return ret;
+}
+
+static int j1939_xtp_txnext_transmiter(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	int ret = 0;
+
+	if (!j1939_tp_im_transmitter(&session->skcb)) {
+		netdev_alert(priv->ndev, "%s: 0x%p: called by not transmitter!\n",
+			     __func__, session);
+		return -EINVAL;
+	}
+
+	switch (session->last_cmd) {
+	case 0:
+		ret = j1939_session_tx_rts(session);
+		break;
+
+	case J1939_ETP_CMD_CTS:
+		if (session->last_txcmd != J1939_ETP_CMD_DPO) {
+			ret = j1939_session_tx_dpo(session);
+			if (ret)
+				return ret;
+		}
+
+		/* fall through */
+	case J1939_TP_CMD_CTS:
+	case 0xff: /* did some data */
+	case J1939_ETP_CMD_DPO:
+	case J1939_TP_CMD_BAM:
+		ret = j1939_session_tx_dat(session);
+
+		break;
+	default:
+		netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n",
+			     __func__, session, session->last_cmd);
+	}
+
+	return ret;
+}
+
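+/* grant the transmitter the next window: up to 'len' packets, starting
+ * with packet pkt.rx + 1; a CTS with len == 0 tells the peer to wait
+ */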
+static int j1939_session_tx_cts(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	unsigned int pkt, len;
+	int ret;
+	u8 dat[8];
+
+	if (!j1939_sk_recv_match(priv, &session->skcb))
+		return -ENOENT;
+
+	len = session->pkt.total - session->pkt.rx;
+	len = min3(len, session->pkt.block, j1939_tp_block ?: 255);
+	memset(dat, 0xff, sizeof(dat));
+
+	if (session->skcb.addr.type == J1939_ETP) {
+		pkt = session->pkt.rx + 1;
+		dat[0] = J1939_ETP_CMD_CTS;
+		dat[1] = len;
+		dat[2] = (pkt >> 0);
+		dat[3] = (pkt >> 8);
+		dat[4] = (pkt >> 16);
+	} else {
+		dat[0] = J1939_TP_CMD_CTS;
+		dat[1] = len;
+		dat[2] = session->pkt.rx + 1;
+	}
+
+	if (dat[0] == session->last_txcmd)
+		/* done already */
+		return 0;
+
+	ret = j1939_tp_tx_ctl(session, true, dat);
+	if (ret < 0)
+		return ret;
+
+	if (len)
+		/* only mark cts done when len is set */
+		session->last_txcmd = dat[0];
+	j1939_tp_set_rxtimeout(session, 1250);
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	return 0;
+}
+
+static int j1939_session_tx_eoma(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	u8 dat[8];
+	int ret;
+
+	if (!j1939_sk_recv_match(priv, &session->skcb))
+		return -ENOENT;
+
+	memset(dat, 0xff, sizeof(dat));
+
+	if (session->skcb.addr.type == J1939_ETP) {
+		dat[0] = J1939_ETP_CMD_EOMA;
+		dat[1] = session->total_message_size >> 0;
+		dat[2] = session->total_message_size >> 8;
+		dat[3] = session->total_message_size >> 16;
+		dat[4] = session->total_message_size >> 24;
+	} else {
+		dat[0] = J1939_TP_CMD_EOMA;
+		dat[1] = session->total_message_size;
+		dat[2] = session->total_message_size >> 8;
+		dat[3] = session->pkt.total;
+	}
+
+	if (dat[0] == session->last_txcmd)
+		/* done already */
+		return 0;
+
+	ret = j1939_tp_tx_ctl(session, true, dat);
+	if (ret < 0)
+		return ret;
+
+	session->last_txcmd = dat[0];
+
+	/* wait for the EOMA packet to come in */
+	j1939_tp_set_rxtimeout(session, 1250);
+
+	netdev_dbg(session->priv->ndev, "%p: 0x%p\n", __func__, session);
+
+	return 0;
+}
+
+static int j1939_xtp_txnext_receiver(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	int ret = 0;
+
+	if (!j1939_tp_im_receiver(&session->skcb)) {
+		netdev_alert(priv->ndev, "%s: 0x%p: called by not receiver!\n",
+			     __func__, session);
+		return -EINVAL;
+	}
+
+	switch (session->last_cmd) {
+	case J1939_TP_CMD_RTS:
+	case J1939_ETP_CMD_RTS:
+		ret = j1939_session_tx_cts(session);
+		break;
+
+	case J1939_ETP_CMD_CTS:
+	case J1939_TP_CMD_CTS:
+	case 0xff: /* did some data */
+	case J1939_ETP_CMD_DPO:
+		if (session->skcb.addr.type == J1939_TP &&
+		    j1939_cb_is_broadcast(&session->skcb))
+			break;
+
+		if (session->pkt.rx >= session->pkt.total) {
+			ret = j1939_session_tx_eoma(session);
+		} else if (session->pkt.rx >= session->pkt.last) {
+			session->last_txcmd = 0;
+			ret = j1939_session_tx_cts(session);
+		}
+		break;
+	default:
+		netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n",
+			     __func__, session, session->last_cmd);
+	}
+
+	return ret;
+}
+
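+/* J1939_SIMPLE: send a clone of the queued frame and wait for its local
+ * echo, which ends the session again in j1939_simple_recv()
+ */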
+static int j1939_simple_txnext(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	struct sk_buff *se_skb = j1939_session_skb_find(session);
+	struct sk_buff *skb;
+	int ret;
+
+	if (!se_skb)
+		return 0;
+
+	skb = skb_clone(se_skb, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	can_skb_set_owner(skb, se_skb->sk);
+
+	j1939_tp_set_rxtimeout(session, J1939_SIMPLE_ECHO_TIMEOUT_MS);
+
+	ret = j1939_send_one(priv, skb);
+	if (ret)
+		return ret;
+
+	j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
+	j1939_sk_queue_activate_next(session);
+
+	return 0;
+}
+
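+/* remove the session from the active list and drop the list's reference;
+ * the caller must still hold a reference of its own
+ */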
+static bool j1939_session_deactivate_locked(struct j1939_session *session)
+{
+	bool active = false;
+
+	lockdep_assert_held(&session->priv->active_session_list_lock);
+
+	if (session->state >= J1939_SESSION_ACTIVE &&
+	    session->state < J1939_SESSION_ACTIVE_MAX) {
+		active = true;
+
+		list_del_init(&session->active_session_list_entry);
+		session->state = J1939_SESSION_DONE;
+		j1939_session_put(session);
+	}
+
+	return active;
+}
+
+static bool j1939_session_deactivate(struct j1939_session *session)
+{
+	bool active;
+
+	j1939_session_list_lock(session->priv);
+	active = j1939_session_deactivate_locked(session);
+	j1939_session_list_unlock(session->priv);
+
+	return active;
+}
+
+static void
+j1939_session_deactivate_activate_next(struct j1939_session *session)
+{
+	if (j1939_session_deactivate(session))
+		j1939_sk_queue_activate_next(session);
+}
+
+static void __j1939_session_cancel(struct j1939_session *session,
+				 enum j1939_xtp_abort err)
+{
+	struct j1939_priv *priv = session->priv;
+
+	WARN_ON_ONCE(!err);
+	lockdep_assert_held(&session->priv->active_session_list_lock);
+
+	session->err = j1939_xtp_abort_to_errno(priv, err);
+	/* do not send aborts on incoming broadcasts */
+	if (!j1939_cb_is_broadcast(&session->skcb)) {
+		session->state = J1939_SESSION_WAITING_ABORT;
+		j1939_xtp_tx_abort(priv, &session->skcb,
+				   !session->transmission,
+				   err, session->skcb.addr.pgn);
+	}
+
+	if (session->sk)
+		j1939_sk_send_loop_abort(session->sk, session->err);
+}
+
+static void j1939_session_cancel(struct j1939_session *session,
+				 enum j1939_xtp_abort err)
+{
+	j1939_session_list_lock(session->priv);
+
+	if (session->state >= J1939_SESSION_ACTIVE &&
+	    session->state < J1939_SESSION_WAITING_ABORT) {
+		j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+		__j1939_session_cancel(session, err);
+	}
+
+	j1939_session_list_unlock(session->priv);
+}
+
+static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
+{
+	struct j1939_session *session =
+		container_of(hrtimer, struct j1939_session, txtimer);
+	struct j1939_priv *priv = session->priv;
+	int ret = 0;
+
+	if (session->skcb.addr.type == J1939_SIMPLE) {
+		ret = j1939_simple_txnext(session);
+	} else {
+		if (session->transmission)
+			ret = j1939_xtp_txnext_transmiter(session);
+		else
+			ret = j1939_xtp_txnext_receiver(session);
+	}
+
+	switch (ret) {
+	case -ENOBUFS:
+		/* The retry limit is currently arbitrarily chosen */
+		if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) {
+			session->tx_retry++;
+			j1939_tp_schedule_txtimer(session,
+						  10 + prandom_u32_max(16));
+		} else {
+			netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n",
+				     __func__, session);
+			session->err = -ENETUNREACH;
+			j1939_session_rxtimer_cancel(session);
+			j1939_session_deactivate_activate_next(session);
+		}
+		break;
+	case -ENETDOWN:
+		/* In this case we should get a netdev_event(); all active
+		 * sessions will be cleared by j1939_cancel_active_session().
+		 * So handle this as an error, but let
+		 * j1939_cancel_active_session() do the cleanup, including
+		 * propagation of the error to user space.
+		 */
+		break;
+	case 0:
+		session->tx_retry = 0;
+		break;
+	default:
+		netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n",
+			     __func__, session, ret);
+		if (session->skcb.addr.type != J1939_SIMPLE) {
+			j1939_session_cancel(session, J1939_XTP_ABORT_OTHER);
+		} else {
+			session->err = ret;
+			j1939_session_rxtimer_cancel(session);
+			j1939_session_deactivate_activate_next(session);
+		}
+	}
+
+	j1939_session_put(session);
+
+	return HRTIMER_NORESTART;
+}
+
+static void j1939_session_completed(struct j1939_session *session)
+{
+	struct sk_buff *skb;
+
+	if (!session->transmission) {
+		skb = j1939_session_skb_find(session);
+		/* distribute among j1939 receivers */
+		j1939_sk_recv(session->priv, skb);
+	}
+
+	j1939_session_deactivate_activate_next(session);
+}
+
+static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
+{
+	struct j1939_session *session = container_of(hrtimer,
+						     struct j1939_session,
+						     rxtimer);
+	struct j1939_priv *priv = session->priv;
+
+	if (session->state == J1939_SESSION_WAITING_ABORT) {
+		netdev_alert(priv->ndev, "%s: 0x%p: abort rx timeout. Force session deactivation\n",
+			     __func__, session);
+
+		j1939_session_deactivate_activate_next(session);
+
+	} else if (session->skcb.addr.type == J1939_SIMPLE) {
+		netdev_alert(priv->ndev, "%s: 0x%p: Timeout. Failed to send simple message.\n",
+			     __func__, session);
+
+		/* The message is probably stuck in the CAN controller and can
+		 * be sent as soon as the CAN bus is in a working state again.
+		 */
+		session->err = -ETIME;
+		j1939_session_deactivate(session);
+	} else {
+		netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
+			     __func__, session);
+
+		j1939_session_list_lock(session->priv);
+		if (session->state >= J1939_SESSION_ACTIVE &&
+		    session->state < J1939_SESSION_ACTIVE_MAX) {
+			j1939_session_get(session);
+			hrtimer_start(&session->rxtimer,
+				      ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
+				      HRTIMER_MODE_REL_SOFT);
+			__j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
+		}
+		j1939_session_list_unlock(session->priv);
+	}
+
+	j1939_session_put(session);
+
+	return HRTIMER_NORESTART;
+}
+
+static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
+				     const struct sk_buff *skb)
+{
+	const struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	pgn_t pgn = j1939_xtp_ctl_to_pgn(skb->data);
+	struct j1939_priv *priv = session->priv;
+	enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT;
+	u8 cmd = skb->data[0];
+
+	if (session->skcb.addr.pgn == pgn)
+		return false;
+
+	switch (cmd) {
+	case J1939_TP_CMD_BAM:
+		abort = J1939_XTP_NO_ABORT;
+		break;
+
+	case J1939_ETP_CMD_RTS:
+	case J1939_TP_CMD_RTS: /* fall through */
+		abort = J1939_XTP_ABORT_BUSY;
+		break;
+
+	case J1939_ETP_CMD_CTS:
+	case J1939_TP_CMD_CTS: /* fall through */
+		abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN;
+		break;
+
+	case J1939_ETP_CMD_DPO:
+		abort = J1939_XTP_ABORT_BAD_EDPO_PGN;
+		break;
+
+	case J1939_ETP_CMD_EOMA:
+	case J1939_TP_CMD_EOMA: /* fall through */
+		abort = J1939_XTP_ABORT_OTHER;
+		break;
+
+	case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
+		abort = J1939_XTP_NO_ABORT;
+		break;
+
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+	netdev_warn(priv->ndev, "%s: 0x%p: CMD 0x%02x with PGN 0x%05x for running session with different PGN 0x%05x.\n",
+		    __func__, session, cmd, pgn, session->skcb.addr.pgn);
+	if (abort != J1939_XTP_NO_ABORT)
+		j1939_xtp_tx_abort(priv, skcb, true, abort, pgn);
+
+	return true;
+}
+
+static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb,
+				   bool reverse, bool transmitter)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+	u8 abort = skb->data[1];
+
+	session = j1939_session_get_by_addr(priv, &skcb->addr, reverse,
+					    transmitter);
+	if (!session)
+		return;
+
+	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+		goto abort_put;
+
+	netdev_info(priv->ndev, "%s: 0x%p: 0x%05x: (%u) %s\n", __func__,
+		    session, j1939_xtp_ctl_to_pgn(skb->data), abort,
+		    j1939_xtp_abort_to_str(abort));
+
+	j1939_session_timers_cancel(session);
+	session->err = j1939_xtp_abort_to_errno(priv, abort);
+	if (session->sk)
+		j1939_sk_send_loop_abort(session->sk, session->err);
+	j1939_session_deactivate_activate_next(session);
+
+abort_put:
+	j1939_session_put(session);
+}
+
+/* abort packets may come in 2 directions */
+static void
+j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb,
+		   bool transmitter)
+{
+	j1939_xtp_rx_abort_one(priv, skb, false, transmitter);
+	j1939_xtp_rx_abort_one(priv, skb, true, transmitter);
+}
+
+static void
+j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	const u8 *dat;
+	int len;
+
+	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+		return;
+
+	dat = skb->data;
+
+	if (skcb->addr.type == J1939_ETP)
+		len = j1939_etp_ctl_to_size(dat);
+	else
+		len = j1939_tp_ctl_to_size(dat);
+
+	if (session->total_message_size != len) {
+		netdev_warn_once(session->priv->ndev,
+				 "%s: 0x%p: Incorrect size. Expected: %i; got: %i.\n",
+				 __func__, session, session->total_message_size,
+				 len);
+	}
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	session->pkt.tx_acked = session->pkt.total;
+	j1939_session_timers_cancel(session);
+	/* transmitted without problems */
+	j1939_session_completed(session);
+}
+
+static void
+j1939_xtp_rx_eoma(struct j1939_priv *priv, struct sk_buff *skb,
+		  bool transmitter)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+
+	session = j1939_session_get_by_addr(priv, &skcb->addr, true,
+					    transmitter);
+	if (!session)
+		return;
+
+	j1939_xtp_rx_eoma_one(session, skb);
+	j1939_session_put(session);
+}
+
+static void
+j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
+{
+	enum j1939_xtp_abort err = J1939_XTP_ABORT_FAULT;
+	unsigned int pkt;
+	const u8 *dat;
+
+	dat = skb->data;
+
+	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+		return;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	if (session->last_cmd == dat[0]) {
+		err = J1939_XTP_ABORT_DUP_SEQ;
+		goto out_session_cancel;
+	}
+
+	if (session->skcb.addr.type == J1939_ETP)
+		pkt = j1939_etp_ctl_to_packet(dat);
+	else
+		pkt = dat[2];
+
+	if (!pkt)
+		goto out_session_cancel;
+	else if (dat[1] > session->pkt.block /* 0xff for etp */)
+		goto out_session_cancel;
+
+	/* set packet counters only when not CTS(0) */
+	session->pkt.tx_acked = pkt - 1;
+	j1939_session_skb_drop_old(session);
+	session->pkt.last = session->pkt.tx_acked + dat[1];
+	if (session->pkt.last > session->pkt.total)
+		/* safety measure */
+		session->pkt.last = session->pkt.total;
+	/* TODO: do not set tx here, do it in txtimer */
+	session->pkt.tx = session->pkt.tx_acked;
+
+	session->last_cmd = dat[0];
+	if (dat[1]) {
+		j1939_tp_set_rxtimeout(session, 1250);
+		if (session->transmission) {
+			if (session->pkt.tx_acked)
+				j1939_sk_errqueue(session,
+						  J1939_ERRQUEUE_SCHED);
+			j1939_session_txtimer_cancel(session);
+			j1939_tp_schedule_txtimer(session, 0);
+		}
+	} else {
+		/* CTS(0) */
+		j1939_tp_set_rxtimeout(session, 550);
+	}
+	return;
+
+ out_session_cancel:
+	j1939_session_timers_cancel(session);
+	j1939_session_cancel(session, err);
+}
+
+static void
+j1939_xtp_rx_cts(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+
+	session = j1939_session_get_by_addr(priv, &skcb->addr, true,
+					    transmitter);
+	if (!session)
+		return;
+	j1939_xtp_rx_cts_one(session, skb);
+	j1939_session_put(session);
+}
+
+static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
+					       struct sk_buff *skb, size_t size)
+{
+	struct j1939_session *session;
+	struct j1939_sk_buff_cb *skcb;
+
+	session = kzalloc(sizeof(*session), gfp_any());
+	if (!session)
+		return NULL;
+
+	INIT_LIST_HEAD(&session->active_session_list_entry);
+	INIT_LIST_HEAD(&session->sk_session_queue_entry);
+	kref_init(&session->kref);
+
+	j1939_priv_get(priv);
+	session->priv = priv;
+	session->total_message_size = size;
+	session->state = J1939_SESSION_NEW;
+
+	skb_queue_head_init(&session->skb_queue);
+	skb_queue_tail(&session->skb_queue, skb);
+
+	skcb = j1939_skb_to_cb(skb);
+	memcpy(&session->skcb, skcb, sizeof(session->skcb));
+
+	hrtimer_init(&session->txtimer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_SOFT);
+	session->txtimer.function = j1939_tp_txtimer;
+	hrtimer_init(&session->rxtimer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_SOFT);
+	session->rxtimer.function = j1939_tp_rxtimer;
+
+	netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n",
+		   __func__, session, skcb->addr.sa, skcb->addr.da);
+
+	return session;
+}
+
+static struct
+j1939_session *j1939_session_fresh_new(struct j1939_priv *priv,
+				       int size,
+				       const struct j1939_sk_buff_cb *rel_skcb)
+{
+	struct sk_buff *skb;
+	struct j1939_sk_buff_cb *skcb;
+	struct j1939_session *session;
+
+	skb = alloc_skb(size + sizeof(struct can_skb_priv), GFP_ATOMIC);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb->dev = priv->ndev;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+	skcb = j1939_skb_to_cb(skb);
+	memcpy(skcb, rel_skcb, sizeof(*skcb));
+
+	session = j1939_session_new(priv, skb, size);
+	if (!session) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	/* alloc data area */
+	skb_put(skb, size);
+	/* skb is refcounted in j1939_session_new() */
+	return session;
+}
+
+int j1939_session_activate(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	struct j1939_session *active = NULL;
+	int ret = 0;
+
+	j1939_session_list_lock(priv);
+	if (session->skcb.addr.type != J1939_SIMPLE)
+		active = j1939_session_get_by_addr_locked(priv,
+							  &priv->active_session_list,
+							  &session->skcb.addr, false,
+							  session->transmission);
+	if (active) {
+		j1939_session_put(active);
+		ret = -EAGAIN;
+	} else {
+		WARN_ON_ONCE(session->state != J1939_SESSION_NEW);
+		list_add_tail(&session->active_session_list_entry,
+			      &priv->active_session_list);
+		j1939_session_get(session);
+		session->state = J1939_SESSION_ACTIVE;
+
+		netdev_dbg(session->priv->ndev, "%s: 0x%p\n",
+			   __func__, session);
+	}
+	j1939_session_list_unlock(priv);
+
+	return ret;
+}
+
+static struct
+j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
+					    struct sk_buff *skb)
+{
+	enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT;
+	struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+	const u8 *dat;
+	pgn_t pgn;
+	int len;
+
+	netdev_dbg(priv->ndev, "%s\n", __func__);
+
+	dat = skb->data;
+	pgn = j1939_xtp_ctl_to_pgn(dat);
+	skcb.addr.pgn = pgn;
+
+	if (!j1939_sk_recv_match(priv, &skcb))
+		return NULL;
+
+	if (skcb.addr.type == J1939_ETP) {
+		len = j1939_etp_ctl_to_size(dat);
+		if (len > J1939_MAX_ETP_PACKET_SIZE)
+			abort = J1939_XTP_ABORT_FAULT;
+		else if (len > priv->tp_max_packet_size)
+			abort = J1939_XTP_ABORT_RESOURCE;
+		else if (len <= J1939_MAX_TP_PACKET_SIZE)
+			abort = J1939_XTP_ABORT_FAULT;
+	} else {
+		len = j1939_tp_ctl_to_size(dat);
+		if (len > J1939_MAX_TP_PACKET_SIZE)
+			abort = J1939_XTP_ABORT_FAULT;
+		else if (len > priv->tp_max_packet_size)
+			abort = J1939_XTP_ABORT_RESOURCE;
+	}
+
+	if (abort != J1939_XTP_NO_ABORT) {
+		j1939_xtp_tx_abort(priv, &skcb, true, abort, pgn);
+		return NULL;
+	}
+
+	session = j1939_session_fresh_new(priv, len, &skcb);
+	if (!session) {
+		j1939_xtp_tx_abort(priv, &skcb, true,
+				   J1939_XTP_ABORT_RESOURCE, pgn);
+		return NULL;
+	}
+
+	/* initialize the control buffer: plain copy */
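+	/* provisional packet count, 7 payload bytes per frame, rounded up;
+	 * for TP the count announced in the RTS (dat[3]) takes precedence
+	 * below
+	 */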
+	session->pkt.total = (len + 6) / 7;
+	session->pkt.block = 0xff;
+	if (skcb.addr.type != J1939_ETP) {
+		if (dat[3] != session->pkt.total)
+			netdev_alert(priv->ndev, "%s: 0x%p: strange total, %u != %u\n",
+				     __func__, session, session->pkt.total,
+				     dat[3]);
+		session->pkt.total = dat[3];
+		session->pkt.block = min(dat[3], dat[4]);
+	}
+
+	session->pkt.rx = 0;
+	session->pkt.tx = 0;
+
+	WARN_ON_ONCE(j1939_session_activate(session));
+
+	return session;
+}
+
+static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
+					   struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_priv *priv = session->priv;
+
+	if (!session->transmission) {
+		if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+			return -EBUSY;
+
+		/* RTS on active session */
+		j1939_session_timers_cancel(session);
+		j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+	}
+
+	if (session->last_cmd != 0) {
+		/* we received a second rts on the same connection */
+		netdev_alert(priv->ndev, "%s: 0x%p: connection exists (%02x %02x). last cmd: %x\n",
+			     __func__, session, skcb->addr.sa, skcb->addr.da,
+			     session->last_cmd);
+
+		j1939_session_timers_cancel(session);
+		j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+
+		return -EBUSY;
+	}
+
+	if (session->skcb.addr.sa != skcb->addr.sa ||
+	    session->skcb.addr.da != skcb->addr.da)
+		netdev_warn(priv->ndev, "%s: 0x%p: session->skcb.addr.sa=0x%02x skcb->addr.sa=0x%02x session->skcb.addr.da=0x%02x skcb->addr.da=0x%02x\n",
+			    __func__, session,
+			    session->skcb.addr.sa, skcb->addr.sa,
+			    session->skcb.addr.da, skcb->addr.da);
+	/* make sure 'sa' & 'da' are correct !
+	 * They may be 'not filled in yet' for sending
+	 * skb's, since they did not pass the Address Claim ever.
+	 */
+	session->skcb.addr.sa = skcb->addr.sa;
+	session->skcb.addr.da = skcb->addr.da;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	return 0;
+}
+
+static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb,
+			     bool transmitter)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+	u8 cmd = skb->data[0];
+
+	session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+					    transmitter);
+
+	if (!session) {
+		if (transmitter) {
+			/* If we're the transmitter and this function is called,
+			 * we received our own RTS. A session has already been
+			 * created.
+			 *
+			 * For some reason, however, it might already have
+			 * been destroyed. So don't create a new one here
+			 * (using "j1939_xtp_rx_rts_session_new()"), as that
+			 * would be a receiver session.
+			 *
+			 * The reasons the session might already be destroyed
+			 * are:
+			 * - user space closed the socket and the session was
+			 *   aborted
+			 * - the session was aborted due to an external abort
+			 *   message
+			 */
+			return;
+		}
+		session = j1939_xtp_rx_rts_session_new(priv, skb);
+		if (!session)
+			return;
+	} else {
+		if (j1939_xtp_rx_rts_session_active(session, skb)) {
+			j1939_session_put(session);
+			return;
+		}
+	}
+	session->last_cmd = cmd;
+
+	j1939_tp_set_rxtimeout(session, 1250);
+
+	if (cmd != J1939_TP_CMD_BAM && !session->transmission) {
+		j1939_session_txtimer_cancel(session);
+		j1939_tp_schedule_txtimer(session, 0);
+	}
+
+	j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dpo_one(struct j1939_session *session,
+				 struct sk_buff *skb)
+{
+	const u8 *dat = skb->data;
+
+	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+		return;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	/* adopt the Data Packet Offset announced by the peer */
+	session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
+	session->last_cmd = dat[0];
+	j1939_tp_set_rxtimeout(session, 750);
+}
+
+static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
+			     bool transmitter)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+
+	session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+					    transmitter);
+	if (!session) {
+		netdev_info(priv->ndev,
+			    "%s: no connection found\n", __func__);
+		return;
+	}
+
+	j1939_xtp_rx_dpo_one(session, skb);
+	j1939_session_put(session);
+}
+
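+/* copy one received data frame into the reassembly skb at offset
+ * (sequence + dpo - 1) * 7; consumes the caller's session reference
+ */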
+static void j1939_xtp_rx_dat_one(struct j1939_session *session,
+				 struct sk_buff *skb)
+{
+	struct j1939_priv *priv = session->priv;
+	struct j1939_sk_buff_cb *skcb;
+	struct sk_buff *se_skb;
+	const u8 *dat;
+	u8 *tpdat;
+	int offset;
+	int nbytes;
+	bool final = false;
+	bool do_cts_eoma = false;
+	int packet;
+
+	skcb = j1939_skb_to_cb(skb);
+	dat = skb->data;
+	if (skb->len <= 1)
+		/* makes no sense */
+		goto out_session_cancel;
+
+	switch (session->last_cmd) {
+	case 0xff:
+		break;
+	case J1939_ETP_CMD_DPO:
+		if (skcb->addr.type == J1939_ETP)
+			break;
+		/* fall through */
+	case J1939_TP_CMD_BAM: /* fall through */
+	case J1939_TP_CMD_CTS: /* fall through */
+		if (skcb->addr.type != J1939_ETP)
+			break;
+		/* fall through */
+	default:
+		netdev_info(priv->ndev, "%s: 0x%p: last %02x\n", __func__,
+			    session, session->last_cmd);
+		goto out_session_cancel;
+	}
+
+	packet = (dat[0] - 1 + session->pkt.dpo);
+	if (packet > session->pkt.total ||
+	    (session->pkt.rx + 1) > session->pkt.total) {
+		netdev_info(priv->ndev, "%s: 0x%p: should have been completed\n",
+			    __func__, session);
+		goto out_session_cancel;
+	}
+	se_skb = j1939_session_skb_find(session);
+	if (!se_skb) {
+		netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__,
+			    session);
+		goto out_session_cancel;
+	}
+
+	skcb = j1939_skb_to_cb(se_skb);
+	offset = packet * 7 - skcb->offset;
+	nbytes = se_skb->len - offset;
+	if (nbytes > 7)
+		nbytes = 7;
+	if (nbytes <= 0 || (nbytes + 1) > skb->len) {
+		netdev_info(priv->ndev, "%s: 0x%p: nbytes %i, len %i\n",
+			    __func__, session, nbytes, skb->len);
+		goto out_session_cancel;
+	}
+
+	tpdat = se_skb->data;
+	memcpy(&tpdat[offset], &dat[1], nbytes);
+	if (packet == session->pkt.rx)
+		session->pkt.rx++;
+
+	if (skcb->addr.type != J1939_ETP &&
+	    j1939_cb_is_broadcast(&session->skcb)) {
+		if (session->pkt.rx >= session->pkt.total)
+			final = true;
+	} else {
+		/* never final, an EOMA must follow */
+		if (session->pkt.rx >= session->pkt.last)
+			do_cts_eoma = true;
+	}
+
+	if (final) {
+		j1939_session_completed(session);
+	} else if (do_cts_eoma) {
+		j1939_tp_set_rxtimeout(session, 1250);
+		if (!session->transmission)
+			j1939_tp_schedule_txtimer(session, 0);
+	} else {
+		j1939_tp_set_rxtimeout(session, 250);
+	}
+	session->last_cmd = 0xff;
+	j1939_session_put(session);
+
+	return;
+
+ out_session_cancel:
+	j1939_session_timers_cancel(session);
+	j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
+	j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb;
+	struct j1939_session *session;
+
+	skcb = j1939_skb_to_cb(skb);
+
+	if (j1939_tp_im_transmitter(skcb)) {
+		session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+						    true);
+		if (!session)
+			netdev_info(priv->ndev, "%s: no tx connection found\n",
+				    __func__);
+		else
+			j1939_xtp_rx_dat_one(session, skb);
+	}
+
+	if (j1939_tp_im_receiver(skcb)) {
+		session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+						    false);
+		if (!session)
+			netdev_info(priv->ndev, "%s: no rx connection found\n",
+				    __func__);
+		else
+			j1939_xtp_rx_dat_one(session, skb);
+	}
+}
+
+/* j1939 main intf */
+struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
+				    struct sk_buff *skb, size_t size)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+	int ret;
+
+	if (skcb->addr.pgn == J1939_TP_PGN_DAT ||
+	    skcb->addr.pgn == J1939_TP_PGN_CTL ||
+	    skcb->addr.pgn == J1939_ETP_PGN_DAT ||
+	    skcb->addr.pgn == J1939_ETP_PGN_CTL)
+		/* avoid conflict */
+		return ERR_PTR(-EDOM);
+
+	if (size > priv->tp_max_packet_size)
+		return ERR_PTR(-EMSGSIZE);
+
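+	/* pick the session type by size: up to 8 bytes fits into a single
+	 * CAN frame (J1939_SIMPLE), up to J1939_MAX_TP_PACKET_SIZE
+	 * (255 packets * 7 bytes = 1785 bytes) uses TP, anything larger
+	 * uses ETP
+	 */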
+	if (size <= 8)
+		skcb->addr.type = J1939_SIMPLE;
+	else if (size > J1939_MAX_TP_PACKET_SIZE)
+		skcb->addr.type = J1939_ETP;
+	else
+		skcb->addr.type = J1939_TP;
+
+	if (skcb->addr.type == J1939_ETP &&
+	    j1939_cb_is_broadcast(skcb))
+		return ERR_PTR(-EDESTADDRREQ);
+
+	/* fill in addresses from names */
+	ret = j1939_ac_fixup(priv, skb);
+	if (unlikely(ret))
+		return ERR_PTR(ret);
+
+	/* fix DST flags, it may be used there soon */
+	if (j1939_address_is_unicast(skcb->addr.da) &&
+	    priv->ents[skcb->addr.da].nusers)
+		skcb->flags |= J1939_ECU_LOCAL_DST;
+
+	/* src is always local, I'm sending ... */
+	skcb->flags |= J1939_ECU_LOCAL_SRC;
+
+	/* prepare new session */
+	session = j1939_session_new(priv, skb, size);
+	if (!session)
+		return ERR_PTR(-ENOMEM);
+
+	/* skb is refcounted in j1939_session_new() */
+	sock_hold(skb->sk);
+	session->sk = skb->sk;
+	session->transmission = true;
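+	/* 7 payload bytes per data frame, rounded up */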
+	session->pkt.total = (size + 6) / 7;
+	session->pkt.block = skcb->addr.type == J1939_ETP ? 255 :
+		min(j1939_tp_block ?: 255, session->pkt.total);
+
+	if (j1939_cb_is_broadcast(&session->skcb))
+		/* set the end-packet for broadcast */
+		session->pkt.last = session->pkt.total;
+
+	skcb->tskey = session->sk->sk_tskey++;
+	session->tskey = skcb->tskey;
+
+	return session;
+}
+
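+/* dispatch a (E)TP control frame to the transmitter and/or receiver side
+ * session, depending on which role(s) this node plays for it
+ */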
+static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	int extd = J1939_TP;
+	u8 cmd = skb->data[0];
+
+	switch (cmd) {
+	case J1939_ETP_CMD_RTS:
+		extd = J1939_ETP;
+		/* fall through */
+	case J1939_TP_CMD_BAM: /* fall through */
+	case J1939_TP_CMD_RTS: /* fall through */
+		if (skcb->addr.type != extd)
+			return;
+
+		if (cmd == J1939_TP_CMD_RTS && j1939_cb_is_broadcast(skcb)) {
+			netdev_alert(priv->ndev, "%s: rts without destination (%02x)\n",
+				     __func__, skcb->addr.sa);
+			return;
+		}
+
+		if (j1939_tp_im_transmitter(skcb))
+			j1939_xtp_rx_rts(priv, skb, true);
+
+		if (j1939_tp_im_receiver(skcb))
+			j1939_xtp_rx_rts(priv, skb, false);
+
+		break;
+
+	case J1939_ETP_CMD_CTS:
+		extd = J1939_ETP;
+		/* fall through */
+	case J1939_TP_CMD_CTS:
+		if (skcb->addr.type != extd)
+			return;
+
+		if (j1939_tp_im_transmitter(skcb))
+			j1939_xtp_rx_cts(priv, skb, false);
+
+		if (j1939_tp_im_receiver(skcb))
+			j1939_xtp_rx_cts(priv, skb, true);
+
+		break;
+
+	case J1939_ETP_CMD_DPO:
+		if (skcb->addr.type != J1939_ETP)
+			return;
+
+		if (j1939_tp_im_transmitter(skcb))
+			j1939_xtp_rx_dpo(priv, skb, true);
+
+		if (j1939_tp_im_receiver(skcb))
+			j1939_xtp_rx_dpo(priv, skb, false);
+
+		break;
+
+	case J1939_ETP_CMD_EOMA:
+		extd = J1939_ETP;
+		/* fall through */
+	case J1939_TP_CMD_EOMA:
+		if (skcb->addr.type != extd)
+			return;
+
+		if (j1939_tp_im_transmitter(skcb))
+			j1939_xtp_rx_eoma(priv, skb, false);
+
+		if (j1939_tp_im_receiver(skcb))
+			j1939_xtp_rx_eoma(priv, skb, true);
+
+		break;
+
+	case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
+		if (j1939_tp_im_transmitter(skcb))
+			j1939_xtp_rx_abort(priv, skb, true);
+
+		if (j1939_tp_im_receiver(skcb))
+			j1939_xtp_rx_abort(priv, skb, false);
+
+		break;
+	default:
+		return;
+	}
+}
+
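+/* main rx entry point for (E)TP frames; returns 1 when the frame was
+ * consumed by the transport layer, 0 otherwise
+ */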
+int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+
+	if (!j1939_tp_im_involved_anydir(skcb))
+		return 0;
+
+	switch (skcb->addr.pgn) {
+	case J1939_ETP_PGN_DAT:
+		skcb->addr.type = J1939_ETP;
+		/* fall through */
+	case J1939_TP_PGN_DAT:
+		j1939_xtp_rx_dat(priv, skb);
+		break;
+
+	case J1939_ETP_PGN_CTL:
+		skcb->addr.type = J1939_ETP;
+		/* fall through */
+	case J1939_TP_PGN_CTL:
+		if (skb->len < 8)
+			return 0; /* Don't care. Nothing to extract here */
+
+		j1939_tp_cmd_recv(priv, skb);
+		break;
+	default:
+		return 0; /* no problem */
+	}
+	return 1; /* "I processed the message" */
+}
+
+void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_session *session;
+
+	if (!skb->sk)
+		return;
+
+	j1939_session_list_lock(priv);
+	session = j1939_session_get_simple(priv, skb);
+	j1939_session_list_unlock(priv);
+	if (!session) {
+		netdev_warn(priv->ndev,
+			    "%s: Received already invalidated message\n",
+			    __func__);
+		return;
+	}
+
+	j1939_session_timers_cancel(session);
+	j1939_session_deactivate(session);
+	j1939_session_put(session);
+}
+
+int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk)
+{
+	struct j1939_session *session, *saved;
+
+	netdev_dbg(priv->ndev, "%s, sk: %p\n", __func__, sk);
+	j1939_session_list_lock(priv);
+	list_for_each_entry_safe(session, saved,
+				 &priv->active_session_list,
+				 active_session_list_entry) {
+		if (!sk || sk == session->sk) {
+			if (hrtimer_try_to_cancel(&session->txtimer) == 1)
+				j1939_session_put(session);
+			if (hrtimer_try_to_cancel(&session->rxtimer) == 1)
+				j1939_session_put(session);
+
+			session->err = ESHUTDOWN;
+			j1939_session_deactivate_locked(session);
+		}
+	}
+	j1939_session_list_unlock(priv);
+	return NOTIFY_DONE;
+}
+
+void j1939_tp_init(struct j1939_priv *priv)
+{
+	spin_lock_init(&priv->active_session_list_lock);
+	INIT_LIST_HEAD(&priv->active_session_list);
+	priv->tp_max_packet_size = J1939_MAX_ETP_PACKET_SIZE;
+}
diff --git a/net/can/proc.c b/net/can/proc.c
index 70fea17..e6881bf 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 /*
  * proc.c - procfs support for Protocol family CAN core module
  *
@@ -44,6 +45,7 @@
 #include <linux/list.h>
 #include <linux/rcupdate.h>
 #include <linux/if_arp.h>
+#include <linux/can/can-ml.h>
 #include <linux/can/core.h>
 
 #include "af_can.h"
@@ -77,21 +79,21 @@
 
 static void can_init_stats(struct net *net)
 {
-	struct s_stats *can_stats = net->can.can_stats;
-	struct s_pstats *can_pstats = net->can.can_pstats;
+	struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
+	struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
 	/*
 	 * This memset function is called from a timer context (when
 	 * can_stattimer is active which is the default) OR in a process
 	 * context (reading the proc_fs when can_stattimer is disabled).
 	 */
-	memset(can_stats, 0, sizeof(struct s_stats));
-	can_stats->jiffies_init = jiffies;
+	memset(pkg_stats, 0, sizeof(struct can_pkg_stats));
+	pkg_stats->jiffies_init = jiffies;
 
-	can_pstats->stats_reset++;
+	rcv_lists_stats->stats_reset++;
 
 	if (user_reset) {
 		user_reset = 0;
-		can_pstats->user_reset++;
+		rcv_lists_stats->user_reset++;
 	}
 }
 
@@ -117,8 +119,8 @@
 
 void can_stat_update(struct timer_list *t)
 {
-	struct net *net = from_timer(net, t, can.can_stattimer);
-	struct s_stats *can_stats = net->can.can_stats;
+	struct net *net = from_timer(net, t, can.stattimer);
+	struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
 	unsigned long j = jiffies; /* snapshot */
 
 	/* restart counting in timer context on user request */
@@ -126,57 +128,57 @@
 		can_init_stats(net);
 
 	/* restart counting on jiffies overflow */
-	if (j < can_stats->jiffies_init)
+	if (j < pkg_stats->jiffies_init)
 		can_init_stats(net);
 
 	/* prevent overflow in calc_rate() */
-	if (can_stats->rx_frames > (ULONG_MAX / HZ))
+	if (pkg_stats->rx_frames > (ULONG_MAX / HZ))
 		can_init_stats(net);
 
 	/* prevent overflow in calc_rate() */
-	if (can_stats->tx_frames > (ULONG_MAX / HZ))
+	if (pkg_stats->tx_frames > (ULONG_MAX / HZ))
 		can_init_stats(net);
 
 	/* matches overflow - very improbable */
-	if (can_stats->matches > (ULONG_MAX / 100))
+	if (pkg_stats->matches > (ULONG_MAX / 100))
 		can_init_stats(net);
 
 	/* calc total values */
-	if (can_stats->rx_frames)
-		can_stats->total_rx_match_ratio = (can_stats->matches * 100) /
-			can_stats->rx_frames;
+	if (pkg_stats->rx_frames)
+		pkg_stats->total_rx_match_ratio = (pkg_stats->matches * 100) /
+			pkg_stats->rx_frames;
 
-	can_stats->total_tx_rate = calc_rate(can_stats->jiffies_init, j,
-					    can_stats->tx_frames);
-	can_stats->total_rx_rate = calc_rate(can_stats->jiffies_init, j,
-					    can_stats->rx_frames);
+	pkg_stats->total_tx_rate = calc_rate(pkg_stats->jiffies_init, j,
+					    pkg_stats->tx_frames);
+	pkg_stats->total_rx_rate = calc_rate(pkg_stats->jiffies_init, j,
+					    pkg_stats->rx_frames);
 
 	/* calc current values */
-	if (can_stats->rx_frames_delta)
-		can_stats->current_rx_match_ratio =
-			(can_stats->matches_delta * 100) /
-			can_stats->rx_frames_delta;
+	if (pkg_stats->rx_frames_delta)
+		pkg_stats->current_rx_match_ratio =
+			(pkg_stats->matches_delta * 100) /
+			pkg_stats->rx_frames_delta;
 
-	can_stats->current_tx_rate = calc_rate(0, HZ, can_stats->tx_frames_delta);
-	can_stats->current_rx_rate = calc_rate(0, HZ, can_stats->rx_frames_delta);
+	pkg_stats->current_tx_rate = calc_rate(0, HZ, pkg_stats->tx_frames_delta);
+	pkg_stats->current_rx_rate = calc_rate(0, HZ, pkg_stats->rx_frames_delta);
 
 	/* check / update maximum values */
-	if (can_stats->max_tx_rate < can_stats->current_tx_rate)
-		can_stats->max_tx_rate = can_stats->current_tx_rate;
+	if (pkg_stats->max_tx_rate < pkg_stats->current_tx_rate)
+		pkg_stats->max_tx_rate = pkg_stats->current_tx_rate;
 
-	if (can_stats->max_rx_rate < can_stats->current_rx_rate)
-		can_stats->max_rx_rate = can_stats->current_rx_rate;
+	if (pkg_stats->max_rx_rate < pkg_stats->current_rx_rate)
+		pkg_stats->max_rx_rate = pkg_stats->current_rx_rate;
 
-	if (can_stats->max_rx_match_ratio < can_stats->current_rx_match_ratio)
-		can_stats->max_rx_match_ratio = can_stats->current_rx_match_ratio;
+	if (pkg_stats->max_rx_match_ratio < pkg_stats->current_rx_match_ratio)
+		pkg_stats->max_rx_match_ratio = pkg_stats->current_rx_match_ratio;
 
 	/* clear values for 'current rate' calculation */
-	can_stats->tx_frames_delta = 0;
-	can_stats->rx_frames_delta = 0;
-	can_stats->matches_delta   = 0;
+	pkg_stats->tx_frames_delta = 0;
+	pkg_stats->rx_frames_delta = 0;
+	pkg_stats->matches_delta   = 0;
 
 	/* restart timer (one second) */
-	mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ));
+	mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ));
 }
 
 /*
@@ -211,60 +213,60 @@
 static int can_stats_proc_show(struct seq_file *m, void *v)
 {
 	struct net *net = m->private;
-	struct s_stats *can_stats = net->can.can_stats;
-	struct s_pstats *can_pstats = net->can.can_pstats;
+	struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
+	struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
 
 	seq_putc(m, '\n');
-	seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames);
-	seq_printf(m, " %8ld received frames (RXF)\n", can_stats->rx_frames);
-	seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats->matches);
+	seq_printf(m, " %8ld transmitted frames (TXF)\n", pkg_stats->tx_frames);
+	seq_printf(m, " %8ld received frames (RXF)\n", pkg_stats->rx_frames);
+	seq_printf(m, " %8ld matched frames (RXMF)\n", pkg_stats->matches);
 
 	seq_putc(m, '\n');
 
-	if (net->can.can_stattimer.function == can_stat_update) {
+	if (net->can.stattimer.function == can_stat_update) {
 		seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
-				can_stats->total_rx_match_ratio);
+				pkg_stats->total_rx_match_ratio);
 
 		seq_printf(m, " %8ld frames/s total tx rate (TXR)\n",
-				can_stats->total_tx_rate);
+				pkg_stats->total_tx_rate);
 		seq_printf(m, " %8ld frames/s total rx rate (RXR)\n",
-				can_stats->total_rx_rate);
+				pkg_stats->total_rx_rate);
 
 		seq_putc(m, '\n');
 
 		seq_printf(m, " %8ld %% current match ratio (CRXMR)\n",
-				can_stats->current_rx_match_ratio);
+				pkg_stats->current_rx_match_ratio);
 
 		seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n",
-				can_stats->current_tx_rate);
+				pkg_stats->current_tx_rate);
 		seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n",
-				can_stats->current_rx_rate);
+				pkg_stats->current_rx_rate);
 
 		seq_putc(m, '\n');
 
 		seq_printf(m, " %8ld %% max match ratio (MRXMR)\n",
-				can_stats->max_rx_match_ratio);
+				pkg_stats->max_rx_match_ratio);
 
 		seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n",
-				can_stats->max_tx_rate);
+				pkg_stats->max_tx_rate);
 		seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n",
-				can_stats->max_rx_rate);
+				pkg_stats->max_rx_rate);
 
 		seq_putc(m, '\n');
 	}
 
 	seq_printf(m, " %8ld current receive list entries (CRCV)\n",
-			can_pstats->rcv_entries);
+			rcv_lists_stats->rcv_entries);
 	seq_printf(m, " %8ld maximum receive list entries (MRCV)\n",
-			can_pstats->rcv_entries_max);
+			rcv_lists_stats->rcv_entries_max);
 
-	if (can_pstats->stats_reset)
+	if (rcv_lists_stats->stats_reset)
 		seq_printf(m, "\n %8ld statistic resets (STR)\n",
-				can_pstats->stats_reset);
+				rcv_lists_stats->stats_reset);
 
-	if (can_pstats->user_reset)
+	if (rcv_lists_stats->user_reset)
 		seq_printf(m, " %8ld user statistic resets (USTR)\n",
-				can_pstats->user_reset);
+				rcv_lists_stats->user_reset);
 
 	seq_putc(m, '\n');
 	return 0;
@@ -273,20 +275,20 @@
 static int can_reset_stats_proc_show(struct seq_file *m, void *v)
 {
 	struct net *net = m->private;
-	struct s_pstats *can_pstats = net->can.can_pstats;
-	struct s_stats *can_stats = net->can.can_stats;
+	struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
+	struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
 
 	user_reset = 1;
 
-	if (net->can.can_stattimer.function == can_stat_update) {
+	if (net->can.stattimer.function == can_stat_update) {
 		seq_printf(m, "Scheduled statistic reset #%ld.\n",
-				can_pstats->stats_reset + 1);
+				rcv_lists_stats->stats_reset + 1);
 	} else {
-		if (can_stats->jiffies_init != jiffies)
+		if (pkg_stats->jiffies_init != jiffies)
 			can_init_stats(net);
 
 		seq_printf(m, "Performed statistic reset #%ld.\n",
-				can_pstats->stats_reset);
+				rcv_lists_stats->stats_reset);
 	}
 	return 0;
 }
@@ -299,11 +301,11 @@
 
 static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
 					     struct net_device *dev,
-					     struct can_dev_rcv_lists *d)
+					     struct can_dev_rcv_lists *dev_rcv_lists)
 {
-	if (!hlist_empty(&d->rx[idx])) {
+	if (!hlist_empty(&dev_rcv_lists->rx[idx])) {
 		can_print_recv_banner(m);
-		can_print_rcvlist(m, &d->rx[idx], dev);
+		can_print_rcvlist(m, &dev_rcv_lists->rx[idx], dev);
 	} else
 		seq_printf(m, "  (%s: no entry)\n", DNAME(dev));
 
@@ -314,7 +316,7 @@
 	/* double cast to prevent GCC warning */
 	int idx = (int)(long)PDE_DATA(m->file->f_inode);
 	struct net_device *dev;
-	struct can_dev_rcv_lists *d;
+	struct can_dev_rcv_lists *dev_rcv_lists;
 	struct net *net = m->private;
 
 	seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]);
@@ -322,8 +324,8 @@
 	rcu_read_lock();
 
 	/* receive list for 'all' CAN devices (dev == NULL) */
-	d = net->can.can_rx_alldev_list;
-	can_rcvlist_proc_show_one(m, idx, NULL, d);
+	dev_rcv_lists = net->can.rx_alldev_list;
+	can_rcvlist_proc_show_one(m, idx, NULL, dev_rcv_lists);
 
 	/* receive list for registered CAN devices */
 	for_each_netdev_rcu(net, dev) {
@@ -365,7 +367,7 @@
 static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
 {
 	struct net_device *dev;
-	struct can_dev_rcv_lists *d;
+	struct can_dev_rcv_lists *dev_rcv_lists;
 	struct net *net = m->private;
 
 	/* RX_SFF */
@@ -374,15 +376,16 @@
 	rcu_read_lock();
 
 	/* sff receive list for 'all' CAN devices (dev == NULL) */
-	d = net->can.can_rx_alldev_list;
-	can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff));
+	dev_rcv_lists = net->can.rx_alldev_list;
+	can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_sff,
+				    ARRAY_SIZE(dev_rcv_lists->rx_sff));
 
 	/* sff receive list for registered CAN devices */
 	for_each_netdev_rcu(net, dev) {
 		if (dev->type == ARPHRD_CAN && dev->ml_priv) {
-			d = dev->ml_priv;
-			can_rcvlist_proc_show_array(m, dev, d->rx_sff,
-						    ARRAY_SIZE(d->rx_sff));
+			dev_rcv_lists = dev->ml_priv;
+			can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff,
+						    ARRAY_SIZE(dev_rcv_lists->rx_sff));
 		}
 	}
 
@@ -395,7 +398,7 @@
 static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
 {
 	struct net_device *dev;
-	struct can_dev_rcv_lists *d;
+	struct can_dev_rcv_lists *dev_rcv_lists;
 	struct net *net = m->private;
 
 	/* RX_EFF */
@@ -404,15 +407,16 @@
 	rcu_read_lock();
 
 	/* eff receive list for 'all' CAN devices (dev == NULL) */
-	d = net->can.can_rx_alldev_list;
-	can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff));
+	dev_rcv_lists = net->can.rx_alldev_list;
+	can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_eff,
+				    ARRAY_SIZE(dev_rcv_lists->rx_eff));
 
 	/* eff receive list for registered CAN devices */
 	for_each_netdev_rcu(net, dev) {
 		if (dev->type == ARPHRD_CAN && dev->ml_priv) {
-			d = dev->ml_priv;
-			can_rcvlist_proc_show_array(m, dev, d->rx_eff,
-						    ARRAY_SIZE(d->rx_eff));
+			dev_rcv_lists = dev->ml_priv;
+			can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff,
+						    ARRAY_SIZE(dev_rcv_lists->rx_eff));
 		}
 	}
 
diff --git a/net/can/raw.c b/net/can/raw.c
index 3aab766..59c039d 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -1,5 +1,5 @@
-/*
- * raw.c - Raw sockets for protocol family CAN
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* raw.c - Raw sockets for protocol family CAN
  *
  * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
  * All rights reserved.
@@ -64,8 +64,7 @@
 
 #define MASK_ALL 0
 
-/*
- * A raw socket has a list of can_filters attached to it, each receiving
+/* A raw socket has a list of can_filters attached to it, each receiving
  * the CAN frames matching that filter.  If the filter list is empty,
  * no CAN frames will be received by the socket.  The default after
  * opening the socket, is to have one filter which receives all frames.
@@ -96,8 +95,7 @@
 	struct uniqframe __percpu *uniq;
 };
 
-/*
- * Return pointer to store the extra msg flags for raw_recvmsg().
+/* Return pointer to store the extra msg flags for raw_recvmsg().
  * We use the space of one unsigned int beyond the 'struct sockaddr_can'
  * in skb->cb.
  */
@@ -156,8 +154,7 @@
 	if (!skb)
 		return;
 
-	/*
-	 *  Put the datagram to the queue so that raw_recvmsg() can
+	/*  Put the datagram to the queue so that raw_recvmsg() can
 	 *  get it from there.  We need to pass the interface index to
 	 *  raw_recvmsg().  We pass a whole struct sockaddr_can in skb->cb
 	 *  containing the interface index.
@@ -283,7 +280,6 @@
 		return NOTIFY_DONE;
 
 	switch (msg) {
-
 	case NETDEV_UNREGISTER:
 		lock_sock(sk);
 		/* remove current filters & unregister */
@@ -369,8 +365,9 @@
 				raw_disable_allfilters(dev_net(dev), dev, sk);
 				dev_put(dev);
 			}
-		} else
+		} else {
 			raw_disable_allfilters(sock_net(sk), NULL, sk);
+		}
 	}
 
 	if (ro->count > 1)
@@ -399,7 +396,7 @@
 	int err = 0;
 	int notify_enetdown = 0;
 
-	if (len < sizeof(*addr))
+	if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex))
 		return -EINVAL;
 	if (addr->can_family != AF_CAN)
 		return -EINVAL;
@@ -450,8 +447,9 @@
 							       dev, sk);
 					dev_put(dev);
 				}
-			} else
+			} else {
 				raw_disable_allfilters(sock_net(sk), NULL, sk);
+			}
 		}
 		ro->ifindex = ifindex;
 		ro->bound = 1;
@@ -502,7 +500,6 @@
 		return -EINVAL;
 
 	switch (optname) {
-
 	case CAN_RAW_FILTER:
 		if (optlen % sizeof(struct can_filter) != 0)
 			return -EINVAL;
@@ -665,17 +662,18 @@
 		return -EINVAL;
 
 	switch (optname) {
-
 	case CAN_RAW_FILTER:
 		lock_sock(sk);
 		if (ro->count > 0) {
 			int fsize = ro->count * sizeof(struct can_filter);
+
 			if (len > fsize)
 				len = fsize;
 			if (copy_to_user(optval, ro->filter, len))
 				err = -EFAULT;
-		} else
+		} else {
 			len = 0;
+		}
 		release_sock(sk);
 
 		if (!err)
@@ -735,15 +733,16 @@
 	if (msg->msg_name) {
 		DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name);
 
-		if (msg->msg_namelen < sizeof(*addr))
+		if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex))
 			return -EINVAL;
 
 		if (addr->can_family != AF_CAN)
 			return -EINVAL;
 
 		ifindex = addr->can_ifindex;
-	} else
+	} else {
 		ifindex = ro->ifindex;
+	}
 
 	dev = dev_get_by_index(sock_net(sk), ifindex);
 	if (!dev)
@@ -771,7 +770,7 @@
 	if (err < 0)
 		goto free_skb;
 
-	sock_tx_timestamp(sk, sk->sk_tsflags, &skb_shinfo(skb)->tx_flags);
+	skb_setup_tx_timestamp(skb, sk->sk_tsflags);
 
 	skb->dev = dev;
 	skb->sk  = sk;
@@ -836,6 +835,13 @@
 	return size;
 }
 
+static int raw_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+				unsigned long arg)
+{
+	/* no ioctls for socket layer -> hand it down to NIC layer */
+	return -ENOIOCTLCMD;
+}
+
 static const struct proto_ops raw_ops = {
 	.family        = PF_CAN,
 	.release       = raw_release,
@@ -845,7 +851,8 @@
 	.accept        = sock_no_accept,
 	.getname       = raw_getname,
 	.poll          = datagram_poll,
-	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
+	.ioctl         = raw_sock_no_ioctlcmd,
+	.gettstamp     = sock_gettstamp,
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
 	.setsockopt    = raw_setsockopt,
@@ -878,7 +885,7 @@
 
 	err = can_proto_register(&raw_can_proto);
 	if (err < 0)
-		printk(KERN_ERR "can: registration of raw protocol failed\n");
+		pr_err("can: registration of raw protocol failed\n");
 
 	return err;
 }
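
Two behavioural changes in raw.c deserve a note. First, raw_bind() and
raw_sendmsg() now validate the address length with
CAN_REQUIRED_SIZE(*addr, can_ifindex) rather than sizeof(*addr), so a
sockaddr_can long enough to carry can_family and can_ifindex is accepted even
if it predates later additions to the trailing union. The macro lives in
net/can/af_can.h; it should be equivalent to this sketch:

	/* Hedged sketch: the smallest length that still covers 'member'. */
	#define CAN_REQUIRED_SIZE(struct_type, member) \
		(offsetof(typeof(struct_type), member) + \
		 sizeof(((typeof(struct_type) *)(NULL))->member))

Second, the socket-level ioctl handler becomes a local raw_sock_no_ioctlcmd()
returning -ENOIOCTLCMD, which tells the socket core to fall through to the
device layer, while timestamp ioctls are served by the generic
sock_gettstamp() via the new .gettstamp hook.
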
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig
index cd2d5b9..2e8e6f9 100644
--- a/net/ceph/Kconfig
+++ b/net/ceph/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config CEPH_LIB
 	tristate "Ceph core library"
 	depends on INET
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index db09def..59d0ba2 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_CEPH_LIB) += libceph.o
 
 libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
-	mon_client.o \
+	mon_client.o decode.o \
 	cls_lock_client.o \
 	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
 	striper.o \
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 87afb9e..2d56824 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/ceph/ceph_debug.h>
 #include <linux/backing-dev.h>
@@ -12,6 +13,7 @@
 #include <linux/nsproxy.h>
 #include <linux/parser.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/statfs.h>
@@ -184,18 +186,34 @@
 }
 EXPORT_SYMBOL(ceph_compare_options);
 
+/*
+ * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
+ * compatible with (a superset of) GFP_KERNEL.  This is because while the
+ * actual pages are allocated with the specified flags, the page table pages
+ * are always allocated with GFP_KERNEL.  map_vm_area() doesn't even take
+ * flags because GFP_KERNEL is hard-coded in {p4d,pud,pmd,pte}_alloc().
+ *
+ * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
+ */
 void *ceph_kvmalloc(size_t size, gfp_t flags)
 {
-	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-		void *ptr = kmalloc(size, flags | __GFP_NOWARN);
-		if (ptr)
-			return ptr;
+	void *p;
+
+	if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) {
+		p = kvmalloc(size, flags);
+	} else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) {
+		unsigned int nofs_flag = memalloc_nofs_save();
+		p = kvmalloc(size, GFP_KERNEL);
+		memalloc_nofs_restore(nofs_flag);
+	} else {
+		unsigned int noio_flag = memalloc_noio_save();
+		p = kvmalloc(size, GFP_KERNEL);
+		memalloc_noio_restore(noio_flag);
 	}
 
-	return __vmalloc(size, flags, PAGE_KERNEL);
+	return p;
 }
 
-
 static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 {
 	int i = 0;
@@ -255,6 +273,7 @@
 	Opt_nocephx_sign_messages,
 	Opt_tcp_nodelay,
 	Opt_notcp_nodelay,
+	Opt_abort_on_full,
 };
 
 static match_table_t opt_tokens = {
@@ -280,6 +299,7 @@
 	{Opt_nocephx_sign_messages, "nocephx_sign_messages"},
 	{Opt_tcp_nodelay, "tcp_nodelay"},
 	{Opt_notcp_nodelay, "notcp_nodelay"},
+	{Opt_abort_on_full, "abort_on_full"},
 	{-1, NULL}
 };
 
@@ -535,6 +555,10 @@
 			opt->flags &= ~CEPH_OPT_TCP_NODELAY;
 			break;
 
+		case Opt_abort_on_full:
+			opt->flags |= CEPH_OPT_ABORT_ON_FULL;
+			break;
+
 		default:
 			BUG_ON(token);
 		}
@@ -549,7 +573,8 @@
 }
 EXPORT_SYMBOL(ceph_parse_options);
 
-int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
+int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
+			      bool show_all)
 {
 	struct ceph_options *opt = client->options;
 	size_t pos = m->count;
@@ -574,6 +599,8 @@
 		seq_puts(m, "nocephx_sign_messages,");
 	if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
 		seq_puts(m, "notcp_nodelay,");
+	if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL))
+		seq_puts(m, "abort_on_full,");
 
 	if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
 		seq_printf(m, "mount_timeout=%d,",
@@ -684,6 +711,14 @@
 }
 EXPORT_SYMBOL(ceph_destroy_client);
 
+void ceph_reset_client_addr(struct ceph_client *client)
+{
+	ceph_messenger_reset_nonce(&client->msgr);
+	ceph_monc_reopen_session(&client->monc);
+	ceph_osdc_reopen_osds(&client->osdc);
+}
+EXPORT_SYMBOL(ceph_reset_client_addr);
+
 /*
  * true if we have the mon map (and have thus joined the cluster)
  */
@@ -729,7 +764,6 @@
 }
 EXPORT_SYMBOL(__ceph_open_session);
 
-
 int ceph_open_session(struct ceph_client *client)
 {
 	int ret;
@@ -745,14 +779,29 @@
 }
 EXPORT_SYMBOL(ceph_open_session);
 
+int ceph_wait_for_latest_osdmap(struct ceph_client *client,
+				unsigned long timeout)
+{
+	u64 newest_epoch;
+	int ret;
+
+	ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch);
+	if (ret)
+		return ret;
+
+	if (client->osdc.osdmap->epoch >= newest_epoch)
+		return 0;
+
+	ceph_osdc_maybe_request_map(&client->osdc);
+	return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout);
+}
+EXPORT_SYMBOL(ceph_wait_for_latest_osdmap);
 
 static int __init init_ceph_lib(void)
 {
 	int ret = 0;
 
-	ret = ceph_debugfs_init();
-	if (ret < 0)
-		goto out;
+	ceph_debugfs_init();
 
 	ret = ceph_crypto_init();
 	if (ret < 0)
@@ -777,7 +826,6 @@
 	ceph_crypto_shutdown();
 out_debugfs:
 	ceph_debugfs_cleanup();
-out:
 	return ret;
 }
 
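The ceph_kvmalloc() rework relies on the scoped allocation-context API from
<linux/sched/mm.h> instead of handing a restricted gfp mask to kvmalloc():
kvmalloc() only falls back to vmalloc for GFP_KERNEL-compatible flags, so
GFP_NOFS and GFP_NOIO are translated into a task-wide scope around a plain
GFP_KERNEL allocation. The pattern in isolation, as a hedged sketch with a
hypothetical caller:

	#include <linux/sched/mm.h>
	#include <linux/mm.h>

	/* Mark the task as "no FS reclaim", then allocate with GFP_KERNEL
	 * so kvmalloc() keeps its vmalloc fallback; mirrors the GFP_NOFS
	 * branch of ceph_kvmalloc() above.
	 */
	static void *alloc_in_nofs_scope(size_t size)
	{
		unsigned int nofs_flag = memalloc_nofs_save();
		void *p = kvmalloc(size, GFP_KERNEL);

		memalloc_nofs_restore(nofs_flag);
		return p;
	}

The same trick appears in later hunks around sock_create_kern() and
crypto_alloc_sync_skcipher(), both of which allocate with GFP_KERNEL
internally.
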
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 2105a6e..17447c1 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -6,6 +6,7 @@
 
 #include <linux/ceph/cls_lock_client.h>
 #include <linux/ceph/decode.h>
+#include <linux/ceph/libceph.h>
 
 /**
  * ceph_cls_lock - grab rados lock for object
@@ -264,14 +265,17 @@
 		return ret;
 
 	*p += sizeof(struct ceph_timespec); /* skip expiration */
-	ceph_decode_copy(p, &locker->info.addr, sizeof(locker->info.addr));
-	ceph_decode_addr(&locker->info.addr);
+
+	ret = ceph_decode_entity_addr(p, end, &locker->info.addr);
+	if (ret)
+		return ret;
+
 	len = ceph_decode_32(p);
 	*p += len; /* skip description */
 
 	dout("%s %s%llu cookie %s addr %s\n", __func__,
 	     ENTITY_NAME(locker->id.name), locker->id.cookie,
-	     ceph_pr_addr(&locker->info.addr.in_addr));
+	     ceph_pr_addr(&locker->info.addr));
 	return 0;
 }
 
@@ -360,7 +364,7 @@
 	dout("%s lock_name %s\n", __func__, lock_name);
 	ret = ceph_osdc_call(osdc, oid, oloc, "lock", "get_info",
 			     CEPH_OSD_FLAG_READ, get_info_op_page,
-			     get_info_op_buf_size, reply_page, &reply_len);
+			     get_info_op_buf_size, &reply_page, &reply_len);
 
 	dout("%s: status %d\n", __func__, ret);
 	if (ret >= 0) {
@@ -375,3 +379,47 @@
 	return ret;
 }
 EXPORT_SYMBOL(ceph_cls_lock_info);
+
+int ceph_cls_assert_locked(struct ceph_osd_request *req, int which,
+			   char *lock_name, u8 type, char *cookie, char *tag)
+{
+	int assert_op_buf_size;
+	int name_len = strlen(lock_name);
+	int cookie_len = strlen(cookie);
+	int tag_len = strlen(tag);
+	struct page **pages;
+	void *p, *end;
+	int ret;
+
+	assert_op_buf_size = name_len + sizeof(__le32) +
+			     cookie_len + sizeof(__le32) +
+			     tag_len + sizeof(__le32) +
+			     sizeof(u8) + CEPH_ENCODING_START_BLK_LEN;
+	if (assert_op_buf_size > PAGE_SIZE)
+		return -E2BIG;
+
+	ret = osd_req_op_cls_init(req, which, "lock", "assert_locked");
+	if (ret)
+		return ret;
+
+	pages = ceph_alloc_page_vector(1, GFP_NOIO);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	p = page_address(pages[0]);
+	end = p + assert_op_buf_size;
+
+	/* encode cls_lock_assert_op struct */
+	ceph_start_encoding(&p, 1, 1,
+			    assert_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+	ceph_encode_string(&p, end, lock_name, name_len);
+	ceph_encode_8(&p, type);
+	ceph_encode_string(&p, end, cookie, cookie_len);
+	ceph_encode_string(&p, end, tag, tag_len);
+	WARN_ON(p != end);
+
+	osd_req_op_cls_request_data_pages(req, which, pages, assert_op_buf_size,
+					  0, false, true);
+	return 0;
+}
+EXPORT_SYMBOL(ceph_cls_assert_locked);
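
ceph_cls_assert_locked() computes assert_op_buf_size up front and then
asserts, via WARN_ON(p != end), that the encoders consumed exactly that much.
The arithmetic matches ceph's string and versioned-struct encodings; a hedged
sketch of the resulting layout (field names illustrative,
CEPH_ENCODING_START_BLK_LEN being the 1 + 1 + 4 byte version header):

	/* Layout produced by the encode calls above:
	 *
	 *   u8    struct_v                 \
	 *   u8    struct_compat             > CEPH_ENCODING_START_BLK_LEN
	 *   le32  struct_len               /
	 *   le32  name_len,   lock_name bytes
	 *   u8    type
	 *   le32  cookie_len, cookie bytes
	 *   le32  tag_len,    tag bytes
	 */

Each ceph_encode_string() contributes a __le32 length prefix plus the raw
bytes, which is why every strlen() in the size computation is paired with a
sizeof(__le32).
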
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 02172c4..4f75df4 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -46,9 +46,9 @@
 		goto fail;
 	}
 
-	/* crypto_alloc_skcipher() allocates with GFP_KERNEL */
+	/* crypto_alloc_sync_skcipher() allocates with GFP_KERNEL */
 	noio_flag = memalloc_noio_save();
-	key->tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+	key->tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0);
 	memalloc_noio_restore(noio_flag);
 	if (IS_ERR(key->tfm)) {
 		ret = PTR_ERR(key->tfm);
@@ -56,7 +56,7 @@
 		goto fail;
 	}
 
-	ret = crypto_skcipher_setkey(key->tfm, key->key, key->len);
+	ret = crypto_sync_skcipher_setkey(key->tfm, key->key, key->len);
 	if (ret)
 		goto fail;
 
@@ -136,8 +136,10 @@
 	if (key) {
 		kfree(key->key);
 		key->key = NULL;
-		crypto_free_skcipher(key->tfm);
-		key->tfm = NULL;
+		if (key->tfm) {
+			crypto_free_sync_skcipher(key->tfm);
+			key->tfm = NULL;
+		}
 	}
 }
 
@@ -216,7 +218,7 @@
 static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt,
 			  void *buf, int buf_len, int in_len, int *pout_len)
 {
-	SKCIPHER_REQUEST_ON_STACK(req, key->tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm);
 	struct sg_table sgt;
 	struct scatterlist prealloc_sg;
 	char iv[AES_BLOCK_SIZE] __aligned(8);
@@ -232,7 +234,7 @@
 		return ret;
 
 	memcpy(iv, aes_iv, AES_BLOCK_SIZE);
-	skcipher_request_set_tfm(req, key->tfm);
+	skcipher_request_set_sync_tfm(req, key->tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv);
 
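The crypto.c hunks migrate from the generic skcipher API to the sync variant:
requesting a synchronous transform up front is what lets
SYNC_SKCIPHER_REQUEST_ON_STACK() keep the request on the stack. A
self-contained sketch of the API as used above (error paths trimmed; key, iv
and sg come from the hypothetical caller):

	#include <crypto/skcipher.h>

	static int aes_cbc_encrypt_once(const u8 *key, unsigned int keylen,
					u8 iv[16], struct scatterlist *sg,
					unsigned int len)
	{
		struct crypto_sync_skcipher *tfm;
		int ret;

		tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		ret = crypto_sync_skcipher_setkey(tfm, key, keylen);
		if (!ret) {
			SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

			skcipher_request_set_sync_tfm(req, tfm);
			skcipher_request_set_callback(req, 0, NULL, NULL);
			skcipher_request_set_crypt(req, sg, sg, len, iv);
			ret = crypto_skcipher_encrypt(req);
			skcipher_request_zero(req);	/* wipe stack state */
		}

		crypto_free_sync_skcipher(tfm);
		return ret;
	}

Note the diff also drops CRYPTO_ALG_ASYNC from the mask argument:
crypto_alloc_sync_skcipher() already guarantees a synchronous implementation.
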
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index bb45c7d..96ef4d8 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -13,7 +13,7 @@
 	struct ceph_timespec created;
 	int len;
 	void *key;
-	struct crypto_skcipher *tfm;
+	struct crypto_sync_skcipher *tfm;
 };
 
 int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 0295260..7cb992e 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -46,7 +46,7 @@
 
 		seq_printf(s, "\t%s%lld\t%s\n",
 			   ENTITY_NAME(inst->name),
-			   ceph_pr_addr(&inst->addr.in_addr));
+			   ceph_pr_addr(&inst->addr));
 	}
 	return 0;
 }
@@ -82,7 +82,7 @@
 		char sb[64];
 
 		seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
-			   i, ceph_pr_addr(&addr->in_addr),
+			   i, ceph_pr_addr(addr),
 			   ((map->osd_weight[i]*100) >> 16),
 			   ceph_osdmap_state_str(sb, sizeof(sb), state),
 			   ((ceph_get_primary_affinity(map, i)*100) >> 16));
@@ -375,7 +375,7 @@
 	struct ceph_client *client = s->private;
 	int ret;
 
-	ret = ceph_print_client_options(s, client);
+	ret = ceph_print_client_options(s, client, true);
 	if (ret)
 		return ret;
 
@@ -389,12 +389,9 @@
 CEPH_DEFINE_SHOW_FUNC(osdc_show)
 CEPH_DEFINE_SHOW_FUNC(client_options_show)
 
-int __init ceph_debugfs_init(void)
+void __init ceph_debugfs_init(void)
 {
 	ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
-	if (!ceph_debugfs_dir)
-		return -ENOMEM;
-	return 0;
 }
 
 void ceph_debugfs_cleanup(void)
@@ -402,9 +399,8 @@
 	debugfs_remove(ceph_debugfs_dir);
 }
 
-int ceph_debugfs_client_init(struct ceph_client *client)
+void ceph_debugfs_client_init(struct ceph_client *client)
 {
-	int ret = -ENOMEM;
 	char name[80];
 
 	snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
@@ -412,56 +408,37 @@
 
 	dout("ceph_debugfs_client_init %p %s\n", client, name);
 
-	BUG_ON(client->debugfs_dir);
 	client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
-	if (!client->debugfs_dir)
-		goto out;
 
 	client->monc.debugfs_file = debugfs_create_file("monc",
 						      0400,
 						      client->debugfs_dir,
 						      client,
 						      &monc_show_fops);
-	if (!client->monc.debugfs_file)
-		goto out;
 
 	client->osdc.debugfs_file = debugfs_create_file("osdc",
 						      0400,
 						      client->debugfs_dir,
 						      client,
 						      &osdc_show_fops);
-	if (!client->osdc.debugfs_file)
-		goto out;
 
 	client->debugfs_monmap = debugfs_create_file("monmap",
 					0400,
 					client->debugfs_dir,
 					client,
 					&monmap_show_fops);
-	if (!client->debugfs_monmap)
-		goto out;
 
 	client->debugfs_osdmap = debugfs_create_file("osdmap",
 					0400,
 					client->debugfs_dir,
 					client,
 					&osdmap_show_fops);
-	if (!client->debugfs_osdmap)
-		goto out;
 
 	client->debugfs_options = debugfs_create_file("client_options",
 					0400,
 					client->debugfs_dir,
 					client,
 					&client_options_show_fops);
-	if (!client->debugfs_options)
-		goto out;
-
-	return 0;
-
-out:
-	ceph_debugfs_client_cleanup(client);
-	return ret;
 }
 
 void ceph_debugfs_client_cleanup(struct ceph_client *client)
@@ -477,18 +454,16 @@
 
 #else  /* CONFIG_DEBUG_FS */
 
-int __init ceph_debugfs_init(void)
+void __init ceph_debugfs_init(void)
 {
-	return 0;
 }
 
 void ceph_debugfs_cleanup(void)
 {
 }
 
-int ceph_debugfs_client_init(struct ceph_client *client)
+void ceph_debugfs_client_init(struct ceph_client *client)
 {
-	return 0;
 }
 
 void ceph_debugfs_client_cleanup(struct ceph_client *client)
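
All of the removed error handling follows the tree-wide debugfs convention:
debugfs files are best-effort developer aids, the create helpers return
ERR_PTR values rather than NULL on failure, and every debugfs_create_*()
quietly does nothing when handed such an error pointer as its parent. The
resulting idiom, sketched against a made-up "foo" subsystem:

	#include <linux/debugfs.h>

	static struct dentry *foo_debugfs_dir;	/* hypothetical */
	static u32 foo_count;

	static void foo_debugfs_init(void)
	{
		/* No error checks: if this fails, the ERR_PTR parent turns
		 * the following calls into no-ops, and the kernel runs
		 * fine without its debugfs files.
		 */
		foo_debugfs_dir = debugfs_create_dir("foo", NULL);
		debugfs_create_u32("count", 0444, foo_debugfs_dir, &foo_count);
	}

This is also why ceph_debugfs_init() and ceph_debugfs_client_init() could
become void in this update.
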
diff --git a/net/ceph/decode.c b/net/ceph/decode.c
new file mode 100644
index 0000000..eea5295
--- /dev/null
+++ b/net/ceph/decode.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ceph/decode.h>
+
+static int
+ceph_decode_entity_addr_versioned(void **p, void *end,
+				  struct ceph_entity_addr *addr)
+{
+	int ret;
+	u8 struct_v;
+	u32 struct_len, addr_len;
+	void *struct_end;
+
+	ret = ceph_start_decoding(p, end, 1, "entity_addr_t", &struct_v,
+				  &struct_len);
+	if (ret)
+		goto bad;
+
+	ret = -EINVAL;
+	struct_end = *p + struct_len;
+
+	ceph_decode_copy_safe(p, end, &addr->type, sizeof(addr->type), bad);
+
+	ceph_decode_copy_safe(p, end, &addr->nonce, sizeof(addr->nonce), bad);
+
+	ceph_decode_32_safe(p, end, addr_len, bad);
+	if (addr_len > sizeof(addr->in_addr))
+		goto bad;
+
+	memset(&addr->in_addr, 0, sizeof(addr->in_addr));
+	if (addr_len) {
+		ceph_decode_copy_safe(p, end, &addr->in_addr, addr_len, bad);
+
+		addr->in_addr.ss_family =
+			le16_to_cpu((__force __le16)addr->in_addr.ss_family);
+	}
+
+	/* Advance past anything the client doesn't yet understand */
+	*p = struct_end;
+	ret = 0;
+bad:
+	return ret;
+}
+
+static int
+ceph_decode_entity_addr_legacy(void **p, void *end,
+			       struct ceph_entity_addr *addr)
+{
+	int ret = -EINVAL;
+
+	/* Skip rest of type field */
+	ceph_decode_skip_n(p, end, 3, bad);
+
+	/*
+	 * Clients that don't support ADDR2 always send TYPE_NONE, change it
+	 * to TYPE_LEGACY for forward compatibility.
+	 */
+	addr->type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
+	ceph_decode_copy_safe(p, end, &addr->nonce, sizeof(addr->nonce), bad);
+	memset(&addr->in_addr, 0, sizeof(addr->in_addr));
+	ceph_decode_copy_safe(p, end, &addr->in_addr,
+			      sizeof(addr->in_addr), bad);
+	addr->in_addr.ss_family =
+			be16_to_cpu((__force __be16)addr->in_addr.ss_family);
+	ret = 0;
+bad:
+	return ret;
+}
+
+int
+ceph_decode_entity_addr(void **p, void *end, struct ceph_entity_addr *addr)
+{
+	u8 marker;
+
+	ceph_decode_8_safe(p, end, marker, bad);
+	if (marker == 1)
+		return ceph_decode_entity_addr_versioned(p, end, addr);
+	else if (marker == 0)
+		return ceph_decode_entity_addr_legacy(p, end, addr);
+bad:
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ceph_decode_entity_addr);
+
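
The new helper keys off a one-byte marker to distinguish the versioned
entity_addr_t encoding (marker 1) from the legacy fixed layout (marker 0),
and the *_safe decode macros bail to the bad label whenever the buffer runs
short. Callers collapse to a sketch like this (buffer names hypothetical):

	/* Hedged usage sketch: pull one address out of a bounded buffer.
	 * p advances on success; on error the buffer should be discarded.
	 */
	static int decode_peer(void **p, void *end,
			       struct ceph_entity_addr *addr)
	{
		int err = ceph_decode_entity_addr(p, end, addr);

		if (err)
			return err;	/* short or malformed input */

		pr_debug("peer is %s\n", ceph_pr_addr(addr));
		return 0;
	}

This is what lets the monmap and lock-info decoding elsewhere in this patch
drop fixed-size ceph_decode_copy() calls that could not represent
variable-length ADDR2 entries.
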
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9a1c27c..e4cb3db 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -156,7 +156,6 @@
 /* Slab caches for frequently-allocated structures */
 
 static struct kmem_cache	*ceph_msg_cache;
-static struct kmem_cache	*ceph_msg_data_cache;
 
 /* static tag bytes (protocol control messages) */
 static char tag_msg = CEPH_MSGR_TAG_MSG;
@@ -187,30 +186,33 @@
 
 static struct page *zero_page;		/* used in certain error cases */
 
-const char *ceph_pr_addr(const struct sockaddr_storage *ss)
+const char *ceph_pr_addr(const struct ceph_entity_addr *addr)
 {
 	int i;
 	char *s;
-	struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
-	struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
+	struct sockaddr_storage ss = addr->in_addr; /* align */
+	struct sockaddr_in *in4 = (struct sockaddr_in *)&ss;
+	struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss;
 
 	i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK;
 	s = addr_str[i];
 
-	switch (ss->ss_family) {
+	switch (ss.ss_family) {
 	case AF_INET:
-		snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr,
+		snprintf(s, MAX_ADDR_STR_LEN, "(%d)%pI4:%hu",
+			 le32_to_cpu(addr->type), &in4->sin_addr,
 			 ntohs(in4->sin_port));
 		break;
 
 	case AF_INET6:
-		snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr,
+		snprintf(s, MAX_ADDR_STR_LEN, "(%d)[%pI6c]:%hu",
+			 le32_to_cpu(addr->type), &in6->sin6_addr,
 			 ntohs(in6->sin6_port));
 		break;
 
 	default:
 		snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)",
-			 ss->ss_family);
+			 ss.ss_family);
 	}
 
 	return s;
@@ -220,7 +222,7 @@
 static void encode_my_addr(struct ceph_messenger *msgr)
 {
 	memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr));
-	ceph_encode_addr(&msgr->my_enc_addr);
+	ceph_encode_banner_addr(&msgr->my_enc_addr);
 }
 
 /*
@@ -235,23 +237,11 @@
 	if (!ceph_msg_cache)
 		return -ENOMEM;
 
-	BUG_ON(ceph_msg_data_cache);
-	ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0);
-	if (ceph_msg_data_cache)
-		return 0;
-
-	kmem_cache_destroy(ceph_msg_cache);
-	ceph_msg_cache = NULL;
-
-	return -ENOMEM;
+	return 0;
 }
 
 static void ceph_msgr_slab_exit(void)
 {
-	BUG_ON(!ceph_msg_data_cache);
-	kmem_cache_destroy(ceph_msg_data_cache);
-	ceph_msg_data_cache = NULL;
-
 	BUG_ON(!ceph_msg_cache);
 	kmem_cache_destroy(ceph_msg_cache);
 	ceph_msg_cache = NULL;
@@ -462,7 +452,7 @@
  */
 static int ceph_tcp_connect(struct ceph_connection *con)
 {
-	struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
+	struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */
 	struct socket *sock;
 	unsigned int noio_flag;
 	int ret;
@@ -471,7 +461,7 @@
 
 	/* sock_create_kern() allocates with GFP_KERNEL */
 	noio_flag = memalloc_noio_save();
-	ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
+	ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family,
 			       SOCK_STREAM, IPPROTO_TCP, &sock);
 	memalloc_noio_restore(noio_flag);
 	if (ret)
@@ -484,18 +474,18 @@
 
 	set_sock_callbacks(sock, con);
 
-	dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
+	dout("connect %s\n", ceph_pr_addr(&con->peer_addr));
 
 	con_sock_state_connecting(con);
-	ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+	ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss),
 				 O_NONBLOCK);
 	if (ret == -EINPROGRESS) {
 		dout("connect %s EINPROGRESS sk_state = %u\n",
-		     ceph_pr_addr(&con->peer_addr.in_addr),
+		     ceph_pr_addr(&con->peer_addr),
 		     sock->sk->sk_state);
 	} else if (ret < 0) {
 		pr_err("connect %s error %d\n",
-		       ceph_pr_addr(&con->peer_addr.in_addr), ret);
+		       ceph_pr_addr(&con->peer_addr), ret);
 		sock_release(sock);
 		return ret;
 	}
@@ -526,7 +516,7 @@
 	if (!buf)
 		msg.msg_flags |= MSG_TRUNC;
 
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len);
 	r = sock_recvmsg(sock, &msg, msg.msg_flags);
 	if (r == -EAGAIN)
 		r = 0;
@@ -545,7 +535,7 @@
 	int r;
 
 	BUG_ON(page_offset + length > PAGE_SIZE);
-	iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length);
+	iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length);
 	r = sock_recvmsg(sock, &msg, msg.msg_flags);
 	if (r == -EAGAIN)
 		r = 0;
@@ -557,7 +547,7 @@
  * shortly.
  */
 static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
-		     size_t kvlen, size_t len, int more)
+			    size_t kvlen, size_t len, bool more)
 {
 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
 	int r;
@@ -573,24 +563,15 @@
 	return r;
 }
 
-static int __ceph_tcp_sendpage(struct socket *sock, struct page *page,
-		     int offset, size_t size, bool more)
-{
-	int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
-	int ret;
-
-	ret = kernel_sendpage(sock, page, offset, size, flags);
-	if (ret == -EAGAIN)
-		ret = 0;
-
-	return ret;
-}
-
+/*
+ * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST
+ */
 static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
-		     int offset, size_t size, bool more)
+			     int offset, size_t size, int more)
 {
-	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
-	struct bio_vec bvec;
+	ssize_t (*sendpage)(struct socket *sock, struct page *page,
+			    int offset, size_t size, int flags);
+	int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
 	int ret;
 
 	/*
@@ -602,19 +583,11 @@
 	 * triggers one of hardened usercopy checks.
 	 */
 	if (page_count(page) >= 1 && !PageSlab(page))
-		return __ceph_tcp_sendpage(sock, page, offset, size, more);
-
-	bvec.bv_page = page;
-	bvec.bv_offset = offset;
-	bvec.bv_len = size;
-
-	if (more)
-		msg.msg_flags |= MSG_MORE;
+		sendpage = sock->ops->sendpage;
 	else
-		msg.msg_flags |= MSG_EOR;  /* superfluous, but what the hell */
+		sendpage = sock_no_sendpage;
 
-	iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size);
-	ret = sock_sendmsg(sock, &msg);
+	ret = sendpage(sock, page, offset, size, flags);
 	if (ret == -EAGAIN)
 		ret = 0;
 
@@ -699,8 +672,7 @@
 void ceph_con_close(struct ceph_connection *con)
 {
 	mutex_lock(&con->mutex);
-	dout("con_close %p peer %s\n", con,
-	     ceph_pr_addr(&con->peer_addr.in_addr));
+	dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr));
 	con->state = CON_STATE_CLOSED;
 
 	con_flag_clear(con, CON_FLAG_LOSSYTX);	/* so we retry next connect */
@@ -724,7 +696,7 @@
 		   struct ceph_entity_addr *addr)
 {
 	mutex_lock(&con->mutex);
-	dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
+	dout("con_open %p %s\n", con, ceph_pr_addr(addr));
 
 	WARN_ON(con->state != CON_STATE_CLOSED);
 	con->state = CON_STATE_PREOPEN;
@@ -870,6 +842,7 @@
 					size_t bytes)
 {
 	struct ceph_bio_iter *it = &cursor->bio_iter;
+	struct page *page = bio_iter_page(it->bio, it->iter);
 
 	BUG_ON(bytes > cursor->resid);
 	BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
@@ -881,7 +854,8 @@
 		return false;   /* no more data */
 	}
 
-	if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
+	if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done &&
+		       page == bio_iter_page(it->bio, it->iter)))
 		return false;	/* more bytes to process in this segment */
 
 	if (!it->iter.bi_size) {
@@ -929,6 +903,7 @@
 					size_t bytes)
 {
 	struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs;
+	struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter);
 
 	BUG_ON(bytes > cursor->resid);
 	BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter));
@@ -940,7 +915,8 @@
 		return false;   /* no more data */
 	}
 
-	if (!bytes || cursor->bvec_iter.bi_bvec_done)
+	if (!bytes || (cursor->bvec_iter.bi_bvec_done &&
+		       page == bvec_iter_page(bvecs, cursor->bvec_iter)))
 		return false;	/* more bytes to process in this segment */
 
 	BUG_ON(cursor->last_piece);
@@ -1147,16 +1123,13 @@
 static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
 {
 	struct ceph_msg_data_cursor *cursor = &msg->cursor;
-	struct ceph_msg_data *data;
 
 	BUG_ON(!length);
 	BUG_ON(length > msg->data_length);
-	BUG_ON(list_empty(&msg->data));
+	BUG_ON(!msg->num_data_items);
 
-	cursor->data_head = &msg->data;
 	cursor->total_resid = length;
-	data = list_first_entry(&msg->data, struct ceph_msg_data, links);
-	cursor->data = data;
+	cursor->data = msg->data;
 
 	__ceph_msg_data_cursor_init(cursor);
 }
@@ -1237,8 +1210,7 @@
 
 	if (!cursor->resid && cursor->total_resid) {
 		WARN_ON(!cursor->last_piece);
-		BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
-		cursor->data = list_next_entry(cursor->data, links);
+		cursor->data++;
 		__ceph_msg_data_cursor_init(cursor);
 		new_piece = true;
 	}
@@ -1254,9 +1226,6 @@
 
 static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
 {
-	BUG_ON(!msg);
-	BUG_ON(!data_len);
-
 	/* Initialize data cursor */
 
 	ceph_msg_data_cursor_init(msg, (size_t)data_len);
@@ -1592,11 +1561,12 @@
 	struct ceph_msg *msg = con->out_msg;
 	struct ceph_msg_data_cursor *cursor = &msg->cursor;
 	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
+	int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
 	u32 crc;
 
 	dout("%s %p msg %p\n", __func__, con, msg);
 
-	if (list_empty(&msg->data))
+	if (!msg->num_data_items)
 		return -EINVAL;
 
 	/*
@@ -1612,7 +1582,6 @@
 		struct page *page;
 		size_t page_offset;
 		size_t length;
-		bool last_piece;
 		int ret;
 
 		if (!cursor->resid) {
@@ -1620,10 +1589,11 @@
 			continue;
 		}
 
-		page = ceph_msg_data_next(cursor, &page_offset, &length,
-					  &last_piece);
-		ret = ceph_tcp_sendpage(con->sock, page, page_offset,
-					length, !last_piece);
+		page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
+		if (length == cursor->total_resid)
+			more = MSG_MORE;
+		ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
+					more);
 		if (ret <= 0) {
 			if (do_datacrc)
 				msg->footer.data_crc = cpu_to_le32(crc);
@@ -1653,13 +1623,16 @@
  */
 static int write_partial_skip(struct ceph_connection *con)
 {
+	int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
 	int ret;
 
 	dout("%s %p %d left\n", __func__, con, con->out_skip);
 	while (con->out_skip > 0) {
 		size_t size = min(con->out_skip, (int) PAGE_SIZE);
 
-		ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
+		if (size == con->out_skip)
+			more = MSG_MORE;
+		ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more);
 		if (ret <= 0)
 			goto out;
 		con->out_skip -= ret;
@@ -1761,12 +1734,14 @@
 	ret = read_partial(con, end, size, &con->actual_peer_addr);
 	if (ret <= 0)
 		goto out;
+	ceph_decode_banner_addr(&con->actual_peer_addr);
 
 	size = sizeof (con->peer_addr_for_me);
 	end += size;
 	ret = read_partial(con, end, size, &con->peer_addr_for_me);
 	if (ret <= 0)
 		goto out;
+	ceph_decode_banner_addr(&con->peer_addr_for_me);
 
 out:
 	return ret;
@@ -1817,21 +1792,22 @@
 {
 	if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
 		pr_err("connect to %s got bad banner\n",
-		       ceph_pr_addr(&con->peer_addr.in_addr));
+		       ceph_pr_addr(&con->peer_addr));
 		con->error_msg = "protocol error, bad banner";
 		return -1;
 	}
 	return 0;
 }
 
-static bool addr_is_blank(struct sockaddr_storage *ss)
+static bool addr_is_blank(struct ceph_entity_addr *addr)
 {
-	struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr;
-	struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr;
+	struct sockaddr_storage ss = addr->in_addr; /* align */
+	struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr;
+	struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr;
 
-	switch (ss->ss_family) {
+	switch (ss.ss_family) {
 	case AF_INET:
-		return addr->s_addr == htonl(INADDR_ANY);
+		return addr4->s_addr == htonl(INADDR_ANY);
 	case AF_INET6:
 		return ipv6_addr_any(addr6);
 	default:
@@ -1839,25 +1815,25 @@
 	}
 }
 
-static int addr_port(struct sockaddr_storage *ss)
+static int addr_port(struct ceph_entity_addr *addr)
 {
-	switch (ss->ss_family) {
+	switch (get_unaligned(&addr->in_addr.ss_family)) {
 	case AF_INET:
-		return ntohs(((struct sockaddr_in *)ss)->sin_port);
+		return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port));
 	case AF_INET6:
-		return ntohs(((struct sockaddr_in6 *)ss)->sin6_port);
+		return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port));
 	}
 	return 0;
 }
 
-static void addr_set_port(struct sockaddr_storage *ss, int p)
+static void addr_set_port(struct ceph_entity_addr *addr, int p)
 {
-	switch (ss->ss_family) {
+	switch (get_unaligned(&addr->in_addr.ss_family)) {
 	case AF_INET:
-		((struct sockaddr_in *)ss)->sin_port = htons(p);
+		put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port);
 		break;
 	case AF_INET6:
-		((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
+		put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port);
 		break;
 	}
 }
@@ -1865,21 +1841,18 @@
 /*
  * Unlike other *_pton function semantics, zero indicates success.
  */
-static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss,
+static int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr,
 		char delim, const char **ipend)
 {
-	struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
-	struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
+	memset(&addr->in_addr, 0, sizeof(addr->in_addr));
 
-	memset(ss, 0, sizeof(*ss));
-
-	if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) {
-		ss->ss_family = AF_INET;
+	if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) {
+		put_unaligned(AF_INET, &addr->in_addr.ss_family);
 		return 0;
 	}
 
-	if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) {
-		ss->ss_family = AF_INET6;
+	if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) {
+		put_unaligned(AF_INET6, &addr->in_addr.ss_family);
 		return 0;
 	}
 
@@ -1891,7 +1864,7 @@
  */
 #ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER
 static int ceph_dns_resolve_name(const char *name, size_t namelen,
-		struct sockaddr_storage *ss, char delim, const char **ipend)
+		struct ceph_entity_addr *addr, char delim, const char **ipend)
 {
 	const char *end, *delim_p;
 	char *colon_p, *ip_addr = NULL;
@@ -1918,9 +1891,10 @@
 		return -EINVAL;
 
 	/* do dns_resolve upcall */
-	ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL);
+	ip_len = dns_query(current->nsproxy->net_ns,
+			   NULL, name, end - name, NULL, &ip_addr, NULL, false);
 	if (ip_len > 0)
-		ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL);
+		ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL);
 	else
 		ret = -ESRCH;
 
@@ -1929,13 +1903,13 @@
 	*ipend = end;
 
 	pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name,
-			ret, ret ? "failed" : ceph_pr_addr(ss));
+			ret, ret ? "failed" : ceph_pr_addr(addr));
 
 	return ret;
 }
 #else
 static inline int ceph_dns_resolve_name(const char *name, size_t namelen,
-		struct sockaddr_storage *ss, char delim, const char **ipend)
+		struct ceph_entity_addr *addr, char delim, const char **ipend)
 {
 	return -EINVAL;
 }
@@ -1946,13 +1920,13 @@
  * then try to extract a hostname to resolve using userspace DNS upcall.
  */
 static int ceph_parse_server_name(const char *name, size_t namelen,
-			struct sockaddr_storage *ss, char delim, const char **ipend)
+		struct ceph_entity_addr *addr, char delim, const char **ipend)
 {
 	int ret;
 
-	ret = ceph_pton(name, namelen, ss, delim, ipend);
+	ret = ceph_pton(name, namelen, addr, delim, ipend);
 	if (ret)
-		ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend);
+		ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend);
 
 	return ret;
 }
@@ -1971,7 +1945,6 @@
 	dout("parse_ips on '%.*s'\n", (int)(end-c), c);
 	for (i = 0; i < max_count; i++) {
 		const char *ipend;
-		struct sockaddr_storage *ss = &addr[i].in_addr;
 		int port;
 		char delim = ',';
 
@@ -1980,7 +1953,7 @@
 			p++;
 		}
 
-		ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend);
+		ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
 		if (ret)
 			goto bad;
 		ret = -EINVAL;
@@ -2011,9 +1984,10 @@
 			port = CEPH_MON_PORT;
 		}
 
-		addr_set_port(ss, port);
+		addr_set_port(&addr[i], port);
+		addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
 
-		dout("parse_ips got %s\n", ceph_pr_addr(ss));
+		dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
 
 		if (p == end)
 			break;
@@ -2042,9 +2016,6 @@
 	if (verify_hello(con) < 0)
 		return -1;
 
-	ceph_decode_addr(&con->actual_peer_addr);
-	ceph_decode_addr(&con->peer_addr_for_me);
-
 	/*
 	 * Make sure the other end is who we wanted.  note that the other
 	 * end may not yet know their ip address, so if it's 0.0.0.0, give
@@ -2052,12 +2023,12 @@
 	 */
 	if (memcmp(&con->peer_addr, &con->actual_peer_addr,
 		   sizeof(con->peer_addr)) != 0 &&
-	    !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
+	    !(addr_is_blank(&con->actual_peer_addr) &&
 	      con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
 		pr_warn("wrong peer, want %s/%d, got %s/%d\n",
-			ceph_pr_addr(&con->peer_addr.in_addr),
+			ceph_pr_addr(&con->peer_addr),
 			(int)le32_to_cpu(con->peer_addr.nonce),
-			ceph_pr_addr(&con->actual_peer_addr.in_addr),
+			ceph_pr_addr(&con->actual_peer_addr),
 			(int)le32_to_cpu(con->actual_peer_addr.nonce));
 		con->error_msg = "wrong peer at address";
 		return -1;
@@ -2066,16 +2037,16 @@
 	/*
 	 * did we learn our address?
 	 */
-	if (addr_is_blank(&con->msgr->inst.addr.in_addr)) {
-		int port = addr_port(&con->msgr->inst.addr.in_addr);
+	if (addr_is_blank(&con->msgr->inst.addr)) {
+		int port = addr_port(&con->msgr->inst.addr);
 
 		memcpy(&con->msgr->inst.addr.in_addr,
 		       &con->peer_addr_for_me.in_addr,
 		       sizeof(con->peer_addr_for_me.in_addr));
-		addr_set_port(&con->msgr->inst.addr.in_addr, port);
+		addr_set_port(&con->msgr->inst.addr, port);
 		encode_my_addr(con->msgr);
 		dout("process_banner learned my addr is %s\n",
-		     ceph_pr_addr(&con->msgr->inst.addr.in_addr));
+		     ceph_pr_addr(&con->msgr->inst.addr));
 	}
 
 	return 0;
@@ -2091,6 +2062,8 @@
 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
 
 	if (con->auth) {
+		int len = le32_to_cpu(con->in_reply.authorizer_len);
+
 		/*
 		 * Any connection that defines ->get_authorizer()
 		 * should also define ->add_authorizer_challenge() and
@@ -2100,8 +2073,7 @@
 		 */
 		if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
 			ret = con->ops->add_authorizer_challenge(
-				    con, con->auth->authorizer_reply_buf,
-				    le32_to_cpu(con->in_reply.authorizer_len));
+				    con, con->auth->authorizer_reply_buf, len);
 			if (ret < 0)
 				return ret;
 
@@ -2111,10 +2083,12 @@
 			return 0;
 		}
 
-		ret = con->ops->verify_authorizer_reply(con);
-		if (ret < 0) {
-			con->error_msg = "bad authorize reply";
-			return ret;
+		if (len) {
+			ret = con->ops->verify_authorizer_reply(con);
+			if (ret < 0) {
+				con->error_msg = "bad authorize reply";
+				return ret;
+			}
 		}
 	}
 
@@ -2123,7 +2097,7 @@
 		pr_err("%s%lld %s feature set mismatch,"
 		       " my %llx < server's %llx, missing %llx\n",
 		       ENTITY_NAME(con->peer_name),
-		       ceph_pr_addr(&con->peer_addr.in_addr),
+		       ceph_pr_addr(&con->peer_addr),
 		       sup_feat, server_feat, server_feat & ~sup_feat);
 		con->error_msg = "missing required protocol features";
 		reset_connection(con);
@@ -2133,7 +2107,7 @@
 		pr_err("%s%lld %s protocol version mismatch,"
 		       " my %d != server's %d\n",
 		       ENTITY_NAME(con->peer_name),
-		       ceph_pr_addr(&con->peer_addr.in_addr),
+		       ceph_pr_addr(&con->peer_addr),
 		       le32_to_cpu(con->out_connect.protocol_version),
 		       le32_to_cpu(con->in_reply.protocol_version));
 		con->error_msg = "protocol version mismatch";
@@ -2167,7 +2141,7 @@
 		     le32_to_cpu(con->in_reply.connect_seq));
 		pr_err("%s%lld %s connection reset\n",
 		       ENTITY_NAME(con->peer_name),
-		       ceph_pr_addr(&con->peer_addr.in_addr));
+		       ceph_pr_addr(&con->peer_addr));
 		reset_connection(con);
 		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
@@ -2224,7 +2198,7 @@
 			pr_err("%s%lld %s protocol feature mismatch,"
 			       " my required %llx > server's %llx, need %llx\n",
 			       ENTITY_NAME(con->peer_name),
-			       ceph_pr_addr(&con->peer_addr.in_addr),
+			       ceph_pr_addr(&con->peer_addr),
 			       req_feat, server_feat, req_feat & ~server_feat);
 			con->error_msg = "missing required protocol features";
 			reset_connection(con);
@@ -2353,8 +2327,7 @@
 	u32 crc = 0;
 	int ret;
 
-	BUG_ON(!msg);
-	if (list_empty(&msg->data))
+	if (!msg->num_data_items)
 		return -EIO;
 
 	if (do_datacrc)
@@ -2432,7 +2405,7 @@
 	if ((s64)seq - (s64)con->in_seq < 1) {
 		pr_info("skipping %s%lld %s seq %lld expected %lld\n",
 			ENTITY_NAME(con->peer_name),
-			ceph_pr_addr(&con->peer_addr.in_addr),
+			ceph_pr_addr(&con->peer_addr),
 			seq, con->in_seq + 1);
 		con->in_base_pos = -front_len - middle_len - data_len -
 			sizeof_footer(con);
@@ -3011,10 +2984,10 @@
 static void con_fault(struct ceph_connection *con)
 {
 	dout("fault %p state %lu to peer %s\n",
-	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+	     con, con->state, ceph_pr_addr(&con->peer_addr));
 
 	pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
-		ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+		ceph_pr_addr(&con->peer_addr), con->error_msg);
 	con->error_msg = NULL;
 
 	WARN_ON(con->state != CON_STATE_CONNECTING &&
@@ -3058,6 +3031,12 @@
 }
 
 
+void ceph_messenger_reset_nonce(struct ceph_messenger *msgr)
+{
+	u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000;
+	msgr->inst.addr.nonce = cpu_to_le32(nonce);
+	encode_my_addr(msgr);
+}
 
 /*
  * initialize a new messenger instance
@@ -3240,9 +3219,10 @@
 	dout("con_keepalive %p\n", con);
 	mutex_lock(&con->mutex);
 	clear_standby(con);
+	con_flag_set(con, CON_FLAG_KEEPALIVE_PENDING);
 	mutex_unlock(&con->mutex);
-	if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 &&
-	    con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
+
+	if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
 		queue_con(con);
 }
 EXPORT_SYMBOL(ceph_con_keepalive);
@@ -3262,32 +3242,16 @@
 	return false;
 }
 
-static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
+static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg)
 {
-	struct ceph_msg_data *data;
-
-	if (WARN_ON(!ceph_msg_data_type_valid(type)))
-		return NULL;
-
-	data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
-	if (!data)
-		return NULL;
-
-	data->type = type;
-	INIT_LIST_HEAD(&data->links);
-
-	return data;
+	BUG_ON(msg->num_data_items >= msg->max_data_items);
+	return &msg->data[msg->num_data_items++];
 }
 
 static void ceph_msg_data_destroy(struct ceph_msg_data *data)
 {
-	if (!data)
-		return;
-
-	WARN_ON(!list_empty(&data->links));
 	if (data->type == CEPH_MSG_DATA_PAGELIST)
 		ceph_pagelist_release(data->pagelist);
-	kmem_cache_free(ceph_msg_data_cache, data);
 }
 
 void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
@@ -3298,13 +3262,12 @@
 	BUG_ON(!pages);
 	BUG_ON(!length);
 
-	data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
-	BUG_ON(!data);
+	data = ceph_msg_data_add(msg);
+	data->type = CEPH_MSG_DATA_PAGES;
 	data->pages = pages;
 	data->length = length;
 	data->alignment = alignment & ~PAGE_MASK;
 
-	list_add_tail(&data->links, &msg->data);
 	msg->data_length += length;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_pages);
@@ -3317,11 +3280,11 @@
 	BUG_ON(!pagelist);
 	BUG_ON(!pagelist->length);
 
-	data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
-	BUG_ON(!data);
+	data = ceph_msg_data_add(msg);
+	data->type = CEPH_MSG_DATA_PAGELIST;
+	refcount_inc(&pagelist->refcnt);
 	data->pagelist = pagelist;
 
-	list_add_tail(&data->links, &msg->data);
 	msg->data_length += pagelist->length;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
@@ -3332,12 +3295,11 @@
 {
 	struct ceph_msg_data *data;
 
-	data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
-	BUG_ON(!data);
+	data = ceph_msg_data_add(msg);
+	data->type = CEPH_MSG_DATA_BIO;
 	data->bio_pos = *bio_pos;
 	data->bio_length = length;
 
-	list_add_tail(&data->links, &msg->data);
 	msg->data_length += length;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_bio);
@@ -3348,11 +3310,10 @@
 {
 	struct ceph_msg_data *data;
 
-	data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS);
-	BUG_ON(!data);
+	data = ceph_msg_data_add(msg);
+	data->type = CEPH_MSG_DATA_BVECS;
 	data->bvec_pos = *bvec_pos;
 
-	list_add_tail(&data->links, &msg->data);
 	msg->data_length += bvec_pos->iter.bi_size;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
@@ -3361,8 +3322,8 @@
  * construct a new message with given type, size
  * the new msg has a ref count of 1.
  */
-struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
-			      bool can_fail)
+struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
+			       gfp_t flags, bool can_fail)
 {
 	struct ceph_msg *m;
 
@@ -3376,7 +3337,6 @@
 
 	INIT_LIST_HEAD(&m->list_head);
 	kref_init(&m->kref);
-	INIT_LIST_HEAD(&m->data);
 
 	/* front */
 	if (front_len) {
@@ -3391,6 +3351,15 @@
 	}
 	m->front_alloc_len = m->front.iov_len = front_len;
 
+	if (max_data_items) {
+		m->data = kmalloc_array(max_data_items, sizeof(*m->data),
+					flags);
+		if (!m->data)
+			goto out2;
+
+		m->max_data_items = max_data_items;
+	}
+
 	dout("ceph_msg_new %p front %d\n", m, front_len);
 	return m;
 
@@ -3407,6 +3376,13 @@
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(ceph_msg_new2);
+
+struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
+			      bool can_fail)
+{
+	return ceph_msg_new2(type, front_len, 0, flags, can_fail);
+}
 EXPORT_SYMBOL(ceph_msg_new);
 
 /*
@@ -3502,13 +3478,14 @@
 {
 	dout("%s %p\n", __func__, m);
 	kvfree(m->front.iov_base);
+	kfree(m->data);
 	kmem_cache_free(ceph_msg_cache, m);
 }
 
 static void ceph_msg_release(struct kref *kref)
 {
 	struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
-	struct ceph_msg_data *data, *next;
+	int i;
 
 	dout("%s %p\n", __func__, m);
 	WARN_ON(!list_empty(&m->list_head));
@@ -3521,11 +3498,8 @@
 		m->middle = NULL;
 	}
 
-	list_for_each_entry_safe(data, next, &m->data, links) {
-		list_del_init(&data->links);
-		ceph_msg_data_destroy(data);
-	}
-	m->data_length = 0;
+	for (i = 0; i < m->num_data_items; i++)
+		ceph_msg_data_destroy(&m->data[i]);
 
 	if (m->pool)
 		ceph_msgpool_put(m->pool, m);
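
The sendpage rework is easy to misread: every chunk now goes out with
MSG_MORE | MSG_SENDPAGE_NOTLAST except the final chunk of the payload, which
drops MSG_SENDPAGE_NOTLAST but keeps MSG_MORE because the message footer
still follows on the socket. MSG_SENDPAGE_NOTLAST is the sendpage-specific
counterpart of MSG_MORE, so both hints are supplied. Reduced to its core
(single-page payload for brevity; send_page() is a hypothetical stand-in for
ceph_tcp_sendpage()):

	/* hypothetical transport helper, like ceph_tcp_sendpage() above */
	static int send_page(struct socket *sock, struct page *page,
			     int offset, size_t size, int flags);

	static int send_payload(struct socket *sock, struct page *page,
				size_t resid)
	{
		int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
		int ret;

		while (resid > 0) {
			size_t len = min_t(size_t, resid, PAGE_SIZE);

			if (len == resid)	 /* last chunk: drop NOTLAST, */
				more = MSG_MORE; /* footer still follows */
			ret = send_page(sock, page, 0, len, more);
			if (ret <= 0)
				return ret;
			resid -= ret;
		}
		return 0;
	}

The other big messenger change, replacing the ceph_msg_data list with a
fixed array sized by max_data_items, shows up again in msgpool.c and
osd_client.c below, where the item counts are computed up front.
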
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 18deb3d..7256c40 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -39,7 +39,7 @@
 /*
  * Decode a monmap blob (e.g., during mount).
  */
-struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
+static struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
 {
 	struct ceph_monmap *m = NULL;
 	int i, err = -EINVAL;
@@ -50,7 +50,7 @@
 	ceph_decode_32_safe(&p, end, len, bad);
 	ceph_decode_need(&p, end, len, bad);
 
-	dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p));
+	dout("monmap_decode %p %p len %d (%d)\n", p, end, len, (int)(end-p));
 	p += sizeof(u16);  /* skip version */
 
 	ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
@@ -58,7 +58,6 @@
 	epoch = ceph_decode_32(&p);
 
 	num_mon = ceph_decode_32(&p);
-	ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad);
 
 	if (num_mon > CEPH_MAX_MON)
 		goto bad;
@@ -68,17 +67,22 @@
 	m->fsid = fsid;
 	m->epoch = epoch;
 	m->num_mon = num_mon;
-	ceph_decode_copy(&p, m->mon_inst, num_mon*sizeof(m->mon_inst[0]));
-	for (i = 0; i < num_mon; i++)
-		ceph_decode_addr(&m->mon_inst[i].addr);
+	for (i = 0; i < num_mon; ++i) {
+		struct ceph_entity_inst *inst = &m->mon_inst[i];
 
+		/* copy name portion */
+		ceph_decode_copy_safe(&p, end, &inst->name,
+					sizeof(inst->name), bad);
+		err = ceph_decode_entity_addr(&p, end, &inst->addr);
+		if (err)
+			goto bad;
+	}
 	dout("monmap_decode epoch %d, num_mon %d\n", m->epoch,
 	     m->num_mon);
 	for (i = 0; i < m->num_mon; i++)
 		dout("monmap_decode  mon%d is %s\n", i,
-		     ceph_pr_addr(&m->mon_inst[i].addr.in_addr));
+		     ceph_pr_addr(&m->mon_inst[i].addr));
 	return m;
-
 bad:
 	dout("monmap_decode failed with %d\n", err);
 	kfree(m);
@@ -203,12 +207,19 @@
 {
 	if (!monc->hunting)
 		pr_info("mon%d %s session lost, hunting for new mon\n",
-		    monc->cur_mon, ceph_pr_addr(&monc->con.peer_addr.in_addr));
+		    monc->cur_mon, ceph_pr_addr(&monc->con.peer_addr));
 
 	__close_session(monc);
 	__open_session(monc);
 }
 
+void ceph_monc_reopen_session(struct ceph_mon_client *monc)
+{
+	mutex_lock(&monc->mutex);
+	reopen_session(monc);
+	mutex_unlock(&monc->mutex);
+}
+
 static void un_backoff(struct ceph_mon_client *monc)
 {
 	monc->hunt_mult /= 2; /* reduce by 50% */
@@ -469,6 +480,7 @@
 	if (IS_ERR(monmap)) {
 		pr_err("problem decoding monmap, %d\n",
 		       (int)PTR_ERR(monmap));
+		ceph_msg_dump(msg);
 		goto out;
 	}
 
@@ -922,6 +934,15 @@
 	mutex_unlock(&monc->mutex);
 
 	ret = wait_generic_request(req);
+	if (!ret)
+		/*
+		 * Make sure we have the osdmap that includes the blacklist
+		 * entry.  This is needed to ensure that the OSDs pick up the
+		 * new blacklist before processing any future requests from
+		 * this client.
+		 */
+		ret = ceph_wait_for_latest_osdmap(monc->client, 0);
+
 out:
 	put_generic_request(req);
 	return ret;
@@ -1169,7 +1190,7 @@
 		__resend_generic_request(monc);
 
 		pr_info("mon%d %s session established\n", monc->cur_mon,
-			ceph_pr_addr(&monc->con.peer_addr.in_addr));
+			ceph_pr_addr(&monc->con.peer_addr));
 	}
 
 out:
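
monmap_decode() stops trusting a num_mon * sizeof(...) bulk copy and decodes
each entry with the bounds-checked helpers instead, which is required once
entity_addr_t entries become variable length. The *_safe macros are defined
in include/linux/ceph/decode.h; roughly (a hedged reconstruction, not the
verbatim macro):

	/* Roughly what ceph_decode_copy_safe(p, end, v, n, bad) expands to */
	#define decode_copy_safe(p, end, v, n, bad)			\
		do {							\
			if ((size_t)((end) - *(p)) < (size_t)(n))	\
				goto bad;	/* input too short */	\
			memcpy((v), *(p), (n));				\
			*(p) += (n);					\
		} while (0)

Also worth noting here: after a successful blacklist request, the client now
waits until it holds the osdmap that contains its own blacklist entry, so
that, per the new comment above, the OSDs pick up the blacklist before
processing any further requests from this client.
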
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index 7257153..e3ecb80 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -14,7 +14,8 @@
 	struct ceph_msgpool *pool = arg;
 	struct ceph_msg *msg;
 
-	msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
+	msg = ceph_msg_new2(pool->type, pool->front_len, pool->max_data_items,
+			    gfp_mask, true);
 	if (!msg) {
 		dout("msgpool_alloc %s failed\n", pool->name);
 	} else {
@@ -35,11 +36,13 @@
 }
 
 int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
-		      int front_len, int size, bool blocking, const char *name)
+		      int front_len, int max_data_items, int size,
+		      const char *name)
 {
 	dout("msgpool %s init\n", name);
 	pool->type = type;
 	pool->front_len = front_len;
+	pool->max_data_items = max_data_items;
 	pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
 	if (!pool->pool)
 		return -ENOMEM;
@@ -53,18 +56,21 @@
 	mempool_destroy(pool->pool);
 }
 
-struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
-				  int front_len)
+struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len,
+				  int max_data_items)
 {
 	struct ceph_msg *msg;
 
-	if (front_len > pool->front_len) {
-		dout("msgpool_get %s need front %d, pool size is %d\n",
-		       pool->name, front_len, pool->front_len);
-		WARN_ON(1);
+	if (front_len > pool->front_len ||
+	    max_data_items > pool->max_data_items) {
+		pr_warn_ratelimited("%s need %d/%d, pool %s has %d/%d\n",
+		    __func__, front_len, max_data_items, pool->name,
+		    pool->front_len, pool->max_data_items);
+		WARN_ON_ONCE(1);
 
 		/* try to alloc a fresh message */
-		return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
+		return ceph_msg_new2(pool->type, front_len, max_data_items,
+				     GFP_NOFS, false);
 	}
 
 	msg = mempool_alloc(pool->pool, GFP_NOFS);
@@ -80,6 +86,9 @@
 	msg->front.iov_len = pool->front_len;
 	msg->hdr.front_len = cpu_to_le32(pool->front_len);
 
+	msg->data_length = 0;
+	msg->num_data_items = 0;
+
 	kref_init(&msg->kref);  /* retake single ref */
 	mempool_free(msg, pool->pool);
 }
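
Each msgpool now pre-sizes its messages along two axes, front_len bytes of
front buffer and max_data_items data slots, and ceph_msgpool_get() falls back
to a one-off ceph_msg_new2() (warning once) when a caller exceeds either.
Underneath sits the plain mempool API; a hedged sketch of that pattern with a
toy element type:

	#include <linux/mempool.h>
	#include <linux/slab.h>

	/* Toy stand-ins for msgpool_alloc()/msgpool_free() above. */
	static void *elem_alloc(gfp_t gfp, void *pool_data)
	{
		return kmalloc(64, gfp);
	}

	static void elem_free(void *elem, void *pool_data)
	{
		kfree(elem);
	}

	static mempool_t *pool;

	static int pool_init(void)
	{
		/* keep 8 elements in reserve so gets cannot fail once primed */
		pool = mempool_create(8, elem_alloc, elem_free, NULL);
		return pool ? 0 : -ENOMEM;
	}

Resetting data_length and num_data_items in the put path (ceph_msgpool_put())
is what lets a recycled message be reused as if freshly allocated.
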
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 60934bd..ba45b07 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -126,6 +126,9 @@
 	osd_data->type = CEPH_OSD_DATA_TYPE_NONE;
 }
 
+/*
+ * Consumes @pages if @own_pages is true.
+ */
 static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data,
 			struct page **pages, u64 length, u32 alignment,
 			bool pages_from_pool, bool own_pages)
@@ -138,6 +141,9 @@
 	osd_data->own_pages = own_pages;
 }
 
+/*
+ * Consumes a ref on @pagelist.
+ */
 static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
 			struct ceph_pagelist *pagelist)
 {
@@ -165,14 +171,6 @@
 	osd_data->num_bvecs = num_bvecs;
 }
 
-#define osd_req_op_data(oreq, whch, typ, fld)				\
-({									\
-	struct ceph_osd_request *__oreq = (oreq);			\
-	unsigned int __whch = (whch);					\
-	BUG_ON(__whch >= __oreq->r_num_ops);				\
-	&__oreq->r_ops[__whch].typ.fld;					\
-})
-
 static struct ceph_osd_data *
 osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
 {
@@ -362,6 +360,8 @@
 		num_pages = calc_pages_for((u64)osd_data->alignment,
 						(u64)osd_data->length);
 		ceph_release_page_vector(osd_data->pages, num_pages);
+	} else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
+		ceph_pagelist_release(osd_data->pagelist);
 	}
 	ceph_osd_data_init(osd_data);
 }
@@ -402,6 +402,9 @@
 	case CEPH_OSD_OP_LIST_WATCHERS:
 		ceph_osd_data_release(&op->list_watchers.response_data);
 		break;
+	case CEPH_OSD_OP_COPY_FROM:
+		ceph_osd_data_release(&op->copy_from.osd_data);
+		break;
 	default:
 		break;
 	}
@@ -467,7 +470,7 @@
 {
 	WARN_ON(!RB_EMPTY_NODE(&req->r_node));
 	WARN_ON(!RB_EMPTY_NODE(&req->r_mc_node));
-	WARN_ON(!list_empty(&req->r_unsafe_item));
+	WARN_ON(!list_empty(&req->r_private_item));
 	WARN_ON(req->r_osd);
 }
 
@@ -527,7 +530,7 @@
 	init_completion(&req->r_completion);
 	RB_CLEAR_NODE(&req->r_node);
 	RB_CLEAR_NODE(&req->r_mc_node);
-	INIT_LIST_HEAD(&req->r_unsafe_item);
+	INIT_LIST_HEAD(&req->r_private_item);
 
 	target_init(&req->r_t);
 }
@@ -606,12 +609,15 @@
 	return 8 + 4 + 4 + 4 + (oloc->pool_ns ? oloc->pool_ns->len : 0);
 }
 
-int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
+static int __ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp,
+				      int num_request_data_items,
+				      int num_reply_data_items)
 {
 	struct ceph_osd_client *osdc = req->r_osdc;
 	struct ceph_msg *msg;
 	int msg_size;
 
+	WARN_ON(req->r_request || req->r_reply);
 	WARN_ON(ceph_oid_empty(&req->r_base_oid));
 	WARN_ON(ceph_oloc_empty(&req->r_base_oloc));
 
@@ -633,9 +639,11 @@
 	msg_size += 4 + 8; /* retry_attempt, features */
 
 	if (req->r_mempool)
-		msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
+		msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size,
+				       num_request_data_items);
 	else
-		msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp, true);
+		msg = ceph_msg_new2(CEPH_MSG_OSD_OP, msg_size,
+				    num_request_data_items, gfp, true);
 	if (!msg)
 		return -ENOMEM;
 
@@ -648,9 +656,11 @@
 	msg_size += req->r_num_ops * sizeof(struct ceph_osd_op);
 
 	if (req->r_mempool)
-		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
+		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size,
+				       num_reply_data_items);
 	else
-		msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, gfp, true);
+		msg = ceph_msg_new2(CEPH_MSG_OSD_OPREPLY, msg_size,
+				    num_reply_data_items, gfp, true);
 	if (!msg)
 		return -ENOMEM;
 
@@ -658,7 +668,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL(ceph_osdc_alloc_messages);
 
 static bool osd_req_opcode_valid(u16 opcode)
 {
@@ -671,6 +680,65 @@
 	}
 }
 
+static void get_num_data_items(struct ceph_osd_request *req,
+			       int *num_request_data_items,
+			       int *num_reply_data_items)
+{
+	struct ceph_osd_req_op *op;
+
+	*num_request_data_items = 0;
+	*num_reply_data_items = 0;
+
+	for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) {
+		switch (op->op) {
+		/* request */
+		case CEPH_OSD_OP_WRITE:
+		case CEPH_OSD_OP_WRITEFULL:
+		case CEPH_OSD_OP_SETXATTR:
+		case CEPH_OSD_OP_CMPXATTR:
+		case CEPH_OSD_OP_NOTIFY_ACK:
+		case CEPH_OSD_OP_COPY_FROM:
+			*num_request_data_items += 1;
+			break;
+
+		/* reply */
+		case CEPH_OSD_OP_STAT:
+		case CEPH_OSD_OP_READ:
+		case CEPH_OSD_OP_LIST_WATCHERS:
+			*num_reply_data_items += 1;
+			break;
+
+		/* both */
+		case CEPH_OSD_OP_NOTIFY:
+			*num_request_data_items += 1;
+			*num_reply_data_items += 1;
+			break;
+		case CEPH_OSD_OP_CALL:
+			*num_request_data_items += 2;
+			*num_reply_data_items += 1;
+			break;
+
+		default:
+			WARN_ON(!osd_req_opcode_valid(op->op));
+			break;
+		}
+	}
+}
+
+/*
+ * oid, oloc and OSD op opcode(s) must be filled in before this function
+ * is called.
+ */
+int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
+{
+	int num_request_data_items, num_reply_data_items;
+
+	get_num_data_items(req, &num_request_data_items, &num_reply_data_items);
+	return __ceph_osdc_alloc_messages(req, gfp, num_request_data_items,
+					  num_reply_data_items);
+}
+EXPORT_SYMBOL(ceph_osdc_alloc_messages);
+
 /*
  * This is an osd op init function for opcodes that have no data or
  * other information associated with them.  It also serves as a
@@ -767,40 +835,45 @@
 EXPORT_SYMBOL(osd_req_op_extent_dup_last);
 
 int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
-			u16 opcode, const char *class, const char *method)
+			const char *class, const char *method)
 {
-	struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
-						      opcode, 0);
+	struct ceph_osd_req_op *op;
 	struct ceph_pagelist *pagelist;
 	size_t payload_len = 0;
 	size_t size;
+	int ret;
 
-	BUG_ON(opcode != CEPH_OSD_OP_CALL);
+	op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0);
 
-	pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+	pagelist = ceph_pagelist_alloc(GFP_NOFS);
 	if (!pagelist)
 		return -ENOMEM;
 
-	ceph_pagelist_init(pagelist);
-
 	op->cls.class_name = class;
 	size = strlen(class);
 	BUG_ON(size > (size_t) U8_MAX);
 	op->cls.class_len = size;
-	ceph_pagelist_append(pagelist, class, size);
+	ret = ceph_pagelist_append(pagelist, class, size);
+	if (ret)
+		goto err_pagelist_free;
 	payload_len += size;
 
 	op->cls.method_name = method;
 	size = strlen(method);
 	BUG_ON(size > (size_t) U8_MAX);
 	op->cls.method_len = size;
-	ceph_pagelist_append(pagelist, method, size);
+	ret = ceph_pagelist_append(pagelist, method, size);
+	if (ret)
+		goto err_pagelist_free;
 	payload_len += size;
 
 	osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
-
 	op->indata_len = payload_len;
 	return 0;
+
+err_pagelist_free:
+	ceph_pagelist_release(pagelist);
+	return ret;
 }
 EXPORT_SYMBOL(osd_req_op_cls_init);
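
Callers drop the now-redundant opcode argument and must check the return
value, since the pagelist alloc/append can now fail; a converted call might
look like (the class/method strings are illustrative):

	ret = osd_req_op_cls_init(req, 0, "rbd", "get_size");
	if (ret)
		goto out_put_req;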
 
@@ -812,21 +885,24 @@
 						      opcode, 0);
 	struct ceph_pagelist *pagelist;
 	size_t payload_len;
+	int ret;
 
 	BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR);
 
-	pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+	pagelist = ceph_pagelist_alloc(GFP_NOFS);
 	if (!pagelist)
 		return -ENOMEM;
 
-	ceph_pagelist_init(pagelist);
-
 	payload_len = strlen(name);
 	op->xattr.name_len = payload_len;
-	ceph_pagelist_append(pagelist, name, payload_len);
+	ret = ceph_pagelist_append(pagelist, name, payload_len);
+	if (ret)
+		goto err_pagelist_free;
 
 	op->xattr.value_len = size;
-	ceph_pagelist_append(pagelist, value, size);
+	ret = ceph_pagelist_append(pagelist, value, size);
+	if (ret)
+		goto err_pagelist_free;
 	payload_len += size;
 
 	op->xattr.cmp_op = cmp_op;
@@ -835,6 +911,10 @@
 	ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
 	op->indata_len = payload_len;
 	return 0;
+
+err_pagelist_free:
+	ceph_pagelist_release(pagelist);
+	return ret;
 }
 EXPORT_SYMBOL(osd_req_op_xattr_init);
 
@@ -900,12 +980,6 @@
 static u32 osd_req_encode_op(struct ceph_osd_op *dst,
 			     const struct ceph_osd_req_op *src)
 {
-	if (WARN_ON(!osd_req_opcode_valid(src->op))) {
-		pr_err("unrecognized osd opcode %d\n", src->op);
-
-		return 0;
-	}
-
 	switch (src->op) {
 	case CEPH_OSD_OP_STAT:
 		break;
@@ -955,6 +1029,14 @@
 	case CEPH_OSD_OP_CREATE:
 	case CEPH_OSD_OP_DELETE:
 		break;
+	case CEPH_OSD_OP_COPY_FROM:
+		dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid);
+		dst->copy_from.src_version =
+			cpu_to_le64(src->copy_from.src_version);
+		dst->copy_from.flags = src->copy_from.flags;
+		dst->copy_from.src_fadvise_flags =
+			cpu_to_le32(src->copy_from.src_fadvise_flags);
+		break;
 	default:
 		pr_err("unsupported osd opcode %s\n",
 			ceph_osd_op_name(src->op));
@@ -1038,7 +1120,15 @@
 	if (flags & CEPH_OSD_FLAG_WRITE)
 		req->r_data_offset = off;
 
-	r = ceph_osdc_alloc_messages(req, GFP_NOFS);
+	if (num_ops > 1)
+		/*
+		 * This is a special case for ceph_writepages_start(), but it
+		 * also covers ceph_uninline_data().  If more multi-op request
+		 * use cases emerge, we will need a separate helper.
+		 */
+		r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0);
+	else
+		r = ceph_osdc_alloc_messages(req, GFP_NOFS);
 	if (r)
 		goto fail;
 
@@ -1415,7 +1505,6 @@
 
 static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 					   struct ceph_osd_request_target *t,
-					   struct ceph_connection *con,
 					   bool any_change)
 {
 	struct ceph_pg_pool_info *pi;
@@ -1423,7 +1512,7 @@
 	struct ceph_osds up, acting;
 	bool force_resend = false;
 	bool unpaused = false;
-	bool legacy_change;
+	bool legacy_change = false;
 	bool split = false;
 	bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
 	bool recovery_deletes = ceph_osdmap_flag(osdc,
@@ -1511,15 +1600,14 @@
 		t->osd = acting.primary;
 	}
 
-	if (unpaused || legacy_change || force_resend ||
-	    (split && con && CEPH_HAVE_FEATURE(con->peer_features,
-					       RESEND_ON_SPLIT)))
+	if (unpaused || legacy_change || force_resend || split)
 		ct_res = CALC_TARGET_NEED_RESEND;
 	else
 		ct_res = CALC_TARGET_NO_ACTION;
 
 out:
-	dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd);
+	dout("%s t %p -> %d%d%d%d ct_res %d osd%d\n", __func__, t, unpaused,
+	     legacy_change, force_resend, split, ct_res, t->osd);
 	return ct_res;
 }
 
@@ -1845,48 +1933,55 @@
 	return true;
 }
 
-static void setup_request_data(struct ceph_osd_request *req,
-			       struct ceph_msg *msg)
+/*
+ * Keep get_num_data_items() in sync with this function.
+ */
+static void setup_request_data(struct ceph_osd_request *req)
 {
-	u32 data_len = 0;
-	int i;
+	struct ceph_msg *request_msg = req->r_request;
+	struct ceph_msg *reply_msg = req->r_reply;
+	struct ceph_osd_req_op *op;
 
-	if (!list_empty(&msg->data))
+	if (req->r_request->num_data_items || req->r_reply->num_data_items)
 		return;
 
-	WARN_ON(msg->data_length);
-	for (i = 0; i < req->r_num_ops; i++) {
-		struct ceph_osd_req_op *op = &req->r_ops[i];
-
+	WARN_ON(request_msg->data_length || reply_msg->data_length);
+	for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) {
 		switch (op->op) {
 		/* request */
 		case CEPH_OSD_OP_WRITE:
 		case CEPH_OSD_OP_WRITEFULL:
 			WARN_ON(op->indata_len != op->extent.length);
-			ceph_osdc_msg_data_add(msg, &op->extent.osd_data);
+			ceph_osdc_msg_data_add(request_msg,
+					       &op->extent.osd_data);
 			break;
 		case CEPH_OSD_OP_SETXATTR:
 		case CEPH_OSD_OP_CMPXATTR:
 			WARN_ON(op->indata_len != op->xattr.name_len +
 						  op->xattr.value_len);
-			ceph_osdc_msg_data_add(msg, &op->xattr.osd_data);
+			ceph_osdc_msg_data_add(request_msg,
+					       &op->xattr.osd_data);
 			break;
 		case CEPH_OSD_OP_NOTIFY_ACK:
-			ceph_osdc_msg_data_add(msg,
+			ceph_osdc_msg_data_add(request_msg,
 					       &op->notify_ack.request_data);
 			break;
+		case CEPH_OSD_OP_COPY_FROM:
+			ceph_osdc_msg_data_add(request_msg,
+					       &op->copy_from.osd_data);
+			break;
 
 		/* reply */
 		case CEPH_OSD_OP_STAT:
-			ceph_osdc_msg_data_add(req->r_reply,
+			ceph_osdc_msg_data_add(reply_msg,
 					       &op->raw_data_in);
 			break;
 		case CEPH_OSD_OP_READ:
-			ceph_osdc_msg_data_add(req->r_reply,
+			ceph_osdc_msg_data_add(reply_msg,
 					       &op->extent.osd_data);
 			break;
 		case CEPH_OSD_OP_LIST_WATCHERS:
-			ceph_osdc_msg_data_add(req->r_reply,
+			ceph_osdc_msg_data_add(reply_msg,
 					       &op->list_watchers.response_data);
 			break;
 
@@ -1895,25 +1990,23 @@
 			WARN_ON(op->indata_len != op->cls.class_len +
 						  op->cls.method_len +
 						  op->cls.indata_len);
-			ceph_osdc_msg_data_add(msg, &op->cls.request_info);
+			ceph_osdc_msg_data_add(request_msg,
+					       &op->cls.request_info);
 			/* optional, can be NONE */
-			ceph_osdc_msg_data_add(msg, &op->cls.request_data);
+			ceph_osdc_msg_data_add(request_msg,
+					       &op->cls.request_data);
 			/* optional, can be NONE */
-			ceph_osdc_msg_data_add(req->r_reply,
+			ceph_osdc_msg_data_add(reply_msg,
 					       &op->cls.response_data);
 			break;
 		case CEPH_OSD_OP_NOTIFY:
-			ceph_osdc_msg_data_add(msg,
+			ceph_osdc_msg_data_add(request_msg,
 					       &op->notify.request_data);
-			ceph_osdc_msg_data_add(req->r_reply,
+			ceph_osdc_msg_data_add(reply_msg,
 					       &op->notify.response_data);
 			break;
 		}
-
-		data_len += op->indata_len;
 	}
-
-	WARN_ON(data_len != msg->data_length);
 }
 
 static void encode_pgid(void **p, const struct ceph_pg *pgid)
@@ -1961,7 +2054,7 @@
 			req->r_data_offset || req->r_snapc);
 	}
 
-	setup_request_data(req, msg);
+	setup_request_data(req);
 
 	encode_spgid(&p, &req->r_t.spgid); /* actual spg */
 	ceph_encode_32(&p, req->r_t.pgid.seed); /* raw hash */
@@ -2195,7 +2288,7 @@
 	dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
 
 again:
-	ct_res = calc_target(osdc, &req->r_t, NULL, false);
+	ct_res = calc_target(osdc, &req->r_t, false);
 	if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked)
 		goto promote;
 
@@ -2229,7 +2322,7 @@
 		   (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
 		    pool_full(osdc, req->r_t.base_oloc.pool))) {
 		dout("req %p full/pool_full\n", req);
-		if (osdc->abort_on_full) {
+		if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) {
 			err = -ENOSPC;
 		} else {
 			pr_warn_ratelimited("FULL or reached pool quota\n");
@@ -2312,7 +2405,7 @@
 
 static void __complete_request(struct ceph_osd_request *req)
 {
-	dout("%s req %p tid %llu cb %pf result %d\n", __func__, req,
+	dout("%s req %p tid %llu cb %ps result %d\n", __func__, req,
 	     req->r_tid, req->r_callback, req->r_result);
 
 	if (req->r_callback)
@@ -2399,6 +2492,14 @@
 }
 EXPORT_SYMBOL(ceph_osdc_abort_requests);
 
+void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc)
+{
+	down_write(&osdc->lock);
+	osdc->abort_err = 0;
+	up_write(&osdc->lock);
+}
+EXPORT_SYMBOL(ceph_osdc_clear_abort_err);
+
 static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
 {
 	if (likely(eb > osdc->epoch_barrier)) {
@@ -2459,7 +2560,7 @@
 {
 	bool victims = false;
 
-	if (osdc->abort_on_full &&
+	if (ceph_test_opt(osdc->client, ABORT_ON_FULL) &&
 	    (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc)))
 		for_each_request(osdc, abort_on_full_fn, &victims);
 }
@@ -3001,11 +3102,21 @@
 	struct ceph_osd_client *osdc = lreq->osdc;
 	struct ceph_osd *osd;
 
-	calc_target(osdc, &lreq->t, NULL, false);
+	down_write(&osdc->lock);
+	linger_register(lreq);
+	if (lreq->is_watch) {
+		lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
+		lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
+	} else {
+		lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
+	}
+
+	calc_target(osdc, &lreq->t, false);
 	osd = lookup_create_osd(osdc, lreq->t.osd, true);
 	link_linger(osd, lreq);
 
 	send_linger(lreq);
+	up_write(&osdc->lock);
 }
 
 static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq)
@@ -3617,7 +3728,7 @@
 	struct ceph_osd_client *osdc = lreq->osdc;
 	enum calc_target_result ct_res;
 
-	ct_res = calc_target(osdc, &lreq->t, NULL, true);
+	ct_res = calc_target(osdc, &lreq->t, true);
 	if (ct_res == CALC_TARGET_NEED_RESEND) {
 		struct ceph_osd *osd;
 
@@ -3689,8 +3800,7 @@
 		n = rb_next(n); /* unlink_request(), check_pool_dne() */
 
 		dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
-		ct_res = calc_target(osdc, &req->r_t, &req->r_osd->o_con,
-				     false);
+		ct_res = calc_target(osdc, &req->r_t, false);
 		switch (ct_res) {
 		case CALC_TARGET_NO_ACTION:
 			force_resend_writes = cleared_full ||
@@ -3799,7 +3909,7 @@
 		n = rb_next(n);
 
 		if (req->r_t.epoch < osdc->osdmap->epoch) {
-			ct_res = calc_target(osdc, &req->r_t, NULL, false);
+			ct_res = calc_target(osdc, &req->r_t, false);
 			if (ct_res == CALC_TARGET_POOL_DNE) {
 				erase_request(need_resend, req);
 				check_pool_dne(req);
@@ -4318,9 +4428,7 @@
 			     lreq->notify_id, notify_id);
 		} else if (!completion_done(&lreq->notify_finish_wait)) {
 			struct ceph_msg_data *data =
-			    list_first_entry_or_null(&msg->data,
-						     struct ceph_msg_data,
-						     links);
+			    msg->num_data_items ? &msg->data[0] : NULL;
 
 			if (data) {
 				if (lreq->preply_pages) {
@@ -4476,6 +4584,23 @@
 
 	ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
 	ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
+	return req;
+}
+
+static struct ceph_osd_request *
+alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
+{
+	struct ceph_osd_request *req;
+
+	req = alloc_linger_request(lreq);
+	if (!req)
+		return NULL;
+
+	/*
+	 * Pass 0 for cookie because we don't know it yet; it will be
+	 * filled in by linger_submit().
+	 */
+	osd_req_op_watch_init(req, 0, 0, watch_opcode);
 
 	if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
 		ceph_osdc_put_request(req);
@@ -4514,27 +4639,19 @@
 	lreq->t.flags = CEPH_OSD_FLAG_WRITE;
 	ktime_get_real_ts64(&lreq->mtime);
 
-	lreq->reg_req = alloc_linger_request(lreq);
+	lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
 	if (!lreq->reg_req) {
 		ret = -ENOMEM;
 		goto err_put_lreq;
 	}
 
-	lreq->ping_req = alloc_linger_request(lreq);
+	lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
 	if (!lreq->ping_req) {
 		ret = -ENOMEM;
 		goto err_put_lreq;
 	}
 
-	down_write(&osdc->lock);
-	linger_register(lreq); /* before osd_req_op_* */
-	osd_req_op_watch_init(lreq->reg_req, 0, lreq->linger_id,
-			      CEPH_OSD_WATCH_OP_WATCH);
-	osd_req_op_watch_init(lreq->ping_req, 0, lreq->linger_id,
-			      CEPH_OSD_WATCH_OP_PING);
 	linger_submit(lreq);
-	up_write(&osdc->lock);
-
 	ret = linger_reg_commit_wait(lreq);
 	if (ret) {
 		linger_cancel(lreq);
@@ -4599,11 +4716,10 @@
 
 	op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0);
 
-	pl = kmalloc(sizeof(*pl), GFP_NOIO);
+	pl = ceph_pagelist_alloc(GFP_NOIO);
 	if (!pl)
 		return -ENOMEM;
 
-	ceph_pagelist_init(pl);
 	ret = ceph_pagelist_encode_64(pl, notify_id);
 	ret |= ceph_pagelist_encode_64(pl, cookie);
 	if (payload) {
@@ -4641,12 +4757,12 @@
 	ceph_oloc_copy(&req->r_base_oloc, oloc);
 	req->r_flags = CEPH_OSD_FLAG_READ;
 
-	ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+	ret = osd_req_op_notify_ack_init(req, 0, notify_id, cookie, payload,
+					 payload_len);
 	if (ret)
 		goto out_put_req;
 
-	ret = osd_req_op_notify_ack_init(req, 0, notify_id, cookie, payload,
-					 payload_len);
+	ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
 	if (ret)
 		goto out_put_req;
 
@@ -4670,11 +4786,10 @@
 	op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
 	op->notify.cookie = cookie;
 
-	pl = kmalloc(sizeof(*pl), GFP_NOIO);
+	pl = ceph_pagelist_alloc(GFP_NOIO);
 	if (!pl)
 		return -ENOMEM;
 
-	ceph_pagelist_init(pl);
 	ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
 	ret |= ceph_pagelist_encode_32(pl, timeout);
 	ret |= ceph_pagelist_encode_32(pl, payload_len);
@@ -4733,29 +4848,30 @@
 		goto out_put_lreq;
 	}
 
+	/*
+	 * Pass 0 for cookie because we don't know it yet; it will be
+	 * filled in by linger_submit().
+	 */
+	ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
+				     payload, payload_len);
+	if (ret)
+		goto out_put_lreq;
+
 	/* for notify_id */
 	pages = ceph_alloc_page_vector(1, GFP_NOIO);
 	if (IS_ERR(pages)) {
 		ret = PTR_ERR(pages);
 		goto out_put_lreq;
 	}
-
-	down_write(&osdc->lock);
-	linger_register(lreq); /* before osd_req_op_* */
-	ret = osd_req_op_notify_init(lreq->reg_req, 0, lreq->linger_id, 1,
-				     timeout, payload, payload_len);
-	if (ret) {
-		linger_unregister(lreq);
-		up_write(&osdc->lock);
-		ceph_release_page_vector(pages, 1);
-		goto out_put_lreq;
-	}
 	ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
 						 response_data),
 				 pages, PAGE_SIZE, 0, false, true);
-	linger_submit(lreq);
-	up_write(&osdc->lock);
 
+	ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
+	if (ret)
+		goto out_put_lreq;
+
+	linger_submit(lreq);
 	ret = linger_reg_commit_wait(lreq);
 	if (!ret)
 		ret = linger_notify_finish_wait(lreq);
@@ -4812,20 +4928,26 @@
 	ret = ceph_start_decoding(p, end, 2, "watch_item_t",
 				  &struct_v, &struct_len);
 	if (ret)
-		return ret;
+		goto bad;
 
-	ceph_decode_copy(p, &item->name, sizeof(item->name));
-	item->cookie = ceph_decode_64(p);
-	*p += 4; /* skip timeout_seconds */
+	ret = -EINVAL;
+	ceph_decode_copy_safe(p, end, &item->name, sizeof(item->name), bad);
+	ceph_decode_64_safe(p, end, item->cookie, bad);
+	ceph_decode_skip_32(p, end, bad); /* skip timeout seconds */
+
 	if (struct_v >= 2) {
-		ceph_decode_copy(p, &item->addr, sizeof(item->addr));
-		ceph_decode_addr(&item->addr);
+		ret = ceph_decode_entity_addr(p, end, &item->addr);
+		if (ret)
+			goto bad;
+	} else {
+		ret = 0;
 	}
 
 	dout("%s %s%llu cookie %llu addr %s\n", __func__,
 	     ENTITY_NAME(item->name), item->cookie,
-	     ceph_pr_addr(&item->addr.in_addr));
-	return 0;
+	     ceph_pr_addr(&item->addr));
+bad:
+	return ret;
 }
 
 static int decode_watchers(void **p, void *end,
@@ -4881,10 +5003,6 @@
 	ceph_oloc_copy(&req->r_base_oloc, oloc);
 	req->r_flags = CEPH_OSD_FLAG_READ;
 
-	ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
-	if (ret)
-		goto out_put_req;
-
 	pages = ceph_alloc_page_vector(1, GFP_NOIO);
 	if (IS_ERR(pages)) {
 		ret = PTR_ERR(pages);
@@ -4896,6 +5014,10 @@
 						 response_data),
 				 pages, PAGE_SIZE, 0, false, true);
 
+	ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+	if (ret)
+		goto out_put_req;
+
 	ceph_osdc_start_request(osdc, req, false);
 	ret = ceph_osdc_wait_request(osdc, req);
 	if (ret >= 0) {
@@ -4942,12 +5064,12 @@
 		   const char *class, const char *method,
 		   unsigned int flags,
 		   struct page *req_page, size_t req_len,
-		   struct page *resp_page, size_t *resp_len)
+		   struct page **resp_pages, size_t *resp_len)
 {
 	struct ceph_osd_request *req;
 	int ret;
 
-	if (req_len > PAGE_SIZE || (resp_page && *resp_len > PAGE_SIZE))
+	if (req_len > PAGE_SIZE)
 		return -E2BIG;
 
 	req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
@@ -4958,26 +5080,26 @@
 	ceph_oloc_copy(&req->r_base_oloc, oloc);
 	req->r_flags = flags;
 
-	ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
-	if (ret)
-		goto out_put_req;
-
-	ret = osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method);
+	ret = osd_req_op_cls_init(req, 0, class, method);
 	if (ret)
 		goto out_put_req;
 
 	if (req_page)
 		osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len,
 						  0, false, false);
-	if (resp_page)
-		osd_req_op_cls_response_data_pages(req, 0, &resp_page,
+	if (resp_pages)
+		osd_req_op_cls_response_data_pages(req, 0, resp_pages,
 						   *resp_len, 0, false, false);
 
+	ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+	if (ret)
+		goto out_put_req;
+
 	ceph_osdc_start_request(osdc, req, false);
 	ret = ceph_osdc_wait_request(osdc, req);
 	if (ret >= 0) {
 		ret = req->r_ops[0].rval;
-		if (resp_page)
+		if (resp_pages)
 			*resp_len = req->r_ops[0].outdata_len;
 	}
 
@@ -4988,6 +5110,24 @@
 EXPORT_SYMBOL(ceph_osdc_call);
 
 /*
+ * reset all osd connections
+ */
+void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc)
+{
+	struct rb_node *n;
+
+	down_write(&osdc->lock);
+	for (n = rb_first(&osdc->osds); n; ) {
+		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+
+		n = rb_next(n);
+		if (!reopen_osd(osd))
+			kick_osd_requests(osd);
+	}
+	up_write(&osdc->lock);
+}
+
+/*
  * init, shutdown
  */
 int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
@@ -5021,11 +5161,12 @@
 		goto out_map;
 
 	err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
-				PAGE_SIZE, 10, true, "osd_op");
+				PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, "osd_op");
 	if (err < 0)
 		goto out_mempool;
 	err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
-				PAGE_SIZE, 10, true, "osd_op_reply");
+				PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10,
+				"osd_op_reply");
 	if (err < 0)
 		goto out_msgpool;
 
@@ -5168,6 +5309,80 @@
 }
 EXPORT_SYMBOL(ceph_osdc_writepages);
 
+static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
+				     u64 src_snapid, u64 src_version,
+				     struct ceph_object_id *src_oid,
+				     struct ceph_object_locator *src_oloc,
+				     u32 src_fadvise_flags,
+				     u32 dst_fadvise_flags,
+				     u8 copy_from_flags)
+{
+	struct ceph_osd_req_op *op;
+	struct page **pages;
+	void *p, *end;
+
+	pages = ceph_alloc_page_vector(1, GFP_KERNEL);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM, dst_fadvise_flags);
+	op->copy_from.snapid = src_snapid;
+	op->copy_from.src_version = src_version;
+	op->copy_from.flags = copy_from_flags;
+	op->copy_from.src_fadvise_flags = src_fadvise_flags;
+
+	p = page_address(pages[0]);
+	end = p + PAGE_SIZE;
+	ceph_encode_string(&p, end, src_oid->name, src_oid->name_len);
+	encode_oloc(&p, end, src_oloc);
+	op->indata_len = PAGE_SIZE - (end - p);
+
+	ceph_osd_data_pages_init(&op->copy_from.osd_data, pages,
+				 op->indata_len, 0, false, true);
+	return 0;
+}
+
+int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
+			u64 src_snapid, u64 src_version,
+			struct ceph_object_id *src_oid,
+			struct ceph_object_locator *src_oloc,
+			u32 src_fadvise_flags,
+			struct ceph_object_id *dst_oid,
+			struct ceph_object_locator *dst_oloc,
+			u32 dst_fadvise_flags,
+			u8 copy_from_flags)
+{
+	struct ceph_osd_request *req;
+	int ret;
+
+	req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	req->r_flags = CEPH_OSD_FLAG_WRITE;
+
+	ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
+	ceph_oid_copy(&req->r_t.base_oid, dst_oid);
+
+	ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid,
+					src_oloc, src_fadvise_flags,
+					dst_fadvise_flags, copy_from_flags);
+	if (ret)
+		goto out;
+
+	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
+	if (ret)
+		goto out;
+
+	ceph_osdc_start_request(osdc, req, false);
+	ret = ceph_osdc_wait_request(osdc, req);
+
+out:
+	ceph_osdc_put_request(req);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_osdc_copy_from);
+
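A hedged usage sketch of the new export, assuming the caller has prepared
source and destination oids/olocs (the flag shown is one of the
CEPH_OSD_COPY_FROM_FLAG_* constants from rados.h):

	ret = ceph_osdc_copy_from(osdc, CEPH_NOSNAP, 0 /* src version */,
				  &src_oid, &src_oloc, 0,
				  &dst_oid, &dst_oloc, 0,
				  CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY);
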
 int __init ceph_osdc_setup(void)
 {
 	size_t size = sizeof(struct ceph_osd_request) +
@@ -5295,7 +5510,7 @@
 	u32 front_len = le32_to_cpu(hdr->front_len);
 	u32 data_len = le32_to_cpu(hdr->data_len);
 
-	m = ceph_msg_new(type, front_len, GFP_NOIO, false);
+	m = ceph_msg_new2(type, front_len, 1, GFP_NOIO, false);
 	if (!m)
 		return NULL;
 
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 98c0ff3..4e0de14 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -495,9 +495,8 @@
 			  / sizeof(struct crush_rule_step))
 			goto bad;
 #endif
-		r = c->rules[i] = kmalloc(sizeof(*r) +
-					  yes*sizeof(struct crush_rule_step),
-					  GFP_NOFS);
+		r = kmalloc(struct_size(r, steps, yes), GFP_NOFS);
+		c->rules[i] = r;
 		if (r == NULL)
 			goto badmem;
 		dout(" rule %d is at %p\n", i, r);
@@ -974,11 +973,11 @@
 				 struct ceph_pg_pool_info, node);
 		__remove_pg_pool(&map->pg_pools, pi);
 	}
-	kfree(map->osd_state);
-	kfree(map->osd_weight);
-	kfree(map->osd_addr);
-	kfree(map->osd_primary_affinity);
-	kfree(map->crush_workspace);
+	kvfree(map->osd_state);
+	kvfree(map->osd_weight);
+	kvfree(map->osd_addr);
+	kvfree(map->osd_primary_affinity);
+	kvfree(map->crush_workspace);
 	kfree(map);
 }
 
@@ -987,28 +986,41 @@
  *
  * The new elements are properly initialized.
  */
-static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
+static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
 {
 	u32 *state;
 	u32 *weight;
 	struct ceph_entity_addr *addr;
+	u32 to_copy;
 	int i;
 
-	state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS);
-	if (!state)
+	dout("%s old %u new %u\n", __func__, map->max_osd, max);
+	if (max == map->max_osd)
+		return 0;
+
+	state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
+	weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
+	addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
+	if (!state || !weight || !addr) {
+		kvfree(state);
+		kvfree(weight);
+		kvfree(addr);
 		return -ENOMEM;
+	}
+
+	to_copy = min(map->max_osd, max);
+	if (map->osd_state) {
+		memcpy(state, map->osd_state, to_copy * sizeof(*state));
+		memcpy(weight, map->osd_weight, to_copy * sizeof(*weight));
+		memcpy(addr, map->osd_addr, to_copy * sizeof(*addr));
+		kvfree(map->osd_state);
+		kvfree(map->osd_weight);
+		kvfree(map->osd_addr);
+	}
+
 	map->osd_state = state;
-
-	weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS);
-	if (!weight)
-		return -ENOMEM;
 	map->osd_weight = weight;
-
-	addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS);
-	if (!addr)
-		return -ENOMEM;
 	map->osd_addr = addr;
-
 	for (i = map->max_osd; i < max; i++) {
 		map->osd_state[i] = 0;
 		map->osd_weight[i] = CEPH_OSD_OUT;
@@ -1018,12 +1030,16 @@
 	if (map->osd_primary_affinity) {
 		u32 *affinity;
 
-		affinity = krealloc(map->osd_primary_affinity,
-				    max*sizeof(*affinity), GFP_NOFS);
+		affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
+					 GFP_NOFS);
 		if (!affinity)
 			return -ENOMEM;
-		map->osd_primary_affinity = affinity;
 
+		memcpy(affinity, map->osd_primary_affinity,
+		       to_copy * sizeof(*affinity));
+		kvfree(map->osd_primary_affinity);
+
+		map->osd_primary_affinity = affinity;
 		for (i = map->max_osd; i < max; i++)
 			map->osd_primary_affinity[i] =
 			    CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
@@ -1044,7 +1060,7 @@
 
 	work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE);
 	dout("%s work_size %zu bytes\n", __func__, work_size);
-	workspace = kmalloc(work_size, GFP_NOIO);
+	workspace = ceph_kvmalloc(work_size, GFP_NOIO);
 	if (!workspace) {
 		crush_destroy(crush);
 		return -ENOMEM;
@@ -1053,7 +1069,7 @@
 
 	if (map->crush)
 		crush_destroy(map->crush);
-	kfree(map->crush_workspace);
+	kvfree(map->crush_workspace);
 	map->crush = crush;
 	map->crush_workspace = workspace;
 	return 0;
@@ -1299,9 +1315,9 @@
 	if (!map->osd_primary_affinity) {
 		int i;
 
-		map->osd_primary_affinity = kmalloc_array(map->max_osd,
-							  sizeof(u32),
-							  GFP_NOFS);
+		map->osd_primary_affinity = ceph_kvmalloc(
+		    array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
+		    GFP_NOFS);
 		if (!map->osd_primary_affinity)
 			return -ENOMEM;
 
@@ -1322,7 +1338,7 @@
 
 	ceph_decode_32_safe(p, end, len, e_inval);
 	if (len == 0) {
-		kfree(map->osd_primary_affinity);
+		kvfree(map->osd_primary_affinity);
 		map->osd_primary_affinity = NULL;
 		return 0;
 	}
@@ -1490,11 +1506,9 @@
 
 	/* osd_state, osd_weight, osd_addrs->client_addr */
 	ceph_decode_need(p, end, 3*sizeof(u32) +
-			 map->max_osd*((struct_v >= 5 ? sizeof(u32) :
-							sizeof(u8)) +
-				       sizeof(*map->osd_weight) +
-				       sizeof(*map->osd_addr)), e_inval);
-
+			 map->max_osd*(struct_v >= 5 ? sizeof(u32) :
+						       sizeof(u8)) +
+				       sizeof(*map->osd_weight), e_inval);
 	if (ceph_decode_32(p) != map->max_osd)
 		goto e_inval;
 
@@ -1515,9 +1529,11 @@
 	if (ceph_decode_32(p) != map->max_osd)
 		goto e_inval;
 
-	ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr));
-	for (i = 0; i < map->max_osd; i++)
-		ceph_decode_addr(&map->osd_addr[i]);
+	for (i = 0; i < map->max_osd; i++) {
+		err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]);
+		if (err)
+			goto bad;
+	}
 
 	/* pg_temp */
 	err = decode_pg_temp(p, end, map);
@@ -1619,12 +1635,17 @@
 	void *new_state;
 	void *new_weight_end;
 	u32 len;
+	int i;
 
 	new_up_client = *p;
 	ceph_decode_32_safe(p, end, len, e_inval);
-	len *= sizeof(u32) + sizeof(struct ceph_entity_addr);
-	ceph_decode_need(p, end, len, e_inval);
-	*p += len;
+	for (i = 0; i < len; ++i) {
+		struct ceph_entity_addr addr;
+
+		ceph_decode_skip_32(p, end, e_inval);
+		if (ceph_decode_entity_addr(p, end, &addr))
+			goto e_inval;
+	}
 
 	new_state = *p;
 	ceph_decode_32_safe(p, end, len, e_inval);
@@ -1700,9 +1721,9 @@
 		struct ceph_entity_addr addr;
 
 		osd = ceph_decode_32(p);
-		ceph_decode_copy(p, &addr, sizeof(addr));
-		ceph_decode_addr(&addr);
 		BUG_ON(osd >= map->max_osd);
+		if (ceph_decode_entity_addr(p, end, &addr))
+			goto e_inval;
 		pr_info("osd%d up\n", osd);
 		map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
 		map->osd_addr[osd] = addr;
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 2ea0564..65e34f7 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -6,6 +6,26 @@
 #include <linux/highmem.h>
 #include <linux/ceph/pagelist.h>
 
+struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags)
+{
+	struct ceph_pagelist *pl;
+
+	pl = kmalloc(sizeof(*pl), gfp_flags);
+	if (!pl)
+		return NULL;
+
+	INIT_LIST_HEAD(&pl->head);
+	pl->mapped_tail = NULL;
+	pl->length = 0;
+	pl->room = 0;
+	INIT_LIST_HEAD(&pl->free_list);
+	pl->num_pages_free = 0;
+	refcount_set(&pl->refcnt, 1);
+
+	return pl;
+}
+EXPORT_SYMBOL(ceph_pagelist_alloc);
+
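The new helper folds the old kmalloc() + ceph_pagelist_init() pair into a
single call; a typical (hypothetical) caller now reads:

	pl = ceph_pagelist_alloc(GFP_NOFS);
	if (!pl)
		return -ENOMEM;

	ret = ceph_pagelist_append(pl, buf, len);
	if (ret) {
		ceph_pagelist_release(pl);	/* drops the initial ref */
		return ret;
	}
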
 static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
 {
 	if (pl->mapped_tail) {
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index d3736f5..64305e7 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -10,39 +10,6 @@
 
 #include <linux/ceph/libceph.h>
 
-/*
- * build a vector of user pages
- */
-struct page **ceph_get_direct_page_vector(const void __user *data,
-					  int num_pages, bool write_page)
-{
-	struct page **pages;
-	int got = 0;
-	int rc = 0;
-
-	pages = kmalloc_array(num_pages, sizeof(*pages), GFP_NOFS);
-	if (!pages)
-		return ERR_PTR(-ENOMEM);
-
-	while (got < num_pages) {
-		rc = get_user_pages_fast(
-		    (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
-		    num_pages - got, write_page, pages + got);
-		if (rc < 0)
-			break;
-		BUG_ON(rc == 0);
-		got += rc;
-	}
-	if (rc < 0)
-		goto fail;
-	return pages;
-
-fail:
-	ceph_put_page_vector(pages, got, false);
-	return ERR_PTR(rc);
-}
-EXPORT_SYMBOL(ceph_get_direct_page_vector);
-
 void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
 {
 	int i;
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
index e14a5d0..e243159 100644
--- a/net/ceph/snapshot.c
+++ b/net/ceph/snapshot.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * snapshot.c    Ceph snapshot context utility routines (part of libceph)
  *
  * Copyright (C) 2013 Inktank Storage, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
  */
 
 #include <linux/types.h>
diff --git a/net/ceph/striper.c b/net/ceph/striper.c
index c36462d..3b3fa75 100644
--- a/net/ceph/striper.c
+++ b/net/ceph/striper.c
@@ -259,3 +259,20 @@
 	return 0;
 }
 EXPORT_SYMBOL(ceph_extent_to_file);
+
+u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size)
+{
+	u64 period = (u64)l->stripe_count * l->object_size;
+	u64 num_periods = DIV64_U64_ROUND_UP(size, period);
+	u64 remainder_bytes;
+	u64 remainder_objs = 0;
+
+	div64_u64_rem(size, period, &remainder_bytes);
+	if (remainder_bytes > 0 &&
+	    remainder_bytes < (u64)l->stripe_count * l->stripe_unit)
+		remainder_objs = l->stripe_count -
+			    DIV_ROUND_UP_ULL(remainder_bytes, l->stripe_unit);
+
+	return num_periods * l->stripe_count - remainder_objs;
+}
+EXPORT_SYMBOL(ceph_get_num_objects);
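
A worked example of the formula, with an illustrative layout: stripe_unit =
1M, stripe_count = 4 and object_size = 4M give a 16M period. For size = 18M,
num_periods = 2 and remainder_bytes = 2M, which is less than stripe_count *
stripe_unit = 4M, so remainder_objs = 4 - DIV_ROUND_UP(2M, 1M) = 2 and the
result is 2 * 4 - 2 = 6: the first period fills objects 0-3, and the trailing
2M touches only the first two objects of the second period.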
diff --git a/net/compat.c b/net/compat.c
index 3b2105f..0f7ded2 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * 32bit Socket syscall emulation. Based on arch/sparc64/kernel/sys_sparc32.c.
  *
@@ -79,9 +80,10 @@
 
 	kmsg->msg_iocb = NULL;
 
-	return compat_import_iovec(save_addr ? READ : WRITE,
+	err = compat_import_iovec(save_addr ? READ : WRITE,
 				   compat_ptr(msg.msg_iov), msg.msg_iovlen,
 				   UIO_FASTIOV, iov, &kmsg->msg_iter);
+	return err < 0 ? err : 0;
 }
 
 /* Bleech... */
@@ -209,8 +211,8 @@
 {
 	struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
 	struct compat_cmsghdr cmhdr;
-	struct compat_timeval ctv;
-	struct compat_timespec cts[3];
+	struct old_timeval32 ctv;
+	struct old_timespec32 cts[3];
 	int cmlen;
 
 	if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
@@ -219,16 +221,16 @@
 	}
 
 	if (!COMPAT_USE_64BIT_TIME) {
-		if (level == SOL_SOCKET && type == SCM_TIMESTAMP) {
-			struct timeval *tv = (struct timeval *)data;
+		if (level == SOL_SOCKET && type == SO_TIMESTAMP_OLD) {
+			struct __kernel_old_timeval *tv = (struct __kernel_old_timeval *)data;
 			ctv.tv_sec = tv->tv_sec;
 			ctv.tv_usec = tv->tv_usec;
 			data = &ctv;
 			len = sizeof(ctv);
 		}
 		if (level == SOL_SOCKET &&
-		    (type == SCM_TIMESTAMPNS || type == SCM_TIMESTAMPING)) {
-			int count = type == SCM_TIMESTAMPNS ? 1 : 3;
+		    (type == SO_TIMESTAMPNS_OLD || type == SO_TIMESTAMPING_OLD)) {
+			int count = type == SO_TIMESTAMPNS_OLD ? 1 : 3;
 			int i;
 			struct timespec *ts = (struct timespec *)data;
 			for (i = 0; i < count; i++) {
@@ -348,28 +350,6 @@
 			      sizeof(struct sock_fprog));
 }
 
-static int do_set_sock_timeout(struct socket *sock, int level,
-		int optname, char __user *optval, unsigned int optlen)
-{
-	struct compat_timeval __user *up = (struct compat_timeval __user *)optval;
-	struct timeval ktime;
-	mm_segment_t old_fs;
-	int err;
-
-	if (optlen < sizeof(*up))
-		return -EINVAL;
-	if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
-	    __get_user(ktime.tv_sec, &up->tv_sec) ||
-	    __get_user(ktime.tv_usec, &up->tv_usec))
-		return -EFAULT;
-	old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime));
-	set_fs(old_fs);
-
-	return err;
-}
-
 static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
 				char __user *optval, unsigned int optlen)
 {
@@ -377,10 +357,6 @@
 	    optname == SO_ATTACH_REUSEPORT_CBPF)
 		return do_set_attach_filter(sock, level, optname,
 					    optval, optlen);
-	if (!COMPAT_USE_64BIT_TIME &&
-	    (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
-		return do_set_sock_timeout(sock, level, optname, optval, optlen);
-
 	return sock_setsockopt(sock, level, optname, optval, optlen);
 }
 
@@ -388,8 +364,12 @@
 				   char __user *optval, unsigned int optlen)
 {
 	int err;
-	struct socket *sock = sockfd_lookup(fd, &err);
+	struct socket *sock;
 
+	if (optlen > INT_MAX)
+		return -EINVAL;
+
+	sock = sockfd_lookup(fd, &err);
 	if (sock) {
 		err = security_socket_setsockopt(sock, level, optname);
 		if (err) {
@@ -417,98 +397,6 @@
 	return __compat_sys_setsockopt(fd, level, optname, optval, optlen);
 }
 
-static int do_get_sock_timeout(struct socket *sock, int level, int optname,
-		char __user *optval, int __user *optlen)
-{
-	struct compat_timeval __user *up;
-	struct timeval ktime;
-	mm_segment_t old_fs;
-	int len, err;
-
-	up = (struct compat_timeval __user *) optval;
-	if (get_user(len, optlen))
-		return -EFAULT;
-	if (len < sizeof(*up))
-		return -EINVAL;
-	len = sizeof(ktime);
-	old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	err = sock_getsockopt(sock, level, optname, (char *) &ktime, &len);
-	set_fs(old_fs);
-
-	if (!err) {
-		if (put_user(sizeof(*up), optlen) ||
-		    !access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
-		    __put_user(ktime.tv_sec, &up->tv_sec) ||
-		    __put_user(ktime.tv_usec, &up->tv_usec))
-			err = -EFAULT;
-	}
-	return err;
-}
-
-static int compat_sock_getsockopt(struct socket *sock, int level, int optname,
-				char __user *optval, int __user *optlen)
-{
-	if (!COMPAT_USE_64BIT_TIME &&
-	    (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
-		return do_get_sock_timeout(sock, level, optname, optval, optlen);
-	return sock_getsockopt(sock, level, optname, optval, optlen);
-}
-
-int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
-{
-	struct compat_timeval __user *ctv;
-	int err;
-	struct timeval tv;
-
-	if (COMPAT_USE_64BIT_TIME)
-		return sock_get_timestamp(sk, userstamp);
-
-	ctv = (struct compat_timeval __user *) userstamp;
-	err = -ENOENT;
-	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	tv = ktime_to_timeval(sk->sk_stamp);
-	if (tv.tv_sec == -1)
-		return err;
-	if (tv.tv_sec == 0) {
-		sk->sk_stamp = ktime_get_real();
-		tv = ktime_to_timeval(sk->sk_stamp);
-	}
-	err = 0;
-	if (put_user(tv.tv_sec, &ctv->tv_sec) ||
-			put_user(tv.tv_usec, &ctv->tv_usec))
-		err = -EFAULT;
-	return err;
-}
-EXPORT_SYMBOL(compat_sock_get_timestamp);
-
-int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
-{
-	struct compat_timespec __user *ctv;
-	int err;
-	struct timespec ts;
-
-	if (COMPAT_USE_64BIT_TIME)
-		return sock_get_timestampns (sk, userstamp);
-
-	ctv = (struct compat_timespec __user *) userstamp;
-	err = -ENOENT;
-	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	ts = ktime_to_timespec(sk->sk_stamp);
-	if (ts.tv_sec == -1)
-		return err;
-	if (ts.tv_sec == 0) {
-		sk->sk_stamp = ktime_get_real();
-		ts = ktime_to_timespec(sk->sk_stamp);
-	}
-	err = 0;
-	if (put_user(ts.tv_sec, &ctv->tv_sec) ||
-			put_user(ts.tv_nsec, &ctv->tv_nsec))
-		err = -EFAULT;
-	return err;
-}
-EXPORT_SYMBOL(compat_sock_get_timestampns);
-
 static int __compat_sys_getsockopt(int fd, int level, int optname,
 				   char __user *optval,
 				   int __user *optlen)
@@ -524,7 +412,7 @@
 		}
 
 		if (level == SOL_SOCKET)
-			err = compat_sock_getsockopt(sock, level,
+			err = sock_getsockopt(sock, level,
 					optname, optval, optlen);
 		else if (sock->ops->compat_getsockopt)
 			err = sock->ops->compat_getsockopt(sock, level,
@@ -582,13 +470,13 @@
 	case MCAST_JOIN_GROUP:
 	case MCAST_LEAVE_GROUP:
 	{
-		struct compat_group_req __user *gr32 = (void *)optval;
+		struct compat_group_req __user *gr32 = (void __user *)optval;
 		struct group_req __user *kgr =
 			compat_alloc_user_space(sizeof(struct group_req));
 		u32 interface;
 
-		if (!access_ok(VERIFY_READ, gr32, sizeof(*gr32)) ||
-		    !access_ok(VERIFY_WRITE, kgr, sizeof(struct group_req)) ||
+		if (!access_ok(gr32, sizeof(*gr32)) ||
+		    !access_ok(kgr, sizeof(struct group_req)) ||
 		    __get_user(interface, &gr32->gr_interface) ||
 		    __put_user(interface, &kgr->gr_interface) ||
 		    copy_in_user(&kgr->gr_group, &gr32->gr_group,
@@ -603,13 +491,13 @@
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
 	{
-		struct compat_group_source_req __user *gsr32 = (void *)optval;
+		struct compat_group_source_req __user *gsr32 = (void __user *)optval;
 		struct group_source_req __user *kgsr = compat_alloc_user_space(
 			sizeof(struct group_source_req));
 		u32 interface;
 
-		if (!access_ok(VERIFY_READ, gsr32, sizeof(*gsr32)) ||
-		    !access_ok(VERIFY_WRITE, kgsr,
+		if (!access_ok(gsr32, sizeof(*gsr32)) ||
+		    !access_ok(kgsr,
 			sizeof(struct group_source_req)) ||
 		    __get_user(interface, &gsr32->gsr_interface) ||
 		    __put_user(interface, &kgsr->gsr_interface) ||
@@ -624,11 +512,11 @@
 	}
 	case MCAST_MSFILTER:
 	{
-		struct compat_group_filter __user *gf32 = (void *)optval;
+		struct compat_group_filter __user *gf32 = (void __user *)optval;
 		struct group_filter __user *kgf;
 		u32 interface, fmode, numsrc;
 
-		if (!access_ok(VERIFY_READ, gf32, __COMPAT_GF0_SIZE) ||
+		if (!access_ok(gf32, __COMPAT_GF0_SIZE) ||
 		    __get_user(interface, &gf32->gf_interface) ||
 		    __get_user(fmode, &gf32->gf_fmode) ||
 		    __get_user(numsrc, &gf32->gf_numsrc))
@@ -638,7 +526,7 @@
 		if (koptlen < GROUP_FILTER_SIZE(numsrc))
 			return -EINVAL;
 		kgf = compat_alloc_user_space(koptlen);
-		if (!access_ok(VERIFY_WRITE, kgf, koptlen) ||
+		if (!access_ok(kgf, koptlen) ||
 		    __put_user(interface, &kgf->gf_interface) ||
 		    __put_user(fmode, &kgf->gf_fmode) ||
 		    __put_user(numsrc, &kgf->gf_numsrc) ||
@@ -662,7 +550,7 @@
 	char __user *optval, int __user *optlen,
 	int (*getsockopt)(struct sock *, int, int, char __user *, int __user *))
 {
-	struct compat_group_filter __user *gf32 = (void *)optval;
+	struct compat_group_filter __user *gf32 = (void __user *)optval;
 	struct group_filter __user *kgf;
 	int __user	*koptlen;
 	u32 interface, fmode, numsrc;
@@ -672,7 +560,7 @@
 		return getsockopt(sock, level, optname, optval, optlen);
 
 	koptlen = compat_alloc_user_space(sizeof(*koptlen));
-	if (!access_ok(VERIFY_READ, optlen, sizeof(*optlen)) ||
+	if (!access_ok(optlen, sizeof(*optlen)) ||
 	    __get_user(ulen, optlen))
 		return -EFAULT;
 
@@ -682,14 +570,14 @@
 	if (klen < GROUP_FILTER_SIZE(0))
 		return -EINVAL;
 
-	if (!access_ok(VERIFY_WRITE, koptlen, sizeof(*koptlen)) ||
+	if (!access_ok(koptlen, sizeof(*koptlen)) ||
 	    __put_user(klen, koptlen))
 		return -EFAULT;
 
 	/* have to allow space for previous compat_alloc_user_space, too */
 	kgf = compat_alloc_user_space(klen+sizeof(*optlen));
 
-	if (!access_ok(VERIFY_READ, gf32, __COMPAT_GF0_SIZE) ||
+	if (!access_ok(gf32, __COMPAT_GF0_SIZE) ||
 	    __get_user(interface, &gf32->gf_interface) ||
 	    __get_user(fmode, &gf32->gf_fmode) ||
 	    __get_user(numsrc, &gf32->gf_numsrc) ||
@@ -703,18 +591,18 @@
 	if (err)
 		return err;
 
-	if (!access_ok(VERIFY_READ, koptlen, sizeof(*koptlen)) ||
+	if (!access_ok(koptlen, sizeof(*koptlen)) ||
 	    __get_user(klen, koptlen))
 		return -EFAULT;
 
 	ulen = klen - (sizeof(*kgf)-sizeof(*gf32));
 
-	if (!access_ok(VERIFY_WRITE, optlen, sizeof(*optlen)) ||
+	if (!access_ok(optlen, sizeof(*optlen)) ||
 	    __put_user(ulen, optlen))
 		return -EFAULT;
 
-	if (!access_ok(VERIFY_READ, kgf, klen) ||
-	    !access_ok(VERIFY_WRITE, gf32, ulen) ||
+	if (!access_ok(kgf, klen) ||
+	    !access_ok(gf32, ulen) ||
 	    __get_user(interface, &kgf->gf_interface) ||
 	    __get_user(fmode, &kgf->gf_fmode) ||
 	    __get_user(numsrc, &kgf->gf_numsrc) ||
@@ -810,35 +698,24 @@
 	return __compat_sys_recvfrom(fd, buf, len, flags, addr, addrlen);
 }
 
-static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
-				 unsigned int vlen, unsigned int flags,
-				 struct compat_timespec __user *timeout)
-{
-	int datagrams;
-	struct timespec ktspec;
-
-	if (timeout == NULL)
-		return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
-				      flags | MSG_CMSG_COMPAT, NULL);
-
-	if (compat_get_timespec(&ktspec, timeout))
-		return -EFAULT;
-
-	datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
-				   flags | MSG_CMSG_COMPAT, &ktspec);
-	if (datagrams > 0 && compat_put_timespec(&ktspec, timeout))
-		datagrams = -EFAULT;
-
-	return datagrams;
-}
-
-COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
+COMPAT_SYSCALL_DEFINE5(recvmmsg_time64, int, fd, struct compat_mmsghdr __user *, mmsg,
 		       unsigned int, vlen, unsigned int, flags,
-		       struct compat_timespec __user *, timeout)
+		       struct __kernel_timespec __user *, timeout)
 {
-	return __compat_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
+	return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
+			      flags | MSG_CMSG_COMPAT, timeout, NULL);
 }
 
+#ifdef CONFIG_COMPAT_32BIT_TIME
+COMPAT_SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct compat_mmsghdr __user *, mmsg,
+		       unsigned int, vlen, unsigned int, flags,
+		       struct old_timespec32 __user *, timeout)
+{
+	return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
+			      flags | MSG_CMSG_COMPAT, NULL, timeout);
+}
+#endif
+
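On 32-bit userspace the y2038-safe entry point takes a 64-bit timespec; a
hypothetical raw invocation (bypassing any libc wrapper) might be:

	struct __kernel_timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	int n = syscall(__NR_recvmmsg_time64, fd, msgvec, vlen, 0, &ts);
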
 COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args)
 {
 	u32 a[AUDITSC_ARGS];
@@ -925,8 +802,9 @@
 		ret = __compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
 		break;
 	case SYS_RECVMMSG:
-		ret = __compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
-					    compat_ptr(a[4]));
+		ret = __sys_recvmmsg(a0, compat_ptr(a1), a[2],
+				     a[3] | MSG_CMSG_COMPAT, NULL,
+				     compat_ptr(a[4]));
 		break;
 	case SYS_ACCEPT4:
 		ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
diff --git a/net/core/Makefile b/net/core/Makefile
index 80175e6..a104dc8 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,11 +11,12 @@
 obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
 			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
-			fib_notifier.o xdp.o
+			fib_notifier.o xdp.o flow_offload.o
 
 obj-y += net-sysfs.o
 obj-$(CONFIG_PAGE_POOL) += page_pool.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
+obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
@@ -27,8 +28,10 @@
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
 obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
+obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
 obj-$(CONFIG_DST_CACHE) += dst_cache.o
 obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
 obj-$(CONFIG_GRO_CELLS) += gro_cells.o
 obj-$(CONFIG_FAILOVER) += failover.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
new file mode 100644
index 0000000..da5639a
--- /dev/null
+++ b/net/core/bpf_sk_storage.c
@@ -0,0 +1,911 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook  */
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <net/bpf_sk_storage.h>
+#include <net/sock.h>
+#include <uapi/linux/btf.h>
+
+static atomic_t cache_idx;
+
+#define SK_STORAGE_CREATE_FLAG_MASK					\
+	(BPF_F_NO_PREALLOC | BPF_F_CLONE)
+
+struct bucket {
+	struct hlist_head list;
+	raw_spinlock_t lock;
+};
+
+/* The map is not the primary owner of a bpf_sk_storage_elem.
+ * Instead, the sk->sk_bpf_storage is.
+ *
+ * The map (bpf_sk_storage_map) is for two purposes
+ * 1. Define the size of the "sk local storage".  It is
+ *    the map's value_size.
+ *
+ * 2. Maintain a list to keep track of all elems such
+ *    that they can be cleaned up during the map destruction.
+ *
+ * When a bpf local storage is being looked up for a
+ * particular sk,  the "bpf_map" pointer is actually used
+ * as the "key" to search in the list of elem in
+ * sk->sk_bpf_storage.
+ *
+ * Hence, consider sk->sk_bpf_storage to be the mini-map
+ * with the "bpf_map" pointer as the searching key.
+ */
+struct bpf_sk_storage_map {
+	struct bpf_map map;
+	/* Lookup elem does not require accessing the map.
+	 *
+	 * Updating/Deleting requires a bucket lock to
+	 * link/unlink the elem from the map.  Having
+	 * multiple buckets reduces lock contention.
+	 */
+	struct bucket *buckets;
+	u32 bucket_log;
+	u16 elem_size;
+	u16 cache_idx;
+};
+
+struct bpf_sk_storage_data {
+	/* smap is used as the searching key when looking up
+	 * from sk->sk_bpf_storage.
+	 *
+	 * Put it in the same cacheline as the data to minimize
+	 * the number of cacheline accesses in the cache-hit case.
+	 */
+	struct bpf_sk_storage_map __rcu *smap;
+	u8 data[0] __aligned(8);
+};
+
+/* Linked to bpf_sk_storage and bpf_sk_storage_map */
+struct bpf_sk_storage_elem {
+	struct hlist_node map_node;	/* Linked to bpf_sk_storage_map */
+	struct hlist_node snode;	/* Linked to bpf_sk_storage */
+	struct bpf_sk_storage __rcu *sk_storage;
+	struct rcu_head rcu;
+	/* 8 bytes hole */
+	/* The data is stored in another cacheline to minimize
+	 * the number of cacheline accesses during a cache hit.
+	 */
+	struct bpf_sk_storage_data sdata ____cacheline_aligned;
+};
+
+#define SELEM(_SDATA) container_of((_SDATA), struct bpf_sk_storage_elem, sdata)
+#define SDATA(_SELEM) (&(_SELEM)->sdata)
+#define BPF_SK_STORAGE_CACHE_SIZE	16
+
+struct bpf_sk_storage {
+	struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
+	struct hlist_head list;	/* List of bpf_sk_storage_elem */
+	struct sock *sk;	/* The sk that owns the above "list" of
+				 * bpf_sk_storage_elem.
+				 */
+	struct rcu_head rcu;
+	raw_spinlock_t lock;	/* Protect adding/removing from the "list" */
+};
+
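For orientation, a sketch of the BPF-program-side view of these structures
(libbpf BTF map syntax; the section name, value type and fullsock check are
illustrative, not part of this file):

	struct {
		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
		__uint(map_flags, BPF_F_NO_PREALLOC);
		__type(key, int);
		__type(value, __u64);
	} pkt_count SEC(".maps");

	SEC("cgroup_skb/egress")
	int count_egress(struct __sk_buff *skb)
	{
		struct bpf_sock *sk;
		__u64 *cnt;

		if (!skb->sk)
			return 1;
		sk = bpf_sk_fullsock(skb->sk);
		if (!sk)
			return 1;
		cnt = bpf_sk_storage_get(&pkt_count, sk, NULL,
					 BPF_SK_STORAGE_GET_F_CREATE);
		if (cnt)
			__sync_fetch_and_add(cnt, 1);
		return 1;
	}
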
+static struct bucket *select_bucket(struct bpf_sk_storage_map *smap,
+				    struct bpf_sk_storage_elem *selem)
+{
+	return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
+}
+
+static int omem_charge(struct sock *sk, unsigned int size)
+{
+	/* same check as in sock_kmalloc() */
+	if (size <= sysctl_optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+		atomic_add(size, &sk->sk_omem_alloc);
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
+static bool selem_linked_to_sk(const struct bpf_sk_storage_elem *selem)
+{
+	return !hlist_unhashed(&selem->snode);
+}
+
+static bool selem_linked_to_map(const struct bpf_sk_storage_elem *selem)
+{
+	return !hlist_unhashed(&selem->map_node);
+}
+
+static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap,
+					       struct sock *sk, void *value,
+					       bool charge_omem)
+{
+	struct bpf_sk_storage_elem *selem;
+
+	if (charge_omem && omem_charge(sk, smap->elem_size))
+		return NULL;
+
+	selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+	if (selem) {
+		if (value)
+			memcpy(SDATA(selem)->data, value, smap->map.value_size);
+		return selem;
+	}
+
+	if (charge_omem)
+		atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+
+	return NULL;
+}
+
+/* sk_storage->lock must be held and selem->sk_storage == sk_storage.
+ * The caller must ensure selem->smap is still valid to be
+ * dereferenced for its smap->elem_size and smap->cache_idx.
+ */
+static bool __selem_unlink_sk(struct bpf_sk_storage *sk_storage,
+			      struct bpf_sk_storage_elem *selem,
+			      bool uncharge_omem)
+{
+	struct bpf_sk_storage_map *smap;
+	bool free_sk_storage;
+	struct sock *sk;
+
+	smap = rcu_dereference(SDATA(selem)->smap);
+	sk = sk_storage->sk;
+
+	/* All uncharging on sk->sk_omem_alloc must be done first.
+	 * sk may be freed once the last selem is unlinked from sk_storage.
+	 */
+	if (uncharge_omem)
+		atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+
+	free_sk_storage = hlist_is_singular_node(&selem->snode,
+						 &sk_storage->list);
+	if (free_sk_storage) {
+		atomic_sub(sizeof(struct bpf_sk_storage), &sk->sk_omem_alloc);
+		sk_storage->sk = NULL;
+		/* After this RCU_INIT, sk may be freed and cannot be used */
+		RCU_INIT_POINTER(sk->sk_bpf_storage, NULL);
+
+		/* sk_storage is not freed now.  sk_storage->lock is
+		 * still held and raw_spin_unlock_bh(&sk_storage->lock)
+		 * will be done by the caller.
+		 *
+		 * Although the unlock will be done under
+		 * rcu_read_lock(), it is more intuitive to
+		 * read if kfree_rcu(sk_storage, rcu) is done
+		 * after the raw_spin_unlock_bh(&sk_storage->lock).
+		 *
+		 * Hence, a "bool free_sk_storage" is returned
+		 * to the caller which then calls the kfree_rcu()
+		 * after unlock.
+		 */
+	}
+	hlist_del_init_rcu(&selem->snode);
+	if (rcu_access_pointer(sk_storage->cache[smap->cache_idx]) ==
+	    SDATA(selem))
+		RCU_INIT_POINTER(sk_storage->cache[smap->cache_idx], NULL);
+
+	kfree_rcu(selem, rcu);
+
+	return free_sk_storage;
+}
+
+static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
+{
+	struct bpf_sk_storage *sk_storage;
+	bool free_sk_storage = false;
+
+	if (unlikely(!selem_linked_to_sk(selem)))
+		/* selem has already been unlinked from sk */
+		return;
+
+	sk_storage = rcu_dereference(selem->sk_storage);
+	raw_spin_lock_bh(&sk_storage->lock);
+	if (likely(selem_linked_to_sk(selem)))
+		free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
+	raw_spin_unlock_bh(&sk_storage->lock);
+
+	if (free_sk_storage)
+		kfree_rcu(sk_storage, rcu);
+}
+
+static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
+			    struct bpf_sk_storage_elem *selem)
+{
+	RCU_INIT_POINTER(selem->sk_storage, sk_storage);
+	hlist_add_head(&selem->snode, &sk_storage->list);
+}
+
+static void selem_unlink_map(struct bpf_sk_storage_elem *selem)
+{
+	struct bpf_sk_storage_map *smap;
+	struct bucket *b;
+
+	if (unlikely(!selem_linked_to_map(selem)))
+		/* selem has already been unlinked from smap */
+		return;
+
+	smap = rcu_dereference(SDATA(selem)->smap);
+	b = select_bucket(smap, selem);
+	raw_spin_lock_bh(&b->lock);
+	if (likely(selem_linked_to_map(selem)))
+		hlist_del_init_rcu(&selem->map_node);
+	raw_spin_unlock_bh(&b->lock);
+}
+
+static void selem_link_map(struct bpf_sk_storage_map *smap,
+			   struct bpf_sk_storage_elem *selem)
+{
+	struct bucket *b = select_bucket(smap, selem);
+
+	raw_spin_lock_bh(&b->lock);
+	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
+	hlist_add_head_rcu(&selem->map_node, &b->list);
+	raw_spin_unlock_bh(&b->lock);
+}
+
+static void selem_unlink(struct bpf_sk_storage_elem *selem)
+{
+	/* Always unlink from map before unlinking from sk_storage
+	 * because selem will be freed once it is successfully unlinked
+	 * from the sk_storage.
+	 */
+	selem_unlink_map(selem);
+	selem_unlink_sk(selem);
+}
+
+static struct bpf_sk_storage_data *
+__sk_storage_lookup(struct bpf_sk_storage *sk_storage,
+		    struct bpf_sk_storage_map *smap,
+		    bool cacheit_lockit)
+{
+	struct bpf_sk_storage_data *sdata;
+	struct bpf_sk_storage_elem *selem;
+
+	/* Fast path (cache hit) */
+	sdata = rcu_dereference(sk_storage->cache[smap->cache_idx]);
+	if (sdata && rcu_access_pointer(sdata->smap) == smap)
+		return sdata;
+
+	/* Slow path (cache miss) */
+	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode)
+		if (rcu_access_pointer(SDATA(selem)->smap) == smap)
+			break;
+
+	if (!selem)
+		return NULL;
+
+	sdata = SDATA(selem);
+	if (cacheit_lockit) {
+		/* spinlock is needed to avoid racing with the
+		 * parallel delete.  Otherwise, publishing an already
+		 * deleted sdata to the cache will become a use-after-free
+		 * problem in the next __sk_storage_lookup().
+		 */
+		raw_spin_lock_bh(&sk_storage->lock);
+		if (selem_linked_to_sk(selem))
+			rcu_assign_pointer(sk_storage->cache[smap->cache_idx],
+					   sdata);
+		raw_spin_unlock_bh(&sk_storage->lock);
+	}
+
+	return sdata;
+}
+
+static struct bpf_sk_storage_data *
+sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
+{
+	struct bpf_sk_storage *sk_storage;
+	struct bpf_sk_storage_map *smap;
+
+	sk_storage = rcu_dereference(sk->sk_bpf_storage);
+	if (!sk_storage)
+		return NULL;
+
+	smap = (struct bpf_sk_storage_map *)map;
+	return __sk_storage_lookup(sk_storage, smap, cacheit_lockit);
+}
+
+static int check_flags(const struct bpf_sk_storage_data *old_sdata,
+		       u64 map_flags)
+{
+	if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
+		/* elem already exists */
+		return -EEXIST;
+
+	if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
+		/* elem doesn't exist, cannot update it */
+		return -ENOENT;
+
+	return 0;
+}
+
+static int sk_storage_alloc(struct sock *sk,
+			    struct bpf_sk_storage_map *smap,
+			    struct bpf_sk_storage_elem *first_selem)
+{
+	struct bpf_sk_storage *prev_sk_storage, *sk_storage;
+	int err;
+
+	err = omem_charge(sk, sizeof(*sk_storage));
+	if (err)
+		return err;
+
+	sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN);
+	if (!sk_storage) {
+		err = -ENOMEM;
+		goto uncharge;
+	}
+	INIT_HLIST_HEAD(&sk_storage->list);
+	raw_spin_lock_init(&sk_storage->lock);
+	sk_storage->sk = sk;
+
+	__selem_link_sk(sk_storage, first_selem);
+	selem_link_map(smap, first_selem);
+	/* Publish sk_storage to sk.  sk->sk_lock cannot be acquired.
+	 * Hence, an atomic op is used to set sk->sk_bpf_storage
+	 * from NULL to the newly allocated sk_storage ptr.
+	 *
+	 * From now on, the sk->sk_bpf_storage pointer is protected
+	 * by the sk_storage->lock.  Hence,  when freeing
+	 * the sk->sk_bpf_storage, the sk_storage->lock must
+	 * be held before setting sk->sk_bpf_storage to NULL.
+	 */
+	prev_sk_storage = cmpxchg((struct bpf_sk_storage **)&sk->sk_bpf_storage,
+				  NULL, sk_storage);
+	if (unlikely(prev_sk_storage)) {
+		selem_unlink_map(first_selem);
+		err = -EAGAIN;
+		goto uncharge;
+
+		/* Note that even though first_selem was linked to smap's
+		 * bucket->list, first_selem can be freed immediately
+		 * (instead of kfree_rcu) because
+		 * bpf_sk_storage_map_free() does a
+		 * synchronize_rcu() before walking the bucket->list.
+		 * Hence, no one is accessing selem from the
+		 * bucket->list under rcu_read_lock().
+		 */
+	}
+
+	return 0;
+
+uncharge:
+	kfree(sk_storage);
+	atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc);
+	return err;
+}
+
+/* The sk cannot go away while this function links a new elem
+ * to sk->sk_bpf_storage (i.e. sk->sk_refcnt cannot be 0).
+ * Otherwise, it will become a leak (and other memory issues
+ * during map destruction).
+ */
+static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk,
+						     struct bpf_map *map,
+						     void *value,
+						     u64 map_flags)
+{
+	struct bpf_sk_storage_data *old_sdata = NULL;
+	struct bpf_sk_storage_elem *selem;
+	struct bpf_sk_storage *sk_storage;
+	struct bpf_sk_storage_map *smap;
+	int err;
+
+	/* BPF_EXIST and BPF_NOEXIST cannot both be set */
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
+	    /* BPF_F_LOCK can only be used in a value with spin_lock */
+	    unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
+		return ERR_PTR(-EINVAL);
+
+	smap = (struct bpf_sk_storage_map *)map;
+	sk_storage = rcu_dereference(sk->sk_bpf_storage);
+	if (!sk_storage || hlist_empty(&sk_storage->list)) {
+		/* Very first elem for this sk */
+		err = check_flags(NULL, map_flags);
+		if (err)
+			return ERR_PTR(err);
+
+		selem = selem_alloc(smap, sk, value, true);
+		if (!selem)
+			return ERR_PTR(-ENOMEM);
+
+		err = sk_storage_alloc(sk, smap, selem);
+		if (err) {
+			kfree(selem);
+			atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+			return ERR_PTR(err);
+		}
+
+		return SDATA(selem);
+	}
+
+	if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
+		/* Hope to find an old_sdata to update inline,
+		 * so that taking the sk_storage->lock and
+		 * changing the lists can be avoided.
+		 */
+		old_sdata = __sk_storage_lookup(sk_storage, smap, false);
+		err = check_flags(old_sdata, map_flags);
+		if (err)
+			return ERR_PTR(err);
+		if (old_sdata && selem_linked_to_sk(SELEM(old_sdata))) {
+			copy_map_value_locked(map, old_sdata->data,
+					      value, false);
+			return old_sdata;
+		}
+	}
+
+	raw_spin_lock_bh(&sk_storage->lock);
+
+	/* Recheck sk_storage->list under sk_storage->lock */
+	if (unlikely(hlist_empty(&sk_storage->list))) {
+		/* A parallel del is happening and sk_storage is going
+		 * away.  It was just checked above, so this is very
+		 * unlikely.  Return instead of retrying, to keep things
+		 * simple.
+		 */
+		err = -EAGAIN;
+		goto unlock_err;
+	}
+
+	old_sdata = __sk_storage_lookup(sk_storage, smap, false);
+	err = check_flags(old_sdata, map_flags);
+	if (err)
+		goto unlock_err;
+
+	if (old_sdata && (map_flags & BPF_F_LOCK)) {
+		copy_map_value_locked(map, old_sdata->data, value, false);
+		selem = SELEM(old_sdata);
+		goto unlock;
+	}
+
+	/* sk_storage->lock is held.  Hence, we are sure
+	 * the old_sdata can be unlinked and uncharged successfully
+	 * later.  So, instead of charging the new selem now
+	 * and uncharging the old selem later (which risks an
+	 * unnecessary charge failure), take no charge here at all
+	 * (the "!old_sdata" check); old_sdata will then not be
+	 * uncharged later during __selem_unlink_sk().
+	 */
+	selem = selem_alloc(smap, sk, value, !old_sdata);
+	if (!selem) {
+		err = -ENOMEM;
+		goto unlock_err;
+	}
+
+	/* First, link the new selem to the map */
+	selem_link_map(smap, selem);
+
+	/* Second, link (and publish) the new selem to sk_storage */
+	__selem_link_sk(sk_storage, selem);
+
+	/* Third, remove old selem, SELEM(old_sdata) */
+	if (old_sdata) {
+		selem_unlink_map(SELEM(old_sdata));
+		__selem_unlink_sk(sk_storage, SELEM(old_sdata), false);
+	}
+
+unlock:
+	raw_spin_unlock_bh(&sk_storage->lock);
+	return SDATA(selem);
+
+unlock_err:
+	raw_spin_unlock_bh(&sk_storage->lock);
+	return ERR_PTR(err);
+}
+
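The BPF_F_LOCK fast path above rewrites the existing value in place under the value's embedded bpf_spin_lock, so no selem is allocated and no list is touched. A hedged userspace sketch (the value layout and function name are illustrative; the map's BTF must describe the spin lock for map_value_has_spin_lock() to pass):

#include <bpf/bpf.h>
#include <linux/bpf.h>		/* struct bpf_spin_lock, BPF_F_LOCK */

/* Illustrative value layout embedding the required lock word. */
struct sk_val {
	struct bpf_spin_lock lock;
	long long counter;
};

static int locked_update(int map_fd, int sk_fd, long long counter)
{
	struct sk_val v = { .counter = counter };

	/* Copies everything except the lock word, under the lock. */
	return bpf_map_update_elem(map_fd, &sk_fd, &v, BPF_F_LOCK);
}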
+static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
+{
+	struct bpf_sk_storage_data *sdata;
+
+	sdata = sk_storage_lookup(sk, map, false);
+	if (!sdata)
+		return -ENOENT;
+
+	selem_unlink(SELEM(sdata));
+
+	return 0;
+}
+
+/* Called by __sk_destruct() & bpf_sk_storage_clone() */
+void bpf_sk_storage_free(struct sock *sk)
+{
+	struct bpf_sk_storage_elem *selem;
+	struct bpf_sk_storage *sk_storage;
+	bool free_sk_storage = false;
+	struct hlist_node *n;
+
+	rcu_read_lock();
+	sk_storage = rcu_dereference(sk->sk_bpf_storage);
+	if (!sk_storage) {
+		rcu_read_unlock();
+		return;
+	}
+
+	/* Neither the bpf_prog nor the bpf-map's syscall
+	 * could be modifying the sk_storage->list now.
+	 * Thus, no elem can be added to or deleted from the
+	 * sk_storage->list by the bpf_prog or by the bpf-map's syscall.
+	 *
+	 * It is racing with bpf_sk_storage_map_free() alone
+	 * when unlinking elem from the sk_storage->list and
+	 * the map's bucket->list.
+	 */
+	raw_spin_lock_bh(&sk_storage->lock);
+	hlist_for_each_entry_safe(selem, n, &sk_storage->list, snode) {
+		/* Always unlink from map before unlinking from
+		 * sk_storage.
+		 */
+		selem_unlink_map(selem);
+		free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
+	}
+	raw_spin_unlock_bh(&sk_storage->lock);
+	rcu_read_unlock();
+
+	if (free_sk_storage)
+		kfree_rcu(sk_storage, rcu);
+}
+
+static void bpf_sk_storage_map_free(struct bpf_map *map)
+{
+	struct bpf_sk_storage_elem *selem;
+	struct bpf_sk_storage_map *smap;
+	struct bucket *b;
+	unsigned int i;
+
+	smap = (struct bpf_sk_storage_map *)map;
+
+	/* Note that this map might be concurrently cloned by
+	 * bpf_sk_storage_clone(). Wait for any existing bpf_sk_storage_clone()
+	 * RCU read section to finish before proceeding. New RCU
+	 * read sections should be prevented via bpf_map_inc_not_zero.
+	 */
+	synchronize_rcu();
+
+	/* The bpf prog and userspace can no longer access this map
+	 * now.  No new selem (of this map) can be added
+	 * to the sk->sk_bpf_storage or to the map bucket's list.
+	 *
+	 * The elems of this map can be cleaned up here
+	 * or by bpf_sk_storage_free() during __sk_destruct().
+	 */
+	for (i = 0; i < (1U << smap->bucket_log); i++) {
+		b = &smap->buckets[i];
+
+		rcu_read_lock();
+		/* No one is adding to b->list now */
+		while ((selem = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&b->list)),
+						 struct bpf_sk_storage_elem,
+						 map_node))) {
+			selem_unlink(selem);
+			cond_resched_rcu();
+		}
+		rcu_read_unlock();
+	}
+
+	/* bpf_sk_storage_free() may still need to access the map.
+	 * e.g. bpf_sk_storage_free() may have unlinked a selem from the map,
+	 * which then made the above while ((selem = ...)) loop
+	 * exit immediately.
+	 *
+	 * However, bpf_sk_storage_free() still needs to access
+	 * smap->elem_size to do the uncharging in
+	 * __selem_unlink_sk().
+	 *
+	 * Hence, wait another rcu grace period for the
+	 * bpf_sk_storage_free() to finish.
+	 */
+	synchronize_rcu();
+
+	kvfree(smap->buckets);
+	kfree(map);
+}
+
+static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
+{
+	if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
+	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+	    attr->max_entries ||
+	    attr->key_size != sizeof(int) || !attr->value_size ||
+	    /* Enforce BTF for userspace sk dumping */
+	    !attr->btf_key_type_id || !attr->btf_value_type_id)
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (attr->value_size >= KMALLOC_MAX_SIZE -
+	    MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem) ||
+	    /* U16_MAX is much more than enough for sk local storage
+	     * considering a tcp_sock is ~2k.
+	     */
+	    attr->value_size > U16_MAX - sizeof(struct bpf_sk_storage_elem))
+		return -E2BIG;
+
+	return 0;
+}
+
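Put together, the checks above only admit one map shape: BPF_F_NO_PREALLOC set, max_entries zero, a 32-bit int key, a bounded value size, and BTF for both key and value. In libbpf's BTF-style map syntax that corresponds to roughly the following (the value struct and map name are illustrative):

/* BPF program side (compiled with clang -target bpf). */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct sk_stg_val {
	__u64 cookie;
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);			/* must be a 32-bit int */
	__type(value, struct sk_stg_val);	/* BTF-described value */
} sk_stg_map SEC(".maps");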
+static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_sk_storage_map *smap;
+	unsigned int i;
+	u32 nbuckets;
+	u64 cost;
+	int ret;
+
+	smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
+	if (!smap)
+		return ERR_PTR(-ENOMEM);
+	bpf_map_init_from_attr(&smap->map, attr);
+
+	/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
+	smap->bucket_log = max_t(u32, 1, ilog2(roundup_pow_of_two(num_possible_cpus())));
+	nbuckets = 1U << smap->bucket_log;
+	cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
+
+	ret = bpf_map_charge_init(&smap->map.memory, cost);
+	if (ret < 0) {
+		kfree(smap);
+		return ERR_PTR(ret);
+	}
+
+	smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
+				 GFP_USER | __GFP_NOWARN);
+	if (!smap->buckets) {
+		bpf_map_charge_finish(&smap->map.memory);
+		kfree(smap);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0; i < nbuckets; i++) {
+		INIT_HLIST_HEAD(&smap->buckets[i].list);
+		raw_spin_lock_init(&smap->buckets[i].lock);
+	}
+
+	smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
+	smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
+		BPF_SK_STORAGE_CACHE_SIZE;
+
+	return &smap->map;
+}
+
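Bucket sizing is CPU-count driven: round the number of possible CPUs up to a power of two, take ilog2, and clamp to at least 1 so select_bucket() never sees a single bucket. A small user-space sketch of the same arithmetic (the helper is a stand-in for the kernel's roundup_pow_of_two(); the CPU count is an example value):

#include <stdio.h>

/* Stand-in for the kernel helper of the same name. */
static unsigned int roundup_pow_of_two(unsigned int n)
{
	unsigned int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned int cpus = 6;				/* example: 6 possible CPUs */
	unsigned int r = roundup_pow_of_two(cpus);	/* 8 */
	unsigned int log = 0;

	while ((2U << log) <= r)			/* ilog2(8) == 3 */
		log++;
	if (log < 1)
		log = 1;				/* the max_t(u32, 1, ...) clamp */
	printf("bucket_log=%u nbuckets=%u\n", log, 1U << log);	/* 3, 8 */
	return 0;
}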
+static int notsupp_get_next_key(struct bpf_map *map, void *key,
+				void *next_key)
+{
+	return -ENOTSUPP;
+}
+
+static int bpf_sk_storage_map_check_btf(const struct bpf_map *map,
+					const struct btf *btf,
+					const struct btf_type *key_type,
+					const struct btf_type *value_type)
+{
+	u32 int_data;
+
+	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+		return -EINVAL;
+
+	int_data = *(u32 *)(key_type + 1);
+	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_sk_storage_data *sdata;
+	struct socket *sock;
+	int fd, err;
+
+	fd = *(int *)key;
+	sock = sockfd_lookup(fd, &err);
+	if (sock) {
+		sdata = sk_storage_lookup(sock->sk, map, true);
+		sockfd_put(sock);
+		return sdata ? sdata->data : NULL;
+	}
+
+	return ERR_PTR(err);
+}
+
+static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
+					 void *value, u64 map_flags)
+{
+	struct bpf_sk_storage_data *sdata;
+	struct socket *sock;
+	int fd, err;
+
+	fd = *(int *)key;
+	sock = sockfd_lookup(fd, &err);
+	if (sock) {
+		sdata = sk_storage_update(sock->sk, map, value, map_flags);
+		sockfd_put(sock);
+		return PTR_ERR_OR_ZERO(sdata);
+	}
+
+	return err;
+}
+
+static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
+{
+	struct socket *sock;
+	int fd, err;
+
+	fd = *(int *)key;
+	sock = sockfd_lookup(fd, &err);
+	if (sock) {
+		err = sk_storage_delete(sock->sk, map);
+		sockfd_put(sock);
+		return err;
+	}
+
+	return err;
+}
+
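All three syscall handlers above resolve the key through sockfd_lookup(), so from userspace the map key is literally a socket file descriptor owned by the calling process. A minimal sketch (libbpf; map_fd and the value struct are illustrative):

#include <bpf/bpf.h>
#include <sys/socket.h>
#include <unistd.h>

struct sk_stg_val { unsigned long long cookie; };	/* illustrative */

static int poke_storage(int map_fd)
{
	struct sk_stg_val in = { .cookie = 42 }, out;
	int ret, sk_fd = socket(AF_INET, SOCK_STREAM, 0);

	if (sk_fd < 0)
		return -1;
	/* The map key is the socket fd itself. */
	bpf_map_update_elem(map_fd, &sk_fd, &in, BPF_ANY);
	bpf_map_lookup_elem(map_fd, &sk_fd, &out);
	ret = bpf_map_delete_elem(map_fd, &sk_fd);
	close(sk_fd);
	return ret;
}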
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_clone_elem(struct sock *newsk,
+			  struct bpf_sk_storage_map *smap,
+			  struct bpf_sk_storage_elem *selem)
+{
+	struct bpf_sk_storage_elem *copy_selem;
+
+	copy_selem = selem_alloc(smap, newsk, NULL, true);
+	if (!copy_selem)
+		return NULL;
+
+	if (map_value_has_spin_lock(&smap->map))
+		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
+				      SDATA(selem)->data, true);
+	else
+		copy_map_value(&smap->map, SDATA(copy_selem)->data,
+			       SDATA(selem)->data);
+
+	return copy_selem;
+}
+
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
+{
+	struct bpf_sk_storage *new_sk_storage = NULL;
+	struct bpf_sk_storage *sk_storage;
+	struct bpf_sk_storage_elem *selem;
+	int ret = 0;
+
+	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
+
+	rcu_read_lock();
+	sk_storage = rcu_dereference(sk->sk_bpf_storage);
+
+	if (!sk_storage || hlist_empty(&sk_storage->list))
+		goto out;
+
+	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
+		struct bpf_sk_storage_elem *copy_selem;
+		struct bpf_sk_storage_map *smap;
+		struct bpf_map *map;
+
+		smap = rcu_dereference(SDATA(selem)->smap);
+		if (!(smap->map.map_flags & BPF_F_CLONE))
+			continue;
+
+		/* Note that for lockless listeners, adding a new element
+		 * here can race with cleanup in bpf_sk_storage_map_free().
+		 * Try to grab the map refcnt to make sure that it's still
+		 * alive, and to prevent concurrent removal.
+		 */
+		map = bpf_map_inc_not_zero(&smap->map, false);
+		if (IS_ERR(map))
+			continue;
+
+		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
+		if (!copy_selem) {
+			ret = -ENOMEM;
+			bpf_map_put(map);
+			goto out;
+		}
+
+		if (new_sk_storage) {
+			selem_link_map(smap, copy_selem);
+			__selem_link_sk(new_sk_storage, copy_selem);
+		} else {
+			ret = sk_storage_alloc(newsk, smap, copy_selem);
+			if (ret) {
+				kfree(copy_selem);
+				atomic_sub(smap->elem_size,
+					   &newsk->sk_omem_alloc);
+				bpf_map_put(map);
+				goto out;
+			}
+
+			new_sk_storage = rcu_dereference(copy_selem->sk_storage);
+		}
+		bpf_map_put(map);
+	}
+
+out:
+	rcu_read_unlock();
+
+	/* In case of an error, don't free anything explicitly here; the
+	 * caller is responsible for calling bpf_sk_storage_free().
+	 */
+
+	return ret;
+}
+
+BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags)
+{
+	struct bpf_sk_storage_data *sdata;
+
+	if (flags > BPF_SK_STORAGE_GET_F_CREATE)
+		return (unsigned long)NULL;
+
+	sdata = sk_storage_lookup(sk, map, true);
+	if (sdata)
+		return (unsigned long)sdata->data;
+
+	if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
+	    /* Cannot add a new elem to a sk that is going away.
+	     * Otherwise, the new elem would become a leak
+	     * (and cause other memory issues during map
+	     *  destruction).
+	     */
+	    refcount_inc_not_zero(&sk->sk_refcnt)) {
+		sdata = sk_storage_update(sk, map, value, BPF_NOEXIST);
+		/* sk must be a fullsock (guaranteed by verifier),
+		 * so sock_gen_put() is unnecessary.
+		 */
+		sock_put(sk);
+		return IS_ERR(sdata) ?
+			(unsigned long)NULL : (unsigned long)sdata->data;
+	}
+
+	return (unsigned long)NULL;
+}
+
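bpf_sk_storage_get() is the program-side counterpart; with BPF_SK_STORAGE_GET_F_CREATE it creates the element on first access, which is why the sk_refcnt dance above is needed. A hedged BPF-C sketch, reusing the sk_stg_map/struct sk_stg_val declaration from the earlier map sketch (the hook point and field use are illustrative):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* sk_stg_map / struct sk_stg_val as declared in the earlier sketch. */
SEC("cgroup_skb/egress")
int count_egress(struct __sk_buff *skb)
{
	struct bpf_sock *sk;
	struct sk_stg_val *val;

	/* The helper needs a fullsock (ARG_PTR_TO_SOCKET). */
	sk = skb->sk ? bpf_sk_fullsock(skb->sk) : NULL;
	if (!sk)
		return 1;	/* let the packet through */

	val = bpf_sk_storage_get(&sk_stg_map, sk, NULL,
				 BPF_SK_STORAGE_GET_F_CREATE);
	if (val)
		__sync_fetch_and_add(&val->cookie, 1);
	return 1;
}

char _license[] SEC("license") = "GPL";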
+BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
+{
+	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
+		int err;
+
+		err = sk_storage_delete(sk, map);
+		sock_put(sk);
+		return err;
+	}
+
+	return -ENOENT;
+}
+
+const struct bpf_map_ops sk_storage_map_ops = {
+	.map_alloc_check = bpf_sk_storage_map_alloc_check,
+	.map_alloc = bpf_sk_storage_map_alloc,
+	.map_free = bpf_sk_storage_map_free,
+	.map_get_next_key = notsupp_get_next_key,
+	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
+	.map_update_elem = bpf_fd_sk_storage_update_elem,
+	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
+	.map_check_btf = bpf_sk_storage_map_check_btf,
+};
+
+const struct bpf_func_proto bpf_sk_storage_get_proto = {
+	.func		= bpf_sk_storage_get,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_SOCKET,
+	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+const struct bpf_func_proto bpf_sk_storage_delete_proto = {
+	.func		= bpf_sk_storage_delete,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_SOCKET,
+};
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 57f3a6f..da3c24e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -61,6 +61,8 @@
 #include <trace/events/skb.h>
 #include <net/busy_poll.h>
 
+#include "datagram.h"
+
 /*
  *	Is a socket 'connection oriented' ?
  */
@@ -95,7 +97,7 @@
 	if (error)
 		goto out_err;
 
-	if (sk->sk_receive_queue.prev != skb)
+	if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
 		goto out;
 
 	/* Socket shut down? */
@@ -165,7 +167,7 @@
 					  unsigned int flags,
 					  void (*destructor)(struct sock *sk,
 							   struct sk_buff *skb),
-					  int *peeked, int *off, int *err,
+					  int *off, int *err,
 					  struct sk_buff **last)
 {
 	bool peek_at_off = false;
@@ -192,7 +194,6 @@
 					return NULL;
 				}
 			}
-			*peeked = 1;
 			refcount_inc(&skb->users);
 		} else {
 			__skb_unlink(skb, queue);
@@ -210,7 +211,6 @@
  *	@sk: socket
  *	@flags: MSG\_ flags
  *	@destructor: invoked under the receive lock on successful dequeue
- *	@peeked: returns non-zero if this packet has been seen before
  *	@off: an offset in bytes to peek skb from. Returns an offset
  *	      within an skb where data actually starts
  *	@err: error code returned
@@ -244,7 +244,7 @@
 struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 					void (*destructor)(struct sock *sk,
 							   struct sk_buff *skb),
-					int *peeked, int *off, int *err,
+					int *off, int *err,
 					struct sk_buff **last)
 {
 	struct sk_buff_head *queue = &sk->sk_receive_queue;
@@ -258,7 +258,6 @@
 	if (error)
 		goto no_packet;
 
-	*peeked = 0;
 	do {
 		/* Again only user level code calls this function, so nothing
 		 * interrupt level will suddenly eat the receive_queue.
@@ -268,7 +267,7 @@
 		 */
 		spin_lock_irqsave(&queue->lock, cpu_flags);
 		skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
-						peeked, off, &error, last);
+						off, &error, last);
 		spin_unlock_irqrestore(&queue->lock, cpu_flags);
 		if (error)
 			goto no_packet;
@@ -279,7 +278,7 @@
 			break;
 
 		sk_busy_loop(sk, flags & MSG_DONTWAIT);
-	} while (!skb_queue_empty(&sk->sk_receive_queue));
+	} while (READ_ONCE(sk->sk_receive_queue.prev) != *last);
 
 	error = -EAGAIN;
 
@@ -292,7 +291,7 @@
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 				    void (*destructor)(struct sock *sk,
 						       struct sk_buff *skb),
-				    int *peeked, int *off, int *err)
+				    int *off, int *err)
 {
 	struct sk_buff *skb, *last;
 	long timeo;
@@ -300,8 +299,8 @@
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	do {
-		skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
-					      off, err, &last);
+		skb = __skb_try_recv_datagram(sk, flags, destructor, off, err,
+					      &last);
 		if (skb)
 			return skb;
 
@@ -317,10 +316,10 @@
 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
 				  int noblock, int *err)
 {
-	int peeked, off = 0;
+	int off = 0;
 
 	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-				   NULL, &peeked, &off, err);
+				   NULL, &off, err);
 }
 EXPORT_SYMBOL(skb_recv_datagram);
 
@@ -408,27 +407,20 @@
 }
 EXPORT_SYMBOL(skb_kill_datagram);
 
-/**
- *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
- *	@skb: buffer to copy
- *	@offset: offset in the buffer to start copying from
- *	@to: iovec iterator to copy to
- *	@len: amount of data to copy from buffer to iovec
- */
-int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
-			   struct iov_iter *to, int len)
+static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
+			       struct iov_iter *to, int len, bool fault_short,
+			       size_t (*cb)(const void *, size_t, void *,
+					    struct iov_iter *), void *data)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset, start_off = offset, n;
 	struct sk_buff *frag_iter;
 
-	trace_skb_copy_datagram_iovec(skb, len);
-
 	/* Copy header. */
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
-		n = copy_to_iter(skb->data + offset, copy, to);
+		n = cb(skb->data + offset, copy, data, to);
 		offset += n;
 		if (n != copy)
 			goto short_copy;
@@ -445,11 +437,14 @@
 
 		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
+			struct page *page = skb_frag_page(frag);
+			u8 *vaddr = kmap(page);
+
 			if (copy > len)
 				copy = len;
-			n = copy_page_to_iter(skb_frag_page(frag),
-					      frag->page_offset + offset -
-					      start, copy, to);
+			n = cb(vaddr + skb_frag_off(frag) + offset - start,
+			       copy, data, to);
+			kunmap(page);
 			offset += n;
 			if (n != copy)
 				goto short_copy;
@@ -468,8 +463,8 @@
 		if ((copy = end - offset) > 0) {
 			if (copy > len)
 				copy = len;
-			if (skb_copy_datagram_iter(frag_iter, offset - start,
-						   to, copy))
+			if (__skb_datagram_iter(frag_iter, offset - start,
+						to, copy, fault_short, cb, data))
 				goto fault;
 			if ((len -= copy) == 0)
 				return 0;
@@ -490,11 +485,50 @@
 	return -EFAULT;
 
 short_copy:
-	if (iov_iter_count(to))
+	if (fault_short || iov_iter_count(to))
 		goto fault;
 
 	return 0;
 }
+
+/**
+ *	skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
+ *          and update a hash.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying from
+ *	@to: iovec iterator to copy to
+ *	@len: amount of data to copy from buffer to iovec
+ *	@hash: hash request to update
+ */
+int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
+			   struct iov_iter *to, int len,
+			   struct ahash_request *hash)
+{
+	return __skb_datagram_iter(skb, offset, to, len, true,
+			hash_and_copy_to_iter, hash);
+}
+EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);
+
+static size_t simple_copy_to_iter(const void *addr, size_t bytes,
+		void *data __always_unused, struct iov_iter *i)
+{
+	return copy_to_iter(addr, bytes, i);
+}
+
+/**
+ *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying from
+ *	@to: iovec iterator to copy to
+ *	@len: amount of data to copy from buffer to iovec
+ */
+int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
+			   struct iov_iter *to, int len)
+{
+	trace_skb_copy_datagram_iovec(skb, len);
+	return __skb_datagram_iter(skb, offset, to, len, false,
+			simple_copy_to_iter, NULL);
+}
 EXPORT_SYMBOL(skb_copy_datagram_iter);
 
 /**
@@ -539,7 +573,7 @@
 			if (copy > len)
 				copy = len;
 			copied = copy_page_from_iter(skb_frag_page(frag),
-					  frag->page_offset + offset - start,
+					  skb_frag_off(frag) + offset - start,
 					  copy, from);
 			if (copied != copy)
 				goto fault;
@@ -606,7 +640,7 @@
 		skb->len += copied;
 		skb->truesize += truesize;
 		if (sk && sk->sk_type == SOCK_STREAM) {
-			sk->sk_wmem_queued += truesize;
+			sk_wmem_queued_add(sk, truesize);
 			sk_mem_charge(sk, truesize);
 		} else {
 			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
@@ -645,132 +679,23 @@
 }
 EXPORT_SYMBOL(zerocopy_sg_from_iter);
 
+/**
+ *	skb_copy_and_csum_datagram_iter - Copy datagram to an iovec iterator
+ *          and update a checksum.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying from
+ *	@to: iovec iterator to copy to
+ *	@len: amount of data to copy from buffer to iovec
+ *	@csump: checksum pointer
+ */
 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 				      struct iov_iter *to, int len,
 				      __wsum *csump)
 {
-	int start = skb_headlen(skb);
-	int i, copy = start - offset, start_off = offset;
-	struct sk_buff *frag_iter;
-	int pos = 0;
-	int n;
-
-	/* Copy header. */
-	if (copy > 0) {
-		if (copy > len)
-			copy = len;
-		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
-		offset += n;
-		if (n != copy)
-			goto fault;
-		if ((len -= copy) == 0)
-			return 0;
-		pos = copy;
-	}
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-		WARN_ON(start > offset + len);
-
-		end = start + skb_frag_size(frag);
-		if ((copy = end - offset) > 0) {
-			__wsum csum2 = 0;
-			struct page *page = skb_frag_page(frag);
-			u8  *vaddr = kmap(page);
-
-			if (copy > len)
-				copy = len;
-			n = csum_and_copy_to_iter(vaddr + frag->page_offset +
-						  offset - start, copy,
-						  &csum2, to);
-			kunmap(page);
-			offset += n;
-			if (n != copy)
-				goto fault;
-			*csump = csum_block_add(*csump, csum2, pos);
-			if (!(len -= copy))
-				return 0;
-			pos += copy;
-		}
-		start = end;
-	}
-
-	skb_walk_frags(skb, frag_iter) {
-		int end;
-
-		WARN_ON(start > offset + len);
-
-		end = start + frag_iter->len;
-		if ((copy = end - offset) > 0) {
-			__wsum csum2 = 0;
-			if (copy > len)
-				copy = len;
-			if (skb_copy_and_csum_datagram(frag_iter,
-						       offset - start,
-						       to, copy,
-						       &csum2))
-				goto fault;
-			*csump = csum_block_add(*csump, csum2, pos);
-			if ((len -= copy) == 0)
-				return 0;
-			offset += copy;
-			pos += copy;
-		}
-		start = end;
-	}
-	if (!len)
-		return 0;
-
-fault:
-	iov_iter_revert(to, offset - start_off);
-	return -EFAULT;
+	return __skb_datagram_iter(skb, offset, to, len, true,
+			csum_and_copy_to_iter, csump);
 }
 
-__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
-{
-	__sum16 sum;
-
-	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
-	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
-		    !skb->csum_complete_sw)
-			netdev_rx_csum_fault(skb->dev);
-	}
-	if (!skb_shared(skb))
-		skb->csum_valid = !sum;
-	return sum;
-}
-EXPORT_SYMBOL(__skb_checksum_complete_head);
-
-__sum16 __skb_checksum_complete(struct sk_buff *skb)
-{
-	__wsum csum;
-	__sum16 sum;
-
-	csum = skb_checksum(skb, 0, skb->len, 0);
-
-	/* skb->csum holds pseudo checksum */
-	sum = csum_fold(csum_add(skb->csum, csum));
-	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
-		    !skb->csum_complete_sw)
-			netdev_rx_csum_fault(skb->dev);
-	}
-
-	if (!skb_shared(skb)) {
-		/* Save full packet checksum */
-		skb->csum = csum;
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		skb->csum_complete_sw = 1;
-		skb->csum_valid = !sum;
-	}
-
-	return sum;
-}
-EXPORT_SYMBOL(__skb_checksum_complete);
-
 /**
  *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
  *	@skb: skbuff
@@ -810,7 +735,7 @@
 
 		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
 		    !skb->csum_complete_sw)
-			netdev_rx_csum_fault(NULL);
+			netdev_rx_csum_fault(NULL, skb);
 	}
 	return 0;
 fault:
@@ -842,7 +767,7 @@
 	mask = 0;
 
 	/* exceptional events? */
-	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
 		mask |= EPOLLERR |
 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
@@ -852,7 +777,7 @@
 		mask |= EPOLLHUP;
 
 	/* readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
diff --git a/net/core/datagram.h b/net/core/datagram.h
new file mode 100644
index 0000000..bcfb75b
--- /dev/null
+++ b/net/core/datagram.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NET_CORE_DATAGRAM_H_
+#define _NET_CORE_DATAGRAM_H_
+
+#include <linux/types.h>
+
+struct sock;
+struct sk_buff;
+struct iov_iter;
+
+int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+			    struct iov_iter *from, size_t length);
+
+#endif /* _NET_CORE_DATAGRAM_H_ */
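The datagram.c refactor above folds three near-identical copy loops (plain copy, copy-and-checksum, copy-and-hash) into one walker, __skb_datagram_iter(), parameterized by a per-chunk callback. A stand-alone sketch of that "walk once, plug in the operation" shape (all names hypothetical):

#include <stddef.h>
#include <string.h>

/* Per-chunk operation: returns the number of bytes it consumed. */
typedef size_t (*chunk_cb)(const void *src, size_t len, void *state,
			   char **dst);

static size_t plain_copy(const void *src, size_t len, void *state, char **dst)
{
	memcpy(*dst, src, len);
	*dst += len;
	return len;
}

static size_t sum_copy(const void *src, size_t len, void *state, char **dst)
{
	unsigned int *sum = state;
	const unsigned char *p = src;
	size_t i;

	for (i = 0; i < len; i++)	/* toy "checksum" over the chunk */
		*sum += p[i];
	return plain_copy(src, len, state, dst);
}

/* One walker, many behaviours: the shape of __skb_datagram_iter(). */
static int walk_chunks(const char *chunks[], const size_t lens[], int n,
		       char *out, chunk_cb cb, void *state)
{
	int i;

	for (i = 0; i < n; i++)
		if (cb(chunks[i], lens[i], state, &out) != lens[i])
			return -1;	/* short copy -> fault */
	return 0;
}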
diff --git a/net/core/dev.c b/net/core/dev.c
index 1f1aae2..99ac84f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *      NET3    Protocol independent device support routines.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	Derived from the non IP parts of dev.c 1.0.19
  *              Authors:	Ross Biro
  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -131,7 +127,6 @@
 #include <trace/events/napi.h>
 #include <trace/events/net.h>
 #include <trace/events/skb.h>
-#include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
 #include <linux/static_key.h>
@@ -145,10 +140,13 @@
 #include <linux/sctp.h>
 #include <net/udp_tunnel.h>
 #include <linux/net_namespace.h>
+#include <linux/indirect_call_wrapper.h>
+#include <net/devlink.h>
 
 #include "net-sysfs.h"
 
 #define MAX_GRO_SKBS 8
+#define MAX_NEST_DEV 8
 
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
@@ -162,6 +160,9 @@
 static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
 					 struct netdev_notifier_info *info);
+static int call_netdevice_notifiers_extack(unsigned long val,
+					   struct net_device *dev,
+					   struct netlink_ext_ack *extack);
 static struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /*
@@ -276,88 +277,6 @@
 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 EXPORT_PER_CPU_SYMBOL(softnet_data);
 
-#ifdef CONFIG_LOCKDEP
-/*
- * register_netdevice() inits txq->_xmit_lock and sets lockdep class
- * according to dev->type
- */
-static const unsigned short netdev_lock_type[] = {
-	 ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
-	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
-	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
-	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
-	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
-	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
-	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
-	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
-	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
-	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
-	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
-	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
-	 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
-	 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
-	 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
-
-static const char *const netdev_lock_name[] = {
-	"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
-	"_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
-	"_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
-	"_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
-	"_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
-	"_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
-	"_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
-	"_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
-	"_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
-	"_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
-	"_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
-	"_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
-	"_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
-	"_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
-	"_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
-
-static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
-static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
-
-static inline unsigned short netdev_lock_pos(unsigned short dev_type)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
-		if (netdev_lock_type[i] == dev_type)
-			return i;
-	/* the last key is used by default */
-	return ARRAY_SIZE(netdev_lock_type) - 1;
-}
-
-static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
-						 unsigned short dev_type)
-{
-	int i;
-
-	i = netdev_lock_pos(dev_type);
-	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
-				   netdev_lock_name[i]);
-}
-
-static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
-{
-	int i;
-
-	i = netdev_lock_pos(dev->type);
-	lockdep_set_class_and_name(&dev->addr_list_lock,
-				   &netdev_addr_lock_key[i],
-				   netdev_lock_name[i]);
-}
-#else
-static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
-						 unsigned short dev_type)
-{
-}
-static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
-{
-}
-#endif
-
 /*******************************************************************************
  *
  *		Protocol management and registration routines
@@ -1180,7 +1099,21 @@
 	BUG_ON(!dev_net(dev));
 
 	net = dev_net(dev);
-	if (dev->flags & IFF_UP)
+
+	/* Some auto-enslaved devices, e.g. failover slaves, are
+	 * special: userspace might rename the device after
+	 * the interface has been brought up and running, since
+	 * the point the kernel initiated auto-enslavement. Allow
+	 * live name changes even while these slave devices are
+	 * up and running.
+	 *
+	 * Typically, users of these auto-enslaving devices
+	 * don't actually care about slave name changes, as
+	 * they are supposed to operate on the master interface
+	 * directly.
+	 */
+	if (dev->flags & IFF_UP &&
+	    likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
 		return -EBUSY;
 
 	write_seqcount_begin(&devnet_rename_seq);
@@ -1361,7 +1294,7 @@
 }
 EXPORT_SYMBOL(netdev_notify_peers);
 
-static int __dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int ret;
@@ -1377,7 +1310,7 @@
 	 */
 	netpoll_poll_disable(dev);
 
-	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
+	ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
 	ret = notifier_to_errno(ret);
 	if (ret)
 		return ret;
@@ -1406,7 +1339,8 @@
 
 /**
  *	dev_open	- prepare an interface for use.
- *	@dev:	device to open
+ *	@dev: device to open
+ *	@extack: netlink extended ack
  *
  *	Takes a device from down to up state. The device's private open
  *	function is invoked and then the multicast lists are loaded. Finally
@@ -1416,14 +1350,14 @@
  *	Calling this function on an active interface is a nop. On a failure
  *	a negative errno code is returned.
  */
-int dev_open(struct net_device *dev)
+int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
 {
 	int ret;
 
 	if (dev->flags & IFF_UP)
 		return 0;
 
-	ret = __dev_open(dev);
+	ret = __dev_open(dev, extack);
 	if (ret < 0)
 		return ret;
 
@@ -1585,6 +1519,7 @@
 	N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
 	N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
 	N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
+	N(PRE_CHANGEADDR)
 	}
 #undef N
 	return "UNKNOWN_NETDEV_EVENT";
@@ -1733,6 +1668,18 @@
 	return raw_notifier_call_chain(&netdev_chain, val, info);
 }
 
+static int call_netdevice_notifiers_extack(unsigned long val,
+					   struct net_device *dev,
+					   struct netlink_ext_ack *extack)
+{
+	struct netdev_notifier_info info = {
+		.dev = dev,
+		.extack = extack,
+	};
+
+	return call_netdevice_notifiers_info(val, &info);
+}
+
 /**
  *	call_netdevice_notifiers - call all network notifier blocks
  *      @val: value passed unmodified to notifier function
@@ -1744,11 +1691,7 @@
 
 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 {
-	struct netdev_notifier_info info = {
-		.dev = dev,
-	};
-
-	return call_netdevice_notifiers_info(val, &info);
+	return call_netdevice_notifiers_extack(val, dev, NULL);
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
@@ -1807,7 +1750,7 @@
 #endif
 
 static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
 static atomic_t netstamp_needed_deferred;
 static atomic_t netstamp_wanted;
 static void netstamp_clear(struct work_struct *work)
@@ -1826,7 +1769,7 @@
 
 void net_enable_timestamp(void)
 {
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
 	int wanted;
 
 	while (1) {
@@ -1846,7 +1789,7 @@
 
 void net_disable_timestamp(void)
 {
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
 	int wanted;
 
 	while (1) {
@@ -1976,6 +1919,17 @@
 	return false;
 }
 
+/**
+ * dev_nit_active - return true if any network interface taps are in use
+ *
+ * @dev: network device to check for the presence of taps
+ */
+bool dev_nit_active(struct net_device *dev)
+{
+	return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
+}
+EXPORT_SYMBOL_GPL(dev_nit_active);
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
@@ -1991,6 +1945,9 @@
 	rcu_read_lock();
 again:
 	list_for_each_entry_rcu(ptype, ptype_list, list) {
+		if (ptype->ignore_outgoing)
+			continue;
+
 		/* Never send packets back to the socket
 		 * they originated from - MvS (miquels@drinkel.ow.org)
 		 */
@@ -2862,12 +2819,10 @@
 		else
 			name = netdev_name(dev);
 	}
-	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
-	     "gso_type=%d ip_summed=%d\n",
+	skb_dump(KERN_WARNING, skb, false);
+	WARN(1, "%s: caps=(%pNF, %pNF)\n",
 	     name, dev ? &dev->features : &null_features,
-	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
-	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
-	     skb_shinfo(skb)->gso_type, skb->ip_summed);
+	     skb->sk ? &skb->sk->sk_route_caps : &null_features);
 }
 
 /*
@@ -3082,10 +3037,11 @@
 
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
-void netdev_rx_csum_fault(struct net_device *dev)
+void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
 {
 	if (net_ratelimit()) {
 		pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+		skb_dump(KERN_ERR, skb, true);
 		dump_stack();
 	}
 }
@@ -3235,7 +3191,7 @@
 	unsigned int len;
 	int rc;
 
-	if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
+	if (dev_nit_active(dev))
 		dev_queue_xmit_nit(skb, dev);
 
 	len = skb->len;
@@ -3255,7 +3211,7 @@
 	while (skb) {
 		struct sk_buff *next = skb->next;
 
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		rc = xmit_one(skb, dev, txq, next != NULL);
 		if (unlikely(!dev_xmit_complete(rc))) {
 			skb->next = next;
@@ -3263,7 +3219,7 @@
 		}
 
 		skb = next;
-		if (netif_xmit_stopped(txq) && skb) {
+		if (netif_tx_queue_stopped(txq) && skb) {
 			rc = NETDEV_TX_BUSY;
 			break;
 		}
@@ -3355,7 +3311,7 @@
 
 	for (; skb != NULL; skb = next) {
 		next = skb->next;
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 
 		/* in case skb wont be segmented, point to itself */
 		skb->prev = skb;
@@ -3386,7 +3342,7 @@
 	/* To get more precise estimation of bytes sent on wire,
 	 * we add to pkt_len the headers size of all segments
 	 */
-	if (shinfo->gso_size)  {
+	if (shinfo->gso_size && skb_transport_header_was_set(skb)) {
 		unsigned int hdr_len;
 		u16 gso_segs = shinfo->gso_segs;
 
@@ -3430,9 +3386,22 @@
 	qdisc_calculate_pkt_len(skb, q);
 
 	if (q->flags & TCQ_F_NOLOCK) {
-		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-			__qdisc_drop(skb, &to_free);
-			rc = NET_XMIT_DROP;
+		if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
+		    qdisc_run_begin(q)) {
+			if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+					      &q->state))) {
+				__qdisc_drop(skb, &to_free);
+				rc = NET_XMIT_DROP;
+				goto end_run;
+			}
+			qdisc_bstats_cpu_update(q, skb);
+
+			rc = NET_XMIT_SUCCESS;
+			if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
+				__qdisc_run(q);
+
+end_run:
+			qdisc_run_end(q);
 		} else {
 			rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
 			qdisc_run(q);
@@ -3521,9 +3490,6 @@
 #define skb_update_prio(skb)
 #endif
 
-DEFINE_PER_CPU(int, xmit_recursion);
-EXPORT_SYMBOL(xmit_recursion);
-
 /**
  *	dev_loopback_xmit - loop back @skb
  *	@net: network namespace this loopback is happening in
@@ -3654,23 +3620,21 @@
 }
 
 u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
-		     struct net_device *sb_dev,
-		     select_queue_fallback_t fallback)
+		     struct net_device *sb_dev)
 {
 	return 0;
 }
 EXPORT_SYMBOL(dev_pick_tx_zero);
 
 u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
-		       struct net_device *sb_dev,
-		       select_queue_fallback_t fallback)
+		       struct net_device *sb_dev)
 {
 	return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
 }
 EXPORT_SYMBOL(dev_pick_tx_cpu_id);
 
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
-			    struct net_device *sb_dev)
+u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+		     struct net_device *sb_dev)
 {
 	struct sock *sk = skb->sk;
 	int queue_index = sk_tx_queue_get(sk);
@@ -3694,10 +3658,11 @@
 
 	return queue_index;
 }
+EXPORT_SYMBOL(netdev_pick_tx);
 
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb,
-				    struct net_device *sb_dev)
+struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
+					 struct sk_buff *skb,
+					 struct net_device *sb_dev)
 {
 	int queue_index = 0;
 
@@ -3712,10 +3677,9 @@
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		if (ops->ndo_select_queue)
-			queue_index = ops->ndo_select_queue(dev, skb, sb_dev,
-							    __netdev_pick_tx);
+			queue_index = ops->ndo_select_queue(dev, skb, sb_dev);
 		else
-			queue_index = __netdev_pick_tx(dev, skb, sb_dev);
+			queue_index = netdev_pick_tx(dev, skb, sb_dev);
 
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	}
@@ -3789,7 +3753,7 @@
 	else
 		skb_dst_force(skb);
 
-	txq = netdev_pick_tx(dev, skb, sb_dev);
+	txq = netdev_core_pick_tx(dev, skb, sb_dev);
 	q = rcu_dereference_bh(txq->qdisc);
 
 	trace_net_dev_queue(skb);
@@ -3814,8 +3778,7 @@
 		int cpu = smp_processor_id(); /* ok because BHs are off */
 
 		if (txq->xmit_lock_owner != cpu) {
-			if (unlikely(__this_cpu_read(xmit_recursion) >
-				     XMIT_RECURSION_LIMIT))
+			if (dev_xmit_recursion())
 				goto recursion_alert;
 
 			skb = validate_xmit_skb(skb, dev, &again);
@@ -3825,9 +3788,9 @@
 			HARD_TX_LOCK(dev, txq, cpu);
 
 			if (!netif_xmit_stopped(txq)) {
-				__this_cpu_inc(xmit_recursion);
+				dev_xmit_recursion_inc();
 				skb = dev_hard_start_xmit(skb, dev, txq, &rc);
-				__this_cpu_dec(xmit_recursion);
+				dev_xmit_recursion_dec();
 				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
 					goto out;
@@ -3923,6 +3886,8 @@
 int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
 int dev_rx_weight __read_mostly = 64;
 int dev_tx_weight __read_mostly = 64;
+/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
+int gro_normal_batch __read_mostly = 8;
 
 /* Called with irq disabled */
 static inline void ____napi_schedule(struct softnet_data *sd,
@@ -3940,9 +3905,9 @@
 u32 rps_cpu_mask __read_mostly;
 EXPORT_SYMBOL(rps_cpu_mask);
 
-struct static_key rps_needed __read_mostly;
+struct static_key_false rps_needed __read_mostly;
 EXPORT_SYMBOL(rps_needed);
-struct static_key rfs_needed __read_mostly;
+struct static_key_false rfs_needed __read_mostly;
 EXPORT_SYMBOL(rfs_needed);
 
 static struct rps_dev_flow *
@@ -4282,6 +4247,9 @@
 	struct netdev_rx_queue *rxqueue;
 	void *orig_data, *orig_data_end;
 	u32 metalen, act = XDP_DROP;
+	__be16 orig_eth_type;
+	struct ethhdr *eth;
+	bool orig_bcast;
 	int hlen, off;
 	u32 mac_len;
 
@@ -4322,18 +4290,26 @@
 	xdp->data_hard_start = skb->data - skb_headroom(skb);
 	orig_data_end = xdp->data_end;
 	orig_data = xdp->data;
+	eth = (struct ethhdr *)xdp->data;
+	orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
+	orig_eth_type = eth->h_proto;
 
 	rxqueue = netif_get_rxqueue(skb);
 	xdp->rxq = &rxqueue->xdp_rxq;
 
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 
+	/* check if bpf_xdp_adjust_head was used */
 	off = xdp->data - orig_data;
-	if (off > 0)
-		__skb_pull(skb, off);
-	else if (off < 0)
-		__skb_push(skb, -off);
-	skb->mac_header += off;
+	if (off) {
+		if (off > 0)
+			__skb_pull(skb, off);
+		else if (off < 0)
+			__skb_push(skb, -off);
+
+		skb->mac_header += off;
+		skb_reset_network_header(skb);
+	}
 
 	/* check if bpf_xdp_adjust_tail was used. it can only "shrink"
 	 * the packet.
@@ -4345,6 +4321,14 @@
 
 	}
 
+	/* check if XDP changed eth hdr such that the skb needs an update */
+	eth = (struct ethhdr *)xdp->data;
+	if ((orig_eth_type != eth->h_proto) ||
+	    (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
+		__skb_push(skb, ETH_HLEN);
+		skb->protocol = eth_type_trans(skb, skb->dev);
+	}
+
 	switch (act) {
 	case XDP_REDIRECT:
 	case XDP_TX:
@@ -4380,7 +4364,7 @@
 	bool free_skb = true;
 	int cpu, rc;
 
-	txq = netdev_pick_tx(dev, skb, NULL);
+	txq = netdev_core_pick_tx(dev, skb, NULL);
 	cpu = smp_processor_id();
 	HARD_TX_LOCK(dev, txq, cpu);
 	if (!netif_xmit_stopped(txq)) {
@@ -4436,25 +4420,8 @@
 
 	trace_netif_rx(skb);
 
-	if (static_branch_unlikely(&generic_xdp_needed_key)) {
-		int ret;
-
-		preempt_disable();
-		rcu_read_lock();
-		ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
-		rcu_read_unlock();
-		preempt_enable();
-
-		/* Consider XDP consuming the packet a success from
-		 * the netdev point of view we do not want to count
-		 * this as an error.
-		 */
-		if (ret != XDP_PASS)
-			return NET_RX_SUCCESS;
-	}
-
 #ifdef CONFIG_RPS
-	if (static_key_false(&rps_needed)) {
+	if (static_branch_unlikely(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
@@ -4497,9 +4464,14 @@
 
 int netif_rx(struct sk_buff *skb)
 {
+	int ret;
+
 	trace_netif_rx_entry(skb);
 
-	return netif_rx_internal(skb);
+	ret = netif_rx_internal(skb);
+	trace_netif_rx_exit(ret);
+
+	return ret;
 }
 EXPORT_SYMBOL(netif_rx);
 
@@ -4514,6 +4486,7 @@
 	if (local_softirq_pending())
 		do_softirq();
 	preempt_enable();
+	trace_netif_rx_ni_exit(err);
 
 	return err;
 }
@@ -4638,9 +4611,7 @@
 		__skb_push(skb, skb->mac_len);
 		skb_do_redirect(skb);
 		return NULL;
-	case TC_ACT_REINSERT:
-		/* this does not scrub the packet, and updates stats on error */
-		skb_tc_reinsert(skb, &cl_res);
+	case TC_ACT_CONSUMED:
 		return NULL;
 	default:
 		break;
@@ -4786,6 +4757,18 @@
 
 	__this_cpu_inc(softnet_data.processed);
 
+	if (static_branch_unlikely(&generic_xdp_needed_key)) {
+		int ret2;
+
+		preempt_disable();
+		ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+		preempt_enable();
+
+		if (ret2 != XDP_PASS)
+			return NET_RX_DROP;
+		skb_reset_mac_len(skb);
+	}
+
 	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
 	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
 		skb = skb_vlan_untag(skb);
@@ -4860,13 +4843,41 @@
 	}
 
 	if (unlikely(skb_vlan_tag_present(skb))) {
-		if (skb_vlan_tag_get_id(skb))
+check_vlan_id:
+		if (skb_vlan_tag_get_id(skb)) {
+			/* Vlan id is non-zero and vlan_do_receive() above couldn't
+			 * find a vlan device.
+			 */
 			skb->pkt_type = PACKET_OTHERHOST;
+		} else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
+			   skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+			/* Outer header is 802.1P with vlan 0, inner header is
+			 * 802.1Q or 802.1AD and vlan_do_receive() above could
+			 * not find vlan dev for vlan id 0.
+			 */
+			__vlan_hwaccel_clear_tag(skb);
+			skb = skb_vlan_untag(skb);
+			if (unlikely(!skb))
+				goto out;
+			if (vlan_do_receive(&skb))
+				/* After stripping off the 802.1P header with vlan 0,
+				 * a vlan dev is found for the inner header.
+				 */
+				goto another_round;
+			else if (unlikely(!skb))
+				goto out;
+			else
+				/* We have stripped the outer 802.1P vlan 0 header,
+				 * but could not find a vlan dev.
+				 * Check the vlan id again to set OTHERHOST.
+				 */
+				goto check_vlan_id;
+		}
 		/* Note: we might in the future use prio bits
 		 * and set skb->priority like in vlan_do_receive()
 		 * For the time being, just ignore Priority Code Point
 		 */
-		skb->vlan_tci = 0;
+		__vlan_hwaccel_clear_tag(skb);
 	}
 
 	type = skb->protocol;
@@ -4915,7 +4926,8 @@
 
 	ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
 	if (pt_prev)
-		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+		ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
+					 skb->dev, pt_prev, orig_dev);
 	return ret;
 }
 
@@ -4957,10 +4969,13 @@
 	if (list_empty(head))
 		return;
 	if (pt_prev->list_func != NULL)
-		pt_prev->list_func(head, pt_prev, orig_dev);
+		INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv,
+				   ip_list_rcv, head, pt_prev, orig_dev);
 	else
-		list_for_each_entry_safe(skb, next, head, list)
+		list_for_each_entry_safe(skb, next, head, list) {
+			skb_list_del_init(skb);
 			pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+		}
 }
 
 static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
@@ -5102,22 +5117,9 @@
 	if (skb_defer_rx_timestamp(skb))
 		return NET_RX_SUCCESS;
 
-	if (static_branch_unlikely(&generic_xdp_needed_key)) {
-		int ret;
-
-		preempt_disable();
-		rcu_read_lock();
-		ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
-		rcu_read_unlock();
-		preempt_enable();
-
-		if (ret != XDP_PASS)
-			return NET_RX_DROP;
-	}
-
 	rcu_read_lock();
 #ifdef CONFIG_RPS
-	if (static_key_false(&rps_needed)) {
+	if (static_branch_unlikely(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
@@ -5135,7 +5137,6 @@
 
 static void netif_receive_skb_list_internal(struct list_head *head)
 {
-	struct bpf_prog *xdp_prog = NULL;
 	struct sk_buff *skb, *next;
 	struct list_head sublist;
 
@@ -5148,24 +5149,9 @@
 	}
 	list_splice_init(&sublist, head);
 
-	if (static_branch_unlikely(&generic_xdp_needed_key)) {
-		preempt_disable();
-		rcu_read_lock();
-		list_for_each_entry_safe(skb, next, head, list) {
-			xdp_prog = rcu_dereference(skb->dev->xdp_prog);
-			skb_list_del_init(skb);
-			if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
-				list_add_tail(&skb->list, &sublist);
-		}
-		rcu_read_unlock();
-		preempt_enable();
-		/* Put passed packets back on main list */
-		list_splice_init(&sublist, head);
-	}
-
 	rcu_read_lock();
 #ifdef CONFIG_RPS
-	if (static_key_false(&rps_needed)) {
+	if (static_branch_unlikely(&rps_needed)) {
 		list_for_each_entry_safe(skb, next, head, list) {
 			struct rps_dev_flow voidflow, *rflow = &voidflow;
 			int cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -5199,9 +5185,14 @@
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
+	int ret;
+
 	trace_netif_receive_skb_entry(skb);
 
-	return netif_receive_skb_internal(skb);
+	ret = netif_receive_skb_internal(skb);
+	trace_netif_receive_skb_exit(ret);
+
+	return ret;
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
@@ -5221,9 +5212,12 @@
 
 	if (list_empty(head))
 		return;
-	list_for_each_entry(skb, head, list)
-		trace_netif_receive_skb_list_entry(skb);
+	if (trace_netif_receive_skb_list_entry_enabled()) {
+		list_for_each_entry(skb, head, list)
+			trace_netif_receive_skb_list_entry(skb);
+	}
 	netif_receive_skb_list_internal(head);
+	trace_netif_receive_skb_list_exit(0);
 }
 EXPORT_SYMBOL(netif_receive_skb_list);
 
@@ -5276,6 +5270,8 @@
 	put_online_cpus();
 }
 
+INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
 static int napi_gro_complete(struct sk_buff *skb)
 {
 	struct packet_offload *ptype;
@@ -5295,7 +5291,9 @@
 		if (ptype->type != type || !ptype->callbacks.gro_complete)
 			continue;
 
-		err = ptype->callbacks.gro_complete(skb, 0);
+		err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+					 ipv6_gro_complete, inet_gro_complete,
+					 skb, 0);
 		break;
 	}
 	rcu_read_unlock();
@@ -5319,8 +5317,7 @@
 	list_for_each_entry_safe_reverse(skb, p, head, list) {
 		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 			return;
-		list_del(&skb->list);
-		skb->next = NULL;
+		skb_list_del_init(skb);
 		napi_gro_complete(skb);
 		napi->gro_hash[index].count--;
 	}
@@ -5335,11 +5332,13 @@
  */
 void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
-	u32 i;
+	unsigned long bitmask = napi->gro_bitmask;
+	unsigned int i, base = ~0U;
 
-	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
-		if (test_bit(i, &napi->gro_bitmask))
-			__napi_gro_flush_chain(napi, i, flush_old);
+	while ((i = ffs(bitmask)) != 0) {
+		bitmask >>= i;
+		base += i;
+		__napi_gro_flush_chain(napi, base, flush_old);
 	}
 }
 EXPORT_SYMBOL(napi_gro_flush);
@@ -5364,7 +5363,9 @@
 		}
 
 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
-		diffs |= p->vlan_tci ^ skb->vlan_tci;
+		diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
+		if (skb_vlan_tag_present(p))
+			diffs |= p->vlan_tci ^ skb->vlan_tci;
 		diffs |= skb_metadata_dst_cmp(p, skb);
 		diffs |= skb_metadata_differs(p, skb);
 		if (maclen == ETH_HLEN)
@@ -5410,7 +5411,7 @@
 	skb->data_len -= grow;
 	skb->tail += grow;
 
-	pinfo->frags[0].page_offset += grow;
+	skb_frag_off_add(&pinfo->frags[0], grow);
 	skb_frag_size_sub(&pinfo->frags[0], grow);
 
 	if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
@@ -5435,11 +5436,14 @@
 	/* Do not adjust napi->gro_hash[].count, caller is adding a new
 	 * SKB to the chain.
 	 */
-	list_del(&oldest->list);
-	oldest->next = NULL;
+	skb_list_del_init(oldest);
 	napi_gro_complete(oldest);
 }
 
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
+							   struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
+							   struct sk_buff *));
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
@@ -5489,7 +5493,9 @@
 			NAPI_GRO_CB(skb)->csum_valid = 0;
 		}
 
-		pp = ptype->callbacks.gro_receive(gro_head, skb);
+		pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
+					ipv6_gro_receive, inet_gro_receive,
+					gro_head, skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -5506,8 +5512,7 @@
 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
 
 	if (pp) {
-		list_del(&pp->list);
-		pp->next = NULL;
+		skb_list_del_init(pp);
 		napi_gro_complete(pp);
 		napi->gro_hash[hash].count--;
 	}
@@ -5580,7 +5585,7 @@
 static void napi_skb_free_stolen_head(struct sk_buff *skb)
 {
 	skb_dst_drop(skb);
-	secpath_reset(skb);
+	skb_ext_put(skb);
 	kmem_cache_free(skbuff_head_cache, skb);
 }
 
@@ -5614,12 +5619,17 @@
 
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+	gro_result_t ret;
+
 	skb_mark_napi_id(skb, napi);
 	trace_napi_gro_receive_entry(skb);
 
 	skb_gro_reset_offset(skb);
 
-	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
+	ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
+	trace_napi_gro_receive_exit(ret);
+
+	return ret;
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
@@ -5632,7 +5642,7 @@
 	__skb_pull(skb, skb_headlen(skb));
 	/* restore the reserve we had after netdev_alloc_skb_ip_align() */
 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
-	skb->vlan_tci = 0;
+	__vlan_hwaccel_clear_tag(skb);
 	skb->dev = napi->dev;
 	skb->skb_iif = 0;
 
@@ -5642,7 +5652,7 @@
 	skb->encapsulation = 0;
 	skb_shinfo(skb)->gso_type = 0;
 	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
-	secpath_reset(skb);
+	skb_ext_reset(skb);
 
 	napi->skb = skb;
 }
@@ -5662,6 +5672,26 @@
 }
 EXPORT_SYMBOL(napi_get_frags);
 
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+	if (!napi->rx_count)
+		return;
+	netif_receive_skb_list_internal(&napi->rx_list);
+	INIT_LIST_HEAD(&napi->rx_list);
+	napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing.  If the batch size is
+ * exceeded, pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+	list_add_tail(&skb->list, &napi->rx_list);
+	if (++napi->rx_count >= gro_normal_batch)
+		gro_normal_list(napi);
+}
+
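gro_normal_one()/gro_normal_list() implement a simple accumulate-then-flush batch, sized by the new gro_normal_batch knob, so GRO_NORMAL skbs ride the cheaper list-receive path. A generic sketch of the same batching shape (names hypothetical; a batcher must start with tail pointing at head):

#include <stddef.h>

#define BATCH 8				/* stand-in for gro_normal_batch */

struct item { struct item *next; };

struct batcher {
	struct item *head;
	struct item **tail;		/* init: tail = &head */
	int count;
};

/* Hand the whole accumulated list over in one call, like gro_normal_list(). */
static void flush(struct batcher *b, void (*deliver)(struct item *))
{
	if (!b->count)
		return;
	deliver(b->head);
	b->head = NULL;
	b->tail = &b->head;
	b->count = 0;
}

/* Queue one item; flush once the batch size is reached, like gro_normal_one(). */
static void queue_one(struct batcher *b, struct item *it,
		      void (*deliver)(struct item *))
{
	it->next = NULL;
	*b->tail = it;
	b->tail = &it->next;
	if (++b->count >= BATCH)
		flush(b, deliver);
}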
 static gro_result_t napi_frags_finish(struct napi_struct *napi,
 				      struct sk_buff *skb,
 				      gro_result_t ret)
@@ -5671,8 +5701,8 @@
 	case GRO_HELD:
 		__skb_push(skb, ETH_HLEN);
 		skb->protocol = eth_type_trans(skb, skb->dev);
-		if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
-			ret = GRO_DROP;
+		if (ret == GRO_NORMAL)
+			gro_normal_one(napi, skb);
 		break;
 
 	case GRO_DROP:
@@ -5709,7 +5739,6 @@
 	skb_reset_mac_header(skb);
 	skb_gro_reset_offset(skb);
 
-	eth = skb_gro_header_fast(skb, 0);
 	if (unlikely(skb_gro_header_hard(skb, hlen))) {
 		eth = skb_gro_header_slow(skb, hlen, 0);
 		if (unlikely(!eth)) {
@@ -5719,6 +5748,7 @@
 			return NULL;
 		}
 	} else {
+		eth = (const struct ethhdr *)skb->data;
 		gro_pull_from_frag0(skb, hlen);
 		NAPI_GRO_CB(skb)->frag0 += hlen;
 		NAPI_GRO_CB(skb)->frag0_len -= hlen;
@@ -5737,6 +5767,7 @@
 
 gro_result_t napi_gro_frags(struct napi_struct *napi)
 {
+	gro_result_t ret;
 	struct sk_buff *skb = napi_frags_skb(napi);
 
 	if (!skb)
@@ -5744,7 +5775,10 @@
 
 	trace_napi_gro_frags_entry(skb);
 
-	return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+	ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+	trace_napi_gro_frags_exit(ret);
+
+	return ret;
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
@@ -5760,10 +5794,11 @@
 
 	/* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
 	sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
+	/* See comments in __skb_checksum_complete(). */
 	if (likely(!sum)) {
 		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
 		    !skb->csum_complete_sw)
-			netdev_rx_csum_fault(skb->dev);
+			netdev_rx_csum_fault(skb->dev, skb);
 	}
 
 	NAPI_GRO_CB(skb)->csum = wsum;
@@ -5944,6 +5979,8 @@
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
+	gro_normal_list(n);
+
 	if (n->gro_bitmask) {
 		unsigned long timeout = 0;
 
@@ -6029,10 +6066,19 @@
 	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
 	 */
 	rc = napi->poll(napi, BUSY_POLL_BUDGET);
+	/* We can't gro_normal_list() here, because napi->poll() might have
+	 * rearmed the napi (napi_complete_done()) in which case it could
+	 * already be running on another CPU.
+	 */
 	trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
 	netpoll_poll_unlock(have_poll_lock);
-	if (rc == BUSY_POLL_BUDGET)
+	if (rc == BUSY_POLL_BUDGET) {
+		/* As the whole budget was spent, we still own the napi so can
+		 * safely handle the rx_list.
+		 */
+		gro_normal_list(napi);
 		__napi_schedule(napi);
+	}
 	local_bh_enable();
 }
 
@@ -6077,6 +6123,7 @@
 		}
 		work = napi_poll(napi, BUSY_POLL_BUDGET);
 		trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
+		gro_normal_list(napi);
 count:
 		if (work > 0)
 			__NET_ADD_STATS(dev_net(napi->dev),
@@ -6182,10 +6229,12 @@
 	napi->timer.function = napi_watchdog;
 	init_gro_hash(napi);
 	napi->skb = NULL;
+	INIT_LIST_HEAD(&napi->rx_list);
+	napi->rx_count = 0;
 	napi->poll = poll;
 	if (weight > NAPI_POLL_WEIGHT)
-		pr_err_once("netif_napi_add() called with weight %d on device %s\n",
-			    weight, dev->name);
+		netdev_err_once(dev, "%s() called with weight %d\n", __func__,
+				weight);
 	napi->weight = weight;
 	list_add(&napi->dev_list, &dev->napi_list);
 	napi->dev = dev;
@@ -6278,6 +6327,8 @@
 		goto out_unlock;
 	}
 
+	gro_normal_list(n);
+
 	if (n->gro_bitmask) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
@@ -6357,6 +6408,9 @@
 	/* upper master flag, there can only be one master device per list */
 	bool master;
 
+	/* lookup ignore flag */
+	bool ignore;
+
 	/* counter for the number of times this device was added to us */
 	u16 ref_nr;
 
@@ -6379,7 +6433,7 @@
 	return NULL;
 }
 
-static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
 {
 	struct net_device *dev = data;
 
@@ -6400,7 +6454,7 @@
 {
 	ASSERT_RTNL();
 
-	return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+	return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
 					     upper_dev);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev);
@@ -6418,7 +6472,7 @@
 bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
 				  struct net_device *upper_dev)
 {
-	return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+	return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
 					       upper_dev);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
@@ -6462,6 +6516,22 @@
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
+static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev)
+{
+	struct netdev_adjacent *upper;
+
+	ASSERT_RTNL();
+
+	if (list_empty(&dev->adj_list.upper))
+		return NULL;
+
+	upper = list_first_entry(&dev->adj_list.upper,
+				 struct netdev_adjacent, list);
+	if (likely(upper->master) && !upper->ignore)
+		return upper->dev;
+	return NULL;
+}
+
 /**
  * netdev_has_any_lower_dev - Check if device is linked to some device
  * @dev: device
@@ -6512,6 +6582,23 @@
 }
 EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
 
+static struct net_device *__netdev_next_upper_dev(struct net_device *dev,
+						  struct list_head **iter,
+						  bool *ignore)
+{
+	struct netdev_adjacent *upper;
+
+	upper = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+	if (&upper->list == &dev->adj_list.upper)
+		return NULL;
+
+	*iter = &upper->list;
+	*ignore = upper->ignore;
+
+	return upper->dev;
+}
+
 static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
 						    struct list_head **iter)
 {
@@ -6529,34 +6616,111 @@
 	return upper->dev;
 }
 
+static int __netdev_walk_all_upper_dev(struct net_device *dev,
+				       int (*fn)(struct net_device *dev,
+						 void *data),
+				       void *data)
+{
+	struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+	int ret, cur = 0;
+	bool ignore;
+
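+	/* Iterative DFS over the upper-device graph: the two stacks record
+	 * the parent device and list position at each level (bounded by
+	 * MAX_NEST_DEV), so deep stacks cannot overflow the kernel stack.
+	 */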
+	now = dev;
+	iter = &dev->adj_list.upper;
+
+	while (1) {
+		if (now != dev) {
+			ret = fn(now, data);
+			if (ret)
+				return ret;
+		}
+
+		next = NULL;
+		while (1) {
+			udev = __netdev_next_upper_dev(now, &iter, &ignore);
+			if (!udev)
+				break;
+			if (ignore)
+				continue;
+
+			next = udev;
+			niter = &udev->adj_list.upper;
+			dev_stack[cur] = now;
+			iter_stack[cur++] = iter;
+			break;
+		}
+
+		if (!next) {
+			if (!cur)
+				return 0;
+			next = dev_stack[--cur];
+			niter = iter_stack[cur];
+		}
+
+		now = next;
+		iter = niter;
+	}
+
+	return 0;
+}
+
 int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
 				  int (*fn)(struct net_device *dev,
 					    void *data),
 				  void *data)
 {
-	struct net_device *udev;
-	struct list_head *iter;
-	int ret;
+	struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+	int ret, cur = 0;
 
-	for (iter = &dev->adj_list.upper,
-	     udev = netdev_next_upper_dev_rcu(dev, &iter);
-	     udev;
-	     udev = netdev_next_upper_dev_rcu(dev, &iter)) {
-		/* first is the upper device itself */
-		ret = fn(udev, data);
-		if (ret)
-			return ret;
+	now = dev;
+	iter = &dev->adj_list.upper;
 
-		/* then look at all of its upper devices */
-		ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
-		if (ret)
-			return ret;
+	while (1) {
+		if (now != dev) {
+			ret = fn(now, data);
+			if (ret)
+				return ret;
+		}
+
+		next = NULL;
+		while (1) {
+			udev = netdev_next_upper_dev_rcu(now, &iter);
+			if (!udev)
+				break;
+
+			next = udev;
+			niter = &udev->adj_list.upper;
+			dev_stack[cur] = now;
+			iter_stack[cur++] = iter;
+			break;
+		}
+
+		if (!next) {
+			if (!cur)
+				return 0;
+			next = dev_stack[--cur];
+			niter = iter_stack[cur];
+		}
+
+		now = next;
+		iter = niter;
 	}
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
 
+static bool __netdev_has_upper_dev(struct net_device *dev,
+				   struct net_device *upper_dev)
+{
+	ASSERT_RTNL();
+
+	return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
+					   upper_dev);
+}
+
 /**
  * netdev_lower_get_next_private - Get the next ->private from the
  *				   lower neighbour list
@@ -6653,34 +6817,119 @@
 	return lower->dev;
 }
 
+static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
+						  struct list_head **iter,
+						  bool *ignore)
+{
+	struct netdev_adjacent *lower;
+
+	lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+	if (&lower->list == &dev->adj_list.lower)
+		return NULL;
+
+	*iter = &lower->list;
+	*ignore = lower->ignore;
+
+	return lower->dev;
+}
+
 int netdev_walk_all_lower_dev(struct net_device *dev,
 			      int (*fn)(struct net_device *dev,
 					void *data),
 			      void *data)
 {
-	struct net_device *ldev;
-	struct list_head *iter;
-	int ret;
+	struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+	int ret, cur = 0;
 
-	for (iter = &dev->adj_list.lower,
-	     ldev = netdev_next_lower_dev(dev, &iter);
-	     ldev;
-	     ldev = netdev_next_lower_dev(dev, &iter)) {
-		/* first is the lower device itself */
-		ret = fn(ldev, data);
-		if (ret)
-			return ret;
+	now = dev;
+	iter = &dev->adj_list.lower;
 
-		/* then look at all of its lower devices */
-		ret = netdev_walk_all_lower_dev(ldev, fn, data);
-		if (ret)
-			return ret;
+	while (1) {
+		if (now != dev) {
+			ret = fn(now, data);
+			if (ret)
+				return ret;
+		}
+
+		next = NULL;
+		while (1) {
+			ldev = netdev_next_lower_dev(now, &iter);
+			if (!ldev)
+				break;
+
+			next = ldev;
+			niter = &ldev->adj_list.lower;
+			dev_stack[cur] = now;
+			iter_stack[cur++] = iter;
+			break;
+		}
+
+		if (!next) {
+			if (!cur)
+				return 0;
+			next = dev_stack[--cur];
+			niter = iter_stack[cur];
+		}
+
+		now = next;
+		iter = niter;
 	}
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
 
+static int __netdev_walk_all_lower_dev(struct net_device *dev,
+				       int (*fn)(struct net_device *dev,
+						 void *data),
+				       void *data)
+{
+	struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+	int ret, cur = 0;
+	bool ignore;
+
+	now = dev;
+	iter = &dev->adj_list.lower;
+
+	while (1) {
+		if (now != dev) {
+			ret = fn(now, data);
+			if (ret)
+				return ret;
+		}
+
+		next = NULL;
+		while (1) {
+			ldev = __netdev_next_lower_dev(now, &iter, &ignore);
+			if (!ldev)
+				break;
+			if (ignore)
+				continue;
+
+			next = ldev;
+			niter = &ldev->adj_list.lower;
+			dev_stack[cur] = now;
+			iter_stack[cur++] = iter;
+			break;
+		}
+
+		if (!next) {
+			if (!cur)
+				return 0;
+			next = dev_stack[--cur];
+			niter = iter_stack[cur];
+		}
+
+		now = next;
+		iter = niter;
+	}
+
+	return 0;
+}
+
 static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
 						    struct list_head **iter)
 {
@@ -6695,28 +6944,99 @@
 	return lower->dev;
 }
 
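+/* upper_level/lower_level cache each device's depth in the stacking graph;
+ * they are recomputed on every link/unlink so the MAX_NEST_DEV check at
+ * link time stays O(1).
+ */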
+static u8 __netdev_upper_depth(struct net_device *dev)
+{
+	struct net_device *udev;
+	struct list_head *iter;
+	u8 max_depth = 0;
+	bool ignore;
+
+	for (iter = &dev->adj_list.upper,
+	     udev = __netdev_next_upper_dev(dev, &iter, &ignore);
+	     udev;
+	     udev = __netdev_next_upper_dev(dev, &iter, &ignore)) {
+		if (ignore)
+			continue;
+		if (max_depth < udev->upper_level)
+			max_depth = udev->upper_level;
+	}
+
+	return max_depth;
+}
+
+static u8 __netdev_lower_depth(struct net_device *dev)
+{
+	struct net_device *ldev;
+	struct list_head *iter;
+	u8 max_depth = 0;
+	bool ignore;
+
+	for (iter = &dev->adj_list.lower,
+	     ldev = __netdev_next_lower_dev(dev, &iter, &ignore);
+	     ldev;
+	     ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) {
+		if (ignore)
+			continue;
+		if (max_depth < ldev->lower_level)
+			max_depth = ldev->lower_level;
+	}
+
+	return max_depth;
+}
+
+static int __netdev_update_upper_level(struct net_device *dev, void *data)
+{
+	dev->upper_level = __netdev_upper_depth(dev) + 1;
+	return 0;
+}
+
+static int __netdev_update_lower_level(struct net_device *dev, void *data)
+{
+	dev->lower_level = __netdev_lower_depth(dev) + 1;
+	return 0;
+}
+
 int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
 				  int (*fn)(struct net_device *dev,
 					    void *data),
 				  void *data)
 {
-	struct net_device *ldev;
-	struct list_head *iter;
-	int ret;
+	struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+	int ret, cur = 0;
 
-	for (iter = &dev->adj_list.lower,
-	     ldev = netdev_next_lower_dev_rcu(dev, &iter);
-	     ldev;
-	     ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
-		/* first is the lower device itself */
-		ret = fn(ldev, data);
-		if (ret)
-			return ret;
+	now = dev;
+	iter = &dev->adj_list.lower;
 
-		/* then look at all of its lower devices */
-		ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
-		if (ret)
-			return ret;
+	while (1) {
+		if (now != dev) {
+			ret = fn(now, data);
+			if (ret)
+				return ret;
+		}
+
+		next = NULL;
+		while (1) {
+			ldev = netdev_next_lower_dev_rcu(now, &iter);
+			if (!ldev)
+				break;
+
+			next = ldev;
+			niter = &ldev->adj_list.lower;
+			dev_stack[cur] = now;
+			iter_stack[cur++] = iter;
+			break;
+		}
+
+		if (!next) {
+			if (!cur)
+				return 0;
+			next = dev_stack[--cur];
+			niter = iter_stack[cur];
+		}
+
+		now = next;
+		iter = niter;
 	}
 
 	return 0;
@@ -6820,6 +7140,7 @@
 	adj->master = master;
 	adj->ref_nr = 1;
 	adj->private = private;
+	adj->ignore = false;
 	dev_hold(adj_dev);
 
 	pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
@@ -6970,14 +7291,17 @@
 		return -EBUSY;
 
 	/* To prevent loops, check if dev is not upper device to upper_dev. */
-	if (netdev_has_upper_dev(upper_dev, dev))
+	if (__netdev_has_upper_dev(upper_dev, dev))
 		return -EBUSY;
 
+	if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
+		return -EMLINK;
+
 	if (!master) {
-		if (netdev_has_upper_dev(dev, upper_dev))
+		if (__netdev_has_upper_dev(dev, upper_dev))
 			return -EEXIST;
 	} else {
-		master_dev = netdev_master_upper_dev_get(dev);
+		master_dev = __netdev_master_upper_dev_get(dev);
 		if (master_dev)
 			return master_dev == upper_dev ? -EEXIST : -EBUSY;
 	}
@@ -6999,6 +7323,13 @@
 	if (ret)
 		goto rollback;
 
+	__netdev_update_upper_level(dev, NULL);
+	__netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
+
+	__netdev_update_lower_level(upper_dev, NULL);
+	__netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
+				    NULL);
+
 	return 0;
 
 rollback:
@@ -7081,9 +7412,96 @@
 
 	call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
 				      &changeupper_info.info);
+
+	__netdev_update_upper_level(dev, NULL);
+	__netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
+
+	__netdev_update_lower_level(upper_dev, NULL);
+	__netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
+				    NULL);
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
+static void __netdev_adjacent_dev_set(struct net_device *upper_dev,
+				      struct net_device *lower_dev,
+				      bool val)
+{
+	struct netdev_adjacent *adj;
+
+	adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower);
+	if (adj)
+		adj->ignore = val;
+
+	adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper);
+	if (adj)
+		adj->ignore = val;
+}
+
+static void netdev_adjacent_dev_disable(struct net_device *upper_dev,
+					struct net_device *lower_dev)
+{
+	__netdev_adjacent_dev_set(upper_dev, lower_dev, true);
+}
+
+static void netdev_adjacent_dev_enable(struct net_device *upper_dev,
+				       struct net_device *lower_dev)
+{
+	__netdev_adjacent_dev_set(upper_dev, lower_dev, false);
+}
+
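+/* The prepare/commit/abort helpers atomically swap a lower device under
+ * dev: _prepare hides the dev <-> old_dev adjacency behind the ignore
+ * flag and links dev on top of new_dev, _commit re-enables and unlinks
+ * old_dev, and _abort undoes the prepare.
+ */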
+int netdev_adjacent_change_prepare(struct net_device *old_dev,
+				   struct net_device *new_dev,
+				   struct net_device *dev,
+				   struct netlink_ext_ack *extack)
+{
+	int err;
+
+	if (!new_dev)
+		return 0;
+
+	if (old_dev && new_dev != old_dev)
+		netdev_adjacent_dev_disable(dev, old_dev);
+
+	err = netdev_upper_dev_link(new_dev, dev, extack);
+	if (err) {
+		if (old_dev && new_dev != old_dev)
+			netdev_adjacent_dev_enable(dev, old_dev);
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(netdev_adjacent_change_prepare);
+
+void netdev_adjacent_change_commit(struct net_device *old_dev,
+				   struct net_device *new_dev,
+				   struct net_device *dev)
+{
+	if (!new_dev || !old_dev)
+		return;
+
+	if (new_dev == old_dev)
+		return;
+
+	netdev_adjacent_dev_enable(dev, old_dev);
+	netdev_upper_dev_unlink(old_dev, dev);
+}
+EXPORT_SYMBOL(netdev_adjacent_change_commit);
+
+void netdev_adjacent_change_abort(struct net_device *old_dev,
+				  struct net_device *new_dev,
+				  struct net_device *dev)
+{
+	if (!new_dev)
+		return;
+
+	if (old_dev && new_dev != old_dev)
+		netdev_adjacent_dev_enable(dev, old_dev);
+
+	netdev_upper_dev_unlink(new_dev, dev);
+}
+EXPORT_SYMBOL(netdev_adjacent_change_abort);
+
 /**
  * netdev_bonding_info_change - Dispatch event about slave change
  * @dev: device
@@ -7197,25 +7615,6 @@
 EXPORT_SYMBOL(netdev_lower_dev_get_private);
 
 
-int dev_get_nest_level(struct net_device *dev)
-{
-	struct net_device *lower = NULL;
-	struct list_head *iter;
-	int max_nest = -1;
-	int nest;
-
-	ASSERT_RTNL();
-
-	netdev_for_each_lower_dev(dev, lower, iter) {
-		nest = dev_get_nest_level(lower);
-		if (max_nest < nest)
-			max_nest = nest;
-	}
-
-	return max_nest + 1;
-}
-EXPORT_SYMBOL(dev_get_nest_level);
-
 /**
  * netdev_lower_change - Dispatch event about lower device state change
  * @lower_dev: device
@@ -7442,7 +7841,8 @@
 }
 EXPORT_SYMBOL(dev_get_flags);
 
-int __dev_change_flags(struct net_device *dev, unsigned int flags)
+int __dev_change_flags(struct net_device *dev, unsigned int flags,
+		       struct netlink_ext_ack *extack)
 {
 	unsigned int old_flags = dev->flags;
 	int ret;
@@ -7479,7 +7879,7 @@
 		if (old_flags & IFF_UP)
 			__dev_close(dev);
 		else
-			ret = __dev_open(dev);
+			ret = __dev_open(dev, extack);
 	}
 
 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
@@ -7539,16 +7939,18 @@
  *	dev_change_flags - change device settings
  *	@dev: device
  *	@flags: device state flags
+ *	@extack: netlink extended ack
  *
  *	Change settings on device based state flags. The flags are
  *	in the userspace exported format.
  */
-int dev_change_flags(struct net_device *dev, unsigned int flags)
+int dev_change_flags(struct net_device *dev, unsigned int flags,
+		     struct netlink_ext_ack *extack)
 {
 	int ret;
 	unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
 
-	ret = __dev_change_flags(dev, flags);
+	ret = __dev_change_flags(dev, flags, extack);
 	if (ret < 0)
 		return ret;
 
@@ -7681,13 +8083,36 @@
 EXPORT_SYMBOL(dev_set_group);
 
 /**
+ *	dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
+ *	@dev: device
+ *	@addr: new address
+ *	@extack: netlink extended ack
+ */
+int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
+			      struct netlink_ext_ack *extack)
+{
+	struct netdev_notifier_pre_changeaddr_info info = {
+		.info.dev = dev,
+		.info.extack = extack,
+		.dev_addr = addr,
+	};
+	int rc;
+
+	rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
+	return notifier_to_errno(rc);
+}
+EXPORT_SYMBOL(dev_pre_changeaddr_notify);
+
+/**
  *	dev_set_mac_address - Change Media Access Control Address
  *	@dev: device
  *	@sa: new address
+ *	@extack: netlink extended ack
  *
  *	Change the hardware (MAC) address of the device
  */
-int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+			struct netlink_ext_ack *extack)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int err;
@@ -7698,6 +8123,9 @@
 		return -EINVAL;
 	if (!netif_device_present(dev))
 		return -ENODEV;
+	err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
+	if (err)
+		return err;
 	err = ops->ndo_set_mac_address(dev, sa);
 	if (err)
 		return err;
@@ -7757,14 +8185,82 @@
 			   char *name, size_t len)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	int err;
 
-	if (!ops->ndo_get_phys_port_name)
-		return -EOPNOTSUPP;
-	return ops->ndo_get_phys_port_name(dev, name, len);
+	if (ops->ndo_get_phys_port_name) {
+		err = ops->ndo_get_phys_port_name(dev, name, len);
+		if (err != -EOPNOTSUPP)
+			return err;
+	}
+	return devlink_compat_phys_port_name_get(dev, name, len);
 }
 EXPORT_SYMBOL(dev_get_phys_port_name);
 
 /**
+ *	dev_get_port_parent_id - Get the device's port parent identifier
+ *	@dev: network device
+ *	@ppid: pointer to a storage for the port's parent identifier
+ *	@recurse: allow/disallow recursion to lower devices
+ *
+ *	Get the device's port parent identifier
+ */
+int dev_get_port_parent_id(struct net_device *dev,
+			   struct netdev_phys_item_id *ppid,
+			   bool recurse)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct netdev_phys_item_id first = { };
+	struct net_device *lower_dev;
+	struct list_head *iter;
+	int err;
+
+	if (ops->ndo_get_port_parent_id) {
+		err = ops->ndo_get_port_parent_id(dev, ppid);
+		if (err != -EOPNOTSUPP)
+			return err;
+	}
+
+	err = devlink_compat_switch_id_get(dev, ppid);
+	if (!err || err != -EOPNOTSUPP)
+		return err;
+
+	if (!recurse)
+		return -EOPNOTSUPP;
+
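+	/* Fall back to the lower devices; they must all report the same
+	 * parent id for the answer to be meaningful (-ENODATA otherwise).
+	 */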
+	netdev_for_each_lower_dev(dev, lower_dev, iter) {
+		err = dev_get_port_parent_id(lower_dev, ppid, recurse);
+		if (err)
+			break;
+		if (!first.id_len)
+			first = *ppid;
+		else if (memcmp(&first, ppid, sizeof(*ppid)))
+			return -ENODATA;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(dev_get_port_parent_id);
+
+/**
+ *	netdev_port_same_parent_id - Indicate if two network devices have
+ *	the same port parent identifier
+ *	@a: first network device
+ *	@b: second network device
+ */
+bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
+{
+	struct netdev_phys_item_id a_id = { };
+	struct netdev_phys_item_id b_id = { };
+
+	if (dev_get_port_parent_id(a, &a_id, true) ||
+	    dev_get_port_parent_id(b, &b_id, true))
+		return false;
+
+	return netdev_phys_item_id_same(&a_id, &b_id);
+}
+EXPORT_SYMBOL(netdev_port_same_parent_id);
+
+/**
  *	dev_change_proto_down - update protocol port state information
  *	@dev: device
  *	@proto_down: new value
@@ -7784,6 +8280,25 @@
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
+/**
+ *	dev_change_proto_down_generic - generic implementation for
+ *	ndo_change_proto_down that sets carrier according to
+ *	proto_down.
+ *
+ *	@dev: device
+ *	@proto_down: new value
+ */
+int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
+{
+	if (proto_down)
+		netif_carrier_off(dev);
+	else
+		netif_carrier_on(dev);
+	dev->proto_down = proto_down;
+	return 0;
+}
+EXPORT_SYMBOL(dev_change_proto_down_generic);
+
 u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
 		    enum bpf_netdev_command cmd)
 {
@@ -7863,39 +8378,57 @@
 	enum bpf_netdev_command query;
 	struct bpf_prog *prog = NULL;
 	bpf_op_t bpf_op, bpf_chk;
+	bool offload;
 	int err;
 
 	ASSERT_RTNL();
 
-	query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+	offload = flags & XDP_FLAGS_HW_MODE;
+	query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
 
 	bpf_op = bpf_chk = ops->ndo_bpf;
-	if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
+	if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) {
+		NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode");
 		return -EOPNOTSUPP;
+	}
 	if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
 		bpf_op = generic_xdp_install;
 	if (bpf_op == bpf_chk)
 		bpf_chk = generic_xdp_install;
 
 	if (fd >= 0) {
-		if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) ||
-		    __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW))
+		u32 prog_id;
+
+		if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
+			NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
 			return -EEXIST;
-		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-		    __dev_xdp_query(dev, bpf_op, query))
+		}
+
+		prog_id = __dev_xdp_query(dev, bpf_op, query);
+		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
+			NL_SET_ERR_MSG(extack, "XDP program already attached");
 			return -EBUSY;
+		}
 
 		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
 					     bpf_op == ops->ndo_bpf);
 		if (IS_ERR(prog))
 			return PTR_ERR(prog);
 
-		if (!(flags & XDP_FLAGS_HW_MODE) &&
-		    bpf_prog_is_dev_bound(prog->aux)) {
+		if (!offload && bpf_prog_is_dev_bound(prog->aux)) {
 			NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
 			bpf_prog_put(prog);
 			return -EINVAL;
 		}
+
+		/* prog->aux->id may be 0 for orphaned device-bound progs */
+		if (prog->aux->id && prog->aux->id == prog_id) {
+			bpf_prog_put(prog);
+			return 0;
+		}
+	} else {
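+		/* fd < 0 requests detach; nothing to do if no program
+		 * is currently attached.
+		 */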
+		if (!__dev_xdp_query(dev, bpf_op, query))
+			return 0;
 	}
 
 	err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
@@ -8039,7 +8572,7 @@
 	netdev_features_t feature;
 	int feature_bit;
 
-	for_each_netdev_feature(&upper_disables, feature_bit) {
+	for_each_netdev_feature(upper_disables, feature_bit) {
 		feature = __NETIF_F_BIT(feature_bit);
 		if (!(upper->wanted_features & feature)
 		    && (features & feature)) {
@@ -8059,7 +8592,7 @@
 	netdev_features_t feature;
 	int feature_bit;
 
-	for_each_netdev_feature(&upper_disables, feature_bit) {
+	for_each_netdev_feature(upper_disables, feature_bit) {
 		feature = __NETIF_F_BIT(feature_bit);
 		if (!(features & feature) && (lower->features & feature)) {
 			netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
@@ -8354,7 +8887,7 @@
 {
 	/* Initialize queue lock */
 	spin_lock_init(&queue->_xmit_lock);
-	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+	lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key);
 	queue->xmit_lock_owner = -1;
 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
@@ -8401,6 +8934,43 @@
 }
 EXPORT_SYMBOL(netif_tx_stop_all_queues);
 
+static void netdev_register_lockdep_key(struct net_device *dev)
+{
+	lockdep_register_key(&dev->qdisc_tx_busylock_key);
+	lockdep_register_key(&dev->qdisc_running_key);
+	lockdep_register_key(&dev->qdisc_xmit_lock_key);
+	lockdep_register_key(&dev->addr_list_lock_key);
+}
+
+static void netdev_unregister_lockdep_key(struct net_device *dev)
+{
+	lockdep_unregister_key(&dev->qdisc_tx_busylock_key);
+	lockdep_unregister_key(&dev->qdisc_running_key);
+	lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
+	lockdep_unregister_key(&dev->addr_list_lock_key);
+}
+
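+/* Replace the device's xmit/addr_list lockdep keys with fresh ones and
+ * reapply them to the address-list lock and all tx queue locks, discarding
+ * lock dependencies recorded under the old keys.
+ */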
+void netdev_update_lockdep_key(struct net_device *dev)
+{
+	struct netdev_queue *queue;
+	int i;
+
+	lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
+	lockdep_unregister_key(&dev->addr_list_lock_key);
+
+	lockdep_register_key(&dev->qdisc_xmit_lock_key);
+	lockdep_register_key(&dev->addr_list_lock_key);
+
+	lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		queue = netdev_get_tx_queue(dev, i);
+
+		lockdep_set_class(&queue->_xmit_lock,
+				  &dev->qdisc_xmit_lock_key);
+	}
+}
+EXPORT_SYMBOL(netdev_update_lockdep_key);
+
 /**
  *	register_netdevice	- register a network device
  *	@dev: device to register
@@ -8435,7 +9005,7 @@
 	BUG_ON(!net);
 
 	spin_lock_init(&dev->addr_list_lock);
-	netdev_set_addr_lockdep_class(dev);
+	lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
 
 	ret = dev_get_valid_name(net, dev, dev->name);
 	if (ret < 0)
@@ -8546,6 +9116,8 @@
 	ret = notifier_to_errno(ret);
 	if (ret) {
 		rollback_registered(dev);
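+		/* Make sure outstanding RCU callbacks that may still look
+		 * at the device have run before it is marked unregistered.
+		 */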
+		rcu_barrier();
+
 		dev->reg_state = NETREG_UNREGISTERED;
 	}
 	/*
@@ -8599,6 +9171,9 @@
 	set_bit(__LINK_STATE_PRESENT, &dev->state);
 	set_bit(__LINK_STATE_START, &dev->state);
 
+	/* napi_busy_loop stats accounting wants this */
+	dev_net_set(dev, &init_net);
+
 	/* Note : We dont allocate pcpu_refcnt for dummy devices,
 	 * because users of this 'device' dont need to change
 	 * its refcount.
@@ -8697,7 +9272,7 @@
 
 		refcnt = netdev_refcnt_read(dev);
 
-		if (time_after(jiffies, warning_time + 10 * HZ)) {
+		if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
 				 dev->name, refcnt);
 			warning_time = jiffies;
@@ -8940,8 +9515,12 @@
 
 	dev_net_set(dev, &init_net);
 
+	netdev_register_lockdep_key(dev);
+
 	dev->gso_max_size = GSO_MAX_SIZE;
 	dev->gso_max_segs = GSO_MAX_SEGS;
+	dev->upper_level = 1;
+	dev->lower_level = 1;
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
@@ -9022,6 +9601,8 @@
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
 
+	netdev_unregister_lockdep_key(dev);
+
 	/*  Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		netdev_freemem(dev);
@@ -9190,7 +9771,7 @@
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	rcu_barrier();
 
-	new_nsid = peernet2id_alloc(dev_net(dev), net);
+	new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
 	/* If there is an ifindex conflict assign a new one */
 	if (__dev_get_by_index(net, dev->ifindex))
 		new_ifindex = dev_new_index(net);
@@ -9491,6 +10072,8 @@
 
 		/* Push remaining network devices to init_net */
 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
+		if (__dev_get_by_name(&init_net, fb_name))
+			snprintf(fb_name, IFNAMSIZ, "dev%%d");
 		err = dev_change_net_namespace(dev, &init_net, fb_name);
 		if (err) {
 			pr_emerg("%s: failed to move %s to init_net: %d\n",
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index d884d8f..2f949b5 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/core/dev_addr_lists.c - Functions for handling net device lists
  * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
  *
  * This file contains functions for working with unicast, multicast and device
  * addresses lists.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/netdevice.h>
@@ -278,6 +274,103 @@
 EXPORT_SYMBOL(__hw_addr_sync_dev);
 
 /**
+ *  __hw_addr_ref_sync_dev - Synchronize device's multicast address list taking
+ *  into account references
+ *  @list: address list to synchronize
+ *  @dev:  device to sync
+ *  @sync: function to call if an address or a reference on it should be added
+ *  @unsync: function to call if an address or some reference on it should
+ *  be removed
+ *
+ *  This function is intended to be called from the ndo_set_rx_mode
+ *  function of devices that require explicit notification when addresses or
+ *  references on them are added or removed. The unsync function may be NULL,
+ *  in which case the addresses or references requiring removal are simply
+ *  dropped without notifying the device. It is the driver's responsibility
+ *  to identify and distribute addresses and references among its internal
+ *  address tables.
+ **/
+int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list,
+			   struct net_device *dev,
+			   int (*sync)(struct net_device *,
+				       const unsigned char *, int),
+			   int (*unsync)(struct net_device *,
+					 const unsigned char *, int))
+{
+	struct netdev_hw_addr *ha, *tmp;
+	int err, ref_cnt;
+
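+	/* While an address is synced, ha->refcount is kept at exactly twice
+	 * ha->sync_cnt, so refcount - sync_cnt is the number of live
+	 * references; both passes below reconcile sync_cnt with that count.
+	 */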
+	/* first go through and flush out any unsynced/stale entries */
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		/* nothing to unsync if no references have been dropped */
+		if ((ha->sync_cnt << 1) <= ha->refcount)
+			continue;
+
+		/* if the unsync call fails, defer unsyncing the address */
+		ref_cnt = ha->refcount - ha->sync_cnt;
+		if (unsync && unsync(dev, ha->addr, ref_cnt))
+			continue;
+
+		ha->refcount = (ref_cnt << 1) + 1;
+		ha->sync_cnt = ref_cnt;
+		__hw_addr_del_entry(list, ha, false, false);
+	}
+
+	/* go through and sync updated/new entries to the device */
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		/* sync if address added or reused */
+		if ((ha->sync_cnt << 1) >= ha->refcount)
+			continue;
+
+		ref_cnt = ha->refcount - ha->sync_cnt;
+		err = sync(dev, ha->addr, ref_cnt);
+		if (err)
+			return err;
+
+		ha->refcount = ref_cnt << 1;
+		ha->sync_cnt = ref_cnt;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(__hw_addr_ref_sync_dev);
+
+/**
+ *  __hw_addr_ref_unsync_dev - Remove synchronized addresses and their
+ *  references from device
+ *  @list: address list to remove synchronized addresses (and references) from
+ *  @dev:  device to sync
+ *  @unsync: function to call if an address and its references should be
+ *  removed
+ *
+ *  Remove all addresses that were added to the device by
+ *  __hw_addr_ref_sync_dev(). This function is intended to be called from the
+ *  ndo_stop or ndo_open functions of devices that require explicit
+ *  notification when addresses (or references on them) are added or removed.
+ *  If the unsync function pointer is NULL, this function can be used to just
+ *  reset the sync_cnt for the addresses in the list.
+ **/
+void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list,
+			      struct net_device *dev,
+			      int (*unsync)(struct net_device *,
+					    const unsigned char *, int))
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		if (!ha->sync_cnt)
+			continue;
+
+		/* if the unsync call fails, defer unsyncing the address */
+		if (unsync && unsync(dev, ha->addr, ha->sync_cnt))
+			continue;
+
+		ha->refcount -= ha->sync_cnt - 1;
+		ha->sync_cnt = 0;
+		__hw_addr_del_entry(list, ha, false, false);
+	}
+}
+EXPORT_SYMBOL(__hw_addr_ref_unsync_dev);
+
+/**
  *  __hw_addr_unsync_dev - Remove synchronized addresses from device
  *  @list: address list to remove synchronized addresses from
  *  @dev:  device to sync
@@ -401,6 +494,9 @@
 
 	ASSERT_RTNL();
 
+	err = dev_pre_changeaddr_notify(dev, addr, NULL);
+	if (err)
+		return err;
 	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
 	if (!err)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
@@ -541,7 +637,7 @@
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
@@ -571,7 +667,7 @@
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
@@ -595,7 +691,7 @@
 		return;
 
 	netif_addr_lock_bh(from);
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
 	__dev_set_rx_mode(to);
 	netif_addr_unlock(to);
@@ -762,7 +858,7 @@
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
@@ -792,7 +888,7 @@
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
@@ -816,7 +912,7 @@
 		return;
 
 	netif_addr_lock_bh(from);
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
 	__dev_set_rx_mode(to);
 	netif_addr_unlock(to);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 90e8aa3..5163d90 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -234,7 +234,7 @@
 
 	switch (cmd) {
 	case SIOCSIFFLAGS:	/* Set interface flags */
-		return dev_change_flags(dev, ifr->ifr_flags);
+		return dev_change_flags(dev, ifr->ifr_flags, NULL);
 
 	case SIOCSIFMETRIC:	/* Set the metric on the interface
 				   (currently unused) */
@@ -246,7 +246,7 @@
 	case SIOCSIFHWADDR:
 		if (dev->addr_len > sizeof(struct sockaddr))
 			return -EINVAL;
-		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+		return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL);
 
 	case SIOCSIFHWBROADCAST:
 		if (ifr->ifr_hwaddr.sa_family != dev->type)
@@ -366,7 +366,8 @@
  *	dev_ioctl	-	network device ioctl
  *	@net: the applicable net namespace
  *	@cmd: command to issue
- *	@arg: pointer to a struct ifreq in user space
+ *	@ifr: pointer to a struct ifreq in user space
+ *	@need_copyout: whether or not copy_to_user() should be called
  *
  *	Issue ioctl functions to devices. This is normally called by the
  *	user space syscall interfaces but can sometimes be useful for
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6bc4293..93905dc 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/core/devlink.c - Network physical/parent device Netlink interface
  *
  * Heavily inspired by net/wireless/
  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -19,6 +15,11 @@
 #include <linux/device.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/refcount.h>
+#include <linux/workqueue.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/timekeeping.h>
 #include <rdma/ib_verbs.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
@@ -26,6 +27,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/devlink.h>
+#include <net/drop_monitor.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/devlink.h>
 
@@ -81,6 +83,7 @@
 EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
 
 static LIST_HEAD(devlink_list);
 
@@ -115,6 +118,8 @@
 	busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
 	devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
 
+	lockdep_assert_held(&devlink_mutex);
+
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (strcmp(devlink->dev->bus->name, busname) == 0 &&
 		    strcmp(dev_name(devlink->dev), devname) == 0 &&
@@ -131,7 +136,7 @@
 }
 
 static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
-						      int port_index)
+						      unsigned int port_index)
 {
 	struct devlink_port *devlink_port;
 
@@ -142,7 +147,8 @@
 	return NULL;
 }
 
-static bool devlink_port_index_exists(struct devlink *devlink, int port_index)
+static bool devlink_port_index_exists(struct devlink *devlink,
+				      unsigned int port_index)
 {
 	return devlink_port_get_by_index(devlink, port_index);
 }
@@ -340,7 +346,6 @@
 	struct list_head list;
 	struct devlink_region *region;
 	devlink_snapshot_data_dest_t *data_destructor;
-	u64 data_len;
 	u8 *data;
 	u32 id;
 };
@@ -369,14 +374,6 @@
 	return NULL;
 }
 
-static void devlink_region_snapshot_del(struct devlink_snapshot *snapshot)
-{
-	snapshot->region->cur_snapshots--;
-	list_del(&snapshot->list);
-	(*snapshot->data_destructor)(snapshot->data);
-	kfree(snapshot);
-}
-
 #define DEVLINK_NL_FLAG_NEED_DEVLINK	BIT(0)
 #define DEVLINK_NL_FLAG_NEED_PORT	BIT(1)
 #define DEVLINK_NL_FLAG_NEED_SB		BIT(2)
@@ -474,6 +471,8 @@
 
 	if (devlink_nl_put_handle(msg, devlink))
 		goto nla_put_failure;
+	if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
+		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
 	return 0;
@@ -513,15 +512,37 @@
 		return 0;
 	if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
 		return -EMSGSIZE;
-	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, attrs->port_number))
-		return -EMSGSIZE;
-	if (!attrs->split)
-		return 0;
-	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, attrs->port_number))
-		return -EMSGSIZE;
-	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
-			attrs->split_subport_number))
-		return -EMSGSIZE;
+	switch (devlink_port->attrs.flavour) {
+	case DEVLINK_PORT_FLAVOUR_PCI_PF:
+		if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
+				attrs->pci_pf.pf))
+			return -EMSGSIZE;
+		break;
+	case DEVLINK_PORT_FLAVOUR_PCI_VF:
+		if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
+				attrs->pci_vf.pf) ||
+		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER,
+				attrs->pci_vf.vf))
+			return -EMSGSIZE;
+		break;
+	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+	case DEVLINK_PORT_FLAVOUR_CPU:
+	case DEVLINK_PORT_FLAVOUR_DSA:
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
+				attrs->phys.port_number))
+			return -EMSGSIZE;
+		if (!attrs->split)
+			return 0;
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
+				attrs->phys.port_number))
+			return -EMSGSIZE;
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
+				attrs->phys.split_subport_number))
+			return -EMSGSIZE;
+		break;
+	default:
+		break;
+	}
 	return 0;
 }
 
@@ -540,12 +561,14 @@
 		goto nla_put_failure;
 	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
 		goto nla_put_failure;
+
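+	/* type and type_dev are updated together; hold type_lock across the
+	 * reads below so the netdev/ibdev pointer cannot change under us.
+	 */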
+	spin_lock_bh(&devlink_port->type_lock);
 	if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
-		goto nla_put_failure;
+		goto nla_put_failure_type_locked;
 	if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
 	    nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
 			devlink_port->desired_type))
-		goto nla_put_failure;
+		goto nla_put_failure_type_locked;
 	if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
 		struct net_device *netdev = devlink_port->type_dev;
 
@@ -554,7 +577,7 @@
 				 netdev->ifindex) ||
 		     nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
 				    netdev->name)))
-			goto nla_put_failure;
+			goto nla_put_failure_type_locked;
 	}
 	if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
 		struct ib_device *ibdev = devlink_port->type_dev;
@@ -562,14 +585,17 @@
 		if (ibdev &&
 		    nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
 				   ibdev->name))
-			goto nla_put_failure;
+			goto nla_put_failure_type_locked;
 	}
+	spin_unlock_bh(&devlink_port->type_lock);
 	if (devlink_nl_port_attrs_put(msg, devlink_port))
 		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
 	return 0;
 
+nla_put_failure_type_locked:
+	spin_unlock_bh(&devlink_port->type_lock);
 nla_put_failure:
 	genlmsg_cancel(msg, hdr);
 	return -EMSGSIZE;
@@ -720,7 +746,7 @@
 {
 	int err;
 
-	if (devlink->ops && devlink->ops->port_type_set) {
+	if (devlink->ops->port_type_set) {
 		if (port_type == DEVLINK_PORT_TYPE_NOTSET)
 			return -EINVAL;
 		if (port_type == devlink_port->type)
@@ -757,7 +783,7 @@
 			      u32 count, struct netlink_ext_ack *extack)
 
 {
-	if (devlink->ops && devlink->ops->port_split)
+	if (devlink->ops->port_split)
 		return devlink->ops->port_split(devlink, port_index, count,
 						extack);
 	return -EOPNOTSUPP;
@@ -783,7 +809,7 @@
 				struct netlink_ext_ack *extack)
 
 {
-	if (devlink->ops && devlink->ops->port_unsplit)
+	if (devlink->ops->port_unsplit)
 		return devlink->ops->port_unsplit(devlink, port_index, extack);
 	return -EOPNOTSUPP;
 }
@@ -932,6 +958,9 @@
 	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
 		       pool_info.threshold_type))
 		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_CELL_SIZE,
+			pool_info.cell_size))
+		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
 	return 0;
@@ -955,7 +984,7 @@
 	if (err)
 		return err;
 
-	if (!devlink->ops || !devlink->ops->sb_pool_get)
+	if (!devlink->ops->sb_pool_get)
 		return -EOPNOTSUPP;
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1011,7 +1040,7 @@
 	mutex_lock(&devlink_mutex);
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
-		    !devlink->ops || !devlink->ops->sb_pool_get)
+		    !devlink->ops->sb_pool_get)
 			continue;
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
@@ -1035,14 +1064,15 @@
 
 static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
 			       u16 pool_index, u32 size,
-			       enum devlink_sb_threshold_type threshold_type)
+			       enum devlink_sb_threshold_type threshold_type,
+			       struct netlink_ext_ack *extack)
 
 {
 	const struct devlink_ops *ops = devlink->ops;
 
-	if (ops && ops->sb_pool_set)
+	if (ops->sb_pool_set)
 		return ops->sb_pool_set(devlink, sb_index, pool_index,
-					size, threshold_type);
+					size, threshold_type, extack);
 	return -EOPNOTSUPP;
 }
 
@@ -1070,7 +1100,8 @@
 
 	size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
 	return devlink_sb_pool_set(devlink, devlink_sb->index,
-				   pool_index, size, threshold_type);
+				   pool_index, size, threshold_type,
+				   info->extack);
 }
 
 static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
@@ -1145,7 +1176,7 @@
 	if (err)
 		return err;
 
-	if (!devlink->ops || !devlink->ops->sb_port_pool_get)
+	if (!devlink->ops->sb_port_pool_get)
 		return -EOPNOTSUPP;
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1207,7 +1238,7 @@
 	mutex_lock(&devlink_mutex);
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
-		    !devlink->ops || !devlink->ops->sb_port_pool_get)
+		    !devlink->ops->sb_port_pool_get)
 			continue;
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
@@ -1231,14 +1262,15 @@
 
 static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
 				    unsigned int sb_index, u16 pool_index,
-				    u32 threshold)
+				    u32 threshold,
+				    struct netlink_ext_ack *extack)
 
 {
 	const struct devlink_ops *ops = devlink_port->devlink->ops;
 
-	if (ops && ops->sb_port_pool_set)
+	if (ops->sb_port_pool_set)
 		return ops->sb_port_pool_set(devlink_port, sb_index,
-					     pool_index, threshold);
+					     pool_index, threshold, extack);
 	return -EOPNOTSUPP;
 }
 
@@ -1261,7 +1293,7 @@
 
 	threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
 	return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
-					pool_index, threshold);
+					pool_index, threshold, info->extack);
 }
 
 static int
@@ -1349,7 +1381,7 @@
 	if (err)
 		return err;
 
-	if (!devlink->ops || !devlink->ops->sb_tc_pool_bind_get)
+	if (!devlink->ops->sb_tc_pool_bind_get)
 		return -EOPNOTSUPP;
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1433,7 +1465,7 @@
 	mutex_lock(&devlink_mutex);
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
-		    !devlink->ops || !devlink->ops->sb_tc_pool_bind_get)
+		    !devlink->ops->sb_tc_pool_bind_get)
 			continue;
 
 		mutex_lock(&devlink->lock);
@@ -1460,15 +1492,16 @@
 static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
 				       unsigned int sb_index, u16 tc_index,
 				       enum devlink_sb_pool_type pool_type,
-				       u16 pool_index, u32 threshold)
+				       u16 pool_index, u32 threshold,
+				       struct netlink_ext_ack *extack)
 
 {
 	const struct devlink_ops *ops = devlink_port->devlink->ops;
 
-	if (ops && ops->sb_tc_pool_bind_set)
+	if (ops->sb_tc_pool_bind_set)
 		return ops->sb_tc_pool_bind_set(devlink_port, sb_index,
 						tc_index, pool_type,
-						pool_index, threshold);
+						pool_index, threshold, extack);
 	return -EOPNOTSUPP;
 }
 
@@ -1503,7 +1536,7 @@
 	threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
 	return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
 					   tc_index, pool_type,
-					   pool_index, threshold);
+					   pool_index, threshold, info->extack);
 }
 
 static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
@@ -1513,7 +1546,7 @@
 	struct devlink_sb *devlink_sb = info->user_ptr[1];
 	const struct devlink_ops *ops = devlink->ops;
 
-	if (ops && ops->sb_occ_snapshot)
+	if (ops->sb_occ_snapshot)
 		return ops->sb_occ_snapshot(devlink, devlink_sb->index);
 	return -EOPNOTSUPP;
 }
@@ -1525,7 +1558,7 @@
 	struct devlink_sb *devlink_sb = info->user_ptr[1];
 	const struct devlink_ops *ops = devlink->ops;
 
-	if (ops && ops->sb_occ_max_clear)
+	if (ops->sb_occ_max_clear)
 		return ops->sb_occ_max_clear(devlink, devlink_sb->index);
 	return -EOPNOTSUPP;
 }
@@ -1535,7 +1568,8 @@
 				   u32 seq, int flags)
 {
 	const struct devlink_ops *ops = devlink->ops;
-	u8 inline_mode, encap_mode;
+	enum devlink_eswitch_encap_mode encap_mode;
+	u8 inline_mode;
 	void *hdr;
 	int err = 0;
 	u16 mode;
@@ -1588,13 +1622,9 @@
 					   struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
-	const struct devlink_ops *ops = devlink->ops;
 	struct sk_buff *msg;
 	int err;
 
-	if (!ops)
-		return -EOPNOTSUPP;
-
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
@@ -1615,18 +1645,16 @@
 {
 	struct devlink *devlink = info->user_ptr[0];
 	const struct devlink_ops *ops = devlink->ops;
-	u8 inline_mode, encap_mode;
+	enum devlink_eswitch_encap_mode encap_mode;
+	u8 inline_mode;
 	int err = 0;
 	u16 mode;
 
-	if (!ops)
-		return -EOPNOTSUPP;
-
 	if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
 		if (!ops->eswitch_mode_set)
 			return -EOPNOTSUPP;
 		mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
-		err = ops->eswitch_mode_set(devlink, mode);
+		err = ops->eswitch_mode_set(devlink, mode, info->extack);
 		if (err)
 			return err;
 	}
@@ -1636,7 +1664,8 @@
 			return -EOPNOTSUPP;
 		inline_mode = nla_get_u8(
 				info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
-		err = ops->eswitch_inline_mode_set(devlink, inline_mode);
+		err = ops->eswitch_inline_mode_set(devlink, inline_mode,
+						   info->extack);
 		if (err)
 			return err;
 	}
@@ -1645,7 +1674,8 @@
 		if (!ops->eswitch_encap_mode_set)
 			return -EOPNOTSUPP;
 		encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
-		err = ops->eswitch_encap_mode_set(devlink, encap_mode);
+		err = ops->eswitch_encap_mode_set(devlink, encap_mode,
+						  info->extack);
 		if (err)
 			return err;
 	}
@@ -1660,7 +1690,7 @@
 	struct devlink_dpipe_field *field = &header->fields[match->field_id];
 	struct nlattr *match_attr;
 
-	match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH);
+	match_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_MATCH);
 	if (!match_attr)
 		return -EMSGSIZE;
 
@@ -1685,7 +1715,8 @@
 {
 	struct nlattr *matches_attr;
 
-	matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
+	matches_attr = nla_nest_start_noflag(skb,
+					     DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
 	if (!matches_attr)
 		return -EMSGSIZE;
 
@@ -1707,7 +1738,7 @@
 	struct devlink_dpipe_field *field = &header->fields[action->field_id];
 	struct nlattr *action_attr;
 
-	action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION);
+	action_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ACTION);
 	if (!action_attr)
 		return -EMSGSIZE;
 
@@ -1732,7 +1763,8 @@
 {
 	struct nlattr *actions_attr;
 
-	actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
+	actions_attr = nla_nest_start_noflag(skb,
+					     DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
 	if (!actions_attr)
 		return -EMSGSIZE;
 
@@ -1754,7 +1786,7 @@
 	u64 table_size;
 
 	table_size = table->table_ops->size_get(table->priv);
-	table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE);
+	table_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLE);
 	if (!table_attr)
 		return -EMSGSIZE;
 
@@ -1834,7 +1866,7 @@
 
 	if (devlink_nl_put_handle(skb, devlink))
 		goto nla_put_failure;
-	tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES);
+	tables_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLES);
 	if (!tables_attr)
 		goto nla_put_failure;
 
@@ -1935,8 +1967,8 @@
 	int err;
 
 	for (i = 0; i < values_count; i++) {
-		action_attr = nla_nest_start(skb,
-					     DEVLINK_ATTR_DPIPE_ACTION_VALUE);
+		action_attr = nla_nest_start_noflag(skb,
+						    DEVLINK_ATTR_DPIPE_ACTION_VALUE);
 		if (!action_attr)
 			return -EMSGSIZE;
 		err = devlink_dpipe_action_value_put(skb, &values[i]);
@@ -1972,8 +2004,8 @@
 	int err;
 
 	for (i = 0; i < values_count; i++) {
-		match_attr = nla_nest_start(skb,
-					    DEVLINK_ATTR_DPIPE_MATCH_VALUE);
+		match_attr = nla_nest_start_noflag(skb,
+						   DEVLINK_ATTR_DPIPE_MATCH_VALUE);
 		if (!match_attr)
 			return -EMSGSIZE;
 		err = devlink_dpipe_match_value_put(skb, &values[i]);
@@ -1994,7 +2026,7 @@
 	struct nlattr *entry_attr, *matches_attr, *actions_attr;
 	int err;
 
-	entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY);
+	entry_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ENTRY);
 	if (!entry_attr)
 		return  -EMSGSIZE;
 
@@ -2006,8 +2038,8 @@
 				      entry->counter, DEVLINK_ATTR_PAD))
 			goto nla_put_failure;
 
-	matches_attr = nla_nest_start(skb,
-				      DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
+	matches_attr = nla_nest_start_noflag(skb,
+					     DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
 	if (!matches_attr)
 		goto nla_put_failure;
 
@@ -2019,8 +2051,8 @@
 	}
 	nla_nest_end(skb, matches_attr);
 
-	actions_attr = nla_nest_start(skb,
-				      DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
+	actions_attr = nla_nest_start_noflag(skb,
+					     DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
 	if (!actions_attr)
 		goto nla_put_failure;
 
@@ -2077,8 +2109,8 @@
 	devlink = dump_ctx->info->user_ptr[0];
 	if (devlink_nl_put_handle(dump_ctx->skb, devlink))
 		goto nla_put_failure;
-	dump_ctx->nest = nla_nest_start(dump_ctx->skb,
-					DEVLINK_ATTR_DPIPE_ENTRIES);
+	dump_ctx->nest = nla_nest_start_noflag(dump_ctx->skb,
+					       DEVLINK_ATTR_DPIPE_ENTRIES);
 	if (!dump_ctx->nest)
 		goto nla_put_failure;
 	return 0;
@@ -2188,7 +2220,8 @@
 
 	for (i = 0; i < header->fields_count; i++) {
 		field = &header->fields[i];
-		field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD);
+		field_attr = nla_nest_start_noflag(skb,
+						   DEVLINK_ATTR_DPIPE_FIELD);
 		if (!field_attr)
 			return -EMSGSIZE;
 		if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
@@ -2211,7 +2244,7 @@
 	struct nlattr *fields_attr, *header_attr;
 	int err;
 
-	header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER);
+	header_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADER);
 	if (!header_attr)
 		return -EMSGSIZE;
 
@@ -2220,7 +2253,8 @@
 	    nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
 		goto nla_put_failure;
 
-	fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
+	fields_attr = nla_nest_start_noflag(skb,
+					    DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
 	if (!fields_attr)
 		goto nla_put_failure;
 
@@ -2267,7 +2301,7 @@
 
 	if (devlink_nl_put_handle(skb, devlink))
 		goto nla_put_failure;
-	headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS);
+	headers_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADERS);
 	if (!headers_attr)
 		goto nla_put_failure;
 
@@ -2491,7 +2525,7 @@
 	struct nlattr *child_resource_attr;
 	struct nlattr *resource_attr;
 
-	resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE);
+	resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE);
 	if (!resource_attr)
 		return -EMSGSIZE;
 
@@ -2515,7 +2549,8 @@
 		       resource->size_valid))
 		goto nla_put_failure;
 
-	child_resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST);
+	child_resource_attr = nla_nest_start_noflag(skb,
+						    DEVLINK_ATTR_RESOURCE_LIST);
 	if (!child_resource_attr)
 		goto nla_put_failure;
 
@@ -2566,7 +2601,8 @@
 	if (devlink_nl_put_handle(skb, devlink))
 		goto nla_put_failure;
 
-	resources_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST);
+	resources_attr = nla_nest_start_noflag(skb,
+					       DEVLINK_ATTR_RESOURCE_LIST);
 	if (!resources_attr)
 		goto nla_put_failure;
 
@@ -2638,12 +2674,32 @@
 	return err;
 }
 
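+/* Reload is implemented as a down/up pair of driver ops; a reload_up
+ * failure is latched in devlink->reload_failed and reported via
+ * DEVLINK_ATTR_RELOAD_FAILED.
+ */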
+static bool devlink_reload_supported(struct devlink *devlink)
+{
+	return devlink->ops->reload_down && devlink->ops->reload_up;
+}
+
+static void devlink_reload_failed_set(struct devlink *devlink,
+				      bool reload_failed)
+{
+	if (devlink->reload_failed == reload_failed)
+		return;
+	devlink->reload_failed = reload_failed;
+	devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+bool devlink_is_reload_failed(const struct devlink *devlink)
+{
+	return devlink->reload_failed;
+}
+EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+
 static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
 	int err;
 
-	if (!devlink->ops->reload)
+	if (!devlink_reload_supported(devlink) || !devlink->reload_enabled)
 		return -EOPNOTSUPP;
 
 	err = devlink_resources_validate(devlink, NULL, info);
@@ -2651,7 +2707,135 @@
 		NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
 		return err;
 	}
-	return devlink->ops->reload(devlink, info->extack);
+	err = devlink->ops->reload_down(devlink, info->extack);
+	if (err)
+		return err;
+	err = devlink->ops->reload_up(devlink, info->extack);
+	devlink_reload_failed_set(devlink, !!err);
+	return err;
+}
+
+static int devlink_nl_flash_update_fill(struct sk_buff *msg,
+					struct devlink *devlink,
+					enum devlink_command cmd,
+					const char *status_msg,
+					const char *component,
+					unsigned long done, unsigned long total)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
+		goto out;
+
+	if (status_msg &&
+	    nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
+			   status_msg))
+		goto nla_put_failure;
+	if (component &&
+	    nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
+			   component))
+		goto nla_put_failure;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
+			      done, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
+			      total, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+out:
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void __devlink_flash_update_notify(struct devlink *devlink,
+					  enum devlink_command cmd,
+					  const char *status_msg,
+					  const char *component,
+					  unsigned long done,
+					  unsigned long total)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
+		cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
+		cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_flash_update_fill(msg, devlink, cmd, status_msg,
+					   component, done, total);
+	if (err)
+		goto out_free_msg;
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+	return;
+
+out_free_msg:
+	nlmsg_free(msg);
+}
+
+void devlink_flash_update_begin_notify(struct devlink *devlink)
+{
+	__devlink_flash_update_notify(devlink,
+				      DEVLINK_CMD_FLASH_UPDATE,
+				      NULL, NULL, 0, 0);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_begin_notify);
+
+void devlink_flash_update_end_notify(struct devlink *devlink)
+{
+	__devlink_flash_update_notify(devlink,
+				      DEVLINK_CMD_FLASH_UPDATE_END,
+				      NULL, NULL, 0, 0);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_end_notify);
+
+void devlink_flash_update_status_notify(struct devlink *devlink,
+					const char *status_msg,
+					const char *component,
+					unsigned long done,
+					unsigned long total)
+{
+	__devlink_flash_update_notify(devlink,
+				      DEVLINK_CMD_FLASH_UPDATE_STATUS,
+				      status_msg, component, done, total);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
+
+static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	const char *file_name, *component;
+	struct nlattr *nla_component;
+
+	if (!devlink->ops->flash_update)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME])
+		return -EINVAL;
+	file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]);
+
+	nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT];
+	component = nla_component ? nla_data(nla_component) : NULL;
+
+	return devlink->ops->flash_update(devlink, file_name, component,
+					  info->extack);
 }
 
 static const struct devlink_param devlink_param_generic[] = {
@@ -2675,6 +2859,31 @@
 		.name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
 		.type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
 	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
+		.name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME,
+		.type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+		.name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME,
+		.type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
+		.name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
+		.type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+		.name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
+		.type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
+		.name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME,
+		.type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE,
+	},
 };
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -2779,7 +2988,8 @@
 {
 	struct nlattr *param_value_attr;
 
-	param_value_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUE);
+	param_value_attr = nla_nest_start_noflag(msg,
+						 DEVLINK_ATTR_PARAM_VALUE);
 	if (!param_value_attr)
 		goto nla_put_failure;
 
@@ -2821,11 +3031,13 @@
 }
 
 static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
+				 unsigned int port_index,
 				 struct devlink_param_item *param_item,
 				 enum devlink_command cmd,
 				 u32 portid, u32 seq, int flags)
 {
 	union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+	bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
 	const struct devlink_param *param = param_item->param;
 	struct devlink_param_gset_ctx ctx;
 	struct nlattr *param_values_list;
@@ -2844,12 +3056,15 @@
 				return -EOPNOTSUPP;
 			param_value[i] = param_item->driverinit_value;
 		} else {
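+			/* Runtime and permanent values can only be read once
+			 * the driver has published the parameter.
+			 */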
+			if (!param_item->published)
+				continue;
 			ctx.cmode = i;
 			err = devlink_param_get(devlink, param, &ctx);
 			if (err)
 				return err;
 			param_value[i] = ctx.val;
 		}
+		param_value_set[i] = true;
 	}
 
 	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -2858,7 +3073,14 @@
 
 	if (devlink_nl_put_handle(msg, devlink))
 		goto genlmsg_cancel;
-	param_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM);
+
+	if (cmd == DEVLINK_CMD_PORT_PARAM_GET ||
+	    cmd == DEVLINK_CMD_PORT_PARAM_NEW ||
+	    cmd == DEVLINK_CMD_PORT_PARAM_DEL)
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index))
+			goto genlmsg_cancel;
+
+	param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM);
 	if (!param_attr)
 		goto genlmsg_cancel;
 	if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
@@ -2872,12 +3094,13 @@
 	if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
 		goto param_nest_cancel;
 
-	param_values_list = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUES_LIST);
+	param_values_list = nla_nest_start_noflag(msg,
+						  DEVLINK_ATTR_PARAM_VALUES_LIST);
 	if (!param_values_list)
 		goto param_nest_cancel;
 
 	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
-		if (!devlink_param_cmode_is_supported(param, i))
+		if (!param_value_set[i])
 			continue;
 		err = devlink_nl_param_value_fill_one(msg, param->type,
 						      i, param_value[i]);
@@ -2900,18 +3123,22 @@
 }
 
 static void devlink_param_notify(struct devlink *devlink,
+				 unsigned int port_index,
 				 struct devlink_param_item *param_item,
 				 enum devlink_command cmd)
 {
 	struct sk_buff *msg;
 	int err;
 
-	WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL);
+	WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
+		cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
+		cmd != DEVLINK_CMD_PORT_PARAM_DEL);
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return;
-	err = devlink_nl_param_fill(msg, devlink, param_item, cmd, 0, 0, 0);
+	err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd,
+				    0, 0, 0);
 	if (err) {
 		nlmsg_free(msg);
 		return;
@@ -2940,12 +3167,12 @@
 				idx++;
 				continue;
 			}
-			err = devlink_nl_param_fill(msg, devlink, param_item,
+			err = devlink_nl_param_fill(msg, devlink, 0, param_item,
 						    DEVLINK_CMD_PARAM_GET,
 						    NETLINK_CB(cb->skb).portid,
 						    cb->nlh->nlmsg_seq,
 						    NLM_F_MULTI);
-			if (err) {
+			if (err && err != -EOPNOTSUPP) {
 				mutex_unlock(&devlink->lock);
 				goto out;
 			}
@@ -3029,7 +3256,7 @@
 }
 
 static struct devlink_param_item *
-devlink_param_get_from_info(struct devlink *devlink,
+devlink_param_get_from_info(struct list_head *param_list,
 			    struct genl_info *info)
 {
 	char *param_name;
@@ -3038,7 +3265,7 @@
 		return NULL;
 
 	param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
-	return devlink_param_find_by_name(&devlink->param_list, param_name);
+	return devlink_param_find_by_name(param_list, param_name);
 }
 
 static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
@@ -3049,7 +3276,7 @@
 	struct sk_buff *msg;
 	int err;
 
-	param_item = devlink_param_get_from_info(devlink, info);
+	param_item = devlink_param_get_from_info(&devlink->param_list, info);
 	if (!param_item)
 		return -EINVAL;
 
@@ -3057,7 +3284,7 @@
 	if (!msg)
 		return -ENOMEM;
 
-	err = devlink_nl_param_fill(msg, devlink, param_item,
+	err = devlink_nl_param_fill(msg, devlink, 0, param_item,
 				    DEVLINK_CMD_PARAM_GET,
 				    info->snd_portid, info->snd_seq, 0);
 	if (err) {
@@ -3068,10 +3295,12 @@
 	return genlmsg_reply(msg, info);
 }
 
-static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
-					 struct genl_info *info)
+static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
+					   unsigned int port_index,
+					   struct list_head *param_list,
+					   struct genl_info *info,
+					   enum devlink_command cmd)
 {
-	struct devlink *devlink = info->user_ptr[0];
 	enum devlink_param_type param_type;
 	struct devlink_param_gset_ctx ctx;
 	enum devlink_param_cmode cmode;
@@ -3080,7 +3309,7 @@
 	union devlink_param_value value;
 	int err = 0;
 
-	param_item = devlink_param_get_from_info(devlink, info);
+	param_item = devlink_param_get_from_info(param_list, info);
 	if (!param_item)
 		return -EINVAL;
 	param = param_item->param;
@@ -3120,17 +3349,28 @@
 			return err;
 	}
 
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	devlink_param_notify(devlink, port_index, param_item, cmd);
 	return 0;
 }
 
+static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+
+	return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->param_list,
+					       info, DEVLINK_CMD_PARAM_NEW);
+}
+
 static int devlink_param_register_one(struct devlink *devlink,
-				      const struct devlink_param *param)
+				      unsigned int port_index,
+				      struct list_head *param_list,
+				      const struct devlink_param *param,
+				      enum devlink_command cmd)
 {
 	struct devlink_param_item *param_item;
 
-	if (devlink_param_find_by_name(&devlink->param_list,
-				       param->name))
+	if (devlink_param_find_by_name(param_list, param->name))
 		return -EEXIST;
 
 	if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
@@ -3143,24 +3383,111 @@
 		return -ENOMEM;
 	param_item->param = param;
 
-	list_add_tail(&param_item->list, &devlink->param_list);
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	list_add_tail(&param_item->list, param_list);
+	devlink_param_notify(devlink, port_index, param_item, cmd);
 	return 0;
 }
 
 static void devlink_param_unregister_one(struct devlink *devlink,
-					 const struct devlink_param *param)
+					 unsigned int port_index,
+					 struct list_head *param_list,
+					 const struct devlink_param *param,
+					 enum devlink_command cmd)
 {
 	struct devlink_param_item *param_item;
 
-	param_item = devlink_param_find_by_name(&devlink->param_list,
-						param->name);
+	param_item = devlink_param_find_by_name(param_list, param->name);
 	WARN_ON(!param_item);
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_DEL);
+	devlink_param_notify(devlink, port_index, param_item, cmd);
 	list_del(&param_item->list);
 	kfree(param_item);
 }
 
+static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
+						struct netlink_callback *cb)
+{
+	struct devlink_param_item *param_item;
+	struct devlink_port *devlink_port;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(devlink_port, &devlink->port_list, list) {
+			list_for_each_entry(param_item,
+					    &devlink_port->param_list, list) {
+				if (idx < start) {
+					idx++;
+					continue;
+				}
+				err = devlink_nl_param_fill(msg,
+						devlink_port->devlink,
+						devlink_port->index, param_item,
+						DEVLINK_CMD_PORT_PARAM_GET,
+						NETLINK_CB(cb->skb).portid,
+						cb->nlh->nlmsg_seq,
+						NLM_F_MULTI);
+				if (err && err != -EOPNOTSUPP) {
+					mutex_unlock(&devlink->lock);
+					goto out;
+				}
+				idx++;
+			}
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
+					      struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[0];
+	struct devlink_param_item *param_item;
+	struct sk_buff *msg;
+	int err;
+
+	param_item = devlink_param_get_from_info(&devlink_port->param_list,
+						 info);
+	if (!param_item)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_param_fill(msg, devlink_port->devlink,
+				    devlink_port->index, param_item,
+				    DEVLINK_CMD_PORT_PARAM_GET,
+				    info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
+					      struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[0];
+
+	return __devlink_nl_cmd_param_set_doit(devlink_port->devlink,
+					       devlink_port->index,
+					       &devlink_port->param_list, info,
+					       DEVLINK_CMD_PORT_PARAM_NEW);
+}
+
 static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
 					     struct devlink *devlink,
 					     struct devlink_snapshot *snapshot)
@@ -3168,7 +3495,7 @@
 	struct nlattr *snap_attr;
 	int err;
 
-	snap_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
+	snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
 	if (!snap_attr)
 		return -EINVAL;
 
@@ -3192,7 +3519,8 @@
 	struct nlattr *snapshots_attr;
 	int err;
 
-	snapshots_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOTS);
+	snapshots_attr = nla_nest_start_noflag(msg,
+					       DEVLINK_ATTR_REGION_SNAPSHOTS);
 	if (!snapshots_attr)
 		return -EINVAL;
 
@@ -3300,6 +3628,16 @@
 	nlmsg_free(msg);
 }
 
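+/* Must be called while holding the devlink instance lock. */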
+static void devlink_region_snapshot_del(struct devlink_region *region,
+					struct devlink_snapshot *snapshot)
+{
+	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
+	region->cur_snapshots--;
+	list_del(&snapshot->list);
+	(*snapshot->data_destructor)(snapshot->data);
+	kfree(snapshot);
+}
+
 static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
 					  struct genl_info *info)
 {
@@ -3395,8 +3733,7 @@
 	if (!snapshot)
 		return -EINVAL;
 
-	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
-	devlink_region_snapshot_del(snapshot);
+	devlink_region_snapshot_del(region, snapshot);
 	return 0;
 }
 
@@ -3408,7 +3745,7 @@
 	struct nlattr *chunk_attr;
 	int err;
 
-	chunk_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_CHUNK);
+	chunk_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_CHUNK);
 	if (!chunk_attr)
 		return -EINVAL;
 
@@ -3452,8 +3789,8 @@
 	if (!snapshot)
 		return -EINVAL;
 
-	if (end_offset > snapshot->data_len || dump)
-		end_offset = snapshot->data_len;
+	if (end_offset > region->size || dump)
+		end_offset = region->size;
 
 	while (curr_offset < end_offset) {
 		u32 data_size;
@@ -3482,44 +3819,57 @@
 					     struct netlink_callback *cb)
 {
 	u64 ret_offset, start_offset, end_offset = 0;
-	struct nlattr *attrs[DEVLINK_ATTR_MAX + 1];
-	const struct genl_ops *ops = cb->data;
 	struct devlink_region *region;
 	struct nlattr *chunks_attr;
 	const char *region_name;
 	struct devlink *devlink;
+	struct nlattr **attrs;
 	bool dump = true;
 	void *hdr;
 	int err;
 
 	start_offset = *((u64 *)&cb->args[0]);
 
-	err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize,
-			  attrs, DEVLINK_ATTR_MAX, ops->policy, NULL);
-	if (err)
-		goto out;
+	attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
 
-	devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
-	if (IS_ERR(devlink))
-		goto out;
+	err = nlmsg_parse_deprecated(cb->nlh,
+				     GENL_HDRLEN + devlink_nl_family.hdrsize,
+				     attrs, DEVLINK_ATTR_MAX,
+				     devlink_nl_family.policy, cb->extack);
+	if (err)
+		goto out_free;
 
 	mutex_lock(&devlink_mutex);
+	devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
+	if (IS_ERR(devlink)) {
+		err = PTR_ERR(devlink);
+		goto out_dev;
+	}
+
 	mutex_lock(&devlink->lock);
 
 	if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
-	    !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+	    !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) {
+		err = -EINVAL;
 		goto out_unlock;
+	}
 
 	region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
 	region = devlink_region_get_by_name(devlink, region_name);
-	if (!region)
+	if (!region) {
+		err = -EINVAL;
 		goto out_unlock;
+	}
 
 	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			  &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
 			  DEVLINK_CMD_REGION_READ);
-	if (!hdr)
+	if (!hdr) {
+		err = -EMSGSIZE;
 		goto out_unlock;
+	}
 
 	err = devlink_nl_put_handle(skb, devlink);
 	if (err)
@@ -3529,9 +3879,11 @@
 	if (err)
 		goto nla_put_failure;
 
-	chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS);
-	if (!chunks_attr)
+	chunks_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_REGION_CHUNKS);
+	if (!chunks_attr) {
+		err = -EMSGSIZE;
 		goto nla_put_failure;
+	}
 
 	if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
 	    attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
@@ -3554,8 +3906,10 @@
 		goto nla_put_failure;
 
 	/* Check if there was any progress done to prevent infinite loop */
-	if (ret_offset == start_offset)
+	if (ret_offset == start_offset) {
+		err = -EINVAL;
 		goto nla_put_failure;
+	}
 
 	*((u64 *)&cb->args[0]) = ret_offset;
 
@@ -3563,6 +3917,7 @@
 	genlmsg_end(skb, hdr);
 	mutex_unlock(&devlink->lock);
 	mutex_unlock(&devlink_mutex);
+	kfree(attrs);
 
 	return skb->len;
 
@@ -3570,8 +3925,1844 @@
 	genlmsg_cancel(skb, hdr);
 out_unlock:
 	mutex_unlock(&devlink->lock);
+out_dev:
 	mutex_unlock(&devlink_mutex);
+out_free:
+	kfree(attrs);
+	return err;
+}
+
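+/* Opaque request handle passed to the driver's ->info_get() callback;
+ * drivers fill it through the devlink_info_*_put() helpers below.
+ */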
+struct devlink_info_req {
+	struct sk_buff *msg;
+};
+
+int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name)
+{
+	return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name);
+}
+EXPORT_SYMBOL_GPL(devlink_info_driver_name_put);
+
+int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
+{
+	return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
+
+static int devlink_info_version_put(struct devlink_info_req *req, int attr,
+				    const char *version_name,
+				    const char *version_value)
+{
+	struct nlattr *nest;
+	int err;
+
+	nest = nla_nest_start_noflag(req->msg, attr);
+	if (!nest)
+		return -EMSGSIZE;
+
+	err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME,
+			     version_name);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE,
+			     version_value);
+	if (err)
+		goto nla_put_failure;
+
+	nla_nest_end(req->msg, nest);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(req->msg, nest);
+	return err;
+}
+
+int devlink_info_version_fixed_put(struct devlink_info_req *req,
+				   const char *version_name,
+				   const char *version_value)
+{
+	return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
+					version_name, version_value);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
+
+int devlink_info_version_stored_put(struct devlink_info_req *req,
+				    const char *version_name,
+				    const char *version_value)
+{
+	return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+					version_name, version_value);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
+
+int devlink_info_version_running_put(struct devlink_info_req *req,
+				     const char *version_name,
+				     const char *version_value)
+{
+	return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+					version_name, version_value);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
+
+static int
+devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
+		     enum devlink_command cmd, u32 portid,
+		     u32 seq, int flags, struct netlink_ext_ack *extack)
+{
+	struct devlink_info_req req;
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	err = -EMSGSIZE;
+	if (devlink_nl_put_handle(msg, devlink))
+		goto err_cancel_msg;
+
+	req.msg = msg;
+	err = devlink->ops->info_get(devlink, &req, extack);
+	if (err)
+		goto err_cancel_msg;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+err_cancel_msg:
+	genlmsg_cancel(msg, hdr);
+	return err;
+}
+
+static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct sk_buff *msg;
+	int err;
+
+	if (!devlink->ops->info_get)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+				   info->snd_portid, info->snd_seq, 0,
+				   info->extack);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		if (idx < start) {
+			idx++;
+			continue;
+		}
+
+		if (!devlink->ops->info_get) {
+			idx++;
+			continue;
+		}
+
+		mutex_lock(&devlink->lock);
+		err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+					   NETLINK_CB(cb->skb).portid,
+					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					   cb->extack);
+		mutex_unlock(&devlink->lock);
+		if (err && err != -EOPNOTSUPP)
+			break;
+		idx++;
+	}
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
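+/* A formatted message (fmsg) is built by drivers as a flat list of items
+ * (nest markers, names and values) and only later serialized into netlink
+ * attributes, possibly split across several messages.
+ */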
+struct devlink_fmsg_item {
+	struct list_head list;
+	int attrtype;
+	u8 nla_type;
+	u16 len;
+	int value[];
+};
+
+struct devlink_fmsg {
+	struct list_head item_list;
+};
+
+static struct devlink_fmsg *devlink_fmsg_alloc(void)
+{
+	struct devlink_fmsg *fmsg;
+
+	fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL);
+	if (!fmsg)
+		return NULL;
+
+	INIT_LIST_HEAD(&fmsg->item_list);
+
+	return fmsg;
+}
+
+static void devlink_fmsg_free(struct devlink_fmsg *fmsg)
+{
+	struct devlink_fmsg_item *item, *tmp;
+
+	list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) {
+		list_del(&item->list);
+		kfree(item);
+	}
+	kfree(fmsg);
+}
+
+static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg,
+				    int attrtype)
+{
+	struct devlink_fmsg_item *item;
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	item->attrtype = attrtype;
+	list_add_tail(&item->list, &fmsg->item_list);
+
+	return 0;
+}
+
+int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
+{
+	return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start);
+
+static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg)
+{
+	return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END);
+}
+
+int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+	return devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end);
+
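+/* Upper bound on a single name/value payload, so that each serialized
+ * attribute fits in a default-sized genetlink message.
+ */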
+#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN)
+
+static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name)
+{
+	struct devlink_fmsg_item *item;
+
+	if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE)
+		return -EMSGSIZE;
+
+	item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	item->nla_type = NLA_NUL_STRING;
+	item->len = strlen(name) + 1;
+	item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME;
+	memcpy(&item->value, name, item->len);
+	list_add_tail(&item->list, &fmsg->item_list);
+
+	return 0;
+}
+
+int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
+{
+	int err;
+
+	err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_put_name(fmsg, name);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start);
+
+int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+	return devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end);
+
+int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+				     const char *name)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start);
+
+int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+	int err;
+
+	err = devlink_fmsg_nest_end(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end);
+
+static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
+				  const void *value, u16 value_len,
+				  u8 value_nla_type)
+{
+	struct devlink_fmsg_item *item;
+
+	if (value_len > DEVLINK_FMSG_MAX_SIZE)
+		return -EMSGSIZE;
+
+	item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	item->nla_type = value_nla_type;
+	item->len = value_len;
+	item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+	memcpy(&item->value, value, item->len);
+	list_add_tail(&item->list, &fmsg->item_list);
+
+	return 0;
+}
+
+int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+{
+	return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put);
+
+int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+{
+	return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put);
+
+int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
+{
+	return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
+
+int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+{
+	return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put);
+
+int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
+{
+	return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1,
+				      NLA_NUL_STRING);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_put);
+
+int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+			    u16 value_len)
+{
+	return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put);
+
+int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			       bool value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_bool_put(fmsg, value);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put);
+
+int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			     u8 value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_put(fmsg, value);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put);
+
+int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			      u32 value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_put(fmsg, value);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put);
+
+int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			      u64 value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_put(fmsg, value);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put);
+
+int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+				 const char *value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_string_put(fmsg, value);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put);
+
+int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+				 const void *value, u16 value_len)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_binary_put(fmsg, value, value_len);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put);
+
+static int
+devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+	switch (msg->nla_type) {
+	case NLA_FLAG:
+	case NLA_U8:
+	case NLA_U32:
+	case NLA_U64:
+	case NLA_NUL_STRING:
+	case NLA_BINARY:
+		return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,
+				  msg->nla_type);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int
+devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+	int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+	u8 tmp;
+
+	switch (msg->nla_type) {
+	case NLA_FLAG:
+		/* Always provide flag data, regardless of its value */
+		tmp = *(bool *) msg->value;
+
+		return nla_put_u8(skb, attrtype, tmp);
+	case NLA_U8:
+		return nla_put_u8(skb, attrtype, *(u8 *) msg->value);
+	case NLA_U32:
+		return nla_put_u32(skb, attrtype, *(u32 *) msg->value);
+	case NLA_U64:
+		return nla_put_u64_64bit(skb, attrtype, *(u64 *) msg->value,
+					 DEVLINK_ATTR_PAD);
+	case NLA_NUL_STRING:
+		return nla_put_string(skb, attrtype, (char *) &msg->value);
+	case NLA_BINARY:
+		return nla_put(skb, attrtype, msg->len, (void *) &msg->value);
+	default:
+		return -EINVAL;
+	}
+}
+
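+/* Serialize fmsg items into @skb, starting at item index *start. On
+ * failure, *start is left at the first item that was not serialized,
+ * allowing the caller to resume in a new message.
+ */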
+static int
+devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb,
+			 int *start)
+{
+	struct devlink_fmsg_item *item;
+	struct nlattr *fmsg_nlattr;
+	int i = 0;
+	int err;
+
+	fmsg_nlattr = nla_nest_start_noflag(skb, DEVLINK_ATTR_FMSG);
+	if (!fmsg_nlattr)
+		return -EMSGSIZE;
+
+	list_for_each_entry(item, &fmsg->item_list, list) {
+		if (i < *start) {
+			i++;
+			continue;
+		}
+
+		switch (item->attrtype) {
+		case DEVLINK_ATTR_FMSG_OBJ_NEST_START:
+		case DEVLINK_ATTR_FMSG_PAIR_NEST_START:
+		case DEVLINK_ATTR_FMSG_ARR_NEST_START:
+		case DEVLINK_ATTR_FMSG_NEST_END:
+			err = nla_put_flag(skb, item->attrtype);
+			break;
+		case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA:
+			err = devlink_fmsg_item_fill_type(item, skb);
+			if (err)
+				break;
+			err = devlink_fmsg_item_fill_data(item, skb);
+			break;
+		case DEVLINK_ATTR_FMSG_OBJ_NAME:
+			err = nla_put_string(skb, item->attrtype,
+					     (char *) &item->value);
+			break;
+		default:
+			err = -EINVAL;
+			break;
+		}
+		if (!err)
+			*start = ++i;
+		else
+			break;
+	}
+
+	nla_nest_end(skb, fmsg_nlattr);
+	return err;
+}
+
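+/* Stream a whole fmsg to user space as a series of NLM_F_MULTI replies,
+ * terminated by an explicit NLMSG_DONE message.
+ */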
+static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
+			    struct genl_info *info,
+			    enum devlink_command cmd, int flags)
+{
+	struct nlmsghdr *nlh;
+	struct sk_buff *skb;
+	bool last = false;
+	int index = 0;
+	void *hdr;
+	int err;
+
+	while (!last) {
+		int tmp_index = index;
+
+		skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+		if (!skb)
+			return -ENOMEM;
+
+		hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+				  &devlink_nl_family, flags | NLM_F_MULTI, cmd);
+		if (!hdr) {
+			err = -EMSGSIZE;
+			goto nla_put_failure;
+		}
+
+		err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
+		if (!err)
+			last = true;
+		else if (err != -EMSGSIZE || tmp_index == index)
+			goto nla_put_failure;
+
+		genlmsg_end(skb, hdr);
+		err = genlmsg_reply(skb, info);
+		if (err)
+			return err;
+	}
+
+	skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+			NLMSG_DONE, 0, flags | NLM_F_MULTI);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto nla_put_failure;
+	}
+
+	return genlmsg_reply(skb, info);
+
+nla_put_failure:
+	nlmsg_free(skb);
+	return err;
+}
+
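+/* Dumpit counterpart of devlink_fmsg_snd(): emit the next chunk of the
+ * fmsg and save the resume index in cb->args[0].
+ */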
+static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb,
+			       struct netlink_callback *cb,
+			       enum devlink_command cmd)
+{
+	int index = cb->args[0];
+	int tmp_index = index;
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd);
+	if (!hdr) {
+		err = -EMSGSIZE;
+		goto nla_put_failure;
+	}
+
+	err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
+	if ((err && err != -EMSGSIZE) || tmp_index == index)
+		goto nla_put_failure;
+
+	cb->args[0] = index;
+	genlmsg_end(skb, hdr);
+	return skb->len;
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return err;
+}
+
+struct devlink_health_reporter {
+	struct list_head list;
+	void *priv;
+	const struct devlink_health_reporter_ops *ops;
+	struct devlink *devlink;
+	struct devlink_fmsg *dump_fmsg;
+	struct mutex dump_lock; /* lock parallel read/write from dump buffers */
+	u64 graceful_period;
+	bool auto_recover;
+	u8 health_state;
+	u64 dump_ts;
+	u64 dump_real_ts;
+	u64 error_count;
+	u64 recovery_count;
+	u64 last_recovery_ts;
+	refcount_t refcount;
+};
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+	return reporter->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
+
+static struct devlink_health_reporter *
+devlink_health_reporter_find_by_name(struct devlink *devlink,
+				     const char *reporter_name)
+{
+	struct devlink_health_reporter *reporter;
+
+	lockdep_assert_held(&devlink->reporters_lock);
+	list_for_each_entry(reporter, &devlink->reporter_list, list)
+		if (!strcmp(reporter->ops->name, reporter_name))
+			return reporter;
+	return NULL;
+}
+
+/**
+ *	devlink_health_reporter_create - create devlink health reporter
+ *
+ *	@devlink: devlink instance the reporter belongs to
+ *	@ops: reporter operations (recover/dump/diagnose callbacks)
+ *	@graceful_period: minimum time (in msecs) between recovery attempts,
+ *			  to avoid recovery loops
+ *	@auto_recover: automatically attempt recovery when an error is reported
+ *	@priv: driver private data, retrievable with
+ *	       devlink_health_reporter_priv()
+ */
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, bool auto_recover,
+			       void *priv)
+{
+	struct devlink_health_reporter *reporter;
+
+	mutex_lock(&devlink->reporters_lock);
+	if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
+		reporter = ERR_PTR(-EEXIST);
+		goto unlock;
+	}
+
+	if (WARN_ON(auto_recover && !ops->recover) ||
+	    WARN_ON(graceful_period && !ops->recover)) {
+		reporter = ERR_PTR(-EINVAL);
+		goto unlock;
+	}
+
+	reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
+	if (!reporter) {
+		reporter = ERR_PTR(-ENOMEM);
+		goto unlock;
+	}
+
+	reporter->priv = priv;
+	reporter->ops = ops;
+	reporter->devlink = devlink;
+	reporter->graceful_period = graceful_period;
+	reporter->auto_recover = auto_recover;
+	mutex_init(&reporter->dump_lock);
+	refcount_set(&reporter->refcount, 1);
+	list_add_tail(&reporter->list, &devlink->reporter_list);
+unlock:
+	mutex_unlock(&devlink->reporters_lock);
+	return reporter;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
+
+/**
+ *	devlink_health_reporter_destroy - destroy devlink health reporter
+ *
+ *	@reporter: devlink health reporter to destroy
+ */
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+	mutex_lock(&reporter->devlink->reporters_lock);
+	list_del(&reporter->list);
+	mutex_unlock(&reporter->devlink->reporters_lock);
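+	/* Wait for any in-flight netlink operation that still holds a
+	 * reference to this reporter to drop it.
+	 */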
+	while (refcount_read(&reporter->refcount) > 1)
+		msleep(100);
+	mutex_destroy(&reporter->dump_lock);
+	if (reporter->dump_fmsg)
+		devlink_fmsg_free(reporter->dump_fmsg);
+	kfree(reporter);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+
+void
+devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
+				     enum devlink_health_reporter_state state)
+{
+	if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
+		    state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
+		return;
+
+	if (reporter->health_state == state)
+		return;
+
+	reporter->health_state = state;
+	trace_devlink_health_reporter_state_update(reporter->devlink,
+						   reporter->ops->name, state);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
+
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+				void *priv_ctx)
+{
+	int err;
+
+	if (!reporter->ops->recover)
+		return -EOPNOTSUPP;
+
+	err = reporter->ops->recover(reporter, priv_ctx);
+	if (err)
+		return err;
+
+	reporter->recovery_count++;
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+	reporter->last_recovery_ts = jiffies;
+
+	return 0;
+}
+
+static void
+devlink_health_dump_clear(struct devlink_health_reporter *reporter)
+{
+	if (!reporter->dump_fmsg)
+		return;
+	devlink_fmsg_free(reporter->dump_fmsg);
+	reporter->dump_fmsg = NULL;
+}
+
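+/* Take a new dump unless one is already held; a previously taken dump is
+ * reused until it is cleared. Must be called with dump_lock held.
+ */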
+static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
+				  void *priv_ctx)
+{
+	int err;
+
+	if (!reporter->ops->dump)
+		return 0;
+
+	if (reporter->dump_fmsg)
+		return 0;
+
+	reporter->dump_fmsg = devlink_fmsg_alloc();
+	if (!reporter->dump_fmsg)
+		return -ENOMEM;
+
+	err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
+	if (err)
+		goto dump_err;
+
+	err = reporter->ops->dump(reporter, reporter->dump_fmsg,
+				  priv_ctx);
+	if (err)
+		goto dump_err;
+
+	err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
+	if (err)
+		goto dump_err;
+
+	reporter->dump_ts = jiffies;
+	reporter->dump_real_ts = ktime_get_real_ns();
+
+	return 0;
+
+dump_err:
+	devlink_health_dump_clear(reporter);
+	return err;
+}
+
+int devlink_health_report(struct devlink_health_reporter *reporter,
+			  const char *msg, void *priv_ctx)
+{
+	enum devlink_health_reporter_state prev_health_state;
+	struct devlink *devlink = reporter->devlink;
+
+	/* record the current error via a trace event */
+	WARN_ON(!msg);
+	trace_devlink_health_report(devlink, reporter->ops->name, msg);
+	reporter->error_count++;
+	prev_health_state = reporter->health_state;
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+
+	/* abort if the previous error wasn't recovered */
+	if (reporter->auto_recover &&
+	    (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
+	     jiffies - reporter->last_recovery_ts <
+	     msecs_to_jiffies(reporter->graceful_period))) {
+		trace_devlink_health_recover_aborted(devlink,
+						     reporter->ops->name,
+						     reporter->health_state,
+						     jiffies -
+						     reporter->last_recovery_ts);
+		return -ECANCELED;
+	}
+
+	mutex_lock(&reporter->dump_lock);
+	/* store a dump of the current error, for later analysis */
+	devlink_health_do_dump(reporter, priv_ctx);
+	mutex_unlock(&reporter->dump_lock);
+
+	if (reporter->auto_recover)
+		return devlink_health_reporter_recover(reporter, priv_ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_report);
+
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_attrs(struct devlink *devlink,
+				       struct nlattr **attrs)
+{
+	struct devlink_health_reporter *reporter;
+	char *reporter_name;
+
+	if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
+		return NULL;
+
+	reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
+	mutex_lock(&devlink->reporters_lock);
+	reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
+	if (reporter)
+		refcount_inc(&reporter->refcount);
+	mutex_unlock(&devlink->reporters_lock);
+	return reporter;
+}
+
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_info(struct devlink *devlink,
+				      struct genl_info *info)
+{
+	return devlink_health_reporter_get_from_attrs(devlink, info->attrs);
+}
+
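+/* Dump callbacks have no pre-parsed attributes, so re-parse the request
+ * and look the reporter up by name, taking a reference on success.
+ */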
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
+{
+	struct devlink_health_reporter *reporter;
+	struct devlink *devlink;
+	struct nlattr **attrs;
+	int err;
+
+	attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
+	if (!attrs)
+		return NULL;
+
+	err = nlmsg_parse_deprecated(cb->nlh,
+				     GENL_HDRLEN + devlink_nl_family.hdrsize,
+				     attrs, DEVLINK_ATTR_MAX,
+				     devlink_nl_family.policy, cb->extack);
+	if (err)
+		goto free;
+
+	mutex_lock(&devlink_mutex);
+	devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
+	if (IS_ERR(devlink))
+		goto unlock;
+
+	reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
+	mutex_unlock(&devlink_mutex);
+	kfree(attrs);
+	return reporter;
+unlock:
+	mutex_unlock(&devlink_mutex);
+free:
+	kfree(attrs);
+	return NULL;
+}
+
+static void
+devlink_health_reporter_put(struct devlink_health_reporter *reporter)
+{
+	refcount_dec(&reporter->refcount);
+}
+
+static int
+devlink_nl_health_reporter_fill(struct sk_buff *msg,
+				struct devlink *devlink,
+				struct devlink_health_reporter *reporter,
+				enum devlink_command cmd, u32 portid,
+				u32 seq, int flags)
+{
+	struct nlattr *reporter_attr;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto genlmsg_cancel;
+
+	reporter_attr = nla_nest_start_noflag(msg,
+					      DEVLINK_ATTR_HEALTH_REPORTER);
+	if (!reporter_attr)
+		goto genlmsg_cancel;
+	if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+			   reporter->ops->name))
+		goto reporter_nest_cancel;
+	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
+		       reporter->health_state))
+		goto reporter_nest_cancel;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
+			      reporter->error_count, DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
+			      reporter->recovery_count, DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (reporter->ops->recover &&
+	    nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+			      reporter->graceful_period,
+			      DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (reporter->ops->recover &&
+	    nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
+		       reporter->auto_recover))
+		goto reporter_nest_cancel;
+	if (reporter->dump_fmsg &&
+	    nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+			      jiffies_to_msecs(reporter->dump_ts),
+			      DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (reporter->dump_fmsg &&
+	    nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
+			      reporter->dump_real_ts, DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+
+	nla_nest_end(msg, reporter_attr);
+	genlmsg_end(msg, hdr);
+	return 0;
+
+reporter_nest_cancel:
+	nla_nest_end(msg, reporter_attr);
+genlmsg_cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+						   struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	struct sk_buff *msg;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
+					      DEVLINK_CMD_HEALTH_REPORTER_GET,
+					      info->snd_portid, info->snd_seq,
+					      0);
+	if (err) {
+		nlmsg_free(msg);
+		goto out;
+	}
+
+	err = genlmsg_reply(msg, info);
 out:
+	devlink_health_reporter_put(reporter);
+	return err;
+}
+
+static int
+devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	struct devlink_health_reporter *reporter;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->reporters_lock);
+		list_for_each_entry(reporter, &devlink->reporter_list,
+				    list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_health_reporter_fill(msg, devlink,
+							      reporter,
+							      DEVLINK_CMD_HEALTH_REPORTER_GET,
+							      NETLINK_CB(cb->skb).portid,
+							      cb->nlh->nlmsg_seq,
+							      NLM_F_MULTI);
+			if (err) {
+				mutex_unlock(&devlink->reporters_lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->reporters_lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int
+devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->recover &&
+	    (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
+	     info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
+		reporter->graceful_period =
+			nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
+
+	if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
+		reporter->auto_recover =
+			nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
+
+	devlink_health_reporter_put(reporter);
+	return 0;
+out:
+	devlink_health_reporter_put(reporter);
+	return err;
+}
+
+static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
+						       struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	err = devlink_health_reporter_recover(reporter, NULL);
+
+	devlink_health_reporter_put(reporter);
+	return err;
+}
+
+static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
+							struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	struct devlink_fmsg *fmsg;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->diagnose) {
+		devlink_health_reporter_put(reporter);
+		return -EOPNOTSUPP;
+	}
+
+	fmsg = devlink_fmsg_alloc();
+	if (!fmsg) {
+		devlink_health_reporter_put(reporter);
+		return -ENOMEM;
+	}
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		goto out;
+
+	err = reporter->ops->diagnose(reporter, fmsg);
+	if (err)
+		goto out;
+
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		goto out;
+
+	err = devlink_fmsg_snd(fmsg, info,
+			       DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0);
+
+out:
+	devlink_fmsg_free(fmsg);
+	devlink_health_reporter_put(reporter);
+	return err;
+}
+
+static int
+devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
+					       struct netlink_callback *cb)
+{
+	struct devlink_health_reporter *reporter;
+	u64 start = cb->args[0];
+	int err;
+
+	reporter = devlink_health_reporter_get_from_cb(cb);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->dump) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+	mutex_lock(&reporter->dump_lock);
+	if (!start) {
+		err = devlink_health_do_dump(reporter, NULL);
+		if (err)
+			goto unlock;
+		cb->args[1] = reporter->dump_ts;
+	}
+	if (!reporter->dump_fmsg || cb->args[1] != reporter->dump_ts) {
+		NL_SET_ERR_MSG_MOD(cb->extack, "Dump trampled, please retry");
+		err = -EAGAIN;
+		goto unlock;
+	}
+
+	err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb,
+				  DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET);
+unlock:
+	mutex_unlock(&reporter->dump_lock);
+out:
+	devlink_health_reporter_put(reporter);
+	return err;
+}
+
+static int
+devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
+					       struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->dump) {
+		devlink_health_reporter_put(reporter);
+		return -EOPNOTSUPP;
+	}
+
+	mutex_lock(&reporter->dump_lock);
+	devlink_health_dump_clear(reporter);
+	mutex_unlock(&reporter->dump_lock);
+	devlink_health_reporter_put(reporter);
+	return 0;
+}
+
+struct devlink_stats {
+	u64 rx_bytes;
+	u64 rx_packets;
+	struct u64_stats_sync syncp;
+};
+
+/**
+ * struct devlink_trap_group_item - Packet trap group attributes.
+ * @group: Immutable packet trap group attributes.
+ * @refcount: Number of trap items using the group.
+ * @list: trap_group_list member.
+ * @stats: Trap group statistics.
+ *
+ * Describes packet trap group attributes. Created by devlink during trap
+ * registration.
+ */
+struct devlink_trap_group_item {
+	const struct devlink_trap_group *group;
+	refcount_t refcount;
+	struct list_head list;
+	struct devlink_stats __percpu *stats;
+};
+
+/**
+ * struct devlink_trap_item - Packet trap attributes.
+ * @trap: Immutable packet trap attributes.
+ * @group_item: Associated group item.
+ * @list: trap_list member.
+ * @action: Trap action.
+ * @stats: Trap statistics.
+ * @priv: Driver private information.
+ *
+ * Describes both mutable and immutable packet trap attributes. Created by
+ * devlink during trap registration and used for all trap related operations.
+ */
+struct devlink_trap_item {
+	const struct devlink_trap *trap;
+	struct devlink_trap_group_item *group_item;
+	struct list_head list;
+	enum devlink_trap_action action;
+	struct devlink_stats __percpu *stats;
+	void *priv;
+};
+
+static struct devlink_trap_item *
+devlink_trap_item_lookup(struct devlink *devlink, const char *name)
+{
+	struct devlink_trap_item *trap_item;
+
+	list_for_each_entry(trap_item, &devlink->trap_list, list) {
+		if (!strcmp(trap_item->trap->name, name))
+			return trap_item;
+	}
+
+	return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_get_from_info(struct devlink *devlink,
+				struct genl_info *info)
+{
+	struct nlattr *attr;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
+		return NULL;
+	attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
+
+	return devlink_trap_item_lookup(devlink, nla_data(attr));
+}
+
+static int
+devlink_trap_action_get_from_info(struct genl_info *info,
+				  enum devlink_trap_action *p_trap_action)
+{
+	u8 val;
+
+	val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
+	switch (val) {
+	case DEVLINK_TRAP_ACTION_DROP: /* fall-through */
+	case DEVLINK_TRAP_ACTION_TRAP:
+		*p_trap_action = val;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int devlink_trap_metadata_put(struct sk_buff *msg,
+				     const struct devlink_trap *trap)
+{
+	struct nlattr *attr;
+
+	attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
+	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
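+/* Aggregate the per-CPU trap statistics into @stats, using the u64_stats
+ * seqcount to get a consistent packets/bytes snapshot per CPU.
+ */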
+static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
+				    struct devlink_stats *stats)
+{
+	int i;
+
+	memset(stats, 0, sizeof(*stats));
+	for_each_possible_cpu(i) {
+		struct devlink_stats *cpu_stats;
+		u64 rx_packets, rx_bytes;
+		unsigned int start;
+
+		cpu_stats = per_cpu_ptr(trap_stats, i);
+		do {
+			start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			rx_packets = cpu_stats->rx_packets;
+			rx_bytes = cpu_stats->rx_bytes;
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+		stats->rx_packets += rx_packets;
+		stats->rx_bytes += rx_bytes;
+	}
+}
+
+static int devlink_trap_stats_put(struct sk_buff *msg,
+				  struct devlink_stats __percpu *trap_stats)
+{
+	struct devlink_stats stats;
+	struct nlattr *attr;
+
+	devlink_trap_stats_read(trap_stats, &stats);
+
+	attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+			      stats.rx_packets, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+			      stats.rx_bytes, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
+				const struct devlink_trap_item *trap_item,
+				enum devlink_command cmd, u32 portid, u32 seq,
+				int flags)
+{
+	struct devlink_trap_group_item *group_item = trap_item->group_item;
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+			   group_item->group->name))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
+		goto nla_put_failure;
+
+	if (trap_item->trap->generic &&
+	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
+		goto nla_put_failure;
+
+	err = devlink_trap_metadata_put(msg, trap_item->trap);
+	if (err)
+		goto nla_put_failure;
+
+	err = devlink_trap_stats_put(msg, trap_item->stats);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_item *trap_item;
+	struct sk_buff *msg;
+	int err;
+
+	if (list_empty(&devlink->trap_list))
+		return -EOPNOTSUPP;
+
+	trap_item = devlink_trap_item_get_from_info(devlink, info);
+	if (!trap_item) {
+		NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+		return -ENOENT;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_trap_fill(msg, devlink, trap_item,
+				   DEVLINK_CMD_TRAP_NEW, info->snd_portid,
+				   info->snd_seq, 0);
+	if (err)
+		goto err_trap_fill;
+
+	return genlmsg_reply(msg, info);
+
+err_trap_fill:
+	nlmsg_free(msg);
+	return err;
+}
+
+static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	struct devlink_trap_item *trap_item;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(trap_item, &devlink->trap_list, list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_trap_fill(msg, devlink, trap_item,
+						   DEVLINK_CMD_TRAP_NEW,
+						   NETLINK_CB(cb->skb).portid,
+						   cb->nlh->nlmsg_seq,
+						   NLM_F_MULTI);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int __devlink_trap_action_set(struct devlink *devlink,
+				     struct devlink_trap_item *trap_item,
+				     enum devlink_trap_action trap_action,
+				     struct netlink_ext_ack *extack)
+{
+	int err;
+
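+	/* Non-drop traps (i.e., exceptions) must always be trapped to the
+	 * CPU, so skip them with an extack message instead of failing the
+	 * whole request.
+	 */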
+	if (trap_item->action != trap_action &&
+	    trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
+		NL_SET_ERR_MSG_MOD(extack, "Cannot change action of non-drop traps. Skipping");
+		return 0;
+	}
+
+	err = devlink->ops->trap_action_set(devlink, trap_item->trap,
+					    trap_action);
+	if (err)
+		return err;
+
+	trap_item->action = trap_action;
+
+	return 0;
+}
+
+static int devlink_trap_action_set(struct devlink *devlink,
+				   struct devlink_trap_item *trap_item,
+				   struct genl_info *info)
+{
+	enum devlink_trap_action trap_action;
+	int err;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+		return 0;
+
+	err = devlink_trap_action_get_from_info(info, &trap_action);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+		return -EINVAL;
+	}
+
+	return __devlink_trap_action_set(devlink, trap_item, trap_action,
+					 info->extack);
+}
+
+static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_item *trap_item;
+	int err;
+
+	if (list_empty(&devlink->trap_list))
+		return -EOPNOTSUPP;
+
+	trap_item = devlink_trap_item_get_from_info(devlink, info);
+	if (!trap_item) {
+		NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+		return -ENOENT;
+	}
+
+	err = devlink_trap_action_set(devlink, trap_item, info);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
+{
+	struct devlink_trap_group_item *group_item;
+
+	list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+		if (!strcmp(group_item->group->name, name))
+			return group_item;
+	}
+
+	return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get_from_info(struct devlink *devlink,
+				      struct genl_info *info)
+{
+	char *name;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
+		return NULL;
+	name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
+
+	return devlink_trap_group_item_lookup(devlink, name);
+}
+
+static int
+devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
+			   const struct devlink_trap_group_item *group_item,
+			   enum devlink_command cmd, u32 portid, u32 seq,
+			   int flags)
+{
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+			   group_item->group->name))
+		goto nla_put_failure;
+
+	if (group_item->group->generic &&
+	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+		goto nla_put_failure;
+
+	err = devlink_trap_stats_put(msg, group_item->stats);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb,
+					      struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_group_item *group_item;
+	struct sk_buff *msg;
+	int err;
+
+	if (list_empty(&devlink->trap_group_list))
+		return -EOPNOTSUPP;
+
+	group_item = devlink_trap_group_item_get_from_info(devlink, info);
+	if (!group_item) {
+		NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+		return -ENOENT;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+					 DEVLINK_CMD_TRAP_GROUP_NEW,
+					 info->snd_portid, info->snd_seq, 0);
+	if (err)
+		goto err_trap_group_fill;
+
+	return genlmsg_reply(msg, info);
+
+err_trap_group_fill:
+	nlmsg_free(msg);
+	return err;
+}
+
+static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
+						struct netlink_callback *cb)
+{
+	enum devlink_command cmd = DEVLINK_CMD_TRAP_GROUP_NEW;
+	struct devlink_trap_group_item *group_item;
+	u32 portid = NETLINK_CB(cb->skb).portid;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(group_item, &devlink->trap_group_list,
+				    list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_trap_group_fill(msg, devlink,
+							 group_item, cmd,
+							 portid,
+							 cb->nlh->nlmsg_seq,
+							 NLM_F_MULTI);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int
+__devlink_trap_group_action_set(struct devlink *devlink,
+				struct devlink_trap_group_item *group_item,
+				enum devlink_trap_action trap_action,
+				struct netlink_ext_ack *extack)
+{
+	const char *group_name = group_item->group->name;
+	struct devlink_trap_item *trap_item;
+	int err;
+
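+	/* Apply the requested action to every trap that is a member of
+	 * the group; non-drop traps are skipped by the per-trap helper.
+	 */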
+	list_for_each_entry(trap_item, &devlink->trap_list, list) {
+		if (strcmp(trap_item->trap->group.name, group_name))
+			continue;
+		err = __devlink_trap_action_set(devlink, trap_item,
+						trap_action, extack);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int
+devlink_trap_group_action_set(struct devlink *devlink,
+			      struct devlink_trap_group_item *group_item,
+			      struct genl_info *info)
+{
+	enum devlink_trap_action trap_action;
+	int err;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+		return 0;
+
+	err = devlink_trap_action_get_from_info(info, &trap_action);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+		return -EINVAL;
+	}
+
+	err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
+					      info->extack);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+					      struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_group_item *group_item;
+	int err;
+
+	if (list_empty(&devlink->trap_group_list))
+		return -EOPNOTSUPP;
+
+	group_item = devlink_trap_group_item_get_from_info(devlink, info);
+	if (!group_item) {
+		NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+		return -ENOENT;
+	}
+
+	err = devlink_trap_group_action_set(devlink, group_item, info);
+	if (err)
+		return err;
+
 	return 0;
 }
 
@@ -3600,223 +5791,336 @@
 	[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
+	[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
 	{
 		.cmd = DEVLINK_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_get_doit,
 		.dumpit = devlink_nl_cmd_get_dumpit,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_PORT_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_port_get_doit,
 		.dumpit = devlink_nl_cmd_port_get_dumpit,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_PORT_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_port_set_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
 	},
 	{
 		.cmd = DEVLINK_CMD_PORT_SPLIT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_port_split_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
 	{
 		.cmd = DEVLINK_CMD_PORT_UNSPLIT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_port_unsplit_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_sb_get_doit,
 		.dumpit = devlink_nl_cmd_sb_get_dumpit,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NEED_SB,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_POOL_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_sb_pool_get_doit,
 		.dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NEED_SB,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_POOL_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_sb_pool_set_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NEED_SB,
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_sb_port_pool_get_doit,
 		.dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
 				  DEVLINK_NL_FLAG_NEED_SB,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_sb_port_pool_set_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
 				  DEVLINK_NL_FLAG_NEED_SB,
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
 		.dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
 				  DEVLINK_NL_FLAG_NEED_SB,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
 				  DEVLINK_NL_FLAG_NEED_SB,
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_sb_occ_snapshot_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NEED_SB,
 	},
 	{
 		.cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_sb_occ_max_clear_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NEED_SB,
 	},
 	{
 		.cmd = DEVLINK_CMD_ESWITCH_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_eswitch_get_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
 	{
 		.cmd = DEVLINK_CMD_ESWITCH_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_eswitch_set_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
 	{
 		.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_dpipe_table_get,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_dpipe_entries_get,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_dpipe_headers_get,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_dpipe_table_counters_set,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
 	{
 		.cmd = DEVLINK_CMD_RESOURCE_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_resource_set,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
 	{
 		.cmd = DEVLINK_CMD_RESOURCE_DUMP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_resource_dump,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_RELOAD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_reload,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
 	{
 		.cmd = DEVLINK_CMD_PARAM_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_param_get_doit,
 		.dumpit = devlink_nl_cmd_param_get_dumpit,
-		.policy = devlink_nl_policy,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_PARAM_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_param_set_doit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
 	{
+		.cmd = DEVLINK_CMD_PORT_PARAM_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_port_param_get_doit,
+		.dumpit = devlink_nl_cmd_port_param_get_dumpit,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_PARAM_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_port_param_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+	},
+	{
 		.cmd = DEVLINK_CMD_REGION_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_region_get_doit,
 		.dumpit = devlink_nl_cmd_region_get_dumpit,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
 	{
 		.cmd = DEVLINK_CMD_REGION_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_region_del,
-		.policy = devlink_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
 	{
 		.cmd = DEVLINK_CMD_REGION_READ,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit = devlink_nl_cmd_region_read_dumpit,
-		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_INFO_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_info_get_doit,
+		.dumpit = devlink_nl_cmd_info_get_dumpit,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_get_doit,
+		.dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_recover_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
+	{
+		.cmd = DEVLINK_CMD_FLASH_UPDATE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_flash_update,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_GET,
+		.doit = devlink_nl_cmd_trap_get_doit,
+		.dumpit = devlink_nl_cmd_trap_get_dumpit,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_SET,
+		.doit = devlink_nl_cmd_trap_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+		.doit = devlink_nl_cmd_trap_group_get_doit,
+		.dumpit = devlink_nl_cmd_trap_group_get_dumpit,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_GROUP_SET,
+		.doit = devlink_nl_cmd_trap_group_set_doit,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
@@ -3826,6 +6130,7 @@
 	.name		= DEVLINK_GENL_NAME,
 	.version	= DEVLINK_GENL_VERSION,
 	.maxattr	= DEVLINK_ATTR_MAX,
+	.policy		= devlink_nl_policy,
 	.netnsok	= true,
 	.pre_doit	= devlink_nl_pre_doit,
 	.post_doit	= devlink_nl_post_doit,
@@ -3849,6 +6154,9 @@
 {
 	struct devlink *devlink;
 
+	if (WARN_ON(!ops))
+		return NULL;
+
 	devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
 	if (!devlink)
 		return NULL;
@@ -3860,7 +6168,11 @@
 	INIT_LIST_HEAD(&devlink->resource_list);
 	INIT_LIST_HEAD(&devlink->param_list);
 	INIT_LIST_HEAD(&devlink->region_list);
+	INIT_LIST_HEAD(&devlink->reporter_list);
+	INIT_LIST_HEAD(&devlink->trap_list);
+	INIT_LIST_HEAD(&devlink->trap_group_list);
 	mutex_init(&devlink->lock);
+	mutex_init(&devlink->reporters_lock);
 	return devlink;
 }
 EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -3869,6 +6181,7 @@
  *	devlink_register - Register devlink instance
  *
  *	@devlink: devlink
+ *	@dev: parent device
  */
 int devlink_register(struct devlink *devlink, struct device *dev)
 {
@@ -3889,6 +6202,8 @@
 void devlink_unregister(struct devlink *devlink)
 {
 	mutex_lock(&devlink_mutex);
+	WARN_ON(devlink_reload_supported(devlink) &&
+		devlink->reload_enabled);
 	devlink_notify(devlink, DEVLINK_CMD_DEL);
 	list_del(&devlink->list);
 	mutex_unlock(&devlink_mutex);
@@ -3896,22 +6211,101 @@
 EXPORT_SYMBOL_GPL(devlink_unregister);
 
 /**
+ *	devlink_reload_enable - Enable reload of devlink instance
+ *
+ *	@devlink: devlink
+ *
+ *	Should be called at the end of the device initialization
+ *	process when the reload operation is supported.
+ */
+void devlink_reload_enable(struct devlink *devlink)
+{
+	mutex_lock(&devlink_mutex);
+	devlink->reload_enabled = true;
+	mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_reload_enable);
+
+/**
+ *	devlink_reload_disable - Disable reload of devlink instance
+ *
+ *	@devlink: devlink
+ *
+ *	Should be called at the beginning of the device cleanup
+ *	process when the reload operation is supported.
+ */
+void devlink_reload_disable(struct devlink *devlink)
+{
+	mutex_lock(&devlink_mutex);
+	/* The mutex is taken to ensure that no reload operation is in
+	 * progress while reload is being forbidden.
+	 */
+	devlink->reload_enabled = false;
+	mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_reload_disable);
+
+/**
  *	devlink_free - Free devlink instance resources
  *
  *	@devlink: devlink
  */
 void devlink_free(struct devlink *devlink)
 {
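+	/* All sub-objects registered on this instance must have been
+	 * unregistered by the driver before the instance is freed.
+	 */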
+	mutex_destroy(&devlink->reporters_lock);
+	mutex_destroy(&devlink->lock);
+	WARN_ON(!list_empty(&devlink->trap_group_list));
+	WARN_ON(!list_empty(&devlink->trap_list));
+	WARN_ON(!list_empty(&devlink->reporter_list));
+	WARN_ON(!list_empty(&devlink->region_list));
+	WARN_ON(!list_empty(&devlink->param_list));
+	WARN_ON(!list_empty(&devlink->resource_list));
+	WARN_ON(!list_empty(&devlink->dpipe_table_list));
+	WARN_ON(!list_empty(&devlink->sb_list));
+	WARN_ON(!list_empty(&devlink->port_list));
+
 	kfree(devlink);
 }
 EXPORT_SYMBOL_GPL(devlink_free);
 
+static void devlink_port_type_warn(struct work_struct *work)
+{
+	WARN(true, "Type was not set for devlink port.");
+}
+
+static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
+{
+	/* Ignore CPU and DSA flavours. */
+	return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
+	       devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA;
+}
+
+#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 30)
+
+static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port)
+{
+	if (!devlink_port_type_should_warn(devlink_port))
+		return;
+	/* Schedule a work item to WARN in case the driver does not set
+	 * the port type within the timeout.
+	 */
+	schedule_delayed_work(&devlink_port->type_warn_dw,
+			      DEVLINK_PORT_TYPE_WARN_TIMEOUT);
+}
+
+static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
+{
+	if (!devlink_port_type_should_warn(devlink_port))
+		return;
+	cancel_delayed_work_sync(&devlink_port->type_warn_dw);
+}
+
 /**
  *	devlink_port_register - Register devlink port
  *
  *	@devlink: devlink
  *	@devlink_port: devlink port
- *	@port_index
+ *	@port_index: driver-specific numerical identifier of the port
  *
  *	Register devlink port with provided port index. User can use
  *	any indexing, even hw-related one. devlink_port structure
@@ -3931,8 +6325,12 @@
 	devlink_port->devlink = devlink;
 	devlink_port->index = port_index;
 	devlink_port->registered = true;
+	spin_lock_init(&devlink_port->type_lock);
 	list_add_tail(&devlink_port->list, &devlink->port_list);
+	INIT_LIST_HEAD(&devlink_port->param_list);
 	mutex_unlock(&devlink->lock);
+	INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
+	devlink_port_type_warn_schedule(devlink_port);
 	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
 	return 0;
 }
@@ -3947,6 +6345,7 @@
 {
 	struct devlink *devlink = devlink_port->devlink;
 
+	devlink_port_type_warn_cancel(devlink_port);
 	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
 	mutex_lock(&devlink->lock);
 	list_del(&devlink_port->list);
@@ -3958,8 +6357,13 @@
 				    enum devlink_port_type type,
 				    void *type_dev)
 {
+	if (WARN_ON(!devlink_port->registered))
+		return;
+	devlink_port_type_warn_cancel(devlink_port);
+	spin_lock_bh(&devlink_port->type_lock);
 	devlink_port->type = type;
 	devlink_port->type_dev = type_dev;
+	spin_unlock_bh(&devlink_port->type_lock);
 	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
 }
 
@@ -3972,8 +6376,39 @@
 void devlink_port_type_eth_set(struct devlink_port *devlink_port,
 			       struct net_device *netdev)
 {
-	return __devlink_port_type_set(devlink_port,
-				       DEVLINK_PORT_TYPE_ETH, netdev);
+	const struct net_device_ops *ops = netdev->netdev_ops;
+
+	/* If a driver registers a devlink port, it should set the devlink
+	 * port attributes accordingly so that the compat functions are
+	 * called and the original ops are not used.
+	 */
+	if (ops->ndo_get_phys_port_name) {
+		/* Some drivers use the same set of ndos for netdevs
+		 * that have a devlink_port registered and also for
+		 * those that don't. Make sure ndo_get_phys_port_name
+		 * returns -EOPNOTSUPP here if it is defined.
+		 * Warn if not.
+		 */
+		char name[IFNAMSIZ];
+		int err;
+
+		err = ops->ndo_get_phys_port_name(netdev, name, sizeof(name));
+		WARN_ON(err != -EOPNOTSUPP);
+	}
+	if (ops->ndo_get_port_parent_id) {
+		/* Some drivers use the same set of ndos for netdevs
+		 * that have a devlink_port registered and also for
+		 * those that don't. Make sure ndo_get_port_parent_id
+		 * returns -EOPNOTSUPP here if it is defined.
+		 * Warn if not.
+		 */
+		struct netdev_phys_item_id ppid;
+		int err;
+
+		err = ops->ndo_get_port_parent_id(netdev, &ppid);
+		WARN_ON(err != -EOPNOTSUPP);
+	}
+	__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, netdev);
 }
 EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
 
@@ -3986,8 +6421,7 @@
 void devlink_port_type_ib_set(struct devlink_port *devlink_port,
 			      struct ib_device *ibdev)
 {
-	return __devlink_port_type_set(devlink_port,
-				       DEVLINK_PORT_TYPE_IB, ibdev);
+	__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev);
 }
 EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
 
@@ -3998,11 +6432,34 @@
  */
 void devlink_port_type_clear(struct devlink_port *devlink_port)
 {
-	return __devlink_port_type_set(devlink_port,
-				       DEVLINK_PORT_TYPE_NOTSET, NULL);
+	__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
+	devlink_port_type_warn_schedule(devlink_port);
 }
 EXPORT_SYMBOL_GPL(devlink_port_type_clear);
 
+static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
+				    enum devlink_port_flavour flavour,
+				    const unsigned char *switch_id,
+				    unsigned char switch_id_len)
+{
+	struct devlink_port_attrs *attrs = &devlink_port->attrs;
+
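+	/* Port attributes may only be set before the port is registered. */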
+	if (WARN_ON(devlink_port->registered))
+		return -EEXIST;
+	attrs->set = true;
+	attrs->flavour = flavour;
+	if (switch_id) {
+		attrs->switch_port = true;
+		if (WARN_ON(switch_id_len > MAX_PHYS_ITEM_ID_LEN))
+			switch_id_len = MAX_PHYS_ITEM_ID_LEN;
+		memcpy(attrs->switch_id.id, switch_id, switch_id_len);
+		attrs->switch_id.id_len = switch_id_len;
+	} else {
+		attrs->switch_port = false;
+	}
+	return 0;
+}
+
 /**
  *	devlink_port_attrs_set - Set port attributes
  *
@@ -4013,25 +6470,86 @@
  *	@split: indicates if this is split port
  *	@split_subport_number: if the port is split, this is the number
  *	                       of subport.
+ *	@switch_id: if the port is part of a switch, this is a buffer with
+ *	            its ID, otherwise this is NULL
+ *	@switch_id_len: length of the switch_id buffer
  */
 void devlink_port_attrs_set(struct devlink_port *devlink_port,
 			    enum devlink_port_flavour flavour,
 			    u32 port_number, bool split,
-			    u32 split_subport_number)
+			    u32 split_subport_number,
+			    const unsigned char *switch_id,
+			    unsigned char switch_id_len)
 {
 	struct devlink_port_attrs *attrs = &devlink_port->attrs;
+	int ret;
 
-	attrs->set = true;
-	attrs->flavour = flavour;
-	attrs->port_number = port_number;
+	ret = __devlink_port_attrs_set(devlink_port, flavour,
+				       switch_id, switch_id_len);
+	if (ret)
+		return;
 	attrs->split = split;
-	attrs->split_subport_number = split_subport_number;
-	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+	attrs->phys.port_number = port_number;
+	attrs->phys.split_subport_number = split_subport_number;
 }
 EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
 
-int devlink_port_get_phys_port_name(struct devlink_port *devlink_port,
-				    char *name, size_t len)
+/**
+ *	devlink_port_attrs_pci_pf_set - Set PCI PF port attributes
+ *
+ *	@devlink_port: devlink port
+ *	@switch_id: if the port is part of a switch, this is a buffer with
+ *	            its ID, otherwise this is NULL
+ *	@switch_id_len: length of the switch_id buffer
+ *	@pf: associated PF for the devlink port instance
+ */
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port,
+				   const unsigned char *switch_id,
+				   unsigned char switch_id_len, u16 pf)
+{
+	struct devlink_port_attrs *attrs = &devlink_port->attrs;
+	int ret;
+
+	ret = __devlink_port_attrs_set(devlink_port,
+				       DEVLINK_PORT_FLAVOUR_PCI_PF,
+				       switch_id, switch_id_len);
+	if (ret)
+		return;
+
+	attrs->pci_pf.pf = pf;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
+
+/**
+ *	devlink_port_attrs_pci_vf_set - Set PCI VF port attributes
+ *
+ *	@devlink_port: devlink port
+ *	@switch_id: if the port is part of a switch, this is a buffer with
+ *	            its ID, otherwise this is NULL
+ *	@switch_id_len: length of the switch_id buffer
+ *	@pf: associated PF for the devlink port instance
+ *	@vf: associated VF of a PF for the devlink port instance
+ */
+void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
+				   const unsigned char *switch_id,
+				   unsigned char switch_id_len,
+				   u16 pf, u16 vf)
+{
+	struct devlink_port_attrs *attrs = &devlink_port->attrs;
+	int ret;
+
+	ret = __devlink_port_attrs_set(devlink_port,
+				       DEVLINK_PORT_FLAVOUR_PCI_VF,
+				       switch_id, switch_id_len);
+	if (ret)
+		return;
+	attrs->pci_vf.pf = pf;
+	attrs->pci_vf.vf = vf;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
+
+static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
+					     char *name, size_t len)
 {
 	struct devlink_port_attrs *attrs = &devlink_port->attrs;
 	int n = 0;
@@ -4042,10 +6560,11 @@
 	switch (attrs->flavour) {
 	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
 		if (!attrs->split)
-			n = snprintf(name, len, "p%u", attrs->port_number);
+			n = snprintf(name, len, "p%u", attrs->phys.port_number);
 		else
-			n = snprintf(name, len, "p%us%u", attrs->port_number,
-				     attrs->split_subport_number);
+			n = snprintf(name, len, "p%us%u",
+				     attrs->phys.port_number,
+				     attrs->phys.split_subport_number);
 		break;
 	case DEVLINK_PORT_FLAVOUR_CPU:
 	case DEVLINK_PORT_FLAVOUR_DSA:
@@ -4054,6 +6573,13 @@
 		 */
 		WARN_ON(1);
 		return -EINVAL;
+	case DEVLINK_PORT_FLAVOUR_PCI_PF:
+		n = snprintf(name, len, "pf%u", attrs->pci_pf.pf);
+		break;
+	case DEVLINK_PORT_FLAVOUR_PCI_VF:
+		n = snprintf(name, len, "pf%uvf%u",
+			     attrs->pci_vf.pf, attrs->pci_vf.vf);
+		break;
 	}
 
 	if (n >= len)
@@ -4061,7 +6587,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(devlink_port_get_phys_port_name);
 
 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 			u32 size, u16 ingress_pools_count,
@@ -4240,13 +6765,10 @@
  *
  *	@devlink: devlink
  *	@resource_name: resource's name
- *	@top_hierarchy: top hierarchy
- *	@reload_required: reload is required for new configuration to
- *			  apply
  *	@resource_size: resource's size
  *	@resource_id: resource's id
- *	@parent_reosurce_id: resource's parent id
- *	@size params: size parameters
+ *	@parent_resource_id: resource's parent id
+ *	@size_params: size parameters
  */
 int devlink_resource_register(struct devlink *devlink,
 			      const char *resource_name,
@@ -4449,6 +6971,71 @@
 }
 EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
 
+static int devlink_param_verify(const struct devlink_param *param)
+{
+	if (!param || !param->name || !param->supported_cmodes)
+		return -EINVAL;
+	if (param->generic)
+		return devlink_param_generic_verify(param);
+	else
+		return devlink_param_driver_verify(param);
+}
+
+static int __devlink_params_register(struct devlink *devlink,
+				     unsigned int port_index,
+				     struct list_head *param_list,
+				     const struct devlink_param *params,
+				     size_t params_count,
+				     enum devlink_command reg_cmd,
+				     enum devlink_command unreg_cmd)
+{
+	const struct devlink_param *param = params;
+	int i;
+	int err;
+
+	mutex_lock(&devlink->lock);
+	for (i = 0; i < params_count; i++, param++) {
+		err = devlink_param_verify(param);
+		if (err)
+			goto rollback;
+
+		err = devlink_param_register_one(devlink, port_index,
+						 param_list, param, reg_cmd);
+		if (err)
+			goto rollback;
+	}
+
+	mutex_unlock(&devlink->lock);
+	return 0;
+
+rollback:
+	if (!i)
+		goto unlock;
+	for (param--; i > 0; i--, param--)
+		devlink_param_unregister_one(devlink, port_index, param_list,
+					     param, unreg_cmd);
+unlock:
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+
+static void __devlink_params_unregister(struct devlink *devlink,
+					unsigned int port_index,
+					struct list_head *param_list,
+					const struct devlink_param *params,
+					size_t params_count,
+					enum devlink_command cmd)
+{
+	const struct devlink_param *param = params;
+	int i;
+
+	mutex_lock(&devlink->lock);
+	for (i = 0; i < params_count; i++, param++)
+		devlink_param_unregister_one(devlink, port_index, param_list,
+					     param, cmd);
+	mutex_unlock(&devlink->lock);
+}
+
 /**
  *	devlink_params_register - register configuration parameters
  *
@@ -4462,41 +7049,10 @@
 			    const struct devlink_param *params,
 			    size_t params_count)
 {
-	const struct devlink_param *param = params;
-	int i;
-	int err;
-
-	mutex_lock(&devlink->lock);
-	for (i = 0; i < params_count; i++, param++) {
-		if (!param || !param->name || !param->supported_cmodes) {
-			err = -EINVAL;
-			goto rollback;
-		}
-		if (param->generic) {
-			err = devlink_param_generic_verify(param);
-			if (err)
-				goto rollback;
-		} else {
-			err = devlink_param_driver_verify(param);
-			if (err)
-				goto rollback;
-		}
-		err = devlink_param_register_one(devlink, param);
-		if (err)
-			goto rollback;
-	}
-
-	mutex_unlock(&devlink->lock);
-	return 0;
-
-rollback:
-	if (!i)
-		goto unlock;
-	for (param--; i > 0; i--, param--)
-		devlink_param_unregister_one(devlink, param);
-unlock:
-	mutex_unlock(&devlink->lock);
-	return err;
+	return __devlink_params_register(devlink, 0, &devlink->param_list,
+					 params, params_count,
+					 DEVLINK_CMD_PARAM_NEW,
+					 DEVLINK_CMD_PARAM_DEL);
 }
 EXPORT_SYMBOL_GPL(devlink_params_register);
 
@@ -4510,17 +7066,147 @@
 			       const struct devlink_param *params,
 			       size_t params_count)
 {
-	const struct devlink_param *param = params;
-	int i;
-
-	mutex_lock(&devlink->lock);
-	for (i = 0; i < params_count; i++, param++)
-		devlink_param_unregister_one(devlink, param);
-	mutex_unlock(&devlink->lock);
+	__devlink_params_unregister(devlink, 0, &devlink->param_list,
+				    params, params_count,
+				    DEVLINK_CMD_PARAM_DEL);
 }
 EXPORT_SYMBOL_GPL(devlink_params_unregister);
 
 /**
+ *	devlink_params_publish - publish configuration parameters
+ *
+ *	@devlink: devlink
+ *
+ *	Publish previously registered configuration parameters.
+ */
+void devlink_params_publish(struct devlink *devlink)
+{
+	struct devlink_param_item *param_item;
+
+	list_for_each_entry(param_item, &devlink->param_list, list) {
+		if (param_item->published)
+			continue;
+		param_item->published = true;
+		devlink_param_notify(devlink, 0, param_item,
+				     DEVLINK_CMD_PARAM_NEW);
+	}
+}
+EXPORT_SYMBOL_GPL(devlink_params_publish);
+
+/**
+ *	devlink_params_unpublish - unpublish configuration parameters
+ *
+ *	@devlink: devlink
+ *
+ *	Unpublish previously registered configuration parameters.
+ */
+void devlink_params_unpublish(struct devlink *devlink)
+{
+	struct devlink_param_item *param_item;
+
+	list_for_each_entry(param_item, &devlink->param_list, list) {
+		if (!param_item->published)
+			continue;
+		param_item->published = false;
+		devlink_param_notify(devlink, 0, param_item,
+				     DEVLINK_CMD_PARAM_DEL);
+	}
+}
+EXPORT_SYMBOL_GPL(devlink_params_unpublish);
+
+/**
+ *	devlink_port_params_register - register port configuration parameters
+ *
+ *	@devlink_port: devlink port
+ *	@params: configuration parameters array
+ *	@params_count: number of parameters provided
+ *
+ *	Register the configuration parameters supported by the port.
+ */
+int devlink_port_params_register(struct devlink_port *devlink_port,
+				 const struct devlink_param *params,
+				 size_t params_count)
+{
+	return __devlink_params_register(devlink_port->devlink,
+					 devlink_port->index,
+					 &devlink_port->param_list, params,
+					 params_count,
+					 DEVLINK_CMD_PORT_PARAM_NEW,
+					 DEVLINK_CMD_PORT_PARAM_DEL);
+}
+EXPORT_SYMBOL_GPL(devlink_port_params_register);
+
+/**
+ *	devlink_port_params_unregister - unregister port configuration
+ *	parameters
+ *
+ *	@devlink_port: devlink port
+ *	@params: configuration parameters array
+ *	@params_count: number of parameters provided
+ */
+void devlink_port_params_unregister(struct devlink_port *devlink_port,
+				    const struct devlink_param *params,
+				    size_t params_count)
+{
+	__devlink_params_unregister(devlink_port->devlink,
+				    devlink_port->index,
+				    &devlink_port->param_list,
+				    params, params_count,
+				    DEVLINK_CMD_PORT_PARAM_DEL);
+}
+EXPORT_SYMBOL_GPL(devlink_port_params_unregister);
+
+static int
+__devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id,
+				     union devlink_param_value *init_val)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(param_list, param_id);
+	if (!param_item)
+		return -EINVAL;
+
+	if (!param_item->driverinit_value_valid ||
+	    !devlink_param_cmode_is_supported(param_item->param,
+					      DEVLINK_PARAM_CMODE_DRIVERINIT))
+		return -EOPNOTSUPP;
+
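+	/* String values live in a fixed-size buffer inside the union,
+	 * so they are copied rather than assigned.
+	 */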
+	if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
+		strcpy(init_val->vstr, param_item->driverinit_value.vstr);
+	else
+		*init_val = param_item->driverinit_value;
+
+	return 0;
+}
+
+static int
+__devlink_param_driverinit_value_set(struct devlink *devlink,
+				     unsigned int port_index,
+				     struct list_head *param_list, u32 param_id,
+				     union devlink_param_value init_val,
+				     enum devlink_command cmd)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(param_list, param_id);
+	if (!param_item)
+		return -EINVAL;
+
+	if (!devlink_param_cmode_is_supported(param_item->param,
+					      DEVLINK_PARAM_CMODE_DRIVERINIT))
+		return -EOPNOTSUPP;
+
+	if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
+		strcpy(param_item->driverinit_value.vstr, init_val.vstr);
+	else
+		param_item->driverinit_value = init_val;
+	param_item->driverinit_value_valid = true;
+
+	devlink_param_notify(devlink, port_index, param_item, cmd);
+	return 0;
+}
+
+/**
  *	devlink_param_driverinit_value_get - get configuration parameter
  *					     value for driver initializing
  *
@@ -4534,26 +7220,11 @@
 int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value *init_val)
 {
-	struct devlink_param_item *param_item;
-
-	if (!devlink->ops || !devlink->ops->reload)
+	if (!devlink_reload_supported(devlink))
 		return -EOPNOTSUPP;
 
-	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
-	if (!param_item)
-		return -EINVAL;
-
-	if (!param_item->driverinit_value_valid ||
-	    !devlink_param_cmode_is_supported(param_item->param,
-					      DEVLINK_PARAM_CMODE_DRIVERINIT))
-		return -EOPNOTSUPP;
-
-	if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
-		strcpy(init_val->vstr, param_item->driverinit_value.vstr);
-	else
-		*init_val = param_item->driverinit_value;
-
-	return 0;
+	return __devlink_param_driverinit_value_get(&devlink->param_list,
+						    param_id, init_val);
 }
 EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
 
@@ -4572,28 +7243,63 @@
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value init_val)
 {
-	struct devlink_param_item *param_item;
-
-	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
-	if (!param_item)
-		return -EINVAL;
-
-	if (!devlink_param_cmode_is_supported(param_item->param,
-					      DEVLINK_PARAM_CMODE_DRIVERINIT))
-		return -EOPNOTSUPP;
-
-	if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
-		strcpy(param_item->driverinit_value.vstr, init_val.vstr);
-	else
-		param_item->driverinit_value = init_val;
-	param_item->driverinit_value_valid = true;
-
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
-	return 0;
+	return __devlink_param_driverinit_value_set(devlink, 0,
+						    &devlink->param_list,
+						    param_id, init_val,
+						    DEVLINK_CMD_PARAM_NEW);
 }
 EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
 
 /**
+ *	devlink_port_param_driverinit_value_get - get configuration parameter
+ *						value for driver initializing
+ *
+ *	@devlink_port: devlink_port
+ *	@param_id: parameter ID
+ *	@init_val: value of parameter in driverinit configuration mode
+ *
+ *	This function should be used by the driver to get driverinit
+ *	configuration for initialization after reload command.
+ */
+int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
+					    u32 param_id,
+					    union devlink_param_value *init_val)
+{
+	struct devlink *devlink = devlink_port->devlink;
+
+	if (!devlink_reload_supported(devlink))
+		return -EOPNOTSUPP;
+
+	return __devlink_param_driverinit_value_get(&devlink_port->param_list,
+						    param_id, init_val);
+}
+EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_get);
+
+/**
+ *	devlink_port_param_driverinit_value_set - set value of configuration
+ *						  parameter for driverinit
+ *						  configuration mode
+ *
+ *	@devlink_port: devlink_port
+ *	@param_id: parameter ID
+ *	@init_val: value of parameter to set for driverinit configuration mode
+ *
+ *	This function should be used by the driver to set driverinit
+ *	configuration mode default value.
+ */
+int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
+					    u32 param_id,
+					    union devlink_param_value init_val)
+{
+	return __devlink_param_driverinit_value_set(devlink_port->devlink,
+						    devlink_port->index,
+						    &devlink_port->param_list,
+						    param_id, init_val,
+						    DEVLINK_CMD_PORT_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set);
+
+/**
  *	devlink_param_value_changed - notify devlink on a parameter's value
  *				      change. Should be called by the driver
  *				      right after the change.
@@ -4604,7 +7310,7 @@
  *	This function should be used by the driver to notify devlink on value
  *	change, excluding driverinit configuration mode.
  *	For driverinit configuration mode driver should use the function
 *	devlink_param_driverinit_value_set() instead.
  */
 void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
@@ -4613,11 +7318,38 @@
 	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
 	WARN_ON(!param_item);
 
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
 }
 EXPORT_SYMBOL_GPL(devlink_param_value_changed);
 
 /**
+ *	devlink_port_param_value_changed - notify devlink on a parameter's
+ *					   value change. Should be called by
+ *					   the driver right after the change.
+ *
+ *	@devlink_port: devlink_port
+ *	@param_id: parameter ID
+ *
+ *	This function should be used by the driver to notify devlink on value
+ *	change, excluding driverinit configuration mode.
+ *	For driverinit configuration mode driver should use the function
+ *	devlink_port_param_driverinit_value_set() instead.
+ */
+void devlink_port_param_value_changed(struct devlink_port *devlink_port,
+				      u32 param_id)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(&devlink_port->param_list,
+					      param_id);
+	WARN_ON(!param_item);
+
+	devlink_param_notify(devlink_port->devlink, devlink_port->index,
+			     param_item, DEVLINK_CMD_PORT_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devlink_port_param_value_changed);
+
+/**
  *	devlink_param_value_str_fill - Safely fill-up the string preventing
  *				       from overflow of the preallocated buffer
  *
@@ -4694,7 +7426,7 @@
 
 	/* Free all snapshots of region */
 	list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
-		devlink_region_snapshot_del(snapshot);
+		devlink_region_snapshot_del(region, snapshot);
 
 	list_del(&region->list);
 
@@ -4733,13 +7465,12 @@
  *	Multiple snapshots can be created on a region.
  *	The @snapshot_id should be obtained using the getter function.
  *
- *	@devlink_region: devlink region of the snapshot
- *	@data_len: size of snapshot data
+ *	@region: devlink region of the snapshot
  *	@data: snapshot data
  *	@snapshot_id: snapshot id to be created
  *	@data_destructor: pointer to destructor function to free data
  */
-int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+int devlink_region_snapshot_create(struct devlink_region *region,
 				   u8 *data, u32 snapshot_id,
 				   devlink_snapshot_data_dest_t *data_destructor)
 {
@@ -4769,7 +7500,6 @@
 	snapshot->id = snapshot_id;
 	snapshot->region = region;
 	snapshot->data = data;
-	snapshot->data_len = data_len;
 	snapshot->data_destructor = data_destructor;
 
 	list_add_tail(&snapshot->list, &region->snapshot_list);
@@ -4786,20 +7516,596 @@
 }
 EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
 
-static int __init devlink_module_init(void)
+#define DEVLINK_TRAP(_id, _type)					      \
+	{								      \
+		.type = DEVLINK_TRAP_TYPE_##_type,			      \
+		.id = DEVLINK_TRAP_GENERIC_ID_##_id,			      \
+		.name = DEVLINK_TRAP_GENERIC_NAME_##_id,		      \
+	}
+
+static const struct devlink_trap devlink_trap_generic[] = {
+	DEVLINK_TRAP(SMAC_MC, DROP),
+	DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
+	DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
+	DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
+	DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
+	DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
+	DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
+	DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
+	DEVLINK_TRAP(TAIL_DROP, DROP),
+};
+
+#define DEVLINK_TRAP_GROUP(_id)						      \
+	{								      \
+		.id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id,		      \
+		.name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id,		      \
+	}
+
+static const struct devlink_trap_group devlink_trap_group_generic[] = {
+	DEVLINK_TRAP_GROUP(L2_DROPS),
+	DEVLINK_TRAP_GROUP(L3_DROPS),
+	DEVLINK_TRAP_GROUP(BUFFER_DROPS),
+};
+
+static int devlink_trap_generic_verify(const struct devlink_trap *trap)
+{
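+	/* Generic trap IDs index directly into devlink_trap_generic[],
+	 * so the name and type supplied by the driver must match the
+	 * generic definitions.
+	 */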
+	if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
+		return -EINVAL;
+
+	if (trap->type != devlink_trap_generic[trap->id].type)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int devlink_trap_driver_verify(const struct devlink_trap *trap)
+{
+	int i;
+
+	if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
+		if (!strcmp(trap->name, devlink_trap_generic[i].name))
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
+static int devlink_trap_verify(const struct devlink_trap *trap)
+{
+	if (!trap || !trap->name || !trap->group.name)
+		return -EINVAL;
+
+	if (trap->generic)
+		return devlink_trap_generic_verify(trap);
+	else
+		return devlink_trap_driver_verify(trap);
+}
+
+static int
+devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
+{
+	if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
+{
+	int i;
+
+	if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
+		if (!strcmp(group->name, devlink_trap_group_generic[i].name))
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
+static int devlink_trap_group_verify(const struct devlink_trap_group *group)
+{
+	if (group->generic)
+		return devlink_trap_group_generic_verify(group);
+	else
+		return devlink_trap_group_driver_verify(group);
+}
+
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+			  const struct devlink_trap_group_item *group_item,
+			  enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
+		     cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
+					 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_create(struct devlink *devlink,
+			       const struct devlink_trap_group *group)
+{
+	struct devlink_trap_group_item *group_item;
+	int err;
+
+	err = devlink_trap_group_verify(group);
+	if (err)
+		return ERR_PTR(err);
+
+	group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
+	if (!group_item)
+		return ERR_PTR(-ENOMEM);
+
+	group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+	if (!group_item->stats) {
+		err = -ENOMEM;
+		goto err_stats_alloc;
+	}
+
+	group_item->group = group;
+	refcount_set(&group_item->refcount, 1);
+
+	if (devlink->ops->trap_group_init) {
+		err = devlink->ops->trap_group_init(devlink, group);
+		if (err)
+			goto err_group_init;
+	}
+
+	list_add_tail(&group_item->list, &devlink->trap_group_list);
+	devlink_trap_group_notify(devlink, group_item,
+				  DEVLINK_CMD_TRAP_GROUP_NEW);
+
+	return group_item;
+
+err_group_init:
+	free_percpu(group_item->stats);
+err_stats_alloc:
+	kfree(group_item);
+	return ERR_PTR(err);
+}
+
+static void
+devlink_trap_group_item_destroy(struct devlink *devlink,
+				struct devlink_trap_group_item *group_item)
+{
+	devlink_trap_group_notify(devlink, group_item,
+				  DEVLINK_CMD_TRAP_GROUP_DEL);
+	list_del(&group_item->list);
+	free_percpu(group_item->stats);
+	kfree(group_item);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get(struct devlink *devlink,
+			    const struct devlink_trap_group *group)
+{
+	struct devlink_trap_group_item *group_item;
+
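+	/* Group items are reference counted and shared by all traps
+	 * that belong to the same group; create one only on first use.
+	 */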
+	group_item = devlink_trap_group_item_lookup(devlink, group->name);
+	if (group_item) {
+		refcount_inc(&group_item->refcount);
+		return group_item;
+	}
+
+	return devlink_trap_group_item_create(devlink, group);
+}
+
+static void
+devlink_trap_group_item_put(struct devlink *devlink,
+			    struct devlink_trap_group_item *group_item)
+{
+	if (!refcount_dec_and_test(&group_item->refcount))
+		return;
+
+	devlink_trap_group_item_destroy(devlink, group_item);
+}
+
+static int
+devlink_trap_item_group_link(struct devlink *devlink,
+			     struct devlink_trap_item *trap_item)
+{
+	struct devlink_trap_group_item *group_item;
+
+	group_item = devlink_trap_group_item_get(devlink,
+						 &trap_item->trap->group);
+	if (IS_ERR(group_item))
+		return PTR_ERR(group_item);
+
+	trap_item->group_item = group_item;
+
+	return 0;
+}
+
+static void
+devlink_trap_item_group_unlink(struct devlink *devlink,
+			       struct devlink_trap_item *trap_item)
+{
+	devlink_trap_group_item_put(devlink, trap_item->group_item);
+}
+
+static void devlink_trap_notify(struct devlink *devlink,
+				const struct devlink_trap_item *trap_item,
+				enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
+		     cmd != DEVLINK_CMD_TRAP_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int
+devlink_trap_register(struct devlink *devlink,
+		      const struct devlink_trap *trap, void *priv)
+{
+	struct devlink_trap_item *trap_item;
+	int err;
+
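+	/* Trap names must be unique within a devlink instance. */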
+	if (devlink_trap_item_lookup(devlink, trap->name))
+		return -EEXIST;
+
+	trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
+	if (!trap_item)
+		return -ENOMEM;
+
+	trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+	if (!trap_item->stats) {
+		err = -ENOMEM;
+		goto err_stats_alloc;
+	}
+
+	trap_item->trap = trap;
+	trap_item->action = trap->init_action;
+	trap_item->priv = priv;
+
+	err = devlink_trap_item_group_link(devlink, trap_item);
+	if (err)
+		goto err_group_link;
+
+	err = devlink->ops->trap_init(devlink, trap, trap_item);
+	if (err)
+		goto err_trap_init;
+
+	list_add_tail(&trap_item->list, &devlink->trap_list);
+	devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+	return 0;
+
+err_trap_init:
+	devlink_trap_item_group_unlink(devlink, trap_item);
+err_group_link:
+	free_percpu(trap_item->stats);
+err_stats_alloc:
+	kfree(trap_item);
+	return err;
+}
+
+static void devlink_trap_unregister(struct devlink *devlink,
+				    const struct devlink_trap *trap)
+{
+	struct devlink_trap_item *trap_item;
+
+	trap_item = devlink_trap_item_lookup(devlink, trap->name);
+	if (WARN_ON_ONCE(!trap_item))
+		return;
+
+	devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+	list_del(&trap_item->list);
+	if (devlink->ops->trap_fini)
+		devlink->ops->trap_fini(devlink, trap, trap_item);
+	devlink_trap_item_group_unlink(devlink, trap_item);
+	free_percpu(trap_item->stats);
+	kfree(trap_item);
+}
+
+static void devlink_trap_disable(struct devlink *devlink,
+				 const struct devlink_trap *trap)
+{
+	struct devlink_trap_item *trap_item;
+
+	trap_item = devlink_trap_item_lookup(devlink, trap->name);
+	if (WARN_ON_ONCE(!trap_item))
+		return;
+
+	devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP);
+	trap_item->action = DEVLINK_TRAP_ACTION_DROP;
+}
+
+/**
+ * devlink_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+int devlink_traps_register(struct devlink *devlink,
+			   const struct devlink_trap *traps,
+			   size_t traps_count, void *priv)
+{
+	int i, err;
+
+	if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
+		return -EINVAL;
+
+	mutex_lock(&devlink->lock);
+	for (i = 0; i < traps_count; i++) {
+		const struct devlink_trap *trap = &traps[i];
+
+		err = devlink_trap_verify(trap);
+		if (err)
+			goto err_trap_verify;
+
+		err = devlink_trap_register(devlink, trap, priv);
+		if (err)
+			goto err_trap_register;
+	}
+	mutex_unlock(&devlink->lock);
+
+	return 0;
+
+err_trap_register:
+err_trap_verify:
+	for (i--; i >= 0; i--)
+		devlink_trap_unregister(devlink, &traps[i]);
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_traps_register);
+
+/**
+ * devlink_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ */
+void devlink_traps_unregister(struct devlink *devlink,
+			      const struct devlink_trap *traps,
+			      size_t traps_count)
+{
+	int i;
+
+	mutex_lock(&devlink->lock);
+	/* Make sure we do not have any packets in-flight while unregistering
+	 * traps by disabling all of them and waiting for a grace period.
+	 */
+	for (i = traps_count - 1; i >= 0; i--)
+		devlink_trap_disable(devlink, &traps[i]);
+	synchronize_rcu();
+	for (i = traps_count - 1; i >= 0; i--)
+		devlink_trap_unregister(devlink, &traps[i]);
+	mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_traps_unregister);
+
+static void
+devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
+			  size_t skb_len)
+{
+	struct devlink_stats *stats;
+
+	stats = this_cpu_ptr(trap_stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_bytes += skb_len;
+	stats->rx_packets++;
+	u64_stats_update_end(&stats->syncp);
+}
+
+static void
+devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata,
+				  const struct devlink_trap_item *trap_item,
+				  struct devlink_port *in_devlink_port)
+{
+	struct devlink_trap_group_item *group_item = trap_item->group_item;
+
+	hw_metadata->trap_group_name = group_item->group->name;
+	hw_metadata->trap_name = trap_item->trap->name;
+
+	spin_lock(&in_devlink_port->type_lock);
+	if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+		hw_metadata->input_dev = in_devlink_port->type_dev;
+	spin_unlock(&in_devlink_port->type_lock);
+}
+
+/**
+ * devlink_trap_report - Report trapped packet to drop monitor.
+ * @devlink: devlink.
+ * @skb: Trapped packet.
+ * @trap_ctx: Trap context.
+ * @in_devlink_port: Input devlink port.
+ */
+void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
+			 void *trap_ctx, struct devlink_port *in_devlink_port)
+{
+	struct devlink_trap_item *trap_item = trap_ctx;
+	struct net_dm_hw_metadata hw_metadata = {};
+
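+	/* Account the packet on both the trap and its group before
+	 * handing the metadata to the drop monitor.
+	 */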
+	devlink_trap_stats_update(trap_item->stats, skb->len);
+	devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
+
+	devlink_trap_report_metadata_fill(&hw_metadata, trap_item,
+					  in_devlink_port);
+	net_dm_hw_report(skb, &hw_metadata);
+}
+EXPORT_SYMBOL_GPL(devlink_trap_report);
+
+/**
+ * devlink_trap_ctx_priv - Trap context to driver private information.
+ * @trap_ctx: Trap context.
+ *
+ * Return: Driver private information passed during registration.
+ */
+void *devlink_trap_ctx_priv(void *trap_ctx)
+{
+	struct devlink_trap_item *trap_item = trap_ctx;
+
+	return trap_item->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
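+/* Illustrative RX-path usage (editor's sketch; "foo" names hypothetical).
+ * A driver that trapped a packet in hardware reports it together with the
+ * trap context cookie it received in ->trap_init():
+ *
+ *	struct foo_priv *priv = devlink_trap_ctx_priv(trap_ctx);
+ *
+ *	devlink_trap_report(priv->devlink, skb, trap_ctx, in_devlink_port);
+ *
+ * This updates the per-trap and per-group statistics and, when drop monitor
+ * is tracking hardware drops, forwards the packet to user space.
+ */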
+
+static void __devlink_compat_running_version(struct devlink *devlink,
+					     char *buf, size_t len)
+{
+	const struct nlattr *nlattr;
+	struct devlink_info_req req;
+	struct sk_buff *msg;
+	int rem, err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	req.msg = msg;
+	err = devlink->ops->info_get(devlink, &req, NULL);
+	if (err)
+		goto free_msg;
+
+	nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
+		const struct nlattr *kv;
+		int rem_kv;
+
+		if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
+			continue;
+
+		nla_for_each_nested(kv, nlattr, rem_kv) {
+			if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
+				continue;
+
+			strlcat(buf, nla_data(kv), len);
+			strlcat(buf, " ", len);
+		}
+	}
+free_msg:
+	nlmsg_free(msg);
+}
+
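+/* devlink_compat_running_version() and devlink_compat_flash_update() are
+ * called from ethtool ioctl context with the RTNL mutex held. Because
+ * ->info_get() and ->flash_update() may sleep for a long time, RTNL is
+ * dropped around the devlink operation; the netdevice is held via
+ * dev_hold() so it cannot disappear while the lock is released.
+ */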
+void devlink_compat_running_version(struct net_device *dev,
+				    char *buf, size_t len)
+{
+	struct devlink *devlink;
+
+	dev_hold(dev);
+	rtnl_unlock();
+
+	devlink = netdev_to_devlink(dev);
+	if (!devlink || !devlink->ops->info_get)
+		goto out;
+
+	mutex_lock(&devlink->lock);
+	__devlink_compat_running_version(devlink, buf, len);
+	mutex_unlock(&devlink->lock);
+
+out:
+	rtnl_lock();
+	dev_put(dev);
+}
+
+int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
+{
+	struct devlink *devlink;
+	int ret;
+
+	dev_hold(dev);
+	rtnl_unlock();
+
+	devlink = netdev_to_devlink(dev);
+	if (!devlink || !devlink->ops->flash_update) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	mutex_lock(&devlink->lock);
+	ret = devlink->ops->flash_update(devlink, file_name, NULL, NULL);
+	mutex_unlock(&devlink->lock);
+
+out:
+	rtnl_lock();
+	dev_put(dev);
+
+	return ret;
+}
+
+int devlink_compat_phys_port_name_get(struct net_device *dev,
+				      char *name, size_t len)
+{
+	struct devlink_port *devlink_port;
+
+	/* RTNL mutex is held here which ensures that devlink_port
+	 * instance cannot disappear in the middle. No need to take
+	 * any devlink lock as only permanent values are accessed.
+	 */
+	ASSERT_RTNL();
+
+	devlink_port = netdev_to_devlink_port(dev);
+	if (!devlink_port)
+		return -EOPNOTSUPP;
+
+	return __devlink_port_phys_port_name_get(devlink_port, name, len);
+}
+
+int devlink_compat_switch_id_get(struct net_device *dev,
+				 struct netdev_phys_item_id *ppid)
+{
+	struct devlink_port *devlink_port;
+
+	/* Caller must hold RTNL mutex or reference to dev, which ensures that
+	 * devlink_port instance cannot disappear in the middle. No need to take
+	 * any devlink lock as only permanent values are accessed.
+	 */
+	devlink_port = netdev_to_devlink_port(dev);
+	if (!devlink_port || !devlink_port->attrs.switch_port)
+		return -EOPNOTSUPP;
+
+	memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid));
+
+	return 0;
+}
+
+static int __init devlink_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
 }
 
-static void __exit devlink_module_exit(void)
-{
-	genl_unregister_family(&devlink_nl_family);
-}
-
-module_init(devlink_module_init);
-module_exit(devlink_module_exit);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
-MODULE_DESCRIPTION("Network physical device Netlink interface");
-MODULE_ALIAS_GENL_FAMILY(DEVLINK_GENL_NAME);
+subsys_initcall(devlink_init);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index c7785ef..536e032 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Monitoring code for network dropped packet alerts
  *
@@ -25,6 +26,7 @@
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <net/drop_monitor.h>
 #include <net/genetlink.h>
 #include <net/netevent.h>
 
@@ -42,13 +44,44 @@
  * netlink alerts
  */
 static int trace_state = TRACE_OFF;
-static DEFINE_MUTEX(trace_state_mutex);
+static bool monitor_hw;
+
+/* net_dm_mutex
+ *
+ * An overall lock guarding every operation coming from userspace.
+ * It also guards the global 'hw_stats_list' list.
+ */
+static DEFINE_MUTEX(net_dm_mutex);
+
+struct net_dm_stats {
+	u64 dropped;
+	struct u64_stats_sync syncp;
+};
+
+#define NET_DM_MAX_HW_TRAP_NAME_LEN 40
+
+struct net_dm_hw_entry {
+	char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN];
+	u32 count;
+};
+
+struct net_dm_hw_entries {
+	u32 num_entries;
+	struct net_dm_hw_entry entries[];
+};
 
 struct per_cpu_dm_data {
-	spinlock_t		lock;
-	struct sk_buff		*skb;
+	spinlock_t		lock;	/* Protects 'skb', 'hw_entries' and
+					 * 'send_timer'
+					 */
+	union {
+		struct sk_buff			*skb;
+		struct net_dm_hw_entries	*hw_entries;
+	};
+	struct sk_buff_head	drop_queue;
 	struct work_struct	dm_alert_work;
 	struct timer_list	send_timer;
+	struct net_dm_stats	stats;
 };
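+/* In summary alert mode the software path aggregates drops into the
+ * pre-allocated 'skb' while the hardware path aggregates them into
+ * 'hw_entries' (hence the union); in packet alert mode neither is used and
+ * trapped packets are queued on 'drop_queue' instead.
+ */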
 
 struct dm_hw_stat_delta {
@@ -62,12 +95,37 @@
 static struct genl_family net_drop_monitor_family;
 
 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
+static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
 
 static int dm_hit_limit = 64;
 static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
 
+static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
+static u32 net_dm_trunc_len;
+static u32 net_dm_queue_len = 1000;
+
+struct net_dm_alert_ops {
+	void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
+				void *location);
+	void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
+				int work, int budget);
+	void (*work_item_func)(struct work_struct *work);
+	void (*hw_work_item_func)(struct work_struct *work);
+	void (*hw_probe)(struct sk_buff *skb,
+			 const struct net_dm_hw_metadata *hw_metadata);
+};
+
+struct net_dm_skb_cb {
+	union {
+		struct net_dm_hw_metadata *hw_metadata;
+		void *pc;
+	};
+};
+
+#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
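+/* struct net_dm_skb_cb lives in the skb control block and must never
+ * outgrow it. A build-time guard along these lines (editor's sketch) would
+ * enforce that:
+ *
+ *	BUILD_BUG_ON(sizeof(struct net_dm_skb_cb) >
+ *		     FIELD_SIZEOF(struct sk_buff, cb));
+ */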
+
 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
@@ -234,48 +292,844 @@
 	rcu_read_unlock();
 }
 
-static int set_all_monitor_traces(int state)
+static struct net_dm_hw_entries *
+net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
+{
+	struct net_dm_hw_entries *hw_entries;
+	unsigned long flags;
+
+	hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit),
+			     GFP_KERNEL);
+	if (!hw_entries) {
+		/* If the memory allocation failed, try another allocation in
+		 * 1/10 second. Otherwise, the probe function will keep
+		 * bailing out, since 'hw_entries' remains NULL.
+		 */
+		mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
+	}
+
+	spin_lock_irqsave(&hw_data->lock, flags);
+	swap(hw_data->hw_entries, hw_entries);
+	spin_unlock_irqrestore(&hw_data->lock, flags);
+
+	return hw_entries;
+}
+
+static int net_dm_hw_entry_put(struct sk_buff *msg,
+			       const struct net_dm_hw_entry *hw_entry)
+{
+	struct nlattr *attr;
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_hw_entries_put(struct sk_buff *msg,
+				 const struct net_dm_hw_entries *hw_entries)
+{
+	struct nlattr *attr;
+	int i;
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES);
+	if (!attr)
+		return -EMSGSIZE;
+
+	for (i = 0; i < hw_entries->num_entries; i++) {
+		int rc;
+
+		rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]);
+		if (rc)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int
+net_dm_hw_summary_report_fill(struct sk_buff *msg,
+			      const struct net_dm_hw_entries *hw_entries)
+{
+	struct net_dm_alert_msg anc_hdr = { 0 };
+	void *hdr;
+	int rc;
+
+	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+			  NET_DM_CMD_ALERT);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	/* We need to put the ancillary header in order not to break user
+	 * space.
+	 */
+	if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr))
+		goto nla_put_failure;
+
+	rc = net_dm_hw_entries_put(msg, hw_entries);
+	if (rc)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void net_dm_hw_summary_work(struct work_struct *work)
+{
+	struct net_dm_hw_entries *hw_entries;
+	struct per_cpu_dm_data *hw_data;
+	struct sk_buff *msg;
+	int rc;
+
+	hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+	hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+	if (!hw_entries)
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	rc = net_dm_hw_summary_report_fill(msg, hw_entries);
+	if (rc) {
+		nlmsg_free(msg);
+		goto out;
+	}
+
+	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+	kfree(hw_entries);
+}
+
+static void
+net_dm_hw_summary_probe(struct sk_buff *skb,
+			const struct net_dm_hw_metadata *hw_metadata)
+{
+	struct net_dm_hw_entries *hw_entries;
+	struct net_dm_hw_entry *hw_entry;
+	struct per_cpu_dm_data *hw_data;
+	unsigned long flags;
+	int i;
+
+	hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+	spin_lock_irqsave(&hw_data->lock, flags);
+	hw_entries = hw_data->hw_entries;
+
+	if (!hw_entries)
+		goto out;
+
+	for (i = 0; i < hw_entries->num_entries; i++) {
+		hw_entry = &hw_entries->entries[i];
+		if (!strncmp(hw_entry->trap_name, hw_metadata->trap_name,
+			     NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) {
+			hw_entry->count++;
+			goto out;
+		}
+	}
+	if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit))
+		goto out;
+
+	hw_entry = &hw_entries->entries[hw_entries->num_entries];
+	strlcpy(hw_entry->trap_name, hw_metadata->trap_name,
+		NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
+	hw_entry->count = 1;
+	hw_entries->num_entries++;
+
+	if (!timer_pending(&hw_data->send_timer)) {
+		hw_data->send_timer.expires = jiffies + dm_delay * HZ;
+		add_timer(&hw_data->send_timer);
+	}
+
+out:
+	spin_unlock_irqrestore(&hw_data->lock, flags);
+}
+
+static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
+	.kfree_skb_probe	= trace_kfree_skb_hit,
+	.napi_poll_probe	= trace_napi_poll_hit,
+	.work_item_func		= send_dm_alert,
+	.hw_work_item_func	= net_dm_hw_summary_work,
+	.hw_probe		= net_dm_hw_summary_probe,
+};
+
+static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
+					      struct sk_buff *skb,
+					      void *location)
+{
+	ktime_t tstamp = ktime_get_real();
+	struct per_cpu_dm_data *data;
+	struct sk_buff *nskb;
+	unsigned long flags;
+
+	if (!skb_mac_header_was_set(skb))
+		return;
+
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return;
+
+	NET_DM_SKB_CB(nskb)->pc = location;
+	/* Override the timestamp because we care about the time when the
+	 * packet was dropped.
+	 */
+	nskb->tstamp = tstamp;
+
+	data = this_cpu_ptr(&dm_cpu_data);
+
+	spin_lock_irqsave(&data->drop_queue.lock, flags);
+	if (skb_queue_len(&data->drop_queue) < net_dm_queue_len)
+		__skb_queue_tail(&data->drop_queue, nskb);
+	else
+		goto unlock_free;
+	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+
+	schedule_work(&data->dm_alert_work);
+
+	return;
+
+unlock_free:
+	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+	u64_stats_update_begin(&data->stats.syncp);
+	data->stats.dropped++;
+	u64_stats_update_end(&data->stats.syncp);
+	consume_skb(nskb);
+}
+
+static void net_dm_packet_trace_napi_poll_hit(void *ignore,
+					      struct napi_struct *napi,
+					      int work, int budget)
+{
+}
+
+static size_t net_dm_in_port_size(void)
+{
+	       /* NET_DM_ATTR_IN_PORT nest */
+	return nla_total_size(0) +
+	       /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
+	       nla_total_size(sizeof(u32)) +
+	       /* NET_DM_ATTR_PORT_NETDEV_NAME */
+	       nla_total_size(IFNAMSIZ + 1);
+}
+
+#define NET_DM_MAX_SYMBOL_LEN 40
+
+static size_t net_dm_packet_report_size(size_t payload_len)
+{
+	size_t size;
+
+	size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+	return NLMSG_ALIGN(size) +
+	       /* NET_DM_ATTR_ORIGIN */
+	       nla_total_size(sizeof(u16)) +
+	       /* NET_DM_ATTR_PC */
+	       nla_total_size(sizeof(u64)) +
+	       /* NET_DM_ATTR_SYMBOL */
+	       nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) +
+	       /* NET_DM_ATTR_IN_PORT */
+	       net_dm_in_port_size() +
+	       /* NET_DM_ATTR_TIMESTAMP */
+	       nla_total_size(sizeof(u64)) +
+	       /* NET_DM_ATTR_ORIG_LEN */
+	       nla_total_size(sizeof(u32)) +
+	       /* NET_DM_ATTR_PROTO */
+	       nla_total_size(sizeof(u16)) +
+	       /* NET_DM_ATTR_PAYLOAD */
+	       nla_total_size(payload_len);
+}
+
+static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex,
+					    const char *name)
+{
+	struct nlattr *attr;
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (ifindex &&
+	    nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
+		goto nla_put_failure;
+
+	if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
+				     size_t payload_len)
+{
+	u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
+	char buf[NET_DM_MAX_SYMBOL_LEN];
+	struct nlattr *attr;
+	void *hdr;
+	int rc;
+
+	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+			  NET_DM_CMD_PACKET_ALERT);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
+	if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
+		goto nla_put_failure;
+
+	rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL);
+	if (rc)
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
+			      ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+		goto nla_put_failure;
+
+	if (!payload_len)
+		goto out;
+
+	if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
+		goto nla_put_failure;
+
+	attr = skb_put(msg, nla_total_size(payload_len));
+	attr->nla_type = NET_DM_ATTR_PAYLOAD;
+	attr->nla_len = nla_attr_size(payload_len);
+	if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+		goto nla_put_failure;
+
+out:
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+#define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO)
+
+static void net_dm_packet_report(struct sk_buff *skb)
+{
+	struct sk_buff *msg;
+	size_t payload_len;
+	int rc;
+
+	/* Make sure we start copying the packet from the MAC header */
+	if (skb->data > skb_mac_header(skb))
+		skb_push(skb, skb->data - skb_mac_header(skb));
+	else
+		skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+	/* Ensure packet fits inside a single netlink attribute */
+	payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+	if (net_dm_trunc_len)
+		payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
+
+	msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	rc = net_dm_packet_report_fill(msg, skb, payload_len);
+	if (rc) {
+		nlmsg_free(msg);
+		goto out;
+	}
+
+	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+	consume_skb(skb);
+}
+
+static void net_dm_packet_work(struct work_struct *work)
+{
+	struct per_cpu_dm_data *data;
+	struct sk_buff_head list;
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+	__skb_queue_head_init(&list);
+
+	spin_lock_irqsave(&data->drop_queue.lock, flags);
+	skb_queue_splice_tail_init(&data->drop_queue, &list);
+	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+
+	while ((skb = __skb_dequeue(&list)))
+		net_dm_packet_report(skb);
+}
+
+static size_t
+net_dm_hw_packet_report_size(size_t payload_len,
+			     const struct net_dm_hw_metadata *hw_metadata)
+{
+	size_t size;
+
+	size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+	return NLMSG_ALIGN(size) +
+	       /* NET_DM_ATTR_ORIGIN */
+	       nla_total_size(sizeof(u16)) +
+	       /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */
+	       nla_total_size(strlen(hw_metadata->trap_group_name) + 1) +
+	       /* NET_DM_ATTR_HW_TRAP_NAME */
+	       nla_total_size(strlen(hw_metadata->trap_name) + 1) +
+	       /* NET_DM_ATTR_IN_PORT */
+	       net_dm_in_port_size() +
+	       /* NET_DM_ATTR_TIMESTAMP */
+	       nla_total_size(sizeof(u64)) +
+	       /* NET_DM_ATTR_ORIG_LEN */
+	       nla_total_size(sizeof(u32)) +
+	       /* NET_DM_ATTR_PROTO */
+	       nla_total_size(sizeof(u16)) +
+	       /* NET_DM_ATTR_PAYLOAD */
+	       nla_total_size(payload_len);
+}
+
+static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
+					struct sk_buff *skb, size_t payload_len)
+{
+	struct net_dm_hw_metadata *hw_metadata;
+	struct nlattr *attr;
+	void *hdr;
+
+	hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+
+	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+			  NET_DM_CMD_PACKET_ALERT);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME,
+			   hw_metadata->trap_group_name))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME,
+			   hw_metadata->trap_name))
+		goto nla_put_failure;
+
+	if (hw_metadata->input_dev) {
+		struct net_device *dev = hw_metadata->input_dev;
+		int rc;
+
+		rc = net_dm_packet_report_in_port_put(msg, dev->ifindex,
+						      dev->name);
+		if (rc)
+			goto nla_put_failure;
+	}
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
+			      ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+		goto nla_put_failure;
+
+	if (!payload_len)
+		goto out;
+
+	if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
+		goto nla_put_failure;
+
+	attr = skb_put(msg, nla_total_size(payload_len));
+	attr->nla_type = NET_DM_ATTR_PAYLOAD;
+	attr->nla_len = nla_attr_size(payload_len);
+	if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+		goto nla_put_failure;
+
+out:
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static struct net_dm_hw_metadata *
+net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata)
+{
+	struct net_dm_hw_metadata *n_hw_metadata;
+	const char *trap_group_name;
+	const char *trap_name;
+
+	n_hw_metadata = kmalloc(sizeof(*hw_metadata), GFP_ATOMIC);
+	if (!n_hw_metadata)
+		return NULL;
+
+	trap_group_name = kmemdup(hw_metadata->trap_group_name,
+				  strlen(hw_metadata->trap_group_name) + 1,
+				  GFP_ATOMIC | __GFP_ZERO);
+	if (!trap_group_name)
+		goto free_hw_metadata;
+	n_hw_metadata->trap_group_name = trap_group_name;
+
+	trap_name = kmemdup(hw_metadata->trap_name,
+			    strlen(hw_metadata->trap_name) + 1,
+			    GFP_ATOMIC | __GFP_ZERO);
+	if (!trap_name)
+		goto free_trap_group;
+	n_hw_metadata->trap_name = trap_name;
+
+	n_hw_metadata->input_dev = hw_metadata->input_dev;
+	if (n_hw_metadata->input_dev)
+		dev_hold(n_hw_metadata->input_dev);
+
+	return n_hw_metadata;
+
+free_trap_group:
+	kfree(trap_group_name);
+free_hw_metadata:
+	kfree(n_hw_metadata);
+	return NULL;
+}
+
+static void
+net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata)
+{
+	if (hw_metadata->input_dev)
+		dev_put(hw_metadata->input_dev);
+	kfree(hw_metadata->trap_name);
+	kfree(hw_metadata->trap_group_name);
+	kfree(hw_metadata);
+}
+
+static void net_dm_hw_packet_report(struct sk_buff *skb)
+{
+	struct net_dm_hw_metadata *hw_metadata;
+	struct sk_buff *msg;
+	size_t payload_len;
+	int rc;
+
+	if (skb->data > skb_mac_header(skb))
+		skb_push(skb, skb->data - skb_mac_header(skb));
+	else
+		skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+	payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+	if (net_dm_trunc_len)
+		payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
+
+	hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+	msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata),
+			GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	rc = net_dm_hw_packet_report_fill(msg, skb, payload_len);
+	if (rc) {
+		nlmsg_free(msg);
+		goto out;
+	}
+
+	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+	net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata);
+	consume_skb(skb);
+}
+
+static void net_dm_hw_packet_work(struct work_struct *work)
+{
+	struct per_cpu_dm_data *hw_data;
+	struct sk_buff_head list;
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+	__skb_queue_head_init(&list);
+
+	spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+	skb_queue_splice_tail_init(&hw_data->drop_queue, &list);
+	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+	while ((skb = __skb_dequeue(&list)))
+		net_dm_hw_packet_report(skb);
+}
+
+static void
+net_dm_hw_packet_probe(struct sk_buff *skb,
+		       const struct net_dm_hw_metadata *hw_metadata)
+{
+	struct net_dm_hw_metadata *n_hw_metadata;
+	ktime_t tstamp = ktime_get_real();
+	struct per_cpu_dm_data *hw_data;
+	struct sk_buff *nskb;
+	unsigned long flags;
+
+	if (!skb_mac_header_was_set(skb))
+		return;
+
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return;
+
+	n_hw_metadata = net_dm_hw_metadata_clone(hw_metadata);
+	if (!n_hw_metadata)
+		goto free;
+
+	NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata;
+	nskb->tstamp = tstamp;
+
+	hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+
+	spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+	if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len)
+		__skb_queue_tail(&hw_data->drop_queue, nskb);
+	else
+		goto unlock_free;
+	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+	schedule_work(&hw_data->dm_alert_work);
+
+	return;
+
+unlock_free:
+	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+	u64_stats_update_begin(&hw_data->stats.syncp);
+	hw_data->stats.dropped++;
+	u64_stats_update_end(&hw_data->stats.syncp);
+	net_dm_hw_metadata_free(n_hw_metadata);
+free:
+	consume_skb(nskb);
+}
+
+static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
+	.kfree_skb_probe	= net_dm_packet_trace_kfree_skb_hit,
+	.napi_poll_probe	= net_dm_packet_trace_napi_poll_hit,
+	.work_item_func		= net_dm_packet_work,
+	.hw_work_item_func	= net_dm_hw_packet_work,
+	.hw_probe		= net_dm_hw_packet_probe,
+};
+
+static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
+	[NET_DM_ALERT_MODE_SUMMARY]	= &net_dm_alert_summary_ops,
+	[NET_DM_ALERT_MODE_PACKET]	= &net_dm_alert_packet_ops,
+};
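+/* The alert mode selects the whole probe/work-item set at once: summary
+ * mode batches per-symbol (software) or per-trap-name (hardware) counters
+ * and multicasts one aggregate message per timer tick, while packet mode
+ * queues a clone of every dropped packet and reports each one individually.
+ */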
+
+void net_dm_hw_report(struct sk_buff *skb,
+		      const struct net_dm_hw_metadata *hw_metadata)
+{
+	rcu_read_lock();
+
+	if (!monitor_hw)
+		goto out;
+
+	net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata);
+
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(net_dm_hw_report);
+
+static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
+{
+	const struct net_dm_alert_ops *ops;
+	int cpu;
+
+	if (monitor_hw) {
+		NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled");
+		return -EAGAIN;
+	}
+
+	ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+	if (!try_module_get(THIS_MODULE)) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+		return -ENODEV;
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+		struct net_dm_hw_entries *hw_entries;
+
+		INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func);
+		timer_setup(&hw_data->send_timer, sched_send_work, 0);
+		hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+		kfree(hw_entries);
+	}
+
+	monitor_hw = true;
+
+	return 0;
+}
+
+static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
+{
+	int cpu;
+
+	if (!monitor_hw) {
+		NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
+		return;
+	}
+
+	monitor_hw = false;
+
+	/* After this call returns we are guaranteed that no CPU is processing
+	 * any hardware drops.
+	 */
+	synchronize_rcu();
+
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+		struct sk_buff *skb;
+
+		del_timer_sync(&hw_data->send_timer);
+		cancel_work_sync(&hw_data->dm_alert_work);
+		while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
+			struct net_dm_hw_metadata *hw_metadata;
+
+			hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+			net_dm_hw_metadata_free(hw_metadata);
+			consume_skb(skb);
+		}
+	}
+
+	module_put(THIS_MODULE);
+}
+
+static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
+{
+	const struct net_dm_alert_ops *ops;
+	int cpu, rc;
+
+	ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+	if (!try_module_get(THIS_MODULE)) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+		return -ENODEV;
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+		struct sk_buff *skb;
+
+		INIT_WORK(&data->dm_alert_work, ops->work_item_func);
+		timer_setup(&data->send_timer, sched_send_work, 0);
+		/* Allocate a new per-CPU skb for the summary alert message and
+		 * free the old one which might contain stale data from
+		 * previous tracing.
+		 */
+		skb = reset_per_cpu_data(data);
+		consume_skb(skb);
+	}
+
+	rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
+		goto err_module_put;
+	}
+
+	rc = register_trace_napi_poll(ops->napi_poll_probe, NULL);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
+		goto err_unregister_trace;
+	}
+
+	return 0;
+
+err_unregister_trace:
+	unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
+err_module_put:
+	module_put(THIS_MODULE);
+	return rc;
+}
+
+static void net_dm_trace_off_set(void)
+{
+	struct dm_hw_stat_delta *new_stat, *temp;
+	const struct net_dm_alert_ops *ops;
+	int cpu;
+
+	ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+	unregister_trace_napi_poll(ops->napi_poll_probe, NULL);
+	unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
+
+	tracepoint_synchronize_unregister();
+
+	/* Make sure we do not send notifications to user space after the
+	 * request to stop tracing returns.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+		struct sk_buff *skb;
+
+		del_timer_sync(&data->send_timer);
+		cancel_work_sync(&data->dm_alert_work);
+		while ((skb = __skb_dequeue(&data->drop_queue)))
+			consume_skb(skb);
+	}
+
+	list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
+		if (new_stat->dev == NULL) {
+			list_del_rcu(&new_stat->list);
+			kfree_rcu(new_stat, rcu);
+		}
+	}
+
+	module_put(THIS_MODULE);
+}
+
+static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
 {
 	int rc = 0;
-	struct dm_hw_stat_delta *new_stat = NULL;
-	struct dm_hw_stat_delta *temp;
-
-	mutex_lock(&trace_state_mutex);
 
 	if (state == trace_state) {
-		rc = -EAGAIN;
-		goto out_unlock;
+		NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state");
+		return -EAGAIN;
 	}
 
 	switch (state) {
 	case TRACE_ON:
-		if (!try_module_get(THIS_MODULE)) {
-			rc = -ENODEV;
-			break;
-		}
-
-		rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
-		rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
+		rc = net_dm_trace_on_set(extack);
 		break;
-
 	case TRACE_OFF:
-		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
-		rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
-
-		tracepoint_synchronize_unregister();
-
-		/*
-		 * Clean the device list
-		 */
-		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
-			if (new_stat->dev == NULL) {
-				list_del_rcu(&new_stat->list);
-				kfree_rcu(new_stat, rcu);
-			}
-		}
-
-		module_put(THIS_MODULE);
-
+		net_dm_trace_off_set();
 		break;
 	default:
 		rc = 1;
@@ -287,30 +1141,331 @@
 	else
 		rc = -EINPROGRESS;
 
-out_unlock:
-	mutex_unlock(&trace_state_mutex);
-
 	return rc;
 }
 
+static bool net_dm_is_monitoring(void)
+{
+	return trace_state == TRACE_ON || monitor_hw;
+}
+
+static int net_dm_alert_mode_get_from_info(struct genl_info *info,
+					   enum net_dm_alert_mode *p_alert_mode)
+{
+	u8 val;
+
+	val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]);
+
+	switch (val) {
+	case NET_DM_ALERT_MODE_SUMMARY: /* fall-through */
+	case NET_DM_ALERT_MODE_PACKET:
+		*p_alert_mode = val;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int net_dm_alert_mode_set(struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	enum net_dm_alert_mode alert_mode;
+	int rc;
+
+	if (!info->attrs[NET_DM_ATTR_ALERT_MODE])
+		return 0;
+
+	rc = net_dm_alert_mode_get_from_info(info, &alert_mode);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode");
+		return -EINVAL;
+	}
+
+	net_dm_alert_mode = alert_mode;
+
+	return 0;
+}
+
+static void net_dm_trunc_len_set(struct genl_info *info)
+{
+	if (!info->attrs[NET_DM_ATTR_TRUNC_LEN])
+		return;
+
+	net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
+}
+
+static void net_dm_queue_len_set(struct genl_info *info)
+{
+	if (!info->attrs[NET_DM_ATTR_QUEUE_LEN])
+		return;
+
+	net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]);
+}
 
 static int net_dm_cmd_config(struct sk_buff *skb,
 			struct genl_info *info)
 {
-	return -ENOTSUPP;
+	struct netlink_ext_ack *extack = info->extack;
+	int rc;
+
+	if (net_dm_is_monitoring()) {
+		NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring");
+		return -EBUSY;
+	}
+
+	rc = net_dm_alert_mode_set(info);
+	if (rc)
+		return rc;
+
+	net_dm_trunc_len_set(info);
+
+	net_dm_queue_len_set(info);
+
+	return 0;
+}
+
+static int net_dm_monitor_start(bool set_sw, bool set_hw,
+				struct netlink_ext_ack *extack)
+{
+	bool sw_set = false;
+	int rc;
+
+	if (set_sw) {
+		rc = set_all_monitor_traces(TRACE_ON, extack);
+		if (rc)
+			return rc;
+		sw_set = true;
+	}
+
+	if (set_hw) {
+		rc = net_dm_hw_monitor_start(extack);
+		if (rc)
+			goto err_monitor_hw;
+	}
+
+	return 0;
+
+err_monitor_hw:
+	if (sw_set)
+		set_all_monitor_traces(TRACE_OFF, extack);
+	return rc;
+}
+
+static void net_dm_monitor_stop(bool set_sw, bool set_hw,
+				struct netlink_ext_ack *extack)
+{
+	if (set_hw)
+		net_dm_hw_monitor_stop(extack);
+	if (set_sw)
+		set_all_monitor_traces(TRACE_OFF, extack);
 }
 
 static int net_dm_cmd_trace(struct sk_buff *skb,
 			struct genl_info *info)
 {
+	bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS];
+	bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS];
+	struct netlink_ext_ack *extack = info->extack;
+
+	/* To maintain backward compatibility, we start / stop monitoring of
+	 * software drops if no flag is specified.
+	 */
+	if (!set_sw && !set_hw)
+		set_sw = true;
+
 	switch (info->genlhdr->cmd) {
 	case NET_DM_CMD_START:
-		return set_all_monitor_traces(TRACE_ON);
+		return net_dm_monitor_start(set_sw, set_hw, extack);
 	case NET_DM_CMD_STOP:
-		return set_all_monitor_traces(TRACE_OFF);
+		net_dm_monitor_stop(set_sw, set_hw, extack);
+		return 0;
 	}
 
-	return -ENOTSUPP;
+	return -EOPNOTSUPP;
+}
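+/* From user space, monitoring is driven over the NET_DM generic netlink
+ * family: NET_DM_CMD_START/STOP carry the NET_DM_ATTR_SW_DROPS and/or
+ * NET_DM_ATTR_HW_DROPS flags to select what is traced. For example
+ * (editor's sketch; the exact syntax assumes a dropwatch release that
+ * understands these knobs):
+ *
+ *	dropwatch> set alertmode packet
+ *	dropwatch> set hw true
+ *	dropwatch> start
+ */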
+
+static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			  &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	int rc;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	rc = net_dm_config_fill(msg, info);
+	if (rc)
+		goto free_msg;
+
+	return genlmsg_reply(msg, info);
+
+free_msg:
+	nlmsg_free(msg);
+	return rc;
+}
+
+static void net_dm_stats_read(struct net_dm_stats *stats)
+{
+	int cpu;
+
+	memset(stats, 0, sizeof(*stats));
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+		struct net_dm_stats *cpu_stats = &data->stats;
+		unsigned int start;
+		u64 dropped;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			dropped = cpu_stats->dropped;
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+		stats->dropped += dropped;
+	}
+}
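+/* The fetch_begin/fetch_retry pair reads the per-CPU 64-bit counter
+ * consistently on 32-bit machines: if a writer updates the counter while it
+ * is being read, the sequence count changes and the read is retried. On
+ * 64-bit machines this compiles down to plain loads.
+ */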
+
+static int net_dm_stats_put(struct sk_buff *msg)
+{
+	struct net_dm_stats stats;
+	struct nlattr *attr;
+
+	net_dm_stats_read(&stats);
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+			      stats.dropped, NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static void net_dm_hw_stats_read(struct net_dm_stats *stats)
+{
+	int cpu;
+
+	memset(stats, 0, sizeof(*stats));
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+		struct net_dm_stats *cpu_stats = &hw_data->stats;
+		unsigned int start;
+		u64 dropped;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			dropped = cpu_stats->dropped;
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+		stats->dropped += dropped;
+	}
+}
+
+static int net_dm_hw_stats_put(struct sk_buff *msg)
+{
+	struct net_dm_stats stats;
+	struct nlattr *attr;
+
+	net_dm_hw_stats_read(&stats);
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+			      stats.dropped, NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
+{
+	void *hdr;
+	int rc;
+
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			  &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	rc = net_dm_stats_put(msg);
+	if (rc)
+		goto nla_put_failure;
+
+	rc = net_dm_hw_stats_put(msg);
+	if (rc)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	int rc;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	rc = net_dm_stats_fill(msg, info);
+	if (rc)
+		goto free_msg;
+
+	return genlmsg_reply(msg, info);
+
+free_msg:
+	nlmsg_free(msg);
+	return rc;
 }
 
 static int dropmon_net_event(struct notifier_block *ev_block,
@@ -329,12 +1484,12 @@
 
 		new_stat->dev = dev;
 		new_stat->last_rx = jiffies;
-		mutex_lock(&trace_state_mutex);
+		mutex_lock(&net_dm_mutex);
 		list_add_rcu(&new_stat->list, &hw_stats_list);
-		mutex_unlock(&trace_state_mutex);
+		mutex_unlock(&net_dm_mutex);
 		break;
 	case NETDEV_UNREGISTER:
-		mutex_lock(&trace_state_mutex);
+		mutex_lock(&net_dm_mutex);
 		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
 			if (new_stat->dev == dev) {
 				new_stat->dev = NULL;
@@ -345,32 +1500,71 @@
 				}
 			}
 		}
-		mutex_unlock(&trace_state_mutex);
+		mutex_unlock(&net_dm_mutex);
 		break;
 	}
 out:
 	return NOTIFY_DONE;
 }
 
+static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
+	[NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
+	[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
+	[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
+	[NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
+	[NET_DM_ATTR_SW_DROPS]	= { .type = NLA_FLAG },
+	[NET_DM_ATTR_HW_DROPS]	= { .type = NLA_FLAG },
+};
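+/* With .strict_start_type set to the first real attribute, all NET_DM
+ * attributes are strictly validated even for the pre-existing commands
+ * below, which otherwise opt out via GENL_DONT_VALIDATE_STRICT.
+ */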
+
 static const struct genl_ops dropmon_ops[] = {
 	{
 		.cmd = NET_DM_CMD_CONFIG,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = net_dm_cmd_config,
+		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = NET_DM_CMD_START,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = net_dm_cmd_trace,
 	},
 	{
 		.cmd = NET_DM_CMD_STOP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = net_dm_cmd_trace,
 	},
+	{
+		.cmd = NET_DM_CMD_CONFIG_GET,
+		.doit = net_dm_cmd_config_get,
+	},
+	{
+		.cmd = NET_DM_CMD_STATS_GET,
+		.doit = net_dm_cmd_stats_get,
+	},
 };
 
+static int net_dm_nl_pre_doit(const struct genl_ops *ops,
+			      struct sk_buff *skb, struct genl_info *info)
+{
+	mutex_lock(&net_dm_mutex);
+
+	return 0;
+}
+
+static void net_dm_nl_post_doit(const struct genl_ops *ops,
+				struct sk_buff *skb, struct genl_info *info)
+{
+	mutex_unlock(&net_dm_mutex);
+}
+
 static struct genl_family net_drop_monitor_family __ro_after_init = {
 	.hdrsize        = 0,
 	.name           = "NET_DM",
 	.version        = 2,
+	.maxattr	= NET_DM_ATTR_MAX,
+	.policy		= net_dm_nl_policy,
+	.pre_doit	= net_dm_nl_pre_doit,
+	.post_doit	= net_dm_nl_post_doit,
 	.module		= THIS_MODULE,
 	.ops		= dropmon_ops,
 	.n_ops		= ARRAY_SIZE(dropmon_ops),
@@ -382,9 +1576,57 @@
 	.notifier_call = dropmon_net_event
 };
 
-static int __init init_net_drop_monitor(void)
+static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data)
+{
+	spin_lock_init(&data->lock);
+	skb_queue_head_init(&data->drop_queue);
+	u64_stats_init(&data->stats.syncp);
+}
+
+static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data)
+{
+	WARN_ON(!skb_queue_empty(&data->drop_queue));
+}
+
+static void net_dm_cpu_data_init(int cpu)
 {
 	struct per_cpu_dm_data *data;
+
+	data = &per_cpu(dm_cpu_data, cpu);
+	__net_dm_cpu_data_init(data);
+}
+
+static void net_dm_cpu_data_fini(int cpu)
+{
+	struct per_cpu_dm_data *data;
+
+	data = &per_cpu(dm_cpu_data, cpu);
+	/* At this point, we should have exclusive access
+	 * to this struct and can free the skb inside it.
+	 */
+	consume_skb(data->skb);
+	__net_dm_cpu_data_fini(data);
+}
+
+static void net_dm_hw_cpu_data_init(int cpu)
+{
+	struct per_cpu_dm_data *hw_data;
+
+	hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+	__net_dm_cpu_data_init(hw_data);
+}
+
+static void net_dm_hw_cpu_data_fini(int cpu)
+{
+	struct per_cpu_dm_data *hw_data;
+
+	hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+	kfree(hw_data->hw_entries);
+	__net_dm_cpu_data_fini(hw_data);
+}
+
+static int __init init_net_drop_monitor(void)
+{
 	int cpu, rc;
 
 	pr_info("Initializing network drop monitor service\n");
@@ -410,14 +1652,10 @@
 	rc = 0;
 
 	for_each_possible_cpu(cpu) {
-		data = &per_cpu(dm_cpu_data, cpu);
-		INIT_WORK(&data->dm_alert_work, send_dm_alert);
-		timer_setup(&data->send_timer, sched_send_work, 0);
-		spin_lock_init(&data->lock);
-		reset_per_cpu_data(data);
+		net_dm_cpu_data_init(cpu);
+		net_dm_hw_cpu_data_init(cpu);
 	}
 
-
 	goto out;
 
 out_unreg:
@@ -428,7 +1666,6 @@
 
 static void exit_net_drop_monitor(void)
 {
-	struct per_cpu_dm_data *data;
 	int cpu;
 
 	BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
@@ -436,19 +1673,11 @@
 	/*
 	 * Because of the module_get/put we do in the trace state change path,
 	 * we are guaranteed not to have any current users when we get here.
-	 * all we need to do is make sure that we don't have any running timers
-	 * or pending schedule calls
 	 */
 
 	for_each_possible_cpu(cpu) {
-		data = &per_cpu(dm_cpu_data, cpu);
-		del_timer_sync(&data->send_timer);
-		cancel_work_sync(&data->dm_alert_work);
-		/*
-		 * At this point, we should have exclusive access
-		 * to this struct and can free the skb inside it
-		 */
-		kfree_skb(data->skb);
+		net_dm_hw_cpu_data_fini(cpu);
+		net_dm_cpu_data_fini(cpu);
 	}
 
 	BUG_ON(genl_unregister_family(&net_drop_monitor_family));
diff --git a/net/core/dst.c b/net/core/dst.c
index 81ccf20..193af52 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/core/dst.c	Protocol independent destination cache.
  *
@@ -26,23 +27,6 @@
 #include <net/dst.h>
 #include <net/dst_metadata.h>
 
-/*
- * Theory of operations:
- * 1) We use a list, protected by a spinlock, to add
- *    new entries from both BH and non-BH context.
- * 2) In order to keep spinlock held for a small delay,
- *    we use a second list where are stored long lived
- *    entries, that are handled by the garbage collect thread
- *    fired by a workqueue.
- * 3) This list is guarded by a mutex,
- *    so that the gc_task and dst_dev_event() can be synchronized.
- */
-
-/*
- * We want to keep lock & list close together
- * to dirty as few cache lines as possible in __dst_free().
- * As this is not a very strong hint, we dont force an alignment on SMP.
- */
 int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	kfree_skb(skb);
@@ -98,8 +82,12 @@
 	struct dst_entry *dst;
 
 	if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
-		if (ops->gc(ops))
+		if (ops->gc(ops)) {
+			printk_ratelimited(KERN_NOTICE "Route cache is full: consider increasing sysctl net.ipv[4|6].route.max_size.\n");
 			return NULL;
+		}
 	}
 
 	dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
@@ -172,7 +160,7 @@
 		dst->ops->ifdown(dst, dev, true);
 	dst->input = dst_discard;
 	dst->output = dst_discard_out;
-	dst->dev = dev_net(dst->dev)->loopback_dev;
+	dst->dev = blackhole_netdev;
 	dev_hold(dst->dev);
 	dev_put(dev);
 }
@@ -184,7 +172,7 @@
 		int newrefcnt;
 
 		newrefcnt = atomic_dec_return(&dst->__refcnt);
-		if (unlikely(newrefcnt < 0))
+		if (WARN_ONCE(newrefcnt < 0, "dst_release underflow"))
 			net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
 					     __func__, dst, newrefcnt);
 		if (!newrefcnt)
@@ -199,7 +187,7 @@
 		int newrefcnt;
 
 		newrefcnt = atomic_dec_return(&dst->__refcnt);
-		if (unlikely(newrefcnt < 0))
+		if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow"))
 			net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
 					     __func__, dst, newrefcnt);
 		if (!newrefcnt)
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 64cef97..be74ab4 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/core/dst_cache.c - dst entry cache
  *
  * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/kernel.h>
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index aeabc48..cd9bc67 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/core/ethtool.c - Ethtool ioctl handler
  * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
  *
  * This file is where we call all the ethtool_ops commands to get
  * the information ethtool needs.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -27,6 +23,9 @@
 #include <linux/rtnetlink.h>
 #include <linux/sched/signal.h>
 #include <linux/net.h>
+#include <net/devlink.h>
+#include <net/xdp_sock.h>
+#include <net/flow_offload.h>
 
 /*
  * Some useful ethtool_ops methods that're device independent.
@@ -133,6 +132,8 @@
 phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
 	[ETHTOOL_ID_UNSPEC]     = "Unspec",
 	[ETHTOOL_PHY_DOWNSHIFT]	= "phy-downshift",
+	[ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
+	[ETHTOOL_PHY_EDPD]	= "phy-energy-detect-power-down",
 };
 
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
@@ -539,47 +540,17 @@
 	} link_modes;
 };
 
-/* Internal kernel helper to query a device ethtool_link_settings.
- *
- * Backward compatibility note: for compatibility with legacy drivers
- * that implement only the ethtool_cmd API, this has to work with both
- * drivers implementing get_link_ksettings API and drivers
- * implementing get_settings API. When drivers implement get_settings
- * and report ethtool_cmd deprecated fields
- * (transceiver/maxrxpkt/maxtxpkt), these fields are silently ignored
- * because the resulting struct ethtool_link_settings does not report them.
- */
+/* Internal kernel helper to query a device ethtool_link_settings. */
 int __ethtool_get_link_ksettings(struct net_device *dev,
 				 struct ethtool_link_ksettings *link_ksettings)
 {
-	int err;
-	struct ethtool_cmd cmd;
-
 	ASSERT_RTNL();
 
-	if (dev->ethtool_ops->get_link_ksettings) {
-		memset(link_ksettings, 0, sizeof(*link_ksettings));
-		return dev->ethtool_ops->get_link_ksettings(dev,
-							    link_ksettings);
-	}
-
-	/* driver doesn't support %ethtool_link_ksettings API. revert to
-	 * legacy %ethtool_cmd API, unless it's not supported either.
-	 * TODO: remove when ethtool_ops::get_settings disappears internally
-	 */
-	if (!dev->ethtool_ops->get_settings)
+	if (!dev->ethtool_ops->get_link_ksettings)
 		return -EOPNOTSUPP;
 
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.cmd = ETHTOOL_GSET;
-	err = dev->ethtool_ops->get_settings(dev, &cmd);
-	if (err < 0)
-		return err;
-
-	/* we ignore deprecated fields transceiver/maxrxpkt/maxtxpkt
-	 */
-	convert_legacy_settings_to_link_ksettings(link_ksettings, &cmd);
-	return err;
+	memset(link_ksettings, 0, sizeof(*link_ksettings));
+	return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings);
 }
 EXPORT_SYMBOL(__ethtool_get_link_ksettings);
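+/* Illustrative in-kernel caller (editor's sketch): the helper requires RTNL
+ * and, after this change, only works for drivers implementing
+ * get_link_ksettings:
+ *
+ *	struct ethtool_link_ksettings ks;
+ *	int err;
+ *
+ *	rtnl_lock();
+ *	err = __ethtool_get_link_ksettings(dev, &ks);
+ *	rtnl_unlock();
+ *	if (!err)
+ *		pr_info("%s: %u Mb/s\n", dev->name, ks.base.speed);
+ */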
 
@@ -635,16 +606,7 @@
 	return 0;
 }
 
-/* Query device for its ethtool_link_settings.
- *
- * Backward compatibility note: this function must fail when driver
- * does not implement ethtool::get_link_ksettings, even if legacy
- * ethtool_ops::get_settings is implemented. This tells new versions
- * of ethtool that they should use the legacy API %ETHTOOL_GSET for
- * this driver, so that they can correctly access the ethtool_cmd
- * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
- * implements ethtool_ops::get_settings anymore.
- */
+/* Query device for its ethtool_link_settings. */
 static int ethtool_get_link_ksettings(struct net_device *dev,
 				      void __user *useraddr)
 {
@@ -652,7 +614,6 @@
 	struct ethtool_link_ksettings link_ksettings;
 
 	ASSERT_RTNL();
-
 	if (!dev->ethtool_ops->get_link_ksettings)
 		return -EOPNOTSUPP;
 
@@ -699,16 +660,7 @@
 	return store_link_ksettings_for_user(useraddr, &link_ksettings);
 }
 
-/* Update device ethtool_link_settings.
- *
- * Backward compatibility note: this function must fail when driver
- * does not implement ethtool::set_link_ksettings, even if legacy
- * ethtool_ops::set_settings is implemented. This tells new versions
- * of ethtool that they should use the legacy API %ETHTOOL_SSET for
- * this driver, so that they can correctly update the ethtool_cmd
- * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
- * implements ethtool_ops::get_settings anymore.
- */
+/* Update device ethtool_link_settings. */
 static int ethtool_set_link_ksettings(struct net_device *dev,
 				      void __user *useraddr)
 {
@@ -746,51 +698,31 @@
 
 /* Query device for its ethtool_cmd settings.
  *
- * Backward compatibility note: for compatibility with legacy ethtool,
- * this has to work with both drivers implementing get_link_ksettings
- * API and drivers implementing get_settings API. When drivers
- * implement get_link_ksettings and report higher link mode bits, a
- * kernel warning is logged once (with name of 1st driver/device) to
- * recommend user to upgrade ethtool, but the command is successful
- * (only the lower link mode bits reported back to user).
+ * Backward compatibility note: for compatibility with legacy ethtool, this is
+ * now implemented via get_link_ksettings. When the driver reports higher link
+ * mode bits, a kernel warning is logged once (with the name of the first
+ * driver/device) to recommend upgrading ethtool, but the command succeeds
+ * (only the lower link mode bits are reported back to the user). Deprecated
+ * ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt) are always set to zero.
  */
 static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
 {
+	struct ethtool_link_ksettings link_ksettings;
 	struct ethtool_cmd cmd;
+	int err;
 
 	ASSERT_RTNL();
+	if (!dev->ethtool_ops->get_link_ksettings)
+		return -EOPNOTSUPP;
 
-	if (dev->ethtool_ops->get_link_ksettings) {
-		/* First, use link_ksettings API if it is supported */
-		int err;
-		struct ethtool_link_ksettings link_ksettings;
+	memset(&link_ksettings, 0, sizeof(link_ksettings));
+	err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
+	if (err < 0)
+		return err;
+	convert_link_ksettings_to_legacy_settings(&cmd, &link_ksettings);
 
-		memset(&link_ksettings, 0, sizeof(link_ksettings));
-		err = dev->ethtool_ops->get_link_ksettings(dev,
-							   &link_ksettings);
-		if (err < 0)
-			return err;
-		convert_link_ksettings_to_legacy_settings(&cmd,
-							  &link_ksettings);
-
-		/* send a sensible cmd tag back to user */
-		cmd.cmd = ETHTOOL_GSET;
-	} else {
-		/* driver doesn't support %ethtool_link_ksettings
-		 * API. revert to legacy %ethtool_cmd API, unless it's
-		 * not supported either.
-		 */
-		int err;
-
-		if (!dev->ethtool_ops->get_settings)
-			return -EOPNOTSUPP;
-
-		memset(&cmd, 0, sizeof(cmd));
-		cmd.cmd = ETHTOOL_GSET;
-		err = dev->ethtool_ops->get_settings(dev, &cmd);
-		if (err < 0)
-			return err;
-	}
+	/* send a sensible cmd tag back to user */
+	cmd.cmd = ETHTOOL_GSET;
 
 	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
 		return -EFAULT;
@@ -800,48 +732,29 @@
 
 /* Update device link settings with given ethtool_cmd.
  *
- * Backward compatibility note: for compatibility with legacy ethtool,
- * this has to work with both drivers implementing set_link_ksettings
- * API and drivers implementing set_settings API. When drivers
- * implement set_link_ksettings and user's request updates deprecated
- * ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel
- * warning is logged once (with name of 1st driver/device) to
- * recommend user to upgrade ethtool, and the request is rejected.
+ * Backward compatibility note: for compatibility with legacy ethtool, this is
+ * now always implemented via set_link_ksettings. When the user's request
+ * updates deprecated ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a
+ * kernel warning is logged once (with the name of the first driver/device) to
+ * recommend upgrading ethtool, and the request is rejected.
  */
 static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
 {
+	struct ethtool_link_ksettings link_ksettings;
 	struct ethtool_cmd cmd;
 
 	ASSERT_RTNL();
 
 	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
 		return -EFAULT;
-
-	/* first, try new %ethtool_link_ksettings API. */
-	if (dev->ethtool_ops->set_link_ksettings) {
-		struct ethtool_link_ksettings link_ksettings;
-
-		if (!convert_legacy_settings_to_link_ksettings(&link_ksettings,
-							       &cmd))
-			return -EINVAL;
-
-		link_ksettings.base.cmd = ETHTOOL_SLINKSETTINGS;
-		link_ksettings.base.link_mode_masks_nwords
-			= __ETHTOOL_LINK_MODE_MASK_NU32;
-		return dev->ethtool_ops->set_link_ksettings(dev,
-							    &link_ksettings);
-	}
-
-	/* legacy %ethtool_cmd API */
-
-	/* TODO: return -EOPNOTSUPP when ethtool_ops::get_settings
-	 * disappears internally
-	 */
-
-	if (!dev->ethtool_ops->set_settings)
+	if (!dev->ethtool_ops->set_link_ksettings)
 		return -EOPNOTSUPP;
 
-	return dev->ethtool_ops->set_settings(dev, &cmd);
+	if (!convert_legacy_settings_to_link_ksettings(&link_ksettings, &cmd))
+		return -EINVAL;
+	link_ksettings.base.link_mode_masks_nwords =
+		__ETHTOOL_LINK_MODE_MASK_NU32;
+	return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
 }
 
 static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
@@ -880,11 +793,20 @@
 		if (rc >= 0)
 			info.n_priv_flags = rc;
 	}
-	if (ops->get_regs_len)
-		info.regdump_len = ops->get_regs_len(dev);
+	if (ops->get_regs_len) {
+		int ret = ops->get_regs_len(dev);
+
+		if (ret > 0)
+			info.regdump_len = ret;
+	}
+
 	if (ops->get_eeprom_len)
 		info.eedump_len = ops->get_eeprom_len(dev);
 
+	if (!info.fw_version[0])
+		devlink_compat_running_version(dev, info.fw_version,
+					       sizeof(info.fw_version));
+
 	if (copy_to_user(useraddr, &info, sizeof(info)))
 		return -EFAULT;
 	return 0;
@@ -1424,15 +1346,18 @@
 		return -EFAULT;
 
 	reglen = ops->get_regs_len(dev);
+	if (reglen <= 0)
+		return reglen;
+
 	if (regs.len > reglen)
 		regs.len = reglen;
 
-	regbuf = NULL;
-	if (reglen) {
-		regbuf = vzalloc(reglen);
-		if (!regbuf)
-			return -ENOMEM;
-	}
+	regbuf = vzalloc(reglen);
+	if (!regbuf)
+		return -ENOMEM;
+
+	if (regs.len < reglen)
+		reglen = regs.len;
 
 	ops->get_regs(dev, &regs, regbuf);
 
@@ -1440,7 +1365,7 @@
 	if (copy_to_user(useraddr, &regs, sizeof(regs)))
 		goto out;
 	useraddr += offsetof(struct ethtool_regs, data);
-	if (regbuf && copy_to_user(useraddr, regbuf, regs.len))
+	if (copy_to_user(useraddr, regbuf, reglen))
 		goto out;
 	ret = 0;
 
@@ -1471,11 +1396,13 @@
 
 static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
 {
-	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+	struct ethtool_wolinfo wol;
 
 	if (!dev->ethtool_ops->get_wol)
 		return -EOPNOTSUPP;
 
+	memset(&wol, 0, sizeof(struct ethtool_wolinfo));
+	wol.cmd = ETHTOOL_GWOL;
 	dev->ethtool_ops->get_wol(dev, &wol);
 
 	if (copy_to_user(useraddr, &wol, sizeof(wol)))
@@ -1753,8 +1680,10 @@
 static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
 						   void __user *useraddr)
 {
-	struct ethtool_channels channels, max = { .cmd = ETHTOOL_GCHANNELS };
+	struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS };
+	u16 from_channel, to_channel;
 	u32 max_rx_in_use = 0;
+	unsigned int i;
 
 	if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
 		return -EOPNOTSUPP;
@@ -1762,13 +1691,13 @@
 	if (copy_from_user(&channels, useraddr, sizeof(channels)))
 		return -EFAULT;
 
-	dev->ethtool_ops->get_channels(dev, &max);
+	dev->ethtool_ops->get_channels(dev, &curr);
 
 	/* ensure new counts are within the maximums */
-	if ((channels.rx_count > max.max_rx) ||
-	    (channels.tx_count > max.max_tx) ||
-	    (channels.combined_count > max.max_combined) ||
-	    (channels.other_count > max.max_other))
+	if (channels.rx_count > curr.max_rx ||
+	    channels.tx_count > curr.max_tx ||
+	    channels.combined_count > curr.max_combined ||
+	    channels.other_count > curr.max_other)
 		return -EINVAL;
 
 	/* ensure the new Rx count fits within the configured Rx flow
@@ -1778,12 +1707,20 @@
 	    (channels.combined_count + channels.rx_count) <= max_rx_in_use)
 	    return -EINVAL;
 
+	/* Disabling channels, query zero-copy AF_XDP sockets */
+	from_channel = channels.combined_count +
+		min(channels.rx_count, channels.tx_count);
+	to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count);
+	for (i = from_channel; i < to_channel; i++)
+		if (xdp_get_umem_from_qid(dev, i))
+			return -EINVAL;
+
 	return dev->ethtool_ops->set_channels(dev, &channels);
 }
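+/* Worked example for the AF_XDP check above (editor's sketch): shrinking
+ * combined_count from 8 to 4 with rx_count == tx_count == 0 gives
+ * from_channel = 4 and to_channel = 8, so queues 4..7 are probed; if any of
+ * them still has a zero-copy umem bound, the resize is refused with
+ * -EINVAL.
+ */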
 
 static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
 {
-	struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
+	struct ethtool_pauseparam pauseparam = { .cmd = ETHTOOL_GPAUSEPARAM };
 
 	if (!dev->ethtool_ops->get_pauseparam)
 		return -EOPNOTSUPP;
@@ -1863,11 +1800,16 @@
 	WARN_ON_ONCE(!ret);
 
 	gstrings.len = ret;
-	data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
-	if (gstrings.len && !data)
-		return -ENOMEM;
 
-	__ethtool_get_strings(dev, gstrings.string_set, data);
+	if (gstrings.len) {
+		data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
+		if (!data)
+			return -ENOMEM;
+
+		__ethtool_get_strings(dev, gstrings.string_set, data);
+	} else {
+		data = NULL;
+	}
 
 	ret = -EFAULT;
 	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
@@ -1963,11 +1905,15 @@
 		return -EFAULT;
 
 	stats.n_stats = n_stats;
-	data = vzalloc(array_size(n_stats, sizeof(u64)));
-	if (n_stats && !data)
-		return -ENOMEM;
 
-	ops->get_ethtool_stats(dev, &stats, data);
+	if (n_stats) {
+		data = vzalloc(array_size(n_stats, sizeof(u64)));
+		if (!data)
+			return -ENOMEM;
+		ops->get_ethtool_stats(dev, &stats, data);
+	} else {
+		data = NULL;
+	}
 
 	ret = -EFAULT;
 	if (copy_to_user(useraddr, &stats, sizeof(stats)))
@@ -2007,16 +1953,21 @@
 		return -EFAULT;
 
 	stats.n_stats = n_stats;
-	data = vzalloc(array_size(n_stats, sizeof(u64)));
-	if (n_stats && !data)
-		return -ENOMEM;
 
-	if (dev->phydev && !ops->get_ethtool_phy_stats) {
-		ret = phy_ethtool_get_stats(dev->phydev, &stats, data);
-		if (ret < 0)
-			return ret;
+	if (n_stats) {
+		data = vzalloc(array_size(n_stats, sizeof(u64)));
+		if (!data)
+			return -ENOMEM;
+
+		if (dev->phydev && !ops->get_ethtool_phy_stats) {
+			ret = phy_ethtool_get_stats(dev->phydev, &stats, data);
+			if (ret < 0)
+				goto out;
+		} else {
+			ops->get_ethtool_phy_stats(dev, &stats, data);
+		}
 	} else {
-		ops->get_ethtool_phy_stats(dev, &stats, data);
+		data = NULL;
 	}
 
 	ret = -EFAULT;
@@ -2102,11 +2053,10 @@
 
 	if (copy_from_user(&efl, useraddr, sizeof(efl)))
 		return -EFAULT;
+	efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
 
 	if (!dev->ethtool_ops->flash_device)
-		return -EOPNOTSUPP;
-
-	efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
+		return devlink_compat_flash_update(dev, efl.data);
 
 	return dev->ethtool_ops->flash_device(dev, &efl);
 }
@@ -2386,9 +2336,10 @@
 	return ret;
 }
 
-static int ethtool_get_per_queue_coalesce(struct net_device *dev,
-					  void __user *useraddr,
-					  struct ethtool_per_queue_op *per_queue_opt)
+static noinline_for_stack int
+ethtool_get_per_queue_coalesce(struct net_device *dev,
+			       void __user *useraddr,
+			       struct ethtool_per_queue_op *per_queue_opt)
 {
 	u32 bit;
 	int ret;
@@ -2416,9 +2367,10 @@
 	return 0;
 }
 
-static int ethtool_set_per_queue_coalesce(struct net_device *dev,
-					  void __user *useraddr,
-					  struct ethtool_per_queue_op *per_queue_opt)
+static noinline_for_stack int
+ethtool_set_per_queue_coalesce(struct net_device *dev,
+			       void __user *useraddr,
+			       struct ethtool_per_queue_op *per_queue_opt)
 {
 	u32 bit;
 	int i, ret = 0;
@@ -2472,7 +2424,7 @@
 	return ret;
 }
 
-static int ethtool_set_per_queue(struct net_device *dev,
+static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev,
 				 void __user *useraddr, u32 sub_cmd)
 {
 	struct ethtool_per_queue_op per_queue_opt;
@@ -2497,10 +2449,16 @@
 {
 	switch (tuna->id) {
 	case ETHTOOL_PHY_DOWNSHIFT:
+	case ETHTOOL_PHY_FAST_LINK_DOWN:
 		if (tuna->len != sizeof(u8) ||
 		    tuna->type_id != ETHTOOL_TUNABLE_U8)
 			return -EINVAL;
 		break;
+	case ETHTOOL_PHY_EDPD:
+		if (tuna->len != sizeof(u16) ||
+		    tuna->type_id != ETHTOOL_TUNABLE_U16)
+			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}
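
Both new PHY tunables flow through this validation: ETHTOOL_PHY_FAST_LINK_DOWN shares the u8 shape with downshift, while ETHTOOL_PHY_EDPD is the first u16 tunable. A minimal userspace sketch of driving the u16 path through the classic SIOCETHTOOL ioctl, assuming fd is an AF_INET datagram socket and skipping error handling:

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int phy_set_edpd(int fd, const char *ifname, __u16 interval_ms)
{
	/* tunable header followed by the u16 payload, as the kernel expects */
	char buf[sizeof(struct ethtool_tunable) + sizeof(__u16)] = {};
	struct ethtool_tunable *tuna = (struct ethtool_tunable *)buf;
	struct ifreq ifr = {};

	tuna->cmd     = ETHTOOL_PHY_STUNABLE;
	tuna->id      = ETHTOOL_PHY_EDPD;
	tuna->type_id = ETHTOOL_TUNABLE_U16;
	tuna->len     = sizeof(__u16);
	memcpy(tuna + 1, &interval_ms, sizeof(interval_ms));

	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = buf;
	return ioctl(fd, SIOCETHTOOL, &ifr);
}
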
@@ -2570,7 +2528,7 @@
 
 static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr)
 {
-	struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM };
+	struct ethtool_fecparam fecparam = { .cmd = ETHTOOL_GFECPARAM };
 	int rc;
 
 	if (!dev->ethtool_ops->get_fecparam)
@@ -2885,3 +2843,274 @@
 
 	return rc;
 }
+
+struct ethtool_rx_flow_key {
+	struct flow_dissector_key_basic			basic;
+	union {
+		struct flow_dissector_key_ipv4_addrs	ipv4;
+		struct flow_dissector_key_ipv6_addrs	ipv6;
+	};
+	struct flow_dissector_key_ports			tp;
+	struct flow_dissector_key_ip			ip;
+	struct flow_dissector_key_vlan			vlan;
+	struct flow_dissector_key_eth_addrs		eth_addrs;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct ethtool_rx_flow_match {
+	struct flow_dissector		dissector;
+	struct ethtool_rx_flow_key	key;
+	struct ethtool_rx_flow_key	mask;
+};
+
+struct ethtool_rx_flow_rule *
+ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
+{
+	const struct ethtool_rx_flow_spec *fs = input->fs;
+	static struct in6_addr zero_addr = {};
+	struct ethtool_rx_flow_match *match;
+	struct ethtool_rx_flow_rule *flow;
+	struct flow_action_entry *act;
+
+	flow = kzalloc(sizeof(struct ethtool_rx_flow_rule) +
+		       sizeof(struct ethtool_rx_flow_match), GFP_KERNEL);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	/* ethtool_rx supports only a single action per rule. */
+	flow->rule = flow_rule_alloc(1);
+	if (!flow->rule) {
+		kfree(flow);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	match = (struct ethtool_rx_flow_match *)flow->priv;
+	flow->rule->match.dissector	= &match->dissector;
+	flow->rule->match.mask		= &match->mask;
+	flow->rule->match.key		= &match->key;
+
+	match->mask.basic.n_proto = htons(0xffff);
+
+	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
+	case ETHER_FLOW: {
+		const struct ethhdr *ether_spec, *ether_m_spec;
+
+		ether_spec = &fs->h_u.ether_spec;
+		ether_m_spec = &fs->m_u.ether_spec;
+
+		if (!is_zero_ether_addr(ether_m_spec->h_source)) {
+			ether_addr_copy(match->key.eth_addrs.src,
+					ether_spec->h_source);
+			ether_addr_copy(match->mask.eth_addrs.src,
+					ether_m_spec->h_source);
+		}
+		if (!is_zero_ether_addr(ether_m_spec->h_dest)) {
+			ether_addr_copy(match->key.eth_addrs.dst,
+					ether_spec->h_dest);
+			ether_addr_copy(match->mask.eth_addrs.dst,
+					ether_m_spec->h_dest);
+		}
+		if (ether_m_spec->h_proto) {
+			match->key.basic.n_proto = ether_spec->h_proto;
+			match->mask.basic.n_proto = ether_m_spec->h_proto;
+		}
+		}
+		break;
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW: {
+		const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec;
+
+		match->key.basic.n_proto = htons(ETH_P_IP);
+
+		v4_spec = &fs->h_u.tcp_ip4_spec;
+		v4_m_spec = &fs->m_u.tcp_ip4_spec;
+
+		if (v4_m_spec->ip4src) {
+			match->key.ipv4.src = v4_spec->ip4src;
+			match->mask.ipv4.src = v4_m_spec->ip4src;
+		}
+		if (v4_m_spec->ip4dst) {
+			match->key.ipv4.dst = v4_spec->ip4dst;
+			match->mask.ipv4.dst = v4_m_spec->ip4dst;
+		}
+		if (v4_m_spec->ip4src ||
+		    v4_m_spec->ip4dst) {
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] =
+				offsetof(struct ethtool_rx_flow_key, ipv4);
+		}
+		if (v4_m_spec->psrc) {
+			match->key.tp.src = v4_spec->psrc;
+			match->mask.tp.src = v4_m_spec->psrc;
+		}
+		if (v4_m_spec->pdst) {
+			match->key.tp.dst = v4_spec->pdst;
+			match->mask.tp.dst = v4_m_spec->pdst;
+		}
+		if (v4_m_spec->psrc ||
+		    v4_m_spec->pdst) {
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_PORTS);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
+				offsetof(struct ethtool_rx_flow_key, tp);
+		}
+		if (v4_m_spec->tos) {
+			match->key.ip.tos = v4_spec->tos;
+			match->mask.ip.tos = v4_m_spec->tos;
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_IP);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
+				offsetof(struct ethtool_rx_flow_key, ip);
+		}
+		}
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW: {
+		const struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec;
+
+		match->key.basic.n_proto = htons(ETH_P_IPV6);
+
+		v6_spec = &fs->h_u.tcp_ip6_spec;
+		v6_m_spec = &fs->m_u.tcp_ip6_spec;
+		if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) {
+			memcpy(&match->key.ipv6.src, v6_spec->ip6src,
+			       sizeof(match->key.ipv6.src));
+			memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src,
+			       sizeof(match->mask.ipv6.src));
+		}
+		if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) {
+			memcpy(&match->key.ipv6.dst, v6_spec->ip6dst,
+			       sizeof(match->key.ipv6.dst));
+			memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst,
+			       sizeof(match->mask.ipv6.dst));
+		}
+		if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) ||
+		    memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) {
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] =
+				offsetof(struct ethtool_rx_flow_key, ipv6);
+		}
+		if (v6_m_spec->psrc) {
+			match->key.tp.src = v6_spec->psrc;
+			match->mask.tp.src = v6_m_spec->psrc;
+		}
+		if (v6_m_spec->pdst) {
+			match->key.tp.dst = v6_spec->pdst;
+			match->mask.tp.dst = v6_m_spec->pdst;
+		}
+		if (v6_m_spec->psrc ||
+		    v6_m_spec->pdst) {
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_PORTS);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
+				offsetof(struct ethtool_rx_flow_key, tp);
+		}
+		if (v6_m_spec->tclass) {
+			match->key.ip.tos = v6_spec->tclass;
+			match->mask.ip.tos = v6_m_spec->tclass;
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_IP);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
+				offsetof(struct ethtool_rx_flow_key, ip);
+		}
+		}
+		break;
+	default:
+		ethtool_rx_flow_rule_destroy(flow);
+		return ERR_PTR(-EINVAL);
+	}
+
+	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		match->key.basic.ip_proto = IPPROTO_TCP;
+		break;
+	case UDP_V4_FLOW:
+	case UDP_V6_FLOW:
+		match->key.basic.ip_proto = IPPROTO_UDP;
+		break;
+	}
+	match->mask.basic.ip_proto = 0xff;
+
+	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC);
+	match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] =
+		offsetof(struct ethtool_rx_flow_key, basic);
+
+	if (fs->flow_type & FLOW_EXT) {
+		const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext;
+		const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext;
+
+		if (ext_m_spec->vlan_etype) {
+			match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype;
+			match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype;
+		}
+
+		if (ext_m_spec->vlan_tci) {
+			match->key.vlan.vlan_id =
+				ntohs(ext_h_spec->vlan_tci) & 0x0fff;
+			match->mask.vlan.vlan_id =
+				ntohs(ext_m_spec->vlan_tci) & 0x0fff;
+
+			match->key.vlan.vlan_dei =
+				!!(ext_h_spec->vlan_tci & htons(0x1000));
+			match->mask.vlan.vlan_dei =
+				!!(ext_m_spec->vlan_tci & htons(0x1000));
+
+			match->key.vlan.vlan_priority =
+				(ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13;
+			match->mask.vlan.vlan_priority =
+				(ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13;
+		}
+
+		if (ext_m_spec->vlan_etype ||
+		    ext_m_spec->vlan_tci) {
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_VLAN);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
+				offsetof(struct ethtool_rx_flow_key, vlan);
+		}
+	}
+	if (fs->flow_type & FLOW_MAC_EXT) {
+		const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext;
+		const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext;
+
+		memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest,
+		       ETH_ALEN);
+		memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest,
+		       ETH_ALEN);
+
+		match->dissector.used_keys |=
+			BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
+		match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
+			offsetof(struct ethtool_rx_flow_key, eth_addrs);
+	}
+
+	act = &flow->rule->action.entries[0];
+	switch (fs->ring_cookie) {
+	case RX_CLS_FLOW_DISC:
+		act->id = FLOW_ACTION_DROP;
+		break;
+	case RX_CLS_FLOW_WAKE:
+		act->id = FLOW_ACTION_WAKE;
+		break;
+	default:
+		act->id = FLOW_ACTION_QUEUE;
+		if (fs->flow_type & FLOW_RSS)
+			act->queue.ctx = input->rss_ctx;
+
+		act->queue.vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
+		act->queue.index = ethtool_get_flow_spec_ring(fs->ring_cookie);
+		break;
+	}
+
+	return flow;
+}
+EXPORT_SYMBOL(ethtool_rx_flow_rule_create);
+
+void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *flow)
+{
+	kfree(flow->rule);
+	kfree(flow);
+}
+EXPORT_SYMBOL(ethtool_rx_flow_rule_destroy);
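
The two exported symbols above give drivers a translation layer from legacy ethtool NFC rules to the flow_rule representation already used for cls_flower offload. A hedged sketch of the intended consumer in a driver's set_rxnfc path; example_parse_ethtool_rule() and the hardware-programming step are illustrative, while the ethtool_rx_flow_* calls and flow_rule_match_*() helpers are the existing flow_offload API:

static int example_parse_ethtool_rule(struct net_device *dev,
				      struct ethtool_rx_flow_spec *fs)
{
	struct ethtool_rx_flow_spec_input input = { .fs = fs };
	struct ethtool_rx_flow_rule *ethtool_rule;
	struct flow_rule *rule;

	ethtool_rule = ethtool_rx_flow_rule_create(&input);
	if (IS_ERR(ethtool_rule))
		return PTR_ERR(ethtool_rule);

	rule = ethtool_rule->rule;
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		/* program match.key->src/dst, masked by match.mask, into hw */
	}

	ethtool_rx_flow_rule_destroy(ethtool_rule);
	return 0;
}
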
diff --git a/net/core/failover.c b/net/core/failover.c
index 4a92a98..b5cd3c7 100644
--- a/net/core/failover.c
+++ b/net/core/failover.c
@@ -80,14 +80,14 @@
 		goto err_upper_link;
 	}
 
-	slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
+	slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
 
 	if (fops && fops->slave_register &&
 	    !fops->slave_register(slave_dev, failover_dev))
 		return NOTIFY_OK;
 
 	netdev_upper_dev_unlink(slave_dev, failover_dev);
-	slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+	slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
 err_upper_link:
 	netdev_rx_handler_unregister(slave_dev);
 done:
@@ -121,7 +121,7 @@
 
 	netdev_rx_handler_unregister(slave_dev);
 	netdev_upper_dev_unlink(slave_dev, failover_dev);
-	slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+	slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
 
 	if (fops && fops->slave_unregister &&
 	    !fops->slave_unregister(slave_dev, failover_dev))
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 13a40b8..470a606 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -5,8 +5,15 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/fib_notifier.h>
 
+static unsigned int fib_notifier_net_id;
+
+struct fib_notifier_net {
+	struct list_head fib_notifier_ops;
+};
+
 static ATOMIC_NOTIFIER_HEAD(fib_chain);
 
 int call_fib_notifier(struct notifier_block *nb, struct net *net,
@@ -34,6 +41,7 @@
 
 static unsigned int fib_seq_sum(void)
 {
+	struct fib_notifier_net *fn_net;
 	struct fib_notifier_ops *ops;
 	unsigned int fib_seq = 0;
 	struct net *net;
@@ -41,8 +49,9 @@
 	rtnl_lock();
 	down_read(&net_rwsem);
 	for_each_net(net) {
+		fn_net = net_generic(net, fib_notifier_net_id);
 		rcu_read_lock();
-		list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+		list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
 			if (!try_module_get(ops->owner))
 				continue;
 			fib_seq += ops->fib_seq_read(net);
@@ -58,9 +67,10 @@
 
 static int fib_net_dump(struct net *net, struct notifier_block *nb)
 {
+	struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
 	struct fib_notifier_ops *ops;
 
-	list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+	list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
 		int err;
 
 		if (!try_module_get(ops->owner))
@@ -127,12 +137,13 @@
 static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
 				       struct net *net)
 {
+	struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
 	struct fib_notifier_ops *o;
 
-	list_for_each_entry(o, &net->fib_notifier_ops, list)
+	list_for_each_entry(o, &fn_net->fib_notifier_ops, list)
 		if (ops->family == o->family)
 			return -EEXIST;
-	list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
+	list_add_tail_rcu(&ops->list, &fn_net->fib_notifier_ops);
 	return 0;
 }
 
@@ -167,18 +178,24 @@
 
 static int __net_init fib_notifier_net_init(struct net *net)
 {
-	INIT_LIST_HEAD(&net->fib_notifier_ops);
+	struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+	INIT_LIST_HEAD(&fn_net->fib_notifier_ops);
 	return 0;
 }
 
 static void __net_exit fib_notifier_net_exit(struct net *net)
 {
-	WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops));
+	struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+	WARN_ON_ONCE(!list_empty(&fn_net->fib_notifier_ops));
 }
 
 static struct pernet_operations fib_notifier_net_ops = {
 	.init = fib_notifier_net_init,
 	.exit = fib_notifier_net_exit,
+	.id = &fib_notifier_net_id,
+	.size = sizeof(struct fib_notifier_net),
 };
 
 static int __init fib_notifier_init(void)
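
The hunks above move fib_notifier_ops off struct net and into net_generic() storage keyed by fib_notifier_net_id, allocated automatically via the pernet_operations .id/.size fields. A minimal sketch of the same pattern, with example_* names as placeholders:

#include <net/net_namespace.h>
#include <net/netns/generic.h>

static unsigned int example_net_id;

struct example_net {
	struct list_head entries;
};

static int __net_init example_net_init(struct net *net)
{
	struct example_net *en = net_generic(net, example_net_id);

	INIT_LIST_HEAD(&en->entries);
	return 0;
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.id   = &example_net_id,		/* core fills this in */
	.size = sizeof(struct example_net),	/* allocated per netns */
};

static int __init example_init(void)
{
	return register_pernet_subsys(&example_net_ops);
}
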
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 0ff3953..dd220ce 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/core/fib_rules.c		Generic Routing Rules
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License as
- *	published by the Free Software Foundation, version 2.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  */
 
@@ -746,7 +743,8 @@
 		goto errout;
 	}
 
-	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX,
+				     ops->policy, extack);
 	if (err < 0) {
 		NL_SET_ERR_MSG(extack, "Error parsing msg");
 		goto errout;
@@ -853,7 +851,8 @@
 		goto errout;
 	}
 
-	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX,
+				     ops->policy, extack);
 	if (err < 0) {
 		NL_SET_ERR_MSG(extack, "Error parsing msg");
 		goto errout;
@@ -1063,13 +1062,47 @@
 	return err;
 }
 
+static int fib_valid_dumprule_req(const struct nlmsghdr *nlh,
+				   struct netlink_ext_ack *extack)
+{
+	struct fib_rule_hdr *frh;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+		NL_SET_ERR_MSG(extack, "Invalid header for fib rule dump request");
+		return -EINVAL;
+	}
+
+	frh = nlmsg_data(nlh);
+	if (frh->dst_len || frh->src_len || frh->tos || frh->table ||
+	    frh->res1 || frh->res2 || frh->action || frh->flags) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid values in header for fib rule dump request");
+		return -EINVAL;
+	}
+
+	if (nlmsg_attrlen(nlh, sizeof(*frh))) {
+		NL_SET_ERR_MSG(extack, "Invalid data after header in fib rule dump request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	struct fib_rules_ops *ops;
 	int idx = 0, family;
 
-	family = rtnl_msg_family(cb->nlh);
+	if (cb->strict_check) {
+		int err = fib_valid_dumprule_req(nlh, cb->extack);
+
+		if (err < 0)
+			return err;
+	}
+
+	family = rtnl_msg_family(nlh);
 	if (family != AF_UNSPEC) {
 		/* Protocol specific dump request */
 		ops = lookup_rules_ops(net, family);
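
With cb->strict_check set (the socket opted in via NETLINK_GET_STRICT_CHK), a fib rule dump must now be exactly a fib_rule_hdr with every field other than family zeroed and no attributes appended, or it fails with the extack messages above. A userspace sketch of a conforming request, assuming nlfd is a connected NETLINK_ROUTE socket:

#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/fib_rules.h>

int dump_rules(int nlfd)
{
	struct {
		struct nlmsghdr nlh;
		struct fib_rule_hdr frh;
	} req;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.frh));
	req.nlh.nlmsg_type = RTM_GETRULE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.frh.family = AF_INET;	/* all other header fields stay zero */

	return send(nlfd, &req, req.nlh.nlmsg_len, 0);
}
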
diff --git a/net/core/filter.c b/net/core/filter.c
index 5e00f2b..6d0111b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Linux Socket Filter - Kernel level socket filtering
  *
@@ -12,11 +13,6 @@
  *	Alexei Starovoitov <ast@plumgrid.com>
  *	Daniel Borkmann <dborkman@redhat.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
  * Andi Kleen - Fix a few bad bugs and races.
  * Kris Katterjohn - Added many additional checks in bpf_check_classic()
  */
@@ -38,6 +34,7 @@
 #include <net/protocol.h>
 #include <net/netlink.h>
 #include <linux/skbuff.h>
+#include <linux/skmsg.h>
 #include <net/sock.h>
 #include <net/flow_dissector.h>
 #include <linux/errno.h>
@@ -58,16 +55,24 @@
 #include <net/busy_poll.h>
 #include <net/tcp.h>
 #include <net/xfrm.h>
+#include <net/udp.h>
 #include <linux/bpf_trace.h>
 #include <net/xdp_sock.h>
 #include <linux/inetdevice.h>
+#include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
 #include <net/ip_fib.h>
+#include <net/nexthop.h>
 #include <net/flow.h>
 #include <net/arp.h>
 #include <net/ipv6.h>
+#include <net/net_namespace.h>
 #include <linux/seg6_local.h>
 #include <net/seg6.h>
 #include <net/seg6_local.h>
+#include <net/lwtunnel.h>
+#include <net/ipv6_stubs.h>
+#include <net/bpf_sk_storage.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -291,22 +296,18 @@
 		break;
 
 	case SKF_AD_VLAN_TAG:
-	case SKF_AD_VLAN_TAG_PRESENT:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 
 		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
 		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
 				      offsetof(struct sk_buff, vlan_tci));
-		if (skb_field == SKF_AD_VLAN_TAG) {
-			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
-						~VLAN_TAG_PRESENT);
-		} else {
-			/* dst_reg >>= 12 */
-			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
-			/* dst_reg &= 1 */
+		break;
+	case SKF_AD_VLAN_TAG_PRESENT:
+		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET());
+		if (PKT_VLAN_PRESENT_BIT)
+			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
+		if (PKT_VLAN_PRESENT_BIT < 7)
 			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
-		}
 		break;
 	}
 
@@ -462,7 +463,8 @@
 		bool ldx_off_ok = offset <= S16_MAX;
 
 		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
-		*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
+		if (offset)
+			*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
 		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
 				      size, 2 + endian + (!ldx_off_ok * 2));
 		if (ldx_off_ok) {
@@ -1726,6 +1728,40 @@
 	.arg4_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_4(bpf_flow_dissector_load_bytes,
+	   const struct bpf_flow_dissector *, ctx, u32, offset,
+	   void *, to, u32, len)
+{
+	void *ptr;
+
+	if (unlikely(offset > 0xffff))
+		goto err_clear;
+
+	if (unlikely(!ctx->skb))
+		goto err_clear;
+
+	ptr = skb_header_pointer(ctx->skb, offset, len, to);
+	if (unlikely(!ptr))
+		goto err_clear;
+	if (ptr != to)
+		memcpy(to, ptr, len);
+
+	return 0;
+err_clear:
+	memset(to, 0, len);
+	return -EFAULT;
+}
+
+static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
+	.func		= bpf_flow_dissector_load_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+};
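
This helper backs bpf_skb_load_bytes() for BPF_PROG_TYPE_FLOW_DISSECTOR programs and fails cleanly when the dissector context carries no skb. A hedged sketch of a dissector consuming it, IPv4-only with no options or fragment handling, assuming libbpf-style section conventions:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("flow_dissector")
int dissect_ipv4(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct iphdr iph;

	if (keys->n_proto != bpf_htons(ETH_P_IP))
		return BPF_DROP;

	/* serviced by bpf_flow_dissector_load_bytes() in this prog type */
	if (bpf_skb_load_bytes(skb, keys->nhoff, &iph, sizeof(iph)) < 0)
		return BPF_DROP;

	keys->addr_proto = ETH_P_IP;
	keys->ipv4_src   = iph.saddr;
	keys->ipv4_dst   = iph.daddr;
	keys->ip_proto   = iph.protocol;
	keys->thoff      = keys->nhoff + (iph.ihl << 2);

	return BPF_OK;
}

char _license[] SEC("license") = "GPL";
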
+
 BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
 	   u32, offset, void *, to, u32, len, u32, start_header)
 {
@@ -1791,6 +1827,18 @@
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
+{
+	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_sk_fullsock_proto = {
+	.func		= bpf_sk_fullsock,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+};
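
bpf_sk_fullsock() upgrades a sock-common pointer, which may reference a request or timewait minisocket, into a full struct bpf_sock that the verifier lets the program read completely. A hedged sketch from a cgroup skb program (return 1 allows, 0 drops):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/egress")
int egress_established_only(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;

	if (!sk)			/* no socket attached: allow */
		return 1;
	sk = bpf_sk_fullsock(sk);	/* NULL for request/timewait socks */
	if (!sk)
		return 1;

	return sk->state == BPF_TCP_ESTABLISHED;
}

char _license[] SEC("license") = "GPL";
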
+
 static inline int sk_skb_try_make_writable(struct sk_buff *skb,
 					   unsigned int write_len)
 {
@@ -2000,7 +2048,7 @@
 {
 	int ret;
 
-	if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
+	if (dev_xmit_recursion()) {
 		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
 		kfree_skb(skb);
 		return -ENETDOWN;
@@ -2008,9 +2056,9 @@
 
 	skb->dev = dev;
 
-	__this_cpu_inc(xmit_recursion);
+	dev_xmit_recursion_inc();
 	ret = dev_queue_xmit(skb);
-	__this_cpu_dec(xmit_recursion);
+	dev_xmit_recursion_dec();
 
 	return ret;
 }
@@ -2018,18 +2066,19 @@
 static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
 				 u32 flags)
 {
-	/* skb->mac_len is not set on normal egress */
-	unsigned int mlen = skb->network_header - skb->mac_header;
+	unsigned int mlen = skb_network_offset(skb);
 
-	__skb_pull(skb, mlen);
+	if (mlen) {
+		__skb_pull(skb, mlen);
 
-	/* At ingress, the mac header has already been pulled once.
-	 * At egress, skb_pospull_rcsum has to be done in case that
-	 * the skb is originated from ingress (i.e. a forwarded skb)
-	 * to ensure that rcsum starts at net header.
-	 */
-	if (!skb_at_tc_ingress(skb))
-		skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+		/* At ingress, the mac header has already been pulled once.
+		 * At egress, skb_postpull_rcsum has to be done in case
+		 * the skb originated from ingress (i.e. a forwarded skb)
+		 * to ensure that rcsum starts at the net header.
+		 */
+		if (!skb_at_tc_ingress(skb))
+			skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+	}
 	skb_pop_mac_header(skb);
 	skb_reset_mac_len(skb);
 	return flags & BPF_F_INGRESS ?
@@ -2109,8 +2158,8 @@
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return TC_ACT_SHOT;
 
-	ri->ifindex = ifindex;
 	ri->flags = flags;
+	ri->tgt_index = ifindex;
 
 	return TC_ACT_REDIRECT;
 }
@@ -2120,8 +2169,8 @@
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct net_device *dev;
 
-	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
-	ri->ifindex = 0;
+	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
+	ri->tgt_index = 0;
 	if (unlikely(!dev)) {
 		kfree_skb(skb);
 		return -EINVAL;
@@ -2138,123 +2187,7 @@
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key);
-	if (!tcb->bpf.sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-static const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
-	.func           = bpf_sk_redirect_hash,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_PTR_TO_MAP_KEY,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
-	   struct bpf_map *, map, u32, key, u64, flags)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
-	if (!tcb->bpf.sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-struct sock *do_sk_redirect_map(struct sk_buff *skb)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	return tcb->bpf.sk_redir;
-}
-
-static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
-	.func           = bpf_sk_redirect_map,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_ANYTHING,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg_buff *, msg,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	msg->flags = flags;
-	msg->sk_redir = __sock_hash_lookup_elem(map, key);
-	if (!msg->sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-static const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
-	.func           = bpf_msg_redirect_hash,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_PTR_TO_MAP_KEY,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
-	   struct bpf_map *, map, u32, key, u64, flags)
-{
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	msg->flags = flags;
-	msg->sk_redir = __sock_map_lookup_elem(map, key);
-	if (!msg->sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
-{
-	return msg->sk_redir;
-}
-
-static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
-	.func           = bpf_msg_redirect_map,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_ANYTHING,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
 {
 	msg->apply_bytes = bytes;
 	return 0;
@@ -2268,7 +2201,7 @@
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
 {
 	msg->cork_bytes = bytes;
 	return 0;
@@ -2282,45 +2215,37 @@
 	.arg2_type      = ARG_ANYTHING,
 };
 
-#define sk_msg_iter_var(var)			\
-	do {					\
-		var++;				\
-		if (var == MAX_SKB_FRAGS)	\
-			var = 0;		\
-	} while (0)
-
-BPF_CALL_4(bpf_msg_pull_data,
-	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
+	   u32, end, u64, flags)
 {
-	unsigned int len = 0, offset = 0, copy = 0, poffset = 0;
-	int bytes = end - start, bytes_sg_total;
-	struct scatterlist *sg = msg->sg_data;
-	int first_sg, last_sg, i, shift;
-	unsigned char *p, *to, *from;
+	u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
+	u32 first_sge, last_sge, i, shift, bytes_sg_total;
+	struct scatterlist *sge;
+	u8 *raw, *to, *from;
 	struct page *page;
 
 	if (unlikely(flags || end <= start))
 		return -EINVAL;
 
 	/* First find the starting scatterlist element */
-	i = msg->sg_start;
+	i = msg->sg.start;
 	do {
-		len = sg[i].length;
+		len = sk_msg_elem(msg, i)->length;
 		if (start < offset + len)
 			break;
 		offset += len;
-		sk_msg_iter_var(i);
-	} while (i != msg->sg_end);
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
 
 	if (unlikely(start >= offset + len))
 		return -EINVAL;
 
-	first_sg = i;
+	first_sge = i;
 	/* The start may point into the sg element so we need to also
 	 * account for the headroom.
 	 */
 	bytes_sg_total = start - offset + bytes;
-	if (!msg->sg_copy[i] && bytes_sg_total <= len)
+	if (!msg->sg.copy[i] && bytes_sg_total <= len)
 		goto out;
 
 	/* At this point we need to linearize multiple scatterlist
@@ -2334,12 +2259,12 @@
 	 * will copy the entire sg entry.
 	 */
 	do {
-		copy += sg[i].length;
-		sk_msg_iter_var(i);
+		copy += sk_msg_elem(msg, i)->length;
+		sk_msg_iter_var_next(i);
 		if (bytes_sg_total <= copy)
 			break;
-	} while (i != msg->sg_end);
-	last_sg = i;
+	} while (i != msg->sg.end);
+	last_sge = i;
 
 	if (unlikely(bytes_sg_total > copy))
 		return -EINVAL;
@@ -2348,63 +2273,61 @@
 			   get_order(copy));
 	if (unlikely(!page))
 		return -ENOMEM;
-	p = page_address(page);
 
-	i = first_sg;
+	raw = page_address(page);
+	i = first_sge;
 	do {
-		from = sg_virt(&sg[i]);
-		len = sg[i].length;
-		to = p + poffset;
+		sge = sk_msg_elem(msg, i);
+		from = sg_virt(sge);
+		len = sge->length;
+		to = raw + poffset;
 
 		memcpy(to, from, len);
 		poffset += len;
-		sg[i].length = 0;
-		put_page(sg_page(&sg[i]));
+		sge->length = 0;
+		put_page(sg_page(sge));
 
-		sk_msg_iter_var(i);
-	} while (i != last_sg);
+		sk_msg_iter_var_next(i);
+	} while (i != last_sge);
 
-	sg[first_sg].length = copy;
-	sg_set_page(&sg[first_sg], page, copy, 0);
+	sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
 
 	/* To repair sg ring we need to shift entries. If we only
 	 * had a single entry though we can just replace it and
 	 * be done. Otherwise walk the ring and shift the entries.
 	 */
-	WARN_ON_ONCE(last_sg == first_sg);
-	shift = last_sg > first_sg ?
-		last_sg - first_sg - 1 :
-		MAX_SKB_FRAGS - first_sg + last_sg - 1;
+	WARN_ON_ONCE(last_sge == first_sge);
+	shift = last_sge > first_sge ?
+		last_sge - first_sge - 1 :
+		NR_MSG_FRAG_IDS - first_sge + last_sge - 1;
 	if (!shift)
 		goto out;
 
-	i = first_sg;
-	sk_msg_iter_var(i);
+	i = first_sge;
+	sk_msg_iter_var_next(i);
 	do {
-		int move_from;
+		u32 move_from;
 
-		if (i + shift >= MAX_SKB_FRAGS)
-			move_from = i + shift - MAX_SKB_FRAGS;
+		if (i + shift >= NR_MSG_FRAG_IDS)
+			move_from = i + shift - NR_MSG_FRAG_IDS;
 		else
 			move_from = i + shift;
-
-		if (move_from == msg->sg_end)
+		if (move_from == msg->sg.end)
 			break;
 
-		sg[i] = sg[move_from];
-		sg[move_from].length = 0;
-		sg[move_from].page_link = 0;
-		sg[move_from].offset = 0;
-
-		sk_msg_iter_var(i);
+		msg->sg.data[i] = msg->sg.data[move_from];
+		msg->sg.data[move_from].length = 0;
+		msg->sg.data[move_from].page_link = 0;
+		msg->sg.data[move_from].offset = 0;
+		sk_msg_iter_var_next(i);
 	} while (1);
-	msg->sg_end -= shift;
-	if (msg->sg_end < 0)
-		msg->sg_end += MAX_SKB_FRAGS;
-out:
-	msg->data = sg_virt(&sg[first_sg]) + start - offset;
-	msg->data_end = msg->data + bytes;
 
+	msg->sg.end = msg->sg.end - shift > msg->sg.end ?
+		      msg->sg.end - shift + NR_MSG_FRAG_IDS :
+		      msg->sg.end - shift;
+out:
+	msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
+	msg->data_end = msg->data + bytes;
 	return 0;
 }
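
The caller contract survives the sk_msg rework: an SK_MSG program must pull the byte range it wants to inspect into a linear region before dereferencing msg->data. A short sketch, with the 4-byte window and sentinel value purely illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_msg")
int inspect_first_word(struct sk_msg_md *msg)
{
	void *data, *data_end;
	__u8 *p;

	/* make bytes [0, 4) contiguous and visible via msg->data */
	if (bpf_msg_pull_data(msg, 0, 4, 0))
		return SK_DROP;

	data = msg->data;
	data_end = msg->data_end;
	p = data;
	if ((void *)(p + 4) > data_end)
		return SK_DROP;

	return p[0] == 0xff ? SK_DROP : SK_PASS;
}

char _license[] SEC("license") = "GPL";
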
 
@@ -2418,6 +2341,305 @@
 	.arg4_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
+	   u32, len, u64, flags)
+{
+	struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
+	u32 new, i = 0, l, space, copy = 0, offset = 0;
+	u8 *raw, *to, *from;
+	struct page *page;
+
+	if (unlikely(flags))
+		return -EINVAL;
+
+	/* First find the starting scatterlist element */
+	i = msg->sg.start;
+	do {
+		l = sk_msg_elem(msg, i)->length;
+
+		if (start < offset + l)
+			break;
+		offset += l;
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
+
+	if (start >= offset + l)
+		return -EINVAL;
+
+	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
+
+	/* If no space is available we will fall back to a copy; we need at
+	 * least one scatterlist elem available to push data into when start
+	 * aligns to the beginning of an element, or two when it falls inside
+	 * an element. We handle the start-equals-offset case specially
+	 * because it's the common case for inserting a header.
+	 */
+	if (!space || (space == 1 && start != offset))
+		copy = msg->sg.data[i].length;
+
+	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
+			   get_order(copy + len));
+	if (unlikely(!page))
+		return -ENOMEM;
+
+	if (copy) {
+		int front, back;
+
+		raw = page_address(page);
+
+		psge = sk_msg_elem(msg, i);
+		front = start - offset;
+		back = psge->length - front;
+		from = sg_virt(psge);
+
+		if (front)
+			memcpy(raw, from, front);
+
+		if (back) {
+			from += front;
+			to = raw + front + len;
+
+			memcpy(to, from, back);
+		}
+
+		put_page(sg_page(psge));
+	} else if (start - offset) {
+		psge = sk_msg_elem(msg, i);
+		rsge = sk_msg_elem_cpy(msg, i);
+
+		psge->length = start - offset;
+		rsge.length -= psge->length;
+		rsge.offset += start;
+
+		sk_msg_iter_var_next(i);
+		sg_unmark_end(psge);
+		sk_msg_iter_next(msg, end);
+	}
+
+	/* Slot(s) to place newly allocated data */
+	new = i;
+
+	/* Shift one or two slots as needed */
+	if (!copy) {
+		sge = sk_msg_elem_cpy(msg, i);
+
+		sk_msg_iter_var_next(i);
+		sg_unmark_end(&sge);
+		sk_msg_iter_next(msg, end);
+
+		nsge = sk_msg_elem_cpy(msg, i);
+		if (rsge.length) {
+			sk_msg_iter_var_next(i);
+			nnsge = sk_msg_elem_cpy(msg, i);
+		}
+
+		while (i != msg->sg.end) {
+			msg->sg.data[i] = sge;
+			sge = nsge;
+			sk_msg_iter_var_next(i);
+			if (rsge.length) {
+				nsge = nnsge;
+				nnsge = sk_msg_elem_cpy(msg, i);
+			} else {
+				nsge = sk_msg_elem_cpy(msg, i);
+			}
+		}
+	}
+
+	/* Place newly allocated data buffer */
+	sk_mem_charge(msg->sk, len);
+	msg->sg.size += len;
+	msg->sg.copy[new] = false;
+	sg_set_page(&msg->sg.data[new], page, len + copy, 0);
+	if (rsge.length) {
+		get_page(sg_page(&rsge));
+		sk_msg_iter_var_next(new);
+		msg->sg.data[new] = rsge;
+	}
+
+	sk_msg_compute_data_pointers(msg);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_push_data_proto = {
+	.func		= bpf_msg_push_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+static void sk_msg_shift_left(struct sk_msg *msg, int i)
+{
+	int prev;
+
+	do {
+		prev = i;
+		sk_msg_iter_var_next(i);
+		msg->sg.data[prev] = msg->sg.data[i];
+	} while (i != msg->sg.end);
+
+	sk_msg_iter_prev(msg, end);
+}
+
+static void sk_msg_shift_right(struct sk_msg *msg, int i)
+{
+	struct scatterlist tmp, sge;
+
+	sk_msg_iter_next(msg, end);
+	sge = sk_msg_elem_cpy(msg, i);
+	sk_msg_iter_var_next(i);
+	tmp = sk_msg_elem_cpy(msg, i);
+
+	while (i != msg->sg.end) {
+		msg->sg.data[i] = sge;
+		sk_msg_iter_var_next(i);
+		sge = tmp;
+		tmp = sk_msg_elem_cpy(msg, i);
+	}
+}
+
+BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
+	   u32, len, u64, flags)
+{
+	u32 i = 0, l, space, offset = 0;
+	u64 last = start + len;
+	int pop;
+
+	if (unlikely(flags))
+		return -EINVAL;
+
+	/* First find the starting scatterlist element */
+	i = msg->sg.start;
+	do {
+		l = sk_msg_elem(msg, i)->length;
+
+		if (start < offset + l)
+			break;
+		offset += l;
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
+
+	/* Bounds checks: start and pop must be inside message */
+	if (start >= offset + l || last >= msg->sg.size)
+		return -EINVAL;
+
+	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
+
+	pop = len;
+	/* --------------| offset
+	 * -| start      |-------- len -------|
+	 *
+	 *  |----- a ----|-------- pop -------|----- b ----|
+	 *  |______________________________________________| length
+	 *
+	 *
+	 * a:   region at front of scatter element to save
+	 * b:   region at back of scatter element to save when length > a + pop
+	 * pop: region to pop from element; same as the input 'pop' here, and
+	 *      decremented below per iteration.
+	 *
+	 * Two top-level cases to handle when start != offset: first, b is
+	 * non-zero and second, b is zero, corresponding to when a pop
+	 * includes more than one element.
+	 *
+	 * Then, if b is non-zero AND there is no space, allocate space and
+	 * compact the a and b regions into a page. If there is space, shift
+	 * the ring to the right, freeing the next element in the ring to
+	 * place b, leaving a untouched except to reduce its length.
+	 */
+	if (start != offset) {
+		struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
+		int a = start;
+		int b = sge->length - pop - a;
+
+		sk_msg_iter_var_next(i);
+
+		if (pop < sge->length - a) {
+			if (space) {
+				sge->length = a;
+				sk_msg_shift_right(msg, i);
+				nsge = sk_msg_elem(msg, i);
+				get_page(sg_page(sge));
+				sg_set_page(nsge,
+					    sg_page(sge),
+					    b, sge->offset + pop + a);
+			} else {
+				struct page *page, *orig;
+				u8 *to, *from;
+
+				page = alloc_pages(__GFP_NOWARN |
+						   __GFP_COMP   | GFP_ATOMIC,
+						   get_order(a + b));
+				if (unlikely(!page))
+					return -ENOMEM;
+
+				sge->length = a;
+				orig = sg_page(sge);
+				from = sg_virt(sge);
+				to = page_address(page);
+				memcpy(to, from, a);
+				memcpy(to + a, from + a + pop, b);
+				sg_set_page(sge, page, a + b, 0);
+				put_page(orig);
+			}
+			pop = 0;
+		} else if (pop >= sge->length - a) {
+			pop -= (sge->length - a);
+			sge->length = a;
+		}
+	}
+
+	/* From above the current layout _must_ be as follows,
+	 *
+	 * -| offset
+	 * -| start
+	 *
+	 *  |---- pop ---|---------------- b ------------|
+	 *  |____________________________________________| length
+	 *
+	 * Offset and start of the current msg elem are equal because in the
+	 * previous case we handled offset != start and either consumed the
+	 * entire element and advanced to the next element OR pop == 0.
+	 *
+	 * Two cases to handle here: first, pop is less than the length,
+	 * leaving some remainder b above. Simply adjust the element's
+	 * layout in this case. Or, pop >= the length of the element so
+	 * that b = 0; in this case advance to the next element,
+	 * decrementing pop.
+	 */
+	while (pop) {
+		struct scatterlist *sge = sk_msg_elem(msg, i);
+
+		if (pop < sge->length) {
+			sge->length -= pop;
+			sge->offset += pop;
+			pop = 0;
+		} else {
+			pop -= sge->length;
+			sk_msg_shift_left(msg, i);
+		}
+		sk_msg_iter_var_next(i);
+	}
+
+	sk_mem_uncharge(msg->sk, len - pop);
+	msg->sg.size -= (len - pop);
+	sk_msg_compute_data_pointers(msg);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pop_data_proto = {
+	.func		= bpf_msg_pop_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
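
A hedged sketch exercising both new helpers from SK_MSG programs: push room for a 4-byte application header at the front of the payload in one program, strip such a header with pop in another. The header value and sizes are illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_msg")
int add_app_hdr(struct sk_msg_md *msg)
{
	__u32 *hdr;

	/* reserve 4 uninitialized bytes at offset 0, then linearize them */
	if (bpf_msg_push_data(msg, 0, 4, 0) ||
	    bpf_msg_pull_data(msg, 0, 4, 0))
		return SK_DROP;

	hdr = msg->data;
	if ((void *)(hdr + 1) > msg->data_end)
		return SK_DROP;
	*hdr = 0xfeedcafe;

	return SK_PASS;
}

SEC("sk_msg")
int strip_app_hdr(struct sk_msg_md *msg)
{
	/* drop the first 4 payload bytes */
	return bpf_msg_pop_data(msg, 0, 4, 0) ? SK_DROP : SK_PASS;
}

char _license[] SEC("license") = "GPL";
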
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -2613,8 +2835,7 @@
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
-	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_cow(skb, len_diff);
@@ -2655,8 +2876,7 @@
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
-	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2776,44 +2996,128 @@
 	}
 }
 
-static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK	(BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
+					 BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+
+#define BPF_F_ADJ_ROOM_MASK		(BPF_F_ADJ_ROOM_FIXED_GSO | \
+					 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
+					 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
+					 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+					 BPF_F_ADJ_ROOM_ENCAP_L2( \
+					  BPF_ADJ_ROOM_ENCAP_L2_MASK))
+
+static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
+			    u64 flags)
 {
-	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+	u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
+	bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
+	u16 mac_len = 0, inner_net = 0, inner_trans = 0;
+	unsigned int gso_type = SKB_GSO_DODGY;
 	int ret;
 
-	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
-	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
-		return -ENOTSUPP;
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+		/* udp gso_size delineates datagrams, only allow if fixed */
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+		    !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			return -ENOTSUPP;
+	}
 
-	ret = skb_cow(skb, len_diff);
+	ret = skb_cow_head(skb, len_diff);
 	if (unlikely(ret < 0))
 		return ret;
 
+	if (encap) {
+		if (skb->protocol != htons(ETH_P_IP) &&
+		    skb->protocol != htons(ETH_P_IPV6))
+			return -ENOTSUPP;
+
+		if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
+		    flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+			return -EINVAL;
+
+		if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
+		    flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+			return -EINVAL;
+
+		if (skb->encapsulation)
+			return -EALREADY;
+
+		mac_len = skb->network_header - skb->mac_header;
+		inner_net = skb->network_header;
+		if (inner_mac_len > len_diff)
+			return -EINVAL;
+		inner_trans = skb->transport_header;
+	}
+
 	ret = bpf_skb_net_hdr_push(skb, off, len_diff);
 	if (unlikely(ret < 0))
 		return ret;
 
+	if (encap) {
+		skb->inner_mac_header = inner_net - inner_mac_len;
+		skb->inner_network_header = inner_net;
+		skb->inner_transport_header = inner_trans;
+		skb_set_inner_protocol(skb, skb->protocol);
+
+		skb->encapsulation = 1;
+		skb_set_network_header(skb, mac_len);
+
+		if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+			gso_type |= SKB_GSO_UDP_TUNNEL;
+		else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
+			gso_type |= SKB_GSO_GRE;
+		else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+			gso_type |= SKB_GSO_IPXIP6;
+		else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
+			gso_type |= SKB_GSO_IPXIP4;
+
+		if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
+		    flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
+			int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
+					sizeof(struct ipv6hdr) :
+					sizeof(struct iphdr);
+
+			skb_set_transport_header(skb, mac_len + nh_len);
+		}
+
+		/* Match skb->protocol to new outer l3 protocol */
+		if (skb->protocol == htons(ETH_P_IP) &&
+		    flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+			skb->protocol = htons(ETH_P_IPV6);
+		else if (skb->protocol == htons(ETH_P_IPV6) &&
+			 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
+			skb->protocol = htons(ETH_P_IP);
+	}
+
 	if (skb_is_gso(skb)) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
 		/* Due to header grow, MSS needs to be downgraded. */
-		skb_decrease_gso_size(shinfo, len_diff);
+		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			skb_decrease_gso_size(shinfo, len_diff);
+
 		/* Header must be checked, and gso_segs recomputed. */
-		shinfo->gso_type |= SKB_GSO_DODGY;
+		shinfo->gso_type |= gso_type;
 		shinfo->gso_segs = 0;
 	}
 
 	return 0;
 }
 
-static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
+static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
+			      u64 flags)
 {
-	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
-	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
-	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
-		return -ENOTSUPP;
+	if (flags & ~BPF_F_ADJ_ROOM_FIXED_GSO)
+		return -EINVAL;
+
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+		/* udp gso_size delineates datagrams, only allow if fixed */
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+		    !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			return -ENOTSUPP;
+	}
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
 	if (unlikely(ret < 0))
@@ -2827,7 +3131,9 @@
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
 		/* Due to header shrink, MSS can be upgraded. */
-		skb_increase_gso_size(shinfo, len_diff);
+		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			skb_increase_gso_size(shinfo, len_diff);
+
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;
@@ -2842,49 +3148,50 @@
 			  SKB_MAX_ALLOC;
 }
 
-static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
+BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+	   u32, mode, u64, flags)
 {
-	bool trans_same = skb->transport_header == skb->network_header;
 	u32 len_cur, len_diff_abs = abs(len_diff);
 	u32 len_min = bpf_skb_net_base_len(skb);
 	u32 len_max = __bpf_skb_max_len(skb);
 	__be16 proto = skb->protocol;
 	bool shrink = len_diff < 0;
+	u32 off;
 	int ret;
 
+	if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
+		return -EINVAL;
 	if (unlikely(len_diff_abs > 0xfffU))
 		return -EFAULT;
 	if (unlikely(proto != htons(ETH_P_IP) &&
 		     proto != htons(ETH_P_IPV6)))
 		return -ENOTSUPP;
 
+	off = skb_mac_header_len(skb);
+	switch (mode) {
+	case BPF_ADJ_ROOM_NET:
+		off += bpf_skb_net_base_len(skb);
+		break;
+	case BPF_ADJ_ROOM_MAC:
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
 	len_cur = skb->len - skb_network_offset(skb);
-	if (skb_transport_header_was_set(skb) && !trans_same)
-		len_cur = skb_network_header_len(skb);
 	if ((shrink && (len_diff_abs >= len_cur ||
 			len_cur - len_diff_abs < len_min)) ||
 	    (!shrink && (skb->len + len_diff_abs > len_max &&
 			 !skb_is_gso(skb))))
 		return -ENOTSUPP;
 
-	ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
-		       bpf_skb_net_grow(skb, len_diff_abs);
+	ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
+		       bpf_skb_net_grow(skb, off, len_diff_abs, flags);
 
 	bpf_compute_data_pointers(skb);
 	return ret;
 }
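
A hedged sketch of the new encap support from a tc BPF program, mirroring how the kernel selftests drive it: reserve room for an outer IPv4+UDP header with BPF_ADJ_ROOM_NET and let the BPF_F_ADJ_ROOM_ENCAP_* flags fix up the GSO bookkeeping. Writing the actual outer headers with bpf_skb_store_bytes() is elided:

#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("classifier")
int encap_ipv4_udp(struct __sk_buff *skb)
{
	__u32 room = sizeof(struct iphdr) + sizeof(struct udphdr);
	__u64 flags = BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
		      BPF_F_ADJ_ROOM_ENCAP_L4_UDP;

	if (bpf_skb_adjust_room(skb, room, BPF_ADJ_ROOM_NET, flags))
		return TC_ACT_SHOT;

	/* ... now store the outer iphdr + udphdr at the new offsets ... */
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";
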
 
-BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
-	   u32, mode, u64, flags)
-{
-	if (unlikely(flags))
-		return -EINVAL;
-	if (likely(mode == BPF_ADJ_ROOM_NET))
-		return bpf_skb_adjust_net(skb, len_diff);
-
-	return -ENOTSUPP;
-}
-
 static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
 	.func		= bpf_skb_adjust_room,
 	.gpl_only	= false,
@@ -3176,6 +3483,32 @@
 	return 0;
 }
 
+static noinline int
+xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
+		     struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri)
+{
+	struct net_device *fwd;
+	u32 index = ri->tgt_index;
+	int err;
+
+	fwd = dev_get_by_index_rcu(dev_net(dev), index);
+	ri->tgt_index = 0;
+	if (unlikely(!fwd)) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
+	if (unlikely(err))
+		goto err;
+
+	_trace_xdp_redirect(dev, xdp_prog, index);
+	return 0;
+err:
+	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
+	return err;
+}
+
 static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 			    struct bpf_map *map,
 			    struct xdp_buff *xdp,
@@ -3184,22 +3517,21 @@
 	int err;
 
 	switch (map->map_type) {
-	case BPF_MAP_TYPE_DEVMAP: {
+	case BPF_MAP_TYPE_DEVMAP:
+	case BPF_MAP_TYPE_DEVMAP_HASH: {
 		struct bpf_dtab_netdev *dst = fwd;
 
 		err = dev_map_enqueue(dst, xdp, dev_rx);
-		if (err)
+		if (unlikely(err))
 			return err;
-		__dev_map_insert_ctx(map, index);
 		break;
 	}
 	case BPF_MAP_TYPE_CPUMAP: {
 		struct bpf_cpu_map_entry *rcpu = fwd;
 
 		err = cpu_map_enqueue(rcpu, xdp, dev_rx);
-		if (err)
+		if (unlikely(err))
 			return err;
-		__cpu_map_insert_ctx(map, index);
 		break;
 	}
 	case BPF_MAP_TYPE_XSKMAP: {
@@ -3223,6 +3555,7 @@
 	if (map) {
 		switch (map->map_type) {
 		case BPF_MAP_TYPE_DEVMAP:
+		case BPF_MAP_TYPE_DEVMAP_HASH:
 			__dev_map_flush(map);
 			break;
 		case BPF_MAP_TYPE_CPUMAP:
@@ -3238,11 +3571,13 @@
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
 
-static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
+static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
 {
 	switch (map->map_type) {
 	case BPF_MAP_TYPE_DEVMAP:
 		return __dev_map_lookup_elem(map, index);
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+		return __dev_map_hash_lookup_elem(map, index);
 	case BPF_MAP_TYPE_CPUMAP:
 		return __cpu_map_lookup_elem(map, index);
 	case BPF_MAP_TYPE_XSKMAP:
@@ -3270,22 +3605,18 @@
 }
 
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
-			       struct bpf_prog *xdp_prog, struct bpf_map *map)
+			       struct bpf_prog *xdp_prog, struct bpf_map *map,
+			       struct bpf_redirect_info *ri)
 {
-	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	u32 index = ri->ifindex;
-	void *fwd = NULL;
+	u32 index = ri->tgt_index;
+	void *fwd = ri->tgt_value;
 	int err;
 
-	ri->ifindex = 0;
+	ri->tgt_index = 0;
+	ri->tgt_value = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
-	fwd = __xdp_map_lookup_elem(map, index);
-	if (!fwd) {
-		err = -EINVAL;
-		goto err;
-	}
-	if (ri->map_to_flush && ri->map_to_flush != map)
+	if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
 		xdp_do_flush_map();
 
 	err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
@@ -3305,29 +3636,11 @@
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct bpf_map *map = READ_ONCE(ri->map);
-	struct net_device *fwd;
-	u32 index = ri->ifindex;
-	int err;
 
-	if (map)
-		return xdp_do_redirect_map(dev, xdp, xdp_prog, map);
+	if (likely(map))
+		return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
 
-	fwd = dev_get_by_index_rcu(dev_net(dev), index);
-	ri->ifindex = 0;
-	if (unlikely(!fwd)) {
-		err = -EINVAL;
-		goto err;
-	}
-
-	err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
-	if (unlikely(err))
-		goto err;
-
-	_trace_xdp_redirect(dev, xdp_prog, index);
-	return 0;
-err:
-	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
-	return err;
+	return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri);
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
 
@@ -3338,20 +3651,16 @@
 				       struct bpf_map *map)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	u32 index = ri->ifindex;
-	void *fwd = NULL;
+	u32 index = ri->tgt_index;
+	void *fwd = ri->tgt_value;
 	int err = 0;
 
-	ri->ifindex = 0;
+	ri->tgt_index = 0;
+	ri->tgt_value = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
-	fwd = __xdp_map_lookup_elem(map, index);
-	if (unlikely(!fwd)) {
-		err = -EINVAL;
-		goto err;
-	}
-
-	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+	if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
+	    map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
 		struct bpf_dtab_netdev *dst = fwd;
 
 		err = dev_map_generic_redirect(dst, skb, xdp_prog);
@@ -3382,14 +3691,14 @@
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct bpf_map *map = READ_ONCE(ri->map);
-	u32 index = ri->ifindex;
+	u32 index = ri->tgt_index;
 	struct net_device *fwd;
 	int err = 0;
 
 	if (map)
 		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
 						   map);
-	ri->ifindex = 0;
+	ri->tgt_index = 0;
 	fwd = dev_get_by_index_rcu(dev_net(dev), index);
 	if (unlikely(!fwd)) {
 		err = -EINVAL;
@@ -3417,8 +3726,9 @@
 	if (unlikely(flags))
 		return XDP_ABORTED;
 
-	ri->ifindex = ifindex;
 	ri->flags = flags;
+	ri->tgt_index = ifindex;
+	ri->tgt_value = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
 	return XDP_REDIRECT;
@@ -3437,11 +3747,23 @@
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
-	if (unlikely(flags))
+	/* Lower bits of the flags are used as return code on lookup failure */
+	if (unlikely(flags > XDP_TX))
 		return XDP_ABORTED;
 
-	ri->ifindex = ifindex;
+	ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
+	if (unlikely(!ri->tgt_value)) {
+		/* If the lookup fails we want to clear out the state in the
+		 * redirect_info struct completely, so that if an eBPF program
+		 * performs multiple lookups, the last one always takes
+		 * precedence.
+		 */
+		WRITE_ONCE(ri->map, NULL);
+		return flags;
+	}
+
 	ri->flags = flags;
+	ri->tgt_index = ifindex;
 	WRITE_ONCE(ri->map, map);
 
 	return XDP_REDIRECT;
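
Because the lower bits of flags now select the lookup-failure return code, an XDP program can redirect to an AF_XDP socket when one is bound to the receive queue and transparently fall back to the regular stack otherwise. A sketch assuming libbpf BTF-style map definitions:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_XSKMAP);
	__uint(max_entries, 64);
	__type(key, __u32);
	__type(value, __u32);
} xsks_map SEC(".maps");

SEC("xdp")
int xdp_sock_or_stack(struct xdp_md *ctx)
{
	/* on a map-lookup miss this now returns XDP_PASS, not an error */
	return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
}

char _license[] SEC("license") = "GPL";
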
@@ -3890,6 +4212,26 @@
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
+	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
+{
+	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
+		return -EINVAL;
+
+	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
+}
+
+static const struct bpf_func_proto bpf_sockopt_event_output_proto = {
+	.func		= bpf_sockopt_event_output,
+	.gpl_only       = true,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_PTR_TO_MEM,
+	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
+};
+
 BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	   int, level, int, optname, char *, optval, int, optlen)
 {
@@ -3908,15 +4250,23 @@
 		/* Only some socketops are supported */
 		switch (optname) {
 		case SO_RCVBUF:
+			val = min_t(u32, val, sysctl_rmem_max);
 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-			sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+			WRITE_ONCE(sk->sk_rcvbuf,
+				   max_t(int, val * 2, SOCK_MIN_RCVBUF));
 			break;
 		case SO_SNDBUF:
+			val = min_t(u32, val, sysctl_wmem_max);
 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+			WRITE_ONCE(sk->sk_sndbuf,
+				   max_t(int, val * 2, SOCK_MIN_SNDBUF));
 			break;
-		case SO_MAX_PACING_RATE:
-			sk->sk_max_pacing_rate = val;
+		case SO_MAX_PACING_RATE: /* 32bit version */
+			if (val != ~0U)
+				cmpxchg(&sk->sk_pacing_status,
+					SK_PACING_NONE,
+					SK_PACING_NEEDED);
+			sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
 			sk->sk_pacing_rate = min(sk->sk_pacing_rate,
 						 sk->sk_max_pacing_rate);
 			break;
@@ -3926,10 +4276,13 @@
 		case SO_RCVLOWAT:
 			if (val < 0)
 				val = INT_MAX;
-			sk->sk_rcvlowat = val ? : 1;
+			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
 			break;
 		case SO_MARK:
-			sk->sk_mark = val;
+			if (sk->sk_mark != val) {
+				sk->sk_mark = val;
+				sk_dst_reset(sk);
+			}
 			break;
 		default:
 			ret = -EINVAL;
@@ -3989,7 +4342,7 @@
 						    TCP_CA_NAME_MAX-1));
 			name[TCP_CA_NAME_MAX-1] = 0;
 			ret = tcp_set_congestion_control(sk, name, false,
-							 reinit);
+							 reinit, true);
 		} else {
 			struct tcp_sock *tp = tcp_sk(sk);
 
@@ -4000,7 +4353,7 @@
 			/* Only some options are supported */
 			switch (optname) {
 			case TCP_BPF_IW:
-				if (val <= 0 || tp->data_segs_out > 0)
+				if (val <= 0 || tp->data_segs_out > tp->syn_data)
 					ret = -EINVAL;
 				else
 					tp->snd_cwnd = val;
@@ -4013,6 +4366,12 @@
 					tp->snd_ssthresh = val;
 				}
 				break;
+			case TCP_SAVE_SYN:
+				if (val < 0 || val > 1)
+					ret = -EINVAL;
+				else
+					tp->save_syn = val;
+				break;
 			default:
 				ret = -EINVAL;
 			}
@@ -4042,17 +4401,29 @@
 
 	if (!sk_fullsock(sk))
 		goto err_clear;
-
 #ifdef CONFIG_INET
 	if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
-		if (optname == TCP_CONGESTION) {
-			struct inet_connection_sock *icsk = inet_csk(sk);
+		struct inet_connection_sock *icsk;
+		struct tcp_sock *tp;
+
+		switch (optname) {
+		case TCP_CONGESTION:
+			icsk = inet_csk(sk);
 
 			if (!icsk->icsk_ca_ops || optlen <= 1)
 				goto err_clear;
 			strncpy(optval, icsk->icsk_ca_ops->name, optlen);
 			optval[optlen - 1] = 0;
-		} else {
+			break;
+		case TCP_SAVED_SYN:
+			tp = tcp_sk(sk);
+
+			if (optlen <= 0 || !tp->saved_syn ||
+			    optlen > tp->saved_syn[0])
+				goto err_clear;
+			memcpy(optval, tp->saved_syn + 1, optlen);
+			break;
+		default:
 			goto err_clear;
 		}
 	} else if (level == SOL_IP) {
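
Taken together, the setsockopt and getsockopt hunks let a sock_ops program arm TCP_SAVE_SYN on a listener and read the saved SYN headers back once a connection is established. A hedged sketch, sized for IPv4 without options; the kernel rejects an optlen larger than the stored headers:

#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int save_syn(struct bpf_sock_ops *skops)
{
	char syn[sizeof(struct iphdr) + sizeof(struct tcphdr)];
	int one = 1;

	switch (skops->op) {
	case BPF_SOCK_OPS_TCP_LISTEN_CB:
		/* ask the stack to retain incoming SYN headers */
		bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN, &one,
			       sizeof(one));
		break;
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		bpf_getsockopt(skops, IPPROTO_TCP, TCP_SAVED_SYN, syn,
			       sizeof(syn));
		break;
	}
	return 1;
}

char _license[] SEC("license") = "GPL";
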
@@ -4115,8 +4486,7 @@
 	if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
 		return -EINVAL;
 
-	if (val)
-		tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
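+	/* val == 0 is accepted, letting programs clear the flags again. */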
+	tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
 
 	return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
 }
@@ -4143,6 +4513,8 @@
 	 * Only binding to IP is supported.
 	 */
 	err = -EINVAL;
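+	/* Make sure addr->sa_family itself lies within the supplied buffer. */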
+	if (addr_len < offsetofend(struct sockaddr, sa_family))
+		return err;
 	if (addr->sa_family == AF_INET) {
 		if (addr_len < sizeof(struct sockaddr_in))
 			return err;
@@ -4240,11 +4612,11 @@
 static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 			       u32 flags, bool check_mtu)
 {
+	struct fib_nh_common *nhc;
 	struct in_device *in_dev;
 	struct neighbour *neigh;
 	struct net_device *dev;
 	struct fib_result res;
-	struct fib_nh *nh;
 	struct flowi4 fl4;
 	int err;
 	u32 mtu;
@@ -4308,7 +4680,7 @@
 	if (res.type != RTN_UNICAST)
 		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
-	if (res.fi->fib_nhs > 1)
+	if (fib_info_num_path(res.fi) > 1)
 		fib_select_path(net, &res, &fl4, NULL);
 
 	if (check_mtu) {
@@ -4317,22 +4689,33 @@
 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
-	nh = &res.fi->fib_nh[res.nh_sel];
+	nhc = res.nhc;
 
 	/* do not handle lwt encaps right now */
-	if (nh->nh_lwtstate)
+	if (nhc->nhc_lwtstate)
 		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
 
-	dev = nh->nh_dev;
-	if (nh->nh_gw)
-		params->ipv4_dst = nh->nh_gw;
+	dev = nhc->nhc_dev;
 
 	params->rt_metric = res.fi->fib_priority;
 
 	/* xdp and cls_bpf programs are run in RCU-bh so
 	 * rcu_read_lock_bh is not needed here
 	 */
-	neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
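+	/* An IPv4 route may carry an IPv6 gateway (RFC 5549), in which case
+	 * the neighbour has to be resolved via the IPv6 tables.
+	 */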
+	if (likely(nhc->nhc_gw_family != AF_INET6)) {
+		if (nhc->nhc_gw_family)
+			params->ipv4_dst = nhc->nhc_gw.ipv4;
+
+		neigh = __ipv4_neigh_lookup_noref(dev,
+						 (__force u32)params->ipv4_dst);
+	} else {
+		struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
+
+		params->family = AF_INET6;
+		*dst = nhc->nhc_gw.ipv6;
+		neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+	}
+
 	if (!neigh)
 		return BPF_FIB_LKUP_RET_NO_NEIGH;
 
@@ -4346,13 +4729,13 @@
 {
 	struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
 	struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
+	struct fib6_result res = {};
 	struct neighbour *neigh;
 	struct net_device *dev;
 	struct inet6_dev *idev;
-	struct fib6_info *f6i;
 	struct flowi6 fl6;
 	int strict = 0;
-	int oif;
+	int oif, err;
 	u32 mtu;
 
 	/* link local addresses are never forwarded */
@@ -4364,7 +4747,7 @@
 		return -ENODEV;
 
 	idev = __in6_dev_get_safely(dev);
-	if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
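+	/* Check the per-device forwarding setting, not just the global one. */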
+	if (unlikely(!idev || !idev->cnf.forwarding))
 		return BPF_FIB_LKUP_RET_FWD_DISABLED;
 
 	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
@@ -4394,61 +4777,57 @@
 		if (unlikely(!tb))
 			return BPF_FIB_LKUP_RET_NOT_FWDED;
 
-		f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
+		err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
+						   strict);
 	} else {
 		fl6.flowi6_mark = 0;
 		fl6.flowi6_secid = 0;
 		fl6.flowi6_tun_key.tun_id = 0;
 		fl6.flowi6_uid = sock_net_uid(net, NULL);
 
-		f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
+		err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
 	}
 
-	if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
+	if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
+		     res.f6i == net->ipv6.fib6_null_entry))
 		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
-	if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
-		switch (f6i->fib6_type) {
-		case RTN_BLACKHOLE:
-			return BPF_FIB_LKUP_RET_BLACKHOLE;
-		case RTN_UNREACHABLE:
-			return BPF_FIB_LKUP_RET_UNREACHABLE;
-		case RTN_PROHIBIT:
-			return BPF_FIB_LKUP_RET_PROHIBIT;
-		default:
-			return BPF_FIB_LKUP_RET_NOT_FWDED;
-		}
+	switch (res.fib6_type) {
+	/* only unicast is forwarded */
+	case RTN_UNICAST:
+		break;
+	case RTN_BLACKHOLE:
+		return BPF_FIB_LKUP_RET_BLACKHOLE;
+	case RTN_UNREACHABLE:
+		return BPF_FIB_LKUP_RET_UNREACHABLE;
+	case RTN_PROHIBIT:
+		return BPF_FIB_LKUP_RET_PROHIBIT;
+	default:
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
 	}
 
-	if (f6i->fib6_type != RTN_UNICAST)
-		return BPF_FIB_LKUP_RET_NOT_FWDED;
-
-	if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
-		f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
-						       fl6.flowi6_oif, NULL,
-						       strict);
+	ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
+				    fl6.flowi6_oif != 0, NULL, strict);
 
 	if (check_mtu) {
-		mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
+		mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
 		if (params->tot_len > mtu)
 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
-	if (f6i->fib6_nh.nh_lwtstate)
+	if (res.nh->fib_nh_lws)
 		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
 
-	if (f6i->fib6_flags & RTF_GATEWAY)
-		*dst = f6i->fib6_nh.nh_gw;
+	if (res.nh->fib_nh_gw_family)
+		*dst = res.nh->fib_nh_gw6;
 
-	dev = f6i->fib6_nh.nh_dev;
-	params->rt_metric = f6i->fib6_metric;
+	dev = res.nh->fib_nh_dev;
+	params->rt_metric = res.f6i->fib6_metric;
 
 	/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
-	 * not needed here. Can not use __ipv6_neigh_lookup_noref here
-	 * because we need to get nd_tbl via the stub
+	 * not needed here.
 	 */
-	neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
-				      ndisc_hashfn, dst, dev);
+	neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
 	if (!neigh)
 		return BPF_FIB_LKUP_RET_NO_NEIGH;
 
@@ -4572,7 +4951,15 @@
 }
 #endif /* CONFIG_IPV6_SEG6_BPF */
 
-BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
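+/* Shared by the ingress and xmit flavours of the push_encap helpers below. */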
+static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
+			     bool ingress)
+{
+	return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
+}
+#endif
+
+BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
 	   u32, len)
 {
 	switch (type) {
@@ -4581,13 +4968,40 @@
 	case BPF_LWT_ENCAP_SEG6_INLINE:
 		return bpf_push_seg6_encap(skb, type, hdr, len);
 #endif
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+	case BPF_LWT_ENCAP_IP:
+		return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
+#endif
 	default:
 		return -EINVAL;
 	}
 }
 
-static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
-	.func		= bpf_lwt_push_encap,
+BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
+	   void *, hdr, u32, len)
+{
+	switch (type) {
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+	case BPF_LWT_ENCAP_IP:
+		return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
+#endif
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
+	.func		= bpf_lwt_in_push_encap,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_MEM,
+	.arg4_type	= ARG_CONST_SIZE
+};
+
+static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
+	.func		= bpf_lwt_xmit_push_encap,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
@@ -4787,6 +5201,733 @@
 };
 #endif /* CONFIG_IPV6_SEG6_BPF */
 
+#ifdef CONFIG_INET
+static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
+			      int dif, int sdif, u8 family, u8 proto)
+{
+	bool refcounted = false;
+	struct sock *sk = NULL;
+
+	if (family == AF_INET) {
+		__be32 src4 = tuple->ipv4.saddr;
+		__be32 dst4 = tuple->ipv4.daddr;
+
+		if (proto == IPPROTO_TCP)
+			sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
+					   src4, tuple->ipv4.sport,
+					   dst4, tuple->ipv4.dport,
+					   dif, sdif, &refcounted);
+		else
+			sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
+					       dst4, tuple->ipv4.dport,
+					       dif, sdif, &udp_table, NULL);
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
+		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
+
+		if (proto == IPPROTO_TCP)
+			sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
+					    src6, tuple->ipv6.sport,
+					    dst6, ntohs(tuple->ipv6.dport),
+					    dif, sdif, &refcounted);
+		else if (likely(ipv6_bpf_stub))
+			sk = ipv6_bpf_stub->udp6_lib_lookup(net,
+							    src6, tuple->ipv6.sport,
+							    dst6, tuple->ipv6.dport,
+							    dif, sdif,
+							    &udp_table, NULL);
+#endif
+	}
+
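+	/* A socket returned without a reference must be RCU-managed, or it
+	 * could be freed while we still hold the pointer.
+	 */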
+	if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
+		WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+		sk = NULL;
+	}
+	return sk;
+}
+
+/* bpf_skc_lookup performs the core lookup for different types of sockets,
+ * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
+ * The BPF_CALL_x wrappers below cast the returned socket pointer to an
+ * 'unsigned long' to satisfy their declarations.
+ */
+static struct sock *
+__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+		 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+		 u64 flags)
+{
+	struct sock *sk = NULL;
+	u8 family = AF_UNSPEC;
+	struct net *net;
+	int sdif;
+
+	if (len == sizeof(tuple->ipv4))
+		family = AF_INET;
+	else if (len == sizeof(tuple->ipv6))
+		family = AF_INET6;
+	else
+		return NULL;
+
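+	/* flags must be zero; netns_id is either negative (use the caller's
+	 * netns) or a valid s32 netns id.
+	 */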
+	if (unlikely(family == AF_UNSPEC || flags ||
+		     !((s32)netns_id < 0 || netns_id <= S32_MAX)))
+		goto out;
+
+	if (family == AF_INET)
+		sdif = inet_sdif(skb);
+	else
+		sdif = inet6_sdif(skb);
+
+	if ((s32)netns_id < 0) {
+		net = caller_net;
+		sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
+	} else {
+		net = get_net_ns_by_id(caller_net, netns_id);
+		if (unlikely(!net))
+			goto out;
+		sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
+		put_net(net);
+	}
+
+out:
+	return sk;
+}
+
+static struct sock *
+__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+		struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+		u64 flags)
+{
+	struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
+					   ifindex, proto, netns_id, flags);
+
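+	/* Upgrade e.g. request or timewait socks to the full socket; drop the
+	 * lookup reference when that is not possible.
+	 */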
+	if (sk) {
+		sk = sk_to_full_sk(sk);
+		if (!sk_fullsock(sk)) {
+			if (!sock_flag(sk, SOCK_RCU_FREE))
+				sock_gen_put(sk);
+			return NULL;
+		}
+	}
+
+	return sk;
+}
+
+static struct sock *
+bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+	       u8 proto, u64 netns_id, u64 flags)
+{
+	struct net *caller_net;
+	int ifindex;
+
+	if (skb->dev) {
+		caller_net = dev_net(skb->dev);
+		ifindex = skb->dev->ifindex;
+	} else {
+		caller_net = sock_net(skb->sk);
+		ifindex = 0;
+	}
+
+	return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
+				netns_id, flags);
+}
+
+static struct sock *
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+	      u8 proto, u64 netns_id, u64 flags)
+{
+	struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
+					 flags);
+
+	if (sk) {
+		sk = sk_to_full_sk(sk);
+		if (!sk_fullsock(sk)) {
+			if (!sock_flag(sk, SOCK_RCU_FREE))
+				sock_gen_put(sk);
+			return NULL;
+		}
+	}
+
+	return sk;
+}
+
+BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
+	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+	return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
+					     netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
+	.func		= bpf_skc_lookup_tcp,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_SOCK_COMMON_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
+	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+	return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
+					    netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
+	.func		= bpf_sk_lookup_tcp,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
+	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+	return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
+					    netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
+	.func		= bpf_sk_lookup_udp,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_sk_release, struct sock *, sk)
+{
+	if (!sock_flag(sk, SOCK_RCU_FREE))
+		sock_gen_put(sk);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sk_release_proto = {
+	.func		= bpf_sk_release,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+};
+
+BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+	struct net *caller_net = dev_net(ctx->rxq->dev);
+	int ifindex = ctx->rxq->dev->ifindex;
+
+	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+					      ifindex, IPPROTO_UDP, netns_id,
+					      flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
+	.func           = bpf_xdp_sk_lookup_udp,
+	.gpl_only       = false,
+	.pkt_access     = true,
+	.ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_PTR_TO_MEM,
+	.arg3_type      = ARG_CONST_SIZE,
+	.arg4_type      = ARG_ANYTHING,
+	.arg5_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+	struct net *caller_net = dev_net(ctx->rxq->dev);
+	int ifindex = ctx->rxq->dev->ifindex;
+
+	return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
+					       ifindex, IPPROTO_TCP, netns_id,
+					       flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
+	.func           = bpf_xdp_skc_lookup_tcp,
+	.gpl_only       = false,
+	.pkt_access     = true,
+	.ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_PTR_TO_MEM,
+	.arg3_type      = ARG_CONST_SIZE,
+	.arg4_type      = ARG_ANYTHING,
+	.arg5_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+	struct net *caller_net = dev_net(ctx->rxq->dev);
+	int ifindex = ctx->rxq->dev->ifindex;
+
+	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+					      ifindex, IPPROTO_TCP, netns_id,
+					      flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
+	.func           = bpf_xdp_sk_lookup_tcp,
+	.gpl_only       = false,
+	.pkt_access     = true,
+	.ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_PTR_TO_MEM,
+	.arg3_type      = ARG_CONST_SIZE,
+	.arg4_type      = ARG_ANYTHING,
+	.arg5_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+	return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
+					       sock_net(ctx->sk), 0,
+					       IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
+	.func		= bpf_sock_addr_skc_lookup_tcp,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCK_COMMON_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+					      sock_net(ctx->sk), 0, IPPROTO_TCP,
+					      netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
+	.func		= bpf_sock_addr_sk_lookup_tcp,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+					      sock_net(ctx->sk), 0, IPPROTO_UDP,
+					      netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
+	.func		= bpf_sock_addr_sk_lookup_udp,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= offsetofend(struct bpf_tcp_sock,
+					  icsk_retransmits))
+		return false;
+
+	if (off % size != 0)
+		return false;
+
+	switch (off) {
+	case offsetof(struct bpf_tcp_sock, bytes_received):
+	case offsetof(struct bpf_tcp_sock, bytes_acked):
+		return size == sizeof(__u64);
+	default:
+		return size == sizeof(__u32);
+	}
+}
+
+u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+				    const struct bpf_insn *si,
+				    struct bpf_insn *insn_buf,
+				    struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+#define BPF_TCP_SOCK_GET_COMMON(FIELD)					\
+	do {								\
+		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) >	\
+			     FIELD_SIZEOF(struct bpf_tcp_sock, FIELD));	\
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
+				      si->dst_reg, si->src_reg,		\
+				      offsetof(struct tcp_sock, FIELD)); \
+	} while (0)
+
+#define BPF_INET_SOCK_GET_COMMON(FIELD)					\
+	do {								\
+		BUILD_BUG_ON(FIELD_SIZEOF(struct inet_connection_sock,	\
+					  FIELD) >			\
+			     FIELD_SIZEOF(struct bpf_tcp_sock, FIELD));	\
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			\
+					struct inet_connection_sock,	\
+					FIELD),				\
+				      si->dst_reg, si->src_reg,		\
+				      offsetof(				\
+					struct inet_connection_sock,	\
+					FIELD));			\
+	} while (0)
+
+	if (insn > insn_buf)
+		return insn - insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct bpf_tcp_sock, rtt_min):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+			     sizeof(struct minmax));
+		BUILD_BUG_ON(sizeof(struct minmax) <
+			     sizeof(struct minmax_sample));
+
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      offsetof(struct tcp_sock, rtt_min) +
+				      offsetof(struct minmax_sample, v));
+		break;
+	case offsetof(struct bpf_tcp_sock, snd_cwnd):
+		BPF_TCP_SOCK_GET_COMMON(snd_cwnd);
+		break;
+	case offsetof(struct bpf_tcp_sock, srtt_us):
+		BPF_TCP_SOCK_GET_COMMON(srtt_us);
+		break;
+	case offsetof(struct bpf_tcp_sock, snd_ssthresh):
+		BPF_TCP_SOCK_GET_COMMON(snd_ssthresh);
+		break;
+	case offsetof(struct bpf_tcp_sock, rcv_nxt):
+		BPF_TCP_SOCK_GET_COMMON(rcv_nxt);
+		break;
+	case offsetof(struct bpf_tcp_sock, snd_nxt):
+		BPF_TCP_SOCK_GET_COMMON(snd_nxt);
+		break;
+	case offsetof(struct bpf_tcp_sock, snd_una):
+		BPF_TCP_SOCK_GET_COMMON(snd_una);
+		break;
+	case offsetof(struct bpf_tcp_sock, mss_cache):
+		BPF_TCP_SOCK_GET_COMMON(mss_cache);
+		break;
+	case offsetof(struct bpf_tcp_sock, ecn_flags):
+		BPF_TCP_SOCK_GET_COMMON(ecn_flags);
+		break;
+	case offsetof(struct bpf_tcp_sock, rate_delivered):
+		BPF_TCP_SOCK_GET_COMMON(rate_delivered);
+		break;
+	case offsetof(struct bpf_tcp_sock, rate_interval_us):
+		BPF_TCP_SOCK_GET_COMMON(rate_interval_us);
+		break;
+	case offsetof(struct bpf_tcp_sock, packets_out):
+		BPF_TCP_SOCK_GET_COMMON(packets_out);
+		break;
+	case offsetof(struct bpf_tcp_sock, retrans_out):
+		BPF_TCP_SOCK_GET_COMMON(retrans_out);
+		break;
+	case offsetof(struct bpf_tcp_sock, total_retrans):
+		BPF_TCP_SOCK_GET_COMMON(total_retrans);
+		break;
+	case offsetof(struct bpf_tcp_sock, segs_in):
+		BPF_TCP_SOCK_GET_COMMON(segs_in);
+		break;
+	case offsetof(struct bpf_tcp_sock, data_segs_in):
+		BPF_TCP_SOCK_GET_COMMON(data_segs_in);
+		break;
+	case offsetof(struct bpf_tcp_sock, segs_out):
+		BPF_TCP_SOCK_GET_COMMON(segs_out);
+		break;
+	case offsetof(struct bpf_tcp_sock, data_segs_out):
+		BPF_TCP_SOCK_GET_COMMON(data_segs_out);
+		break;
+	case offsetof(struct bpf_tcp_sock, lost_out):
+		BPF_TCP_SOCK_GET_COMMON(lost_out);
+		break;
+	case offsetof(struct bpf_tcp_sock, sacked_out):
+		BPF_TCP_SOCK_GET_COMMON(sacked_out);
+		break;
+	case offsetof(struct bpf_tcp_sock, bytes_received):
+		BPF_TCP_SOCK_GET_COMMON(bytes_received);
+		break;
+	case offsetof(struct bpf_tcp_sock, bytes_acked):
+		BPF_TCP_SOCK_GET_COMMON(bytes_acked);
+		break;
+	case offsetof(struct bpf_tcp_sock, dsack_dups):
+		BPF_TCP_SOCK_GET_COMMON(dsack_dups);
+		break;
+	case offsetof(struct bpf_tcp_sock, delivered):
+		BPF_TCP_SOCK_GET_COMMON(delivered);
+		break;
+	case offsetof(struct bpf_tcp_sock, delivered_ce):
+		BPF_TCP_SOCK_GET_COMMON(delivered_ce);
+		break;
+	case offsetof(struct bpf_tcp_sock, icsk_retransmits):
+		BPF_INET_SOCK_GET_COMMON(icsk_retransmits);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
+{
+	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_tcp_sock_proto = {
+	.func		= bpf_tcp_sock,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_TCP_SOCK_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+};
+
+BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
+{
+	sk = sk_to_full_sk(sk);
+
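+	/* Only listeners that are RCU-freed can be handed out without taking
+	 * a reference.
+	 */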
+	if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_get_listener_sock_proto = {
+	.func		= bpf_get_listener_sock,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+};
+
+BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
+{
+	unsigned int iphdr_len;
+
+	if (skb->protocol == cpu_to_be16(ETH_P_IP))
+		iphdr_len = sizeof(struct iphdr);
+	else if (skb->protocol == cpu_to_be16(ETH_P_IPV6))
+		iphdr_len = sizeof(struct ipv6hdr);
+	else
+		return 0;
+
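+	/* The IP header must be in the linear area and writable before the
+	 * CE bits may be set.
+	 */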
+	if (skb_headlen(skb) < iphdr_len)
+		return 0;
+
+	if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len))
+		return 0;
+
+	return INET_ECN_set_ce(skb);
+}
+
+bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
+		return false;
+
+	if (off % size != 0)
+		return false;
+
+	switch (off) {
+	default:
+		return size == sizeof(__u32);
+	}
+}
+
+u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
+				    const struct bpf_insn *si,
+				    struct bpf_insn *insn_buf,
+				    struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+#define BPF_XDP_SOCK_GET(FIELD)						\
+	do {								\
+		BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) >	\
+			     FIELD_SIZEOF(struct bpf_xdp_sock, FIELD));	\
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
+				      si->dst_reg, si->src_reg,		\
+				      offsetof(struct xdp_sock, FIELD)); \
+	} while (0)
+
+	switch (si->off) {
+	case offsetof(struct bpf_xdp_sock, queue_id):
+		BPF_XDP_SOCK_GET(queue_id);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
+	.func           = bpf_skb_ecn_set_ce,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+	   struct tcphdr *, th, u32, th_len)
+{
+#ifdef CONFIG_SYN_COOKIES
+	u32 cookie;
+	int ret;
+
+	if (unlikely(th_len < sizeof(*th)))
+		return -EINVAL;
+
+	/* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
+	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+
+	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+		return -EINVAL;
+
+	if (!th->ack || th->rst || th->syn)
+		return -ENOENT;
+
+	if (tcp_synq_no_recent_overflow(sk))
+		return -ENOENT;
+
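+	/* The cookie went out as the SYNACK sequence number, so the incoming
+	 * ACK acknowledges cookie + 1.
+	 */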
+	cookie = ntohl(th->ack_seq) - 1;
+
+	switch (sk->sk_family) {
+	case AF_INET:
+		if (unlikely(iph_len < sizeof(struct iphdr)))
+			return -EINVAL;
+
+		ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
+		break;
+
+#if IS_BUILTIN(CONFIG_IPV6)
+	case AF_INET6:
+		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+
+		ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
+		break;
+#endif /* CONFIG_IPV6 */
+
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	if (ret > 0)
+		return 0;
+
+	return -ENOENT;
+#else
+	return -ENOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
+	.func		= bpf_tcp_check_syncookie,
+	.gpl_only	= true,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+	   struct tcphdr *, th, u32, th_len)
+{
+#ifdef CONFIG_SYN_COOKIES
+	u32 cookie;
+	u16 mss;
+
+	if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+		return -EINVAL;
+
+	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+
+	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+		return -ENOENT;
+
+	if (!th->syn || th->ack || th->fin || th->rst)
+		return -EINVAL;
+
+	if (unlikely(iph_len < sizeof(struct iphdr)))
+		return -EINVAL;
+
+	/* Both struct iphdr and struct ipv6hdr have the version field at the
+	 * same offset so we can cast to the shorter header (struct iphdr).
+	 */
+	switch (((struct iphdr *)iph)->version) {
+	case 4:
+		if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
+			return -EINVAL;
+
+		mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
+		break;
+
+#if IS_BUILTIN(CONFIG_IPV6)
+	case 6:
+		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+
+		if (sk->sk_family != AF_INET6)
+			return -EINVAL;
+
+		mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
+		break;
+#endif /* CONFIG_IPV6 */
+
+	default:
+		return -EPROTONOSUPPORT;
+	}
+	if (mss == 0)
+		return -ENOENT;
+
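+	/* Cookie in the low 32 bits, MSS in the high 32 bits. */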
+	return cookie | ((u64)mss << 32);
+#else
+	return -EOPNOTSUPP;
+#endif /* CONFIG_SYN_COOKIES */
+}
+
+static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
+	.func		= bpf_tcp_gen_syncookie,
+	.gpl_only	= true, /* __cookie_v*_init_sequence() is GPL */
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
+#endif /* CONFIG_INET */
+
 bool bpf_helper_changes_pkt_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
@@ -4806,13 +5947,16 @@
 	    func == bpf_xdp_adjust_head ||
 	    func == bpf_xdp_adjust_meta ||
 	    func == bpf_msg_pull_data ||
+	    func == bpf_msg_push_data ||
+	    func == bpf_msg_pop_data ||
 	    func == bpf_xdp_adjust_tail ||
 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 	    func == bpf_lwt_seg6_store_bytes ||
 	    func == bpf_lwt_seg6_adjust_srh ||
 	    func == bpf_lwt_seg6_action ||
 #endif
-	    func == bpf_lwt_push_encap)
+	    func == bpf_lwt_in_push_encap ||
+	    func == bpf_lwt_xmit_push_encap)
 		return true;
 
 	return false;
@@ -4828,6 +5972,12 @@
 		return &bpf_map_update_elem_proto;
 	case BPF_FUNC_map_delete_elem:
 		return &bpf_map_delete_elem_proto;
+	case BPF_FUNC_map_push_elem:
+		return &bpf_map_push_elem_proto;
+	case BPF_FUNC_map_pop_elem:
+		return &bpf_map_pop_elem_proto;
+	case BPF_FUNC_map_peek_elem:
+		return &bpf_map_peek_elem_proto;
 	case BPF_FUNC_get_prandom_u32:
 		return &bpf_get_prandom_u32_proto;
 	case BPF_FUNC_get_smp_processor_id:
@@ -4838,10 +5988,20 @@
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
+	default:
+		break;
+	}
+
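+	/* Everything past this point requires CAP_SYS_ADMIN. */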
+	if (!capable(CAP_SYS_ADMIN))
+		return NULL;
+
+	switch (func_id) {
+	case BPF_FUNC_spin_lock:
+		return &bpf_spin_lock_proto;
+	case BPF_FUNC_spin_unlock:
+		return &bpf_spin_unlock_proto;
 	case BPF_FUNC_trace_printk:
-		if (capable(CAP_SYS_ADMIN))
-			return bpf_get_trace_printk_proto();
-		/* else: fall through */
+		return bpf_get_trace_printk_proto();
 	default:
 		return NULL;
 	}
@@ -4884,6 +6044,20 @@
 		return &bpf_get_socket_cookie_sock_addr_proto;
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+#ifdef CONFIG_INET
+	case BPF_FUNC_sk_lookup_tcp:
+		return &bpf_sock_addr_sk_lookup_tcp_proto;
+	case BPF_FUNC_sk_lookup_udp:
+		return &bpf_sock_addr_sk_lookup_udp_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
+	case BPF_FUNC_skc_lookup_tcp:
+		return &bpf_sock_addr_skc_lookup_tcp_proto;
+#endif /* CONFIG_INET */
+	case BPF_FUNC_sk_storage_get:
+		return &bpf_sk_storage_get_proto;
+	case BPF_FUNC_sk_storage_delete:
+		return &bpf_sk_storage_delete_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -4901,17 +6075,42 @@
 		return &bpf_get_socket_cookie_proto;
 	case BPF_FUNC_get_socket_uid:
 		return &bpf_get_socket_uid_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_skb_event_output_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
 }
 
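+/* Weak stubs keep the references below linkable when the sk_storage code is
+ * not built in; the real definitions override them when it is.
+ */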
+const struct bpf_func_proto bpf_sk_storage_get_proto __weak;
+const struct bpf_func_proto bpf_sk_storage_delete_proto __weak;
+
 static const struct bpf_func_proto *
 cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_sk_fullsock:
+		return &bpf_sk_fullsock_proto;
+	case BPF_FUNC_sk_storage_get:
+		return &bpf_sk_storage_get_proto;
+	case BPF_FUNC_sk_storage_delete:
+		return &bpf_sk_storage_delete_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_skb_event_output_proto;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+	case BPF_FUNC_skb_cgroup_id:
+		return &bpf_skb_cgroup_id_proto;
+#endif
+#ifdef CONFIG_INET
+	case BPF_FUNC_tcp_sock:
+		return &bpf_tcp_sock_proto;
+	case BPF_FUNC_get_listener_sock:
+		return &bpf_get_listener_sock_proto;
+	case BPF_FUNC_skb_ecn_set_ce:
+		return &bpf_skb_ecn_set_ce_proto;
+#endif
 	default:
 		return sk_filter_func_proto(func_id, prog);
 	}
@@ -4983,6 +6182,12 @@
 		return &bpf_get_socket_uid_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_skb_fib_lookup_proto;
+	case BPF_FUNC_sk_fullsock:
+		return &bpf_sk_fullsock_proto;
+	case BPF_FUNC_sk_storage_get:
+		return &bpf_sk_storage_get_proto;
+	case BPF_FUNC_sk_storage_delete:
+		return &bpf_sk_storage_delete_proto;
 #ifdef CONFIG_XFRM
 	case BPF_FUNC_skb_get_xfrm_state:
 		return &bpf_skb_get_xfrm_state_proto;
@@ -4993,6 +6198,26 @@
 	case BPF_FUNC_skb_ancestor_cgroup_id:
 		return &bpf_skb_ancestor_cgroup_id_proto;
 #endif
+#ifdef CONFIG_INET
+	case BPF_FUNC_sk_lookup_tcp:
+		return &bpf_sk_lookup_tcp_proto;
+	case BPF_FUNC_sk_lookup_udp:
+		return &bpf_sk_lookup_udp_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
+	case BPF_FUNC_tcp_sock:
+		return &bpf_tcp_sock_proto;
+	case BPF_FUNC_get_listener_sock:
+		return &bpf_get_listener_sock_proto;
+	case BPF_FUNC_skc_lookup_tcp:
+		return &bpf_skc_lookup_tcp_proto;
+	case BPF_FUNC_tcp_check_syncookie:
+		return &bpf_tcp_check_syncookie_proto;
+	case BPF_FUNC_skb_ecn_set_ce:
+		return &bpf_skb_ecn_set_ce_proto;
+	case BPF_FUNC_tcp_gen_syncookie:
+		return &bpf_tcp_gen_syncookie_proto;
+#endif
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -5020,11 +6245,28 @@
 		return &bpf_xdp_adjust_tail_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
+#ifdef CONFIG_INET
+	case BPF_FUNC_sk_lookup_udp:
+		return &bpf_xdp_sk_lookup_udp_proto;
+	case BPF_FUNC_sk_lookup_tcp:
+		return &bpf_xdp_sk_lookup_tcp_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
+	case BPF_FUNC_skc_lookup_tcp:
+		return &bpf_xdp_skc_lookup_tcp_proto;
+	case BPF_FUNC_tcp_check_syncookie:
+		return &bpf_tcp_check_syncookie_proto;
+	case BPF_FUNC_tcp_gen_syncookie:
+		return &bpf_tcp_gen_syncookie_proto;
+#endif
 	default:
 		return bpf_base_func_proto(func_id);
 	}
 }
 
+const struct bpf_func_proto bpf_sock_map_update_proto __weak;
+const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
+
 static const struct bpf_func_proto *
 sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5043,11 +6285,24 @@
 		return &bpf_get_socket_cookie_sock_ops_proto;
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_sockopt_event_output_proto;
+	case BPF_FUNC_sk_storage_get:
+		return &bpf_sk_storage_get_proto;
+	case BPF_FUNC_sk_storage_delete:
+		return &bpf_sk_storage_delete_proto;
+#ifdef CONFIG_INET
+	case BPF_FUNC_tcp_sock:
+		return &bpf_tcp_sock_proto;
+#endif /* CONFIG_INET */
 	default:
 		return bpf_base_func_proto(func_id);
 	}
 }
 
+const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
+const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
+
 static const struct bpf_func_proto *
 sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5062,13 +6317,18 @@
 		return &bpf_msg_cork_bytes_proto;
 	case BPF_FUNC_msg_pull_data:
 		return &bpf_msg_pull_data_proto;
-	case BPF_FUNC_get_local_storage:
-		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_msg_push_data:
+		return &bpf_msg_push_data_proto;
+	case BPF_FUNC_msg_pop_data:
+		return &bpf_msg_pop_data_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
 }
 
+const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
+const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
+
 static const struct bpf_func_proto *
 sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5091,8 +6351,29 @@
 		return &bpf_sk_redirect_map_proto;
 	case BPF_FUNC_sk_redirect_hash:
 		return &bpf_sk_redirect_hash_proto;
-	case BPF_FUNC_get_local_storage:
-		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_skb_event_output_proto;
+#ifdef CONFIG_INET
+	case BPF_FUNC_sk_lookup_tcp:
+		return &bpf_sk_lookup_tcp_proto;
+	case BPF_FUNC_sk_lookup_udp:
+		return &bpf_sk_lookup_udp_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
+	case BPF_FUNC_skc_lookup_tcp:
+		return &bpf_skc_lookup_tcp_proto;
+#endif
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
+static const struct bpf_func_proto *
+flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_load_bytes:
+		return &bpf_flow_dissector_load_bytes_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -5130,7 +6411,7 @@
 {
 	switch (func_id) {
 	case BPF_FUNC_lwt_push_encap:
-		return &bpf_lwt_push_encap_proto;
+		return &bpf_lwt_in_push_encap_proto;
 	default:
 		return lwt_out_func_proto(func_id, prog);
 	}
@@ -5166,6 +6447,8 @@
 		return &bpf_l4_csum_replace_proto;
 	case BPF_FUNC_set_hash_invalid:
 		return &bpf_set_hash_invalid_proto;
+	case BPF_FUNC_lwt_push_encap:
+		return &bpf_lwt_xmit_push_encap_proto;
 	default:
 		return lwt_out_func_proto(func_id, prog);
 	}
@@ -5216,6 +6499,17 @@
 		if (size != size_default)
 			return false;
 		break;
+	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+		return false;
+	case bpf_ctx_range(struct __sk_buff, tstamp):
+		if (size != sizeof(__u64))
+			return false;
+		break;
+	case offsetof(struct __sk_buff, sk):
+		if (type == BPF_WRITE || size != sizeof(__u64))
+			return false;
+		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+		break;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -5242,6 +6536,8 @@
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, data_end):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
+	case bpf_ctx_range(struct __sk_buff, wire_len):
 		return false;
 	}
 
@@ -5257,6 +6553,50 @@
 	return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
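+/* Which __sk_buff fields cgroup skb programs may access; direct packet
+ * access stays restricted to privileged programs.
+ */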
+static bool cg_skb_is_valid_access(int off, int size,
+				   enum bpf_access_type type,
+				   const struct bpf_prog *prog,
+				   struct bpf_insn_access_aux *info)
+{
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
+	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range(struct __sk_buff, wire_len):
+		return false;
+	case bpf_ctx_range(struct __sk_buff, data):
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		if (!capable(CAP_SYS_ADMIN))
+			return false;
+		break;
+	}
+
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case bpf_ctx_range(struct __sk_buff, mark):
+		case bpf_ctx_range(struct __sk_buff, priority):
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+			break;
+		case bpf_ctx_range(struct __sk_buff, tstamp):
+			if (!capable(CAP_SYS_ADMIN))
+				return false;
+			break;
+		default:
+			return false;
+		}
+	}
+
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
 				enum bpf_access_type type,
 				const struct bpf_prog *prog,
@@ -5266,6 +6606,8 @@
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
+	case bpf_ctx_range(struct __sk_buff, wire_len):
 		return false;
 	}
 
@@ -5336,14 +6678,39 @@
 	return true;
 }
 
-static bool __sock_filter_check_size(int off, int size,
+bool bpf_sock_common_is_valid_access(int off, int size,
+				     enum bpf_access_type type,
 				     struct bpf_insn_access_aux *info)
 {
+	switch (off) {
+	case bpf_ctx_range_till(struct bpf_sock, type, priority):
+		return false;
+	default:
+		return bpf_sock_is_valid_access(off, size, type, info);
+	}
+}
+
+bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+			      struct bpf_insn_access_aux *info)
+{
 	const int size_default = sizeof(__u32);
 
+	if (off < 0 || off >= sizeof(struct bpf_sock))
+		return false;
+	if (off % size != 0)
+		return false;
+
 	switch (off) {
+	case offsetof(struct bpf_sock, state):
+	case offsetof(struct bpf_sock, family):
+	case offsetof(struct bpf_sock, type):
+	case offsetof(struct bpf_sock, protocol):
+	case offsetof(struct bpf_sock, dst_port):
+	case offsetof(struct bpf_sock, src_port):
 	case bpf_ctx_range(struct bpf_sock, src_ip4):
 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+	case bpf_ctx_range(struct bpf_sock, dst_ip4):
+	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
 		bpf_ctx_record_field_size(info, size_default);
 		return bpf_ctx_narrow_access_ok(off, size, size_default);
 	}
@@ -5356,16 +6723,19 @@
 					const struct bpf_prog *prog,
 					struct bpf_insn_access_aux *info)
 {
-	if (off < 0 || off >= sizeof(struct bpf_sock))
+	if (!bpf_sock_is_valid_access(off, size, type, info))
 		return false;
-	if (off % size != 0)
-		return false;
-	if (!__sock_filter_check_attach_type(off, type,
-					     prog->expected_attach_type))
-		return false;
-	if (!__sock_filter_check_size(off, size, info))
-		return false;
-	return true;
+	return __sock_filter_check_attach_type(off, type,
+					       prog->expected_attach_type);
+}
+
+static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
+			     const struct bpf_prog *prog)
+{
+	/* Neither direct read nor direct write requires any preliminary
+	 * action.
+	 */
+	return 0;
 }
 
 static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
@@ -5460,6 +6830,8 @@
 		case bpf_ctx_range(struct __sk_buff, priority):
 		case bpf_ctx_range(struct __sk_buff, tc_classid):
 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+		case bpf_ctx_range(struct __sk_buff, tstamp):
+		case bpf_ctx_range(struct __sk_buff, queue_mapping):
 			break;
 		default:
 			return false;
@@ -5556,6 +6928,7 @@
 		case BPF_CGROUP_INET4_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_UDP4_SENDMSG:
+		case BPF_CGROUP_UDP4_RECVMSG:
 			break;
 		default:
 			return false;
@@ -5566,6 +6939,7 @@
 		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET6_CONNECT:
 		case BPF_CGROUP_UDP6_SENDMSG:
+		case BPF_CGROUP_UDP6_RECVMSG:
 			break;
 		default:
 			return false;
@@ -5596,12 +6970,32 @@
 	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
 	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
 				msg_src_ip6[3]):
-		/* Only narrow read access allowed for now. */
 		if (type == BPF_READ) {
 			bpf_ctx_record_field_size(info, size_default);
+
+			if (bpf_ctx_wide_access_ok(off, size,
+						   struct bpf_sock_addr,
+						   user_ip6))
+				return true;
+
+			if (bpf_ctx_wide_access_ok(off, size,
+						   struct bpf_sock_addr,
+						   msg_src_ip6))
+				return true;
+
 			if (!bpf_ctx_narrow_access_ok(off, size, size_default))
 				return false;
 		} else {
+			if (bpf_ctx_wide_access_ok(off, size,
+						   struct bpf_sock_addr,
+						   user_ip6))
+				return true;
+
+			if (bpf_ctx_wide_access_ok(off, size,
+						   struct bpf_sock_addr,
+						   msg_src_ip6))
+				return true;
+
 			if (size != size_default)
 				return false;
 		}
@@ -5610,6 +7004,13 @@
 		if (size != size_default)
 			return false;
 		break;
+	case offsetof(struct bpf_sock_addr, sk):
+		if (type != BPF_READ)
+			return false;
+		if (size != sizeof(__u64))
+			return false;
+		info->reg_type = PTR_TO_SOCKET;
+		break;
 	default:
 		if (type == BPF_READ) {
 			if (size != size_default)
@@ -5653,6 +7054,11 @@
 			if (size != sizeof(__u64))
 				return false;
 			break;
+		case offsetof(struct bpf_sock_ops, sk):
+			if (size != sizeof(__u64))
+				return false;
+			info->reg_type = PTR_TO_SOCKET_OR_NULL;
+			break;
 		default:
 			if (size != size_default)
 				return false;
@@ -5677,6 +7083,8 @@
 	switch (off) {
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
+	case bpf_ctx_range(struct __sk_buff, wire_len):
 		return false;
 	}
 
@@ -5712,6 +7120,9 @@
 	if (type == BPF_WRITE)
 		return false;
 
+	if (off % size != 0)
+		return false;
+
 	switch (off) {
 	case offsetof(struct sk_msg_md, data):
 		info->reg_type = PTR_TO_PACKET;
@@ -5723,17 +7134,87 @@
 		if (size != sizeof(__u64))
 			return false;
 		break;
-	default:
+	case bpf_ctx_range(struct sk_msg_md, family):
+	case bpf_ctx_range(struct sk_msg_md, remote_ip4):
+	case bpf_ctx_range(struct sk_msg_md, local_ip4):
+	case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]):
+	case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]):
+	case bpf_ctx_range(struct sk_msg_md, remote_port):
+	case bpf_ctx_range(struct sk_msg_md, local_port):
+	case bpf_ctx_range(struct sk_msg_md, size):
 		if (size != sizeof(__u32))
 			return false;
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+
+static bool flow_dissector_is_valid_access(int off, int size,
+					   enum bpf_access_type type,
+					   const struct bpf_prog *prog,
+					   struct bpf_insn_access_aux *info)
+{
+	const int size_default = sizeof(__u32);
+
+	if (off < 0 || off >= sizeof(struct __sk_buff))
+		return false;
+
+	if (type == BPF_WRITE)
+		return false;
+
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		if (size != size_default)
+			return false;
+		info->reg_type = PTR_TO_PACKET;
+		return true;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		if (size != size_default)
+			return false;
+		info->reg_type = PTR_TO_PACKET_END;
+		return true;
+	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+		if (size != sizeof(__u64))
+			return false;
+		info->reg_type = PTR_TO_FLOW_KEYS;
+		return true;
+	default:
+		return false;
+	}
+}
+
+static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
+					     const struct bpf_insn *si,
+					     struct bpf_insn *insn_buf,
+					     struct bpf_prog *prog,
+					     u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct __sk_buff, data):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_flow_dissector, data));
+		break;
+
+	case offsetof(struct __sk_buff, data_end):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_flow_dissector, data_end));
+		break;
+
+	case offsetof(struct __sk_buff, flow_keys):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_flow_dissector, flow_keys));
+		break;
 	}
 
-	if (off < 0 || off >= sizeof(struct sk_msg_md))
-		return false;
-	if (off % size != 0)
-		return false;
-
-	return true;
+	return insn - insn_buf;
 }
 
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
@@ -5818,25 +7299,34 @@
 		break;
 
 	case offsetof(struct __sk_buff, queue_mapping):
-		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-				      bpf_target_off(struct sk_buff, queue_mapping, 2,
-						     target_size));
+		if (type == BPF_WRITE) {
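+			/* Skip the store entirely for out-of-range values. */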
+			*insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
+			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     queue_mapping,
+							     2, target_size));
+		} else {
+			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     queue_mapping,
+							     2, target_size));
+		}
 		break;
 
 	case offsetof(struct __sk_buff, vlan_present):
-	case offsetof(struct __sk_buff, vlan_tci):
-		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
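+		/* Load the flags byte and normalize the present bit to 0/1. */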
+		*target_size = 1;
+		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
+				      PKT_VLAN_PRESENT_OFFSET());
+		if (PKT_VLAN_PRESENT_BIT)
+			*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT);
+		if (PKT_VLAN_PRESENT_BIT < 7)
+			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
+		break;
 
+	case offsetof(struct __sk_buff, vlan_tci):
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
 				      bpf_target_off(struct sk_buff, vlan_tci, 2,
 						     target_size));
-		if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
-			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
-						~VLAN_TAG_PRESENT);
-		} else {
-			*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
-			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
-		}
 		break;
 
 	case offsetof(struct __sk_buff, cb[0]) ...
@@ -6030,15 +7520,70 @@
 				      bpf_target_off(struct sock_common,
 						     skc_num, 2, target_size));
 		break;
+
+	case offsetof(struct __sk_buff, tstamp):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_DW,
+					      si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     tstamp, 8,
+							     target_size));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_DW,
+					      si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     tstamp, 8,
+							     target_size));
+		break;
+
+	case offsetof(struct __sk_buff, gso_segs):
+		/* si->dst_reg = skb_shinfo(SKB); */
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
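+		/* skb->end is an offset from skb->head here, so add them up. */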
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+				      BPF_REG_AX, si->src_reg,
+				      offsetof(struct sk_buff, end));
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, head));
+		*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+#else
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, end));
+#endif
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
+				      si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct skb_shared_info,
+						     gso_segs, 2,
+						     target_size));
+		break;
+	case offsetof(struct __sk_buff, wire_len):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
+
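+		/* pkt_len is kept in qdisc_skb_cb, stored inside skb->cb. */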
+		off = si->off;
+		off -= offsetof(struct __sk_buff, wire_len);
+		off += offsetof(struct sk_buff, cb);
+		off += offsetof(struct qdisc_skb_cb, pkt_len);
+		*target_size = 4;
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
+		break;
+
+	case offsetof(struct __sk_buff, sk):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		break;
 	}
 
 	return insn - insn_buf;
 }
 
-static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
-					  const struct bpf_insn *si,
-					  struct bpf_insn *insn_buf,
-					  struct bpf_prog *prog, u32 *target_size)
+u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+				const struct bpf_insn *si,
+				struct bpf_insn *insn_buf,
+				struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 	int off;
@@ -6078,24 +7623,32 @@
 		break;
 
 	case offsetof(struct bpf_sock, family):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
-
-		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, sk_family));
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_family),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common,
+				       skc_family,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_family),
+				       target_size));
 		break;
 
 	case offsetof(struct bpf_sock, type):
+		BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
 				      offsetof(struct sock, __sk_flags_offset));
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+		*target_size = 2;
 		break;
 
 	case offsetof(struct bpf_sock, protocol):
+		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
 				      offsetof(struct sock, __sk_flags_offset));
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
+		*target_size = 1;
 		break;
 
 	case offsetof(struct bpf_sock, src_ip4):
@@ -6107,6 +7660,15 @@
 				       target_size));
 		break;
 
+	case offsetof(struct bpf_sock, dst_ip4):
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_daddr,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_daddr),
+				       target_size));
+		break;
+
 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
 #if IS_ENABLED(CONFIG_IPV6)
 		off = si->off;
@@ -6125,6 +7687,23 @@
 #endif
 		break;
 
+	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		off = si->off;
+		off -= offsetof(struct bpf_sock, dst_ip6[0]);
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common,
+				       skc_v6_daddr.s6_addr32[0],
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_v6_daddr.s6_addr32[0]),
+				       target_size) + off);
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+		*target_size = 4;
+#endif
+		break;
+
 	case offsetof(struct bpf_sock, src_port):
 		*insn++ = BPF_LDX_MEM(
 			BPF_FIELD_SIZEOF(struct sock_common, skc_num),
@@ -6134,6 +7713,26 @@
 						    skc_num),
 				       target_size));
 		break;
+
+	case offsetof(struct bpf_sock, dst_port):
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_dport,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_dport),
+				       target_size));
+		break;
+
+	case offsetof(struct bpf_sock, state):
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_state),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_state,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_state),
+				       target_size));
+		break;
 	}
 
 	return insn - insn_buf;
@@ -6237,9 +7836,6 @@
 /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
  * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
  *
- * It doesn't support SIZE argument though since narrow stores are not
- * supported for now.
- *
  * In addition it uses Temporary Field TF (member of struct S) as the 3rd
  * "register" since two registers available in convert_ctx_access are not
  * enough: we can't override neither SRC, since it contains value to store, nor
@@ -6247,7 +7843,7 @@
  * instructions. But we need a temporary place to save pointer to nested
  * structure whose field we want to store to.
  */
-#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF)		       \
+#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF)	       \
 	do {								       \
 		int tmp_reg = BPF_REG_9;				       \
 		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
@@ -6258,8 +7854,7 @@
 				      offsetof(S, TF));			       \
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg,	       \
 				      si->dst_reg, offsetof(S, F));	       \
-		*insn++ = BPF_STX_MEM(					       \
-			BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg,	       \
+		*insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg,	       \
 			bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF),	       \
 				       target_size)			       \
 				+ OFF);					       \
@@ -6271,8 +7866,8 @@
 						      TF)		       \
 	do {								       \
 		if (type == BPF_WRITE) {				       \
-			SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF,    \
-							 TF);		       \
+			SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE,   \
+							 OFF, TF);	       \
 		} else {						       \
 			SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(		       \
 				S, NS, F, NF, SIZE, OFF);  \
@@ -6367,6 +7962,11 @@
 			struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
 			s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
 		break;
+	case offsetof(struct bpf_sock_addr, sk):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_addr_kern, sk));
+		break;
 	}
 
 	return insn - insn_buf;
@@ -6381,6 +7981,82 @@
 	struct bpf_insn *insn = insn_buf;
 	int off;
 
+/* Helper macro for adding read access to tcp_sock or sock fields. */
+#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
+	do {								      \
+		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		      \
+			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
+						struct bpf_sock_ops_kern,     \
+						is_fullsock),		      \
+				      si->dst_reg, si->src_reg,		      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+					       is_fullsock));		      \
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2);	      \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
+						struct bpf_sock_ops_kern, sk),\
+				      si->dst_reg, si->src_reg,		      \
+				      offsetof(struct bpf_sock_ops_kern, sk));\
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ,		      \
+						       OBJ_FIELD),	      \
+				      si->dst_reg, si->dst_reg,		      \
+				      offsetof(OBJ, OBJ_FIELD));	      \
+	} while (0)
+
+#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
+		SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
+
+/* Helper macro for adding write access to tcp_sock or sock fields.
+ * The macro is called with two registers: dst_reg, which contains a pointer
+ * to ctx (context), and src_reg, which contains the value that should be
+ * stored. However, we need an additional register since we cannot overwrite
+ * dst_reg because it may be used later in the program.
+ * Instead we "borrow" one of the other registers. We first save its value
+ * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
+ * it at the end of the macro.
+ */
+#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
+	do {								      \
+		int reg = BPF_REG_9;					      \
+		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		      \
+			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
+		if (si->dst_reg == reg || si->src_reg == reg)		      \
+			reg--;						      \
+		if (si->dst_reg == reg || si->src_reg == reg)		      \
+			reg--;						      \
+		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,		      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+					       temp));			      \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
+						struct bpf_sock_ops_kern,     \
+						is_fullsock),		      \
+				      reg, si->dst_reg,			      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+					       is_fullsock));		      \
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);		      \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
+						struct bpf_sock_ops_kern, sk),\
+				      reg, si->dst_reg,			      \
+				      offsetof(struct bpf_sock_ops_kern, sk));\
+		*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),	      \
+				      reg, si->src_reg,			      \
+				      offsetof(OBJ, OBJ_FIELD));	      \
+		*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,		      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+					       temp));			      \
+	} while (0)
+
+#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)	      \
+	do {								      \
+		if (TYPE == BPF_WRITE)					      \
+			SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
+		else							      \
+			SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
+	} while (0)
+
+	if (insn > insn_buf)
+		return insn - insn_buf;
+
 	switch (si->off) {
 	case offsetof(struct bpf_sock_ops, op) ...
 	     offsetof(struct bpf_sock_ops, replylong[3]):
@@ -6538,175 +8214,91 @@
 				      FIELD_SIZEOF(struct minmax_sample, t));
 		break;
 
-/* Helper macro for adding read access to tcp_sock or sock fields. */
-#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
-	do {								      \
-		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		      \
-			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
-						struct bpf_sock_ops_kern,     \
-						is_fullsock),		      \
-				      si->dst_reg, si->src_reg,		      \
-				      offsetof(struct bpf_sock_ops_kern,      \
-					       is_fullsock));		      \
-		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2);	      \
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
-						struct bpf_sock_ops_kern, sk),\
-				      si->dst_reg, si->src_reg,		      \
-				      offsetof(struct bpf_sock_ops_kern, sk));\
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ,		      \
-						       OBJ_FIELD),	      \
-				      si->dst_reg, si->dst_reg,		      \
-				      offsetof(OBJ, OBJ_FIELD));	      \
-	} while (0)
-
-/* Helper macro for adding write access to tcp_sock or sock fields.
- * The macro is called with two registers, dst_reg which contains a pointer
- * to ctx (context) and src_reg which contains the value that should be
- * stored. However, we need an additional register since we cannot overwrite
- * dst_reg because it may be used later in the program.
- * Instead we "borrow" one of the other register. We first save its value
- * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
- * it at the end of the macro.
- */
-#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
-	do {								      \
-		int reg = BPF_REG_9;					      \
-		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		      \
-			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
-		if (si->dst_reg == reg || si->src_reg == reg)		      \
-			reg--;						      \
-		if (si->dst_reg == reg || si->src_reg == reg)		      \
-			reg--;						      \
-		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,		      \
-				      offsetof(struct bpf_sock_ops_kern,      \
-					       temp));			      \
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
-						struct bpf_sock_ops_kern,     \
-						is_fullsock),		      \
-				      reg, si->dst_reg,			      \
-				      offsetof(struct bpf_sock_ops_kern,      \
-					       is_fullsock));		      \
-		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);		      \
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
-						struct bpf_sock_ops_kern, sk),\
-				      reg, si->dst_reg,			      \
-				      offsetof(struct bpf_sock_ops_kern, sk));\
-		*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),	      \
-				      reg, si->src_reg,			      \
-				      offsetof(OBJ, OBJ_FIELD));	      \
-		*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,		      \
-				      offsetof(struct bpf_sock_ops_kern,      \
-					       temp));			      \
-	} while (0)
-
-#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)	      \
-	do {								      \
-		if (TYPE == BPF_WRITE)					      \
-			SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
-		else							      \
-			SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
-	} while (0)
-
-	case offsetof(struct bpf_sock_ops, snd_cwnd):
-		SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, srtt_us):
-		SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
-		break;
-
 	case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
 		SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
 				   struct tcp_sock);
 		break;
 
-	case offsetof(struct bpf_sock_ops, snd_ssthresh):
-		SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, rcv_nxt):
-		SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, snd_nxt):
-		SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, snd_una):
-		SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, mss_cache):
-		SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, ecn_flags):
-		SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, rate_delivered):
-		SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
-				   struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, rate_interval_us):
-		SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
-				   struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, packets_out):
-		SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, retrans_out):
-		SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, total_retrans):
-		SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
-				   struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, segs_in):
-		SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, data_segs_in):
-		SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, segs_out):
-		SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, data_segs_out):
-		SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
-				   struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, lost_out):
-		SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, sacked_out):
-		SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
-		break;
-
 	case offsetof(struct bpf_sock_ops, sk_txhash):
 		SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
 					  struct sock, type);
 		break;
-
+	case offsetof(struct bpf_sock_ops, snd_cwnd):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd);
+		break;
+	case offsetof(struct bpf_sock_ops, srtt_us):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us);
+		break;
+	case offsetof(struct bpf_sock_ops, snd_ssthresh):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh);
+		break;
+	case offsetof(struct bpf_sock_ops, rcv_nxt):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt);
+		break;
+	case offsetof(struct bpf_sock_ops, snd_nxt):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt);
+		break;
+	case offsetof(struct bpf_sock_ops, snd_una):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una);
+		break;
+	case offsetof(struct bpf_sock_ops, mss_cache):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache);
+		break;
+	case offsetof(struct bpf_sock_ops, ecn_flags):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags);
+		break;
+	case offsetof(struct bpf_sock_ops, rate_delivered):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered);
+		break;
+	case offsetof(struct bpf_sock_ops, rate_interval_us):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us);
+		break;
+	case offsetof(struct bpf_sock_ops, packets_out):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out);
+		break;
+	case offsetof(struct bpf_sock_ops, retrans_out):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out);
+		break;
+	case offsetof(struct bpf_sock_ops, total_retrans):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans);
+		break;
+	case offsetof(struct bpf_sock_ops, segs_in):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in);
+		break;
+	case offsetof(struct bpf_sock_ops, data_segs_in):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in);
+		break;
+	case offsetof(struct bpf_sock_ops, segs_out):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out);
+		break;
+	case offsetof(struct bpf_sock_ops, data_segs_out):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out);
+		break;
+	case offsetof(struct bpf_sock_ops, lost_out):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out);
+		break;
+	case offsetof(struct bpf_sock_ops, sacked_out):
+		SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out);
+		break;
 	case offsetof(struct bpf_sock_ops, bytes_received):
-		SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
-				   struct tcp_sock);
+		SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received);
 		break;
-
 	case offsetof(struct bpf_sock_ops, bytes_acked):
-		SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
+		SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked);
 		break;
-
+	case offsetof(struct bpf_sock_ops, sk):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern,
+						is_fullsock),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern,
+					       is_fullsock));
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		break;
 	}
 	return insn - insn_buf;
 }
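
The SOCK_OPS_GET_TCP_SOCK_FIELD cases above are what let a sockops program read tcp_sock counters as plain field accesses; each load is rewritten into the guarded is_fullsock/sk sequence emitted by SOCK_OPS_GET_FIELD. A minimal sketch of the BPF side, again assuming a libbpf toolchain (names illustrative):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int dump_cwnd(struct bpf_sock_ops *skops)
{
	/* both reads below go through the conversion added above */
	if (skops->op == BPF_SOCK_OPS_STATE_CB)
		bpf_printk("cwnd=%u srtt=%u",
			   skops->snd_cwnd, skops->srtt_us);
	return 1;
}

char _license[] SEC("license") = "GPL";
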
@@ -6746,24 +8338,27 @@
 	int off;
 #endif
 
+	/* convert ctx uses the fact that the sg element is first in the struct */
+	BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0);
+
 	switch (si->off) {
 	case offsetof(struct sk_msg_md, data):
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, data));
+				      offsetof(struct sk_msg, data));
 		break;
 	case offsetof(struct sk_msg_md, data_end):
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, data_end));
+				      offsetof(struct sk_msg, data_end));
 		break;
 	case offsetof(struct sk_msg_md, family):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_family));
 		break;
@@ -6772,9 +8367,9 @@
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_daddr));
 		break;
@@ -6784,9 +8379,9 @@
 					  skc_rcv_saddr) != 4);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_rcv_saddr));
@@ -6801,9 +8396,9 @@
 		off = si->off;
 		off -= offsetof(struct sk_msg_md, remote_ip6[0]);
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_v6_daddr.s6_addr32[0]) +
@@ -6822,9 +8417,9 @@
 		off = si->off;
 		off -= offsetof(struct sk_msg_md, local_ip6[0]);
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_v6_rcv_saddr.s6_addr32[0]) +
@@ -6838,9 +8433,9 @@
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_dport));
 #ifndef __BIG_ENDIAN_BITFIELD
@@ -6852,12 +8447,18 @@
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_num));
 		break;
+
+	case offsetof(struct sk_msg_md, size):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_msg_sg, size));
+		break;
 	}
 
 	return insn - insn_buf;
@@ -6890,6 +8491,7 @@
 	.get_func_proto		= xdp_func_proto,
 	.is_valid_access	= xdp_is_valid_access,
 	.convert_ctx_access	= xdp_convert_ctx_access,
+	.gen_prologue		= bpf_noop_prologue,
 };
 
 const struct bpf_prog_ops xdp_prog_ops = {
@@ -6898,7 +8500,7 @@
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
 	.get_func_proto		= cg_skb_func_proto,
-	.is_valid_access	= sk_filter_is_valid_access,
+	.is_valid_access	= cg_skb_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
 };
 
@@ -6950,7 +8552,7 @@
 const struct bpf_verifier_ops cg_sock_verifier_ops = {
 	.get_func_proto		= sock_filter_func_proto,
 	.is_valid_access	= sock_filter_is_valid_access,
-	.convert_ctx_access	= sock_filter_convert_ctx_access,
+	.convert_ctx_access	= bpf_sock_convert_ctx_access,
 };
 
 const struct bpf_prog_ops cg_sock_prog_ops = {
@@ -6988,11 +8590,22 @@
 	.get_func_proto		= sk_msg_func_proto,
 	.is_valid_access	= sk_msg_is_valid_access,
 	.convert_ctx_access	= sk_msg_convert_ctx_access,
+	.gen_prologue		= bpf_noop_prologue,
 };
 
 const struct bpf_prog_ops sk_msg_prog_ops = {
 };
 
+const struct bpf_verifier_ops flow_dissector_verifier_ops = {
+	.get_func_proto		= flow_dissector_func_proto,
+	.is_valid_access	= flow_dissector_is_valid_access,
+	.convert_ctx_access	= flow_dissector_convert_ctx_access,
+};
+
+const struct bpf_prog_ops flow_dissector_prog_ops = {
+	.test_run		= bpf_prog_test_run_flow_dissector,
+};
+
 int sk_detach_filter(struct sock *sk)
 {
 	int ret = -ENOENT;
@@ -7230,13 +8843,13 @@
 		return size == size_default;
 
 	/* Fields that allow narrowing */
-	case offsetof(struct sk_reuseport_md, eth_protocol):
+	case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
 		if (size < FIELD_SIZEOF(struct sk_buff, protocol))
 			return false;
 		/* fall through */
-	case offsetof(struct sk_reuseport_md, ip_protocol):
-	case offsetof(struct sk_reuseport_md, bind_inany):
-	case offsetof(struct sk_reuseport_md, len):
+	case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
+	case bpf_ctx_range(struct sk_reuseport_md, bind_inany):
+	case bpf_ctx_range(struct sk_reuseport_md, len):
 		bpf_ctx_record_field_size(info, size_default);
 		return bpf_ctx_narrow_access_ok(off, size, size_default);
 
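
The switch from offsetof() to bpf_ctx_range() in the sk_reuseport narrowing checks is what makes sub-word loads land on the case labels at all: the macro expands to a case range spanning every byte of the field. For reference, its definition as found in include/linux/filter.h (quoted for context, not part of this patch):

#define bpf_ctx_range(TYPE, MEMBER)					\
	offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1

With that, a 1-byte read at any offset inside eth_protocol, for example, still reaches the bpf_ctx_narrow_access_ok() check instead of falling through to the default reject.
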
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 415b95f..68eda10 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/export.h>
@@ -25,6 +26,13 @@
 #include <net/flow_dissector.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <uapi/linux/batadv_packet.h>
+#include <linux/bpf.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#endif
+
+static DEFINE_MUTEX(flow_dissector_mutex);
 
 static void dissector_set_key(struct flow_dissector *flow_dissector,
 			      enum flow_dissector_key_id key_id)
@@ -62,6 +70,83 @@
 }
 EXPORT_SYMBOL(skb_flow_dissector_init);
 
+int skb_flow_dissector_prog_query(const union bpf_attr *attr,
+				  union bpf_attr __user *uattr)
+{
+	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+	u32 prog_id, prog_cnt = 0, flags = 0;
+	struct bpf_prog *attached;
+	struct net *net;
+
+	if (attr->query.query_flags)
+		return -EINVAL;
+
+	net = get_net_ns_by_fd(attr->query.target_fd);
+	if (IS_ERR(net))
+		return PTR_ERR(net);
+
+	rcu_read_lock();
+	attached = rcu_dereference(net->flow_dissector_prog);
+	if (attached) {
+		prog_cnt = 1;
+		prog_id = attached->aux->id;
+	}
+	rcu_read_unlock();
+
+	put_net(net);
+
+	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
+		return -EFAULT;
+	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+		return -EFAULT;
+
+	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+		return 0;
+
+	if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+				       struct bpf_prog *prog)
+{
+	struct bpf_prog *attached;
+	struct net *net;
+
+	net = current->nsproxy->net_ns;
+	mutex_lock(&flow_dissector_mutex);
+	attached = rcu_dereference_protected(net->flow_dissector_prog,
+					     lockdep_is_held(&flow_dissector_mutex));
+	if (attached) {
+		/* Only one BPF program can be attached at a time */
+		mutex_unlock(&flow_dissector_mutex);
+		return -EEXIST;
+	}
+	rcu_assign_pointer(net->flow_dissector_prog, prog);
+	mutex_unlock(&flow_dissector_mutex);
+	return 0;
+}
+
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+{
+	struct bpf_prog *attached;
+	struct net *net;
+
+	net = current->nsproxy->net_ns;
+	mutex_lock(&flow_dissector_mutex);
+	attached = rcu_dereference_protected(net->flow_dissector_prog,
+					     lockdep_is_held(&flow_dissector_mutex));
+	if (!attached) {
+		mutex_unlock(&flow_dissector_mutex);
+		return -ENOENT;
+	}
+	RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
+	bpf_prog_put(attached);
+	mutex_unlock(&flow_dissector_mutex);
+	return 0;
+}
 /**
  * skb_flow_get_be16 - extract be16 entity
  * @skb: sk_buff to extract from
@@ -118,6 +203,22 @@
 }
 EXPORT_SYMBOL(__skb_flow_get_ports);
 
+void skb_flow_dissect_meta(const struct sk_buff *skb,
+			   struct flow_dissector *flow_dissector,
+			   void *target_container)
+{
+	struct flow_dissector_key_meta *meta;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_META))
+		return;
+
+	meta = skb_flow_dissector_target(flow_dissector,
+					 FLOW_DISSECTOR_KEY_META,
+					 target_container);
+	meta->ingress_ifindex = skb->skb_iif;
+}
+EXPORT_SYMBOL(skb_flow_dissect_meta);
+
 static void
 skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
 				   struct flow_dissector *flow_dissector,
@@ -135,6 +236,46 @@
 }
 
 void
+skb_flow_dissect_ct(const struct sk_buff *skb,
+		    struct flow_dissector *flow_dissector,
+		    void *target_container,
+		    u16 *ctinfo_map,
+		    size_t mapsize)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	struct flow_dissector_key_ct *key;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_labels *cl;
+	struct nf_conn *ct;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CT))
+		return;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return;
+
+	key = skb_flow_dissector_target(flow_dissector,
+					FLOW_DISSECTOR_KEY_CT,
+					target_container);
+
+	if (ctinfo < mapsize)
+		key->ct_state = ctinfo_map[ctinfo];
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)
+	key->ct_zone = ct->zone.id;
+#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+	key->ct_mark = ct->mark;
+#endif
+
+	cl = nf_ct_labels_find(ct);
+	if (cl)
+		memcpy(key->ct_labels, cl->bits, sizeof(key->ct_labels));
+#endif /* CONFIG_NF_CONNTRACK */
+}
+EXPORT_SYMBOL(skb_flow_dissect_ct);
+
+void
 skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 			     struct flow_dissector *flow_dissector,
 			     void *target_container)
@@ -382,8 +523,8 @@
 	offset += sizeof(struct gre_base_hdr);
 
 	if (hdr->flags & GRE_CSUM)
-		offset += sizeof(((struct gre_full_hdr *) 0)->csum) +
-			  sizeof(((struct gre_full_hdr *) 0)->reserved1);
+		offset += FIELD_SIZEOF(struct gre_full_hdr, csum) +
+			  FIELD_SIZEOF(struct gre_full_hdr, reserved1);
 
 	if (hdr->flags & GRE_KEY) {
 		const __be32 *keyid;
@@ -405,11 +546,11 @@
 			else
 				key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
 		}
-		offset += sizeof(((struct gre_full_hdr *) 0)->key);
+		offset += FIELD_SIZEOF(struct gre_full_hdr, key);
 	}
 
 	if (hdr->flags & GRE_SEQ)
-		offset += sizeof(((struct pptp_gre_header *) 0)->seq);
+		offset += FIELD_SIZEOF(struct pptp_gre_header, seq);
 
 	if (gre_ver == 0) {
 		if (*p_proto == htons(ETH_P_TEB)) {
@@ -436,7 +577,7 @@
 		u8 *ppp_hdr;
 
 		if (hdr->flags & GRE_ACK)
-			offset += sizeof(((struct pptp_gre_header *) 0)->ack);
+			offset += FIELD_SIZEOF(struct pptp_gre_header, ack);
 
 		ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
 					       sizeof(_ppp_hdr),
@@ -588,8 +729,103 @@
 	return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
 }
 
+static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
+				     struct flow_dissector *flow_dissector,
+				     void *target_container)
+{
+	struct flow_dissector_key_control *key_control;
+	struct flow_dissector_key_basic *key_basic;
+	struct flow_dissector_key_addrs *key_addrs;
+	struct flow_dissector_key_ports *key_ports;
+	struct flow_dissector_key_tags *key_tags;
+
+	key_control = skb_flow_dissector_target(flow_dissector,
+						FLOW_DISSECTOR_KEY_CONTROL,
+						target_container);
+	key_control->thoff = flow_keys->thoff;
+	if (flow_keys->is_frag)
+		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
+	if (flow_keys->is_first_frag)
+		key_control->flags |= FLOW_DIS_FIRST_FRAG;
+	if (flow_keys->is_encap)
+		key_control->flags |= FLOW_DIS_ENCAPSULATION;
+
+	key_basic = skb_flow_dissector_target(flow_dissector,
+					      FLOW_DISSECTOR_KEY_BASIC,
+					      target_container);
+	key_basic->n_proto = flow_keys->n_proto;
+	key_basic->ip_proto = flow_keys->ip_proto;
+
+	if (flow_keys->addr_proto == ETH_P_IP &&
+	    dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		key_addrs = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						      target_container);
+		key_addrs->v4addrs.src = flow_keys->ipv4_src;
+		key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
+		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	} else if (flow_keys->addr_proto == ETH_P_IPV6 &&
+		   dissector_uses_key(flow_dissector,
+				      FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		key_addrs = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						      target_container);
+		memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
+		       sizeof(key_addrs->v6addrs));
+		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+	}
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+		key_ports = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_PORTS,
+						      target_container);
+		key_ports->src = flow_keys->sport;
+		key_ports->dst = flow_keys->dport;
+	}
+
+	if (dissector_uses_key(flow_dissector,
+			       FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
+		key_tags = skb_flow_dissector_target(flow_dissector,
+						     FLOW_DISSECTOR_KEY_FLOW_LABEL,
+						     target_container);
+		key_tags->flow_label = ntohl(flow_keys->flow_label);
+	}
+}
+
+bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
+		      __be16 proto, int nhoff, int hlen, unsigned int flags)
+{
+	struct bpf_flow_keys *flow_keys = ctx->flow_keys;
+	u32 result;
+
+	/* Pass parameters to the BPF program */
+	memset(flow_keys, 0, sizeof(*flow_keys));
+	flow_keys->n_proto = proto;
+	flow_keys->nhoff = nhoff;
+	flow_keys->thoff = flow_keys->nhoff;
+
+	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG !=
+		     (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG);
+	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL !=
+		     (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP !=
+		     (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+	flow_keys->flags = flags;
+
+	preempt_disable();
+	result = BPF_PROG_RUN(prog, ctx);
+	preempt_enable();
+
+	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
+	flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
+				   flow_keys->nhoff, hlen);
+
+	return result == BPF_OK;
+}
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
+ * @net: associated network namespace, derived from @skb if NULL
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
  * @flow_dissector: list of keys to dissect
  * @target_container: target structure to put dissected values into
@@ -597,6 +833,8 @@
  * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
  * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
  * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
+ * @flags: flags that control the dissection process, e.g.
+ *         FLOW_DISSECTOR_F_STOP_AT_ENCAP.
  *
  * The function will try to retrieve individual keys into target specified
  * by flow_dissector from either the skbuff or a raw buffer specified by the
@@ -604,7 +842,8 @@
  *
  * Caller must take care of zeroing target container memory.
  */
-bool __skb_flow_dissect(const struct sk_buff *skb,
+bool __skb_flow_dissect(const struct net *net,
+			const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
 			void *target_container,
 			void *data, __be16 proto, int nhoff, int hlen,
@@ -617,6 +856,7 @@
 	struct flow_dissector_key_icmp *key_icmp;
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
+	struct bpf_prog *attached = NULL;
 	enum flow_dissect_ret fdret;
 	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
 	int num_hdrs = 0;
@@ -658,6 +898,48 @@
 					      FLOW_DISSECTOR_KEY_BASIC,
 					      target_container);
 
+	if (skb) {
+		if (!net) {
+			if (skb->dev)
+				net = dev_net(skb->dev);
+			else if (skb->sk)
+				net = sock_net(skb->sk);
+		}
+	}
+
+	WARN_ON_ONCE(!net);
+	if (net) {
+		rcu_read_lock();
+		attached = rcu_dereference(net->flow_dissector_prog);
+
+		if (attached) {
+			struct bpf_flow_keys flow_keys;
+			struct bpf_flow_dissector ctx = {
+				.flow_keys = &flow_keys,
+				.data = data,
+				.data_end = data + hlen,
+			};
+			__be16 n_proto = proto;
+
+			if (skb) {
+				ctx.skb = skb;
+				/* we can't use 'proto' in the skb case
+				 * because it might be set to skb->vlan_proto
+				 * which has been pulled from the data
+				 */
+				n_proto = skb->protocol;
+			}
+
+			ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
+					       hlen, flags);
+			__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
+						 target_container);
+			rcu_read_unlock();
+			return ret;
+		}
+		rcu_read_unlock();
+	}
+
 	if (dissector_uses_key(flow_dissector,
 			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
 		struct ethhdr *eth = eth_hdr(skb);
@@ -717,11 +999,6 @@
 		__skb_flow_dissect_ipv4(skb, flow_dissector,
 					target_container, data, iph);
 
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) {
-			fdret = FLOW_DISSECT_RET_OUT_GOOD;
-			break;
-		}
-
 		break;
 	}
 	case htons(ETH_P_IPV6): {
@@ -770,9 +1047,6 @@
 		__skb_flow_dissect_ipv6(skb, flow_dissector,
 					target_container, data, iph);
 
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
-			fdret = FLOW_DISSECT_RET_OUT_GOOD;
-
 		break;
 	}
 	case htons(ETH_P_8021AD):
@@ -812,8 +1086,7 @@
 
 			if (!vlan) {
 				key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
-				key_vlan->vlan_priority =
-					(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
+				key_vlan->vlan_priority = skb_vlan_tag_get_prio(skb);
 			} else {
 				key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
 					VLAN_VID_MASK;
@@ -1077,30 +1350,21 @@
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
 
-static u32 hashrnd __read_mostly;
+static siphash_key_t hashrnd __read_mostly;
 static __always_inline void __flow_hash_secret_init(void)
 {
 	net_get_random_once(&hashrnd, sizeof(hashrnd));
 }
 
-static __always_inline u32 __flow_hash_words(const u32 *words, u32 length,
-					     u32 keyval)
+static const void *flow_keys_hash_start(const struct flow_keys *flow)
 {
-	return jhash2(words, length, keyval);
-}
-
-static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
-{
-	const void *p = flow;
-
-	BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
-	return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
+	BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT);
+	return &flow->FLOW_KEYS_HASH_START_FIELD;
 }
 
 static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
 {
 	size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
-	BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
 	BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
 		     sizeof(*flow) - sizeof(flow->addrs));
 
@@ -1115,7 +1379,7 @@
 		diff -= sizeof(flow->addrs.tipckey);
 		break;
 	}
-	return (sizeof(*flow) - diff) / sizeof(u32);
+	return sizeof(*flow) - diff;
 }
 
 __be32 flow_get_u32_src(const struct flow_keys *flow)
@@ -1181,14 +1445,15 @@
 	}
 }
 
-static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
+static inline u32 __flow_hash_from_keys(struct flow_keys *keys,
+					const siphash_key_t *keyval)
 {
 	u32 hash;
 
 	__flow_hash_consistentify(keys);
 
-	hash = __flow_hash_words(flow_keys_hash_start(keys),
-				 flow_keys_hash_length(keys), keyval);
+	hash = siphash(flow_keys_hash_start(keys),
+		       flow_keys_hash_length(keys), keyval);
 	if (!hash)
 		hash = 1;
 
@@ -1198,12 +1463,13 @@
 u32 flow_hash_from_keys(struct flow_keys *keys)
 {
 	__flow_hash_secret_init();
-	return __flow_hash_from_keys(keys, hashrnd);
+	return __flow_hash_from_keys(keys, &hashrnd);
 }
 EXPORT_SYMBOL(flow_hash_from_keys);
 
 static inline u32 ___skb_get_hash(const struct sk_buff *skb,
-				  struct flow_keys *keys, u32 keyval)
+				  struct flow_keys *keys,
+				  const siphash_key_t *keyval)
 {
 	skb_flow_dissect_flow_keys(skb, keys,
 				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
@@ -1247,11 +1513,11 @@
 	__flow_hash_secret_init();
 
 	memset(&keys, 0, sizeof(keys));
-	__skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
-			   NULL, 0, 0, 0,
+	__skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
+			   &keys, NULL, 0, 0, 0,
 			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
 
-	return __flow_hash_from_keys(&keys, hashrnd);
+	return __flow_hash_from_keys(&keys, &hashrnd);
 }
 EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
 
@@ -1271,13 +1537,14 @@
 
 	__flow_hash_secret_init();
 
-	hash = ___skb_get_hash(skb, &keys, hashrnd);
+	hash = ___skb_get_hash(skb, &keys, &hashrnd);
 
 	__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
 }
 EXPORT_SYMBOL(__skb_get_hash);
 
-__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
+__u32 skb_get_hash_perturb(const struct sk_buff *skb,
+			   const siphash_key_t *perturb)
 {
 	struct flow_keys keys;
 
@@ -1349,7 +1616,8 @@
 {
 	struct flow_keys_basic keys;
 
-	if (!skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
+	if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
+					      NULL, 0, 0, 0, 0))
 		return 0;
 
 	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
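
skb_flow_dissector_bpf_prog_attach() above is reached through the bpf(2) syscall with attach type BPF_FLOW_DISSECTOR, where the target fd is a network-namespace fd rather than a cgroup. A userspace sketch using libbpf, assuming prog_fd holds a loaded BPF_PROG_TYPE_FLOW_DISSECTOR program (the helper name is illustrative):

#include <fcntl.h>
#include <unistd.h>
#include <bpf/bpf.h>

static int attach_flow_dissector(int prog_fd)
{
	int ret, netns_fd;

	netns_fd = open("/proc/self/ns/net", O_RDONLY);
	if (netns_fd < 0)
		return -1;
	/* only one program per netns; a second attach gets -EEXIST */
	ret = bpf_prog_attach(prog_fd, netns_fd, BPF_FLOW_DISSECTOR, 0);
	close(netns_fd);
	return ret;
}
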
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
new file mode 100644
index 0000000..cf52d9c
--- /dev/null
+++ b/net/core/flow_offload.c
@@ -0,0 +1,523 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <net/flow_offload.h>
+#include <linux/rtnetlink.h>
+#include <linux/mutex.h>
+
+struct flow_rule *flow_rule_alloc(unsigned int num_actions)
+{
+	struct flow_rule *rule;
+
+	rule = kzalloc(struct_size(rule, action.entries, num_actions),
+		       GFP_KERNEL);
+	if (!rule)
+		return NULL;
+
+	rule->action.num_entries = num_actions;
+
+	return rule;
+}
+EXPORT_SYMBOL(flow_rule_alloc);
+
+#define FLOW_DISSECTOR_MATCH(__rule, __type, __out)				\
+	const struct flow_match *__m = &(__rule)->match;			\
+	struct flow_dissector *__d = (__m)->dissector;				\
+										\
+	(__out)->key = skb_flow_dissector_target(__d, __type, (__m)->key);	\
+	(__out)->mask = skb_flow_dissector_target(__d, __type, (__m)->mask);	\
+
+void flow_rule_match_meta(const struct flow_rule *rule,
+			  struct flow_match_meta *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_META, out);
+}
+EXPORT_SYMBOL(flow_rule_match_meta);
+
+void flow_rule_match_basic(const struct flow_rule *rule,
+			   struct flow_match_basic *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_BASIC, out);
+}
+EXPORT_SYMBOL(flow_rule_match_basic);
+
+void flow_rule_match_control(const struct flow_rule *rule,
+			     struct flow_match_control *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CONTROL, out);
+}
+EXPORT_SYMBOL(flow_rule_match_control);
+
+void flow_rule_match_eth_addrs(const struct flow_rule *rule,
+			       struct flow_match_eth_addrs *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_eth_addrs);
+
+void flow_rule_match_vlan(const struct flow_rule *rule,
+			  struct flow_match_vlan *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_VLAN, out);
+}
+EXPORT_SYMBOL(flow_rule_match_vlan);
+
+void flow_rule_match_cvlan(const struct flow_rule *rule,
+			   struct flow_match_vlan *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CVLAN, out);
+}
+EXPORT_SYMBOL(flow_rule_match_cvlan);
+
+void flow_rule_match_ipv4_addrs(const struct flow_rule *rule,
+				struct flow_match_ipv4_addrs *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ipv4_addrs);
+
+void flow_rule_match_ipv6_addrs(const struct flow_rule *rule,
+				struct flow_match_ipv6_addrs *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ipv6_addrs);
+
+void flow_rule_match_ip(const struct flow_rule *rule,
+			struct flow_match_ip *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ip);
+
+void flow_rule_match_ports(const struct flow_rule *rule,
+			   struct flow_match_ports *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ports);
+
+void flow_rule_match_tcp(const struct flow_rule *rule,
+			 struct flow_match_tcp *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_TCP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_tcp);
+
+void flow_rule_match_icmp(const struct flow_rule *rule,
+			  struct flow_match_icmp *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ICMP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_icmp);
+
+void flow_rule_match_mpls(const struct flow_rule *rule,
+			  struct flow_match_mpls *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_MPLS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_mpls);
+
+void flow_rule_match_enc_control(const struct flow_rule *rule,
+				 struct flow_match_control *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_control);
+
+void flow_rule_match_enc_ipv4_addrs(const struct flow_rule *rule,
+				    struct flow_match_ipv4_addrs *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ipv4_addrs);
+
+void flow_rule_match_enc_ipv6_addrs(const struct flow_rule *rule,
+				    struct flow_match_ipv6_addrs *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ipv6_addrs);
+
+void flow_rule_match_enc_ip(const struct flow_rule *rule,
+			    struct flow_match_ip *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ip);
+
+void flow_rule_match_enc_ports(const struct flow_rule *rule,
+			       struct flow_match_ports *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ports);
+
+void flow_rule_match_enc_keyid(const struct flow_rule *rule,
+			       struct flow_match_enc_keyid *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_keyid);
+
+void flow_rule_match_enc_opts(const struct flow_rule *rule,
+			      struct flow_match_enc_opts *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_OPTS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_opts);
+
+struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
+					  void *cb_ident, void *cb_priv,
+					  void (*release)(void *cb_priv))
+{
+	struct flow_block_cb *block_cb;
+
+	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
+	if (!block_cb)
+		return ERR_PTR(-ENOMEM);
+
+	block_cb->cb = cb;
+	block_cb->cb_ident = cb_ident;
+	block_cb->cb_priv = cb_priv;
+	block_cb->release = release;
+
+	return block_cb;
+}
+EXPORT_SYMBOL(flow_block_cb_alloc);
+
+void flow_block_cb_free(struct flow_block_cb *block_cb)
+{
+	if (block_cb->release)
+		block_cb->release(block_cb->cb_priv);
+
+	kfree(block_cb);
+}
+EXPORT_SYMBOL(flow_block_cb_free);
+
+struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
+					   flow_setup_cb_t *cb, void *cb_ident)
+{
+	struct flow_block_cb *block_cb;
+
+	list_for_each_entry(block_cb, &block->cb_list, list) {
+		if (block_cb->cb == cb &&
+		    block_cb->cb_ident == cb_ident)
+			return block_cb;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(flow_block_cb_lookup);
+
+void *flow_block_cb_priv(struct flow_block_cb *block_cb)
+{
+	return block_cb->cb_priv;
+}
+EXPORT_SYMBOL(flow_block_cb_priv);
+
+void flow_block_cb_incref(struct flow_block_cb *block_cb)
+{
+	block_cb->refcnt++;
+}
+EXPORT_SYMBOL(flow_block_cb_incref);
+
+unsigned int flow_block_cb_decref(struct flow_block_cb *block_cb)
+{
+	return --block_cb->refcnt;
+}
+EXPORT_SYMBOL(flow_block_cb_decref);
+
+bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
+			   struct list_head *driver_block_list)
+{
+	struct flow_block_cb *block_cb;
+
+	list_for_each_entry(block_cb, driver_block_list, driver_list) {
+		if (block_cb->cb == cb &&
+		    block_cb->cb_ident == cb_ident)
+			return true;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(flow_block_cb_is_busy);
+
+int flow_block_cb_setup_simple(struct flow_block_offload *f,
+			       struct list_head *driver_block_list,
+			       flow_setup_cb_t *cb,
+			       void *cb_ident, void *cb_priv,
+			       bool ingress_only)
+{
+	struct flow_block_cb *block_cb;
+
+	if (ingress_only &&
+	    f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	f->driver_block_list = driver_block_list;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		if (flow_block_cb_is_busy(cb, cb_ident, driver_block_list))
+			return -EBUSY;
+
+		block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, NULL);
+		if (IS_ERR(block_cb))
+			return PTR_ERR(block_cb);
+
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, driver_block_list);
+		return 0;
+	case FLOW_BLOCK_UNBIND:
+		block_cb = flow_block_cb_lookup(f->block, cb, cb_ident);
+		if (!block_cb)
+			return -ENOENT;
+
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+EXPORT_SYMBOL(flow_block_cb_setup_simple);
+
+static LIST_HEAD(block_ing_cb_list);
+
+static struct rhashtable indr_setup_block_ht;
+
+struct flow_indr_block_cb {
+	struct list_head list;
+	void *cb_priv;
+	flow_indr_block_bind_cb_t *cb;
+	void *cb_ident;
+};
+
+struct flow_indr_block_dev {
+	struct rhash_head ht_node;
+	struct net_device *dev;
+	unsigned int refcnt;
+	struct list_head cb_list;
+};
+
+static const struct rhashtable_params flow_indr_setup_block_ht_params = {
+	.key_offset	= offsetof(struct flow_indr_block_dev, dev),
+	.head_offset	= offsetof(struct flow_indr_block_dev, ht_node),
+	.key_len	= sizeof(struct net_device *),
+};
+
+static struct flow_indr_block_dev *
+flow_indr_block_dev_lookup(struct net_device *dev)
+{
+	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
+				      flow_indr_setup_block_ht_params);
+}
+
+static struct flow_indr_block_dev *
+flow_indr_block_dev_get(struct net_device *dev)
+{
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (indr_dev)
+		goto inc_ref;
+
+	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
+	if (!indr_dev)
+		return NULL;
+
+	INIT_LIST_HEAD(&indr_dev->cb_list);
+	indr_dev->dev = dev;
+	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+				   flow_indr_setup_block_ht_params)) {
+		kfree(indr_dev);
+		return NULL;
+	}
+
+inc_ref:
+	indr_dev->refcnt++;
+	return indr_dev;
+}
+
+static void flow_indr_block_dev_put(struct flow_indr_block_dev *indr_dev)
+{
+	if (--indr_dev->refcnt)
+		return;
+
+	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+			       flow_indr_setup_block_ht_params);
+	kfree(indr_dev);
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_lookup(struct flow_indr_block_dev *indr_dev,
+			  flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		if (indr_block_cb->cb == cb &&
+		    indr_block_cb->cb_ident == cb_ident)
+			return indr_block_cb;
+	return NULL;
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_add(struct flow_indr_block_dev *indr_dev, void *cb_priv,
+		       flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+
+	indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+	if (indr_block_cb)
+		return ERR_PTR(-EEXIST);
+
+	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
+	if (!indr_block_cb)
+		return ERR_PTR(-ENOMEM);
+
+	indr_block_cb->cb_priv = cb_priv;
+	indr_block_cb->cb = cb;
+	indr_block_cb->cb_ident = cb_ident;
+	list_add(&indr_block_cb->list, &indr_dev->cb_list);
+
+	return indr_block_cb;
+}
+
+static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
+{
+	list_del(&indr_block_cb->list);
+	kfree(indr_block_cb);
+}
+
+static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
+
+static void flow_block_ing_cmd(struct net_device *dev,
+			       flow_indr_block_bind_cb_t *cb,
+			       void *cb_priv,
+			       enum flow_block_command command)
+{
+	struct flow_indr_block_ing_entry *entry;
+
+	mutex_lock(&flow_indr_block_ing_cb_lock);
+	list_for_each_entry(entry, &block_ing_cb_list, list) {
+		entry->cb(dev, cb, cb_priv, command);
+	}
+	mutex_unlock(&flow_indr_block_ing_cb_lock);
+}
+
+int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				  flow_indr_block_bind_cb_t *cb,
+				  void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+	int err;
+
+	indr_dev = flow_indr_block_dev_get(dev);
+	if (!indr_dev)
+		return -ENOMEM;
+
+	indr_block_cb = flow_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
+	err = PTR_ERR_OR_ZERO(indr_block_cb);
+	if (err)
+		goto err_dev_put;
+
+	flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+			   FLOW_BLOCK_BIND);
+
+	return 0;
+
+err_dev_put:
+	flow_indr_block_dev_put(indr_dev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_register);
+
+int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				flow_indr_block_bind_cb_t *cb,
+				void *cb_ident)
+{
+	int err;
+
+	rtnl_lock();
+	err = __flow_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
+	rtnl_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_register);
+
+void __flow_indr_block_cb_unregister(struct net_device *dev,
+				     flow_indr_block_bind_cb_t *cb,
+				     void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return;
+
+	indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+	if (!indr_block_cb)
+		return;
+
+	flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+			   FLOW_BLOCK_UNBIND);
+
+	flow_indr_block_cb_del(indr_block_cb);
+	flow_indr_block_dev_put(indr_dev);
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_unregister);
+
+void flow_indr_block_cb_unregister(struct net_device *dev,
+				   flow_indr_block_bind_cb_t *cb,
+				   void *cb_ident)
+{
+	rtnl_lock();
+	__flow_indr_block_cb_unregister(dev, cb, cb_ident);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);
+
+void flow_indr_block_call(struct net_device *dev,
+			  struct flow_block_offload *bo,
+			  enum flow_block_command command)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return;
+
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+				  bo);
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_call);
+
+void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry)
+{
+	mutex_lock(&flow_indr_block_ing_cb_lock);
+	list_add_tail(&entry->list, &block_ing_cb_list);
+	mutex_unlock(&flow_indr_block_ing_cb_lock);
+}
+EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
+
+void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry)
+{
+	mutex_lock(&flow_indr_block_ing_cb_lock);
+	list_del(&entry->list);
+	mutex_unlock(&flow_indr_block_ing_cb_lock);
+}
+EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb);
+
+static int __init init_flow_indr_rhashtable(void)
+{
+	return rhashtable_init(&indr_setup_block_ht,
+			       &flow_indr_setup_block_ht_params);
+}
+subsys_initcall(init_flow_indr_rhashtable);
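
flow_block_cb_setup_simple() packages the TC_SETUP_BLOCK bind/unbind boilerplate that each offloading driver previously open-coded. A driver-side sketch of the intended call pattern; all foo_* names are hypothetical:

static LIST_HEAD(foo_block_cb_list);

static int foo_setup_block_cb(enum tc_setup_type type, void *type_data,
			      void *cb_priv)
{
	return -EOPNOTSUPP;	/* per-rule offload handling goes here */
}

static int foo_ndo_setup_tc(struct net_device *dev, enum tc_setup_type type,
			    void *type_data)
{
	struct foo_priv *priv = netdev_priv(dev);

	switch (type) {
	case TC_SETUP_BLOCK:
		/* cb_ident and cb_priv are both the driver private here;
		 * ingress_only=true rejects egress block binds
		 */
		return flow_block_cb_setup_simple(type_data,
						  &foo_block_cb_list,
						  foo_setup_block_cb,
						  priv, priv, true);
	default:
		return -EOPNOTSUPP;
	}
}
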
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index e4e442d..bfe7bdd 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/core/gen_estimator.c	Simple rate estimator.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *		Eric Dumazet <edumazet@google.com>
  *
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 188d693..36888f5 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/core/gen_stats.c
  *
- *             This program is free software; you can redistribute it and/or
- *             modify it under the terms of the GNU General Public License
- *             as published by the Free Software Foundation; either version
- *             2 of the License, or (at your option) any later version.
- *
  * Authors:  Thomas Graf <tgraf@suug.ch>
  *           Jamal Hadi Salim
  *           Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
@@ -162,6 +158,34 @@
 }
 EXPORT_SYMBOL(__gnet_stats_copy_basic);
 
+static int
+___gnet_stats_copy_basic(const seqcount_t *running,
+			 struct gnet_dump *d,
+			 struct gnet_stats_basic_cpu __percpu *cpu,
+			 struct gnet_stats_basic_packed *b,
+			 int type)
+{
+	struct gnet_stats_basic_packed bstats = {0};
+
+	__gnet_stats_copy_basic(running, &bstats, cpu, b);
+
+	if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
+		d->tc_stats.bytes = bstats.bytes;
+		d->tc_stats.packets = bstats.packets;
+	}
+
+	if (d->tail) {
+		struct gnet_stats_basic sb;
+
+		memset(&sb, 0, sizeof(sb));
+		sb.bytes = bstats.bytes;
+		sb.packets = bstats.packets;
+		return gnet_stats_copy(d, type, &sb, sizeof(sb),
+				       TCA_STATS_PAD);
+	}
+	return 0;
+}
+
 /**
  * gnet_stats_copy_basic - copy basic statistics into statistic TLV
  * @running: seqcount_t pointer
@@ -181,29 +205,36 @@
 		      struct gnet_stats_basic_cpu __percpu *cpu,
 		      struct gnet_stats_basic_packed *b)
 {
-	struct gnet_stats_basic_packed bstats = {0};
-
-	__gnet_stats_copy_basic(running, &bstats, cpu, b);
-
-	if (d->compat_tc_stats) {
-		d->tc_stats.bytes = bstats.bytes;
-		d->tc_stats.packets = bstats.packets;
-	}
-
-	if (d->tail) {
-		struct gnet_stats_basic sb;
-
-		memset(&sb, 0, sizeof(sb));
-		sb.bytes = bstats.bytes;
-		sb.packets = bstats.packets;
-		return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb),
-				       TCA_STATS_PAD);
-	}
-	return 0;
+	return ___gnet_stats_copy_basic(running, d, cpu, b,
+					TCA_STATS_BASIC);
 }
 EXPORT_SYMBOL(gnet_stats_copy_basic);
 
 /**
+ * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
+ * @running: seqcount_t pointer
+ * @d: dumping handle
+ * @cpu: copy statistic per cpu
+ * @b: basic statistics
+ *
+ * Appends the basic statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_basic_hw(const seqcount_t *running,
+			 struct gnet_dump *d,
+			 struct gnet_stats_basic_cpu __percpu *cpu,
+			 struct gnet_stats_basic_packed *b)
+{
+	return ___gnet_stats_copy_basic(running, d, cpu, b,
+					TCA_STATS_BASIC_HW);
+}
+EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
+
+/**
  * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
  * @d: dumping handle
  * @rate_est: rate estimator
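
gnet_stats_copy_basic_hw() lets a qdisc report the hardware-offloaded share of its counters under TCA_STATS_BASIC_HW, alongside the existing TCA_STATS_BASIC TLV. A dump-side sketch, assuming the qdisc keeps a separate driver-fed bstats_hw counter set (foo_* names hypothetical):

struct foo_sched_data {
	struct gnet_stats_basic_packed bstats_hw;
	/* ... */
};

static int foo_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	/* software totals first, then the hardware-offloaded share */
	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
				  NULL, &sch->bstats) < 0 ||
	    gnet_stats_copy_basic_hw(qdisc_root_sleeping_running(sch), d,
				     NULL, &q->bstats_hw) < 0)
		return -1;
	return 0;
}
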
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 4b54e5f..e095fb8 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -13,22 +13,36 @@
 {
 	struct net_device *dev = skb->dev;
 	struct gro_cell *cell;
+	int res;
 
-	if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev))
-		return netif_rx(skb);
+	rcu_read_lock();
+	if (unlikely(!(dev->flags & IFF_UP)))
+		goto drop;
+
+	if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) {
+		res = netif_rx(skb);
+		goto unlock;
+	}
 
 	cell = this_cpu_ptr(gcells->cells);
 
 	if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+drop:
 		atomic_long_inc(&dev->rx_dropped);
 		kfree_skb(skb);
-		return NET_RX_DROP;
+		res = NET_RX_DROP;
+		goto unlock;
 	}
 
 	__skb_queue_tail(&cell->napi_skbs, skb);
 	if (skb_queue_len(&cell->napi_skbs) == 1)
 		napi_schedule(&cell->napi);
-	return NET_RX_SUCCESS;
+
+	res = NET_RX_SUCCESS;
+
+unlock:
+	rcu_read_unlock();
+	return res;
 }
 EXPORT_SYMBOL(gro_cells_receive);
 
@@ -84,6 +98,7 @@
 	for_each_possible_cpu(i) {
 		struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
 
+		napi_disable(&cell->napi);
 		netif_napi_del(&cell->napi);
 		__skb_queue_purge(&cell->napi_skbs);
 	}
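
For context, gro_cells is consumed by tunnel drivers roughly as below; the changes above make gro_cells_receive() drop packets once the device is no longer IFF_UP and quiesce NAPI with napi_disable() before deletion. A sketch with hypothetical foo_* names:

struct foo_tunnel {
	struct gro_cells gro_cells;
	/* ... */
};

static int foo_tunnel_init(struct net_device *dev)	/* ndo_init */
{
	struct foo_tunnel *t = netdev_priv(dev);

	return gro_cells_init(&t->gro_cells, dev);
}

static int foo_tunnel_rx(struct foo_tunnel *t, struct sk_buff *skb)
{
	/* now drops cleanly if the device has already gone down */
	return gro_cells_receive(&t->gro_cells, skb);
}

static void foo_tunnel_uninit(struct net_device *dev)	/* ndo_uninit */
{
	struct foo_tunnel *t = netdev_priv(dev);

	gro_cells_destroy(&t->gro_cells);
}
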
diff --git a/net/core/hwbm.c b/net/core/hwbm.c
index 2cab489..ac1a66d 100644
--- a/net/core/hwbm.c
+++ b/net/core/hwbm.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Support for hardware buffer manager.
  *
  * Copyright (C) 2016 Marvell
  *
  * Gregory CLEMENT <gregory.clement@free-electrons.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
  */
 #include <linux/kernel.h>
 #include <linux/printk.h>
@@ -47,34 +43,33 @@
 }
 EXPORT_SYMBOL_GPL(hwbm_pool_refill);
 
-int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num)
 {
 	int err, i;
-	unsigned long flags;
 
-	spin_lock_irqsave(&bm_pool->lock, flags);
+	mutex_lock(&bm_pool->buf_lock);
 	if (bm_pool->buf_num == bm_pool->size) {
 		pr_warn("pool already filled\n");
-		spin_unlock_irqrestore(&bm_pool->lock, flags);
+		mutex_unlock(&bm_pool->buf_lock);
 		return bm_pool->buf_num;
 	}
 
 	if (buf_num + bm_pool->buf_num > bm_pool->size) {
 		pr_warn("cannot allocate %d buffers for pool\n",
 			buf_num);
-		spin_unlock_irqrestore(&bm_pool->lock, flags);
+		mutex_unlock(&bm_pool->buf_lock);
 		return 0;
 	}
 
 	if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) {
 		pr_warn("Adding %d buffers to the %d current buffers will overflow\n",
 			buf_num,  bm_pool->buf_num);
-		spin_unlock_irqrestore(&bm_pool->lock, flags);
+		mutex_unlock(&bm_pool->buf_lock);
 		return 0;
 	}
 
 	for (i = 0; i < buf_num; i++) {
-		err = hwbm_pool_refill(bm_pool, gfp);
+		err = hwbm_pool_refill(bm_pool, GFP_KERNEL);
 		if (err < 0)
 			break;
 	}
@@ -83,7 +78,7 @@
 	bm_pool->buf_num += i;
 
 	pr_debug("hwpm pool: %d of %d buffers added\n", i, buf_num);
-	spin_unlock_irqrestore(&bm_pool->lock, flags);
+	mutex_unlock(&bm_pool->buf_lock);
 
 	return i;
 }
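
The locking change above follows from the API change: hwbm_pool_refill() is now called with GFP_KERNEL unconditionally, which may sleep, so the buffer count can no longer be protected by a spinlock and hwbm_pool_add() becomes a process-context-only call. A caller-side sketch (illustrative):

static int foo_bm_fill(struct hwbm_pool *pool)
{
	/* may sleep; must not be called from atomic context */
	return hwbm_pool_add(pool, pool->size);
}
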
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index e38e641..f153e06 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Linux network device link state notification
  *
  * Author:
  *     Stefan Rompf <sux@loplof.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
  */
 
 #include <linux/module.h>
@@ -155,7 +150,7 @@
 	clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
 
 	rfc2863_policy(dev);
-	if (dev->flags & IFF_UP) {
+	if (dev->flags & IFF_UP && netif_device_present(dev)) {
 		if (netif_carrier_ok(dev))
 			dev_activate(dev);
 		else
@@ -168,9 +163,16 @@
 
 static void __linkwatch_run_queue(int urgent_only)
 {
+#define MAX_DO_DEV_PER_LOOP	100
+
+	int do_dev = MAX_DO_DEV_PER_LOOP;
 	struct net_device *dev;
 	LIST_HEAD(wrk);
 
+	/* Give the urgent case a larger budget */
+	if (urgent_only)
+		do_dev += MAX_DO_DEV_PER_LOOP;
+
 	/*
 	 * Limit the number of linkwatch events to one
 	 * per second so that a runaway driver does not
@@ -189,7 +191,7 @@
 	spin_lock_irq(&lweventlist_lock);
 	list_splice_init(&lweventlist, &wrk);
 
-	while (!list_empty(&wrk)) {
+	while (!list_empty(&wrk) && do_dev > 0) {
 
 		dev = list_first_entry(&wrk, struct net_device, link_watch_list);
 		list_del_init(&dev->link_watch_list);
@@ -200,9 +202,13 @@
 		}
 		spin_unlock_irq(&lweventlist_lock);
 		linkwatch_do_dev(dev);
+		do_dev--;
 		spin_lock_irq(&lweventlist_lock);
 	}
 
+	/* Add the remaining work back to lweventlist */
+	list_splice_init(&wrk, &lweventlist);
+
 	if (!list_empty(&lweventlist))
 		linkwatch_schedule_work(0);
 	spin_unlock_irq(&lweventlist_lock);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 3e85437..74cfb8b 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 
 #include <linux/kernel.h>
@@ -16,6 +8,9 @@
 #include <linux/types.h>
 #include <linux/bpf.h>
 #include <net/lwtunnel.h>
+#include <net/gre.h>
+#include <net/ip6_route.h>
+#include <net/ipv6_stubs.h>
 
 struct bpf_lwt_prog {
 	struct bpf_prog *prog;
@@ -55,6 +50,7 @@
 
 	switch (ret) {
 	case BPF_OK:
+	case BPF_LWT_REROUTE:
 		break;
 
 	case BPF_REDIRECT:
@@ -63,6 +59,7 @@
 				     lwt->name ? : "<unknown>");
 			ret = BPF_OK;
 		} else {
+			skb_reset_mac_header(skb);
 			ret = skb_do_redirect(skb);
 			if (ret == 0)
 				ret = BPF_REDIRECT;
@@ -86,6 +83,35 @@
 	return ret;
 }
 
+static int bpf_lwt_input_reroute(struct sk_buff *skb)
+{
+	int err = -EINVAL;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct net_device *dev = skb_dst(skb)->dev;
+		struct iphdr *iph = ip_hdr(skb);
+
+		dev_hold(dev);
+		skb_dst_drop(skb);
+		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					   iph->tos, dev);
+		dev_put(dev);
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		skb_dst_drop(skb);
+		err = ipv6_stub->ipv6_route_input(skb);
+	} else {
+		err = -EAFNOSUPPORT;
+	}
+
+	if (err)
+		goto err;
+	return dst_input(skb);
+
+err:
+	kfree_skb(skb);
+	return err;
+}
+
 static int bpf_input(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -97,11 +123,11 @@
 		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
 		if (ret < 0)
 			return ret;
+		if (ret == BPF_LWT_REROUTE)
+			return bpf_lwt_input_reroute(skb);
 	}
 
 	if (unlikely(!dst->lwtstate->orig_input)) {
-		pr_warn_once("orig_input not set on dst for prog %s\n",
-			     bpf->out.name);
 		kfree_skb(skb);
 		return -EINVAL;
 	}
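
BPF_LWT_REROUTE is the new program return code consumed here: after an lwt program rewrites the packet, typically via bpf_lwt_push_encap(), the stack drops the stale dst and re-runs route lookup instead of trusting it. A sketch of an lwt_in program using it; the outer header values are placeholders, and a real program would also have to set tot_len and real addresses:

    /* Sketch only: compile with clang -target bpf, attach via iproute2's
     * "encap bpf" route support. Addresses below are illustrative.
     */
    #include <linux/bpf.h>
    #include <linux/in.h>
    #include <linux/ip.h>
    #include <bpf/bpf_helpers.h>

    SEC("lwt_in")
    int push_outer_ip(struct __sk_buff *skb)
    {
            struct iphdr outer = {
                    .version  = 4,
                    .ihl      = 5,
                    .ttl      = 64,
                    .protocol = IPPROTO_IPIP,
                    .saddr    = 0x0100000a,  /* placeholder 10.0.0.1 */
                    .daddr    = 0x0200000a,  /* placeholder 10.0.0.2 */
            };

            /* iph->check left at 0 is computed by the kernel; see
             * bpf_lwt_push_ip_encap() later in this diff.
             */
            if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &outer, sizeof(outer)))
                    return BPF_DROP;

            /* skb_dst() no longer matches the new outer header */
            return BPF_LWT_REROUTE;
    }

    char _license[] SEC("license") = "GPL";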
@@ -146,6 +172,102 @@
 	return 0;
 }
 
+static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
+{
+	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
+	int oif = l3mdev ? l3mdev->ifindex : 0;
+	struct dst_entry *dst = NULL;
+	int err = -EAFNOSUPPORT;
+	struct sock *sk;
+	struct net *net;
+	bool ipv4;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		ipv4 = true;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		ipv4 = false;
+	else
+		goto err;
+
+	sk = sk_to_full_sk(skb->sk);
+	if (sk) {
+		if (sk->sk_bound_dev_if)
+			oif = sk->sk_bound_dev_if;
+		net = sock_net(sk);
+	} else {
+		net = dev_net(skb_dst(skb)->dev);
+	}
+
+	if (ipv4) {
+		struct iphdr *iph = ip_hdr(skb);
+		struct flowi4 fl4 = {};
+		struct rtable *rt;
+
+		fl4.flowi4_oif = oif;
+		fl4.flowi4_mark = skb->mark;
+		fl4.flowi4_uid = sock_net_uid(net, sk);
+		fl4.flowi4_tos = RT_TOS(iph->tos);
+		fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+		fl4.flowi4_proto = iph->protocol;
+		fl4.daddr = iph->daddr;
+		fl4.saddr = iph->saddr;
+
+		rt = ip_route_output_key(net, &fl4);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			goto err;
+		}
+		dst = &rt->dst;
+	} else {
+		struct ipv6hdr *iph6 = ipv6_hdr(skb);
+		struct flowi6 fl6 = {};
+
+		fl6.flowi6_oif = oif;
+		fl6.flowi6_mark = skb->mark;
+		fl6.flowi6_uid = sock_net_uid(net, sk);
+		fl6.flowlabel = ip6_flowinfo(iph6);
+		fl6.flowi6_proto = iph6->nexthdr;
+		fl6.daddr = iph6->daddr;
+		fl6.saddr = iph6->saddr;
+
+		err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
+		if (unlikely(err))
+			goto err;
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			goto err;
+		}
+	}
+	if (unlikely(dst->error)) {
+		err = dst->error;
+		dst_release(dst);
+		goto err;
+	}
+
+	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
+	 * was done for the previous dst, so we are doing it here again, in
+	 * case the new dst needs much more space. The call below is a noop
+	 * if there is enough header space in skb.
+	 */
+	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+	if (unlikely(err))
+		goto err;
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+	if (unlikely(err))
+		return err;
+
+	/* ip[6]_finish_output2 understands LWTUNNEL_XMIT_DONE */
+	return LWTUNNEL_XMIT_DONE;
+
+err:
+	kfree_skb(skb);
+	return err;
+}
+
 static int bpf_xmit(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -153,11 +275,20 @@
 
 	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
 	if (bpf->xmit.prog) {
+		__be16 proto = skb->protocol;
 		int ret;
 
 		ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
 		switch (ret) {
 		case BPF_OK:
+			/* If the header changed, e.g. via bpf_lwt_push_encap,
+			 * BPF_LWT_REROUTE below should have been used if the
+			 * protocol was also changed.
+			 */
+			if (skb->protocol != proto) {
+				kfree_skb(skb);
+				return -EINVAL;
+			}
 			/* If the header was expanded, headroom might be too
 			 * small for L2 header to come, expand as needed.
 			 */
@@ -168,6 +299,8 @@
 			return LWTUNNEL_XMIT_CONTINUE;
 		case BPF_REDIRECT:
 			return LWTUNNEL_XMIT_DONE;
+		case BPF_LWT_REROUTE:
+			return bpf_lwt_xmit_reroute(skb);
 		default:
 			return ret;
 		}
@@ -207,8 +340,8 @@
 	int ret;
 	u32 fd;
 
-	ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy,
-			       NULL);
+	ret = nla_parse_nested_deprecated(tb, LWT_BPF_PROG_MAX, attr,
+					  bpf_prog_policy, NULL);
 	if (ret < 0)
 		return ret;
 
@@ -249,7 +382,8 @@
 	if (family != AF_INET && family != AF_INET6)
 		return -EAFNOSUPPORT;
 
-	ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack);
+	ret = nla_parse_nested_deprecated(tb, LWT_BPF_MAX, nla, bpf_nl_policy,
+					  extack);
 	if (ret < 0)
 		return ret;
 
@@ -317,7 +451,7 @@
 	if (!prog->prog)
 		return 0;
 
-	nest = nla_nest_start(skb, attr);
+	nest = nla_nest_start_noflag(skb, attr);
 	if (!nest)
 		return -EMSGSIZE;
 
@@ -389,6 +523,135 @@
 	.owner		= THIS_MODULE,
 };
 
+static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type,
+			   int encap_len)
+{
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	gso_type |= SKB_GSO_DODGY;
+	shinfo->gso_type |= gso_type;
+	skb_decrease_gso_size(shinfo, encap_len);
+	shinfo->gso_segs = 0;
+	return 0;
+}
+
+static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
+{
+	int next_hdr_offset;
+	void *next_hdr;
+	__u8 protocol;
+
+	/* SCTP and UDP_L4 gso need more nuanced handling than what
+	 * handle_gso_type() does above: skb_decrease_gso_size() is not enough.
+	 * So at the moment only TCP GSO packets are let through.
+	 */
+	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+		return -ENOTSUPP;
+
+	if (ipv4) {
+		protocol = ip_hdr(skb)->protocol;
+		next_hdr_offset = sizeof(struct iphdr);
+		next_hdr = skb_network_header(skb) + next_hdr_offset;
+	} else {
+		protocol = ipv6_hdr(skb)->nexthdr;
+		next_hdr_offset = sizeof(struct ipv6hdr);
+		next_hdr = skb_network_header(skb) + next_hdr_offset;
+	}
+
+	switch (protocol) {
+	case IPPROTO_GRE:
+		next_hdr_offset += sizeof(struct gre_base_hdr);
+		if (next_hdr_offset > encap_len)
+			return -EINVAL;
+
+		if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM)
+			return handle_gso_type(skb, SKB_GSO_GRE_CSUM,
+					       encap_len);
+		return handle_gso_type(skb, SKB_GSO_GRE, encap_len);
+
+	case IPPROTO_UDP:
+		next_hdr_offset += sizeof(struct udphdr);
+		if (next_hdr_offset > encap_len)
+			return -EINVAL;
+
+		if (((struct udphdr *)next_hdr)->check)
+			return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM,
+					       encap_len);
+		return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len);
+
+	case IPPROTO_IP:
+	case IPPROTO_IPV6:
+		if (ipv4)
+			return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len);
+		else
+			return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len);
+
+	default:
+		return -EPROTONOSUPPORT;
+	}
+}
+
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
+{
+	struct iphdr *iph;
+	bool ipv4;
+	int err;
+
+	if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
+		return -EINVAL;
+
+	/* validate protocol and length */
+	iph = (struct iphdr *)hdr;
+	if (iph->version == 4) {
+		ipv4 = true;
+		if (unlikely(len < iph->ihl * 4))
+			return -EINVAL;
+	} else if (iph->version == 6) {
+		ipv4 = false;
+		if (unlikely(len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	if (ingress)
+		err = skb_cow_head(skb, len + skb->mac_len);
+	else
+		err = skb_cow_head(skb,
+				   len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
+	if (unlikely(err))
+		return err;
+
+	/* push the encap headers and fix pointers */
+	skb_reset_inner_headers(skb);
+	skb_reset_inner_mac_header(skb);  /* mac header is not yet set */
+	skb_set_inner_protocol(skb, skb->protocol);
+	skb->encapsulation = 1;
+	skb_push(skb, len);
+	if (ingress)
+		skb_postpush_rcsum(skb, iph, len);
+	skb_reset_network_header(skb);
+	memcpy(skb_network_header(skb), hdr, len);
+	bpf_compute_data_pointers(skb);
+	skb_clear_hash(skb);
+
+	if (ipv4) {
+		skb->protocol = htons(ETH_P_IP);
+		iph = ip_hdr(skb);
+
+		if (!iph->check)
+			iph->check = ip_fast_csum((unsigned char *)iph,
+						  iph->ihl);
+	} else {
+		skb->protocol = htons(ETH_P_IPV6);
+	}
+
+	if (skb_is_gso(skb))
+		return handle_gso_encap(skb, ipv4, len);
+
+	return 0;
+}
+
 static int __init bpf_lwt_init(void)
 {
 	return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 0b17175..2f9c0de 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * lwtunnel	Infrastructure for light weight tunnels like mpls
  *
  * Authors:	Roopa Prabhu, <roopa@cumulusnetworks.com>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  */
 
 #include <linux/capability.h>
@@ -26,7 +21,7 @@
 #include <net/lwtunnel.h>
 #include <net/rtnetlink.h>
 #include <net/ip6_fib.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 
 #ifdef CONFIG_MODULES
 
@@ -122,18 +117,18 @@
 	ret = -EOPNOTSUPP;
 	rcu_read_lock();
 	ops = rcu_dereference(lwtun_encaps[encap_type]);
-	if (likely(ops && ops->build_state && try_module_get(ops->owner))) {
+	if (likely(ops && ops->build_state && try_module_get(ops->owner)))
 		found = true;
+	rcu_read_unlock();
+
+	if (found) {
 		ret = ops->build_state(encap, family, cfg, lws, extack);
 		if (ret)
 			module_put(ops->owner);
-	}
-	rcu_read_unlock();
-
-	/* don't rely on -EOPNOTSUPP to detect match as build_state
-	 * handlers could return it
-	 */
-	if (!found) {
+	} else {
+		/* don't rely on -EOPNOTSUPP to detect match as build_state
+		 * handlers could return it
+		 */
 		NL_SET_ERR_MSG_ATTR(extack, encap,
 				    "LWT encapsulation type not supported");
 	}
@@ -223,7 +218,8 @@
 }
 EXPORT_SYMBOL_GPL(lwtstate_free);
 
-int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
+int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
+			int encap_attr, int encap_type_attr)
 {
 	const struct lwtunnel_encap_ops *ops;
 	struct nlattr *nest;
@@ -236,7 +232,7 @@
 	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
 		return 0;
 
-	nest = nla_nest_start(skb, RTA_ENCAP);
+	nest = nla_nest_start_noflag(skb, encap_attr);
 	if (!nest)
 		return -EMSGSIZE;
 
@@ -250,7 +246,7 @@
 	if (ret)
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
-	ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type);
+	ret = nla_put_u16(skb, encap_type_attr, lwtstate->type);
 	if (ret)
 		goto nla_put_failure;
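
The lwtunnel_build_state() rework above is a common RCU pattern: look the ops up and pin the owning module inside the RCU read section, drop the RCU lock, and only then invoke the callback, which may sleep. Reduced to a generic sketch with a hypothetical ops table:

    #include <linux/errno.h>
    #include <linux/module.h>
    #include <linux/rcupdate.h>

    struct my_ops {
            struct module *owner;
            int (*handler)(void *arg);      /* may sleep */
    };

    static struct my_ops __rcu *ops_table[16];

    static int dispatch(unsigned int type, void *arg)
    {
            const struct my_ops *ops;
            bool found = false;
            int ret = -EOPNOTSUPP;

            rcu_read_lock();
            ops = rcu_dereference(ops_table[type]);
            /* pin the module while still under RCU so it cannot unload */
            if (ops && ops->handler && try_module_get(ops->owner))
                    found = true;
            rcu_read_unlock();

            if (found) {
                    /* ops lives in the pinned module, so it is still valid */
                    ret = ops->handler(arg);
                    module_put(ops->owner);
            }
            return ret;
    }

lwtunnel itself keeps the module reference on success and drops it only when the state is freed; the sketch releases it immediately for brevity.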
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4e07824..5480edf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Generic address resolution entity
  *
@@ -5,11 +6,6 @@
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
  *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  *	Fixes:
  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
  *	Harald Welte		Add neighbour cache statistics like rtstat
@@ -18,6 +14,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/slab.h>
+#include <linux/kmemleak.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -30,6 +27,7 @@
 #include <linux/times.h>
 #include <net/net_namespace.h>
 #include <net/neighbour.h>
+#include <net/arp.h>
 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/netevent.h>
@@ -41,6 +39,8 @@
 #include <linux/inetdevice.h>
 #include <net/addrconf.h>
 
+#include <trace/events/neigh.h>
+
 #define DEBUG
 #define NEIGH_DEBUG 1
 #define neigh_dbg(level, fmt, ...)		\
@@ -101,6 +101,7 @@
 	if (neigh->parms->neigh_cleanup)
 		neigh->parms->neigh_cleanup(neigh);
 
+	trace_neigh_cleanup_and_release(neigh, 0);
 	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
 	neigh_release(neigh);
@@ -118,21 +119,77 @@
 }
 EXPORT_SYMBOL(neigh_rand_reach_time);
 
+static void neigh_mark_dead(struct neighbour *n)
+{
+	n->dead = 1;
+	if (!list_empty(&n->gc_list)) {
+		list_del_init(&n->gc_list);
+		atomic_dec(&n->tbl->gc_entries);
+	}
+}
 
-static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
-		      struct neighbour __rcu **np, struct neigh_table *tbl)
+static void neigh_update_gc_list(struct neighbour *n)
+{
+	bool on_gc_list, exempt_from_gc;
+
+	write_lock_bh(&n->tbl->lock);
+	write_lock(&n->lock);
+
+	/* remove from the gc list if new state is permanent or if neighbor
+	 * is externally learned; otherwise entry should be on the gc list
+	 */
+	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
+			 n->flags & NTF_EXT_LEARNED;
+	on_gc_list = !list_empty(&n->gc_list);
+
+	if (exempt_from_gc && on_gc_list) {
+		list_del_init(&n->gc_list);
+		atomic_dec(&n->tbl->gc_entries);
+	} else if (!exempt_from_gc && !on_gc_list) {
+		/* add entries to the tail; cleaning removes from the front */
+		list_add_tail(&n->gc_list, &n->tbl->gc_list);
+		atomic_inc(&n->tbl->gc_entries);
+	}
+
+	write_unlock(&n->lock);
+	write_unlock_bh(&n->tbl->lock);
+}
+
+static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
+				     int *notify)
+{
+	bool rc = false;
+	u8 ndm_flags;
+
+	if (!(flags & NEIGH_UPDATE_F_ADMIN))
+		return rc;
+
+	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+		if (ndm_flags & NTF_EXT_LEARNED)
+			neigh->flags |= NTF_EXT_LEARNED;
+		else
+			neigh->flags &= ~NTF_EXT_LEARNED;
+		rc = true;
+		*notify = 1;
+	}
+
+	return rc;
+}
+
+static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
+		      struct neigh_table *tbl)
 {
 	bool retval = false;
 
 	write_lock(&n->lock);
-	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
-	    !(n->flags & flags)) {
+	if (refcount_read(&n->refcnt) == 1) {
 		struct neighbour *neigh;
 
 		neigh = rcu_dereference_protected(n->next,
 						  lockdep_is_held(&tbl->lock));
 		rcu_assign_pointer(*np, neigh);
-		n->dead = 1;
+		neigh_mark_dead(n);
 		retval = true;
 	}
 	write_unlock(&n->lock);
@@ -158,7 +215,7 @@
 	while ((n = rcu_dereference_protected(*np,
 					      lockdep_is_held(&tbl->lock)))) {
 		if (n == ndel)
-			return neigh_del(n, 0, 0, np, tbl);
+			return neigh_del(n, np, tbl);
 		np = &n->next;
 	}
 	return false;
@@ -166,32 +223,29 @@
 
 static int neigh_forced_gc(struct neigh_table *tbl)
 {
+	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
+	unsigned long tref = jiffies - 5 * HZ;
+	struct neighbour *n, *tmp;
 	int shrunk = 0;
-	int i;
-	struct neigh_hash_table *nht;
 
 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
 
 	write_lock_bh(&tbl->lock);
-	nht = rcu_dereference_protected(tbl->nht,
-					lockdep_is_held(&tbl->lock));
-	for (i = 0; i < (1 << nht->hash_shift); i++) {
-		struct neighbour *n;
-		struct neighbour __rcu **np;
 
-		np = &nht->hash_buckets[i];
-		while ((n = rcu_dereference_protected(*np,
-					lockdep_is_held(&tbl->lock))) != NULL) {
-			/* Neighbour record may be discarded if:
-			 * - nobody refers to it.
-			 * - it is not permanent
-			 */
-			if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
-				      tbl)) {
-				shrunk = 1;
-				continue;
-			}
-			np = &n->next;
+	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
+		if (refcount_read(&n->refcnt) == 1) {
+			bool remove = false;
+
+			write_lock(&n->lock);
+			if ((n->nud_state == NUD_FAILED) ||
+			    time_after(tref, n->updated))
+				remove = true;
+			write_unlock(&n->lock);
+
+			if (remove && neigh_remove_one(n, tbl))
+				shrunk++;
+			if (shrunk >= max_clean)
+				break;
 		}
 	}
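
The rewritten neigh_forced_gc() above no longer walks every hash bucket: candidates live on the new tbl->gc_list, NUD_PERMANENT and externally learned entries are kept off that list (and out of gc_entries accounting) entirely, and one pass reclaims at most gc_entries - gc_thresh2 entries that are unreferenced and either NUD_FAILED or idle for more than five seconds. The exemption rule, condensed into a hypothetical helper restating neigh_update_gc_list() above:

    #include <net/neighbour.h>

    /* An entry is exempt from forced gc when it is administratively
     * pinned, either as a permanent entry or as one installed by an
     * external control plane (NTF_EXT_LEARNED).
     */
    static bool neigh_exempt_from_gc(const struct neighbour *n)
    {
            return (n->nud_state & NUD_PERMANENT) ||
                   (n->flags & NTF_EXT_LEARNED);
    }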
 
@@ -232,7 +286,8 @@
 	}
 }
 
-static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
+static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
+			    bool skip_perm)
 {
 	int i;
 	struct neigh_hash_table *nht;
@@ -250,13 +305,16 @@
 				np = &n->next;
 				continue;
 			}
+			if (skip_perm && n->nud_state & NUD_PERMANENT) {
+				np = &n->next;
+				continue;
+			}
 			rcu_assign_pointer(*np,
 				   rcu_dereference_protected(n->next,
 						lockdep_is_held(&tbl->lock)));
 			write_lock(&n->lock);
 			neigh_del_timer(n);
-			n->dead = 1;
-
+			neigh_mark_dead(n);
 			if (refcount_read(&n->refcnt) != 1) {
 				/* The most unpleasant situation.
 				   We must destroy neighbour entry,
@@ -285,30 +343,49 @@
 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
 {
 	write_lock_bh(&tbl->lock);
-	neigh_flush_dev(tbl, dev);
+	neigh_flush_dev(tbl, dev, false);
 	write_unlock_bh(&tbl->lock);
 }
 EXPORT_SYMBOL(neigh_changeaddr);
 
-int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+			  bool skip_perm)
 {
 	write_lock_bh(&tbl->lock);
-	neigh_flush_dev(tbl, dev);
+	neigh_flush_dev(tbl, dev, skip_perm);
 	pneigh_ifdown_and_unlock(tbl, dev);
 
 	del_timer_sync(&tbl->proxy_timer);
 	pneigh_queue_purge(&tbl->proxy_queue);
 	return 0;
 }
+
+int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
+{
+	__neigh_ifdown(tbl, dev, true);
+	return 0;
+}
+EXPORT_SYMBOL(neigh_carrier_down);
+
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	__neigh_ifdown(tbl, dev, false);
+	return 0;
+}
 EXPORT_SYMBOL(neigh_ifdown);
 
-static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
+static struct neighbour *neigh_alloc(struct neigh_table *tbl,
+				     struct net_device *dev,
+				     bool exempt_from_gc)
 {
 	struct neighbour *n = NULL;
 	unsigned long now = jiffies;
 	int entries;
 
-	entries = atomic_inc_return(&tbl->entries) - 1;
+	if (exempt_from_gc)
+		goto do_alloc;
+
+	entries = atomic_inc_return(&tbl->gc_entries) - 1;
 	if (entries >= tbl->gc_thresh3 ||
 	    (entries >= tbl->gc_thresh2 &&
 	     time_after(now, tbl->last_flush + 5 * HZ))) {
@@ -321,6 +398,7 @@
 		}
 	}
 
+do_alloc:
 	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
 	if (!n)
 		goto out_entries;
@@ -339,11 +417,15 @@
 	n->tbl		  = tbl;
 	refcount_set(&n->refcnt, 1);
 	n->dead		  = 1;
+	INIT_LIST_HEAD(&n->gc_list);
+
+	atomic_inc(&tbl->entries);
 out:
 	return n;
 
 out_entries:
-	atomic_dec(&tbl->entries);
+	if (!exempt_from_gc)
+		atomic_dec(&tbl->gc_entries);
 	goto out;
 }
 
@@ -362,12 +444,14 @@
 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 	if (!ret)
 		return NULL;
-	if (size <= PAGE_SIZE)
+	if (size <= PAGE_SIZE) {
 		buckets = kzalloc(size, GFP_ATOMIC);
-	else
+	} else {
 		buckets = (struct neighbour __rcu **)
 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
 					   get_order(size));
+		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
+	}
 	if (!buckets) {
 		kfree(ret);
 		return NULL;
@@ -387,10 +471,12 @@
 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
 	struct neighbour __rcu **buckets = nht->hash_buckets;
 
-	if (size <= PAGE_SIZE)
+	if (size <= PAGE_SIZE) {
 		kfree(buckets);
-	else
+	} else {
+		kmemleak_free(buckets);
 		free_pages((unsigned long)buckets, get_order(size));
+	}
 	kfree(nht);
 }
 
@@ -486,15 +572,19 @@
 }
 EXPORT_SYMBOL(neigh_lookup_nodev);
 
-struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
-				 struct net_device *dev, bool want_ref)
+static struct neighbour *___neigh_create(struct neigh_table *tbl,
+					 const void *pkey,
+					 struct net_device *dev,
+					 bool exempt_from_gc, bool want_ref)
 {
+	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
 	u32 hash_val;
 	unsigned int key_len = tbl->key_len;
 	int error;
-	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
 	struct neigh_hash_table *nht;
 
+	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
+
 	if (!n) {
 		rc = ERR_PTR(-ENOBUFS);
 		goto out;
@@ -555,6 +645,9 @@
 	}
 
 	n->dead = 0;
+	if (!exempt_from_gc)
+		list_add_tail(&n->gc_list, &n->tbl->gc_list);
+
 	if (want_ref)
 		neigh_hold(n);
 	rcu_assign_pointer(n->next,
@@ -569,9 +662,17 @@
 out_tbl_unlock:
 	write_unlock_bh(&tbl->lock);
 out_neigh_release:
+	if (!exempt_from_gc)
+		atomic_dec(&tbl->gc_entries);
 	neigh_release(n);
 	goto out;
 }
+
+struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
+				 struct net_device *dev, bool want_ref)
+{
+	return ___neigh_create(tbl, pkey, dev, false, want_ref);
+}
 EXPORT_SYMBOL(__neigh_create);
 
 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
@@ -633,6 +734,7 @@
 	if (!n)
 		goto out;
 
+	n->protocol = 0;
 	write_pnet(&n->net, net);
 	memcpy(n->key, pkey, key_len);
 	n->dev = dev;
@@ -835,7 +937,7 @@
 			    (state == NUD_FAILED ||
 			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
 				*np = n->next;
-				n->dead = 1;
+				neigh_mark_dead(n);
 				write_unlock(&n->lock);
 				neigh_cleanup_and_release(n);
 				continue;
@@ -909,7 +1011,7 @@
 	if (neigh->ops->solicit)
 		neigh->ops->solicit(neigh, skb);
 	atomic_inc(&neigh->probes);
-	kfree_skb(skb);
+	consume_skb(skb);
 }
 
 /* Called when a timer expires for a neighbour entry. */
@@ -997,6 +1099,8 @@
 	if (notify)
 		neigh_update_notify(neigh, 0);
 
+	trace_neigh_timer_handler(neigh, 0);
+
 	neigh_release(neigh);
 }
 
@@ -1020,6 +1124,7 @@
 
 			atomic_set(&neigh->probes,
 				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
+			neigh_del_timer(neigh);
 			neigh->nud_state     = NUD_INCOMPLETE;
 			neigh->updated = now;
 			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
@@ -1036,6 +1141,7 @@
 		}
 	} else if (neigh->nud_state & NUD_STALE) {
 		neigh_dbg(2, "neigh %p is delayed\n", neigh);
+		neigh_del_timer(neigh);
 		neigh->nud_state = NUD_DELAY;
 		neigh->updated = jiffies;
 		neigh_add_timer(neigh, jiffies +
@@ -1067,6 +1173,7 @@
 	else
 		write_unlock(&neigh->lock);
 	local_bh_enable();
+	trace_neigh_event_send_done(neigh, rc);
 	return rc;
 
 out_dead:
@@ -1074,6 +1181,7 @@
 		goto out_unlock_bh;
 	write_unlock_bh(&neigh->lock);
 	kfree_skb(skb);
+	trace_neigh_event_send_dead(neigh, 1);
 	return 1;
 }
 EXPORT_SYMBOL(__neigh_event_send);
@@ -1118,15 +1226,19 @@
    Caller MUST hold reference count on the entry.
  */
 
-int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
-		 u32 flags, u32 nlmsg_pid)
+static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
+			  u8 new, u32 flags, u32 nlmsg_pid,
+			  struct netlink_ext_ack *extack)
 {
+	bool ext_learn_change = false;
 	u8 old;
 	int err;
 	int notify = 0;
 	struct net_device *dev;
 	int update_isrouter = 0;
 
+	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
+
 	write_lock_bh(&neigh->lock);
 
 	dev    = neigh->dev;
@@ -1136,10 +1248,12 @@
 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
 	    (old & (NUD_NOARP | NUD_PERMANENT)))
 		goto out;
-	if (neigh->dead)
+	if (neigh->dead) {
+		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
 		goto out;
+	}
 
-	neigh_update_ext_learned(neigh, flags, &notify);
+	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
 
 	if (!(new & NUD_VALID)) {
 		neigh_del_timer(neigh);
@@ -1174,8 +1288,10 @@
 		   use it, otherwise discard the request.
 		 */
 		err = -EINVAL;
-		if (!(old & NUD_VALID))
+		if (!(old & NUD_VALID)) {
+			NL_SET_ERR_MSG(extack, "No link layer address given");
 			goto out;
+		}
 		lladdr = neigh->ha;
 	}
 
@@ -1279,18 +1395,26 @@
 		neigh->arp_queue_len_bytes = 0;
 	}
 out:
-	if (update_isrouter) {
-		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
-			(neigh->flags | NTF_ROUTER) :
-			(neigh->flags & ~NTF_ROUTER);
-	}
+	if (update_isrouter)
+		neigh_update_is_router(neigh, flags, &notify);
 	write_unlock_bh(&neigh->lock);
 
+	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
+		neigh_update_gc_list(neigh);
+
 	if (notify)
 		neigh_update_notify(neigh, nlmsg_pid);
 
+	trace_neigh_update_done(neigh, err);
+
 	return err;
 }
+
+int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
+		 u32 flags, u32 nlmsg_pid)
+{
+	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
+}
 EXPORT_SYMBOL(neigh_update);
 
 /* Update the neigh to listen temporarily for probe responses, even if it is
@@ -1555,6 +1679,7 @@
 	unsigned long phsize;
 
 	INIT_LIST_HEAD(&tbl->parms_list);
+	INIT_LIST_HEAD(&tbl->gc_list);
 	list_add(&tbl->parms.list, &tbl->parms_list);
 	write_pnet(&tbl->parms.net, &init_net);
 	refcount_set(&tbl->parms.refcnt, 1);
@@ -1646,6 +1771,19 @@
 	return tbl;
 }
 
+const struct nla_policy nda_policy[NDA_MAX+1] = {
+	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
+	[NDA_PROBES]		= { .type = NLA_U32 },
+	[NDA_VLAN]		= { .type = NLA_U16 },
+	[NDA_PORT]		= { .type = NLA_U16 },
+	[NDA_VNI]		= { .type = NLA_U32 },
+	[NDA_IFINDEX]		= { .type = NLA_U32 },
+	[NDA_MASTER]		= { .type = NLA_U32 },
+	[NDA_PROTOCOL]		= { .type = NLA_U8 },
+};
+
 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
 			struct netlink_ext_ack *extack)
 {
@@ -1662,8 +1800,10 @@
 		goto out;
 
 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
-	if (dst_attr == NULL)
+	if (!dst_attr) {
+		NL_SET_ERR_MSG(extack, "Network address not specified");
 		goto out;
+	}
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex) {
@@ -1678,8 +1818,10 @@
 	if (tbl == NULL)
 		return -EAFNOSUPPORT;
 
-	if (nla_len(dst_attr) < (int)tbl->key_len)
+	if (nla_len(dst_attr) < (int)tbl->key_len) {
+		NL_SET_ERR_MSG(extack, "Invalid network address");
 		goto out;
+	}
 
 	if (ndm->ndm_flags & NTF_PROXY) {
 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
@@ -1695,10 +1837,9 @@
 		goto out;
 	}
 
-	err = neigh_update(neigh, NULL, NUD_FAILED,
-			   NEIGH_UPDATE_F_OVERRIDE |
-			   NEIGH_UPDATE_F_ADMIN,
-			   NETLINK_CB(skb).portid);
+	err = __neigh_update(neigh, NULL, NUD_FAILED,
+			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
+			     NETLINK_CB(skb).portid, extack);
 	write_lock_bh(&tbl->lock);
 	neigh_release(neigh);
 	neigh_remove_one(neigh, tbl);
@@ -1711,7 +1852,8 @@
 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		     struct netlink_ext_ack *extack)
 {
-	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
+	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
+		NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
 	struct net *net = sock_net(skb->sk);
 	struct ndmsg *ndm;
 	struct nlattr *tb[NDA_MAX+1];
@@ -1719,16 +1861,20 @@
 	struct net_device *dev = NULL;
 	struct neighbour *neigh;
 	void *dst, *lladdr;
+	u8 protocol = 0;
 	int err;
 
 	ASSERT_RTNL();
-	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
+				     nda_policy, extack);
 	if (err < 0)
 		goto out;
 
 	err = -EINVAL;
-	if (tb[NDA_DST] == NULL)
+	if (!tb[NDA_DST]) {
+		NL_SET_ERR_MSG(extack, "Network address not specified");
 		goto out;
+	}
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex) {
@@ -1738,19 +1884,27 @@
 			goto out;
 		}
 
-		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
+			NL_SET_ERR_MSG(extack, "Invalid link address");
 			goto out;
+		}
 	}
 
 	tbl = neigh_find_table(ndm->ndm_family);
 	if (tbl == NULL)
 		return -EAFNOSUPPORT;
 
-	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
+	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
+		NL_SET_ERR_MSG(extack, "Invalid network address");
 		goto out;
+	}
+
 	dst = nla_data(tb[NDA_DST]);
 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
 
+	if (tb[NDA_PROTOCOL])
+		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
+
 	if (ndm->ndm_flags & NTF_PROXY) {
 		struct pneigh_entry *pn;
 
@@ -1758,22 +1912,35 @@
 		pn = pneigh_lookup(tbl, net, dst, dev, 1);
 		if (pn) {
 			pn->flags = ndm->ndm_flags;
+			if (protocol)
+				pn->protocol = protocol;
 			err = 0;
 		}
 		goto out;
 	}
 
-	if (dev == NULL)
+	if (!dev) {
+		NL_SET_ERR_MSG(extack, "Device not specified");
 		goto out;
+	}
+
+	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	neigh = neigh_lookup(tbl, dst, dev);
 	if (neigh == NULL) {
+		bool exempt_from_gc;
+
 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
 			err = -ENOENT;
 			goto out;
 		}
 
-		neigh = __neigh_lookup_errno(tbl, dst, dev);
+		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
+				 ndm->ndm_flags & NTF_EXT_LEARNED;
+		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
 		if (IS_ERR(neigh)) {
 			err = PTR_ERR(neigh);
 			goto out;
@@ -1786,18 +1953,26 @@
 		}
 
 		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
-			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
+			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
+				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
 	}
 
 	if (ndm->ndm_flags & NTF_EXT_LEARNED)
 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
 
+	if (ndm->ndm_flags & NTF_ROUTER)
+		flags |= NEIGH_UPDATE_F_ISROUTER;
+
 	if (ndm->ndm_flags & NTF_USE) {
 		neigh_event_send(neigh, NULL);
 		err = 0;
 	} else
-		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
-				   NETLINK_CB(skb).portid);
+		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+				     NETLINK_CB(skb).portid, extack);
+
+	if (protocol)
+		neigh->protocol = protocol;
+
 	neigh_release(neigh);
 
 out:
@@ -1808,7 +1983,7 @@
 {
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, NDTA_PARMS);
+	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
 	if (nest == NULL)
 		return -ENOBUFS;
 
@@ -2010,8 +2185,8 @@
 	bool found = false;
 	int err, tidx;
 
-	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
-			  nl_neightbl_policy, extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
+				     nl_neightbl_policy, extack);
 	if (err < 0)
 		goto errout;
 
@@ -2048,8 +2223,9 @@
 		struct neigh_parms *p;
 		int i, ifindex = 0;
 
-		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
-				       nl_ntbl_parm_policy, extack);
+		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
+						  tb[NDTA_PARMS],
+						  nl_ntbl_parm_policy, extack);
 		if (err < 0)
 			goto errout_tbl_lock;
 
@@ -2161,15 +2337,47 @@
 	return err;
 }
 
+static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
+				    struct netlink_ext_ack *extack)
+{
+	struct ndtmsg *ndtm;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
+		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
+		return -EINVAL;
+	}
+
+	ndtm = nlmsg_data(nlh);
+	if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
+		return -EINVAL;
+	}
+
+	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
+		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	int family, tidx, nidx = 0;
 	int tbl_skip = cb->args[0];
 	int neigh_skip = cb->args[1];
 	struct neigh_table *tbl;
 
-	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
+	if (cb->strict_check) {
+		int err = neightbl_valid_dump_info(nlh, cb->extack);
+
+		if (err < 0)
+			return err;
+	}
+
+	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
 
 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
 		struct neigh_parms *p;
@@ -2182,7 +2390,7 @@
 			continue;
 
 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
-				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
+				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
 				       NLM_F_MULTI) < 0)
 			break;
 
@@ -2197,7 +2405,7 @@
 
 			if (neightbl_fill_param_info(skb, tbl, p,
 						     NETLINK_CB(cb->skb).portid,
-						     cb->nlh->nlmsg_seq,
+						     nlh->nlmsg_seq,
 						     RTM_NEWNEIGHTBL,
 						     NLM_F_MULTI) < 0)
 				goto out;
@@ -2259,6 +2467,9 @@
 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
 		goto nla_put_failure;
 
+	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -2290,6 +2501,9 @@
 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
 		goto nla_put_failure;
 
+	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -2311,7 +2525,7 @@
 	if (!master_idx)
 		return false;
 
-	master = netdev_master_upper_dev_get(dev);
+	master = dev ? netdev_master_upper_dev_get(dev) : NULL;
 	if (!master || master->ifindex != master_idx)
 		return true;
 
@@ -2320,41 +2534,30 @@
 
 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
 {
-	if (filter_idx && dev->ifindex != filter_idx)
+	if (filter_idx && (!dev || dev->ifindex != filter_idx))
 		return true;
 
 	return false;
 }
 
+struct neigh_dump_filter {
+	int master_idx;
+	int dev_idx;
+};
+
 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
-			    struct netlink_callback *cb)
+			    struct netlink_callback *cb,
+			    struct neigh_dump_filter *filter)
 {
 	struct net *net = sock_net(skb->sk);
-	const struct nlmsghdr *nlh = cb->nlh;
-	struct nlattr *tb[NDA_MAX + 1];
 	struct neighbour *n;
 	int rc, h, s_h = cb->args[1];
 	int idx, s_idx = idx = cb->args[2];
 	struct neigh_hash_table *nht;
-	int filter_master_idx = 0, filter_idx = 0;
 	unsigned int flags = NLM_F_MULTI;
-	int err;
 
-	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
-	if (!err) {
-		if (tb[NDA_IFINDEX]) {
-			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
-				return -EINVAL;
-			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
-		}
-		if (tb[NDA_MASTER]) {
-			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
-				return -EINVAL;
-			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
-		}
-		if (filter_idx || filter_master_idx)
-			flags |= NLM_F_DUMP_FILTERED;
-	}
+	if (filter->dev_idx || filter->master_idx)
+		flags |= NLM_F_DUMP_FILTERED;
 
 	rcu_read_lock_bh();
 	nht = rcu_dereference_bh(tbl->nht);
@@ -2367,8 +2570,8 @@
 		     n = rcu_dereference_bh(n->next)) {
 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
 				goto next;
-			if (neigh_ifindex_filtered(n->dev, filter_idx) ||
-			    neigh_master_filtered(n->dev, filter_master_idx))
+			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
+			    neigh_master_filtered(n->dev, filter->master_idx))
 				goto next;
 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
 					    cb->nlh->nlmsg_seq,
@@ -2390,12 +2593,17 @@
 }
 
 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
-			     struct netlink_callback *cb)
+			     struct netlink_callback *cb,
+			     struct neigh_dump_filter *filter)
 {
 	struct pneigh_entry *n;
 	struct net *net = sock_net(skb->sk);
 	int rc, h, s_h = cb->args[3];
 	int idx, s_idx = idx = cb->args[4];
+	unsigned int flags = NLM_F_MULTI;
+
+	if (filter->dev_idx || filter->master_idx)
+		flags |= NLM_F_DUMP_FILTERED;
 
 	read_lock_bh(&tbl->lock);
 
@@ -2405,10 +2613,12 @@
 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
 			if (idx < s_idx || pneigh_net(n) != net)
 				goto next;
+			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
+			    neigh_master_filtered(n->dev, filter->master_idx))
+				goto next;
 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
 					    cb->nlh->nlmsg_seq,
-					    RTM_NEWNEIGH,
-					    NLM_F_MULTI, tbl) < 0) {
+					    RTM_NEWNEIGH, flags, tbl) < 0) {
 				read_unlock_bh(&tbl->lock);
 				rc = -1;
 				goto out;
@@ -2427,22 +2637,89 @@
 
 }
 
+static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
+				bool strict_check,
+				struct neigh_dump_filter *filter,
+				struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[NDA_MAX + 1];
+	int err, i;
+
+	if (strict_check) {
+		struct ndmsg *ndm;
+
+		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
+			return -EINVAL;
+		}
+
+		ndm = nlmsg_data(nlh);
+		if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
+		    ndm->ndm_state || ndm->ndm_type) {
+			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
+			return -EINVAL;
+		}
+
+		if (ndm->ndm_flags & ~NTF_PROXY) {
+			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
+			return -EINVAL;
+		}
+
+		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
+						    tb, NDA_MAX, nda_policy,
+						    extack);
+	} else {
+		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
+					     NDA_MAX, nda_policy, extack);
+	}
+	if (err < 0)
+		return err;
+
+	for (i = 0; i <= NDA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		/* all new attributes should require strict_check */
+		switch (i) {
+		case NDA_IFINDEX:
+			filter->dev_idx = nla_get_u32(tb[i]);
+			break;
+		case NDA_MASTER:
+			filter->master_idx = nla_get_u32(tb[i]);
+			break;
+		default:
+			if (strict_check) {
+				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
+	struct neigh_dump_filter filter = {};
 	struct neigh_table *tbl;
 	int t, family, s_t;
 	int proxy = 0;
 	int err;
 
-	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
+	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
 
 	/* check for full ndmsg structure presence, family member is
 	 * the same for both structures
 	 */
-	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
-	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
+	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
+	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
 		proxy = 1;
 
+	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
+	if (err < 0 && cb->strict_check)
+		return err;
+
 	s_t = cb->args[0];
 
 	for (t = 0; t < NEIGH_NR_TABLES; t++) {
@@ -2456,9 +2733,9 @@
 			memset(&cb->args[1], 0, sizeof(cb->args) -
 						sizeof(cb->args[0]));
 		if (proxy)
-			err = pneigh_dump_table(tbl, skb, cb);
+			err = pneigh_dump_table(tbl, skb, cb, &filter);
 		else
-			err = neigh_dump_table(tbl, skb, cb);
+			err = neigh_dump_table(tbl, skb, cb, &filter);
 		if (err < 0)
 			break;
 	}
@@ -2467,6 +2744,186 @@
 	return skb->len;
 }
 
+static int neigh_valid_get_req(const struct nlmsghdr *nlh,
+			       struct neigh_table **tbl,
+			       void **dst, int *dev_idx, u8 *ndm_flags,
+			       struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[NDA_MAX + 1];
+	struct ndmsg *ndm;
+	int err, i;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
+		return -EINVAL;
+	}
+
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
+	    ndm->ndm_type) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
+		return -EINVAL;
+	}
+
+	if (ndm->ndm_flags & ~NTF_PROXY) {
+		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
+					    NDA_MAX, nda_policy, extack);
+	if (err < 0)
+		return err;
+
+	*ndm_flags = ndm->ndm_flags;
+	*dev_idx = ndm->ndm_ifindex;
+	*tbl = neigh_find_table(ndm->ndm_family);
+	if (*tbl == NULL) {
+		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
+		return -EAFNOSUPPORT;
+	}
+
+	for (i = 0; i <= NDA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NDA_DST:
+			if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
+				NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
+				return -EINVAL;
+			}
+			*dst = nla_data(tb[i]);
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static inline size_t neigh_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ndmsg))
+	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+	       + nla_total_size(sizeof(struct nda_cacheinfo))
+	       + nla_total_size(4)  /* NDA_PROBES */
+	       + nla_total_size(1); /* NDA_PROTOCOL */
+}
+
+static int neigh_get_reply(struct net *net, struct neighbour *neigh,
+			   u32 pid, u32 seq)
+{
+	struct sk_buff *skb;
+	int err = 0;
+
+	skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
+	if (err) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	err = rtnl_unicast(skb, net, pid);
+errout:
+	return err;
+}
+
+static inline size_t pneigh_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ndmsg))
+	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+	       + nla_total_size(1); /* NDA_PROTOCOL */
+}
+
+static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
+			    u32 pid, u32 seq, struct neigh_table *tbl)
+{
+	struct sk_buff *skb;
+	int err = 0;
+
+	skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
+	if (err) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	err = rtnl_unicast(skb, net, pid);
+errout:
+	return err;
+}
+
+static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+		     struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct net_device *dev = NULL;
+	struct neigh_table *tbl = NULL;
+	struct neighbour *neigh;
+	void *dst = NULL;
+	u8 ndm_flags = 0;
+	int dev_idx = 0;
+	int err;
+
+	err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
+				  extack);
+	if (err < 0)
+		return err;
+
+	if (dev_idx) {
+		dev = __dev_get_by_index(net, dev_idx);
+		if (!dev) {
+			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
+			return -ENODEV;
+		}
+	}
+
+	if (!dst) {
+		NL_SET_ERR_MSG(extack, "Network address not specified");
+		return -EINVAL;
+	}
+
+	if (ndm_flags & NTF_PROXY) {
+		struct pneigh_entry *pn;
+
+		pn = pneigh_lookup(tbl, net, dst, dev, 0);
+		if (!pn) {
+			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
+			return -ENOENT;
+		}
+		return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
+					nlh->nlmsg_seq, tbl);
+	}
+
+	if (!dev) {
+		NL_SET_ERR_MSG(extack, "No device specified");
+		return -EINVAL;
+	}
+
+	neigh = neigh_lookup(tbl, dst, dev);
+	if (!neigh) {
+		NL_SET_ERR_MSG(extack, "Neighbour entry not found");
+		return -ENOENT;
+	}
+
+	err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
+			      nlh->nlmsg_seq);
+
+	neigh_release(neigh);
+
+	return err;
+}
+
 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
 {
 	int chain;
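
neigh_get() below, together with the rtnl_register() change at the end of this file's diff, gives RTM_GETNEIGH a doit handler, so a single entry can be fetched directly instead of filtering a full dump (iproute2 exposes this as "ip neigh get"). A minimal userspace sketch of such a request; the ifindex and address are assumptions and error handling is elided:

    #include <string.h>
    #include <unistd.h>
    #include <arpa/inet.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>
    #include <linux/rtnetlink.h>
    #include <linux/neighbour.h>

    int main(void)
    {
            struct {
                    struct nlmsghdr nlh;
                    struct ndmsg ndm;
                    char attrs[64];
            } req;
            struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
            struct rtattr *rta;
            struct in_addr dst;
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

            memset(&req, 0, sizeof(req));
            req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
            req.nlh.nlmsg_type = RTM_GETNEIGH;
            req.nlh.nlmsg_flags = NLM_F_REQUEST;    /* doit, not a dump */
            req.ndm.ndm_family = AF_INET;
            req.ndm.ndm_ifindex = 2;                /* assumed ifindex */

            inet_pton(AF_INET, "192.0.2.1", &dst);  /* assumed neighbour */
            rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
            rta->rta_type = NDA_DST;
            rta->rta_len = RTA_LENGTH(sizeof(dst));
            memcpy(RTA_DATA(rta), &dst, sizeof(dst));
            req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + rta->rta_len;

            sendto(fd, &req, req.nlh.nlmsg_len, 0,
                   (struct sockaddr *)&sa, sizeof(sa));
            /* an RTM_NEWNEIGH reply (or a netlink error) follows; recv() it */
            close(fd);
            return 0;
    }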
@@ -2513,7 +2970,7 @@
 				rcu_assign_pointer(*np,
 					rcu_dereference_protected(n->next,
 						lockdep_is_held(&tbl->lock)));
-				n->dead = 1;
+				neigh_mark_dead(n);
 			} else
 				np = &n->next;
 			write_unlock(&n->lock);
@@ -2536,7 +2993,13 @@
 		if (!tbl)
 			goto out;
 		rcu_read_lock_bh();
-		neigh = __neigh_lookup_noref(tbl, addr, dev);
+		if (index == NEIGH_ARP_TABLE) {
+			u32 key = *((u32 *)addr);
+
+			neigh = __ipv4_neigh_lookup_noref(dev, key);
+		} else {
+			neigh = __neigh_lookup_noref(tbl, addr, dev);
+		}
 		if (!neigh)
 			neigh = __neigh_create(tbl, addr, dev, false);
 		err = PTR_ERR(neigh);
@@ -2570,7 +3033,7 @@
 	struct net *net = seq_file_net(seq);
 	struct neigh_hash_table *nht = state->nht;
 	struct neighbour *n = NULL;
-	int bucket = state->bucket;
+	int bucket;
 
 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
@@ -2744,6 +3207,7 @@
 }
 
 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
+	__acquires(tbl->lock)
 	__acquires(rcu_bh)
 {
 	struct neigh_seq_state *state = seq->private;
@@ -2754,6 +3218,7 @@
 
 	rcu_read_lock_bh();
 	state->nht = rcu_dereference_bh(tbl->nht);
+	read_lock(&tbl->lock);
 
 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
 }
@@ -2787,8 +3252,13 @@
 EXPORT_SYMBOL(neigh_seq_next);
 
 void neigh_seq_stop(struct seq_file *seq, void *v)
+	__releases(tbl->lock)
 	__releases(rcu_bh)
 {
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+
+	read_unlock(&tbl->lock);
 	rcu_read_unlock_bh();
 }
 EXPORT_SYMBOL(neigh_seq_stop);
@@ -2874,15 +3344,6 @@
 };
 #endif /* CONFIG_PROC_FS */
 
-static inline size_t neigh_nlmsg_size(void)
-{
-	return NLMSG_ALIGN(sizeof(struct ndmsg))
-	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
-	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
-	       + nla_total_size(sizeof(struct nda_cacheinfo))
-	       + nla_total_size(4); /* NDA_PROBES */
-}
-
 static void __neigh_notify(struct neighbour *n, int type, int flags,
 			   u32 pid)
 {
@@ -2915,8 +3376,6 @@
 EXPORT_SYMBOL(neigh_app_ns);
 
 #ifdef CONFIG_SYSCTL
-static int zero;
-static int int_max = INT_MAX;
 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
 
 static int proc_unres_qlen(struct ctl_table *ctl, int write,
@@ -2925,7 +3384,7 @@
 	int size, ret;
 	struct ctl_table tmp = *ctl;
 
-	tmp.extra1 = &zero;
+	tmp.extra1 = SYSCTL_ZERO;
 	tmp.extra2 = &unres_qlen_max;
 	tmp.data = &size;
 
@@ -2990,8 +3449,8 @@
 	struct ctl_table tmp = *ctl;
 	int ret;
 
-	tmp.extra1 = &zero;
-	tmp.extra2 = &int_max;
+	tmp.extra1 = SYSCTL_ZERO;
+	tmp.extra2 = SYSCTL_INT_MAX;
 
 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 	neigh_proc_update(ctl, write);
@@ -3136,24 +3595,24 @@
 			.procname	= "gc_thresh1",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.extra1 	= &zero,
-			.extra2		= &int_max,
+			.extra1		= SYSCTL_ZERO,
+			.extra2		= SYSCTL_INT_MAX,
 			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_GC_THRESH2] = {
 			.procname	= "gc_thresh2",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.extra1 	= &zero,
-			.extra2		= &int_max,
+			.extra1		= SYSCTL_ZERO,
+			.extra2		= SYSCTL_INT_MAX,
 			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_GC_THRESH3] = {
 			.procname	= "gc_thresh3",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.extra1 	= &zero,
-			.extra2		= &int_max,
+			.extra1		= SYSCTL_ZERO,
+			.extra2		= SYSCTL_INT_MAX,
 			.proc_handler	= proc_dointvec_minmax,
 		},
 		{},
@@ -3266,7 +3725,7 @@
 {
 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
-	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
 
 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
 		      0);
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 63881f7..3634793 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -258,7 +258,7 @@
 		else
 			seq_printf(seq, "%04x", ntohs(pt->type));
 
-		seq_printf(seq, " %-8s %pf\n",
+		seq_printf(seq, " %-8s %ps\n",
 			   pt->dev ? pt->dev->name : "", pt->func);
 	}
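
The %pf -> %ps change above is part of a tree-wide printk cleanup: %ps/%pS print the symbol for a plain function pointer, and the old %pf/%pF variants, which existed for architectures whose function pointers are really descriptors (ia64, ppc64, parisc), became redundant once %ps learned to dereference those descriptors itself. For illustration:

    #include <linux/printk.h>

    static void my_handler(void) { }

    static void show_handler(void)
    {
            pr_info("handler: %ps\n", my_handler);  /* "my_handler" */
            pr_info("handler: %pS\n", my_handler);  /* "my_handler+0x0/0x8" */
    }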
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bd67c4d..ae3bcb1 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1,18 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net-sysfs.c - network device class and attributes
  *
  * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
-#include <net/switchdev.h>
 #include <linux/if_arp.h>
 #include <linux/slab.h>
 #include <linux/sched/signal.h>
@@ -337,7 +332,7 @@
 
 static int change_flags(struct net_device *dev, unsigned long new_flags)
 {
-	return dev_change_flags(dev, (unsigned int)new_flags);
+	return dev_change_flags(dev, (unsigned int)new_flags, NULL);
 }
 
 static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
@@ -501,16 +496,11 @@
 		return restart_syscall();
 
 	if (dev_isalive(netdev)) {
-		struct switchdev_attr attr = {
-			.orig_dev = netdev,
-			.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-			.flags = SWITCHDEV_F_NO_RECURSE,
-		};
+		struct netdev_phys_item_id ppid = { };
 
-		ret = switchdev_port_attr_get(netdev, &attr);
+		ret = dev_get_port_parent_id(netdev, &ppid, false);
 		if (!ret)
-			ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len,
-				      attr.u.ppid.id);
+			ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
 	}
 	rtnl_unlock();
 
@@ -760,9 +750,9 @@
 	rcu_assign_pointer(queue->rps_map, map);
 
 	if (map)
-		static_key_slow_inc(&rps_needed);
+		static_branch_inc(&rps_needed);
 	if (old_map)
-		static_key_slow_dec(&rps_needed);
+		static_branch_dec(&rps_needed);
 
 	mutex_unlock(&rps_map_mutex);
 
@@ -869,6 +859,7 @@
 #endif
 	NULL
 };
+ATTRIBUTE_GROUPS(rx_queue_default);
 
 static void rx_queue_release(struct kobject *kobj)
 {
@@ -917,7 +908,7 @@
 static struct kobj_type rx_queue_ktype __ro_after_init = {
 	.sysfs_ops = &rx_queue_sysfs_ops,
 	.release = rx_queue_release,
-	.default_attrs = rx_queue_default_attrs,
+	.default_groups = rx_queue_default_groups,
 	.namespace = rx_queue_namespace,
 	.get_ownership = rx_queue_get_ownership,
 };
@@ -932,20 +923,23 @@
 	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
 				     "rx-%u", index);
 	if (error)
-		return error;
+		goto err;
+
+	dev_hold(queue->dev);
 
 	if (dev->sysfs_rx_queue_group) {
 		error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
-		if (error) {
-			kobject_put(kobj);
-			return error;
-		}
+		if (error)
+			goto err;
 	}
 
 	kobject_uevent(kobj, KOBJ_ADD);
-	dev_hold(queue->dev);
 
 	return error;
+
+err:
+	kobject_put(kobj);
+	return error;
 }
 #endif /* CONFIG_SYSFS */
 
@@ -1342,8 +1336,7 @@
 		if (tc < 0)
 			return -EINVAL;
 	}
-	mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
-		       GFP_KERNEL);
+	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
 	if (!mask)
 		return -ENOMEM;
 
@@ -1372,7 +1365,7 @@
 	rcu_read_unlock();
 
 	len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
-	kfree(mask);
+	bitmap_free(mask);
 
 	return len < PAGE_SIZE ? len : -EINVAL;
 }
@@ -1388,8 +1381,7 @@
 	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-	mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
-		       GFP_KERNEL);
+	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
 	if (!mask)
 		return -ENOMEM;
 
@@ -1397,7 +1389,7 @@
 
 	err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
 	if (err) {
-		kfree(mask);
+		bitmap_free(mask);
 		return err;
 	}
 
@@ -1405,7 +1397,7 @@
 	err = __netif_set_xps_queue(dev, mask, index, true);
 	cpus_read_unlock();
 
-	kfree(mask);
+	bitmap_free(mask);
 	return err ? : len;
 }
 
@@ -1423,6 +1415,7 @@
 #endif
 	NULL
 };
+ATTRIBUTE_GROUPS(netdev_queue_default);
 
 static void netdev_queue_release(struct kobject *kobj)
 {
@@ -1455,7 +1448,7 @@
 static struct kobj_type netdev_queue_ktype __ro_after_init = {
 	.sysfs_ops = &netdev_queue_sysfs_ops,
 	.release = netdev_queue_release,
-	.default_attrs = netdev_queue_default_attrs,
+	.default_groups = netdev_queue_default_groups,
 	.namespace = netdev_queue_namespace,
 	.get_ownership = netdev_queue_get_ownership,
 };
@@ -1470,20 +1463,22 @@
 	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
 				     "tx-%u", index);
 	if (error)
-		return error;
+		goto err;
+
+	dev_hold(queue->dev);
 
 #ifdef CONFIG_BQL
 	error = sysfs_create_group(kobj, &dql_group);
-	if (error) {
-		kobject_put(kobj);
-		return error;
-	}
+	if (error)
+		goto err;
 #endif
 
 	kobject_uevent(kobj, KOBJ_ADD);
-	dev_hold(queue->dev);
-
 	return 0;
+
+err:
+	kobject_put(kobj);
+	return error;
 }
 #endif /* CONFIG_SYSFS */
 
@@ -1547,6 +1542,9 @@
 error:
 	netdev_queue_update_kobjects(dev, txq, 0);
 	net_rx_queue_update_kobjects(dev, rxq, 0);
+#ifdef CONFIG_SYSFS
+	kset_unregister(dev->queues_kset);
+#endif
 	return error;
 }
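
Both queue kobject fixes above enforce the same rule: once kobject_init_and_add() has run, the error path must go through kobject_put(), which invokes the ktype's release function, never a bare kfree(); and any reference that release drops, here the dev_hold() on queue->dev, must be taken before the first point that can fail. The general shape, as a sketch with hypothetical names:

    #include <linux/kobject.h>
    #include <linux/slab.h>

    struct my_obj {
            struct kobject kobj;
    };

    static void my_release(struct kobject *kobj)
    {
            /* release() also runs on the error path via kobject_put() */
            kfree(container_of(kobj, struct my_obj, kobj));
    }

    static struct kobj_type my_ktype = {
            .release = my_release,
    };

    static int my_obj_add(struct kobject *parent)
    {
            struct my_obj *o = kzalloc(sizeof(*o), GFP_KERNEL);
            int err;

            if (!o)
                    return -ENOMEM;

            err = kobject_init_and_add(&o->kobj, &my_ktype, parent, "demo");
            if (err)
                    goto err;

            /* more setup that can fail goes here ... */
            kobject_uevent(&o->kobj, KOBJ_ADD);
            return 0;
    err:
            kobject_put(&o->kobj);  /* never kfree(o) directly here */
            return err;
    }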
 
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 419af6d..283ddb2 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -43,6 +43,18 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
 #endif
 
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+#include <trace/events/page_pool.h>
+#endif
+
+#include <trace/events/neigh.h>
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_timer_handler);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_done);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_dead);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_cleanup_and_release);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 670c84b..3940284 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/workqueue.h>
@@ -38,9 +39,16 @@
 DECLARE_RWSEM(net_rwsem);
 EXPORT_SYMBOL_GPL(net_rwsem);
 
+#ifdef CONFIG_KEYS
+static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
+#endif
+
 struct net init_net = {
 	.count		= REFCOUNT_INIT(1),
 	.dev_base_head	= LIST_HEAD_INIT(init_net.dev_base_head),
+#ifdef CONFIG_KEYS
+	.key_domain	= &init_net_key_domain,
+#endif
 };
 EXPORT_SYMBOL(init_net);
 
@@ -144,6 +152,17 @@
 	}
 }
 
+static void ops_pre_exit_list(const struct pernet_operations *ops,
+			      struct list_head *net_exit_list)
+{
+	struct net *net;
+
+	if (ops->pre_exit) {
+		list_for_each_entry(net, net_exit_list, exit_list)
+			ops->pre_exit(net);
+	}
+}
+
 static void ops_exit_list(const struct pernet_operations *ops,
 			  struct list_head *net_exit_list)
 {
@@ -226,11 +245,12 @@
 	return __peernet2id_alloc(net, peer, &no);
 }
 
-static void rtnl_net_notifyid(struct net *net, int cmd, int id);
+static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
+			      struct nlmsghdr *nlh, gfp_t gfp);
 /* This function returns the id of a peer netns. If no id is assigned, one will
  * be allocated and returned.
  */
-int peernet2id_alloc(struct net *net, struct net *peer)
+int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
 {
 	bool alloc = false, alive = false;
 	int id;
@@ -249,7 +269,7 @@
 	id = __peernet2id_alloc(net, peer, &alloc);
 	spin_unlock_bh(&net->nsid_lock);
 	if (alloc && id >= 0)
-		rtnl_net_notifyid(net, RTM_NEWNSID, id);
+		rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
 	if (alive)
 		put_net(peer);
 	return id;
@@ -304,6 +324,7 @@
 
 	refcount_set(&net->count, 1);
 	refcount_set(&net->passive, 1);
+	get_random_bytes(&net->hash_mix, sizeof(u32));
 	net->dev_base_seq = 1;
 	net->user_ns = user_ns;
 	idr_init(&net->netns_ids);
@@ -328,6 +349,12 @@
 	list_add(&net->exit_list, &net_exit_list);
 	saved_ops = ops;
 	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
+		ops_pre_exit_list(ops, &net_exit_list);
+
+	synchronize_rcu();
+
+	ops = saved_ops;
+	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 		ops_exit_list(ops, &net_exit_list);
 
 	ops = saved_ops;
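
The new ops_pre_exit_list() pass lets every pernet subsystem unpublish its per-namespace state before the synchronize_rcu() that now separates pre_exit from exit, so exit() can free memory without racing RCU readers. A minimal sketch of a hypothetical subsystem using the hook (illustrative only, not from this patch):

#include <net/net_namespace.h>

static int demo_init(struct net *net)
{
	return 0;	/* allocate and publish per-netns state here */
}

static void demo_pre_exit(struct net *net)
{
	/* Unpublish state so no new RCU readers can find it;
	 * existing readers may still hold references here.
	 */
}

static void demo_exit(struct net *net)
{
	/* Safe to free: the core ran synchronize_rcu() after every
	 * pre_exit() and before any exit().
	 */
}

static struct pernet_operations demo_net_ops = {
	.init     = demo_init,
	.pre_exit = demo_pre_exit,
	.exit     = demo_exit,
};

/* registered/unregistered with register_pernet_subsys() /
 * unregister_pernet_subsys() as usual
 */
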
@@ -385,10 +412,22 @@
 	if (!net)
 		goto out_free;
 
+#ifdef CONFIG_KEYS
+	net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL);
+	if (!net->key_domain)
+		goto out_free_2;
+	refcount_set(&net->key_domain->usage, 1);
+#endif
+
 	rcu_assign_pointer(net->gen, ng);
 out:
 	return net;
 
+#ifdef CONFIG_KEYS
+out_free_2:
+	kmem_cache_free(net_cachep, net);
+	net = NULL;
+#endif
 out_free:
 	kfree(ng);
 	goto out;
@@ -440,6 +479,7 @@
 
 	if (rv < 0) {
 put_userns:
+		key_remove_domain(net->key_domain);
 		put_user_ns(user_ns);
 		net_drop_ns(net);
 dec_ucounts:
@@ -494,7 +534,8 @@
 			idr_remove(&tmp->netns_ids, id);
 		spin_unlock_bh(&tmp->nsid_lock);
 		if (id >= 0)
-			rtnl_net_notifyid(tmp, RTM_DELNSID, id);
+			rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
+					  GFP_KERNEL);
 		if (tmp == last)
 			break;
 	}
@@ -539,10 +580,15 @@
 		list_add_tail(&net->exit_list, &net_exit_list);
 	}
 
+	/* Run all of the network namespace pre_exit methods */
+	list_for_each_entry_reverse(ops, &pernet_list, list)
+		ops_pre_exit_list(ops, &net_exit_list);
+
 	/*
 	 * Another CPU might be rcu-iterating the list, wait for it.
 	 * This needs to be before calling the exit() notifiers, so
 	 * the rcu_barrier() below isn't sufficient alone.
+	 * Also the pre_exit() and exit() methods need this barrier.
 	 */
 	synchronize_rcu();
 
@@ -565,6 +611,7 @@
 	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 		list_del_init(&net->exit_list);
 		dec_net_namespaces(net->ucounts);
+		key_remove_domain(net->key_domain);
 		put_user_ns(net->user_ns);
 		net_drop_ns(net);
 	}
@@ -669,6 +716,7 @@
 	[NETNSA_NSID]		= { .type = NLA_S32 },
 	[NETNSA_PID]		= { .type = NLA_U32 },
 	[NETNSA_FD]		= { .type = NLA_U32 },
+	[NETNSA_TARGET_NSID]	= { .type = NLA_S32 },
 };
 
 static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -680,8 +728,8 @@
 	struct net *peer;
 	int nsid, err;
 
-	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
-			  rtnl_net_policy, extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb,
+				     NETNSA_MAX, rtnl_net_policy, extack);
 	if (err < 0)
 		return err;
 	if (!tb[NETNSA_NSID]) {
@@ -719,7 +767,8 @@
 	err = alloc_netid(net, peer, nsid);
 	spin_unlock_bh(&net->nsid_lock);
 	if (err >= 0) {
-		rtnl_net_notifyid(net, RTM_NEWNSID, err);
+		rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
+				  nlh, GFP_KERNEL);
 		err = 0;
 	} else if (err == -ENOSPC && nsid >= 0) {
 		err = -EEXIST;
@@ -735,23 +784,38 @@
 {
 	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
 	       + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
+	       + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */
 	       ;
 }
 
-static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
-			 int cmd, struct net *net, int nsid)
+struct net_fill_args {
+	u32 portid;
+	u32 seq;
+	int flags;
+	int cmd;
+	int nsid;
+	bool add_ref;
+	int ref_nsid;
+};
+
+static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args)
 {
 	struct nlmsghdr *nlh;
 	struct rtgenmsg *rth;
 
-	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
+	nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth),
+			args->flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
 	rth = nlmsg_data(nlh);
 	rth->rtgen_family = AF_UNSPEC;
 
-	if (nla_put_s32(skb, NETNSA_NSID, nsid))
+	if (nla_put_s32(skb, NETNSA_NSID, args->nsid))
+		goto nla_put_failure;
+
+	if (args->add_ref &&
+	    nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid))
 		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
@@ -762,18 +826,59 @@
 	return -EMSGSIZE;
 }
 
+static int rtnl_net_valid_getid_req(struct sk_buff *skb,
+				    const struct nlmsghdr *nlh,
+				    struct nlattr **tb,
+				    struct netlink_ext_ack *extack)
+{
+	int i, err;
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg),
+					      tb, NETNSA_MAX, rtnl_net_policy,
+					      extack);
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
+					    NETNSA_MAX, rtnl_net_policy,
+					    extack);
+	if (err)
+		return err;
+
+	for (i = 0; i <= NETNSA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NETNSA_PID:
+		case NETNSA_FD:
+		case NETNSA_NSID:
+		case NETNSA_TARGET_NSID:
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
 			  struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tb[NETNSA_MAX + 1];
+	struct net_fill_args fillargs = {
+		.portid = NETLINK_CB(skb).portid,
+		.seq = nlh->nlmsg_seq,
+		.cmd = RTM_NEWNSID,
+	};
+	struct net *peer, *target = net;
 	struct nlattr *nla;
 	struct sk_buff *msg;
-	struct net *peer;
-	int err, id;
+	int err;
 
-	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
-			  rtnl_net_policy, extack);
+	err = rtnl_net_valid_getid_req(skb, nlh, tb, extack);
 	if (err < 0)
 		return err;
 	if (tb[NETNSA_PID]) {
@@ -782,6 +887,11 @@
 	} else if (tb[NETNSA_FD]) {
 		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
 		nla = tb[NETNSA_FD];
+	} else if (tb[NETNSA_NSID]) {
+		peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID]));
+		if (!peer)
+			peer = ERR_PTR(-ENOENT);
+		nla = tb[NETNSA_NSID];
 	} else {
 		NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
 		return -EINVAL;
@@ -793,15 +903,29 @@
 		return PTR_ERR(peer);
 	}
 
+	if (tb[NETNSA_TARGET_NSID]) {
+		int id = nla_get_s32(tb[NETNSA_TARGET_NSID]);
+
+		target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id);
+		if (IS_ERR(target)) {
+			NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
+			NL_SET_ERR_MSG(extack,
+				       "Target netns reference is invalid");
+			err = PTR_ERR(target);
+			goto out;
+		}
+		fillargs.add_ref = true;
+		fillargs.ref_nsid = peernet2id(net, peer);
+	}
+
 	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
 	if (!msg) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	id = peernet2id(net, peer);
-	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
-			    RTM_NEWNSID, net, id);
+	fillargs.nsid = peernet2id(target, peer);
+	err = rtnl_net_fill(msg, &fillargs);
 	if (err < 0)
 		goto err_out;
 
@@ -811,14 +935,17 @@
 err_out:
 	nlmsg_free(msg);
 out:
+	if (fillargs.add_ref)
+		put_net(target);
 	put_net(peer);
 	return err;
 }
 
 struct rtnl_net_dump_cb {
-	struct net *net;
+	struct net *tgt_net;
+	struct net *ref_net;
 	struct sk_buff *skb;
-	struct netlink_callback *cb;
+	struct net_fill_args fillargs;
 	int idx;
 	int s_idx;
 };
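
With NETNSA_TARGET_NSID, an RTM_GETNSID request can ask for a peer's id as seen from some other namespace, and the reply then carries both NETNSA_NSID (relative to the target) and NETNSA_CURRENT_NSID (relative to the caller). A userspace sketch of building such a request; the helper names are made up and the socket/sendmsg plumbing is omitted:

#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/net_namespace.h>

static size_t add_attr(char *buf, size_t off, unsigned short type,
		       const void *data, unsigned short len)
{
	struct nlattr *nla = (struct nlattr *)(buf + off);

	nla->nla_type = type;
	nla->nla_len = NLA_HDRLEN + len;
	memcpy((char *)nla + NLA_HDRLEN, data, len);
	return off + NLA_ALIGN(nla->nla_len);
}

/* buf is assumed large enough and suitably aligned */
static size_t build_getnsid(char *buf, __u32 peer_fd, __s32 target_nsid)
{
	struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
	struct rtgenmsg *rtg;
	size_t off = NLMSG_SPACE(sizeof(*rtg));

	memset(buf, 0, off);
	nlh->nlmsg_type = RTM_GETNSID;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	rtg = NLMSG_DATA(nlh);
	rtg->rtgen_family = AF_UNSPEC;

	/* NETNSA_FD: the peer, as an open /proc/<pid>/ns/net fd */
	off = add_attr(buf, off, NETNSA_FD, &peer_fd, sizeof(peer_fd));
	/* NETNSA_TARGET_NSID: resolve the answer in this namespace */
	off = add_attr(buf, off, NETNSA_TARGET_NSID, &target_nsid,
		       sizeof(target_nsid));

	nlh->nlmsg_len = off;
	return off;
}
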
@@ -831,9 +958,10 @@
 	if (net_cb->idx < net_cb->s_idx)
 		goto cont;
 
-	ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
-			    net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
-			    RTM_NEWNSID, net_cb->net, id);
+	net_cb->fillargs.nsid = id;
+	if (net_cb->fillargs.add_ref)
+		net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer);
+	ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs);
 	if (ret < 0)
 		return ret;
 
@@ -842,39 +970,112 @@
 	return 0;
 }
 
+static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk,
+				   struct rtnl_net_dump_cb *net_cb,
+				   struct netlink_callback *cb)
+{
+	struct netlink_ext_ack *extack = cb->extack;
+	struct nlattr *tb[NETNSA_MAX + 1];
+	int err, i;
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
+					    NETNSA_MAX, rtnl_net_policy,
+					    extack);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i <= NETNSA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		if (i == NETNSA_TARGET_NSID) {
+			struct net *net;
+
+			net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i]));
+			if (IS_ERR(net)) {
+				NL_SET_BAD_ATTR(extack, tb[i]);
+				NL_SET_ERR_MSG(extack,
+					       "Invalid target network namespace id");
+				return PTR_ERR(net);
+			}
+			net_cb->fillargs.add_ref = true;
+			net_cb->ref_net = net_cb->tgt_net;
+			net_cb->tgt_net = net;
+		} else {
+			NL_SET_BAD_ATTR(extack, tb[i]);
+			NL_SET_ERR_MSG(extack,
+				       "Unsupported attribute in dump request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = sock_net(skb->sk);
 	struct rtnl_net_dump_cb net_cb = {
-		.net = net,
+		.tgt_net = sock_net(skb->sk),
 		.skb = skb,
-		.cb = cb,
+		.fillargs = {
+			.portid = NETLINK_CB(cb->skb).portid,
+			.seq = cb->nlh->nlmsg_seq,
+			.flags = NLM_F_MULTI,
+			.cmd = RTM_NEWNSID,
+		},
 		.idx = 0,
 		.s_idx = cb->args[0],
 	};
+	int err = 0;
 
-	spin_lock_bh(&net->nsid_lock);
-	idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
-	spin_unlock_bh(&net->nsid_lock);
+	if (cb->strict_check) {
+		err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb);
+		if (err < 0)
+			goto end;
+	}
+
+	spin_lock_bh(&net_cb.tgt_net->nsid_lock);
+	if (net_cb.fillargs.add_ref &&
+	    !net_eq(net_cb.ref_net, net_cb.tgt_net) &&
+	    !spin_trylock_bh(&net_cb.ref_net->nsid_lock)) {
+		spin_unlock_bh(&net_cb.tgt_net->nsid_lock);
+		err = -EAGAIN;
+		goto end;
+	}
+	idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
+	if (net_cb.fillargs.add_ref &&
+	    !net_eq(net_cb.ref_net, net_cb.tgt_net))
+		spin_unlock_bh(&net_cb.ref_net->nsid_lock);
+	spin_unlock_bh(&net_cb.tgt_net->nsid_lock);
 
 	cb->args[0] = net_cb.idx;
-	return skb->len;
+end:
+	if (net_cb.fillargs.add_ref)
+		put_net(net_cb.tgt_net);
+	return err < 0 ? err : skb->len;
 }
 
-static void rtnl_net_notifyid(struct net *net, int cmd, int id)
+static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
+			      struct nlmsghdr *nlh, gfp_t gfp)
 {
+	struct net_fill_args fillargs = {
+		.portid = portid,
+		.seq = nlh ? nlh->nlmsg_seq : 0,
+		.cmd = cmd,
+		.nsid = id,
+	};
 	struct sk_buff *msg;
 	int err = -ENOMEM;
 
-	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+	msg = nlmsg_new(rtnl_net_get_size(), gfp);
 	if (!msg)
 		goto out;
 
-	err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
+	err = rtnl_net_fill(msg, &fillargs);
 	if (err < 0)
 		goto err_out;
 
-	rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
+	rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp);
 	return;
 
 err_out:
@@ -911,7 +1112,8 @@
 	init_net_initialized = true;
 	up_write(&pernet_ops_rwsem);
 
-	register_pernet_subsys(&net_ns_ops);
+	if (register_pernet_subsys(&net_ns_ops))
+		panic("Could not register network namespace subsystems");
 
 	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
 		      RTNL_FLAG_DOIT_UNLOCKED);
@@ -948,6 +1150,8 @@
 out_undo:
 	/* If I have an error cleanup all namespaces I initialized */
 	list_del(&ops->list);
+	ops_pre_exit_list(ops, &net_exit_list);
+	synchronize_rcu();
 	ops_exit_list(ops, &net_exit_list);
 	ops_free_list(ops, &net_exit_list);
 	return error;
@@ -962,6 +1166,8 @@
 	/* See comment in __register_pernet_operations() */
 	for_each_net(net)
 		list_add_tail(&net->exit_list, &net_exit_list);
+	ops_pre_exit_list(ops, &net_exit_list);
+	synchronize_rcu();
 	ops_exit_list(ops, &net_exit_list);
 	ops_free_list(ops, &net_exit_list);
 }
@@ -986,6 +1192,8 @@
 	} else {
 		LIST_HEAD(net_exit_list);
 		list_add(&init_net.exit_list, &net_exit_list);
+		ops_pre_exit_list(ops, &net_exit_list);
+		synchronize_rcu();
 		ops_exit_list(ops, &net_exit_list);
 		ops_free_list(ops, &net_exit_list);
 	}
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 7bf8335..0642f91 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/core/netclassid_cgroup.c	Classid Cgroupfs Handling
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  */
 
diff --git a/net/core/netevent.c b/net/core/netevent.c
index 8b3bc4f..d76ed77 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Network event notifiers
  *
@@ -5,11 +6,6 @@
  *      Tom Tucker             <tom@opengridcomputing.com>
  *      Steve Wise             <swise@opengridcomputing.com>
  *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  *	Fixes:
  */
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 3ae8998..849380a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Common framework for low-level network console, dump, and debugger code
  *
@@ -57,7 +58,6 @@
 	 MAX_UDP_CHUNK)
 
 static void zap_completion_queue(void);
-static void netpoll_async_cleanup(struct work_struct *work);
 
 static unsigned int carrier_timeout = 4;
 module_param(carrier_timeout, uint, 0644);
@@ -122,7 +122,7 @@
 		txq = netdev_get_tx_queue(dev, q_index);
 		HARD_TX_LOCK(dev, txq, smp_processor_id());
 		if (netif_xmit_frozen_or_stopped(txq) ||
-		    netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
+		    !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
 			skb_queue_head(&npinfo->txq, skb);
 			HARD_TX_UNLOCK(dev, txq);
 			local_irq_restore(flags);
@@ -150,7 +150,7 @@
 	 * indicate that we are clearing the Tx path only.
 	 */
 	work = napi->poll(napi, 0);
-	WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll);
+	WARN_ONCE(work, "%pS exceeded budget in poll\n", napi->poll);
 	trace_napi_poll(napi, work, 0);
 
 	clear_bit(NAPI_STATE_NPSVC, &napi->state);
@@ -324,7 +324,7 @@
 	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
 		struct netdev_queue *txq;
 
-		txq = netdev_pick_tx(dev, skb, NULL);
+		txq = netdev_core_pick_tx(dev, skb, NULL);
 
 		/* try until next clock tick */
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
@@ -335,7 +335,7 @@
 
 				HARD_TX_UNLOCK(dev, txq);
 
-				if (status == NETDEV_TX_OK)
+				if (dev_xmit_complete(status))
 					break;
 
 			}
@@ -347,12 +347,12 @@
 		}
 
 		WARN_ONCE(!irqs_disabled(),
-			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
+			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pS)\n",
 			dev->name, dev->netdev_ops->ndo_start_xmit);
 
 	}
 
-	if (status != NETDEV_TX_OK) {
+	if (!dev_xmit_complete(status)) {
 		skb_queue_tail(&npinfo->txq, skb);
 		schedule_delayed_work(&npinfo->tx_work,0);
 	}
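
Switching the checks from 'status != NETDEV_TX_OK' to '!dev_xmit_complete(status)' means netpoll only requeues an skb the driver still owns; consumed-but-dropped codes now count as done. A standalone sketch of the return-code classes (constant values as in include/linux/netdevice.h, predicate simplified):

#include <stdbool.h>
#include <stdio.h>

#define NETDEV_TX_OK	0x00	/* skb sent */
#define NET_XMIT_DROP	0x01	/* skb consumed, but dropped */
#define NETDEV_TX_BUSY	0x10	/* driver still owns the skb */

/* Simplified mirror of dev_xmit_complete(): every consumed-skb code
 * sits below NETDEV_TX_BUSY, so only BUSY forces a retry/requeue.
 */
static bool xmit_complete(int rc)
{
	return rc < NETDEV_TX_BUSY;
}

int main(void)
{
	printf("OK:   complete=%d\n", xmit_complete(NETDEV_TX_OK));   /* 1 */
	printf("DROP: complete=%d\n", xmit_complete(NET_XMIT_DROP));  /* 1 */
	printf("BUSY: complete=%d\n", xmit_complete(NETDEV_TX_BUSY)); /* 0 */
	return 0;
}
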
@@ -589,7 +589,6 @@
 
 	np->dev = ndev;
 	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
-	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
 
 	if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
 		np_err(np, "%s doesn't support polling, aborting\n",
@@ -665,7 +664,7 @@
 
 		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
 
-		err = dev_open(ndev);
+		err = dev_open(ndev, NULL);
 
 		if (err) {
 			np_err(np, "failed to open %s\n", ndev->name);
@@ -697,16 +696,22 @@
 
 	if (!np->local_ip.ip) {
 		if (!np->ipv6) {
-			in_dev = __in_dev_get_rtnl(ndev);
+			const struct in_ifaddr *ifa;
 
-			if (!in_dev || !in_dev->ifa_list) {
+			in_dev = __in_dev_get_rtnl(ndev);
+			if (!in_dev)
+				goto put_noaddr;
+
+			ifa = rtnl_dereference(in_dev->ifa_list);
+			if (!ifa) {
+put_noaddr:
 				np_err(np, "no IP address for %s, aborting\n",
 				       np->dev_name);
 				err = -EDESTADDRREQ;
 				goto put;
 			}
 
-			np->local_ip.ip = in_dev->ifa_list->ifa_local;
+			np->local_ip.ip = ifa->ifa_local;
 			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
 		} else {
 #if IS_ENABLED(CONFIG_IPV6)
@@ -719,7 +724,8 @@
 
 				read_lock_bh(&idev->lock);
 				list_for_each_entry(ifp, &idev->addr_list, if_list) {
-					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
+					if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
+					    !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
 						continue;
 					np->local_ip.in6 = ifp->addr;
 					err = 0;
@@ -788,10 +794,6 @@
 {
 	struct netpoll_info *npinfo;
 
-	/* rtnl_dereference would be preferable here but
-	 * rcu_cleanup_netpoll path can put us in here safely without
-	 * holding the rtnl, so plain rcu_dereference it is
-	 */
 	npinfo = rtnl_dereference(np->dev->npinfo);
 	if (!npinfo)
 		return;
@@ -806,27 +808,22 @@
 			ops->ndo_netpoll_cleanup(np->dev);
 
 		RCU_INIT_POINTER(np->dev->npinfo, NULL);
-		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
+		call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info);
 	} else
 		RCU_INIT_POINTER(np->dev->npinfo, NULL);
 }
 EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
-static void netpoll_async_cleanup(struct work_struct *work)
+void __netpoll_free(struct netpoll *np)
 {
-	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
+	ASSERT_RTNL();
 
-	rtnl_lock();
+	/* Wait for transmitting packets to finish before freeing. */
+	synchronize_rcu();
 	__netpoll_cleanup(np);
-	rtnl_unlock();
 	kfree(np);
 }
-
-void __netpoll_free_async(struct netpoll *np)
-{
-	schedule_work(&np->cleanup_work);
-}
-EXPORT_SYMBOL_GPL(__netpoll_free_async);
+EXPORT_SYMBOL_GPL(__netpoll_free);
 
 void netpoll_cleanup(struct netpoll *np)
 {
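
With the async cleanup worker gone, an owner now frees its netpoll instance synchronously under RTNL, and the synchronize_rcu() in __netpoll_free() waits out any CPU still transmitting through it. A sketch of a hypothetical caller (demo_priv and its np pointer are made up; only the call pattern matters):

#include <linux/netpoll.h>
#include <linux/rtnetlink.h>

struct demo_priv {
	struct netpoll __rcu *np;
};

static void demo_detach_netpoll(struct demo_priv *priv)
{
	struct netpoll *np;

	ASSERT_RTNL();
	np = rtnl_dereference(priv->np);
	RCU_INIT_POINTER(priv->np, NULL);
	if (np)
		__netpoll_free(np);	/* synchronize_rcu(), cleanup, kfree */
}
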
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index b905747..256b795 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/core/netprio_cgroup.c	Priority Control Group
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Neil Horman <nhorman@tuxdriver.com>
  */
 
@@ -301,6 +297,4 @@
 	register_netdevice_notifier(&netprio_device_notifier);
 	return 0;
 }
-
 subsys_initcall(init_cgroup_netprio);
-MODULE_LICENSE("GPL v2");
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 43a932c..5bc6558 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -4,9 +4,11 @@
  *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
  *	Copyright (C) 2016 Red Hat, Inc.
  */
+
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/device.h>
 
 #include <net/page_pool.h>
 #include <linux/dma-direction.h>
@@ -14,6 +16,8 @@
 #include <linux/page-flags.h>
 #include <linux/mm.h> /* for __put_page() */
 
+#include <trace/events/page_pool.h>
+
 static int page_pool_init(struct page_pool *pool,
 			  const struct page_pool_params *params)
 {
@@ -43,13 +47,21 @@
 	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
 		return -ENOMEM;
 
+	atomic_set(&pool->pages_state_release_cnt, 0);
+
+	/* A driver that calls page_pool_create() must also call page_pool_destroy() */
+	refcount_set(&pool->user_cnt, 1);
+
+	if (pool->p.flags & PP_FLAG_DMA_MAP)
+		get_device(pool->p.dev);
+
 	return 0;
 }
 
 struct page_pool *page_pool_create(const struct page_pool_params *params)
 {
 	struct page_pool *pool;
-	int err = 0;
+	int err;
 
 	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
 	if (!pool)
@@ -61,6 +73,7 @@
 		kfree(pool);
 		return ERR_PTR(err);
 	}
+
 	return pool;
 }
 EXPORT_SYMBOL(page_pool_create);
@@ -69,12 +82,9 @@
 static struct page *__page_pool_get_cached(struct page_pool *pool)
 {
 	struct ptr_ring *r = &pool->ring;
+	bool refill = false;
 	struct page *page;
 
-	/* Quicker fallback, avoid locks when ring is empty */
-	if (__ptr_ring_empty(r))
-		return NULL;
-
 	/* Test for safe-context, caller should provide this guarantee */
 	if (likely(in_serving_softirq())) {
 		if (likely(pool->alloc.count)) {
@@ -82,27 +92,23 @@
 			page = pool->alloc.cache[--pool->alloc.count];
 			return page;
 		}
-		/* Slower-path: Alloc array empty, time to refill
-		 *
-		 * Open-coded bulk ptr_ring consumer.
-		 *
-		 * Discussion: the ring consumer lock is not really
-		 * needed due to the softirq/NAPI protection, but
-		 * later need the ability to reclaim pages on the
-		 * ring. Thus, keeping the locks.
-		 */
-		spin_lock(&r->consumer_lock);
-		while ((page = __ptr_ring_consume(r))) {
-			if (pool->alloc.count == PP_ALLOC_CACHE_REFILL)
-				break;
-			pool->alloc.cache[pool->alloc.count++] = page;
-		}
-		spin_unlock(&r->consumer_lock);
-		return page;
+		refill = true;
 	}
 
-	/* Slow-path: Get page from locked ring queue */
-	page = ptr_ring_consume(&pool->ring);
+	/* Quicker fallback, avoid locks when ring is empty */
+	if (__ptr_ring_empty(r))
+		return NULL;
+
+	/* Slow-path: Get page from locked ring queue,
+	 * refill alloc array if requested.
+	 */
+	spin_lock(&r->consumer_lock);
+	page = __ptr_ring_consume(r);
+	if (refill)
+		pool->alloc.count = __ptr_ring_consume_batched(r,
+							pool->alloc.cache,
+							PP_ALLOC_CACHE_REFILL);
+	spin_unlock(&r->consumer_lock);
 	return page;
 }
 
@@ -136,19 +142,26 @@
 	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
 		goto skip_dma_map;
 
-	/* Setup DMA mapping: use page->private for DMA-addr
+	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
+	 * since dma_addr_t can be either 32 or 64 bits and does not always fit
+	 * into page private data (i.e. 32bit cpu with 64bit DMA caps)
 	 * This mapping is kept for lifetime of page, until leaving pool.
 	 */
-	dma = dma_map_page(pool->p.dev, page, 0,
-			   (PAGE_SIZE << pool->p.order),
-			   pool->p.dma_dir);
+	dma = dma_map_page_attrs(pool->p.dev, page, 0,
+				 (PAGE_SIZE << pool->p.order),
+				 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
 	if (dma_mapping_error(pool->p.dev, dma)) {
 		put_page(page);
 		return NULL;
 	}
-	set_page_private(page, dma); /* page->private = dma; */
+	page->dma_addr = dma;
 
 skip_dma_map:
+	/* Track how many pages are held 'in-flight' */
+	pool->pages_state_hold_cnt++;
+
+	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
+
 	/* When page is just alloc'ed it should/must have refcnt 1. */
 	return page;
 }
@@ -171,23 +184,69 @@
 }
 EXPORT_SYMBOL(page_pool_alloc_pages);
 
+/* Calculate distance between two u32 values, valid if distance is below 2^(31)
+ *  https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
+ */
+#define _distance(a, b)	(s32)((a) - (b))
+
+static s32 page_pool_inflight(struct page_pool *pool)
+{
+	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
+	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
+	s32 distance;
+
+	distance = _distance(hold_cnt, release_cnt);
+
+	trace_page_pool_inflight(pool, distance, hold_cnt, release_cnt);
+	return distance;
+}
+
+static bool __page_pool_safe_to_destroy(struct page_pool *pool)
+{
+	s32 inflight = page_pool_inflight(pool);
+
+	/* The distance should not be able to become negative */
+	WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
+
+	return (inflight == 0);
+}
+
 /* Cleanup page_pool state from page */
 static void __page_pool_clean_page(struct page_pool *pool,
 				   struct page *page)
 {
-	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
-		return;
+	dma_addr_t dma;
 
+	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+		goto skip_dma_unmap;
+
+	dma = page->dma_addr;
 	/* DMA unmap */
-	dma_unmap_page(pool->p.dev, page_private(page),
-		       PAGE_SIZE << pool->p.order, pool->p.dma_dir);
-	set_page_private(page, 0);
+	dma_unmap_page_attrs(pool->p.dev, dma,
+			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+			     DMA_ATTR_SKIP_CPU_SYNC);
+	page->dma_addr = 0;
+skip_dma_unmap:
+	atomic_inc(&pool->pages_state_release_cnt);
+	trace_page_pool_state_release(pool, page,
+			      atomic_read(&pool->pages_state_release_cnt));
 }
 
+/* unmap the page and clean our state */
+void page_pool_unmap_page(struct page_pool *pool, struct page *page)
+{
+	/* When page is unmapped, this implies page will not be
+	 * returned to page_pool.
+	 */
+	__page_pool_clean_page(pool, page);
+}
+EXPORT_SYMBOL(page_pool_unmap_page);
+
 /* Return a page to the page allocator, cleaning up our state */
 static void __page_pool_return_page(struct page_pool *pool, struct page *page)
 {
 	__page_pool_clean_page(pool, page);
+
 	put_page(page);
 	/* An optimization would be to call __free_pages(page, pool->p.order)
 	 * knowing page is not part of page-cache (thus avoiding a
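
pages_state_hold_cnt and pages_state_release_cnt are free-running u32 counters, so the in-flight count relies on serial-number arithmetic: the signed cast in _distance() stays correct across wraparound as long as the true distance is below 2^31. A standalone illustration:

#include <stdint.h>
#include <stdio.h>

static int32_t distance(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b);	/* same cast as _distance() */
}

int main(void)
{
	/* 2 pages in flight, no wrap */
	printf("%d\n", distance(5, 3));			/* 2 */

	/* still 2 in flight even though hold_cnt wrapped past 0 */
	printf("%d\n", distance(1, UINT32_MAX));	/* 2 */
	return 0;
}
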
@@ -279,21 +338,45 @@
 	}
 }
 
-static void __page_pool_destroy_rcu(struct rcu_head *rcu)
+static void __warn_in_flight(struct page_pool *pool)
 {
-	struct page_pool *pool;
+	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
+	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
+	s32 distance;
 
-	pool = container_of(rcu, struct page_pool, rcu);
+	distance = _distance(hold_cnt, release_cnt);
 
-	WARN(pool->alloc.count, "API usage violation");
-
-	__page_pool_empty_ring(pool);
-	ptr_ring_cleanup(&pool->ring, NULL);
-	kfree(pool);
+	/* Drivers should fix this, but only problematic when DMA is used */
+	WARN(1, "Still in-flight pages:%d hold:%u released:%u",
+	     distance, hold_cnt, release_cnt);
 }
 
-/* Cleanup and release resources */
-void page_pool_destroy(struct page_pool *pool)
+void __page_pool_free(struct page_pool *pool)
+{
+	/* Only last user actually free/release resources */
+	if (!page_pool_put(pool))
+		return;
+
+	WARN(pool->alloc.count, "API usage violation");
+	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
+
+	/* Can happen due to forced shutdown */
+	if (!__page_pool_safe_to_destroy(pool))
+		__warn_in_flight(pool);
+
+	ptr_ring_cleanup(&pool->ring, NULL);
+
+	if (pool->p.flags & PP_FLAG_DMA_MAP)
+		put_device(pool->p.dev);
+
+	kfree(pool);
+}
+EXPORT_SYMBOL(__page_pool_free);
+
+/* Request to shutdown: release pages cached by page_pool, and check
+ * for in-flight pages
+ */
+bool __page_pool_request_shutdown(struct page_pool *pool)
 {
 	struct page *page;
 
@@ -311,7 +394,6 @@
 	 */
 	__page_pool_empty_ring(pool);
 
-	/* An xdp_mem_allocator can still ref page_pool pointer */
-	call_rcu(&pool->rcu, __page_pool_destroy_rcu);
+	return __page_pool_safe_to_destroy(pool);
 }
-EXPORT_SYMBOL(page_pool_destroy);
+EXPORT_SYMBOL(__page_pool_request_shutdown);
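
Teardown is now two-phase: __page_pool_request_shutdown() flushes the caches and reports whether every in-flight page has come back, and __page_pool_free() releases resources only when the last user_cnt reference (driver or XDP memory model) is dropped. A rough owner-side sketch; the polling loop is illustrative only (the in-tree caller retries from deferred work in the XDP memory-model code):

#include <linux/delay.h>
#include <net/page_pool.h>

static void demo_pool_teardown(struct page_pool *pool)
{
	/* flush caches; false means pages are still in flight */
	while (!__page_pool_request_shutdown(pool))
		msleep(100);	/* hypothetical: real code reschedules work */

	__page_pool_free(pool);	/* drops this owner's user_cnt reference */
}
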
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7f69384..48b1e42 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Authors:
  * Copyright 2001, 2002 by Robert Olsson <robert.olsson@its.uu.se>
@@ -8,12 +9,6 @@
  * Ben Greear <greearb@candelatech.com>
  * Jens Låås <jens.laas@data.slu.se>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- *
  * A tool for loading the network with preconfigured packets.
  * The tool is implemented as a linux module.  Parameters are output
  * device, delay (to hard_xmit), number of packets, and whether
@@ -60,7 +55,6 @@
  *
  * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br)
  *
- *
  * 021124 Finished major redesign and rewrite for new functionality.
  * See Documentation/networking/pktgen.txt for how to use this.
  *
@@ -114,7 +108,6 @@
  *
  * Fixed src_mac command to set source mac of packet to value specified in
  * command by Adit Ranadive <adit.262@gmail.com>
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -158,6 +151,7 @@
 #include <linux/etherdevice.h>
 #include <linux/kthread.h>
 #include <linux/prefetch.h>
+#include <linux/mmzone.h>
 #include <net/net_namespace.h>
 #include <net/checksum.h>
 #include <net/ipv6.h>
@@ -2124,9 +2118,11 @@
 			rcu_read_lock();
 			in_dev = __in_dev_get_rcu(pkt_dev->odev);
 			if (in_dev) {
-				if (in_dev->ifa_list) {
-					pkt_dev->saddr_min =
-					    in_dev->ifa_list->ifa_address;
+				const struct in_ifaddr *ifa;
+
+				ifa = rcu_dereference(in_dev->ifa_list);
+				if (ifa) {
+					pkt_dev->saddr_min = ifa->ifa_address;
 					pkt_dev->saddr_max = pkt_dev->saddr_min;
 				}
 			}
@@ -2160,7 +2156,7 @@
 	s64 remaining;
 	struct hrtimer_sleeper t;
 
-	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	hrtimer_set_expires(&t.timer, spin_until);
 
 	remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
@@ -2174,11 +2170,9 @@
 			end_time = ktime_get();
 		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
-		/* see do_nanosleep */
-		hrtimer_init_sleeper(&t, current);
 		do {
 			set_current_state(TASK_INTERRUPTIBLE);
-			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
+			hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_ABS);
 
 			if (likely(t.task))
 				schedule();
@@ -2520,7 +2514,7 @@
 		skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF;
 
 	rcu_read_lock_bh();
-	err = x->outer_mode->output(x, skb);
+	err = pktgen_xfrm_outer_mode_output(x, skb);
 	rcu_read_unlock_bh();
 	if (err) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
@@ -2656,7 +2650,7 @@
 			}
 			get_page(pkt_dev->page);
 			skb_frag_set_page(skb, i, pkt_dev->page);
-			skb_shinfo(skb)->frags[i].page_offset = 0;
+			skb_frag_off_set(&skb_shinfo(skb)->frags[i], 0);
 			/*last fragment, fill rest of data*/
 			if (i == (frags - 1))
 				skb_frag_size_set(&skb_shinfo(skb)->frags[i],
@@ -3065,7 +3059,13 @@
 {
 	while (thread_is_running(t)) {
 
+		/* note: 't' will still be around even after the unlock/lock
+		 * cycle because pktgen_thread threads are only cleared at
+		 * net exit
+		 */
+		mutex_unlock(&pktgen_thread_lock);
 		msleep_interruptible(100);
+		mutex_lock(&pktgen_thread_lock);
 
 		if (signal_pending(current))
 			goto signal;
@@ -3080,6 +3080,10 @@
 	struct pktgen_thread *t;
 	int sig = 1;
 
+	/* prevent from racing with rmmod */
+	if (!try_module_get(THIS_MODULE))
+		return sig;
+
 	mutex_lock(&pktgen_thread_lock);
 
 	list_for_each_entry(t, &pn->pktgen_threads, th_list) {
@@ -3093,6 +3097,7 @@
 			t->control |= (T_STOP);
 
 	mutex_unlock(&pktgen_thread_lock);
+	module_put(THIS_MODULE);
 	return sig;
 }
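
Taking a module reference around the wait closes a race with rmmod: pktgen_thread_lock is dropped while the thread sleeps, so nothing else pins the module text. The general pattern, sketched (kernel context, demo name hypothetical):

#include <linux/module.h>

static int demo_stop_all(void)
{
	if (!try_module_get(THIS_MODULE))
		return 0;	/* module is already being unloaded */

	/* ... take locks, sleep, signal threads ... */

	module_put(THIS_MODULE);
	return 1;
}
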
 
@@ -3426,7 +3431,7 @@
 		net_info_ratelimited("%s xmit error: %d\n",
 				     pkt_dev->odevname, ret);
 		pkt_dev->errors++;
-		/* fallthru */
+		/* fall through */
 	case NETDEV_TX_BUSY:
 		/* Retry it next time */
 		refcount_dec(&(pkt_dev->skb->users));
@@ -3625,7 +3630,7 @@
 	pkt_dev->svlan_cfi = 0;
 	pkt_dev->svlan_id = 0xffff;
 	pkt_dev->burst = 1;
-	pkt_dev->node = -1;
+	pkt_dev->node = NUMA_NO_NODE;
 
 	err = pktgen_setup_dev(t->net, pkt_dev, ifname);
 	if (err)
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 703cf76..d964a51 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* PTP classifier
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 
 /* The below program is the bpf_asm (tools/net/) representation of
@@ -185,9 +177,10 @@
 		{ 0x16,  0,  0, 0x00000000 },
 		{ 0x06,  0,  0, 0x00000000 },
 	};
-	struct sock_fprog_kern ptp_prog = {
-		.len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
-	};
+	struct sock_fprog_kern ptp_prog;
+
+	ptp_prog.len = ARRAY_SIZE(ptp_filter);
+	ptp_prog.filter = ptp_filter;
 
 	BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog));
 }
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 9b8727c..f35c2e9 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NET		Generic infrastructure for Network protocols.
  *
  * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *
  * 		From code originally in include/net/tcp.h
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -100,7 +96,7 @@
 
 	fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq;
 
-	tcp_sk(sk)->fastopen_rsk = NULL;
+	RCU_INIT_POINTER(tcp_sk(sk)->fastopen_rsk, NULL);
 	spin_lock_bh(&fastopenq->lock);
 	fastopenq->qlen--;
 	tcp_rsk(req)->tfo_listener = false;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ebde98b..e4ec575 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -7,11 +8,6 @@
  *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	Fixes:
  *	Vitaly E. Lavrov		RTA_OK arithmetics was wrong.
  */
@@ -46,7 +42,6 @@
 
 #include <linux/inet.h>
 #include <linux/netdevice.h>
-#include <net/switchdev.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/arp.h>
@@ -59,7 +54,7 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 
-#define RTNL_MAX_TYPE		48
+#define RTNL_MAX_TYPE		50
 #define RTNL_SLAVE_MAX_TYPE	36
 
 struct rtnl_link {
@@ -130,6 +125,12 @@
 }
 EXPORT_SYMBOL(rtnl_is_locked);
 
+bool refcount_dec_and_rtnl_lock(refcount_t *r)
+{
+	return refcount_dec_and_mutex_lock(r, &rtnl_mutex);
+}
+EXPORT_SYMBOL(refcount_dec_and_rtnl_lock);
+
 #ifdef CONFIG_PROVE_LOCKING
 bool lockdep_rtnl_is_held(void)
 {
@@ -629,7 +630,7 @@
 	if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0)
 		return -EMSGSIZE;
 	if (ops->fill_slave_info) {
-		slave_data = nla_nest_start(skb, IFLA_INFO_SLAVE_DATA);
+		slave_data = nla_nest_start_noflag(skb, IFLA_INFO_SLAVE_DATA);
 		if (!slave_data)
 			return -EMSGSIZE;
 		err = ops->fill_slave_info(skb, master_dev, dev);
@@ -661,7 +662,7 @@
 			return err;
 	}
 	if (ops->fill_info) {
-		data = nla_nest_start(skb, IFLA_INFO_DATA);
+		data = nla_nest_start_noflag(skb, IFLA_INFO_DATA);
 		if (data == NULL)
 			return -EMSGSIZE;
 		err = ops->fill_info(skb, dev);
@@ -681,7 +682,7 @@
 	struct nlattr *linkinfo;
 	int err = -EMSGSIZE;
 
-	linkinfo = nla_nest_start(skb, IFLA_LINKINFO);
+	linkinfo = nla_nest_start_noflag(skb, IFLA_LINKINFO);
 	if (linkinfo == NULL)
 		goto out;
 
@@ -750,7 +751,11 @@
 	struct nlattr *mx;
 	int i, valid = 0;
 
-	mx = nla_nest_start(skb, RTA_METRICS);
+	/* nothing is dumped for dst_default_metrics, so just skip the loop */
+	if (metrics == dst_default_metrics.metrics)
+		return 0;
+
+	mx = nla_nest_start_noflag(skb, RTA_METRICS);
 	if (mx == NULL)
 		return -ENOBUFS;
 
@@ -907,6 +912,7 @@
 		size += num_vfs *
 			(nla_total_size(0) +
 			 nla_total_size(sizeof(struct ifla_vf_mac)) +
+			 nla_total_size(sizeof(struct ifla_vf_broadcast)) +
 			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
 			 nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */
 			 nla_total_size(MAX_VLAN_LIST_LEN *
@@ -1016,7 +1022,7 @@
 	       + nla_total_size(4)  /* IFLA_NEW_NETNSID */
 	       + nla_total_size(4)  /* IFLA_NEW_IFINDEX */
 	       + nla_total_size(1)  /* IFLA_PROTO_DOWN */
-	       + nla_total_size(4)  /* IFLA_IF_NETNSID */
+	       + nla_total_size(4)  /* IFLA_TARGET_NETNSID */
 	       + nla_total_size(4)  /* IFLA_CARRIER_UP_COUNT */
 	       + nla_total_size(4)  /* IFLA_CARRIER_DOWN_COUNT */
 	       + nla_total_size(4)  /* IFLA_MIN_MTU */
@@ -1031,12 +1037,12 @@
 	int vf;
 	int err;
 
-	vf_ports = nla_nest_start(skb, IFLA_VF_PORTS);
+	vf_ports = nla_nest_start_noflag(skb, IFLA_VF_PORTS);
 	if (!vf_ports)
 		return -EMSGSIZE;
 
 	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
-		vf_port = nla_nest_start(skb, IFLA_VF_PORT);
+		vf_port = nla_nest_start_noflag(skb, IFLA_VF_PORT);
 		if (!vf_port)
 			goto nla_put_failure;
 		if (nla_put_u32(skb, IFLA_PORT_VF, vf))
@@ -1065,7 +1071,7 @@
 	struct nlattr *port_self;
 	int err;
 
-	port_self = nla_nest_start(skb, IFLA_PORT_SELF);
+	port_self = nla_nest_start_noflag(skb, IFLA_PORT_SELF);
 	if (!port_self)
 		return -EMSGSIZE;
 
@@ -1140,22 +1146,17 @@
 
 static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 {
+	struct netdev_phys_item_id ppid = { };
 	int err;
-	struct switchdev_attr attr = {
-		.orig_dev = dev,
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-		.flags = SWITCHDEV_F_NO_RECURSE,
-	};
 
-	err = switchdev_port_attr_get(dev, &attr);
+	err = dev_get_port_parent_id(dev, &ppid, false);
 	if (err) {
 		if (err == -EOPNOTSUPP)
 			return 0;
 		return err;
 	}
 
-	if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len,
-		    attr.u.ppid.id))
+	if (nla_put(skb, IFLA_PHYS_SWITCH_ID, ppid.id_len, ppid.id))
 		return -EMSGSIZE;
 
 	return 0;
@@ -1201,6 +1202,7 @@
 	struct ifla_vf_vlan vf_vlan;
 	struct ifla_vf_rate vf_rate;
 	struct ifla_vf_mac vf_mac;
+	struct ifla_vf_broadcast vf_broadcast;
 	struct ifla_vf_info ivi;
 
 	memset(&ivi, 0, sizeof(ivi));
@@ -1235,6 +1237,7 @@
 		vf_trust.vf = ivi.vf;
 
 	memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+	memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len);
 	vf_vlan.vlan = ivi.vlan;
 	vf_vlan.qos = ivi.qos;
 	vf_vlan_info.vlan = ivi.vlan;
@@ -1247,10 +1250,11 @@
 	vf_linkstate.link_state = ivi.linkstate;
 	vf_rss_query_en.setting = ivi.rss_query_en;
 	vf_trust.setting = ivi.trusted;
-	vf = nla_nest_start(skb, IFLA_VF_INFO);
+	vf = nla_nest_start_noflag(skb, IFLA_VF_INFO);
 	if (!vf)
 		goto nla_put_vfinfo_failure;
 	if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
+	    nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) ||
 	    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
 	    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
 		    &vf_rate) ||
@@ -1266,7 +1270,7 @@
 	    nla_put(skb, IFLA_VF_TRUST,
 		    sizeof(vf_trust), &vf_trust))
 		goto nla_put_vf_failure;
-	vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST);
+	vfvlanlist = nla_nest_start_noflag(skb, IFLA_VF_VLAN_LIST);
 	if (!vfvlanlist)
 		goto nla_put_vf_failure;
 	if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info),
@@ -1279,7 +1283,7 @@
 	if (dev->netdev_ops->ndo_get_vf_stats)
 		dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
 						&vf_stats);
-	vfstats = nla_nest_start(skb, IFLA_VF_STATS);
+	vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS);
 	if (!vfstats)
 		goto nla_put_vf_failure;
 	if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
@@ -1329,7 +1333,7 @@
 	if (!dev->netdev_ops->ndo_get_vf_config)
 		return 0;
 
-	vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+	vfinfo = nla_nest_start_noflag(skb, IFLA_VFINFO_LIST);
 	if (!vfinfo)
 		return -EMSGSIZE;
 
@@ -1414,7 +1418,7 @@
 	int err;
 	u8 mode;
 
-	xdp = nla_nest_start(skb, IFLA_XDP);
+	xdp = nla_nest_start_noflag(skb, IFLA_XDP);
 	if (!xdp)
 		return -EMSGSIZE;
 
@@ -1496,14 +1500,15 @@
 	return ret;
 }
 
-static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev)
+static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev,
+			  bool force)
 {
 	int ifindex = dev_get_iflink(dev);
 
-	if (dev->ifindex == ifindex)
-		return 0;
+	if (force || dev->ifindex != ifindex)
+		return nla_put_u32(skb, IFLA_LINK, ifindex);
 
-	return nla_put_u32(skb, IFLA_LINK, ifindex);
+	return 0;
 }
 
 static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
@@ -1518,20 +1523,24 @@
 
 static int rtnl_fill_link_netnsid(struct sk_buff *skb,
 				  const struct net_device *dev,
-				  struct net *src_net)
+				  struct net *src_net, gfp_t gfp)
 {
+	bool put_iflink = false;
+
 	if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
 		struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
 
 		if (!net_eq(dev_net(dev), link_net)) {
-			int id = peernet2id_alloc(src_net, link_net);
+			int id = peernet2id_alloc(src_net, link_net, gfp);
 
 			if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
 				return -EMSGSIZE;
+
+			put_iflink = true;
 		}
 	}
 
-	return 0;
+	return nla_put_iflink(skb, dev, put_iflink);
 }
 
 static int rtnl_fill_link_af(struct sk_buff *skb,
@@ -1541,7 +1550,7 @@
 	const struct rtnl_af_ops *af_ops;
 	struct nlattr *af_spec;
 
-	af_spec = nla_nest_start(skb, IFLA_AF_SPEC);
+	af_spec = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
 	if (!af_spec)
 		return -EMSGSIZE;
 
@@ -1552,7 +1561,7 @@
 		if (!af_ops->fill_link_af)
 			continue;
 
-		af = nla_nest_start(skb, af_ops->family);
+		af = nla_nest_start_noflag(skb, af_ops->family);
 		if (!af)
 			return -EMSGSIZE;
 
@@ -1580,7 +1589,7 @@
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags, u32 ext_filter_mask,
 			    u32 event, int *new_nsid, int new_ifindex,
-			    int tgt_netnsid)
+			    int tgt_netnsid, gfp_t gfp)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
@@ -1598,7 +1607,7 @@
 	ifm->ifi_flags = dev_get_flags(dev);
 	ifm->ifi_change = change;
 
-	if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid))
+	if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
 		goto nla_put_failure;
 
 	if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
@@ -1617,7 +1626,6 @@
 #ifdef CONFIG_RPS
 	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
 #endif
-	    nla_put_iflink(skb, dev) ||
 	    put_master_ifindex(skb, dev) ||
 	    nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
 	    (dev->qdisc &&
@@ -1673,7 +1681,7 @@
 			goto nla_put_failure;
 	}
 
-	if (rtnl_fill_link_netnsid(skb, dev, src_net))
+	if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
 		goto nla_put_failure;
 
 	if (new_nsid &&
@@ -1737,7 +1745,7 @@
 	[IFLA_XDP]		= { .type = NLA_NESTED },
 	[IFLA_EVENT]		= { .type = NLA_U32 },
 	[IFLA_GROUP]		= { .type = NLA_U32 },
-	[IFLA_IF_NETNSID]	= { .type = NLA_S32 },
+	[IFLA_TARGET_NETNSID]	= { .type = NLA_S32 },
 	[IFLA_CARRIER_UP_COUNT]	= { .type = NLA_U32 },
 	[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
 	[IFLA_MIN_MTU]		= { .type = NLA_U32 },
@@ -1753,6 +1761,7 @@
 
 static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_MAC]		= { .len = sizeof(struct ifla_vf_mac) },
+	[IFLA_VF_BROADCAST]	= { .type = NLA_REJECT },
 	[IFLA_VF_VLAN]		= { .len = sizeof(struct ifla_vf_vlan) },
 	[IFLA_VF_VLAN_LIST]     = { .type = NLA_NESTED },
 	[IFLA_VF_TX_RATE]	= { .len = sizeof(struct ifla_vf_tx_rate) },
@@ -1797,8 +1806,7 @@
 	const struct rtnl_link_ops *ops = NULL;
 	struct nlattr *linfo[IFLA_INFO_MAX + 1];
 
-	if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla,
-			     ifla_info_policy, NULL) < 0)
+	if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0)
 		return NULL;
 
 	if (linfo[IFLA_INFO_KIND]) {
@@ -1845,7 +1853,15 @@
 	return false;
 }
 
-static struct net *get_target_net(struct sock *sk, int netnsid)
+/**
+ * rtnl_get_net_ns_capable - Get netns if sufficiently privileged.
+ * @sk: netlink socket
+ * @netnsid: network namespace identifier
+ *
+ * Returns the network namespace identified by netnsid on success or an error
+ * pointer on failure.
+ */
+struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid)
 {
 	struct net *net;
 
@@ -1862,9 +1878,56 @@
 	}
 	return net;
 }
+EXPORT_SYMBOL_GPL(rtnl_get_net_ns_capable);
+
+static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
+				      bool strict_check, struct nlattr **tb,
+				      struct netlink_ext_ack *extack)
+{
+	int hdrlen;
+
+	if (strict_check) {
+		struct ifinfomsg *ifm;
+
+		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+			NL_SET_ERR_MSG(extack, "Invalid header for link dump");
+			return -EINVAL;
+		}
+
+		ifm = nlmsg_data(nlh);
+		if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
+		    ifm->ifi_change) {
+			NL_SET_ERR_MSG(extack, "Invalid values in header for link dump request");
+			return -EINVAL;
+		}
+		if (ifm->ifi_index) {
+			NL_SET_ERR_MSG(extack, "Filter by device index not supported for link dumps");
+			return -EINVAL;
+		}
+
+		return nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb,
+						     IFLA_MAX, ifla_policy,
+						     extack);
+	}
+
+	/* A hack to preserve kernel<->userspace interface.
+	 * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
+	 * However, before Linux v3.9 the code here assumed rtgenmsg and that's
+	 * what iproute2 < v3.9.0 used.
+	 * We can detect the old iproute2. Even including the IFLA_EXT_MASK
+	 * attribute, its netlink message is shorter than struct ifinfomsg.
+	 */
+	hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
+		 sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
+
+	return nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy,
+				      extack);
+}
 
 static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct netlink_ext_ack *extack = cb->extack;
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	struct net *tgt_net = net;
 	int h, s_h;
@@ -1877,44 +1940,54 @@
 	unsigned int flags = NLM_F_MULTI;
 	int master_idx = 0;
 	int netnsid = -1;
-	int err;
-	int hdrlen;
+	int err, i;
 
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
 
-	/* A hack to preserve kernel<->userspace interface.
-	 * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
-	 * However, before Linux v3.9 the code here assumed rtgenmsg and that's
-	 * what iproute2 < v3.9.0 used.
-	 * We can detect the old iproute2. Even including the IFLA_EXT_MASK
-	 * attribute, its netlink message is shorter than struct ifinfomsg.
-	 */
-	hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
-		 sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
+	err = rtnl_valid_dump_ifinfo_req(nlh, cb->strict_check, tb, extack);
+	if (err < 0) {
+		if (cb->strict_check)
+			return err;
 
-	if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
-			ifla_policy, NULL) >= 0) {
-		if (tb[IFLA_IF_NETNSID]) {
-			netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
-			tgt_net = get_target_net(skb->sk, netnsid);
-			if (IS_ERR(tgt_net))
-				return PTR_ERR(tgt_net);
-		}
-
-		if (tb[IFLA_EXT_MASK])
-			ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
-
-		if (tb[IFLA_MASTER])
-			master_idx = nla_get_u32(tb[IFLA_MASTER]);
-
-		if (tb[IFLA_LINKINFO])
-			kind_ops = linkinfo_to_kind_ops(tb[IFLA_LINKINFO]);
-
-		if (master_idx || kind_ops)
-			flags |= NLM_F_DUMP_FILTERED;
+		goto walk_entries;
 	}
 
+	for (i = 0; i <= IFLA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		/* new attributes should only be added with strict checking */
+		switch (i) {
+		case IFLA_TARGET_NETNSID:
+			netnsid = nla_get_s32(tb[i]);
+			tgt_net = rtnl_get_net_ns_capable(skb->sk, netnsid);
+			if (IS_ERR(tgt_net)) {
+				NL_SET_ERR_MSG(extack, "Invalid target network namespace id");
+				return PTR_ERR(tgt_net);
+			}
+			break;
+		case IFLA_EXT_MASK:
+			ext_filter_mask = nla_get_u32(tb[i]);
+			break;
+		case IFLA_MASTER:
+			master_idx = nla_get_u32(tb[i]);
+			break;
+		case IFLA_LINKINFO:
+			kind_ops = linkinfo_to_kind_ops(tb[i]);
+			break;
+		default:
+			if (cb->strict_check) {
+				NL_SET_ERR_MSG(extack, "Unsupported attribute in link dump request");
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (master_idx || kind_ops)
+		flags |= NLM_F_DUMP_FILTERED;
+
+walk_entries:
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &tgt_net->dev_index_head[h];
@@ -1926,10 +1999,9 @@
 			err = rtnl_fill_ifinfo(skb, dev, net,
 					       RTM_NEWLINK,
 					       NETLINK_CB(cb->skb).portid,
-					       cb->nlh->nlmsg_seq, 0,
-					       flags,
+					       nlh->nlmsg_seq, 0, flags,
 					       ext_filter_mask, 0, NULL, 0,
-					       netnsid);
+					       netnsid, GFP_KERNEL);
 
 			if (err < 0) {
 				if (likely(skb->len))
@@ -1957,7 +2029,8 @@
 int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
 			struct netlink_ext_ack *exterr)
 {
-	return nla_parse(tb, IFLA_MAX, head, len, ifla_policy, exterr);
+	return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy,
+				    exterr);
 }
 EXPORT_SYMBOL(rtnl_nla_parse_ifla);
 
@@ -1982,7 +2055,7 @@
  *
  * 1. IFLA_NET_NS_PID
  * 2. IFLA_NET_NS_FD
- * 3. IFLA_IF_NETNSID
+ * 3. IFLA_TARGET_NETNSID
  */
 static struct net *rtnl_link_get_net_by_nlattr(struct net *src_net,
 					       struct nlattr *tb[])
@@ -1992,10 +2065,10 @@
 	if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD])
 		return rtnl_link_get_net(src_net, tb);
 
-	if (!tb[IFLA_IF_NETNSID])
+	if (!tb[IFLA_TARGET_NETNSID])
 		return get_net(src_net);
 
-	net = get_net_ns_by_id(src_net, nla_get_u32(tb[IFLA_IF_NETNSID]));
+	net = get_net_ns_by_id(src_net, nla_get_u32(tb[IFLA_TARGET_NETNSID]));
 	if (!net)
 		return ERR_PTR(-EINVAL);
 
@@ -2036,13 +2109,13 @@
 		return -EOPNOTSUPP;
 	}
 
-	if (tb[IFLA_IF_NETNSID] && (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]))
+	if (tb[IFLA_TARGET_NETNSID] && (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]))
 		goto invalid_attr;
 
-	if (tb[IFLA_NET_NS_PID] && (tb[IFLA_IF_NETNSID] || tb[IFLA_NET_NS_FD]))
+	if (tb[IFLA_NET_NS_PID] && (tb[IFLA_TARGET_NETNSID] || tb[IFLA_NET_NS_FD]))
 		goto invalid_attr;
 
-	if (tb[IFLA_NET_NS_FD] && (tb[IFLA_IF_NETNSID] || tb[IFLA_NET_NS_PID]))
+	if (tb[IFLA_NET_NS_FD] && (tb[IFLA_TARGET_NETNSID] || tb[IFLA_NET_NS_PID]))
 		goto invalid_attr;
 
 	return 0;
@@ -2122,6 +2195,8 @@
 	if (tb[IFLA_VF_MAC]) {
 		struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);
 
+		if (ivm->vf >= INT_MAX)
+			return -EINVAL;
 		err = -EOPNOTSUPP;
 		if (ops->ndo_set_vf_mac)
 			err = ops->ndo_set_vf_mac(dev, ivm->vf,
@@ -2133,6 +2208,8 @@
 	if (tb[IFLA_VF_VLAN]) {
 		struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);
 
+		if (ivv->vf >= INT_MAX)
+			return -EINVAL;
 		err = -EOPNOTSUPP;
 		if (ops->ndo_set_vf_vlan)
 			err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
@@ -2165,6 +2242,8 @@
 		if (len == 0)
 			return -EINVAL;
 
+		if (ivvl[0]->vf >= INT_MAX)
+			return -EINVAL;
 		err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan,
 					   ivvl[0]->qos, ivvl[0]->vlan_proto);
 		if (err < 0)
@@ -2175,6 +2254,8 @@
 		struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
 		struct ifla_vf_info ivf;
 
+		if (ivt->vf >= INT_MAX)
+			return -EINVAL;
 		err = -EOPNOTSUPP;
 		if (ops->ndo_get_vf_config)
 			err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
@@ -2193,6 +2274,8 @@
 	if (tb[IFLA_VF_RATE]) {
 		struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);
 
+		if (ivt->vf >= INT_MAX)
+			return -EINVAL;
 		err = -EOPNOTSUPP;
 		if (ops->ndo_set_vf_rate)
 			err = ops->ndo_set_vf_rate(dev, ivt->vf,
@@ -2205,6 +2288,8 @@
 	if (tb[IFLA_VF_SPOOFCHK]) {
 		struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);
 
+		if (ivs->vf >= INT_MAX)
+			return -EINVAL;
 		err = -EOPNOTSUPP;
 		if (ops->ndo_set_vf_spoofchk)
 			err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
@@ -2216,6 +2301,8 @@
 	if (tb[IFLA_VF_LINK_STATE]) {
 		struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);
 
+		if (ivl->vf >= INT_MAX)
+			return -EINVAL;
 		err = -EOPNOTSUPP;
 		if (ops->ndo_set_vf_link_state)
 			err = ops->ndo_set_vf_link_state(dev, ivl->vf,
@@ -2229,6 +2316,8 @@
 
 		err = -EOPNOTSUPP;
 		ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
+		if (ivrssq_en->vf >= INT_MAX)
+			return -EINVAL;
 		if (ops->ndo_set_vf_rss_query_en)
 			err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
 							   ivrssq_en->setting);
@@ -2239,6 +2328,8 @@
 	if (tb[IFLA_VF_TRUST]) {
 		struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]);
 
+		if (ivt->vf >= INT_MAX)
+			return -EINVAL;
 		err = -EOPNOTSUPP;
 		if (ops->ndo_set_vf_trust)
 			err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting);
@@ -2249,15 +2340,18 @@
 	if (tb[IFLA_VF_IB_NODE_GUID]) {
 		struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);
 
+		if (ivt->vf >= INT_MAX)
+			return -EINVAL;
 		if (!ops->ndo_set_vf_guid)
 			return -EOPNOTSUPP;
-
 		return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
 	}
 
 	if (tb[IFLA_VF_IB_PORT_GUID]) {
 		struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);
 
+		if (ivt->vf >= INT_MAX)
+			return -EINVAL;
 		if (!ops->ndo_set_vf_guid)
 			return -EOPNOTSUPP;
 
@@ -2282,6 +2376,7 @@
 			err = ops->ndo_del_slave(upper_dev, dev);
 			if (err)
 				return err;
+			netdev_update_lockdep_key(dev);
 		} else {
 			return -EOPNOTSUPP;
 		}
@@ -2318,7 +2413,7 @@
 	if (err < 0)
 		return err;
 
-	if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_IF_NETNSID]) {
+	if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
 		struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev),
 							    tb, CAP_NET_ADMIN);
 		if (IS_ERR(net)) {
@@ -2376,7 +2471,7 @@
 		sa->sa_family = dev->type;
 		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
 		       dev->addr_len);
-		err = dev_set_mac_address(dev, sa);
+		err = dev_set_mac_address(dev, sa, extack);
 		kfree(sa);
 		if (err)
 			goto errout;
@@ -2421,7 +2516,8 @@
 	}
 
 	if (ifm->ifi_flags || ifm->ifi_change) {
-		err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
+		err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
+				       extack);
 		if (err < 0)
 			goto errout;
 	}
@@ -2501,8 +2597,10 @@
 				err = -EINVAL;
 				goto errout;
 			}
-			err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
-					       ifla_vf_policy, NULL);
+			err = nla_parse_nested_deprecated(vfinfo, IFLA_VF_MAX,
+							  attr,
+							  ifla_vf_policy,
+							  NULL);
 			if (err < 0)
 				goto errout;
 			err = do_setvfinfo(dev, vfinfo);
@@ -2529,8 +2627,10 @@
 				err = -EINVAL;
 				goto errout;
 			}
-			err = nla_parse_nested(port, IFLA_PORT_MAX, attr,
-					       ifla_port_policy, NULL);
+			err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX,
+							  attr,
+							  ifla_port_policy,
+							  NULL);
 			if (err < 0)
 				goto errout;
 			if (!port[IFLA_PORT_VF]) {
@@ -2549,9 +2649,9 @@
 	if (tb[IFLA_PORT_SELF]) {
 		struct nlattr *port[IFLA_PORT_MAX+1];
 
-		err = nla_parse_nested(port, IFLA_PORT_MAX,
-				       tb[IFLA_PORT_SELF], ifla_port_policy,
-				       NULL);
+		err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX,
+						  tb[IFLA_PORT_SELF],
+						  ifla_port_policy, NULL);
 		if (err < 0)
 			goto errout;
 
@@ -2598,8 +2698,9 @@
 		struct nlattr *xdp[IFLA_XDP_MAX + 1];
 		u32 xdp_flags = 0;
 
-		err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP],
-				       ifla_xdp_policy, NULL);
+		err = nla_parse_nested_deprecated(xdp, IFLA_XDP_MAX,
+						  tb[IFLA_XDP],
+						  ifla_xdp_policy, NULL);
 		if (err < 0)
 			goto errout;
 
@@ -2653,8 +2754,8 @@
 	struct nlattr *tb[IFLA_MAX+1];
 	char ifname[IFNAMSIZ];
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
+				     ifla_policy, extack);
 	if (err < 0)
 		goto errout;
 
@@ -2750,7 +2851,8 @@
 	int err;
 	int netnsid = -1;
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
+				     ifla_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -2761,9 +2863,9 @@
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 
-	if (tb[IFLA_IF_NETNSID]) {
-		netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
-		tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
+	if (tb[IFLA_TARGET_NETNSID]) {
+		netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]);
+		tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid);
 		if (IS_ERR(tgt_net))
 			return PTR_ERR(tgt_net);
 	}
@@ -2802,7 +2904,8 @@
 
 	old_flags = dev->flags;
 	if (ifm && (ifm->ifi_flags || ifm->ifi_change)) {
-		err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
+		err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
+					 NULL);
 		if (err < 0)
 			return err;
 	}
@@ -2817,9 +2920,11 @@
 }
 EXPORT_SYMBOL(rtnl_configure_link);
 
-struct net_device *rtnl_create_link(struct net *net,
-	const char *ifname, unsigned char name_assign_type,
-	const struct rtnl_link_ops *ops, struct nlattr *tb[])
+struct net_device *rtnl_create_link(struct net *net, const char *ifname,
+				    unsigned char name_assign_type,
+				    const struct rtnl_link_ops *ops,
+				    struct nlattr *tb[],
+				    struct netlink_ext_ack *extack)
 {
 	struct net_device *dev;
 	unsigned int num_tx_queues = 1;
@@ -2835,11 +2940,15 @@
 	else if (ops->get_num_rx_queues)
 		num_rx_queues = ops->get_num_rx_queues();
 
-	if (num_tx_queues < 1 || num_tx_queues > 4096)
+	if (num_tx_queues < 1 || num_tx_queues > 4096) {
+		NL_SET_ERR_MSG(extack, "Invalid number of transmit queues");
 		return ERR_PTR(-EINVAL);
+	}
 
-	if (num_rx_queues < 1 || num_rx_queues > 4096)
+	if (num_rx_queues < 1 || num_rx_queues > 4096) {
+		NL_SET_ERR_MSG(extack, "Invalid number of receive queues");
 		return ERR_PTR(-EINVAL);
+	}
 
 	dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
 			       ops->setup, num_tx_queues, num_rx_queues);
@@ -2897,26 +3006,31 @@
 	return 0;
 }
 
-static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
-			struct netlink_ext_ack *extack)
+static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct nlattr **attr, struct netlink_ext_ack *extack)
 {
+	struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+	unsigned char name_assign_type = NET_NAME_USER;
+	struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+	const struct rtnl_link_ops *m_ops = NULL;
+	struct net_device *master_dev = NULL;
 	struct net *net = sock_net(skb->sk);
 	const struct rtnl_link_ops *ops;
-	const struct rtnl_link_ops *m_ops = NULL;
-	struct net_device *dev;
-	struct net_device *master_dev = NULL;
-	struct ifinfomsg *ifm;
+	struct nlattr *tb[IFLA_MAX + 1];
+	struct net *dest_net, *link_net;
+	struct nlattr **slave_data;
 	char kind[MODULE_NAME_LEN];
+	struct net_device *dev;
+	struct ifinfomsg *ifm;
 	char ifname[IFNAMSIZ];
-	struct nlattr *tb[IFLA_MAX+1];
-	struct nlattr *linkinfo[IFLA_INFO_MAX+1];
-	unsigned char name_assign_type = NET_NAME_USER;
+	struct nlattr **data;
 	int err;
 
 #ifdef CONFIG_MODULES
 replay:
 #endif
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
+				     ifla_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -2950,9 +3064,9 @@
 		return err;
 
 	if (tb[IFLA_LINKINFO]) {
-		err = nla_parse_nested(linkinfo, IFLA_INFO_MAX,
-				       tb[IFLA_LINKINFO], ifla_info_policy,
-				       NULL);
+		err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX,
+						  tb[IFLA_LINKINFO],
+						  ifla_info_policy, NULL);
 		if (err < 0)
 			return err;
 	} else
@@ -2966,193 +3080,249 @@
 		ops = NULL;
 	}
 
-	if (1) {
-		struct nlattr *attr[RTNL_MAX_TYPE + 1];
-		struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
-		struct nlattr **data = NULL;
-		struct nlattr **slave_data = NULL;
-		struct net *dest_net, *link_net = NULL;
+	data = NULL;
+	if (ops) {
+		if (ops->maxtype > RTNL_MAX_TYPE)
+			return -EINVAL;
 
-		if (ops) {
-			if (ops->maxtype > RTNL_MAX_TYPE)
-				return -EINVAL;
-
-			if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
-				err = nla_parse_nested(attr, ops->maxtype,
-						       linkinfo[IFLA_INFO_DATA],
-						       ops->policy, NULL);
-				if (err < 0)
-					return err;
-				data = attr;
-			}
-			if (ops->validate) {
-				err = ops->validate(tb, data, extack);
-				if (err < 0)
-					return err;
-			}
+		if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
+			err = nla_parse_nested_deprecated(attr, ops->maxtype,
+							  linkinfo[IFLA_INFO_DATA],
+							  ops->policy, extack);
+			if (err < 0)
+				return err;
+			data = attr;
 		}
-
-		if (m_ops) {
-			if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
-				return -EINVAL;
-
-			if (m_ops->slave_maxtype &&
-			    linkinfo[IFLA_INFO_SLAVE_DATA]) {
-				err = nla_parse_nested(slave_attr,
-						       m_ops->slave_maxtype,
-						       linkinfo[IFLA_INFO_SLAVE_DATA],
-						       m_ops->slave_policy,
-						       NULL);
-				if (err < 0)
-					return err;
-				slave_data = slave_attr;
-			}
+		if (ops->validate) {
+			err = ops->validate(tb, data, extack);
+			if (err < 0)
+				return err;
 		}
+	}
 
-		if (dev) {
-			int status = 0;
+	slave_data = NULL;
+	if (m_ops) {
+		if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
+			return -EINVAL;
 
-			if (nlh->nlmsg_flags & NLM_F_EXCL)
-				return -EEXIST;
-			if (nlh->nlmsg_flags & NLM_F_REPLACE)
+		if (m_ops->slave_maxtype &&
+		    linkinfo[IFLA_INFO_SLAVE_DATA]) {
+			err = nla_parse_nested_deprecated(slave_attr,
+							  m_ops->slave_maxtype,
+							  linkinfo[IFLA_INFO_SLAVE_DATA],
+							  m_ops->slave_policy,
+							  extack);
+			if (err < 0)
+				return err;
+			slave_data = slave_attr;
+		}
+	}
+
+	if (dev) {
+		int status = 0;
+
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+
+		if (linkinfo[IFLA_INFO_DATA]) {
+			if (!ops || ops != dev->rtnl_link_ops ||
+			    !ops->changelink)
 				return -EOPNOTSUPP;
 
-			if (linkinfo[IFLA_INFO_DATA]) {
-				if (!ops || ops != dev->rtnl_link_ops ||
-				    !ops->changelink)
-					return -EOPNOTSUPP;
-
-				err = ops->changelink(dev, tb, data, extack);
-				if (err < 0)
-					return err;
-				status |= DO_SETLINK_NOTIFY;
-			}
-
-			if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
-				if (!m_ops || !m_ops->slave_changelink)
-					return -EOPNOTSUPP;
-
-				err = m_ops->slave_changelink(master_dev, dev,
-							      tb, slave_data,
-							      extack);
-				if (err < 0)
-					return err;
-				status |= DO_SETLINK_NOTIFY;
-			}
-
-			return do_setlink(skb, dev, ifm, extack, tb, ifname,
-					  status);
+			err = ops->changelink(dev, tb, data, extack);
+			if (err < 0)
+				return err;
+			status |= DO_SETLINK_NOTIFY;
 		}
 
-		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
-			if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
-				return rtnl_group_changelink(skb, net,
+		if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
+			if (!m_ops || !m_ops->slave_changelink)
+				return -EOPNOTSUPP;
+
+			err = m_ops->slave_changelink(master_dev, dev, tb,
+						      slave_data, extack);
+			if (err < 0)
+				return err;
+			status |= DO_SETLINK_NOTIFY;
+		}
+
+		return do_setlink(skb, dev, ifm, extack, tb, ifname, status);
+	}
+
+	if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+		if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+			return rtnl_group_changelink(skb, net,
 						nla_get_u32(tb[IFLA_GROUP]),
 						ifm, extack, tb);
-			return -ENODEV;
-		}
+		return -ENODEV;
+	}
 
-		if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
-			return -EOPNOTSUPP;
+	if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
+		return -EOPNOTSUPP;
 
-		if (!ops) {
+	if (!ops) {
 #ifdef CONFIG_MODULES
-			if (kind[0]) {
-				__rtnl_unlock();
-				request_module("rtnl-link-%s", kind);
-				rtnl_lock();
-				ops = rtnl_link_ops_get(kind);
-				if (ops)
-					goto replay;
-			}
+		if (kind[0]) {
+			__rtnl_unlock();
+			request_module("rtnl-link-%s", kind);
+			rtnl_lock();
+			ops = rtnl_link_ops_get(kind);
+			if (ops)
+				goto replay;
+		}
 #endif
-			return -EOPNOTSUPP;
-		}
+		NL_SET_ERR_MSG(extack, "Unknown device type");
+		return -EOPNOTSUPP;
+	}
 
-		if (!ops->setup)
-			return -EOPNOTSUPP;
+	if (!ops->setup)
+		return -EOPNOTSUPP;
 
-		if (!ifname[0]) {
-			snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
-			name_assign_type = NET_NAME_ENUM;
-		}
+	if (!ifname[0]) {
+		snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
+		name_assign_type = NET_NAME_ENUM;
+	}
 
-		dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
-		if (IS_ERR(dest_net))
-			return PTR_ERR(dest_net);
+	dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
+	if (IS_ERR(dest_net))
+		return PTR_ERR(dest_net);
 
-		if (tb[IFLA_LINK_NETNSID]) {
-			int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
+	if (tb[IFLA_LINK_NETNSID]) {
+		int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
 
-			link_net = get_net_ns_by_id(dest_net, id);
-			if (!link_net) {
-				err =  -EINVAL;
-				goto out;
-			}
-			err = -EPERM;
-			if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
-				goto out;
-		}
-
-		dev = rtnl_create_link(link_net ? : dest_net, ifname,
-				       name_assign_type, ops, tb);
-		if (IS_ERR(dev)) {
-			err = PTR_ERR(dev);
+		link_net = get_net_ns_by_id(dest_net, id);
+		if (!link_net) {
+			NL_SET_ERR_MSG(extack, "Unknown network namespace id");
+			err =  -EINVAL;
 			goto out;
 		}
+		err = -EPERM;
+		if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
+			goto out;
+	} else {
+		link_net = NULL;
+	}
 
-		dev->ifindex = ifm->ifi_index;
-
-		if (ops->newlink) {
-			err = ops->newlink(link_net ? : net, dev, tb, data,
-					   extack);
-			/* Drivers should call free_netdev() in ->destructor
-			 * and unregister it on failure after registration
-			 * so that device could be finally freed in rtnl_unlock.
-			 */
-			if (err < 0) {
-				/* If device is not registered at all, free it now */
-				if (dev->reg_state == NETREG_UNINITIALIZED)
-					free_netdev(dev);
-				goto out;
-			}
-		} else {
-			err = register_netdevice(dev);
-			if (err < 0) {
-				free_netdev(dev);
-				goto out;
-			}
-		}
-		err = rtnl_configure_link(dev, ifm);
-		if (err < 0)
-			goto out_unregister;
-		if (link_net) {
-			err = dev_change_net_namespace(dev, dest_net, ifname);
-			if (err < 0)
-				goto out_unregister;
-		}
-		if (tb[IFLA_MASTER]) {
-			err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
-					    extack);
-			if (err)
-				goto out_unregister;
-		}
-out:
-		if (link_net)
-			put_net(link_net);
-		put_net(dest_net);
-		return err;
-out_unregister:
-		if (ops->newlink) {
-			LIST_HEAD(list_kill);
-
-			ops->dellink(dev, &list_kill);
-			unregister_netdevice_many(&list_kill);
-		} else {
-			unregister_netdevice(dev);
-		}
+	dev = rtnl_create_link(link_net ? : dest_net, ifname,
+			       name_assign_type, ops, tb, extack);
+	if (IS_ERR(dev)) {
+		err = PTR_ERR(dev);
 		goto out;
 	}
+
+	dev->ifindex = ifm->ifi_index;
+
+	if (ops->newlink) {
+		err = ops->newlink(link_net ? : net, dev, tb, data, extack);
+		/* Drivers should call free_netdev() in ->destructor
+		 * and unregister it on failure after registration
+		 * so that the device can finally be freed in rtnl_unlock.
+		 */
+		if (err < 0) {
+			/* If device is not registered at all, free it now */
+			if (dev->reg_state == NETREG_UNINITIALIZED)
+				free_netdev(dev);
+			goto out;
+		}
+	} else {
+		err = register_netdevice(dev);
+		if (err < 0) {
+			free_netdev(dev);
+			goto out;
+		}
+	}
+	err = rtnl_configure_link(dev, ifm);
+	if (err < 0)
+		goto out_unregister;
+	if (link_net) {
+		err = dev_change_net_namespace(dev, dest_net, ifname);
+		if (err < 0)
+			goto out_unregister;
+	}
+	if (tb[IFLA_MASTER]) {
+		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
+		if (err)
+			goto out_unregister;
+	}
+out:
+	if (link_net)
+		put_net(link_net);
+	put_net(dest_net);
+	return err;
+out_unregister:
+	if (ops->newlink) {
+		LIST_HEAD(list_kill);
+
+		ops->dellink(dev, &list_kill);
+		unregister_netdevice_many(&list_kill);
+	} else {
+		unregister_netdevice(dev);
+	}
+	goto out;
+}
+
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			struct netlink_ext_ack *extack)
+{
+	struct nlattr **attr;
+	int ret;
+
+	attr = kmalloc_array(RTNL_MAX_TYPE + 1, sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	ret = __rtnl_newlink(skb, nlh, attr, extack);
+	kfree(attr);
+	return ret;
+}
+
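The wrapper above evidently exists to keep rtnl_newlink()'s stack frame small: an on-stack struct nlattr *attr[RTNL_MAX_TYPE + 1] is a few hundred bytes of pointers on 64-bit, enough (with the other locals) to trip frame-size warnings, so the table moves to the heap. kmalloc_array() is the overflow-checked spelling of kmalloc(n * size); a minimal sketch of its guard, assuming the linux/overflow.h helpers:

	size_t bytes;

	/* kmalloc_array(n, size, flags) refuses a product that wraps */
	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;
	return kmalloc(bytes, flags);
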
+static int rtnl_valid_getlink_req(struct sk_buff *skb,
+				  const struct nlmsghdr *nlh,
+				  struct nlattr **tb,
+				  struct netlink_ext_ack *extack)
+{
+	struct ifinfomsg *ifm;
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+		NL_SET_ERR_MSG(extack, "Invalid header for get link");
+		return -EINVAL;
+	}
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
+					      ifla_policy, extack);
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
+	    ifm->ifi_change) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for get link request");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFLA_MAX,
+					    ifla_policy, extack);
+	if (err)
+		return err;
+
+	for (i = 0; i <= IFLA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case IFLA_IFNAME:
+		case IFLA_EXT_MASK:
+		case IFLA_TARGET_NETNSID:
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unsupported attribute in get link request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
 }
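rtnl_valid_getlink_req() only enforces its attribute whitelist for sockets that opted in to strict checking via netlink_strict_get_check(); legacy sockets still get the permissive deprecated parse. The opt-in happens in userspace, roughly as below (a hedged sketch; the constants come from <linux/netlink.h>):

	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	int one = 1;

	/* Ask the kernel to reject stray header fields and unknown
	 * attributes in get/dump requests on this socket.
	 */
	setsockopt(fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &one, sizeof(one));
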
 
 static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -3169,7 +3339,7 @@
 	int err;
 	u32 ext_filter_mask = 0;
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
+	err = rtnl_valid_getlink_req(skb, nlh, tb, extack);
 	if (err < 0)
 		return err;
 
@@ -3177,9 +3347,9 @@
 	if (err < 0)
 		return err;
 
-	if (tb[IFLA_IF_NETNSID]) {
-		netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
-		tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
+	if (tb[IFLA_TARGET_NETNSID]) {
+		netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]);
+		tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid);
 		if (IS_ERR(tgt_net))
 			return PTR_ERR(tgt_net);
 	}
@@ -3211,7 +3381,7 @@
 	err = rtnl_fill_ifinfo(nskb, dev, net,
 			       RTM_NEWLINK, NETLINK_CB(skb).portid,
 			       nlh->nlmsg_seq, 0, 0, ext_filter_mask,
-			       0, NULL, 0, netnsid);
+			       0, NULL, 0, netnsid, GFP_KERNEL);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
@@ -3238,7 +3408,7 @@
 	hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
 		 sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
 
-	if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) {
+	if (nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) {
 		if (tb[IFLA_EXT_MASK])
 			ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
 	}
@@ -3264,13 +3434,14 @@
 {
 	int idx;
 	int s_idx = cb->family;
+	int type = cb->nlh->nlmsg_type - RTM_BASE;
+	int ret = 0;
 
 	if (s_idx == 0)
 		s_idx = 1;
 
 	for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
 		struct rtnl_link **tab;
-		int type = cb->nlh->nlmsg_type-RTM_BASE;
 		struct rtnl_link *link;
 		rtnl_dumpit_func dumpit;
 
@@ -3297,12 +3468,13 @@
 			cb->prev_seq = 0;
 			cb->seq = 0;
 		}
-		if (dumpit(skb, cb))
+		ret = dumpit(skb, cb);
+		if (ret)
 			break;
 	}
 	cb->family = idx;
 
-	return skb->len;
+	return skb->len ? : ret;
 }
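The new return value, skb->len ? : ret, uses the GNU ?: extension: a partially filled dump skb is returned as-is so userspace consumes the batch and re-enters the dump, and only an empty skb surfaces the dumpit error. Spelled out, the equivalent is:

	if (skb->len)
		return skb->len;	/* partial data wins over the error */
	return ret;			/* nothing buffered: report the error */
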
 
 struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
@@ -3321,7 +3493,7 @@
 
 	err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
 			       type, 0, 0, change, 0, 0, event,
-			       new_nsid, new_ifindex, -1);
+			       new_nsid, new_ifindex, -1, flags);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -3439,7 +3611,7 @@
 	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
 }
 
-/**
+/*
  * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry
  */
 int ndo_dflt_fdb_add(struct ndmsg *ndm,
@@ -3509,7 +3681,8 @@
 	u16 vid;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL,
+				     extack);
 	if (err < 0)
 		return err;
 
@@ -3531,7 +3704,7 @@
 	}
 
 	if (dev->type != ARPHRD_ETHER) {
-		NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices");
+		NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices");
 		return -EINVAL;
 	}
 
@@ -3550,7 +3723,7 @@
 		const struct net_device_ops *ops = br_dev->netdev_ops;
 
 		err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid,
-				       nlh->nlmsg_flags);
+				       nlh->nlmsg_flags, extack);
 		if (err)
 			goto out;
 		else
@@ -3562,7 +3735,8 @@
 		if (dev->netdev_ops->ndo_fdb_add)
 			err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr,
 							   vid,
-							   nlh->nlmsg_flags);
+							   nlh->nlmsg_flags,
+							   extack);
 		else
 			err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid,
 					       nlh->nlmsg_flags);
@@ -3577,7 +3751,7 @@
 	return err;
 }
 
-/**
+/*
  * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry
  */
 int ndo_dflt_fdb_del(struct ndmsg *ndm,
@@ -3618,7 +3792,8 @@
 	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL,
+				     extack);
 	if (err < 0)
 		return err;
 
@@ -3640,7 +3815,7 @@
 	}
 
 	if (dev->type != ARPHRD_ETHER) {
-		NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices");
+		NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices");
 		return -EINVAL;
 	}
 
@@ -3716,8 +3891,11 @@
 
 /**
  * ndo_dflt_fdb_dump - default netdevice operation to dump an FDB table.
- * @nlh: netlink message header
+ * @skb: socket buffer to store message in
+ * @cb: netlink callback
  * @dev: netdevice
+ * @filter_dev: ignored
+ * @idx: the number of FDB table entries dumped is added to *@idx
  *
  * Default netdevice operation to dump the existing unicast address list.
  * Returns number of addresses from list put in skb.
@@ -3744,14 +3922,101 @@
 }
 EXPORT_SYMBOL(ndo_dflt_fdb_dump);
 
+static int valid_fdb_dump_strict(const struct nlmsghdr *nlh,
+				 int *br_idx, int *brport_idx,
+				 struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[NDA_MAX + 1];
+	struct ndmsg *ndm;
+	int err, i;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+		NL_SET_ERR_MSG(extack, "Invalid header for fdb dump request");
+		return -EINVAL;
+	}
+
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
+	    ndm->ndm_flags || ndm->ndm_type) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
+					    NDA_MAX, NULL, extack);
+	if (err < 0)
+		return err;
+
+	*brport_idx = ndm->ndm_ifindex;
+	for (i = 0; i <= NDA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NDA_IFINDEX:
+			if (nla_len(tb[i]) != sizeof(u32)) {
+				NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in fdb dump request");
+				return -EINVAL;
+			}
+			*brport_idx = nla_get_u32(tb[NDA_IFINDEX]);
+			break;
+		case NDA_MASTER:
+			if (nla_len(tb[i]) != sizeof(u32)) {
+				NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in fdb dump request");
+				return -EINVAL;
+			}
+			*br_idx = nla_get_u32(tb[NDA_MASTER]);
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unsupported attribute in fdb dump request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh,
+				 int *br_idx, int *brport_idx,
+				 struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_MAX+1];
+	int err;
+
+	/* A hack to preserve kernel<->userspace interface.
+	 * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0.
+	 * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails.
+	 * So, check for ndmsg with an optional u32 attribute (not used here).
+	 * Fortunately these sizes don't conflict with the size of ifinfomsg
+	 * with an optional attribute.
+	 */
+	if (nlmsg_len(nlh) != sizeof(struct ndmsg) &&
+	    (nlmsg_len(nlh) != sizeof(struct ndmsg) +
+	     nla_attr_size(sizeof(u32)))) {
+		struct ifinfomsg *ifm;
+
+		err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg),
+					     tb, IFLA_MAX, ifla_policy,
+					     extack);
+		if (err < 0) {
+			return -EINVAL;
+		} else if (err == 0) {
+			if (tb[IFLA_MASTER])
+				*br_idx = nla_get_u32(tb[IFLA_MASTER]);
+		}
+
+		ifm = nlmsg_data(nlh);
+		*brport_idx = ifm->ifi_index;
+	}
+	return 0;
+}
+
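The legacy size test works because the two request shapes cannot collide: struct ndmsg is 12 bytes and struct ifinfomsg is 16, so an ndmsg-based dump request carries 12 bytes of header (or 12 + nla_attr_size(sizeof(u32)) = 20 with the optional attribute), while an ifinfomsg-based one carries 16 plus attributes. A hedged restatement of the predicate, with a hypothetical helper name:

	static bool fdb_dump_req_is_legacy_ndmsg(const struct nlmsghdr *nlh)
	{
		/* 12 bytes bare or 20 with the optional u32 -- never 16+,
		 * which is what an ifinfomsg-based request looks like.
		 */
		return nlmsg_len(nlh) == sizeof(struct ndmsg) ||
		       nlmsg_len(nlh) == sizeof(struct ndmsg) +
					 nla_attr_size(sizeof(u32));
	}
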
 static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net_device *dev;
-	struct nlattr *tb[IFLA_MAX+1];
 	struct net_device *br_dev = NULL;
 	const struct net_device_ops *ops = NULL;
 	const struct net_device_ops *cops = NULL;
-	struct ifinfomsg *ifm = nlmsg_data(cb->nlh);
 	struct net *net = sock_net(skb->sk);
 	struct hlist_head *head;
 	int brport_idx = 0;
@@ -3761,27 +4026,14 @@
 	int err = 0;
 	int fidx = 0;
 
-	/* A hack to preserve kernel<->userspace interface.
-	 * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0.
-	 * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails.
-	 * So, check for ndmsg with an optional u32 attribute (not used here).
-	 * Fortunately these sizes don't conflict with the size of ifinfomsg
-	 * with an optional attribute.
-	 */
-	if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) &&
-	    (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) +
-	     nla_attr_size(sizeof(u32)))) {
-		err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
-				  IFLA_MAX, ifla_policy, NULL);
-		if (err < 0) {
-			return -EINVAL;
-		} else if (err == 0) {
-			if (tb[IFLA_MASTER])
-				br_idx = nla_get_u32(tb[IFLA_MASTER]);
-		}
-
-		brport_idx = ifm->ifi_index;
-	}
+	if (cb->strict_check)
+		err = valid_fdb_dump_strict(cb->nlh, &br_idx, &brport_idx,
+					    cb->extack);
+	else
+		err = valid_fdb_dump_legacy(cb->nlh, &br_idx, &brport_idx,
+					    cb->extack);
+	if (err < 0)
+		return err;
 
 	if (br_idx) {
 		br_dev = __dev_get_by_index(net, br_idx);
@@ -3859,6 +4111,165 @@
 	return skb->len;
 }
 
+static int valid_fdb_get_strict(const struct nlmsghdr *nlh,
+				struct nlattr **tb, u8 *ndm_flags,
+				int *br_idx, int *brport_idx, u8 **addr,
+				u16 *vid, struct netlink_ext_ack *extack)
+{
+	struct ndmsg *ndm;
+	int err, i;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+		NL_SET_ERR_MSG(extack, "Invalid header for fdb get request");
+		return -EINVAL;
+	}
+
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
+	    ndm->ndm_type) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for fdb get request");
+		return -EINVAL;
+	}
+
+	if (ndm->ndm_flags & ~(NTF_MASTER | NTF_SELF)) {
+		NL_SET_ERR_MSG(extack, "Invalid flags in header for fdb get request");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
+					    NDA_MAX, nda_policy, extack);
+	if (err < 0)
+		return err;
+
+	*ndm_flags = ndm->ndm_flags;
+	*brport_idx = ndm->ndm_ifindex;
+	for (i = 0; i <= NDA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NDA_MASTER:
+			*br_idx = nla_get_u32(tb[i]);
+			break;
+		case NDA_LLADDR:
+			if (nla_len(tb[i]) != ETH_ALEN) {
+				NL_SET_ERR_MSG(extack, "Invalid address in fdb get request");
+				return -EINVAL;
+			}
+			*addr = nla_data(tb[i]);
+			break;
+		case NDA_VLAN:
+			err = fdb_vid_parse(tb[i], vid, extack);
+			if (err)
+				return err;
+			break;
+		case NDA_VNI:
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unsupported attribute in fdb get request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+			struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = NULL, *br_dev = NULL;
+	const struct net_device_ops *ops = NULL;
+	struct net *net = sock_net(in_skb->sk);
+	struct nlattr *tb[NDA_MAX + 1];
+	struct sk_buff *skb;
+	int brport_idx = 0;
+	u8 ndm_flags = 0;
+	int br_idx = 0;
+	u8 *addr = NULL;
+	u16 vid = 0;
+	int err;
+
+	err = valid_fdb_get_strict(nlh, tb, &ndm_flags, &br_idx,
+				   &brport_idx, &addr, &vid, extack);
+	if (err < 0)
+		return err;
+
+	if (!addr) {
+		NL_SET_ERR_MSG(extack, "Missing lookup address for fdb get request");
+		return -EINVAL;
+	}
+
+	if (brport_idx) {
+		dev = __dev_get_by_index(net, brport_idx);
+		if (!dev) {
+			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
+			return -ENODEV;
+		}
+	}
+
+	if (br_idx) {
+		if (dev) {
+			NL_SET_ERR_MSG(extack, "Master and device are mutually exclusive");
+			return -EINVAL;
+		}
+
+		br_dev = __dev_get_by_index(net, br_idx);
+		if (!br_dev) {
+			NL_SET_ERR_MSG(extack, "Invalid master ifindex");
+			return -EINVAL;
+		}
+		ops = br_dev->netdev_ops;
+	}
+
+	if (dev) {
+		if (!ndm_flags || (ndm_flags & NTF_MASTER)) {
+			if (!(dev->priv_flags & IFF_BRIDGE_PORT)) {
+				NL_SET_ERR_MSG(extack, "Device is not a bridge port");
+				return -EINVAL;
+			}
+			br_dev = netdev_master_upper_dev_get(dev);
+			if (!br_dev) {
+				NL_SET_ERR_MSG(extack, "Master of device not found");
+				return -EINVAL;
+			}
+			ops = br_dev->netdev_ops;
+		} else {
+			if (!(ndm_flags & NTF_SELF)) {
+				NL_SET_ERR_MSG(extack, "Missing NTF_SELF");
+				return -EINVAL;
+			}
+			ops = dev->netdev_ops;
+		}
+	}
+
+	if (!br_dev && !dev) {
+		NL_SET_ERR_MSG(extack, "No device specified");
+		return -ENODEV;
+	}
+
+	if (!ops || !ops->ndo_fdb_get) {
+		NL_SET_ERR_MSG(extack, "Fdb get operation not supported by device");
+		return -EOPNOTSUPP;
+	}
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (br_dev)
+		dev = br_dev;
+	err = ops->ndo_fdb_get(skb, tb, dev, addr, vid,
+			       NETLINK_CB(in_skb).portid,
+			       nlh->nlmsg_seq, extack);
+	if (err)
+		goto out;
+
+	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+out:
+	kfree_skb(skb);
+	return err;
+}
+
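Registering rtnl_fdb_get() as the RTM_GETNEIGH doit handler (see the rtnl_register() change near the end of this file's hunks) lets userspace look up a single FDB entry instead of dumping the whole table. A driver answers by implementing ->ndo_fdb_get(); a minimal sketch against the signature used above, with hypothetical my_fdb_find()/my_fdb_fill() helpers standing in for what the in-tree bridge and vxlan implementations do:

	static int my_ndo_fdb_get(struct sk_buff *skb, struct nlattr *tb[],
				  struct net_device *dev,
				  const unsigned char *addr, u16 vid,
				  u32 portid, u32 seq,
				  struct netlink_ext_ack *extack)
	{
		struct my_fdb_entry *f;

		/* Look up the single entry keyed by <addr, vid>... */
		f = my_fdb_find(dev, addr, vid);
		if (!f) {
			NL_SET_ERR_MSG(extack, "Fdb entry not found");
			return -ENOENT;
		}
		/* ...and emit one RTM_NEWNEIGH record into the reply skb */
		return my_fdb_fill(skb, dev, f, portid, seq);
	}
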
 static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
 			       unsigned int attrnum, unsigned int flag)
 {
@@ -3907,7 +4318,7 @@
 	     nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))))
 		goto nla_put_failure;
 
-	br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
+	br_afspec = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
 	if (!br_afspec)
 		goto nla_put_failure;
 
@@ -3931,7 +4342,7 @@
 	}
 	nla_nest_end(skb, br_afspec);
 
-	protinfo = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);
+	protinfo = nla_nest_start(skb, IFLA_PROTINFO);
 	if (!protinfo)
 		goto nla_put_failure;
 
@@ -3966,28 +4377,75 @@
 }
 EXPORT_SYMBOL_GPL(ndo_dflt_bridge_getlink);
 
+static int valid_bridge_getlink_req(const struct nlmsghdr *nlh,
+				    bool strict_check, u32 *filter_mask,
+				    struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_MAX+1];
+	int err, i;
+
+	if (strict_check) {
+		struct ifinfomsg *ifm;
+
+		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+			NL_SET_ERR_MSG(extack, "Invalid header for bridge link dump");
+			return -EINVAL;
+		}
+
+		ifm = nlmsg_data(nlh);
+		if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
+		    ifm->ifi_change || ifm->ifi_index) {
+			NL_SET_ERR_MSG(extack, "Invalid values in header for bridge link dump request");
+			return -EINVAL;
+		}
+
+		err = nlmsg_parse_deprecated_strict(nlh,
+						    sizeof(struct ifinfomsg),
+						    tb, IFLA_MAX, ifla_policy,
+						    extack);
+	} else {
+		err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg),
+					     tb, IFLA_MAX, ifla_policy,
+					     extack);
+	}
+	if (err < 0)
+		return err;
+
+	/* new attributes should only be added with strict checking */
+	for (i = 0; i <= IFLA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case IFLA_EXT_MASK:
+			*filter_mask = nla_get_u32(tb[i]);
+			break;
+		default:
+			if (strict_check) {
+				NL_SET_ERR_MSG(extack, "Unsupported attribute in bridge link dump request");
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
 static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	struct net_device *dev;
 	int idx = 0;
 	u32 portid = NETLINK_CB(cb->skb).portid;
-	u32 seq = cb->nlh->nlmsg_seq;
+	u32 seq = nlh->nlmsg_seq;
 	u32 filter_mask = 0;
 	int err;
 
-	if (nlmsg_len(cb->nlh) > sizeof(struct ifinfomsg)) {
-		struct nlattr *extfilt;
-
-		extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg),
-					  IFLA_EXT_MASK);
-		if (extfilt) {
-			if (nla_len(extfilt) < sizeof(filter_mask))
-				return -EINVAL;
-
-			filter_mask = nla_get_u32(extfilt);
-		}
-	}
+	err = valid_bridge_getlink_req(nlh, cb->strict_check, &filter_mask,
+				       cb->extack);
+	if (err < 0 && cb->strict_check)
+		return err;
 
 	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
@@ -4126,7 +4584,8 @@
 			goto out;
 		}
 
-		err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags);
+		err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags,
+							     extack);
 		if (err)
 			goto out;
 
@@ -4138,7 +4597,8 @@
 			err = -EOPNOTSUPP;
 		else
 			err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh,
-								  flags);
+								  flags,
+								  extack);
 		if (!err) {
 			flags &= ~BRIDGE_FLAGS_SELF;
 
@@ -4364,8 +4824,8 @@
 
 		if (ops && ops->fill_linkxstats) {
 			*idxattr = IFLA_STATS_LINK_XSTATS;
-			attr = nla_nest_start(skb,
-					      IFLA_STATS_LINK_XSTATS);
+			attr = nla_nest_start_noflag(skb,
+						     IFLA_STATS_LINK_XSTATS);
 			if (!attr)
 				goto nla_put_failure;
 
@@ -4387,8 +4847,8 @@
 			ops = master->rtnl_link_ops;
 		if (ops && ops->fill_linkxstats) {
 			*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
-			attr = nla_nest_start(skb,
-					      IFLA_STATS_LINK_XSTATS_SLAVE);
+			attr = nla_nest_start_noflag(skb,
+						     IFLA_STATS_LINK_XSTATS_SLAVE);
 			if (!attr)
 				goto nla_put_failure;
 
@@ -4403,7 +4863,8 @@
 	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS,
 			     *idxattr)) {
 		*idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS;
-		attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS);
+		attr = nla_nest_start_noflag(skb,
+					     IFLA_STATS_LINK_OFFLOAD_XSTATS);
 		if (!attr)
 			goto nla_put_failure;
 
@@ -4422,7 +4883,7 @@
 		struct rtnl_af_ops *af_ops;
 
 		*idxattr = IFLA_STATS_AF_SPEC;
-		attr = nla_nest_start(skb, IFLA_STATS_AF_SPEC);
+		attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC);
 		if (!attr)
 			goto nla_put_failure;
 
@@ -4432,7 +4893,8 @@
 				struct nlattr *af;
 				int err;
 
-				af = nla_nest_start(skb, af_ops->family);
+				af = nla_nest_start_noflag(skb,
+							   af_ops->family);
 				if (!af) {
 					rcu_read_unlock();
 					goto nla_put_failure;
@@ -4534,6 +4996,40 @@
 	return size;
 }
 
+static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
+				bool is_dump, struct netlink_ext_ack *extack)
+{
+	struct if_stats_msg *ifsm;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) {
+		NL_SET_ERR_MSG(extack, "Invalid header for stats dump");
+		return -EINVAL;
+	}
+
+	if (!strict_check)
+		return 0;
+
+	ifsm = nlmsg_data(nlh);
+
+	/* only requests using strict checks can pass data to influence
+	 * the dump. The legacy exception is filter_mask.
+	 */
+	if (ifsm->pad1 || ifsm->pad2 || (is_dump && ifsm->ifindex)) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request");
+		return -EINVAL;
+	}
+	if (nlmsg_attrlen(nlh, sizeof(*ifsm))) {
+		NL_SET_ERR_MSG(extack, "Invalid attributes after stats header");
+		return -EINVAL;
+	}
+	if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) {
+		NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
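The filter-mask bound is meaningful because each IFLA_STATS_* attribute maps to one request bit, IFLA_STATS_FILTER_BIT(attr) == 1 << (attr - 1), so any bit at or above IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1) names a statistic this kernel does not have and is now rejected under strict checking. A hedged request sketch (ifindex is illustrative):

	struct if_stats_msg ifsm = {
		.family      = PF_UNSPEC,
		.ifindex     = ifindex,		/* leave 0 in a dump request */
		.filter_mask = IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_64),
	};
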
 static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
 			  struct netlink_ext_ack *extack)
 {
@@ -4545,8 +5041,10 @@
 	u32 filter_mask;
 	int err;
 
-	if (nlmsg_len(nlh) < sizeof(*ifsm))
-		return -EINVAL;
+	err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb),
+				   false, extack);
+	if (err)
+		return err;
 
 	ifsm = nlmsg_data(nlh);
 	if (ifsm->ifindex > 0)
@@ -4581,6 +5079,7 @@
 
 static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct netlink_ext_ack *extack = cb->extack;
 	int h, s_h, err, s_idx, s_idxattr, s_prividx;
 	struct net *net = sock_net(skb->sk);
 	unsigned int flags = NLM_F_MULTI;
@@ -4597,13 +5096,16 @@
 
 	cb->seq = net->dev_base_seq;
 
-	if (nlmsg_len(cb->nlh) < sizeof(*ifsm))
-		return -EINVAL;
+	err = rtnl_valid_stats_req(cb->nlh, cb->strict_check, true, extack);
+	if (err)
+		return err;
 
 	ifsm = nlmsg_data(cb->nlh);
 	filter_mask = ifsm->filter_mask;
-	if (!filter_mask)
+	if (!filter_mask) {
+		NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump");
 		return -EINVAL;
+	}
 
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
@@ -4853,7 +5355,7 @@
 
 	rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
 	rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
-	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0);
+	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
 
 	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
 	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
diff --git a/net/core/scm.c b/net/core/scm.c
index b1ff8a4..31a3823 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* scm.c - Socket level control messages processing.
  *
  * Author:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *              Alignment and value checking mods by Craig Metz
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -29,6 +25,7 @@
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
+#include <linux/errqueue.h>
 
 #include <linux/uaccess.h>
 
@@ -252,6 +249,32 @@
 }
 EXPORT_SYMBOL(put_cmsg);
 
+void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal)
+{
+	struct scm_timestamping64 tss;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tss.ts); i++) {
+		tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec;
+		tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec;
+	}
+
+	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_NEW, sizeof(tss), &tss);
+}
+EXPORT_SYMBOL(put_cmsg_scm_timestamping64);
+
+void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss_internal)
+{
+	struct scm_timestamping tss;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tss.ts); i++)
+		tss.ts[i] = timespec64_to_timespec(tss_internal->ts[i]);
+
+	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, sizeof(tss), &tss);
+}
+EXPORT_SYMBOL(put_cmsg_scm_timestamping);
+
 void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 {
 	struct cmsghdr __user *cm
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index af6ad46..7b6b1d2 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index abbbd7f..867e61d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Routines having to do with the 'struct sk_buff' memory handlers.
  *
@@ -25,11 +26,6 @@
  *	disabled, or you better be *real* sure that the operation is atomic
  *	with respect to whatever list is being frobbed (e.g. via lock_sock()
  *	or via disabling bottom half handlers, etc).
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 /*
@@ -63,6 +59,7 @@
 #include <linux/errqueue.h>
 #include <linux/prefetch.h>
 #include <linux/if_vlan.h>
+#include <linux/mpls.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
@@ -70,15 +67,22 @@
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
 #include <net/xfrm.h>
+#include <net/mpls.h>
 
 #include <linux/uaccess.h>
 #include <trace/events/skb.h>
 #include <linux/highmem.h>
 #include <linux/capability.h>
 #include <linux/user_namespace.h>
+#include <linux/indirect_call_wrapper.h>
+
+#include "datagram.h"
 
 struct kmem_cache *skbuff_head_cache __ro_after_init;
 static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
+#ifdef CONFIG_SKB_EXTENSIONS
+static struct kmem_cache *skbuff_ext_cache __ro_after_init;
+#endif
 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
 EXPORT_SYMBOL(sysctl_max_skb_frags);
 
@@ -253,6 +257,33 @@
 }
 EXPORT_SYMBOL(__alloc_skb);
 
+/* Caller must provide SKB that is memset cleared */
+static struct sk_buff *__build_skb_around(struct sk_buff *skb,
+					  void *data, unsigned int frag_size)
+{
+	struct skb_shared_info *shinfo;
+	unsigned int size = frag_size ? : ksize(data);
+
+	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	/* Assumes caller memset cleared SKB */
+	skb->truesize = SKB_TRUESIZE(size);
+	refcount_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb_reset_tail_pointer(skb);
+	skb->end = skb->tail + size;
+	skb->mac_header = (typeof(skb->mac_header))~0U;
+	skb->transport_header = (typeof(skb->transport_header))~0U;
+
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+	atomic_set(&shinfo->dataref, 1);
+
+	return skb;
+}
+
 /**
  * __build_skb - build a network buffer
  * @data: data buffer provided by caller
@@ -274,32 +305,15 @@
  */
 struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 {
-	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
-	unsigned int size = frag_size ? : ksize(data);
 
 	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
-	if (!skb)
+	if (unlikely(!skb))
 		return NULL;
 
-	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
 	memset(skb, 0, offsetof(struct sk_buff, tail));
-	skb->truesize = SKB_TRUESIZE(size);
-	refcount_set(&skb->users, 1);
-	skb->head = data;
-	skb->data = data;
-	skb_reset_tail_pointer(skb);
-	skb->end = skb->tail + size;
-	skb->mac_header = (typeof(skb->mac_header))~0U;
-	skb->transport_header = (typeof(skb->transport_header))~0U;
 
-	/* make sure we initialize shinfo sequentially */
-	shinfo = skb_shinfo(skb);
-	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
-	atomic_set(&shinfo->dataref, 1);
-
-	return skb;
+	return __build_skb_around(skb, data, frag_size);
 }
 
 /* build_skb() is wrapper over __build_skb(), that specifically
@@ -320,6 +334,29 @@
 }
 EXPORT_SYMBOL(build_skb);
 
+/**
+ * build_skb_around - build a network buffer around provided skb
+ * @skb: sk_buff provided by caller, must be memset cleared
+ * @data: data buffer provided by caller
+ * @frag_size: size of data, or 0 if head was kmalloced
+ */
+struct sk_buff *build_skb_around(struct sk_buff *skb,
+				 void *data, unsigned int frag_size)
+{
+	if (unlikely(!skb))
+		return NULL;
+
+	skb = __build_skb_around(skb, data, frag_size);
+
+	if (skb && frag_size) {
+		skb->head_frag = 1;
+		if (page_is_pfmemalloc(virt_to_head_page(data)))
+			skb->pfmemalloc = 1;
+	}
+	return skb;
+}
+EXPORT_SYMBOL(build_skb_around);
+
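build_skb_around() targets callers that bulk-allocate sk_buff shells themselves and attach the data buffer later (XDP's cpumap was the motivating user). A hedged usage sketch, where data and truesize stand in for a driver-owned frame:

	struct sk_buff *skb;

	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;

	/* The helper assumes a zeroed shell, as its comment demands */
	memset(skb, 0, offsetof(struct sk_buff, tail));
	skb = build_skb_around(skb, data, truesize);
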
 #define NAPI_SKB_CACHE_SIZE	64
 
 struct napi_alloc_cache {
@@ -331,19 +368,21 @@
 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
 static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
 
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc;
-	unsigned long flags;
-	void *data;
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 
-	local_irq_save(flags);
-	nc = this_cpu_ptr(&netdev_alloc_cache);
-	data = page_frag_alloc(nc, fragsz, gfp_mask);
-	local_irq_restore(flags);
-	return data;
+	return page_frag_alloc(&nc->page, fragsz, gfp_mask);
 }
 
+void *napi_alloc_frag(unsigned int fragsz)
+{
+	fragsz = SKB_DATA_ALIGN(fragsz);
+
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(napi_alloc_frag);
+
 /**
  * netdev_alloc_frag - allocate a page fragment
  * @fragsz: fragment size
@@ -353,23 +392,22 @@
  */
 void *netdev_alloc_frag(unsigned int fragsz)
 {
-	return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
+	struct page_frag_cache *nc;
+	void *data;
+
+	fragsz = SKB_DATA_ALIGN(fragsz);
+	if (in_irq() || irqs_disabled()) {
+		nc = this_cpu_ptr(&netdev_alloc_cache);
+		data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
+	} else {
+		local_bh_disable();
+		data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
+		local_bh_enable();
+	}
+	return data;
 }
 EXPORT_SYMBOL(netdev_alloc_frag);
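netdev_alloc_frag() now aligns fragsz itself and, unless called in hard-IRQ context or with IRQs disabled, borrows the per-CPU NAPI page-fragment cache under local_bh_disable(), keeping the separate netdev_alloc_cache only for the IRQ-unsafe callers. Typical RX-refill usage is unchanged (a hedged sketch; RX_BUF_SIZE is illustrative):

	struct sk_buff *skb;
	void *buf = netdev_alloc_frag(RX_BUF_SIZE);

	if (unlikely(!buf))
		return -ENOMEM;
	/* Non-zero frag_size marks the skb head as page-fragment backed */
	skb = build_skb(buf, RX_BUF_SIZE);
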
 
-static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
-{
-	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
-
-	return page_frag_alloc(&nc->page, fragsz, gfp_mask);
-}
-
-void *napi_alloc_frag(unsigned int fragsz)
-{
-	return __napi_alloc_frag(fragsz, GFP_ATOMIC);
-}
-EXPORT_SYMBOL(napi_alloc_frag);
-
 /**
  *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
  *	@dev: network device to receive on
@@ -387,7 +425,6 @@
 				   gfp_t gfp_mask)
 {
 	struct page_frag_cache *nc;
-	unsigned long flags;
 	struct sk_buff *skb;
 	bool pfmemalloc;
 	void *data;
@@ -408,13 +445,17 @@
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
-	local_irq_save(flags);
-
-	nc = this_cpu_ptr(&netdev_alloc_cache);
-	data = page_frag_alloc(nc, len, gfp_mask);
-	pfmemalloc = nc->pfmemalloc;
-
-	local_irq_restore(flags);
+	if (in_irq() || irqs_disabled()) {
+		nc = this_cpu_ptr(&netdev_alloc_cache);
+		data = page_frag_alloc(nc, len, gfp_mask);
+		pfmemalloc = nc->pfmemalloc;
+	} else {
+		local_bh_disable();
+		nc = this_cpu_ptr(&napi_alloc_cache.page);
+		data = page_frag_alloc(nc, len, gfp_mask);
+		pfmemalloc = nc->pfmemalloc;
+		local_bh_enable();
+	}
 
 	if (unlikely(!data))
 		return NULL;
@@ -606,7 +647,6 @@
 void skb_release_head_state(struct sk_buff *skb)
 {
 	skb_dst_drop(skb);
-	secpath_reset(skb);
 	if (skb->destructor) {
 		WARN_ON(in_irq());
 		skb->destructor(skb);
@@ -614,9 +654,7 @@
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	nf_conntrack_put(skb_nfct(skb));
 #endif
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	nf_bridge_put(skb->nf_bridge);
-#endif
+	skb_ext_put(skb);
 }
 
 /* Free everything but the sk_buff shell. */
@@ -671,6 +709,105 @@
 }
 EXPORT_SYMBOL(kfree_skb_list);
 
+/* Dump skb information and contents.
+ *
+ * Must only be called from net_ratelimit()-ed paths.
+ *
+ * Dumps up to can_dump_full whole packets if full_pkt, only headers otherwise.
+ */
+void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
+{
+	static atomic_t can_dump_full = ATOMIC_INIT(5);
+	struct skb_shared_info *sh = skb_shinfo(skb);
+	struct net_device *dev = skb->dev;
+	struct sock *sk = skb->sk;
+	struct sk_buff *list_skb;
+	bool has_mac, has_trans;
+	int headroom, tailroom;
+	int i, len, seg_len;
+
+	if (full_pkt)
+		full_pkt = atomic_dec_if_positive(&can_dump_full) >= 0;
+
+	if (full_pkt)
+		len = skb->len;
+	else
+		len = min_t(int, skb->len, MAX_HEADER + 128);
+
+	headroom = skb_headroom(skb);
+	tailroom = skb_tailroom(skb);
+
+	has_mac = skb_mac_header_was_set(skb);
+	has_trans = skb_transport_header_was_set(skb);
+
+	printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
+	       "mac=(%d,%d) net=(%d,%d) trans=%d\n"
+	       "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
+	       "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
+	       "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
+	       level, skb->len, headroom, skb_headlen(skb), tailroom,
+	       has_mac ? skb->mac_header : -1,
+	       has_mac ? skb_mac_header_len(skb) : -1,
+	       skb->network_header,
+	       has_trans ? skb_network_header_len(skb) : -1,
+	       has_trans ? skb->transport_header : -1,
+	       sh->tx_flags, sh->nr_frags,
+	       sh->gso_size, sh->gso_type, sh->gso_segs,
+	       skb->csum, skb->ip_summed, skb->csum_complete_sw,
+	       skb->csum_valid, skb->csum_level,
+	       skb->hash, skb->sw_hash, skb->l4_hash,
+	       ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
+
+	if (dev)
+		printk("%sdev name=%s feat=0x%pNF\n",
+		       level, dev->name, &dev->features);
+	if (sk)
+		printk("%ssk family=%hu type=%u proto=%u\n",
+		       level, sk->sk_family, sk->sk_type, sk->sk_protocol);
+
+	if (full_pkt && headroom)
+		print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET,
+			       16, 1, skb->head, headroom, false);
+
+	seg_len = min_t(int, skb_headlen(skb), len);
+	if (seg_len)
+		print_hex_dump(level, "skb linear:   ", DUMP_PREFIX_OFFSET,
+			       16, 1, skb->data, seg_len, false);
+	len -= seg_len;
+
+	if (full_pkt && tailroom)
+		print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET,
+			       16, 1, skb_tail_pointer(skb), tailroom, false);
+
+	for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		u32 p_off, p_len, copied;
+		struct page *p;
+		u8 *vaddr;
+
+		skb_frag_foreach_page(frag, skb_frag_off(frag),
+				      skb_frag_size(frag), p, p_off, p_len,
+				      copied) {
+			seg_len = min_t(int, p_len, len);
+			vaddr = kmap_atomic(p);
+			print_hex_dump(level, "skb frag:     ",
+				       DUMP_PREFIX_OFFSET,
+				       16, 1, vaddr + p_off, seg_len, false);
+			kunmap_atomic(vaddr);
+			len -= seg_len;
+			if (!len)
+				break;
+		}
+	}
+
+	if (full_pkt && skb_has_frag_list(skb)) {
+		printk("skb fraglist:\n");
+		skb_walk_frags(skb, list_skb)
+			skb_dump(level, list_skb, true);
+	}
+}
+EXPORT_SYMBOL(skb_dump);
+
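skb_dump() is deliberately heavy (per-fragment kmap plus hex dumps), hence the net_ratelimit() requirement in its comment and the can_dump_full budget of five full-packet dumps. A minimal call-site sketch for a ratelimited error path:

	if (net_ratelimit())
		skb_dump(KERN_ERR, skb, true);	/* full packet while budget lasts */
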
 /**
  *	skb_tx_error - report an sk_buff xmit error
  *	@skb: buffer that triggered an error
@@ -796,9 +933,7 @@
 	new->dev		= old->dev;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	skb_dst_copy(new, old);
-#ifdef CONFIG_XFRM
-	new->sp			= secpath_get(old->sp);
-#endif
+	__skb_ext_copy(new, old);
 	__nf_copy(new, old, false);
 
 	/* Note : this field could be in headers_start/headers_end section
@@ -876,6 +1011,31 @@
 }
 
 /**
+ * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
+ * @first: first sk_buff of the msg
+ */
+struct sk_buff *alloc_skb_for_msg(struct sk_buff *first)
+{
+	struct sk_buff *n;
+
+	n = alloc_skb(0, GFP_ATOMIC);
+	if (!n)
+		return NULL;
+
+	n->len = first->len;
+	n->data_len = first->len;
+	n->truesize = first->truesize;
+
+	skb_shinfo(n)->frag_list = first;
+
+	__copy_skb_header(n, first);
+	n->destructor = NULL;
+
+	return n;
+}
+EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
+
+/**
  *	skb_morph	-	morph one skb into another
  *	@dst: the skb to receive the contents
  *	@src: the skb to supply the contents
@@ -999,7 +1159,11 @@
 			uarg->len++;
 			uarg->bytelen = bytelen;
 			atomic_set(&sk->sk_zckey, ++next);
-			sock_zerocopy_get(uarg);
+
+			/* no extra ref when appending to datagram (MSG_MORE) */
+			if (sk->sk_type == SOCK_STREAM)
+				sock_zerocopy_get(uarg);
+
 			return uarg;
 		}
 	}
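Only stream sockets now take the extra user reference when an existing uarg is extended; datagram appends under MSG_MORE skip it, so the unwind path must be told whether a reference was taken -- which is what the have_uref parameter added to sock_zerocopy_put_abort() further down is for. A hedged caller sketch (the UDP/raw append paths track this in a local flag along these lines):

	bool extra_uref = false;

	uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
	if (!uarg)
		return -ENOBUFS;
	extra_uref = !skb_zcopy(skb);	/* ref taken only for a fresh uarg */

	/* ... build and queue the zerocopy skb; on failure: */
	if (err)
		sock_zerocopy_put_abort(uarg, extra_uref);
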
@@ -1089,7 +1253,7 @@
 }
 EXPORT_SYMBOL_GPL(sock_zerocopy_put);
 
-void sock_zerocopy_put_abort(struct ubuf_info *uarg)
+void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
 {
 	if (uarg) {
 		struct sock *sk = skb_from_uarg(uarg)->sk;
@@ -1097,13 +1261,17 @@
 		atomic_dec(&sk->sk_zckey);
 		uarg->len--;
 
-		sock_zerocopy_put(uarg);
+		if (have_uref)
+			sock_zerocopy_put(uarg);
 	}
 }
 EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
 
-extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
-				   struct iov_iter *from, size_t length);
+int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
+{
+	return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
 
 int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 			     struct msghdr *msg, int len,
@@ -1131,7 +1299,7 @@
 		return err;
 	}
 
-	skb_zcopy_set(skb, uarg);
+	skb_zcopy_set(skb, uarg, NULL);
 	return skb->len - orig_len;
 }
 EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
@@ -1151,7 +1319,7 @@
 			if (skb_copy_ubufs(nskb, GFP_ATOMIC))
 				return -EIO;
 		}
-		skb_zcopy_set(nskb, skb_uarg(orig));
+		skb_zcopy_set(nskb, skb_uarg(orig), NULL);
 	}
 	return 0;
 }
@@ -1207,7 +1375,7 @@
 		struct page *p;
 		u8 *vaddr;
 
-		skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
+		skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
 				      p, p_off, p_len, copied) {
 			u32 copy, done = 0;
 			vaddr = kmap_atomic(p);
@@ -1925,8 +2093,6 @@
 		struct sk_buff *insp = NULL;
 
 		do {
-			BUG_ON(!list);
-
 			if (list->len <= eat) {
 				/* Eaten as whole. */
 				eat -= list->len;
@@ -1978,10 +2144,12 @@
 			skb_frag_unref(skb, i);
 			eat -= size;
 		} else {
-			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
+
+			*frag = skb_shinfo(skb)->frags[i];
 			if (eat) {
-				skb_shinfo(skb)->frags[k].page_offset += eat;
-				skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+				skb_frag_off_add(frag, eat);
+				skb_frag_size_sub(frag, eat);
 				if (!i)
 					goto end;
 				eat = 0;
@@ -2053,7 +2221,7 @@
 				copy = len;
 
 			skb_frag_foreach_page(f,
-					      f->page_offset + offset - start,
+					      skb_frag_off(f) + offset - start,
 					      copy, p, p_off, p_len, copied) {
 				vaddr = kmap_atomic(p);
 				memcpy(to + copied, vaddr + p_off, p_len);
@@ -2229,7 +2397,7 @@
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(skb_frag_page(f),
-				     f->page_offset, skb_frag_size(f),
+				     skb_frag_off(f), skb_frag_size(f),
 				     offset, len, spd, false, sk, pipe))
 			return true;
 	}
@@ -2298,6 +2466,7 @@
 		kv.iov_base = skb->data + offset;
 		kv.iov_len = slen;
 		memset(&msg, 0, sizeof(msg));
+		msg.msg_flags = MSG_DONTWAIT;
 
 		ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
 		if (ret <= 0)
@@ -2318,20 +2487,20 @@
 	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
 		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
 
-		if (offset < frag->size)
+		if (offset < skb_frag_size(frag))
 			break;
 
-		offset -= frag->size;
+		offset -= skb_frag_size(frag);
 	}
 
 	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
 		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
 
-		slen = min_t(size_t, len, frag->size - offset);
+		slen = min_t(size_t, len, skb_frag_size(frag) - offset);
 
 		while (slen) {
-			ret = kernel_sendpage_locked(sk, frag->page.p,
-						     frag->page_offset + offset,
+			ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
+						     skb_frag_off(frag) + offset,
 						     slen, MSG_DONTWAIT);
 			if (ret <= 0)
 				goto error;
@@ -2366,19 +2535,6 @@
 }
 EXPORT_SYMBOL_GPL(skb_send_sock_locked);
 
-/* Send skb data on a socket. */
-int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
-{
-	int ret = 0;
-
-	lock_sock(sk);
-	ret = skb_send_sock_locked(sk, skb, offset, len);
-	release_sock(sk);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(skb_send_sock);
-
 /**
  *	skb_store_bits - store bits from kernel buffer to skb
  *	@skb: destination buffer
@@ -2426,7 +2582,7 @@
 				copy = len;
 
 			skb_frag_foreach_page(frag,
-					      frag->page_offset + offset - start,
+					      skb_frag_off(frag) + offset - start,
 					      copy, p, p_off, p_len, copied) {
 				vaddr = kmap_atomic(p);
 				memcpy(vaddr + p_off, from + copied, p_len);
@@ -2481,7 +2637,8 @@
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
-		csum = ops->update(skb->data + offset, copy, csum);
+		csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
+				       skb->data + offset, copy, csum);
 		if ((len -= copy) == 0)
 			return csum;
 		offset += copy;
@@ -2505,12 +2662,16 @@
 				copy = len;
 
 			skb_frag_foreach_page(frag,
-					      frag->page_offset + offset - start,
+					      skb_frag_off(frag) + offset - start,
 					      copy, p, p_off, p_len, copied) {
 				vaddr = kmap_atomic(p);
-				csum2 = ops->update(vaddr + p_off, p_len, 0);
+				csum2 = INDIRECT_CALL_1(ops->update,
+							csum_partial_ext,
+							vaddr + p_off, p_len, 0);
 				kunmap_atomic(vaddr);
-				csum = ops->combine(csum, csum2, pos, p_len);
+				csum = INDIRECT_CALL_1(ops->combine,
+						       csum_block_add_ext, csum,
+						       csum2, pos, p_len);
 				pos += p_len;
 			}
 
@@ -2533,7 +2694,8 @@
 				copy = len;
 			csum2 = __skb_checksum(frag_iter, offset - start,
 					       copy, 0, ops);
-			csum = ops->combine(csum, csum2, pos, copy);
+			csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
+					       csum, csum2, pos, copy);
 			if ((len -= copy) == 0)
 				return csum;
 			offset += copy;
@@ -2599,7 +2761,7 @@
 				copy = len;
 
 			skb_frag_foreach_page(frag,
-					      frag->page_offset + offset - start,
+					      skb_frag_off(frag) + offset - start,
 					      copy, p, p_off, p_len, copied) {
 				vaddr = kmap_atomic(p);
 				csum2 = csum_partial_copy_nocheck(vaddr + p_off,
@@ -2645,6 +2807,65 @@
 }
 EXPORT_SYMBOL(skb_copy_and_csum_bits);
 
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
+{
+	__sum16 sum;
+
+	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
+	/* See comments in __skb_checksum_complete(). */
+	if (likely(!sum)) {
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    !skb->csum_complete_sw)
+			netdev_rx_csum_fault(skb->dev, skb);
+	}
+	if (!skb_shared(skb))
+		skb->csum_valid = !sum;
+	return sum;
+}
+EXPORT_SYMBOL(__skb_checksum_complete_head);
+
+/* This function assumes skb->csum already holds pseudo header's checksum,
+ * which has been changed from the hardware checksum, for example, by
+ * __skb_checksum_validate_complete(). And, the original skb->csum must
+ * have been validated unsuccessfully for CHECKSUM_COMPLETE case.
+ *
+ * It returns non-zero if the recomputed checksum is still invalid, otherwise
+ * zero. The new checksum is stored back into skb->csum unless the skb is
+ * shared.
+ */
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+	__wsum csum;
+	__sum16 sum;
+
+	csum = skb_checksum(skb, 0, skb->len, 0);
+
+	sum = csum_fold(csum_add(skb->csum, csum));
+	/* This check is inverted, because we already knew the hardware
+	 * checksum is invalid before calling this function. So, if the
+	 * re-computed checksum is valid instead, then we have a mismatch
+	 * between the original skb->csum and skb_checksum(). This means either
+	 * the original hardware checksum is incorrect or we screw up skb->csum
+	 * when moving skb->data around.
+	 */
+	if (likely(!sum)) {
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    !skb->csum_complete_sw)
+			netdev_rx_csum_fault(skb->dev, skb);
+	}
+
+	if (!skb_shared(skb)) {
+		/* Save full packet checksum */
+		skb->csum = csum;
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		skb->csum_complete_sw = 1;
+		skb->csum_valid = !sum;
+	}
+
+	return sum;
+}
+EXPORT_SYMBOL(__skb_checksum_complete);
+
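__skb_checksum_complete() is the slow tail of the usual receive-side validation chain; the fast paths return before it. A simplified, hedged sketch of that chain (the real helpers, e.g. the __skb_checksum_validate() macro, also fold the pseudo-header for CHECKSUM_COMPLETE first):

	/* 1. Hardware already verified the checksum? Done. */
	if (skb_csum_unnecessary(skb))
		return 0;
	/* 2. Recompute in software; a non-zero folded sum means corrupt. */
	if (__skb_checksum_complete(skb))
		return -EINVAL;		/* drop: checksum failure */
	return 0;
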
 static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
 {
 	net_warn_ratelimited(
@@ -2756,11 +2977,15 @@
 	skb_zerocopy_clone(to, from, GFP_ATOMIC);
 
 	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
+		int size;
+
 		if (!len)
 			break;
 		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
-		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
-		len -= skb_shinfo(to)->frags[j].size;
+		size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]),
+					len);
+		skb_frag_size_set(&skb_shinfo(to)->frags[j], size);
+		len -= size;
 		skb_frag_ref(to, j);
 		j++;
 	}
@@ -2962,28 +3187,6 @@
 }
 EXPORT_SYMBOL(skb_append);
 
-/**
- *	skb_insert	-	insert a buffer
- *	@old: buffer to insert before
- *	@newsk: buffer to insert
- *	@list: list to use
- *
- *	Place a packet before a given packet in a list. The list locks are
- * 	taken and this function is atomic with respect to other list locked
- *	calls.
- *
- *	A buffer cannot be placed on two lists at the same time.
- */
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&list->lock, flags);
-	__skb_insert(newsk, old->prev, old, list);
-	spin_unlock_irqrestore(&list->lock, flags);
-}
-EXPORT_SYMBOL(skb_insert);
-
 static inline void skb_split_inside_header(struct sk_buff *skb,
 					   struct sk_buff* skb1,
 					   const u32 len, const int pos)
@@ -3033,7 +3236,7 @@
 				 * 2. Split is accurately. We make this.
 				 */
 				skb_frag_ref(skb, i);
-				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
+				skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos);
 				skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
 				skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
 				skb_shinfo(skb)->nr_frags++;
@@ -3096,7 +3299,7 @@
 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 {
 	int from, to, merge, todo;
-	struct skb_frag_struct *fragfrom, *fragto;
+	skb_frag_t *fragfrom, *fragto;
 
 	BUG_ON(shiftlen > skb->len);
 
@@ -3115,7 +3318,7 @@
 	 */
 	if (!to ||
 	    !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
-			      fragfrom->page_offset)) {
+			      skb_frag_off(fragfrom))) {
 		merge = -1;
 	} else {
 		merge = to - 1;
@@ -3132,7 +3335,7 @@
 
 			skb_frag_size_add(fragto, shiftlen);
 			skb_frag_size_sub(fragfrom, shiftlen);
-			fragfrom->page_offset += shiftlen;
+			skb_frag_off_add(fragfrom, shiftlen);
 
 			goto onlymerged;
 		}
@@ -3163,11 +3366,11 @@
 
 		} else {
 			__skb_frag_ref(fragfrom);
-			fragto->page = fragfrom->page;
-			fragto->page_offset = fragfrom->page_offset;
+			skb_frag_page_copy(fragto, fragfrom);
+			skb_frag_off_copy(fragto, fragfrom);
 			skb_frag_size_set(fragto, todo);
 
-			fragfrom->page_offset += todo;
+			skb_frag_off_add(fragfrom, todo);
 			skb_frag_size_sub(fragfrom, todo);
 			todo = 0;
 
@@ -3292,7 +3495,7 @@
 			if (!st->frag_data)
 				st->frag_data = kmap_atomic(skb_frag_page(frag));
 
-			*data = (u8 *) st->frag_data + frag->page_offset +
+			*data = (u8 *) st->frag_data + skb_frag_off(frag) +
 				(abs_offset - st->stepped_offset);
 
 			return block_limit - abs_offset;
@@ -3382,64 +3585,6 @@
 }
 EXPORT_SYMBOL(skb_find_text);
 
-/**
- * skb_append_datato_frags - append the user data to a skb
- * @sk: sock  structure
- * @skb: skb structure to be appended with user data.
- * @getfrag: call back function to be used for getting the user data
- * @from: pointer to user message iov
- * @length: length of the iov message
- *
- * Description: This procedure append the user data in the fragment part
- * of the skb if any page alloc fails user this procedure returns  -ENOMEM
- */
-int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-			int (*getfrag)(void *from, char *to, int offset,
-					int len, int odd, struct sk_buff *skb),
-			void *from, int length)
-{
-	int frg_cnt = skb_shinfo(skb)->nr_frags;
-	int copy;
-	int offset = 0;
-	int ret;
-	struct page_frag *pfrag = &current->task_frag;
-
-	do {
-		/* Return error if we don't have space for new frag */
-		if (frg_cnt >= MAX_SKB_FRAGS)
-			return -EMSGSIZE;
-
-		if (!sk_page_frag_refill(sk, pfrag))
-			return -ENOMEM;
-
-		/* copy the user data to page */
-		copy = min_t(int, length, pfrag->size - pfrag->offset);
-
-		ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
-			      offset, copy, 0, skb);
-		if (ret < 0)
-			return -EFAULT;
-
-		/* copy was successful so update the size parameters */
-		skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
-				   copy);
-		frg_cnt++;
-		pfrag->offset += copy;
-		get_page(pfrag->page);
-
-		skb->truesize += copy;
-		refcount_add(copy, &sk->sk_wmem_alloc);
-		skb->len += copy;
-		skb->data_len += copy;
-		offset += copy;
-		length -= copy;
-
-	} while (length > 0);
-
-	return 0;
-}
-EXPORT_SYMBOL(skb_append_datato_frags);
-
 int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
 			 int offset, size_t size)
 {
@@ -3486,10 +3631,10 @@
 	struct page *page;
 
 	page = virt_to_head_page(frag_skb->head);
-	head_frag.page.p = page;
-	head_frag.page_offset = frag_skb->data -
-		(unsigned char *)page_address(page);
-	head_frag.size = skb_headlen(frag_skb);
+	__skb_frag_set_page(&head_frag, page);
+	skb_frag_off_set(&head_frag, frag_skb->data -
+			 (unsigned char *)page_address(page));
+	skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
 	return head_frag;
 }
 
@@ -3525,6 +3670,25 @@
 	int pos;
 	int dummy;
 
+	if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
+	    (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
+		/* gso_size is untrusted, and we have a frag_list with a linear
+		 * non head_frag head.
+		 *
+		 * (We assume checking the first list_skb member suffices;
+		 * i.e., if any of the list_skb members has a non-head_frag
+		 * head, then the first one does too.)
+		 *
+		 * If head_skb's headlen does not fit the requested gso_size,
+		 * it means the frag_list members do NOT terminate on exact
+		 * gso_size boundaries. Hence we cannot perform skb_frag_t page
+		 * sharing. Therefore we must fall back to copying the
+		 * frag_list skbs; we do so by disabling SG.
+		 */
+		if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
+			features &= ~NETIF_F_SG;
+	}
+
 	__skb_push(head_skb, doffset);
 	proto = skb_network_protocol(head_skb, &dummy);
 	if (unlikely(!proto))
@@ -3732,7 +3896,7 @@
 			size = skb_frag_size(nskb_frag);
 
 			if (pos < offset) {
-				nskb_frag->page_offset += offset - pos;
+				skb_frag_off_add(nskb_frag, offset - pos);
 				skb_frag_size_sub(nskb_frag, offset - pos);
 			}
 
@@ -3828,7 +3992,7 @@
 	unsigned int delta_truesize;
 	struct sk_buff *lp;
 
-	if (unlikely(p->len + len >= 65536))
+	if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
 		return -E2BIG;
 
 	lp = NAPI_GRO_CB(p)->last;
@@ -3853,7 +4017,7 @@
 			*--frag = *--frag2;
 		} while (--i);
 
-		frag->page_offset += offset;
+		skb_frag_off_add(frag, offset);
 		skb_frag_size_sub(frag, offset);
 
 		/* all fragments truesize : remove (head size + sk_buff) */
@@ -3882,8 +4046,8 @@
 
 		pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
 
-		frag->page.p	  = page;
-		frag->page_offset = first_offset;
+		__skb_frag_set_page(frag, page);
+		skb_frag_off_set(frag, first_offset);
 		skb_frag_size_set(frag, first_size);
 
 		memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
@@ -3899,7 +4063,7 @@
 	if (offset > headlen) {
 		unsigned int eat = offset - headlen;
 
-		skbinfo->frags[0].page_offset += eat;
+		skb_frag_off_add(&skbinfo->frags[0], eat);
 		skb_frag_size_sub(&skbinfo->frags[0], eat);
 		skb->data_len -= eat;
 		skb->len -= eat;
@@ -3931,6 +4095,52 @@
 }
 EXPORT_SYMBOL_GPL(skb_gro_receive);
 
+#ifdef CONFIG_SKB_EXTENSIONS
+#define SKB_EXT_ALIGN_VALUE	8
+#define SKB_EXT_CHUNKSIZEOF(x)	(ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)
+
+static const u8 skb_ext_type_len[] = {
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	[SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
+#endif
+#ifdef CONFIG_XFRM
+	[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
+#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
+#endif
+};
+
+static __always_inline unsigned int skb_ext_total_length(void)
+{
+	return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+		skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
+#endif
+#ifdef CONFIG_XFRM
+		skb_ext_type_len[SKB_EXT_SEC_PATH] +
+#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+		skb_ext_type_len[TC_SKB_EXT] +
+#endif
+		0;
+}
+
+static void skb_extensions_init(void)
+{
+	BUILD_BUG_ON(SKB_EXT_NUM >= 8);
+	BUILD_BUG_ON(skb_ext_total_length() > 255);
+
+	skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
+					     SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
+					     0,
+					     SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+					     NULL);
+}
+#else
+static void skb_extensions_init(void) {}
+#endif
+
 void __init skb_init(void)
 {
 	skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
@@ -3945,6 +4155,7 @@
 						0,
 						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 						NULL);
+	skb_extensions_init();
 }
 
 static int
@@ -3983,7 +4194,7 @@
 			if (copy > len)
 				copy = len;
 			sg_set_page(&sg[elt], skb_frag_page(frag), copy,
-					frag->page_offset+offset-start);
+				    skb_frag_off(frag) + offset - start);
 			elt++;
 			if (!(len -= copy))
 				return elt;
@@ -4204,7 +4415,7 @@
 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 {
 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-	    (unsigned int)sk->sk_rcvbuf)
+	    (unsigned int)READ_ONCE(sk->sk_rcvbuf))
 		return -ENOMEM;
 
 	skb_orphan(skb);
@@ -4908,13 +5119,13 @@
 	skb->skb_iif = 0;
 	skb->ignore_df = 0;
 	skb_dst_drop(skb);
-	secpath_reset(skb);
-	nf_reset(skb);
+	skb_ext_reset(skb);
+	nf_reset_ct(skb);
 	nf_reset_trace(skb);
 
 #ifdef CONFIG_NET_SWITCHDEV
 	skb->offload_fwd_mark = 0;
-	skb->offload_mr_fwd_mark = 0;
+	skb->offload_l3_fwd_mark = 0;
 #endif
 
 	if (!xnet)
@@ -5007,6 +5218,8 @@
  *
  * This is a helper to do that correctly considering GSO_BY_FRAGS.
  *
+ * @skb: GSO skb
+ *
  * @seg_len: The segmented length (from skb_gso_*_seglen). In the
  *           GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
  *
@@ -5067,7 +5280,8 @@
 
 static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 {
-	int mac_len;
+	int mac_len, meta_len;
+	void *meta;
 
 	if (skb_cow(skb, skb_headroom(skb)) < 0) {
 		kfree_skb(skb);
@@ -5079,6 +5293,13 @@
 		memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
 			mac_len - VLAN_HLEN - ETH_TLEN);
 	}
+
+	meta_len = skb_metadata_len(skb);
+	if (meta_len) {
+		meta = skb_metadata_end(skb) - meta_len;
+		memmove(meta + VLAN_HLEN, meta, meta_len);
+	}
+
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }
@@ -5184,7 +5405,7 @@
 	int err;
 
 	if (likely(skb_vlan_tag_present(skb))) {
-		skb->vlan_tci = 0;
+		__vlan_hwaccel_clear_tag(skb);
 	} else {
 		if (unlikely(!eth_type_vlan(skb->protocol)))
 			return 0;
@@ -5237,6 +5458,176 @@
 }
 EXPORT_SYMBOL(skb_vlan_push);
 
+/* Update the ethertype of hdr and the skb csum value if required. */
+static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
+			     __be16 ethertype)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		__be16 diff[] = { ~hdr->h_proto, ethertype };
+
+		skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
+	}
+
+	hdr->h_proto = ethertype;
+}
+
+/**
+ * skb_mpls_push() - push a new MPLS header after the mac header
+ *
+ * @skb: buffer
+ * @mpls_lse: MPLS label stack entry to push
+ * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
+ * @mac_len: length of the MAC header
+ *
+ * Expects skb->data at mac header.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+		  int mac_len)
+{
+	struct mpls_shim_hdr *lse;
+	int err;
+
+	if (unlikely(!eth_p_mpls(mpls_proto)))
+		return -EINVAL;
+
+	/* The networking stack does not allow simultaneous tunnel and MPLS GSO. */
+	if (skb->encapsulation)
+		return -EINVAL;
+
+	err = skb_cow_head(skb, MPLS_HLEN);
+	if (unlikely(err))
+		return err;
+
+	if (!skb->inner_protocol) {
+		skb_set_inner_network_header(skb, mac_len);
+		skb_set_inner_protocol(skb, skb->protocol);
+	}
+
+	skb_push(skb, MPLS_HLEN);
+	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
+		mac_len);
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, mac_len);
+
+	lse = mpls_hdr(skb);
+	lse->label_stack_entry = mpls_lse;
+	skb_postpush_rcsum(skb, lse, MPLS_HLEN);
+
+	if (skb->dev && skb->dev->type == ARPHRD_ETHER)
+		skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
+	skb->protocol = mpls_proto;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(skb_mpls_push);
+
+/**
+ * skb_mpls_pop() - pop the outermost MPLS header
+ *
+ * @skb: buffer
+ * @next_proto: ethertype of header after popped MPLS header
+ * @mac_len: length of the MAC header
+ *
+ * Expects skb->data at mac header.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len)
+{
+	int err;
+
+	if (unlikely(!eth_p_mpls(skb->protocol)))
+		return 0;
+
+	err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
+	if (unlikely(err))
+		return err;
+
+	skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
+	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
+		mac_len);
+
+	__skb_pull(skb, MPLS_HLEN);
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, mac_len);
+
+	if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
+		struct ethhdr *hdr;
+
+		/* Use mpls_hdr() to locate the ethertype field that precedes
+		 * the MPLS stack, which accounts for any VLAN tags.
+		 */
+		hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
+		skb_mod_eth_type(skb, hdr, next_proto);
+	}
+	skb->protocol = next_proto;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(skb_mpls_pop);
+
+/**
+ * skb_mpls_update_lse() - modify outermost MPLS header and update csum
+ *
+ * @skb: buffer
+ * @mpls_lse: new MPLS label stack entry to update to
+ *
+ * Expects skb->data at mac header.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse)
+{
+	int err;
+
+	if (unlikely(!eth_p_mpls(skb->protocol)))
+		return -EINVAL;
+
+	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
+	if (unlikely(err))
+		return err;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		__be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse };
+
+		skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
+	}
+
+	mpls_hdr(skb)->label_stack_entry = mpls_lse;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(skb_mpls_update_lse);
+
+/**
+ * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header
+ *
+ * @skb: buffer
+ *
+ * Expects skb->data at mac header.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_mpls_dec_ttl(struct sk_buff *skb)
+{
+	u32 lse;
+	u8 ttl;
+
+	if (unlikely(!eth_p_mpls(skb->protocol)))
+		return -EINVAL;
+
+	lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
+	ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+	if (!--ttl)
+		return -EINVAL;
+
+	lse &= ~MPLS_LS_TTL_MASK;
+	lse |= ttl << MPLS_LS_TTL_SHIFT;
+
+	return skb_mpls_update_lse(skb, cpu_to_be32(lse));
+}
+EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
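A hedged composition sketch for the helpers above; the label, TTL and protocol values are arbitrary examples, and the MPLS_LS_* constants come from <uapi/linux/mpls.h>.

	/* Sketch: push one bottom-of-stack LSE (label 100, TTL 64), then
	 * age it by one hop.
	 */
	static int demo_mpls_push_and_age(struct sk_buff *skb)
	{
		u32 lse = (100 << MPLS_LS_LABEL_SHIFT) |
			  (1 << MPLS_LS_S_SHIFT) |
			  (64 << MPLS_LS_TTL_SHIFT);
		int err;

		err = skb_mpls_push(skb, cpu_to_be32(lse), htons(ETH_P_MPLS_UC),
				    skb->mac_len);
		if (err)
			return err;
		return skb_mpls_dec_ttl(skb);
	}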
+
 /**
  * alloc_skb_with_frags - allocate skb with page frags
  *
@@ -5258,7 +5649,6 @@
 	unsigned long chunk;
 	struct sk_buff *skb;
 	struct page *page;
-	gfp_t gfp_head;
 	int i;
 
 	*errcode = -EMSGSIZE;
@@ -5268,12 +5658,8 @@
 	if (npages > MAX_SKB_FRAGS)
 		return NULL;
 
-	gfp_head = gfp_mask;
-	if (gfp_head & __GFP_DIRECT_RECLAIM)
-		gfp_head |= __GFP_RETRY_MAYFAIL;
-
 	*errcode = -ENOBUFS;
-	skb = alloc_skb(header_len, gfp_head);
+	skb = alloc_skb(header_len, gfp_mask);
 	if (!skb)
 		return NULL;
 
@@ -5482,7 +5868,7 @@
 				 *    where splitting is expensive.
 				 * 2. Split is accurate. We do this.
 				 */
-				shinfo->frags[0].page_offset += off - pos;
+				skb_frag_off_add(&shinfo->frags[0], off - pos);
 				skb_frag_size_sub(&shinfo->frags[0], off - pos);
 			}
 			skb_frag_ref(skb, i);
@@ -5581,3 +5967,148 @@
 	 */
 	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
 }
+
+#ifdef CONFIG_SKB_EXTENSIONS
+static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
+{
+	return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
+}
+
+static struct skb_ext *skb_ext_alloc(void)
+{
+	struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
+
+	if (new) {
+		memset(new->offset, 0, sizeof(new->offset));
+		refcount_set(&new->refcnt, 1);
+	}
+
+	return new;
+}
+
+static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
+					 unsigned int old_active)
+{
+	struct skb_ext *new;
+
+	if (refcount_read(&old->refcnt) == 1)
+		return old;
+
+	new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
+	if (!new)
+		return NULL;
+
+	memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
+	refcount_set(&new->refcnt, 1);
+
+#ifdef CONFIG_XFRM
+	if (old_active & (1 << SKB_EXT_SEC_PATH)) {
+		struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH);
+		unsigned int i;
+
+		for (i = 0; i < sp->len; i++)
+			xfrm_state_hold(sp->xvec[i]);
+	}
+#endif
+	__skb_ext_put(old);
+	return new;
+}
+
+/**
+ * skb_ext_add - allocate space for given extension, COW if needed
+ * @skb: buffer
+ * @id: extension to allocate space for
+ *
+ * Allocates enough space for the given extension.
+ * If the extension is already present, a pointer to that extension
+ * is returned.
+ *
+ * If the skb was cloned, COW applies and the returned memory can be
+ * modified without changing the extension space of cloned buffers.
+ *
+ * Returns pointer to the extension or NULL on allocation failure.
+ */
+void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
+{
+	struct skb_ext *new, *old = NULL;
+	unsigned int newlen, newoff;
+
+	if (skb->active_extensions) {
+		old = skb->extensions;
+
+		new = skb_ext_maybe_cow(old, skb->active_extensions);
+		if (!new)
+			return NULL;
+
+		if (__skb_ext_exist(new, id))
+			goto set_active;
+
+		newoff = new->chunks;
+	} else {
+		newoff = SKB_EXT_CHUNKSIZEOF(*new);
+
+		new = skb_ext_alloc();
+		if (!new)
+			return NULL;
+	}
+
+	newlen = newoff + skb_ext_type_len[id];
+	new->chunks = newlen;
+	new->offset[id] = newoff;
+set_active:
+	skb->extensions = new;
+	skb->active_extensions |= 1 << id;
+	return skb_ext_get_ptr(new, id);
+}
+EXPORT_SYMBOL(skb_ext_add);
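A usage sketch (assuming CONFIG_NET_TC_SKB_EXT): a classifier can attach per-packet state through this API roughly as follows; chain_index is a hypothetical local.

	struct tc_skb_ext *ext = skb_ext_add(skb, TC_SKB_EXT);

	if (!ext)
		return -ENOMEM;		/* allocation or COW failed */
	ext->chain = chain_index;	/* record the chain to resume from */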
+
+#ifdef CONFIG_XFRM
+static void skb_ext_put_sp(struct sec_path *sp)
+{
+	unsigned int i;
+
+	for (i = 0; i < sp->len; i++)
+		xfrm_state_put(sp->xvec[i]);
+}
+#endif
+
+void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
+{
+	struct skb_ext *ext = skb->extensions;
+
+	skb->active_extensions &= ~(1 << id);
+	if (skb->active_extensions == 0) {
+		skb->extensions = NULL;
+		__skb_ext_put(ext);
+#ifdef CONFIG_XFRM
+	} else if (id == SKB_EXT_SEC_PATH &&
+		   refcount_read(&ext->refcnt) == 1) {
+		struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH);
+
+		skb_ext_put_sp(sp);
+		sp->len = 0;
+#endif
+	}
+}
+EXPORT_SYMBOL(__skb_ext_del);
+
+void __skb_ext_put(struct skb_ext *ext)
+{
+	/* If this is the last clone, nothing can increment
+	 * it after the check passes.  This avoids one atomic op.
+	 */
+	if (refcount_read(&ext->refcnt) == 1)
+		goto free_now;
+
+	if (!refcount_dec_and_test(&ext->refcnt))
+		return;
+free_now:
+#ifdef CONFIG_XFRM
+	if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
+		skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
+#endif
+
+	kmem_cache_free(skbuff_ext_cache, ext);
+}
+EXPORT_SYMBOL(__skb_ext_put);
+#endif /* CONFIG_SKB_EXTENSIONS */
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
new file mode 100644
index 0000000..0675d02
--- /dev/null
+++ b/net/core/skmsg.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#include <linux/skmsg.h>
+#include <linux/skbuff.h>
+#include <linux/scatterlist.h>
+
+#include <net/sock.h>
+#include <net/tcp.h>
+
+static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
+{
+	if (msg->sg.end > msg->sg.start &&
+	    elem_first_coalesce < msg->sg.end)
+		return true;
+
+	if (msg->sg.end < msg->sg.start &&
+	    (elem_first_coalesce > msg->sg.start ||
+	     elem_first_coalesce < msg->sg.end))
+		return true;
+
+	return false;
+}
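A worked example of the wrapped case above: with sg.start = 14 and sg.end = 2 (the ring has wrapped), elem_first_coalesce values of 15, 0 or 1 all lie inside the live region, so coalescing into the tail element is permitted.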
+
+int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
+		 int elem_first_coalesce)
+{
+	struct page_frag *pfrag = sk_page_frag(sk);
+	int ret = 0;
+
+	len -= msg->sg.size;
+	while (len > 0) {
+		struct scatterlist *sge;
+		u32 orig_offset;
+		int use, i;
+
+		if (!sk_page_frag_refill(sk, pfrag))
+			return -ENOMEM;
+
+		orig_offset = pfrag->offset;
+		use = min_t(int, len, pfrag->size - orig_offset);
+		if (!sk_wmem_schedule(sk, use))
+			return -ENOMEM;
+
+		i = msg->sg.end;
+		sk_msg_iter_var_prev(i);
+		sge = &msg->sg.data[i];
+
+		if (sk_msg_try_coalesce_ok(msg, elem_first_coalesce) &&
+		    sg_page(sge) == pfrag->page &&
+		    sge->offset + sge->length == orig_offset) {
+			sge->length += use;
+		} else {
+			if (sk_msg_full(msg)) {
+				ret = -ENOSPC;
+				break;
+			}
+
+			sge = &msg->sg.data[msg->sg.end];
+			sg_unmark_end(sge);
+			sg_set_page(sge, pfrag->page, use, orig_offset);
+			get_page(pfrag->page);
+			sk_msg_iter_next(msg, end);
+		}
+
+		sk_mem_charge(sk, use);
+		msg->sg.size += use;
+		pfrag->offset += use;
+		len -= use;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_alloc);
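A hedged lifecycle sketch: sk_msg_init() and sk_msg_free() (both from <linux/skmsg.h>) pair with sk_msg_alloc() like so.

	/* Sketch: reserve room for len bytes of sg data, dropping any
	 * partial charge if the ring fills or memory runs out.
	 */
	static int demo_msg_reserve(struct sock *sk, struct sk_msg *msg, int len)
	{
		int err;

		sk_msg_init(msg);
		err = sk_msg_alloc(sk, msg, len, 0);
		if (err)
			sk_msg_free(sk, msg);
		return err;
	}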
+
+int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
+		 u32 off, u32 len)
+{
+	int i = src->sg.start;
+	struct scatterlist *sge = sk_msg_elem(src, i);
+	struct scatterlist *sgd = NULL;
+	u32 sge_len, sge_off;
+
+	while (off) {
+		if (sge->length > off)
+			break;
+		off -= sge->length;
+		sk_msg_iter_var_next(i);
+		if (i == src->sg.end && off)
+			return -ENOSPC;
+		sge = sk_msg_elem(src, i);
+	}
+
+	while (len) {
+		sge_len = sge->length - off;
+		if (sge_len > len)
+			sge_len = len;
+
+		if (dst->sg.end)
+			sgd = sk_msg_elem(dst, dst->sg.end - 1);
+
+		if (sgd &&
+		    (sg_page(sge) == sg_page(sgd)) &&
+		    (sg_virt(sge) + off == sg_virt(sgd) + sgd->length)) {
+			sgd->length += sge_len;
+			dst->sg.size += sge_len;
+		} else if (!sk_msg_full(dst)) {
+			sge_off = sge->offset + off;
+			sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off);
+		} else {
+			return -ENOSPC;
+		}
+
+		off = 0;
+		len -= sge_len;
+		sk_mem_charge(sk, sge_len);
+		sk_msg_iter_var_next(i);
+		if (i == src->sg.end && len)
+			return -ENOSPC;
+		sge = sk_msg_elem(src, i);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sk_msg_clone);
+
+void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes)
+{
+	int i = msg->sg.start;
+
+	do {
+		struct scatterlist *sge = sk_msg_elem(msg, i);
+
+		if (bytes < sge->length) {
+			sge->length -= bytes;
+			sge->offset += bytes;
+			sk_mem_uncharge(sk, bytes);
+			break;
+		}
+
+		sk_mem_uncharge(sk, sge->length);
+		bytes -= sge->length;
+		sge->length = 0;
+		sge->offset = 0;
+		sk_msg_iter_var_next(i);
+	} while (bytes && i != msg->sg.end);
+	msg->sg.start = i;
+}
+EXPORT_SYMBOL_GPL(sk_msg_return_zero);
+
+void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes)
+{
+	int i = msg->sg.start;
+
+	do {
+		struct scatterlist *sge = &msg->sg.data[i];
+		int uncharge = (bytes < sge->length) ? bytes : sge->length;
+
+		sk_mem_uncharge(sk, uncharge);
+		bytes -= uncharge;
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
+}
+EXPORT_SYMBOL_GPL(sk_msg_return);
+
+static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
+			    bool charge)
+{
+	struct scatterlist *sge = sk_msg_elem(msg, i);
+	u32 len = sge->length;
+
+	if (charge)
+		sk_mem_uncharge(sk, len);
+	if (!msg->skb)
+		put_page(sg_page(sge));
+	memset(sge, 0, sizeof(*sge));
+	return len;
+}
+
+static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i,
+			 bool charge)
+{
+	struct scatterlist *sge = sk_msg_elem(msg, i);
+	int freed = 0;
+
+	while (msg->sg.size) {
+		msg->sg.size -= sge->length;
+		freed += sk_msg_free_elem(sk, msg, i, charge);
+		sk_msg_iter_var_next(i);
+		sk_msg_check_to_free(msg, i, msg->sg.size);
+		sge = sk_msg_elem(msg, i);
+	}
+	consume_skb(msg->skb);
+	sk_msg_init(msg);
+	return freed;
+}
+
+int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg)
+{
+	return __sk_msg_free(sk, msg, msg->sg.start, false);
+}
+EXPORT_SYMBOL_GPL(sk_msg_free_nocharge);
+
+int sk_msg_free(struct sock *sk, struct sk_msg *msg)
+{
+	return __sk_msg_free(sk, msg, msg->sg.start, true);
+}
+EXPORT_SYMBOL_GPL(sk_msg_free);
+
+static void __sk_msg_free_partial(struct sock *sk, struct sk_msg *msg,
+				  u32 bytes, bool charge)
+{
+	struct scatterlist *sge;
+	u32 i = msg->sg.start;
+
+	while (bytes) {
+		sge = sk_msg_elem(msg, i);
+		if (!sge->length)
+			break;
+		if (bytes < sge->length) {
+			if (charge)
+				sk_mem_uncharge(sk, bytes);
+			sge->length -= bytes;
+			sge->offset += bytes;
+			msg->sg.size -= bytes;
+			break;
+		}
+
+		msg->sg.size -= sge->length;
+		bytes -= sge->length;
+		sk_msg_free_elem(sk, msg, i, charge);
+		sk_msg_iter_var_next(i);
+		sk_msg_check_to_free(msg, i, bytes);
+	}
+	msg->sg.start = i;
+}
+
+void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes)
+{
+	__sk_msg_free_partial(sk, msg, bytes, true);
+}
+EXPORT_SYMBOL_GPL(sk_msg_free_partial);
+
+void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
+				  u32 bytes)
+{
+	__sk_msg_free_partial(sk, msg, bytes, false);
+}
+
+void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len)
+{
+	int trim = msg->sg.size - len;
+	u32 i = msg->sg.end;
+
+	if (trim <= 0) {
+		WARN_ON(trim < 0);
+		return;
+	}
+
+	sk_msg_iter_var_prev(i);
+	msg->sg.size = len;
+	while (msg->sg.data[i].length &&
+	       trim >= msg->sg.data[i].length) {
+		trim -= msg->sg.data[i].length;
+		sk_msg_free_elem(sk, msg, i, true);
+		sk_msg_iter_var_prev(i);
+		if (!trim)
+			goto out;
+	}
+
+	msg->sg.data[i].length -= trim;
+	sk_mem_uncharge(sk, trim);
+	/* Adjust copybreak if it falls into the trimmed part of the last buf */
+	if (msg->sg.curr == i && msg->sg.copybreak > msg->sg.data[i].length)
+		msg->sg.copybreak = msg->sg.data[i].length;
+out:
+	sk_msg_iter_var_next(i);
+	msg->sg.end = i;
+
+	/* If we trim data by a full sg elem before the curr pointer, update
+	 * copybreak and curr so that any future copy operations
+	 * start at the new copy location.
+	 * However, trimmed data that has not yet been used in a copy op
+	 * does not require an update.
+	 */
+	if (!msg->sg.size) {
+		msg->sg.curr = msg->sg.start;
+		msg->sg.copybreak = 0;
+	} else if (sk_msg_iter_dist(msg->sg.start, msg->sg.curr) >=
+		   sk_msg_iter_dist(msg->sg.start, msg->sg.end)) {
+		sk_msg_iter_var_prev(i);
+		msg->sg.curr = i;
+		msg->sg.copybreak = msg->sg.data[i].length;
+	}
+}
+EXPORT_SYMBOL_GPL(sk_msg_trim);
+
+int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
+			      struct sk_msg *msg, u32 bytes)
+{
+	int i, maxpages, ret = 0, num_elems = sk_msg_elem_used(msg);
+	const int to_max_pages = MAX_MSG_FRAGS;
+	struct page *pages[MAX_MSG_FRAGS];
+	ssize_t orig, copied, use, offset;
+
+	orig = msg->sg.size;
+	while (bytes > 0) {
+		i = 0;
+		maxpages = to_max_pages - num_elems;
+		if (maxpages == 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		copied = iov_iter_get_pages(from, pages, bytes, maxpages,
+					    &offset);
+		if (copied <= 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		iov_iter_advance(from, copied);
+		bytes -= copied;
+		msg->sg.size += copied;
+
+		while (copied) {
+			use = min_t(int, copied, PAGE_SIZE - offset);
+			sg_set_page(&msg->sg.data[msg->sg.end],
+				    pages[i], use, offset);
+			sg_unmark_end(&msg->sg.data[msg->sg.end]);
+			sk_mem_charge(sk, use);
+
+			offset = 0;
+			copied -= use;
+			sk_msg_iter_next(msg, end);
+			num_elems++;
+			i++;
+		}
+		/* When zerocopy is mixed with sk_msg_*copy* operations, we
+		 * may have a copybreak set; in this case, clear it and prefer
+		 * the zerocopy remainder when possible.
+		 */
+		msg->sg.copybreak = 0;
+		msg->sg.curr = msg->sg.end;
+	}
+out:
+	/* Revert the iov_iter updates; msg will need to use 'trim' later if it
+	 * also needs to be cleared.
+	 */
+	if (ret)
+		iov_iter_revert(from, msg->sg.size - orig);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_zerocopy_from_iter);
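A pairing sketch: on failure the iov_iter is reverted but the sg ring may still hold charged pages, so callers (tls_sw, for instance) typically trim back to a size snapshotted before the call.

	int osize = msg->sg.size;	/* snapshot taken beforehand */
	int ret = sk_msg_zerocopy_from_iter(sk, from, msg, bytes);

	if (ret)
		sk_msg_trim(sk, msg, osize);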
+
+int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
+			     struct sk_msg *msg, u32 bytes)
+{
+	int ret = -ENOSPC, i = msg->sg.curr;
+	struct scatterlist *sge;
+	u32 copy, buf_size;
+	void *to;
+
+	do {
+		sge = sk_msg_elem(msg, i);
+		/* This is possible if a trim operation shrunk the buffer */
+		if (msg->sg.copybreak >= sge->length) {
+			msg->sg.copybreak = 0;
+			sk_msg_iter_var_next(i);
+			if (i == msg->sg.end)
+				break;
+			sge = sk_msg_elem(msg, i);
+		}
+
+		buf_size = sge->length - msg->sg.copybreak;
+		copy = (buf_size > bytes) ? bytes : buf_size;
+		to = sg_virt(sge) + msg->sg.copybreak;
+		msg->sg.copybreak += copy;
+		if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
+			ret = copy_from_iter_nocache(to, copy, from);
+		else
+			ret = copy_from_iter(to, copy, from);
+		if (ret != copy) {
+			ret = -EFAULT;
+			goto out;
+		}
+		bytes -= copy;
+		if (!bytes)
+			break;
+		msg->sg.copybreak = 0;
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
+out:
+	msg->sg.curr = i;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
+
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct sock *sk = psock->sk;
+	int copied = 0, num_sge;
+	struct sk_msg *msg;
+
+	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+	if (unlikely(!msg))
+		return -EAGAIN;
+	if (!sk_rmem_schedule(sk, skb, skb->len)) {
+		kfree(msg);
+		return -EAGAIN;
+	}
+
+	sk_msg_init(msg);
+	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+	if (unlikely(num_sge < 0)) {
+		kfree(msg);
+		return num_sge;
+	}
+
+	sk_mem_charge(sk, skb->len);
+	copied = skb->len;
+	msg->sg.start = 0;
+	msg->sg.size = copied;
+	msg->sg.end = num_sge;
+	msg->skb = skb;
+
+	sk_psock_queue_msg(psock, msg);
+	sk_psock_data_ready(sk, psock);
+	return copied;
+}
+
+static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
+			       u32 off, u32 len, bool ingress)
+{
+	if (ingress)
+		return sk_psock_skb_ingress(psock, skb);
+	else
+		return skb_send_sock_locked(psock->sk, skb, off, len);
+}
+
+static void sk_psock_backlog(struct work_struct *work)
+{
+	struct sk_psock *psock = container_of(work, struct sk_psock, work);
+	struct sk_psock_work_state *state = &psock->work_state;
+	struct sk_buff *skb;
+	bool ingress;
+	u32 len, off;
+	int ret;
+
+	/* Lock sock to avoid losing sk_socket during loop. */
+	lock_sock(psock->sk);
+	if (state->skb) {
+		skb = state->skb;
+		len = state->len;
+		off = state->off;
+		state->skb = NULL;
+		goto start;
+	}
+
+	while ((skb = skb_dequeue(&psock->ingress_skb))) {
+		len = skb->len;
+		off = 0;
+start:
+		ingress = tcp_skb_bpf_ingress(skb);
+		do {
+			ret = -EIO;
+			if (likely(psock->sk->sk_socket))
+				ret = sk_psock_handle_skb(psock, skb, off,
+							  len, ingress);
+			if (ret <= 0) {
+				if (ret == -EAGAIN) {
+					state->skb = skb;
+					state->len = len;
+					state->off = off;
+					goto end;
+				}
+				/* Hard errors break pipe and stop xmit. */
+				sk_psock_report_error(psock, ret ? -ret : EPIPE);
+				sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+				kfree_skb(skb);
+				goto end;
+			}
+			off += ret;
+			len -= ret;
+		} while (len);
+
+		if (!ingress)
+			kfree_skb(skb);
+	}
+end:
+	release_sock(psock->sk);
+}
+
+struct sk_psock *sk_psock_init(struct sock *sk, int node)
+{
+	struct sk_psock *psock = kzalloc_node(sizeof(*psock),
+					      GFP_ATOMIC | __GFP_NOWARN,
+					      node);
+	if (!psock)
+		return NULL;
+
+	psock->sk = sk;
+	psock->eval = __SK_NONE;
+
+	INIT_LIST_HEAD(&psock->link);
+	spin_lock_init(&psock->link_lock);
+
+	INIT_WORK(&psock->work, sk_psock_backlog);
+	INIT_LIST_HEAD(&psock->ingress_msg);
+	skb_queue_head_init(&psock->ingress_skb);
+
+	sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
+	refcount_set(&psock->refcnt, 1);
+
+	rcu_assign_sk_user_data(sk, psock);
+	sock_hold(sk);
+
+	return psock;
+}
+EXPORT_SYMBOL_GPL(sk_psock_init);
+
+struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
+{
+	struct sk_psock_link *link;
+
+	spin_lock_bh(&psock->link_lock);
+	link = list_first_entry_or_null(&psock->link, struct sk_psock_link,
+					list);
+	if (link)
+		list_del(&link->list);
+	spin_unlock_bh(&psock->link_lock);
+	return link;
+}
+
+void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
+{
+	struct sk_msg *msg, *tmp;
+
+	list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
+		list_del(&msg->list);
+		sk_msg_free(psock->sk, msg);
+		kfree(msg);
+	}
+}
+
+static void sk_psock_zap_ingress(struct sk_psock *psock)
+{
+	__skb_queue_purge(&psock->ingress_skb);
+	__sk_psock_purge_ingress_msg(psock);
+}
+
+static void sk_psock_link_destroy(struct sk_psock *psock)
+{
+	struct sk_psock_link *link, *tmp;
+
+	list_for_each_entry_safe(link, tmp, &psock->link, list) {
+		list_del(&link->list);
+		sk_psock_free_link(link);
+	}
+}
+
+static void sk_psock_destroy_deferred(struct work_struct *gc)
+{
+	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
+
+	/* No sk_callback_lock since already detached. */
+
+	/* Parser has been stopped */
+	if (psock->progs.skb_parser)
+		strp_done(&psock->parser.strp);
+
+	cancel_work_sync(&psock->work);
+
+	psock_progs_drop(&psock->progs);
+
+	sk_psock_link_destroy(psock);
+	sk_psock_cork_free(psock);
+	sk_psock_zap_ingress(psock);
+
+	if (psock->sk_redir)
+		sock_put(psock->sk_redir);
+	sock_put(psock->sk);
+	kfree(psock);
+}
+
+void sk_psock_destroy(struct rcu_head *rcu)
+{
+	struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu);
+
+	INIT_WORK(&psock->gc, sk_psock_destroy_deferred);
+	schedule_work(&psock->gc);
+}
+EXPORT_SYMBOL_GPL(sk_psock_destroy);
+
+void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
+{
+	sk_psock_cork_free(psock);
+	sk_psock_zap_ingress(psock);
+
+	write_lock_bh(&sk->sk_callback_lock);
+	sk_psock_restore_proto(sk, psock);
+	rcu_assign_sk_user_data(sk, NULL);
+	if (psock->progs.skb_parser)
+		sk_psock_stop_strp(sk, psock);
+	write_unlock_bh(&sk->sk_callback_lock);
+	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+
+	call_rcu(&psock->rcu, sk_psock_destroy);
+}
+EXPORT_SYMBOL_GPL(sk_psock_drop);
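A hedged lifetime sketch: sk_psock_put() from <linux/skmsg.h> funnels into sk_psock_drop() when the last reference is released.

	struct sk_psock *psock = sk_psock_init(sk, NUMA_NO_NODE);

	if (!psock)
		return -ENOMEM;
	/* ... attach verdict/parser programs, pass traffic ... */
	sk_psock_put(sk, psock);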
+
+static int sk_psock_map_verd(int verdict, bool redir)
+{
+	switch (verdict) {
+	case SK_PASS:
+		return redir ? __SK_REDIRECT : __SK_PASS;
+	case SK_DROP:
+	default:
+		break;
+	}
+
+	return __SK_DROP;
+}
+
+int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
+			 struct sk_msg *msg)
+{
+	struct bpf_prog *prog;
+	int ret;
+
+	preempt_disable();
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.msg_parser);
+	if (unlikely(!prog)) {
+		ret = __SK_PASS;
+		goto out;
+	}
+
+	sk_msg_compute_data_pointers(msg);
+	msg->sk = sk;
+	ret = BPF_PROG_RUN(prog, msg);
+	ret = sk_psock_map_verd(ret, msg->sk_redir);
+	psock->apply_bytes = msg->apply_bytes;
+	if (ret == __SK_REDIRECT) {
+		if (psock->sk_redir)
+			sock_put(psock->sk_redir);
+		psock->sk_redir = msg->sk_redir;
+		if (!psock->sk_redir) {
+			ret = __SK_DROP;
+			goto out;
+		}
+		sock_hold(psock->sk_redir);
+	}
+out:
+	rcu_read_unlock();
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
+
+static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
+			    struct sk_buff *skb)
+{
+	int ret;
+
+	skb->sk = psock->sk;
+	bpf_compute_data_end_sk_skb(skb);
+	preempt_disable();
+	ret = BPF_PROG_RUN(prog, skb);
+	preempt_enable();
+	/* strparser clones the skb before handing it to an upper layer,
+	 * meaning skb_orphan has been called. We NULL sk on the way out
+	 * to ensure we don't trigger a BUG_ON() in skb/sk operations
+	 * later and because we are not charging the memory of this skb
+	 * to any socket yet.
+	 */
+	skb->sk = NULL;
+	return ret;
+}
+
+static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
+{
+	struct sk_psock_parser *parser;
+
+	parser = container_of(strp, struct sk_psock_parser, strp);
+	return container_of(parser, struct sk_psock, parser);
+}
+
+static void sk_psock_verdict_apply(struct sk_psock *psock,
+				   struct sk_buff *skb, int verdict)
+{
+	struct sk_psock *psock_other;
+	struct sock *sk_other;
+	bool ingress;
+
+	switch (verdict) {
+	case __SK_PASS:
+		sk_other = psock->sk;
+		if (sock_flag(sk_other, SOCK_DEAD) ||
+		    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+			goto out_free;
+		}
+		if (atomic_read(&sk_other->sk_rmem_alloc) <=
+		    sk_other->sk_rcvbuf) {
+			struct tcp_skb_cb *tcp = TCP_SKB_CB(skb);
+
+			tcp->bpf.flags |= BPF_F_INGRESS;
+			skb_queue_tail(&psock->ingress_skb, skb);
+			schedule_work(&psock->work);
+			break;
+		}
+		goto out_free;
+	case __SK_REDIRECT:
+		sk_other = tcp_skb_bpf_redirect_fetch(skb);
+		if (unlikely(!sk_other))
+			goto out_free;
+		psock_other = sk_psock(sk_other);
+		if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
+		    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
+			goto out_free;
+		ingress = tcp_skb_bpf_ingress(skb);
+		if ((!ingress && sock_writeable(sk_other)) ||
+		    (ingress &&
+		     atomic_read(&sk_other->sk_rmem_alloc) <=
+		     sk_other->sk_rcvbuf)) {
+			if (!ingress)
+				skb_set_owner_w(skb, sk_other);
+			skb_queue_tail(&psock_other->ingress_skb, skb);
+			schedule_work(&psock_other->work);
+			break;
+		}
+		/* fall-through */
+	case __SK_DROP:
+		/* fall-through */
+	default:
+out_free:
+		kfree_skb(skb);
+	}
+}
+
+static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
+{
+	struct sk_psock *psock = sk_psock_from_strp(strp);
+	struct bpf_prog *prog;
+	int ret = __SK_DROP;
+
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.skb_verdict);
+	if (likely(prog)) {
+		skb_orphan(skb);
+		tcp_skb_bpf_redirect_clear(skb);
+		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+	}
+	rcu_read_unlock();
+	sk_psock_verdict_apply(psock, skb, ret);
+}
+
+static int sk_psock_strp_read_done(struct strparser *strp, int err)
+{
+	return err;
+}
+
+static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
+{
+	struct sk_psock *psock = sk_psock_from_strp(strp);
+	struct bpf_prog *prog;
+	int ret = skb->len;
+
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.skb_parser);
+	if (likely(prog))
+		ret = sk_psock_bpf_run(psock, prog, skb);
+	rcu_read_unlock();
+	return ret;
+}
+
+/* Called with socket lock held. */
+static void sk_psock_strp_data_ready(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock)) {
+		write_lock_bh(&sk->sk_callback_lock);
+		strp_data_ready(&psock->parser.strp);
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+	rcu_read_unlock();
+}
+
+static void sk_psock_write_space(struct sock *sk)
+{
+	struct sk_psock *psock;
+	void (*write_space)(struct sock *sk);
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock && sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)))
+		schedule_work(&psock->work);
+	write_space = psock->saved_write_space;
+	rcu_read_unlock();
+	write_space(sk);
+}
+
+int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
+{
+	static const struct strp_callbacks cb = {
+		.rcv_msg	= sk_psock_strp_read,
+		.read_sock_done	= sk_psock_strp_read_done,
+		.parse_msg	= sk_psock_strp_parse,
+	};
+
+	psock->parser.enabled = false;
+	return strp_init(&psock->parser.strp, sk, &cb);
+}
+
+void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_parser *parser = &psock->parser;
+
+	if (parser->enabled)
+		return;
+
+	parser->saved_data_ready = sk->sk_data_ready;
+	sk->sk_data_ready = sk_psock_strp_data_ready;
+	sk->sk_write_space = sk_psock_write_space;
+	parser->enabled = true;
+}
+
+void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_parser *parser = &psock->parser;
+
+	if (!parser->enabled)
+		return;
+
+	sk->sk_data_ready = parser->saved_data_ready;
+	parser->saved_data_ready = NULL;
+	strp_stop(&parser->strp);
+	parser->enabled = false;
+}
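A sketch of the expected ordering for the strparser hooks above:

	err = sk_psock_init_strp(sk, psock);	/* allocate parser state */
	if (!err)
		sk_psock_start_strp(sk, psock);	/* install sk_data_ready hook */
	/* ... */
	sk_psock_stop_strp(sk, psock);		/* restore callbacks first */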
diff --git a/net/core/sock.c b/net/core/sock.c
index 748765e..ac78a57 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -6,7 +7,6 @@
  *		Generic socket support routines. Memory allocators, socket lock/release
  *		handler for protocols to use and generic option handler.
  *
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Florian La Roche, <flla@stud.uni-sb.de>
@@ -81,12 +81,6 @@
  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
  *
  * To Fix:
- *
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -137,6 +131,7 @@
 
 #include <linux/filter.h>
 #include <net/sock_reuseport.h>
+#include <net/bpf_sk_storage.h>
 
 #include <trace/events/sock.h>
 
@@ -335,14 +330,68 @@
 }
 EXPORT_SYMBOL(__sk_backlog_rcv);
 
-static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
+static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
 {
-	struct timeval tv;
+	struct __kernel_sock_timeval tv;
+	int size;
 
-	if (optlen < sizeof(tv))
-		return -EINVAL;
-	if (copy_from_user(&tv, optval, sizeof(tv)))
-		return -EFAULT;
+	if (timeo == MAX_SCHEDULE_TIMEOUT) {
+		tv.tv_sec = 0;
+		tv.tv_usec = 0;
+	} else {
+		tv.tv_sec = timeo / HZ;
+		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
+	}
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
+		*(struct old_timeval32 *)optval = tv32;
+		return sizeof(tv32);
+	}
+
+	if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+		old_tv.tv_sec = tv.tv_sec;
+		old_tv.tv_usec = tv.tv_usec;
+		*(struct __kernel_old_timeval *)optval = old_tv;
+		size = sizeof(old_tv);
+	} else {
+		*(struct __kernel_sock_timeval *)optval = tv;
+		size = sizeof(tv);
+	}
+
+	return size;
+}
+
+static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval)
+{
+	struct __kernel_sock_timeval tv;
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32;
+
+		if (optlen < sizeof(tv32))
+			return -EINVAL;
+
+		if (copy_from_user(&tv32, optval, sizeof(tv32)))
+			return -EFAULT;
+		tv.tv_sec = tv32.tv_sec;
+		tv.tv_usec = tv32.tv_usec;
+	} else if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+
+		if (optlen < sizeof(old_tv))
+			return -EINVAL;
+		if (copy_from_user(&old_tv, optval, sizeof(old_tv)))
+			return -EFAULT;
+		tv.tv_sec = old_tv.tv_sec;
+		tv.tv_usec = old_tv.tv_usec;
+	} else {
+		if (optlen < sizeof(tv))
+			return -EINVAL;
+		if (copy_from_user(&tv, optval, sizeof(tv)))
+			return -EFAULT;
+	}
 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
 		return -EDOM;
 
@@ -360,8 +409,8 @@
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
 		return 0;
-	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
-		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
+	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
+		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
 	return 0;
 }
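As a worked example of the conversion above, assuming HZ == 1000: a timeout of { .tv_sec = 1, .tv_usec = 500000 } becomes 1 * HZ + DIV_ROUND_UP(500000, USEC_PER_SEC / HZ) = 1000 + 500 = 1500 jiffies.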
 
@@ -473,7 +522,7 @@
 		rc = sk_backlog_rcv(sk, skb);
 
 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
@@ -520,6 +569,34 @@
 }
 EXPORT_SYMBOL(sk_dst_check);
 
+static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
+{
+	int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+	struct net *net = sock_net(sk);
+
+	/* Sorry... */
+	ret = -EPERM;
+	if (!ns_capable(net->user_ns, CAP_NET_RAW))
+		goto out;
+
+	ret = -EINVAL;
+	if (ifindex < 0)
+		goto out;
+
+	sk->sk_bound_dev_if = ifindex;
+	if (sk->sk_prot->rehash)
+		sk->sk_prot->rehash(sk);
+	sk_dst_reset(sk);
+
+	ret = 0;
+
+out:
+#endif
+
+	return ret;
+}
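A userspace sketch of the new fast path: SO_BINDTOIFINDEX reaches the helper above directly, skipping the name lookup that SO_BINDTODEVICE performs; ifindex is a hypothetical variable.

	setsockopt(fd, SOL_SOCKET, SO_BINDTOIFINDEX, &ifindex, sizeof(ifindex));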
+
 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
 				int optlen)
 {
@@ -529,11 +606,6 @@
 	char devname[IFNAMSIZ];
 	int index;
 
-	/* Sorry... */
-	ret = -EPERM;
-	if (!ns_capable(net->user_ns, CAP_NET_RAW))
-		goto out;
-
 	ret = -EINVAL;
 	if (optlen < 0)
 		goto out;
@@ -566,12 +638,9 @@
 	}
 
 	lock_sock(sk);
-	sk->sk_bound_dev_if = index;
-	sk_dst_reset(sk);
+	ret = sock_setbindtodevice_locked(sk, index);
 	release_sock(sk);
 
-	ret = 0;
-
 out:
 #endif
 
@@ -698,6 +767,7 @@
 		break;
 	case SO_DONTROUTE:
 		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
+		sk_dst_reset(sk);
 		break;
 	case SO_BROADCAST:
 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
@@ -710,8 +780,13 @@
 		 */
 		val = min_t(u32, val, sysctl_wmem_max);
 set_sndbuf:
+		/* Ensure val * 2 fits into an int, to prevent max_t()
+		 * from treating it as a negative value.
+		 */
+		val = min_t(int, val, INT_MAX / 2);
 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+		WRITE_ONCE(sk->sk_sndbuf,
+			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
 		/* Wake up sending tasks if we upped the value. */
 		sk->sk_write_space(sk);
 		break;
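Worked example of the clamp-and-double above: a SO_SNDBUF request of 65536 is clamped to sysctl_wmem_max and INT_MAX / 2, then stored as max(65536 * 2, SOCK_MIN_SNDBUF) = 131072, which is also what a later getsockopt(SO_SNDBUF) reports.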
@@ -721,6 +796,12 @@
 			ret = -EPERM;
 			break;
 		}
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		if (val < 0)
+			val = 0;
 		goto set_sndbuf;
 
 	case SO_RCVBUF:
@@ -731,6 +812,10 @@
 		 */
 		val = min_t(u32, val, sysctl_rmem_max);
 set_rcvbuf:
+		/* Ensure val * 2 fits into an int, to prevent max_t()
+		 * from treating it as a negative value.
+		 */
+		val = min_t(int, val, INT_MAX / 2);
 		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 		/*
 		 * We double it on the way in to account for
@@ -747,7 +832,8 @@
 		 * returning the value we actually used in getsockopt
 		 * is the most desirable behavior.
 		 */
-		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+		WRITE_ONCE(sk->sk_rcvbuf,
+			   max_t(int, val * 2, SOCK_MIN_RCVBUF));
 		break;
 
 	case SO_RCVBUFFORCE:
@@ -755,6 +841,12 @@
 			ret = -EPERM;
 			break;
 		}
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		if (val < 0)
+			val = 0;
 		goto set_rcvbuf;
 
 	case SO_KEEPALIVE:
@@ -812,10 +904,17 @@
 			clear_bit(SOCK_PASSCRED, &sock->flags);
 		break;
 
-	case SO_TIMESTAMP:
-	case SO_TIMESTAMPNS:
+	case SO_TIMESTAMP_OLD:
+	case SO_TIMESTAMP_NEW:
+	case SO_TIMESTAMPNS_OLD:
+	case SO_TIMESTAMPNS_NEW:
 		if (valbool)  {
-			if (optname == SO_TIMESTAMP)
+			if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW)
+				sock_set_flag(sk, SOCK_TSTAMP_NEW);
+			else
+				sock_reset_flag(sk, SOCK_TSTAMP_NEW);
+
+			if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW)
 				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
 			else
 				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
@@ -824,10 +923,14 @@
 		} else {
 			sock_reset_flag(sk, SOCK_RCVTSTAMP);
 			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+			sock_reset_flag(sk, SOCK_TSTAMP_NEW);
 		}
 		break;
 
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMPING_NEW:
+		sock_set_flag(sk, SOCK_TSTAMP_NEW);
+		/* fall through */
+	case SO_TIMESTAMPING_OLD:
 		if (val & ~SOF_TIMESTAMPING_MASK) {
 			ret = -EINVAL;
 			break;
@@ -858,9 +961,13 @@
 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 			sock_enable_timestamp(sk,
 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
-		else
+		else {
+			if (optname == SO_TIMESTAMPING_NEW)
+				sock_reset_flag(sk, SOCK_TSTAMP_NEW);
+
 			sock_disable_timestamp(sk,
 					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
+		}
 		break;
 
 	case SO_RCVLOWAT:
@@ -869,15 +976,17 @@
 		if (sock->ops->set_rcvlowat)
 			ret = sock->ops->set_rcvlowat(sk, val);
 		else
-			sk->sk_rcvlowat = val ? : 1;
+			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
 		break;
 
-	case SO_RCVTIMEO:
-		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD);
 		break;
 
-	case SO_SNDTIMEO:
-		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD);
 		break;
 
 	case SO_ATTACH_FILTER:
@@ -932,6 +1041,10 @@
 		}
 		break;
 
+	case SO_DETACH_REUSEPORT_BPF:
+		ret = reuseport_detach_prog(sk);
+		break;
+
 	case SO_DETACH_FILTER:
 		ret = sk_detach_filter(sk);
 		break;
@@ -950,10 +1063,12 @@
 			clear_bit(SOCK_PASSSEC, &sock->flags);
 		break;
 	case SO_MARK:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
 			ret = -EPERM;
-		else
+		} else if (val != sk->sk_mark) {
 			sk->sk_mark = val;
+			sk_dst_reset(sk);
+		}
 		break;
 
 	case SO_RXQ_OVFL:
@@ -994,17 +1109,25 @@
 #endif
 
 	case SO_MAX_PACING_RATE:
-		if (val != ~0U)
+		{
+		unsigned long ulval = (val == ~0U) ? ~0UL : val;
+
+		if (sizeof(ulval) != sizeof(val) &&
+		    optlen >= sizeof(ulval) &&
+		    get_user(ulval, (unsigned long __user *)optval)) {
+			ret = -EFAULT;
+			break;
+		}
+		if (ulval != ~0UL)
 			cmpxchg(&sk->sk_pacing_status,
 				SK_PACING_NONE,
 				SK_PACING_NEEDED);
-		sk->sk_max_pacing_rate = val;
-		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
-					 sk->sk_max_pacing_rate);
+		sk->sk_max_pacing_rate = ulval;
+		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
 		break;
-
+		}
 	case SO_INCOMING_CPU:
-		sk->sk_incoming_cpu = val;
+		WRITE_ONCE(sk->sk_incoming_cpu, val);
 		break;
 
 	case SO_CNX_ADVICE:
@@ -1014,7 +1137,10 @@
 
 	case SO_ZEROCOPY:
 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
-			if (sk->sk_protocol != IPPROTO_TCP)
+			if (!((sk->sk_type == SOCK_STREAM &&
+			       sk->sk_protocol == IPPROTO_TCP) ||
+			      (sk->sk_type == SOCK_DGRAM &&
+			       sk->sk_protocol == IPPROTO_UDP)))
 				ret = -ENOTSUPP;
 		} else if (sk->sk_family != PF_RDS) {
 			ret = -ENOTSUPP;
@@ -1047,6 +1173,10 @@
 		}
 		break;
 
+	case SO_BINDTOIFINDEX:
+		ret = sock_setbindtodevice_locked(sk, val);
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1090,8 +1220,11 @@
 	union {
 		int val;
 		u64 val64;
+		unsigned long ulval;
 		struct linger ling;
-		struct timeval tm;
+		struct old_timeval32 tm32;
+		struct __kernel_old_timeval tm;
+		struct __kernel_sock_timeval stm;
 		struct sock_txtime txtime;
 	} v;
 
@@ -1178,39 +1311,36 @@
 		sock_warn_obsolete_bsdism("getsockopt");
 		break;
 
-	case SO_TIMESTAMP:
+	case SO_TIMESTAMP_OLD:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
-	case SO_TIMESTAMPNS:
-		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+	case SO_TIMESTAMPNS_OLD:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
 		break;
 
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPNS_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPING_OLD:
 		v.val = sk->sk_tsflags;
 		break;
 
-	case SO_RCVTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
 		break;
 
-	case SO_SNDTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
 		break;
 
 	case SO_RCVLOWAT:
@@ -1336,20 +1466,23 @@
 #endif
 
 	case SO_MAX_PACING_RATE:
-		v.val = sk->sk_max_pacing_rate;
+		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
+			lv = sizeof(v.ulval);
+			v.ulval = sk->sk_max_pacing_rate;
+		} else {
+			/* 32bit version */
+			v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
+		}
 		break;
 
 	case SO_INCOMING_CPU:
-		v.val = sk->sk_incoming_cpu;
+		v.val = READ_ONCE(sk->sk_incoming_cpu);
 		break;
 
 	case SO_MEMINFO:
 	{
 		u32 meminfo[SK_MEMINFO_VARS];
 
-		if (get_user(len, optlen))
-			return -EFAULT;
-
 		sk_get_meminfo(sk, meminfo);
 
 		len = min_t(unsigned int, len, sizeof(meminfo));
@@ -1390,6 +1523,10 @@
 				  SOF_TXTIME_REPORT_ERRORS : 0;
 		break;
 
+	case SO_BINDTOIFINDEX:
+		v.val = sk->sk_bound_dev_if;
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -1462,7 +1599,7 @@
 		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
 		if (!sk)
 			return sk;
-		if (priority & __GFP_ZERO)
+		if (want_init_on_alloc(priority))
 			sk_prot_clear_nulls(sk, prot->obj_size);
 	} else
 		sk = kmalloc(prot->obj_size, priority);
@@ -1565,11 +1702,13 @@
 		sk_filter_uncharge(sk, filter);
 		RCU_INIT_POINTER(sk->sk_filter, NULL);
 	}
-	if (rcu_access_pointer(sk->sk_reuseport_cb))
-		reuseport_detach_sock(sk);
 
 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
+#ifdef CONFIG_BPF_SYSCALL
+	bpf_sk_storage_free(sk);
+#endif
+
 	if (atomic_read(&sk->sk_omem_alloc))
 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
 			 __func__, atomic_read(&sk->sk_omem_alloc));
@@ -1589,7 +1728,14 @@
 
 void sk_destruct(struct sock *sk)
 {
-	if (sock_flag(sk, SOCK_RCU_FREE))
+	bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
+
+	if (rcu_access_pointer(sk->sk_reuseport_cb)) {
+		reuseport_detach_sock(sk);
+		use_call_rcu = true;
+	}
+
+	if (use_call_rcu)
 		call_rcu(&sk->sk_rcu, __sk_destruct);
 	else
 		__sk_destruct(&sk->sk_rcu);
@@ -1713,11 +1859,16 @@
 		}
 		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
 
+		if (bpf_sk_storage_clone(sk, newsk)) {
+			sk_free_unlock_clone(newsk);
+			newsk = NULL;
+			goto out;
+		}
+
 		newsk->sk_err	   = 0;
 		newsk->sk_err_soft = 0;
 		newsk->sk_priority = 0;
 		newsk->sk_incoming_cpu = raw_smp_processor_id();
-		atomic64_set(&newsk->sk_cookie, 0);
 		if (likely(newsk->sk_net_refcnt))
 			sock_inuse_add(sock_net(newsk), 1);
 
@@ -1741,7 +1892,7 @@
 		 */
 		sk_refcnt_debug_inc(newsk);
 		sk_set_socket(newsk, NULL);
-		newsk->sk_wq = NULL;
+		RCU_INIT_POINTER(newsk->sk_wq, NULL);
 
 		if (newsk->sk_prot->sockets_allocated)
 			sk_sockets_allocated_inc(newsk);
@@ -1851,6 +2002,19 @@
 }
 EXPORT_SYMBOL(skb_set_owner_w);
 
+static bool can_skb_orphan_partial(const struct sk_buff *skb)
+{
+#ifdef CONFIG_TLS_DEVICE
+	/* Drivers depend on in-order delivery for crypto offload,
+	 * partial orphan breaks out-of-order-OK logic.
+	 */
+	if (skb->decrypted)
+		return false;
+#endif
+	return (skb->destructor == sock_wfree ||
+		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
+}
+
 /* This helper is used by netem, as it can hold packets in its
  * delay queue. We want to allow the owner socket to send more
  * packets, as if they were already TX completed by a typical driver.
@@ -1862,11 +2026,7 @@
 	if (skb_is_tcp_pure_ack(skb))
 		return;
 
-	if (skb->destructor == sock_wfree
-#ifdef CONFIG_INET
-	    || skb->destructor == tcp_wfree
-#endif
-		) {
+	if (can_skb_orphan_partial(skb)) {
 		struct sock *sk = skb->sk;
 
 		if (refcount_inc_not_zero(&sk->sk_refcnt)) {
@@ -1930,8 +2090,10 @@
 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 			     gfp_t priority)
 {
-	if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+	if (force ||
+	    refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
 		struct sk_buff *skb = alloc_skb(size, priority);
+
 		if (skb) {
 			skb_set_owner_w(skb, sk);
 			return skb;
@@ -2032,7 +2194,7 @@
 			break;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
 			break;
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			break;
@@ -2067,7 +2229,7 @@
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			goto failure;
 
-		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
+		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
 			break;
 
 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -2113,7 +2275,7 @@
 			return -EINVAL;
 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
 		break;
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMPING_OLD:
 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
 			return -EINVAL;
 
@@ -2176,13 +2338,14 @@
 	} else {
 		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
 
-		if (memory_pressure && *memory_pressure)
-			*memory_pressure = 0;
+		if (memory_pressure && READ_ONCE(*memory_pressure))
+			WRITE_ONCE(*memory_pressure, 0);
 	}
 }
 
 /* On 32bit arches, an skb frag is limited to 2^15 */
 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
+DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
 
 /**
  * skb_page_frag_refill - check that a page_frag contains enough room
@@ -2207,7 +2370,8 @@
 	}
 
 	pfrag->offset = 0;
-	if (SKB_FRAG_PAGE_ORDER) {
+	if (SKB_FRAG_PAGE_ORDER &&
+	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
 		/* Avoid direct reclaim but allow kswapd to wake */
 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
 					  __GFP_COMP | __GFP_NOWARN |
@@ -2238,67 +2402,6 @@
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
 
-int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
-		int first_coalesce)
-{
-	int sg_curr = *sg_curr_index, use = 0, rc = 0;
-	unsigned int size = *sg_curr_size;
-	struct page_frag *pfrag;
-	struct scatterlist *sge;
-
-	len -= size;
-	pfrag = sk_page_frag(sk);
-
-	while (len > 0) {
-		unsigned int orig_offset;
-
-		if (!sk_page_frag_refill(sk, pfrag)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		use = min_t(int, len, pfrag->size - pfrag->offset);
-
-		if (!sk_wmem_schedule(sk, use)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		sk_mem_charge(sk, use);
-		size += use;
-		orig_offset = pfrag->offset;
-		pfrag->offset += use;
-
-		sge = sg + sg_curr - 1;
-		if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page &&
-		    sge->offset + sge->length == orig_offset) {
-			sge->length += use;
-		} else {
-			sge = sg + sg_curr;
-			sg_unmark_end(sge);
-			sg_set_page(sge, pfrag->page, use, orig_offset);
-			get_page(pfrag->page);
-			sg_curr++;
-
-			if (sg_curr == MAX_SKB_FRAGS)
-				sg_curr = 0;
-
-			if (sg_curr == sg_start) {
-				rc = -ENOSPC;
-				break;
-			}
-		}
-
-		len -= use;
-	}
-out:
-	*sg_curr_size = size;
-	*sg_curr_index = sg_curr;
-	return rc;
-}
-EXPORT_SYMBOL(sk_alloc_sg);
-
 static void __lock_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
 	__acquires(&sk->sk_lock.slock)
@@ -2332,7 +2435,7 @@
 			next = skb->next;
 			prefetch(next);
 			WARN_ON_ONCE(skb_dst_is_noref(skb));
-			skb->next = NULL;
+			skb_mark_not_on_list(skb);
 			sk_backlog_rcv(sk, skb);
 
 			cond_resched();
@@ -2432,7 +2535,7 @@
 	}
 
 	if (sk_has_memory_pressure(sk)) {
-		int alloc;
+		u64 alloc;
 
 		if (!sk_under_memory_pressure(sk))
 			return 1;
@@ -2707,7 +2810,7 @@
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
-	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
 		wq = rcu_dereference(sk->sk_wq);
 		if (skwq_has_sleeper(wq))
 			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
@@ -2765,11 +2868,11 @@
 
 	if (sock) {
 		sk->sk_type	=	sock->type;
-		sk->sk_wq	=	sock->wq;
+		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
 		sock->sk	=	sk;
 		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
 	} else {
-		sk->sk_wq	=	NULL;
+		RCU_INIT_POINTER(sk->sk_wq, NULL);
 		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
 	}
 
@@ -2803,6 +2906,9 @@
 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
 
 	sk->sk_stamp = SK_DEFAULT_STAMP;
+#if BITS_PER_LONG==32
+	seqlock_init(&sk->sk_stamp_seq);
+#endif
 	atomic_set(&sk->sk_zckey, 0);
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -2810,8 +2916,8 @@
 	sk->sk_ll_usec		=	sysctl_net_busy_read;
 #endif
 
-	sk->sk_max_pacing_rate = ~0U;
-	sk->sk_pacing_rate = ~0U;
+	sk->sk_max_pacing_rate = ~0UL;
+	sk->sk_pacing_rate = ~0UL;
 	sk->sk_pacing_shift = 10;
 	sk->sk_incoming_cpu = -1;
 
@@ -2897,37 +3003,44 @@
 }
 EXPORT_SYMBOL(lock_sock_fast);
 
-int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
+int sock_gettstamp(struct socket *sock, void __user *userstamp,
+		   bool timeval, bool time32)
 {
-	struct timeval tv;
+	struct sock *sk = sock->sk;
+	struct timespec64 ts;
 
 	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	tv = ktime_to_timeval(sk->sk_stamp);
-	if (tv.tv_sec == -1)
-		return -ENOENT;
-	if (tv.tv_sec == 0) {
-		sk->sk_stamp = ktime_get_real();
-		tv = ktime_to_timeval(sk->sk_stamp);
-	}
-	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
-}
-EXPORT_SYMBOL(sock_get_timestamp);
-
-int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
-{
-	struct timespec ts;
-
-	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	ts = ktime_to_timespec(sk->sk_stamp);
+	ts = ktime_to_timespec64(sock_read_timestamp(sk));
 	if (ts.tv_sec == -1)
 		return -ENOENT;
 	if (ts.tv_sec == 0) {
-		sk->sk_stamp = ktime_get_real();
-		ts = ktime_to_timespec(sk->sk_stamp);
+		ktime_t kt = ktime_get_real();
+		sock_write_timestamp(sk, kt);
+		ts = ktime_to_timespec64(kt);
 	}
-	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
+
+	if (timeval)
+		ts.tv_nsec /= 1000;
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+	if (time32)
+		return put_old_timespec32(&ts, userstamp);
+#endif
+#ifdef CONFIG_SPARC64
+	/* beware of padding in sparc64 timeval */
+	if (timeval && !in_compat_syscall()) {
+		struct __kernel_old_timeval __user tv = {
+			.tv_sec = ts.tv_sec,
+			.tv_usec = ts.tv_nsec,
+		};
+		if (copy_to_user(userstamp, &tv, sizeof(tv)))
+			return -EFAULT;
+		return 0;
+	}
+#endif
+	return put_timespec64(&ts, userstamp);
 }
-EXPORT_SYMBOL(sock_get_timestampns);
+EXPORT_SYMBOL(sock_gettstamp);
 
 void sock_enable_timestamp(struct sock *sk, int flag)
 {
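sock_read_timestamp() and sock_write_timestamp() used above come from include/net/sock.h: on 64-bit they reduce to READ_ONCE()/WRITE_ONCE(), while on 32-bit the seqlock initialized in sock_init_data() guards the 64-bit ktime_t against torn reads. The read side is roughly:

    static inline ktime_t sock_read_timestamp(const struct sock *sk)
    {
    #if BITS_PER_LONG == 32
    	unsigned int seq;
    	ktime_t kt;

    	do {
    		seq = read_seqbegin(&sk->sk_stamp_seq);
    		kt = sk->sk_stamp;
    	} while (read_seqretry(&sk->sk_stamp_seq, seq));

    	return kt;
    #else
    	return READ_ONCE(sk->sk_stamp);
    #endif
    }
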
@@ -3095,13 +3208,13 @@
 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
 
 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
-	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
-	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
-	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
-	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
 }
 
@@ -3186,16 +3299,17 @@
 
 core_initcall(net_inuse_init);
 
-static void assign_proto_idx(struct proto *prot)
+static int assign_proto_idx(struct proto *prot)
 {
 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
 
 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
 		pr_err("PROTO_INUSE_NR exhausted\n");
-		return;
+		return -ENOSPC;
 	}
 
 	set_bit(prot->inuse_idx, proto_inuse_idx);
+	return 0;
 }
 
 static void release_proto_idx(struct proto *prot)
@@ -3204,8 +3318,9 @@
 		clear_bit(prot->inuse_idx, proto_inuse_idx);
 }
 #else
-static inline void assign_proto_idx(struct proto *prot)
+static inline int assign_proto_idx(struct proto *prot)
 {
+	return 0;
 }
 
 static inline void release_proto_idx(struct proto *prot)
@@ -3254,6 +3369,8 @@
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
+	int ret = -ENOBUFS;
+
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create_usercopy(prot->name,
 					prot->obj_size, 0,
@@ -3290,20 +3407,27 @@
 	}
 
 	mutex_lock(&proto_list_mutex);
+	ret = assign_proto_idx(prot);
+	if (ret) {
+		mutex_unlock(&proto_list_mutex);
+		goto out_free_timewait_sock_slab_name;
+	}
 	list_add(&prot->node, &proto_list);
-	assign_proto_idx(prot);
 	mutex_unlock(&proto_list_mutex);
-	return 0;
+	return ret;
 
 out_free_timewait_sock_slab_name:
-	kfree(prot->twsk_prot->twsk_slab_name);
+	if (alloc_slab && prot->twsk_prot)
+		kfree(prot->twsk_prot->twsk_slab_name);
 out_free_request_sock_slab:
-	req_prot_cleanup(prot->rsk_prot);
+	if (alloc_slab) {
+		req_prot_cleanup(prot->rsk_prot);
 
-	kmem_cache_destroy(prot->slab);
-	prot->slab = NULL;
+		kmem_cache_destroy(prot->slab);
+		prot->slab = NULL;
+	}
 out:
-	return -ENOBUFS;
+	return ret;
 }
 EXPORT_SYMBOL(proto_register);
 
@@ -3339,6 +3463,7 @@
 
 #ifdef CONFIG_INET
 	if (family == AF_INET &&
+	    protocol != IPPROTO_RAW &&
 	    !rcu_access_pointer(inet_protos[protocol]))
 		return -ENOENT;
 #endif
@@ -3376,7 +3501,7 @@
 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
 }
 
-static char *sock_prot_memory_pressure(struct proto *proto)
+static const char *sock_prot_memory_pressure(struct proto *proto)
 {
 	return proto->memory_pressure != NULL ?
 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
@@ -3475,7 +3600,7 @@
 {
 	struct sock *sk = p;
 
-	return !skb_queue_empty(&sk->sk_receive_queue) ||
+	return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
 	       sk_busy_loop_timeout(sk, start_time);
 }
 EXPORT_SYMBOL(sk_busy_loop_end);
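sk_busy_loop_end() can run without the receive-queue lock, so the plain emptiness check becomes its _lockless variant, which simply wraps the head-pointer load in READ_ONCE() so a concurrent enqueue cannot be miscompiled into a torn or cached read:

    static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
    {
    	return READ_ONCE(list->next) == (const struct sk_buff *) list;
    }
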
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 3312a58..c13ffbd 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -19,6 +19,7 @@
 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
 static struct workqueue_struct *broadcast_wq;
+static atomic64_t cookie_gen;
 
 u64 sock_gen_cookie(struct sock *sk)
 {
@@ -27,7 +28,7 @@
 
 		if (res)
 			return res;
-		res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
+		res = atomic64_inc_return(&cookie_gen);
 		atomic64_cmpxchg(&sk->sk_cookie, 0, res);
 	}
 }
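Moving cookie_gen from per-netns state to a single file-static counter keeps sock_gen_cookie() lock-free: racing callers may each bump the generator, but only the first cmpxchg(0 -> res) sticks, and losers loop around to return the winner's value. The idiom, reduced to a sketch (assign_once is an illustrative name):

    static u64 assign_once(atomic64_t *slot, atomic64_t *gen)
    {
    	for (;;) {
    		u64 v = atomic64_read(slot);

    		if (v)
    			return v;
    		/* Losers waste a generator value; harmless for uniqueness. */
    		atomic64_cmpxchg(slot, 0, atomic64_inc_return(gen));
    	}
    }
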
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
new file mode 100644
index 0000000..eb114ee
--- /dev/null
+++ b/net/core/sock_map.c
@@ -0,0 +1,1010 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/net.h>
+#include <linux/workqueue.h>
+#include <linux/skmsg.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+
+struct bpf_stab {
+	struct bpf_map map;
+	struct sock **sks;
+	struct sk_psock_progs progs;
+	raw_spinlock_t lock;
+};
+
+#define SOCK_CREATE_FLAG_MASK				\
+	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_stab *stab;
+	u64 cost;
+	int err;
+
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+	if (attr->max_entries == 0 ||
+	    attr->key_size    != 4 ||
+	    attr->value_size  != 4 ||
+	    attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
+		return ERR_PTR(-EINVAL);
+
+	stab = kzalloc(sizeof(*stab), GFP_USER);
+	if (!stab)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&stab->map, attr);
+	raw_spin_lock_init(&stab->lock);
+
+	/* Make sure page count doesn't overflow. */
+	cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+	err = bpf_map_charge_init(&stab->map.memory, cost);
+	if (err)
+		goto free_stab;
+
+	stab->sks = bpf_map_area_alloc(stab->map.max_entries *
+				       sizeof(struct sock *),
+				       stab->map.numa_node);
+	if (stab->sks)
+		return &stab->map;
+	err = -ENOMEM;
+	bpf_map_charge_finish(&stab->map.memory);
+free_stab:
+	kfree(stab);
+	return ERR_PTR(err);
+}
+
+int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	u32 ufd = attr->target_fd;
+	struct bpf_map *map;
+	struct fd f;
+	int ret;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	ret = sock_map_prog_update(map, prog, attr->attach_type);
+	fdput(f);
+	return ret;
+}
+
+static void sock_map_sk_acquire(struct sock *sk)
+	__acquires(&sk->sk_lock.slock)
+{
+	lock_sock(sk);
+	preempt_disable();
+	rcu_read_lock();
+}
+
+static void sock_map_sk_release(struct sock *sk)
+	__releases(&sk->sk_lock.slock)
+{
+	rcu_read_unlock();
+	preempt_enable();
+	release_sock(sk);
+}
+
+static void sock_map_add_link(struct sk_psock *psock,
+			      struct sk_psock_link *link,
+			      struct bpf_map *map, void *link_raw)
+{
+	link->link_raw = link_raw;
+	link->map = map;
+	spin_lock_bh(&psock->link_lock);
+	list_add_tail(&link->list, &psock->link);
+	spin_unlock_bh(&psock->link_lock);
+}
+
+static void sock_map_del_link(struct sock *sk,
+			      struct sk_psock *psock, void *link_raw)
+{
+	struct sk_psock_link *link, *tmp;
+	bool strp_stop = false;
+
+	spin_lock_bh(&psock->link_lock);
+	list_for_each_entry_safe(link, tmp, &psock->link, list) {
+		if (link->link_raw == link_raw) {
+			struct bpf_map *map = link->map;
+			struct bpf_stab *stab = container_of(map, struct bpf_stab,
+							     map);
+			if (psock->parser.enabled && stab->progs.skb_parser)
+				strp_stop = true;
+			list_del(&link->list);
+			sk_psock_free_link(link);
+		}
+	}
+	spin_unlock_bh(&psock->link_lock);
+	if (strp_stop) {
+		write_lock_bh(&sk->sk_callback_lock);
+		sk_psock_stop_strp(sk, psock);
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+}
+
+static void sock_map_unref(struct sock *sk, void *link_raw)
+{
+	struct sk_psock *psock = sk_psock(sk);
+
+	if (likely(psock)) {
+		sock_map_del_link(sk, psock, link_raw);
+		sk_psock_put(sk, psock);
+	}
+}
+
+static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
+			 struct sock *sk)
+{
+	struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
+	bool skb_progs, sk_psock_is_new = false;
+	struct sk_psock *psock;
+	int ret;
+
+	skb_verdict = READ_ONCE(progs->skb_verdict);
+	skb_parser = READ_ONCE(progs->skb_parser);
+	skb_progs = skb_parser && skb_verdict;
+	if (skb_progs) {
+		skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
+		if (IS_ERR(skb_verdict))
+			return PTR_ERR(skb_verdict);
+		skb_parser = bpf_prog_inc_not_zero(skb_parser);
+		if (IS_ERR(skb_parser)) {
+			bpf_prog_put(skb_verdict);
+			return PTR_ERR(skb_parser);
+		}
+	}
+
+	msg_parser = READ_ONCE(progs->msg_parser);
+	if (msg_parser) {
+		msg_parser = bpf_prog_inc_not_zero(msg_parser);
+		if (IS_ERR(msg_parser)) {
+			ret = PTR_ERR(msg_parser);
+			goto out;
+		}
+	}
+
+	psock = sk_psock_get_checked(sk);
+	if (IS_ERR(psock)) {
+		ret = PTR_ERR(psock);
+		goto out_progs;
+	}
+
+	if (psock) {
+		if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
+		    (skb_progs  && READ_ONCE(psock->progs.skb_parser))) {
+			sk_psock_put(sk, psock);
+			ret = -EBUSY;
+			goto out_progs;
+		}
+	} else {
+		psock = sk_psock_init(sk, map->numa_node);
+		if (!psock) {
+			ret = -ENOMEM;
+			goto out_progs;
+		}
+		sk_psock_is_new = true;
+	}
+
+	if (msg_parser)
+		psock_set_prog(&psock->progs.msg_parser, msg_parser);
+	if (sk_psock_is_new) {
+		ret = tcp_bpf_init(sk);
+		if (ret < 0)
+			goto out_drop;
+	} else {
+		tcp_bpf_reinit(sk);
+	}
+
+	write_lock_bh(&sk->sk_callback_lock);
+	if (skb_progs && !psock->parser.enabled) {
+		ret = sk_psock_init_strp(sk, psock);
+		if (ret) {
+			write_unlock_bh(&sk->sk_callback_lock);
+			goto out_drop;
+		}
+		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+		psock_set_prog(&psock->progs.skb_parser, skb_parser);
+		sk_psock_start_strp(sk, psock);
+	}
+	write_unlock_bh(&sk->sk_callback_lock);
+	return 0;
+out_drop:
+	sk_psock_put(sk, psock);
+out_progs:
+	if (msg_parser)
+		bpf_prog_put(msg_parser);
+out:
+	if (skb_progs) {
+		bpf_prog_put(skb_verdict);
+		bpf_prog_put(skb_parser);
+	}
+	return ret;
+}
+
+static void sock_map_free(struct bpf_map *map)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	int i;
+
+	synchronize_rcu();
+	rcu_read_lock();
+	raw_spin_lock_bh(&stab->lock);
+	for (i = 0; i < stab->map.max_entries; i++) {
+		struct sock **psk = &stab->sks[i];
+		struct sock *sk;
+
+		sk = xchg(psk, NULL);
+		if (sk)
+			sock_map_unref(sk, psk);
+	}
+	raw_spin_unlock_bh(&stab->lock);
+	rcu_read_unlock();
+
+	synchronize_rcu();
+
+	bpf_map_area_free(stab->sks);
+	kfree(stab);
+}
+
+static void sock_map_release_progs(struct bpf_map *map)
+{
+	psock_progs_drop(&container_of(map, struct bpf_stab, map)->progs);
+}
+
+static struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (unlikely(key >= map->max_entries))
+		return NULL;
+	return READ_ONCE(stab->sks[key]);
+}
+
+static void *sock_map_lookup(struct bpf_map *map, void *key)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
+			     struct sock **psk)
+{
+	struct sock *sk;
+	int err = 0;
+
+	raw_spin_lock_bh(&stab->lock);
+	sk = *psk;
+	if (!sk_test || sk_test == sk)
+		sk = xchg(psk, NULL);
+
+	if (likely(sk))
+		sock_map_unref(sk, psk);
+	else
+		err = -EINVAL;
+
+	raw_spin_unlock_bh(&stab->lock);
+	return err;
+}
+
+static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk,
+				      void *link_raw)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+
+	__sock_map_delete(stab, sk, link_raw);
+}
+
+static int sock_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	u32 i = *(u32 *)key;
+	struct sock **psk;
+
+	if (unlikely(i >= map->max_entries))
+		return -EINVAL;
+
+	psk = &stab->sks[i];
+	return __sock_map_delete(stab, NULL, psk);
+}
+
+static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	u32 i = key ? *(u32 *)key : U32_MAX;
+	u32 *key_next = next;
+
+	if (i == stab->map.max_entries - 1)
+		return -ENOENT;
+	if (i >= stab->map.max_entries)
+		*key_next = 0;
+	else
+		*key_next = i + 1;
+	return 0;
+}
+
+static int sock_map_update_common(struct bpf_map *map, u32 idx,
+				  struct sock *sk, u64 flags)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_psock_link *link;
+	struct sk_psock *psock;
+	struct sock *osk;
+	int ret;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	if (unlikely(flags > BPF_EXIST))
+		return -EINVAL;
+	if (unlikely(idx >= map->max_entries))
+		return -E2BIG;
+	if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
+		return -EINVAL;
+
+	link = sk_psock_init_link();
+	if (!link)
+		return -ENOMEM;
+
+	ret = sock_map_link(map, &stab->progs, sk);
+	if (ret < 0)
+		goto out_free;
+
+	psock = sk_psock(sk);
+	WARN_ON_ONCE(!psock);
+
+	raw_spin_lock_bh(&stab->lock);
+	osk = stab->sks[idx];
+	if (osk && flags == BPF_NOEXIST) {
+		ret = -EEXIST;
+		goto out_unlock;
+	} else if (!osk && flags == BPF_EXIST) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	sock_map_add_link(psock, link, map, &stab->sks[idx]);
+	stab->sks[idx] = sk;
+	if (osk)
+		sock_map_unref(osk, &stab->sks[idx]);
+	raw_spin_unlock_bh(&stab->lock);
+	return 0;
+out_unlock:
+	raw_spin_unlock_bh(&stab->lock);
+	if (psock)
+		sk_psock_put(sk, psock);
+out_free:
+	sk_psock_free_link(link);
+	return ret;
+}
+
+static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
+{
+	return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
+	       ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
+}
+
+static bool sock_map_sk_is_suitable(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_STREAM &&
+	       sk->sk_protocol == IPPROTO_TCP;
+}
+
+static int sock_map_update_elem(struct bpf_map *map, void *key,
+				void *value, u64 flags)
+{
+	u32 ufd = *(u32 *)value;
+	u32 idx = *(u32 *)key;
+	struct socket *sock;
+	struct sock *sk;
+	int ret;
+
+	sock = sockfd_lookup(ufd, &ret);
+	if (!sock)
+		return ret;
+	sk = sock->sk;
+	if (!sk) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (!sock_map_sk_is_suitable(sk) ||
+	    sk->sk_state != TCP_ESTABLISHED) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	sock_map_sk_acquire(sk);
+	ret = sock_map_update_common(map, idx, sk, flags);
+	sock_map_sk_release(sk);
+out:
+	fput(sock->file);
+	return ret;
+}
+
+BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, sops,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (likely(sock_map_sk_is_suitable(sops->sk) &&
+		   sock_map_op_okay(sops)))
+		return sock_map_update_common(map, *(u32 *)key, sops->sk,
+					      flags);
+	return -EOPNOTSUPP;
+}
+
+const struct bpf_func_proto bpf_sock_map_update_proto = {
+	.func		= bpf_sock_map_update,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_PTR_TO_MAP_KEY,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
+	   struct bpf_map *, map, u32, key, u64, flags)
+{
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+	tcb->bpf.flags = flags;
+	tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
+	if (!tcb->bpf.sk_redir)
+		return SK_DROP;
+	return SK_PASS;
+}
+
+const struct bpf_func_proto bpf_sk_redirect_map_proto = {
+	.func           = bpf_sk_redirect_map,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
+	   struct bpf_map *, map, u32, key, u64, flags)
+{
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+	msg->flags = flags;
+	msg->sk_redir = __sock_map_lookup_elem(map, key);
+	if (!msg->sk_redir)
+		return SK_DROP;
+	return SK_PASS;
+}
+
+const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+	.func           = bpf_msg_redirect_map,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+const struct bpf_map_ops sock_map_ops = {
+	.map_alloc		= sock_map_alloc,
+	.map_free		= sock_map_free,
+	.map_get_next_key	= sock_map_get_next_key,
+	.map_update_elem	= sock_map_update_elem,
+	.map_delete_elem	= sock_map_delete_elem,
+	.map_lookup_elem	= sock_map_lookup,
+	.map_release_uref	= sock_map_release_progs,
+	.map_check_btf		= map_check_no_btf,
+};
+
+struct bpf_htab_elem {
+	struct rcu_head rcu;
+	u32 hash;
+	struct sock *sk;
+	struct hlist_node node;
+	u8 key[0];
+};
+
+struct bpf_htab_bucket {
+	struct hlist_head head;
+	raw_spinlock_t lock;
+};
+
+struct bpf_htab {
+	struct bpf_map map;
+	struct bpf_htab_bucket *buckets;
+	u32 buckets_num;
+	u32 elem_size;
+	struct sk_psock_progs progs;
+	atomic_t count;
+};
+
+static inline u32 sock_hash_bucket_hash(const void *key, u32 len)
+{
+	return jhash(key, len, 0);
+}
+
+static struct bpf_htab_bucket *sock_hash_select_bucket(struct bpf_htab *htab,
+						       u32 hash)
+{
+	return &htab->buckets[hash & (htab->buckets_num - 1)];
+}
+
+static struct bpf_htab_elem *
+sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
+			  u32 key_size)
+{
+	struct bpf_htab_elem *elem;
+
+	hlist_for_each_entry_rcu(elem, head, node) {
+		if (elem->hash == hash &&
+		    !memcmp(&elem->key, key, key_size))
+			return elem;
+	}
+
+	return NULL;
+}
+
+static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 key_size = map->key_size, hash;
+	struct bpf_htab_bucket *bucket;
+	struct bpf_htab_elem *elem;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	hash = sock_hash_bucket_hash(key, key_size);
+	bucket = sock_hash_select_bucket(htab, hash);
+	elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
+
+	return elem ? elem->sk : NULL;
+}
+
+static void sock_hash_free_elem(struct bpf_htab *htab,
+				struct bpf_htab_elem *elem)
+{
+	atomic_dec(&htab->count);
+	kfree_rcu(elem, rcu);
+}
+
+static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
+				       void *link_raw)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_elem *elem_probe, *elem = link_raw;
+	struct bpf_htab_bucket *bucket;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	bucket = sock_hash_select_bucket(htab, elem->hash);
+
+	/* elem may be deleted in parallel from the map, but access here
+	 * is okay since it's going away only after RCU grace period.
+	 * However, we need to check whether it's still present.
+	 */
+	raw_spin_lock_bh(&bucket->lock);
+	elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
+					       elem->key, map->key_size);
+	if (elem_probe && elem_probe == elem) {
+		hlist_del_rcu(&elem->node);
+		sock_map_unref(elem->sk, elem);
+		sock_hash_free_elem(htab, elem);
+	}
+	raw_spin_unlock_bh(&bucket->lock);
+}
+
+static int sock_hash_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 hash, key_size = map->key_size;
+	struct bpf_htab_bucket *bucket;
+	struct bpf_htab_elem *elem;
+	int ret = -ENOENT;
+
+	hash = sock_hash_bucket_hash(key, key_size);
+	bucket = sock_hash_select_bucket(htab, hash);
+
+	raw_spin_lock_bh(&bucket->lock);
+	elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
+	if (elem) {
+		hlist_del_rcu(&elem->node);
+		sock_map_unref(elem->sk, elem);
+		sock_hash_free_elem(htab, elem);
+		ret = 0;
+	}
+	raw_spin_unlock_bh(&bucket->lock);
+	return ret;
+}
+
+static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
+						  void *key, u32 key_size,
+						  u32 hash, struct sock *sk,
+						  struct bpf_htab_elem *old)
+{
+	struct bpf_htab_elem *new;
+
+	if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
+		if (!old) {
+			atomic_dec(&htab->count);
+			return ERR_PTR(-E2BIG);
+		}
+	}
+
+	new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
+			   htab->map.numa_node);
+	if (!new) {
+		atomic_dec(&htab->count);
+		return ERR_PTR(-ENOMEM);
+	}
+	memcpy(new->key, key, key_size);
+	new->sk = sk;
+	new->hash = hash;
+	return new;
+}
+
+static int sock_hash_update_common(struct bpf_map *map, void *key,
+				   struct sock *sk, u64 flags)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	u32 key_size = map->key_size, hash;
+	struct bpf_htab_elem *elem, *elem_new;
+	struct bpf_htab_bucket *bucket;
+	struct sk_psock_link *link;
+	struct sk_psock *psock;
+	int ret;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	if (unlikely(flags > BPF_EXIST))
+		return -EINVAL;
+	if (unlikely(icsk->icsk_ulp_data))
+		return -EINVAL;
+
+	link = sk_psock_init_link();
+	if (!link)
+		return -ENOMEM;
+
+	ret = sock_map_link(map, &htab->progs, sk);
+	if (ret < 0)
+		goto out_free;
+
+	psock = sk_psock(sk);
+	WARN_ON_ONCE(!psock);
+
+	hash = sock_hash_bucket_hash(key, key_size);
+	bucket = sock_hash_select_bucket(htab, hash);
+
+	raw_spin_lock_bh(&bucket->lock);
+	elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
+	if (elem && flags == BPF_NOEXIST) {
+		ret = -EEXIST;
+		goto out_unlock;
+	} else if (!elem && flags == BPF_EXIST) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	elem_new = sock_hash_alloc_elem(htab, key, key_size, hash, sk, elem);
+	if (IS_ERR(elem_new)) {
+		ret = PTR_ERR(elem_new);
+		goto out_unlock;
+	}
+
+	sock_map_add_link(psock, link, map, elem_new);
+	/* Add new element to the head of the list, so that
+	 * concurrent search will find it before old elem.
+	 */
+	hlist_add_head_rcu(&elem_new->node, &bucket->head);
+	if (elem) {
+		hlist_del_rcu(&elem->node);
+		sock_map_unref(elem->sk, elem);
+		sock_hash_free_elem(htab, elem);
+	}
+	raw_spin_unlock_bh(&bucket->lock);
+	return 0;
+out_unlock:
+	raw_spin_unlock_bh(&bucket->lock);
+	sk_psock_put(sk, psock);
+out_free:
+	sk_psock_free_link(link);
+	return ret;
+}
+
+static int sock_hash_update_elem(struct bpf_map *map, void *key,
+				 void *value, u64 flags)
+{
+	u32 ufd = *(u32 *)value;
+	struct socket *sock;
+	struct sock *sk;
+	int ret;
+
+	sock = sockfd_lookup(ufd, &ret);
+	if (!sock)
+		return ret;
+	sk = sock->sk;
+	if (!sk) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (!sock_map_sk_is_suitable(sk) ||
+	    sk->sk_state != TCP_ESTABLISHED) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	sock_map_sk_acquire(sk);
+	ret = sock_hash_update_common(map, key, sk, flags);
+	sock_map_sk_release(sk);
+out:
+	fput(sock->file);
+	return ret;
+}
+
+static int sock_hash_get_next_key(struct bpf_map *map, void *key,
+				  void *key_next)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_elem *elem, *elem_next;
+	u32 hash, key_size = map->key_size;
+	struct hlist_head *head;
+	int i = 0;
+
+	if (!key)
+		goto find_first_elem;
+	hash = sock_hash_bucket_hash(key, key_size);
+	head = &sock_hash_select_bucket(htab, hash)->head;
+	elem = sock_hash_lookup_elem_raw(head, hash, key, key_size);
+	if (!elem)
+		goto find_first_elem;
+
+	elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
+				     struct bpf_htab_elem, node);
+	if (elem_next) {
+		memcpy(key_next, elem_next->key, key_size);
+		return 0;
+	}
+
+	i = hash & (htab->buckets_num - 1);
+	i++;
+find_first_elem:
+	for (; i < htab->buckets_num; i++) {
+		head = &sock_hash_select_bucket(htab, i)->head;
+		elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+					     struct bpf_htab_elem, node);
+		if (elem_next) {
+			memcpy(key_next, elem_next->key, key_size);
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
+{
+	struct bpf_htab *htab;
+	int i, err;
+	u64 cost;
+
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+	if (attr->max_entries == 0 ||
+	    attr->key_size    == 0 ||
+	    attr->value_size  != 4 ||
+	    attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
+		return ERR_PTR(-EINVAL);
+	if (attr->key_size > MAX_BPF_STACK)
+		return ERR_PTR(-E2BIG);
+
+	htab = kzalloc(sizeof(*htab), GFP_USER);
+	if (!htab)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&htab->map, attr);
+
+	htab->buckets_num = roundup_pow_of_two(htab->map.max_entries);
+	htab->elem_size = sizeof(struct bpf_htab_elem) +
+			  round_up(htab->map.key_size, 8);
+	if (htab->buckets_num == 0 ||
+	    htab->buckets_num > U32_MAX / sizeof(struct bpf_htab_bucket)) {
+		err = -EINVAL;
+		goto free_htab;
+	}
+
+	cost = (u64) htab->buckets_num * sizeof(struct bpf_htab_bucket) +
+	       (u64) htab->elem_size * htab->map.max_entries;
+	if (cost >= U32_MAX - PAGE_SIZE) {
+		err = -EINVAL;
+		goto free_htab;
+	}
+
+	htab->buckets = bpf_map_area_alloc(htab->buckets_num *
+					   sizeof(struct bpf_htab_bucket),
+					   htab->map.numa_node);
+	if (!htab->buckets) {
+		err = -ENOMEM;
+		goto free_htab;
+	}
+
+	for (i = 0; i < htab->buckets_num; i++) {
+		INIT_HLIST_HEAD(&htab->buckets[i].head);
+		raw_spin_lock_init(&htab->buckets[i].lock);
+	}
+
+	return &htab->map;
+free_htab:
+	kfree(htab);
+	return ERR_PTR(err);
+}
+
+static void sock_hash_free(struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_bucket *bucket;
+	struct bpf_htab_elem *elem;
+	struct hlist_node *node;
+	int i;
+
+	synchronize_rcu();
+	rcu_read_lock();
+	for (i = 0; i < htab->buckets_num; i++) {
+		bucket = sock_hash_select_bucket(htab, i);
+		raw_spin_lock_bh(&bucket->lock);
+		hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
+			hlist_del_rcu(&elem->node);
+			sock_map_unref(elem->sk, elem);
+		}
+		raw_spin_unlock_bh(&bucket->lock);
+	}
+	rcu_read_unlock();
+
+	bpf_map_area_free(htab->buckets);
+	kfree(htab);
+}
+
+static void sock_hash_release_progs(struct bpf_map *map)
+{
+	psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs);
+}
+
+BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (likely(sock_map_sk_is_suitable(sops->sk) &&
+		   sock_map_op_okay(sops)))
+		return sock_hash_update_common(map, key, sops->sk, flags);
+	return -EOPNOTSUPP;
+}
+
+const struct bpf_func_proto bpf_sock_hash_update_proto = {
+	.func		= bpf_sock_hash_update,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_PTR_TO_MAP_KEY,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+	tcb->bpf.flags = flags;
+	tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key);
+	if (!tcb->bpf.sk_redir)
+		return SK_DROP;
+	return SK_PASS;
+}
+
+const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
+	.func           = bpf_sk_redirect_hash,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_PTR_TO_MAP_KEY,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+	msg->flags = flags;
+	msg->sk_redir = __sock_hash_lookup_elem(map, key);
+	if (!msg->sk_redir)
+		return SK_DROP;
+	return SK_PASS;
+}
+
+const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
+	.func           = bpf_msg_redirect_hash,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_PTR_TO_MAP_KEY,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+const struct bpf_map_ops sock_hash_ops = {
+	.map_alloc		= sock_hash_alloc,
+	.map_free		= sock_hash_free,
+	.map_get_next_key	= sock_hash_get_next_key,
+	.map_update_elem	= sock_hash_update_elem,
+	.map_delete_elem	= sock_hash_delete_elem,
+	.map_lookup_elem	= sock_map_lookup,
+	.map_release_uref	= sock_hash_release_progs,
+	.map_check_btf		= map_check_no_btf,
+};
+
+static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
+{
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_SOCKMAP:
+		return &container_of(map, struct bpf_stab, map)->progs;
+	case BPF_MAP_TYPE_SOCKHASH:
+		return &container_of(map, struct bpf_htab, map)->progs;
+	default:
+		break;
+	}
+
+	return NULL;
+}
+
+int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+			 u32 which)
+{
+	struct sk_psock_progs *progs = sock_map_progs(map);
+
+	if (!progs)
+		return -EOPNOTSUPP;
+
+	switch (which) {
+	case BPF_SK_MSG_VERDICT:
+		psock_set_prog(&progs->msg_parser, prog);
+		break;
+	case BPF_SK_SKB_STREAM_PARSER:
+		psock_set_prog(&progs->skb_parser, prog);
+		break;
+	case BPF_SK_SKB_STREAM_VERDICT:
+		psock_set_prog(&progs->skb_verdict, prog);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link)
+{
+	switch (link->map->map_type) {
+	case BPF_MAP_TYPE_SOCKMAP:
+		return sock_map_delete_from_link(link->map, sk,
+						 link->link_raw);
+	case BPF_MAP_TYPE_SOCKHASH:
+		return sock_hash_delete_from_link(link->map, sk,
+						  link->link_raw);
+	default:
+		break;
+	}
+}
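From user space, a sockmap entry is written like any other BPF map element, with the value being the socket's file descriptor; sock_map_update_elem() above then insists on an established TCP stream socket. A minimal sketch using libbpf (map_fd and sock_fd are assumed to already exist):

    #include <bpf/bpf.h>
    #include <linux/bpf.h>

    int add_to_sockmap(int map_fd, int sock_fd)
    {
    	__u32 key = 0;		/* sockmap keys are 4-byte indexes */
    	__u32 value = sock_fd;	/* values are 4-byte socket fds */

    	/* Fails with EOPNOTSUPP unless sock_fd is an established
    	 * SOCK_STREAM/IPPROTO_TCP socket (see sock_map_update_elem).
    	 */
    	return bpf_map_update_elem(map_fd, &key, &value, BPF_ANY);
    }
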
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index ba5cba5..f3ceec9 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -144,6 +144,8 @@
  *  reuseport_add_sock - Add a socket to the reuseport group of another.
  *  @sk:  New socket to add to the group.
  *  @sk2: Socket belonging to the existing reuseport group.
+ *  @bind_inany: Whether or not the group is bound to a local INANY address.
+ *
  *  May return ENOMEM and not add socket to group under memory pressure.
  */
 int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
@@ -187,6 +189,7 @@
 		call_rcu(&old_reuse->rcu, reuseport_free_rcu);
 	return 0;
 }
+EXPORT_SYMBOL(reuseport_add_sock);
 
 void reuseport_detach_sock(struct sock *sk)
 {
@@ -292,8 +295,19 @@
 
 select_by_hash:
 		/* no bpf or invalid bpf result: fall back to hash usage */
-		if (!sk2)
-			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+		if (!sk2) {
+			int i, j;
+
+			i = j = reciprocal_scale(hash, socks);
+			while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+				i++;
+				if (i >= reuse->num_socks)
+					i = 0;
+				if (i == j)
+					goto out;
+			}
+			sk2 = reuse->socks[i];
+		}
 	}
 
 out:
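The fallback no longer blindly takes the hashed slot: it scans forward from it, skipping sockets already in TCP_ESTABLISHED state (e.g. connected UDP sockets, which are served by their own 4-tuple lookup), and gives up only after wrapping back to the start. reciprocal_scale() itself maps the 32-bit hash onto [0, socks) with a multiply and shift instead of a modulo:

    /* include/linux/kernel.h */
    static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
    {
    	return (u32)(((u64) val * ep_ro) >> 32);
    }

    /* e.g. hash = 0x80000000, socks = 6: (0x80000000ULL * 6) >> 32 = 3 */
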
@@ -329,3 +343,27 @@
 	return 0;
 }
 EXPORT_SYMBOL(reuseport_attach_prog);
+
+int reuseport_detach_prog(struct sock *sk)
+{
+	struct sock_reuseport *reuse;
+	struct bpf_prog *old_prog;
+
+	if (!rcu_access_pointer(sk->sk_reuseport_cb))
+		return sk->sk_reuseport ? -ENOENT : -EINVAL;
+
+	old_prog = NULL;
+	spin_lock_bh(&reuseport_lock);
+	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+					  lockdep_is_held(&reuseport_lock));
+	rcu_swap_protected(reuse->prog, old_prog,
+			   lockdep_is_held(&reuseport_lock));
+	spin_unlock_bh(&reuseport_lock);
+
+	if (!old_prog)
+		return -ENOENT;
+
+	sk_reuseport_prog_free(old_prog);
+	return 0;
+}
+EXPORT_SYMBOL(reuseport_detach_prog);
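reuseport_detach_prog() swaps the attached program out under reuseport_lock and frees the old one outside any RCU read section. rcu_swap_protected() is the exchange primitive: it publishes the new pointer with rcu_assign_pointer() and hands the old one back through the plain variable (here old_prog starts as NULL, so NULL is swapped in). Roughly, from include/linux/rcupdate.h of this era:

    #define rcu_swap_protected(rcu_ptr, ptr, c) do {			\
    	typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c));	\
    	rcu_assign_pointer((rcu_ptr), (ptr));				\
    	(ptr) = __tmp;							\
    } while (0)
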
diff --git a/net/core/stream.c b/net/core/stream.c
index 7d329fb..4f1d4aa 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -32,7 +32,7 @@
 	struct socket *sock = sk->sk_socket;
 	struct socket_wq *wq;
 
-	if (sk_stream_is_writeable(sk) && sock) {
+	if (__sk_stream_is_writeable(sk, 1) && sock) {
 		clear_bit(SOCK_NOSPACE, &sock->flags);
 
 		rcu_read_lock();
@@ -120,7 +120,6 @@
 	int err = 0;
 	long vm_wait = 0;
 	long current_timeo = *timeo_p;
-	bool noblock = (*timeo_p ? false : true);
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	if (sk_stream_memory_free(sk))
@@ -133,11 +132,8 @@
 
 		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 			goto do_error;
-		if (!*timeo_p) {
-			if (noblock)
-				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-			goto do_nonblock;
-		}
+		if (!*timeo_p)
+			goto do_eagain;
 		if (signal_pending(current))
 			goto do_interrupted;
 		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -169,7 +165,13 @@
 do_error:
 	err = -EPIPE;
 	goto out;
-do_nonblock:
+do_eagain:
+	/* Make sure that whenever EAGAIN is returned, EPOLLOUT event can
+	 * be generated later.
+	 * When TCP receives ACK packets that make room, tcp_check_space()
+	 * only calls tcp_new_space() if SOCK_NOSPACE is set.
+	 */
+	set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 	err = -EAGAIN;
 	goto out;
 do_interrupted:
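Setting SOCK_NOSPACE before every EAGAIN is what makes the usual non-blocking write loop safe: without it, tcp_check_space() would skip tcp_new_space() and a writer parked in epoll could miss its EPOLLOUT forever. A minimal user-space loop that relies on this guarantee (epfd is assumed to already poll fd for EPOLLOUT):

    #include <errno.h>
    #include <sys/epoll.h>
    #include <unistd.h>

    ssize_t write_all(int epfd, int fd, const char *buf, size_t len)
    {
    	size_t off = 0;

    	while (off < len) {
    		ssize_t n = write(fd, buf + off, len - off);

    		if (n > 0) {
    			off += n;
    		} else if (n < 0 && errno == EAGAIN) {
    			struct epoll_event ev;

    			/* Safe to block: EAGAIN guaranteed SOCK_NOSPACE,
    			 * so an ACK freeing memory will raise EPOLLOUT.
    			 */
    			if (epoll_wait(epfd, &ev, 1, -1) < 0)
    				return -1;
    		} else {
    			return -1;
    		}
    	}
    	return off;
    }
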
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b1a2c5e..eb29e5a 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -22,18 +22,27 @@
 #include <net/busy_poll.h>
 #include <net/pkt_sched.h>
 
-static int zero = 0;
-static int one = 1;
 static int two __maybe_unused = 2;
 static int min_sndbuf = SOCK_MIN_SNDBUF;
 static int min_rcvbuf = SOCK_MIN_RCVBUF;
 static int max_skb_frags = MAX_SKB_FRAGS;
+static long long_one __maybe_unused = 1;
+static long long_max __maybe_unused = LONG_MAX;
 
 static int net_msg_warn;	/* Unused, but still a sysctl */
 
 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
 
+/* 0 - Keep current behavior:
+ *     IPv4: inherit all current settings from init_net
+ *     IPv6: reset all settings to default
+ * 1 - Both inherit all current settings from init_net
+ * 2 - Both reset all settings to default
+ */
+int sysctl_devconf_inherit_init_net __read_mostly;
+EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
+
 #ifdef CONFIG_RPS
 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -84,12 +93,12 @@
 		if (sock_table != orig_sock_table) {
 			rcu_assign_pointer(rps_sock_flow_table, sock_table);
 			if (sock_table) {
-				static_key_slow_inc(&rps_needed);
-				static_key_slow_inc(&rfs_needed);
+				static_branch_inc(&rps_needed);
+				static_branch_inc(&rfs_needed);
 			}
 			if (orig_sock_table) {
-				static_key_slow_dec(&rps_needed);
-				static_key_slow_dec(&rfs_needed);
+				static_branch_dec(&rps_needed);
+				static_branch_dec(&rfs_needed);
 				synchronize_rcu();
 				vfree(orig_sock_table);
 			}
@@ -279,7 +288,6 @@
 	return ret;
 }
 
-# ifdef CONFIG_HAVE_EBPF_JIT
 static int
 proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
 				    void __user *buffer, size_t *lenp,
@@ -290,7 +298,17 @@
 
 	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 }
-# endif
+
+static int
+proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
+				     void __user *buffer, size_t *lenp,
+				     loff_t *ppos)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+}
 #endif
 
 static struct ctl_table net_core_table[] = {
@@ -370,10 +388,10 @@
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax_bpf_enable,
 # ifdef CONFIG_BPF_JIT_ALWAYS_ON
-		.extra1		= &one,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_ONE,
 # else
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 # endif
 	},
@@ -384,7 +402,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0600,
 		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 	},
 	{
@@ -393,10 +411,19 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0600,
 		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 # endif
+	{
+		.procname	= "bpf_jit_limit",
+		.data		= &bpf_jit_limit,
+		.maxlen		= sizeof(long),
+		.mode		= 0600,
+		.proc_handler	= proc_dolongvec_minmax_bpf_restricted,
+		.extra1		= &long_one,
+		.extra2		= &long_max,
+	},
 #endif
 	{
 		.procname	= "netdev_tstamp_prequeue",
@@ -432,8 +459,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE
 	},
 #ifdef CONFIG_RPS
 	{
@@ -464,7 +491,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 	},
 	{
 		.procname	= "busy_read",
@@ -472,7 +499,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 	},
 #endif
 #ifdef CONFIG_NET_SCHED
@@ -504,7 +531,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= SYSCTL_ONE,
 		.extra2		= &max_skb_frags,
 	},
 	{
@@ -513,7 +540,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 	},
 	{
 		.procname	= "fb_tunnels_only_for_init_net",
@@ -521,8 +548,32 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "devconf_inherit_init_net",
+		.data		= &sysctl_devconf_inherit_init_net,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+	{
+		.procname	= "high_order_alloc_disable",
+		.data		= &net_high_order_alloc_disable_key.key,
+		.maxlen         = sizeof(net_high_order_alloc_disable_key),
+		.mode		= 0644,
+		.proc_handler	= proc_do_static_key,
+	},
+	{
+		.procname	= "gro_normal_batch",
+		.data		= &gro_normal_batch,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
 	},
 	{ }
 };
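The new entries land under /proc/sys/net/core/ with the rest of this table: bpf_jit_limit (a long, behind the CAP_SYS_ADMIN-gated handler), devconf_inherit_init_net (0-2, per the comment above), high_order_alloc_disable (backed directly by the static key via proc_do_static_key) and gro_normal_batch. A sketch of flipping one from C rather than via sysctl(8):

    #include <fcntl.h>
    #include <unistd.h>

    /* Disable order-3 page-frag allocation; needs root. */
    int set_high_order_alloc_disable(int on)
    {
    	int fd = open("/proc/sys/net/core/high_order_alloc_disable",
    		      O_WRONLY);
    	char val = on ? '1' : '0';

    	if (fd < 0)
    		return -1;
    	if (write(fd, &val, 1) != 1) {
    		close(fd);
    		return -1;
    	}
    	return close(fd);
    }
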
@@ -533,7 +584,7 @@
 		.data		= &init_net.core.sysctl_somaxconn,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.proc_handler	= proc_dointvec_minmax
 	},
 	{ }
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 42689d5..7911235 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * PTP 1588 clock support - support for timestamping in PHY devices
  *
  * Copyright (C) 2010 OMICRON electronics GmbH
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #include <linux/errqueue.h>
 #include <linux/phy.h>
diff --git a/net/core/tso.c b/net/core/tso.c
index 43f4eba..d4d5c07 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -55,8 +55,8 @@
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
 
 		/* Move to next segment */
-		tso->size = frag->size;
-		tso->data = page_address(frag->page.p) + frag->page_offset;
+		tso->size = skb_frag_size(frag);
+		tso->data = skb_frag_address(frag);
 		tso->next_frag_idx++;
 	}
 }
@@ -79,8 +79,8 @@
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
 
 		/* Move to next segment */
-		tso->size = frag->size;
-		tso->data = page_address(frag->page.p) + frag->page_offset;
+		tso->size = skb_frag_size(frag);
+		tso->data = skb_frag_address(frag);
 		tso->next_frag_idx++;
 	}
 }
diff --git a/net/core/utils.c b/net/core/utils.c
index 2a597ac..6b6e51d 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
 *	Generic address resolution entity
  *
@@ -7,11 +8,6 @@
  *	in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project
  *
  *	Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 89b6785..d7bf62f 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* net/core/xdp.c
  *
  * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
- * Released under terms in GPL version 2.  See COPYING.
  */
 #include <linux/bpf.h>
 #include <linux/filter.h>
@@ -14,6 +14,8 @@
 #include <net/page_pool.h>
 
 #include <net/xdp.h>
+#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
+#include <trace/events/xdp.h>
 
 #define REG_STATE_NEW		0x0
 #define REG_STATE_REGISTERED	0x1
@@ -29,17 +31,6 @@
 static bool mem_id_init; /* false */
 static struct rhashtable *mem_id_ht;
 
-struct xdp_mem_allocator {
-	struct xdp_mem_info mem;
-	union {
-		void *allocator;
-		struct page_pool *page_pool;
-		struct zero_copy_allocator *zc_alloc;
-	};
-	struct rhash_head node;
-	struct rcu_head rcu;
-};
-
 static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
 {
 	const u32 *k = data;
@@ -79,13 +70,13 @@
 
 	xa = container_of(rcu, struct xdp_mem_allocator, rcu);
 
+	/* The allocator has indicated it is safe to remove before this is called */
+	if (xa->mem.type == MEM_TYPE_PAGE_POOL)
+		page_pool_free(xa->page_pool);
+
 	/* Allow this ID to be reused */
 	ida_simple_remove(&mem_id_pool, xa->mem.id);
 
-	/* Notice, driver is expected to free the *allocator,
-	 * e.g. page_pool, and MUST also use RCU free.
-	 */
-
 	/* Poison memory */
 	xa->mem.id = 0xFFFF;
 	xa->mem.type = 0xF0F0;
@@ -94,23 +85,106 @@
 	kfree(xa);
 }
 
-static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+static bool __mem_id_disconnect(int id, bool force)
 {
 	struct xdp_mem_allocator *xa;
-	int id = xdp_rxq->mem.id;
-
-	if (id == 0)
-		return;
+	bool safe_to_remove = true;
 
 	mutex_lock(&mem_id_lock);
 
 	xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
-	if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
+	if (!xa) {
+		mutex_unlock(&mem_id_lock);
+		WARN(1, "Request remove non-existing id(%d), driver bug?", id);
+		return true;
+	}
+	xa->disconnect_cnt++;
+
+	/* Detects in-flight packet-pages for page_pool */
+	if (xa->mem.type == MEM_TYPE_PAGE_POOL)
+		safe_to_remove = page_pool_request_shutdown(xa->page_pool);
+
+	trace_mem_disconnect(xa, safe_to_remove, force);
+
+	if ((safe_to_remove || force) &&
+	    !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
 		call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
 
 	mutex_unlock(&mem_id_lock);
+	return (safe_to_remove || force);
 }
 
+#define DEFER_TIME (msecs_to_jiffies(1000))
+#define DEFER_WARN_INTERVAL (30 * HZ)
+#define DEFER_MAX_RETRIES 120
+
+static void mem_id_disconnect_defer_retry(struct work_struct *wq)
+{
+	struct delayed_work *dwq = to_delayed_work(wq);
+	struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq);
+	bool force = false;
+
+	if (xa->disconnect_cnt > DEFER_MAX_RETRIES)
+		force = true;
+
+	if (__mem_id_disconnect(xa->mem.id, force))
+		return;
+
+	/* Periodic warning */
+	if (time_after_eq(jiffies, xa->defer_warn)) {
+		int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ;
+
+		pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n",
+			__func__, xa->mem.id, xa->disconnect_cnt, sec);
+		xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
+	}
+
+	/* Still not ready to be disconnected, retry later */
+	schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
+}
+
+void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+	struct xdp_mem_allocator *xa;
+	int id = xdp_rxq->mem.id;
+
+	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+		WARN(1, "Missing register, driver bug");
+		return;
+	}
+
+	if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL &&
+	    xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) {
+		return;
+	}
+
+	if (id == 0)
+		return;
+
+	if (__mem_id_disconnect(id, false))
+		return;
+
+	/* Could not disconnect; defer a new disconnect attempt until later */
+	mutex_lock(&mem_id_lock);
+
+	xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
+	if (!xa) {
+		mutex_unlock(&mem_id_lock);
+		return;
+	}
+	xa->defer_start = jiffies;
+	xa->defer_warn  = jiffies + DEFER_WARN_INTERVAL;
+
+	INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry);
+	mutex_unlock(&mem_id_lock);
+	schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
+
+/* This unregister operation will also clean up and destroy the
+ * allocator. The page_pool_free() operation is first called when it's
+ * safe to remove, possibly deferred to a workqueue.
+ */
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
 {
 	/* Simplify driver cleanup code paths, allow unreg "unused" */
@@ -119,7 +193,7 @@
 
 	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
 
-	__xdp_rxq_info_unreg_mem_model(xdp_rxq);
+	xdp_rxq_info_unreg_mem_model(xdp_rxq);
 
 	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
 	xdp_rxq->dev = NULL;
@@ -290,12 +364,18 @@
 	/* Insert allocator into ID lookup table */
 	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
 	if (IS_ERR(ptr)) {
+		ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
+		xdp_rxq->mem.id = 0;
 		errno = PTR_ERR(ptr);
 		goto err;
 	}
 
+	if (type == MEM_TYPE_PAGE_POOL)
+		page_pool_get(xdp_alloc->page_pool);
+
 	mutex_unlock(&mem_id_lock);
 
+	trace_mem_connect(xdp_alloc, xdp_rxq);
 	return 0;
 err:
 	mutex_unlock(&mem_id_lock);
@@ -322,10 +402,13 @@
 		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
 		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
 		page = virt_to_head_page(data);
-		if (xa) {
+		if (likely(xa)) {
 			napi_direct &= !xdp_return_frame_no_direct();
 			page_pool_put_page(xa->page_pool, page, napi_direct);
 		} else {
+			/* Hopefully the stack trace shows who to blame for the late return */
+			WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
+			trace_mem_return_failed(mem, page);
 			put_page(page);
 		}
 		rcu_read_unlock();
@@ -368,6 +451,21 @@
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
 
+/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
+void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
+{
+	struct xdp_mem_allocator *xa;
+	struct page *page;
+
+	rcu_read_lock();
+	xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+	page = virt_to_head_page(data);
+	if (xa)
+		page_pool_release_page(xa->page_pool, page);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(__xdp_release_frame);
+
 int xdp_attachment_query(struct xdp_attachment_info *info,
 			 struct netdev_bpf *bpf)
 {
@@ -398,3 +496,41 @@
 	info->flags = bpf->flags;
 }
 EXPORT_SYMBOL_GPL(xdp_attachment_setup);
+
+struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
+{
+	unsigned int metasize, totsize;
+	void *addr, *data_to_copy;
+	struct xdp_frame *xdpf;
+	struct page *page;
+
+	/* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */
+	metasize = xdp_data_meta_unsupported(xdp) ? 0 :
+		   xdp->data - xdp->data_meta;
+	totsize = xdp->data_end - xdp->data + metasize;
+
+	if (sizeof(*xdpf) + totsize > PAGE_SIZE)
+		return NULL;
+
+	page = dev_alloc_page();
+	if (!page)
+		return NULL;
+
+	addr = page_to_virt(page);
+	xdpf = addr;
+	memset(xdpf, 0, sizeof(*xdpf));
+
+	addr += sizeof(*xdpf);
+	data_to_copy = metasize ? xdp->data_meta : xdp->data;
+	memcpy(addr, data_to_copy, totsize);
+
+	xdpf->data = addr + metasize;
+	xdpf->len = totsize - metasize;
+	xdpf->headroom = 0;
+	xdpf->metasize = metasize;
+	xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
+
+	xdp_return_buff(xdp);
+	return xdpf;
+}
+EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
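xdp_convert_zc_to_xdp_frame() copies a zero-copy buffer into a single freshly allocated order-0 page, with the frame descriptor inlined at the front; the PAGE_SIZE test up front is what guarantees the copy fits. The resulting layout, with illustrative sizes:

    /*
     *  page_to_virt(page)
     *  |
     *  v
     *  +------------------+------------+----------------------+
     *  | struct xdp_frame | meta bytes | packet data          |
     *  +------------------+------------+----------------------+
     *                                  ^
     *                                  xdpf->data
     *
     * e.g. a 1500-byte packet with 16 bytes of metadata:
     *   totsize = 1500 + 16 = 1516, and sizeof(*xdpf) + 1516 fits
     *   easily in a 4096-byte page; headroom is always 0 here.
     */
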
diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig
index 4066d59..917e6e7 100644
--- a/net/dcb/Kconfig
+++ b/net/dcb/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config DCB
 	bool "Data Center Bridging support"
 	default n
diff --git a/net/dcb/Makefile b/net/dcb/Makefile
index c1282c9..3016e5a 100644
--- a/net/dcb/Makefile
+++ b/net/dcb/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_DCB) += dcbnl.o dcbevent.o
diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c
index a520d80..8620564 100644
--- a/net/dcb/dcbevent.c
+++ b/net/dcb/dcbevent.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2010, Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Author: John Fastabend <john.r.fastabend@intel.com>
  */
 
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index a556cd7..d2a4553 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2011, Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Description: Data Center Bridging netlink interface
  * Author: Lucy Liu <lucy.liu@intel.com>
  */
@@ -241,12 +230,13 @@
 	if (!netdev->dcbnl_ops->getpfccfg)
 		return -EOPNOTSUPP;
 
-	ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
-			       tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL);
+	ret = nla_parse_nested_deprecated(data, DCB_PFC_UP_ATTR_MAX,
+					  tb[DCB_ATTR_PFC_CFG],
+					  dcbnl_pfc_up_nest, NULL);
 	if (ret)
 		return ret;
 
-	nest = nla_nest_start(skb, DCB_ATTR_PFC_CFG);
+	nest = nla_nest_start_noflag(skb, DCB_ATTR_PFC_CFG);
 	if (!nest)
 		return -EMSGSIZE;
 
@@ -299,12 +289,13 @@
 	if (!netdev->dcbnl_ops->getcap)
 		return -EOPNOTSUPP;
 
-	ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP],
-			       dcbnl_cap_nest, NULL);
+	ret = nla_parse_nested_deprecated(data, DCB_CAP_ATTR_MAX,
+					  tb[DCB_ATTR_CAP], dcbnl_cap_nest,
+					  NULL);
 	if (ret)
 		return ret;
 
-	nest = nla_nest_start(skb, DCB_ATTR_CAP);
+	nest = nla_nest_start_noflag(skb, DCB_ATTR_CAP);
 	if (!nest)
 		return -EMSGSIZE;
 
@@ -343,12 +334,13 @@
 	if (!netdev->dcbnl_ops->getnumtcs)
 		return -EOPNOTSUPP;
 
-	ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS],
-			       dcbnl_numtcs_nest, NULL);
+	ret = nla_parse_nested_deprecated(data, DCB_NUMTCS_ATTR_MAX,
+					  tb[DCB_ATTR_NUMTCS],
+					  dcbnl_numtcs_nest, NULL);
 	if (ret)
 		return ret;
 
-	nest = nla_nest_start(skb, DCB_ATTR_NUMTCS);
+	nest = nla_nest_start_noflag(skb, DCB_ATTR_NUMTCS);
 	if (!nest)
 		return -EMSGSIZE;
 
@@ -388,8 +380,9 @@
 	if (!netdev->dcbnl_ops->setnumtcs)
 		return -EOPNOTSUPP;
 
-	ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS],
-			       dcbnl_numtcs_nest, NULL);
+	ret = nla_parse_nested_deprecated(data, DCB_NUMTCS_ATTR_MAX,
+					  tb[DCB_ATTR_NUMTCS],
+					  dcbnl_numtcs_nest, NULL);
 	if (ret)
 		return ret;
 
@@ -447,8 +440,9 @@
 	if (!tb[DCB_ATTR_APP])
 		return -EINVAL;
 
-	ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP],
-			       dcbnl_app_nest, NULL);
+	ret = nla_parse_nested_deprecated(app_tb, DCB_APP_ATTR_MAX,
+					  tb[DCB_ATTR_APP], dcbnl_app_nest,
+					  NULL);
 	if (ret)
 		return ret;
 
@@ -479,7 +473,7 @@
 		up = dcb_getapp(netdev, &app);
 	}
 
-	app_nest = nla_nest_start(skb, DCB_ATTR_APP);
+	app_nest = nla_nest_start_noflag(skb, DCB_ATTR_APP);
 	if (!app_nest)
 		return -EMSGSIZE;
 
@@ -515,8 +509,9 @@
 	if (!tb[DCB_ATTR_APP])
 		return -EINVAL;
 
-	ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP],
-			       dcbnl_app_nest, NULL);
+	ret = nla_parse_nested_deprecated(app_tb, DCB_APP_ATTR_MAX,
+					  tb[DCB_ATTR_APP], dcbnl_app_nest,
+					  NULL);
 	if (ret)
 		return ret;
 
@@ -573,12 +568,13 @@
 	    !netdev->dcbnl_ops->getpgbwgcfgrx)
 		return -EOPNOTSUPP;
 
-	ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG],
-			       dcbnl_pg_nest, NULL);
+	ret = nla_parse_nested_deprecated(pg_tb, DCB_PG_ATTR_MAX,
+					  tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest,
+					  NULL);
 	if (ret)
 		return ret;
 
-	pg_nest = nla_nest_start(skb, DCB_ATTR_PG_CFG);
+	pg_nest = nla_nest_start_noflag(skb, DCB_ATTR_PG_CFG);
 	if (!pg_nest)
 		return -EMSGSIZE;
 
@@ -593,12 +589,13 @@
 			data = pg_tb[DCB_PG_ATTR_TC_ALL];
 		else
 			data = pg_tb[i];
-		ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, data,
-				       dcbnl_tc_param_nest, NULL);
+		ret = nla_parse_nested_deprecated(param_tb,
+						  DCB_TC_ATTR_PARAM_MAX, data,
+						  dcbnl_tc_param_nest, NULL);
 		if (ret)
 			goto err_pg;
 
-		param_nest = nla_nest_start(skb, i);
+		param_nest = nla_nest_start_noflag(skb, i);
 		if (!param_nest)
 			goto err_pg;
 
@@ -730,8 +727,9 @@
 	if (!netdev->dcbnl_ops->setpfccfg)
 		return -EOPNOTSUPP;
 
-	ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
-			       tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL);
+	ret = nla_parse_nested_deprecated(data, DCB_PFC_UP_ATTR_MAX,
+					  tb[DCB_ATTR_PFC_CFG],
+					  dcbnl_pfc_up_nest, NULL);
 	if (ret)
 		return ret;
 
@@ -786,8 +784,9 @@
 	    !netdev->dcbnl_ops->setpgbwgcfgrx)
 		return -EOPNOTSUPP;
 
-	ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG],
-			       dcbnl_pg_nest, NULL);
+	ret = nla_parse_nested_deprecated(pg_tb, DCB_PG_ATTR_MAX,
+					  tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest,
+					  NULL);
 	if (ret)
 		return ret;
 
@@ -795,8 +794,10 @@
 		if (!pg_tb[i])
 			continue;
 
-		ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
-				       pg_tb[i], dcbnl_tc_param_nest, NULL);
+		ret = nla_parse_nested_deprecated(param_tb,
+						  DCB_TC_ATTR_PARAM_MAX,
+						  pg_tb[i],
+						  dcbnl_tc_param_nest, NULL);
 		if (ret)
 			return ret;
 
@@ -884,12 +885,13 @@
 	    !netdev->dcbnl_ops->getbcncfg)
 		return -EOPNOTSUPP;
 
-	ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN],
-			       dcbnl_bcn_nest, NULL);
+	ret = nla_parse_nested_deprecated(bcn_tb, DCB_BCN_ATTR_MAX,
+					  tb[DCB_ATTR_BCN], dcbnl_bcn_nest,
+					  NULL);
 	if (ret)
 		return ret;
 
-	bcn_nest = nla_nest_start(skb, DCB_ATTR_BCN);
+	bcn_nest = nla_nest_start_noflag(skb, DCB_ATTR_BCN);
 	if (!bcn_nest)
 		return -EMSGSIZE;
 
@@ -943,8 +945,9 @@
 	    !netdev->dcbnl_ops->setbcnrp)
 		return -EOPNOTSUPP;
 
-	ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN],
-			       dcbnl_pfc_up_nest, NULL);
+	ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX,
+					  tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest,
+					  NULL);
 	if (ret)
 		return ret;
 
@@ -1002,7 +1005,7 @@
 		 */
 		err = -EMSGSIZE;
 
-		app = nla_nest_start(skb, app_nested_type);
+		app = nla_nest_start_noflag(skb, app_nested_type);
 		if (!app)
 			goto nla_put_failure;
 
@@ -1036,7 +1039,7 @@
 	if (nla_put_string(skb, DCB_ATTR_IFNAME, netdev->name))
 		return -EMSGSIZE;
 
-	ieee = nla_nest_start(skb, DCB_ATTR_IEEE);
+	ieee = nla_nest_start_noflag(skb, DCB_ATTR_IEEE);
 	if (!ieee)
 		return -EMSGSIZE;
 
@@ -1106,7 +1109,7 @@
 			return -EMSGSIZE;
 	}
 
-	app = nla_nest_start(skb, DCB_ATTR_IEEE_APP_TABLE);
+	app = nla_nest_start_noflag(skb, DCB_ATTR_IEEE_APP_TABLE);
 	if (!app)
 		return -EMSGSIZE;
 
@@ -1174,13 +1177,13 @@
 	u8 pgid, up_map, prio, tc_pct;
 	const struct dcbnl_rtnl_ops *ops = dev->dcbnl_ops;
 	int i = dir ? DCB_ATTR_CEE_TX_PG : DCB_ATTR_CEE_RX_PG;
-	struct nlattr *pg = nla_nest_start(skb, i);
+	struct nlattr *pg = nla_nest_start_noflag(skb, i);
 
 	if (!pg)
 		return -EMSGSIZE;
 
 	for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
-		struct nlattr *tc_nest = nla_nest_start(skb, i);
+		struct nlattr *tc_nest = nla_nest_start_noflag(skb, i);
 
 		if (!tc_nest)
 			return -EMSGSIZE;
@@ -1231,7 +1234,7 @@
 
 	if (nla_put_string(skb, DCB_ATTR_IFNAME, netdev->name))
 		goto nla_put_failure;
-	cee = nla_nest_start(skb, DCB_ATTR_CEE);
+	cee = nla_nest_start_noflag(skb, DCB_ATTR_CEE);
 	if (!cee)
 		goto nla_put_failure;
 
@@ -1250,7 +1253,8 @@
 
 	/* local pfc */
 	if (ops->getpfccfg) {
-		struct nlattr *pfc_nest = nla_nest_start(skb, DCB_ATTR_CEE_PFC);
+		struct nlattr *pfc_nest = nla_nest_start_noflag(skb,
+								DCB_ATTR_CEE_PFC);
 
 		if (!pfc_nest)
 			goto nla_put_failure;
@@ -1265,14 +1269,14 @@
 
 	/* local app */
 	spin_lock_bh(&dcb_lock);
-	app = nla_nest_start(skb, DCB_ATTR_CEE_APP_TABLE);
+	app = nla_nest_start_noflag(skb, DCB_ATTR_CEE_APP_TABLE);
 	if (!app)
 		goto dcb_unlock;
 
 	list_for_each_entry(itr, &dcb_app_list, list) {
 		if (itr->ifindex == netdev->ifindex) {
-			struct nlattr *app_nest = nla_nest_start(skb,
-								 DCB_ATTR_APP);
+			struct nlattr *app_nest = nla_nest_start_noflag(skb,
+									DCB_ATTR_APP);
 			if (!app_nest)
 				goto dcb_unlock;
 
@@ -1305,7 +1309,8 @@
 
 	/* features flags */
 	if (ops->getfeatcfg) {
-		struct nlattr *feat = nla_nest_start(skb, DCB_ATTR_CEE_FEAT);
+		struct nlattr *feat = nla_nest_start_noflag(skb,
+							    DCB_ATTR_CEE_FEAT);
 		if (!feat)
 			goto nla_put_failure;
 
@@ -1429,8 +1434,9 @@
 	if (!tb[DCB_ATTR_IEEE])
 		return -EINVAL;
 
-	err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE],
-			       dcbnl_ieee_policy, NULL);
+	err = nla_parse_nested_deprecated(ieee, DCB_ATTR_IEEE_MAX,
+					  tb[DCB_ATTR_IEEE],
+					  dcbnl_ieee_policy, NULL);
 	if (err)
 		return err;
 
@@ -1529,8 +1535,9 @@
 	if (!tb[DCB_ATTR_IEEE])
 		return -EINVAL;
 
-	err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE],
-			       dcbnl_ieee_policy, NULL);
+	err = nla_parse_nested_deprecated(ieee, DCB_ATTR_IEEE_MAX,
+					  tb[DCB_ATTR_IEEE],
+					  dcbnl_ieee_policy, NULL);
 	if (err)
 		return err;
 
@@ -1602,12 +1609,13 @@
 	if (!tb[DCB_ATTR_FEATCFG])
 		return -EINVAL;
 
-	ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX,
-			       tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL);
+	ret = nla_parse_nested_deprecated(data, DCB_FEATCFG_ATTR_MAX,
+					  tb[DCB_ATTR_FEATCFG],
+					  dcbnl_featcfg_nest, NULL);
 	if (ret)
 		return ret;
 
-	nest = nla_nest_start(skb, DCB_ATTR_FEATCFG);
+	nest = nla_nest_start_noflag(skb, DCB_ATTR_FEATCFG);
 	if (!nest)
 		return -EMSGSIZE;
 
@@ -1646,8 +1654,9 @@
 	if (!tb[DCB_ATTR_FEATCFG])
 		return -EINVAL;
 
-	ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX,
-			       tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL);
+	ret = nla_parse_nested_deprecated(data, DCB_FEATCFG_ATTR_MAX,
+					  tb[DCB_ATTR_FEATCFG],
+					  dcbnl_featcfg_nest, NULL);
 
 	if (ret)
 		goto err;
@@ -1736,8 +1745,8 @@
 	if ((nlh->nlmsg_type == RTM_SETDCB) && !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
-			  dcbnl_rtnl_policy, extack);
+	ret = nlmsg_parse_deprecated(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
+				     dcbnl_rtnl_policy, extack);
 	if (ret < 0)
 		return ret;
 
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index b270e84..f7c7495 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 menuconfig IP_DCCP
 	tristate "The DCCP Protocol"
 	depends on INET
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 2a24f7d..0a72510 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  net/dccp/ackvec.c
  *
  *  An implementation of Ack Vectors for the DCCP protocol
  *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
- *
- *      This program is free software; you can redistribute it and/or modify it
- *      under the terms of the GNU General Public License as published by the
- *      Free Software Foundation; version 2 of the License;
  */
 #include "dccp.h"
 #include <linux/kernel.h>
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 3284bfa..d2c4220 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 #ifndef _ACKVEC_H
 #define _ACKVEC_H
 /*
@@ -6,9 +7,6 @@
  *  An implementation of Ack Vectors for the DCCP protocol
  *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
- *	This program is free software; you can redistribute it and/or modify it
- *	under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
  */
 
 #include <linux/dccp.h>
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 90f77d0..1e9bb12 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  net/dccp/ccid.c
  *
@@ -5,10 +6,6 @@
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *
  *  CCID infrastructure
- *
- *	This program is free software; you can redistribute it and/or modify it
- *	under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
  */
 
 #include <linux/slab.h>
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 6eb837a..70f88f2 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 #ifndef _CCID_H
 #define _CCID_H
 /*
@@ -7,10 +8,6 @@
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *
  *  CCID infrastructure
- *
- *	This program is free software; you can redistribute it and/or modify it
- *	under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
  */
 
 #include <net/sock.h>
@@ -202,7 +199,7 @@
 static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
 					   u8 pkt, u8 opt, u8 *val, u8 len)
 {
-	if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
+	if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options)
 		return 0;
 	return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
 }
@@ -214,7 +211,7 @@
 static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
 					   u8 pkt, u8 opt, u8 *val, u8 len)
 {
-	if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
+	if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options)
 		return 0;
 	return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
 }
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 8ba3fc9..4a358e6 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 menu "DCCP CCIDs Configuration"
 
 config IP_DCCP_CCID2_DEBUG
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 842a9c7..3da1f77 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
  *
  *  Changes to meet Linux coding standards, and DCCP infrastructure fixes.
  *
  *  Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 /*
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 1af0116..330c7b4 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *  Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #ifndef _DCCP_CCID2_H_
 #define _DCCP_CCID2_H_
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 12877a1..9ef9bee 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
@@ -17,20 +18,6 @@
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
  *
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #include "../dccp.h"
 #include "ccid3.h"
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 813d91c..081c195 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
@@ -17,20 +18,6 @@
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
  *
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #ifndef _DCCP_CCID3_H_
 #define _DCCP_CCID3_H_
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 57f9fd7..67abad6 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
  */
 #include <net/sock.h>
 #include "tfrc.h"
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 57f631a..c3d95f8 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 #ifndef _DCCP_LI_HIST_
 #define _DCCP_LI_HIST_
 /*
@@ -5,11 +6,6 @@
  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
  */
 #include <linux/ktime.h>
 #include <linux/list.h>
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 876e185..2d41bb0 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
@@ -17,20 +18,6 @@
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
  *
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/string.h>
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index ee362b0..a157d87 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *  Packet RX/TX history data structures and routines for TFRC-based protocols.
  *
@@ -17,20 +18,6 @@
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
  *
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef _DCCP_PKT_HIST_
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 40ee7d6..0a63e87 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 #ifndef _TFRC_H_
 #define _TFRC_H_
 /*
@@ -6,11 +7,6 @@
  *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005   Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *  Copyright (c) 2003   Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
  */
 #include <linux/types.h>
 #include <linux/math64.h>
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 88ef982..e2a337f 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
  *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
  */
 
 #include <linux/module.h>
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f91e381..9c3b27c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 #ifndef _DCCP_H
 #define _DCCP_H
 /*
@@ -6,10 +7,6 @@
  *  An implementation of the DCCP protocol
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
- *
- *	This program is free software; you can redistribute it and/or modify it
- *	under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
  */
 
 #include <linux/dccp.h>
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 2d84303..73ef73a 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  net/dccp/diag.c
  *
  *  An implementation of the DCCP protocol
  *  Arnaldo Carvalho de Melo <acme@mandriva.com>
- *
- *	This program is free software; you can redistribute it and/or modify it
- *	under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
  */
 
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index f227f00..9c3b5e0 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  net/dccp/feat.c
  *
@@ -7,7 +8,6 @@
  *  Rewrote from scratch, some bits from earlier code by
  *  Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
  *
- *
  *  ASSUMPTIONS
  *  -----------
  *  o Feature negotiation is coordinated with connection setup (as in TCP), wild
@@ -16,11 +16,6 @@
  *  o All currently known SP features have 1-byte quantities. If in the future
  *    extensions of RFCs 4340..42 define features with item lengths larger than
  *    one byte, a feature-specific extension of the code will be required.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
  */
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -738,7 +733,12 @@
 	if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
 		return -ENOMEM;
 
-	return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval);
+	if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) {
+		kfree(fval.sp.vec);
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
 /**
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 0e75ceb..d76c9be 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 #ifndef _DCCP_FEAT_H
 #define _DCCP_FEAT_H
 /*
@@ -6,10 +7,6 @@
  *  Feature negotiation for the DCCP protocol (RFC 4340, section 6)
  *  Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
  *  Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
  */
 #include <linux/types.h>
 #include "dccp.h"
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 85d6c87..6dce68a 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  net/dccp/input.c
  *
  *  An implementation of the DCCP protocol
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/dccp.h>
@@ -480,7 +476,7 @@
 			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
 		}
 
-		if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
+		if (sk->sk_write_pending || inet_csk_in_pingpong_mode(sk) ||
 		    icsk->icsk_accept_queue.rskq_defer_accept) {
 			/* Save one ACK. Data will be ready after
 			 * several ticks, if write_pending is set.
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8e08cea..d19557c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  net/dccp/ipv4.c
  *
  *  An implementation of the DCCP protocol
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/dccp.h>
@@ -121,7 +117,7 @@
 						    inet->inet_daddr,
 						    inet->inet_sport,
 						    inet->inet_dport);
-	inet->inet_id = dp->dccps_iss ^ jiffies;
+	inet->inet_id = prandom_u32();
 
 	err = dccp_connect(sk);
 	rt = NULL;
@@ -231,7 +227,7 @@
  * check at all. A more general error queue to queue errors for later handling
  * is probably better.
  */
-static void dccp_v4_err(struct sk_buff *skb, u32 info)
+static int dccp_v4_err(struct sk_buff *skb, u32 info)
 {
 	const struct iphdr *iph = (struct iphdr *)skb->data;
 	const u8 offset = iph->ihl << 2;
@@ -259,16 +255,18 @@
 				       inet_iif(skb), 0);
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
-		return;
+		return -ENOENT;
 	}
 
 	if (sk->sk_state == DCCP_TIME_WAIT) {
 		inet_twsk_put(inet_twsk(sk));
-		return;
+		return 0;
 	}
 	seq = dccp_hdr_seq(dh);
-	if (sk->sk_state == DCCP_NEW_SYN_RECV)
-		return dccp_req_err(sk, seq);
+	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
+		dccp_req_err(sk, seq);
+		return 0;
+	}
 
 	bh_lock_sock(sk);
 	/* If too many ICMPs get dropped on busy
@@ -357,6 +355,7 @@
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	return 0;
 }
 
 static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb,
@@ -417,7 +416,7 @@
 	RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
 	newinet->mc_index  = inet_iif(skb);
 	newinet->mc_ttl	   = ip_hdr(skb)->ttl;
-	newinet->inet_id   = jiffies;
+	newinet->inet_id   = prandom_u32();
 
 	if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
 		goto put_and_exit;
@@ -872,7 +871,7 @@
 
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
 
@@ -988,6 +987,7 @@
 	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
 	.poll		   = dccp_poll,
 	.ioctl		   = inet_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	/* FIXME: work on inet_listen to rename it to sock_common_listen */
 	.listen		   = inet_dccp_listen,
 	.shutdown	   = inet_shutdown,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6344f1b..25aab67 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	DCCP over IPv6
  *	Linux INET6 implementation
@@ -5,11 +6,6 @@
  *	Based on net/dccp6/ipv6.c
  *
  *	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -68,7 +64,7 @@
 
 }
 
-static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			u8 type, u8 code, int offset, __be32 info)
 {
 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
@@ -96,16 +92,18 @@
 	if (!sk) {
 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
 				  ICMP6_MIB_INERRORS);
-		return;
+		return -ENOENT;
 	}
 
 	if (sk->sk_state == DCCP_TIME_WAIT) {
 		inet_twsk_put(inet_twsk(sk));
-		return;
+		return 0;
 	}
 	seq = dccp_hdr_seq(dh);
-	if (sk->sk_state == DCCP_NEW_SYN_RECV)
-		return dccp_req_err(sk, seq);
+	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
+		dccp_req_err(sk, seq);
+		return 0;
+	}
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk))
@@ -183,6 +181,7 @@
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	return 0;
 }
 
 
@@ -231,7 +230,8 @@
 		opt = ireq->ipv6_opt;
 		if (!opt)
 			opt = rcu_dereference(np->opt);
-		err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass);
+		err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
+			       sk->sk_priority);
 		rcu_read_unlock();
 		err = net_xmit_eval(err);
 	}
@@ -285,7 +285,7 @@
 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
 	if (!IS_ERR(dst)) {
 		skb_dst_set(skb, dst);
-		ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0);
+		ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0);
 		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
 		DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
 		return;
@@ -433,8 +433,8 @@
 		newnp->ipv6_mc_list = NULL;
 		newnp->ipv6_ac_list = NULL;
 		newnp->ipv6_fl_list = NULL;
-		newnp->mcast_oif   = inet6_iif(skb);
-		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
+		newnp->mcast_oif   = inet_iif(skb);
+		newnp->mcast_hops  = ip_hdr(skb)->ttl;
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -831,7 +831,7 @@
 		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
 			struct ip6_flowlabel *flowlabel;
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
-			if (flowlabel == NULL)
+			if (IS_ERR(flowlabel))
 				return -EINVAL;
 			fl6_sock_release(flowlabel);
 		}
@@ -1072,6 +1072,7 @@
 	.getname	   = inet6_getname,
 	.poll		   = dccp_poll,
 	.ioctl		   = inet6_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = inet_dccp_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index af259e1..7e4c2a3 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 #ifndef _DCCP_IPV6_H
 #define _DCCP_IPV6_H
 /*
@@ -5,10 +6,6 @@
  *
  *  An implementation of the DCCP protocol
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
- *
- *	This program is free software; you can redistribute it and/or modify it
- *	under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
  */
 
 #include <linux/dccp.h>
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index ba6fc3c..2518752 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  net/dccp/minisocks.c
  *
  *  An implementation of the DCCP protocol
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/dccp.h>
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 4e40db0..3b42f5c 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  net/dccp/options.c
  *
@@ -5,11 +6,6 @@
  *  Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
  *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 #include <linux/dccp.h>
 #include <linux/module.h>
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 91a15b3..6433187 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  net/dccp/output.c
  *
  *  An implementation of the DCCP protocol
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/dccp.h>
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 43733ac..5bad08d 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  net/dccp/proto.c
  *
  *  An implementation of the DCCP protocol
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *	This program is free software; you can redistribute it and/or modify it
- *	under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
  */
 
 #include <linux/dccp.h>
@@ -263,7 +260,6 @@
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet = inet_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	int err = 0;
 	const int old_state = sk->sk_state;
 
 	if (old_state != DCCP_CLOSED)
@@ -307,7 +303,7 @@
 	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
 
 	sk->sk_error_report(sk);
-	return err;
+	return 0;
 }
 
 EXPORT_SYMBOL_GPL(dccp_disconnect);
@@ -948,6 +944,7 @@
 	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
 		goto out;
 
+	sk->sk_max_ack_backlog = backlog;
 	/* Really, if the socket is already in listen state
 	 * we can only allow the backlog to be adjusted.
 	 */
@@ -960,7 +957,6 @@
 		if (err)
 			goto out;
 	}
-	sk->sk_max_ack_backlog = backlog;
 	err = 0;
 
 out:
@@ -1131,6 +1127,7 @@
 static int __init dccp_init(void)
 {
 	unsigned long goal;
+	unsigned long nr_pages = totalram_pages();
 	int ehash_order, bhash_order, i;
 	int rc;
 
@@ -1139,8 +1136,11 @@
 	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
 	if (rc)
 		goto out_fail;
-	rc = -ENOBUFS;
 	inet_hashinfo_init(&dccp_hashinfo);
+	rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
+	if (rc)
+		goto out_fail;
+	rc = -ENOBUFS;
 	dccp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("dccp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
@@ -1154,10 +1154,10 @@
 	 *
 	 * The methodology is similar to that of the buffer cache.
 	 */
-	if (totalram_pages >= (128 * 1024))
-		goal = totalram_pages >> (21 - PAGE_SHIFT);
+	if (nr_pages >= (128 * 1024))
+		goal = nr_pages >> (21 - PAGE_SHIFT);
 	else
-		goal = totalram_pages >> (23 - PAGE_SHIFT);
+		goal = nr_pages >> (23 - PAGE_SHIFT);
 
 	if (thash_entries)
 		goal = (thash_entries *
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
index 63c30bf..db2448c 100644
--- a/net/dccp/qpolicy.c
+++ b/net/dccp/qpolicy.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  net/dccp/qpolicy.c
  *
  *  Policy-based packet dequeueing interface for DCCP.
  *
  *  Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License v2
- *  as published by the Free Software Foundation.
  */
 #include "dccp.h"
 
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 53731e4..ee8d4f5 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  net/dccp/sysctl.c
  *
  *  An implementation of the DCCP protocol
  *  Arnaldo Carvalho de Melo <acme@mandriva.com>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License v2
- *	as published by the Free Software Foundation.
  */
 
 #include <linux/mm.h>
@@ -19,9 +16,7 @@
 #endif
 
 /* Boundary values */
-static int		zero     = 0,
-			one      = 1,
-			u8_max   = 0xFF;
+static int		u8_max   = 0xFF;
 static unsigned long	seqw_min = DCCPF_SEQ_WMIN,
 			seqw_max = 0xFFFFFFFF;		/* maximum on 32 bit */
 
@@ -41,7 +36,7 @@
 		.maxlen		= sizeof(sysctl_dccp_rx_ccid),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &u8_max,		/* RFC 4340, 10. */
 	},
 	{
@@ -50,7 +45,7 @@
 		.maxlen		= sizeof(sysctl_dccp_tx_ccid),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &u8_max,		/* RFC 4340, 10. */
 	},
 	{
@@ -59,7 +54,7 @@
 		.maxlen		= sizeof(sysctl_dccp_request_retries),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= SYSCTL_ONE,
 		.extra2		= &u8_max,
 	},
 	{
@@ -68,7 +63,7 @@
 		.maxlen		= sizeof(sysctl_dccp_retries1),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &u8_max,
 	},
 	{
@@ -77,7 +72,7 @@
 		.maxlen		= sizeof(sysctl_dccp_retries2),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &u8_max,
 	},
 	{
@@ -86,7 +81,7 @@
 		.maxlen		= sizeof(sysctl_dccp_tx_qlen),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 	},
 	{
 		.procname	= "sync_ratelimit",
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 1501a20..c0b3672 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  net/dccp/timer.c
  *
  *  An implementation of the DCCP protocol
  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/dccp.h>
@@ -199,7 +195,7 @@
 	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
 
 	if (inet_csk_ack_scheduled(sk)) {
-		if (!icsk->icsk_ack.pingpong) {
+		if (!inet_csk_in_pingpong_mode(sk)) {
 			/* Delayed ACK missed: inflate ATO. */
 			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
 						 icsk->icsk_rto);
@@ -207,7 +203,7 @@
 			/* Delayed ACK missed: leave pingpong mode and
 			 * deflate ATO.
 			 */
-			icsk->icsk_ack.pingpong = 0;
+			inet_csk_exit_pingpong_mode(sk);
 			icsk->icsk_ack.ato = TCP_ATO_MIN;
 		}
 		dccp_send_ack(sk);
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index dcc7495..0935453 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # DECnet configuration
 #
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 7d6ff98..3349ea8 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
@@ -46,15 +47,6 @@
 /******************************************************************************
     (c) 1995-1998 E.M. Serrat		emserrat@geocities.com
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
 
 HISTORY:
 
@@ -192,7 +184,7 @@
 static unsigned short port_alloc(struct sock *sk)
 {
 	struct dn_scp *scp = DN_SK(sk);
-static unsigned short port = 0x2000;
+	static unsigned short port = 0x2000;
 	unsigned short i_port = port;
 
 	while(check_port(cpu_to_le16(++port)) != 0) {
@@ -444,7 +436,7 @@
 	skb_queue_purge(&scp->other_xmit_queue);
 	skb_queue_purge(&scp->other_receive_queue);
 
-	dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
+	dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
 }
 
 static unsigned long dn_memory_pressure;
@@ -1213,7 +1205,7 @@
 	struct dn_scp *scp = DN_SK(sk);
 	__poll_t mask = datagram_poll(file, sock, wait);
 
-	if (!skb_queue_empty(&scp->other_receive_queue))
+	if (!skb_queue_empty_lockless(&scp->other_receive_queue))
 		mask |= EPOLLRDBAND;
 
 	return mask;
@@ -2405,7 +2397,7 @@
 
 	proto_unregister(&dn_proto);
 
-	rcu_barrier_bh(); /* Wait for completion of call_rcu_bh()'s */
+	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 }
 module_exit(decnet_exit);
 #endif
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index bfd43e8..cca7ae7 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -56,7 +56,7 @@
 #include <net/dn_neigh.h>
 #include <net/dn_fib.h>
 
-#define DN_IFREQ_SIZE (sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn))
+#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn))
 
 static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00};
 static char dn_rt_all_rt_mcast[ETH_ALEN]  = {0xAB,0x00,0x00,0x03,0x00,0x00};
@@ -583,8 +583,8 @@
 	if (!net_eq(net, &init_net))
 		goto errout;
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+				     dn_ifa_policy, extack);
 	if (err < 0)
 		goto errout;
 
@@ -629,8 +629,8 @@
 	if (!net_eq(net, &init_net))
 		return -EINVAL;
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+				     dn_ifa_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1363,7 +1363,7 @@
 
 		seq_printf(seq, "%-8s %1s     %04u %04u   %04lu %04lu"
 				"   %04hu    %03d %02x    %-10s %-7s %-7s\n",
-				dev->name ? dev->name : "???",
+				dev->name,
 				dn_type2asc(dn_db->parms.mode),
 				0, 0,
 				dn_db->t3, dn_db->parms.t3,
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index f78fe58..77fbf8e 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -42,7 +42,7 @@
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 
 #define RT_MIN_TABLE 1
 
@@ -282,7 +282,7 @@
 	    (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0)
 		goto err_inval;
 
-	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);
+	fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
 	err = -ENOBUFS;
 	if (fi == NULL)
 		goto failure;
@@ -517,8 +517,8 @@
 	if (!net_eq(net, &init_net))
 		return -EINVAL;
 
-	err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX,
+				     rtm_dn_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -544,8 +544,8 @@
 	if (!net_eq(net, &init_net))
 		return -EINVAL;
 
-	err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX,
+				     rtm_dn_policy, extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 2fb5e05..e4161e0 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
  *              operating system.  DECnet is implemented using the  BSD Socket
@@ -34,15 +35,6 @@
 /******************************************************************************
     (c) 1995-1998 E.M. Serrat		emserrat@geocities.com
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
 *******************************************************************************/
 
 #include <linux/errno.h>
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index a1779de..00f2ed7 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
  *              operating system.  DECnet is implemented using the  BSD Socket
@@ -26,15 +27,6 @@
 /******************************************************************************
     (c) 1995-1998 E.M. Serrat		emserrat@geocities.com
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
 *******************************************************************************/
 
 #include <linux/errno.h>
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 1c002c0..aea9181 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
  *              operating system.  DECnet is implemented using the  BSD Socket
@@ -44,15 +45,6 @@
 /******************************************************************************
     (c) 1995-1998 E.M. Serrat		emserrat@geocities.com
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
 *******************************************************************************/
 
 #include <linux/errno.h>
@@ -1651,8 +1643,8 @@
 	if (!net_eq(net, &init_net))
 		return -EINVAL;
 
-	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+				     rtm_dn_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1866,7 +1858,7 @@
 	dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
 	add_timer(&dn_route_timer);
 
-	goal = totalram_pages >> (26 - PAGE_SHIFT);
+	goal = totalram_pages() >> (26 - PAGE_SHIFT);
 
 	for(order = 0; (1UL << order) < goal; order++)
 		/* NOTHING */;
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index f0710b5..33fefb0 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -348,7 +348,8 @@
 		struct rtnexthop *nhp;
 		struct nlattr *mp_head;
 
-		if (!(mp_head = nla_nest_start(skb, RTA_MULTIPATH)))
+		mp_head = nla_nest_start_noflag(skb, RTA_MULTIPATH);
+		if (!mp_head)
 			goto errout;
 
 		for_nexthops(fi) {
diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig
index 8d7c109..14ec4ef 100644
--- a/net/decnet/netfilter/Kconfig
+++ b/net/decnet/netfilter/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # DECnet netfilter configuration
 #
diff --git a/net/decnet/netfilter/Makefile b/net/decnet/netfilter/Makefile
index b579e52..429c842 100644
--- a/net/decnet/netfilter/Makefile
+++ b/net/decnet/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for DECnet netfilter modules
 #
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index a4faaca..dc70576 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
  *              operating system.  DECnet is implemented using the  BSD Socket
@@ -6,11 +7,8 @@
  *              DECnet Routing Message Grabulator
  *
  *              (C) 2000 ChyGwyn Limited  -  http://www.chygwyn.com/
- *              This code may be copied under the GPL v.2 or at your option
- *              any later version.
  *
  * Author:      Steven Whitehouse <steve@chygwyn.com>
- *
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig
index 50d49f7..0a1c223 100644
--- a/net/dns_resolver/Kconfig
+++ b/net/dns_resolver/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Configuration for DNS Resolver
 #
diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile
index d5c13c2..877532d 100644
--- a/net/dns_resolver/Makefile
+++ b/net/dns_resolver/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux DNS Resolver.
 #
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 7f45348..3e1a906 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -29,6 +29,7 @@
 #include <linux/keyctl.h>
 #include <linux/err.h>
 #include <linux/seq_file.h>
+#include <linux/dns_resolver.h>
 #include <keys/dns_resolver-type.h>
 #include <keys/user-type.h>
 #include "internal.h"
@@ -48,27 +49,86 @@
 /*
  * Preparse instantiation data for a dns_resolver key.
  *
- * The data must be a NUL-terminated string, with the NUL char accounted in
- * datalen.
+ * For normal hostname lookups, the data must be a NUL-terminated string, with
+ * the NUL char accounted for in datalen.
  *
  * If the data contains a '#' characters, then we take the clause after each
  * one to be an option of the form 'key=value'.  The actual data of interest is
  * the string leading up to the first '#'.  For instance:
  *
  *        "ip1,ip2,...#foo=bar"
+ *
+ * For server list requests, the data must begin with a NUL char and be
+ * followed by a byte indicating the version of the data format.  Version 1
+ * looks something like (note this is packed):
+ *
+ *	u8	Non-string marker (i.e. 0)
+ *	u8	Content (DNS_PAYLOAD_IS_*)
+ *	u8	Version (e.g. 1)
+ *	u8	Source of server list
+ *	u8	Lookup status of server list
+ *	u8	Number of servers
+ *	foreach-server {
+ *		__le16	Name length
+ *		__le16	Priority (as per SRV record, low first)
+ *		__le16	Weight (as per SRV record, higher first)
+ *		__le16	Port
+ *		u8	Source of address list
+ *		u8	Lookup status of address list
+ *		u8	Protocol (DNS_SERVER_PROTOCOL_*)
+ *		u8	Number of addresses
+ *		char[]	Name (not NUL-terminated)
+ *		foreach-address {
+ *			u8		Family (DNS_ADDRESS_IS_*)
+ *			union {
+ *				u8[4]	ipv4_addr
+ *				u8[16]	ipv6_addr
+ *			}
+ *		}
+ *	}
+ *
  */
 static int
 dns_resolver_preparse(struct key_preparsed_payload *prep)
 {
+	const struct dns_payload_header *bin;
 	struct user_key_payload *upayload;
 	unsigned long derrno;
 	int ret;
 	int datalen = prep->datalen, result_len = 0;
 	const char *data = prep->data, *end, *opt;
 
+	if (datalen <= 1 || !data)
+		return -EINVAL;
+
+	if (data[0] == 0) {
+		/* It may be a server list. */
+		if (datalen <= sizeof(*bin))
+			return -EINVAL;
+
+		bin = (const struct dns_payload_header *)data;
+		kenter("[%u,%u],%u", bin->content, bin->version, datalen);
+		if (bin->content != DNS_PAYLOAD_IS_SERVER_LIST) {
+			pr_warn_ratelimited(
+				"dns_resolver: Unsupported content type (%u)\n",
+				bin->content);
+			return -EINVAL;
+		}
+
+		if (bin->version != 1) {
+			pr_warn_ratelimited(
+				"dns_resolver: Unsupported server list version (%u)\n",
+				bin->version);
+			return -EINVAL;
+		}
+
+		result_len = datalen;
+		goto store_result;
+	}
+
 	kenter("'%*.*s',%u", datalen, datalen, data, datalen);
 
-	if (datalen <= 1 || !data || data[datalen - 1] != '\0')
+	if (!data || data[datalen - 1] != '\0')
 		return -EINVAL;
 	datalen--;
 
@@ -144,6 +204,7 @@
 		return 0;
 	}
 
+store_result:
 	kdebug("store result");
 	prep->quotalen = result_len;
 
@@ -253,6 +314,7 @@
 
 struct key_type key_type_dns_resolver = {
 	.name		= "dns_resolver",
+	.flags		= KEY_TYPE_NET_DOMAIN,
 	.preparse	= dns_resolver_preparse,
 	.free_preparse	= dns_resolver_free_preparse,
 	.instantiate	= generic_key_instantiate,
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 49da670..cab4e0d 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -40,6 +40,7 @@
 #include <linux/cred.h>
 #include <linux/dns_resolver.h>
 #include <linux/err.h>
+#include <net/net_namespace.h>
 
 #include <keys/dns_resolver-type.h>
 #include <keys/user-type.h>
@@ -48,12 +49,14 @@
 
 /**
  * dns_query - Query the DNS
+ * @net: The network namespace to operate in.
  * @type: Query type (or NULL for straight host->IP lookup)
  * @name: Name to look up
  * @namelen: Length of name
  * @options: Request options (or NULL if no options)
  * @_result: Where to place the returned data (or NULL)
  * @_expiry: Where to store the result expiry time (or NULL)
+ * @invalidate: Always invalidate the key after use
  *
  * The data will be returned in the pointer at *result, if provided, and the
  * caller is responsible for freeing it.
@@ -68,8 +71,10 @@
  *
  * Returns the size of the result on success, -ve error code otherwise.
  */
-int dns_query(const char *type, const char *name, size_t namelen,
-	      const char *options, char **_result, time64_t *_expiry)
+int dns_query(struct net *net,
+	      const char *type, const char *name, size_t namelen,
+	      const char *options, char **_result, time64_t *_expiry,
+	      bool invalidate)
 {
 	struct key *rkey;
 	struct user_key_payload *upayload;
@@ -94,8 +99,6 @@
 		desclen += typelen + 1;
 	}
 
-	if (!namelen)
-		namelen = strnlen(name, 256);
 	if (namelen < 3 || namelen > 255)
 		return -EINVAL;
 	desclen += namelen + 1;
@@ -122,7 +125,7 @@
 	 * add_key() to preinstall malicious redirections
 	 */
 	saved_cred = override_creds(dns_resolver_cache);
-	rkey = request_key(&key_type_dns_resolver, desc, options);
+	rkey = request_key_net(&key_type_dns_resolver, desc, net, options);
 	revert_creds(saved_cred);
 	kfree(desc);
 	if (IS_ERR(rkey)) {
@@ -148,12 +151,9 @@
 
 	if (_result) {
 		ret = -ENOMEM;
-		*_result = kmalloc(len + 1, GFP_KERNEL);
+		*_result = kmemdup_nul(upayload->data, len, GFP_KERNEL);
 		if (!*_result)
 			goto put;
-
-		memcpy(*_result, upayload->data, len);
-		(*_result)[len] = '\0';
 	}
 
 	if (_expiry)
@@ -162,6 +162,8 @@
 	ret = len;
 put:
 	up_read(&rkey->sem);
+	if (invalidate)
+		key_invalidate(rkey);
 	key_put(rkey);
 out:
 	kleave(" = %d", ret);
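
With the strnlen() fallback removed, callers of dns_query() must now pass an explicit non-zero name length, along with the network namespace and the new invalidate flag. A hedged sketch of an updated call site (example_lookup() and the queried name are illustrative, not from this patch):

#include <linux/dns_resolver.h>
#include <linux/slab.h>

static int example_lookup(struct net *net)
{
	char *result;
	time64_t expiry;
	int len;

	/* namelen is mandatory now; 11 == strlen("example.org") */
	len = dns_query(net, NULL, "example.org", 11, NULL,
			&result, &expiry, false);
	if (len < 0)
		return len;	/* -ve error code */

	/* result holds len bytes plus a trailing NUL; caller frees it */
	kfree(result);
	return 0;
}

Passing true for invalidate makes the key single-use: it is invalidated before key_put(), so the next query forces a fresh upcall instead of hitting the cache.
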
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 4183e4b..29e2bd5 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,56 +1,110 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config HAVE_NET_DSA
 	def_bool y
 	depends on INET && NETDEVICES && !S390
 
 # Drivers must select NET_DSA and the appropriate tagging format
 
-config NET_DSA
+menuconfig NET_DSA
 	tristate "Distributed Switch Architecture"
-	depends on HAVE_NET_DSA && MAY_USE_DEVLINK
+	depends on HAVE_NET_DSA
 	depends on BRIDGE || BRIDGE=n
 	select NET_SWITCHDEV
 	select PHYLINK
+	select NET_DEVLINK
 	---help---
 	  Say Y if you want to enable support for the hardware switches supported
 	  by the Distributed Switch Architecture.
 
 if NET_DSA
 
-config NET_DSA_LEGACY
-	bool "Support for older platform device and Device Tree registration"
-	default y
-	---help---
-	  Say Y if you want to enable support for the older platform device and
-	  deprecated Device Tree binding registration.
-
-	  This feature is scheduled for removal in 4.17.
-
 # tagging formats
+config NET_DSA_TAG_8021Q
+	tristate "Tag driver for switches using custom 802.1Q VLAN headers"
+	select VLAN_8021Q
+	help
+	  Unlike the other tagging protocols, the 802.1Q config option simply
+	  provides helpers for other tagging implementations that might rely on
+	  VLAN in one way or another. It is not a complete solution.
+
+	  Drivers which use these helpers should select this as a dependency.
+
+config NET_DSA_TAG_BRCM_COMMON
+	tristate
+	default n
+
 config NET_DSA_TAG_BRCM
-	bool
+	tristate "Tag driver for Broadcom switches using in-frame headers"
+	select NET_DSA_TAG_BRCM_COMMON
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  Broadcom switches which place the tag after the MAC source address.
 
 config NET_DSA_TAG_BRCM_PREPEND
-	bool
+	tristate "Tag driver for Broadcom switches using prepended headers"
+	select NET_DSA_TAG_BRCM_COMMON
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  Broadcom switches which place the tag before the Ethernet header
+	  (prepended).
+
+config NET_DSA_TAG_GSWIP
+	tristate "Tag driver for Lantiq / Intel GSWIP switches"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  Lantiq / Intel GSWIP switches.
 
 config NET_DSA_TAG_DSA
-	bool
+	tristate "Tag driver for Marvell switches using DSA headers"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  Marvell switches which use DSA headers.
 
 config NET_DSA_TAG_EDSA
-	bool
-
-config NET_DSA_TAG_KSZ
-	bool
-
-config NET_DSA_TAG_LAN9303
-	bool
+	tristate "Tag driver for Marvell switches using EtherType DSA headers"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  Marvell switches which use EtherType DSA headers.
 
 config NET_DSA_TAG_MTK
-	bool
+	tristate "Tag driver for Mediatek switches"
+	help
+	  Say Y or M if you want to enable support for tagging frames for
+	  Mediatek switches.
 
-config NET_DSA_TAG_TRAILER
-	bool
+config NET_DSA_TAG_KSZ
+	tristate "Tag driver for Microchip 8795/9477/9893 families of switches"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  Microchip 8795/9477/9893 families of switches.
 
 config NET_DSA_TAG_QCA
-	bool
+	tristate "Tag driver for Qualcomm Atheros QCA8K switches"
+	help
+	  Say Y or M if you want to enable support for tagging frames for
+	  the Qualcomm Atheros QCA8K switches.
+
+config NET_DSA_TAG_LAN9303
+	tristate "Tag driver for SMSC/Microchip LAN9303 family of switches"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  SMSC/Microchip LAN9303 family of switches.
+
+config NET_DSA_TAG_SJA1105
+	tristate "Tag driver for NXP SJA1105 switches"
+	select NET_DSA_TAG_8021Q
+	select PACKING
+	help
+	  Say Y or M if you want to enable support for tagging frames with the
+	  NXP SJA1105 switch family. Both the native tagging protocol (which
+	  is only for link-local traffic) and non-native tagging (based on a
+	  custom 802.1Q VLAN header) are available.
+
+config NET_DSA_TAG_TRAILER
+	tristate "Tag driver for switches using a trailer tag"
+	help
+	  Say Y or M if you want to enable support for tagging frames with a
+	  trailer, e.g. the Marvell 88E6060.
 
 endif
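
With every tagging format now tristate, taggers build as standalone modules that load on demand instead of being linked into dsa_core. An illustrative configuration fragment (the chosen symbols are an example, not mandated by this patch):

CONFIG_NET_DSA=m
CONFIG_NET_DSA_TAG_DSA=m
CONFIG_NET_DSA_TAG_EDSA=m
CONFIG_NET_DSA_TAG_BRCM=m
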
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 9e4d353..2c6d286 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -2,15 +2,16 @@
 # the core
 obj-$(CONFIG_NET_DSA) += dsa_core.o
 dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
-dsa_core-$(CONFIG_NET_DSA_LEGACY) += legacy.o
 
 # tagging formats
-dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
-dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o
-dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
-dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
-dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
-dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
-dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
-dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
-dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
+obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o
+obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o
+obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
+obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
+obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
+obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
+obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
+obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
+obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
+obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o
+obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 9f3209f..43120a3 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/dsa/dsa.c - Hardware switch handling
  * Copyright (c) 2008-2009 Marvell Semiconductor
  * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/device.h>
@@ -27,6 +23,9 @@
 
 #include "dsa_priv.h"
 
+static LIST_HEAD(dsa_tag_drivers_list);
+static DEFINE_MUTEX(dsa_tag_drivers_lock);
+
 static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
@@ -35,55 +34,103 @@
 }
 
 static const struct dsa_device_ops none_ops = {
+	.name	= "none",
+	.proto	= DSA_TAG_PROTO_NONE,
 	.xmit	= dsa_slave_notag_xmit,
 	.rcv	= NULL,
 };
 
-const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
-#ifdef CONFIG_NET_DSA_TAG_BRCM
-	[DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
-	[DSA_TAG_PROTO_BRCM_PREPEND] = &brcm_prepend_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_DSA
-	[DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
-	[DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_KSZ
-	[DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_LAN9303
-	[DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_MTK
-	[DSA_TAG_PROTO_MTK] = &mtk_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_QCA
-	[DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-	[DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops,
-#endif
-	[DSA_TAG_PROTO_NONE] = &none_ops,
+DSA_TAG_DRIVER(none_ops);
+
+static void dsa_tag_driver_register(struct dsa_tag_driver *dsa_tag_driver,
+				    struct module *owner)
+{
+	dsa_tag_driver->owner = owner;
+
+	mutex_lock(&dsa_tag_drivers_lock);
+	list_add_tail(&dsa_tag_driver->list, &dsa_tag_drivers_list);
+	mutex_unlock(&dsa_tag_drivers_lock);
+}
+
+void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[],
+			      unsigned int count, struct module *owner)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++)
+		dsa_tag_driver_register(dsa_tag_driver_array[i], owner);
+}
+
+static void dsa_tag_driver_unregister(struct dsa_tag_driver *dsa_tag_driver)
+{
+	mutex_lock(&dsa_tag_drivers_lock);
+	list_del(&dsa_tag_driver->list);
+	mutex_unlock(&dsa_tag_drivers_lock);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_drivers_register);
+
+void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[],
+				unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++)
+		dsa_tag_driver_unregister(dsa_tag_driver_array[i]);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_drivers_unregister);
+
+const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops)
+{
+	return ops->name;
 };
 
-const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
+const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol)
 {
+	struct dsa_tag_driver *dsa_tag_driver;
 	const struct dsa_device_ops *ops;
+	char module_name[128];
+	bool found = false;
 
-	if (tag_protocol >= DSA_TAG_LAST)
-		return ERR_PTR(-EINVAL);
-	ops = dsa_device_ops[tag_protocol];
+	snprintf(module_name, 127, "%s%d", DSA_TAG_DRIVER_ALIAS,
+		 tag_protocol);
 
-	if (!ops)
-		return ERR_PTR(-ENOPROTOOPT);
+	request_module(module_name);
+
+	mutex_lock(&dsa_tag_drivers_lock);
+	list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) {
+		ops = dsa_tag_driver->ops;
+		if (ops->proto == tag_protocol) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found) {
+		if (!try_module_get(dsa_tag_driver->owner))
+			ops = ERR_PTR(-ENOPROTOOPT);
+	} else {
+		ops = ERR_PTR(-ENOPROTOOPT);
+	}
+
+	mutex_unlock(&dsa_tag_drivers_lock);
 
 	return ops;
 }
 
+void dsa_tag_driver_put(const struct dsa_device_ops *ops)
+{
+	struct dsa_tag_driver *dsa_tag_driver;
+
+	mutex_lock(&dsa_tag_drivers_lock);
+	list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) {
+		if (dsa_tag_driver->ops == ops) {
+			module_put(dsa_tag_driver->owner);
+			break;
+		}
+	}
+	mutex_unlock(&dsa_tag_drivers_lock);
+}
+
 static int dev_is_class(struct device *dev, void *class)
 {
 	if (dev->class != NULL && !strcmp(dev->class->name, class))
@@ -293,23 +340,28 @@
 
 	rc = dsa_slave_register_notifier();
 	if (rc)
-		return rc;
-
-	rc = dsa_legacy_register();
-	if (rc)
-		return rc;
+		goto register_notifier_fail;
 
 	dev_add_pack(&dsa_pack_type);
 
+	dsa_tag_driver_register(&DSA_TAG_DRIVER_NAME(none_ops),
+				THIS_MODULE);
+
 	return 0;
+
+register_notifier_fail:
+	destroy_workqueue(dsa_owq);
+
+	return rc;
 }
 module_init(dsa_init_module);
 
 static void __exit dsa_cleanup_module(void)
 {
+	dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops));
+
 	dsa_slave_unregister_notifier();
 	dev_remove_pack(&dsa_pack_type);
-	dsa_legacy_unregister();
 	destroy_workqueue(dsa_owq);
 }
 module_exit(dsa_cleanup_module);
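
Taggers now register themselves on dsa_tag_drivers_list instead of living in the fixed dsa_device_ops array. A minimal tagger sketch under the new scheme; the helper macros (DSA_TAG_DRIVER, module_dsa_tag_driver, MODULE_ALIAS_DSA_TAG_DRIVER) come from include/net/dsa.h, and all "example" names here are hypothetical:

#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/dsa.h>

static struct sk_buff *example_xmit(struct sk_buff *skb,
				    struct net_device *dev)
{
	/* a real tagger inserts its tag here */
	return skb;
}

static struct sk_buff *example_rcv(struct sk_buff *skb,
				   struct net_device *dev,
				   struct packet_type *pt)
{
	/* a real tagger parses and strips its tag here */
	return skb;
}

static const struct dsa_device_ops example_netdev_ops = {
	.name	= "example",
	.proto	= DSA_TAG_PROTO_NONE,	/* a real driver uses its own ID */
	.xmit	= example_xmit,
	.rcv	= example_rcv,
};

/* Emits the "dsa_tag-<N>" modalias that dsa_tag_driver_get() above
 * hands to request_module(), so the tagger autoloads on demand. */
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_NONE);
MODULE_LICENSE("GPL");

module_dsa_tag_driver(example_netdev_ops);
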
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index a191702..716d265 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/dsa/dsa2.c - Hardware switch handling, binding version 2
  * Copyright (c) 2008-2009 Marvell Semiconductor
  * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
  * Copyright (c) 2016 Andrew Lunn <andrew@lunn.ch>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/device.h>
@@ -18,6 +14,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/of.h>
 #include <linux/of_net.h>
+#include <net/devlink.h>
 
 #include "dsa_priv.h"
 
@@ -49,7 +46,7 @@
 	dst->index = index;
 
 	INIT_LIST_HEAD(&dst->list);
-	list_add_tail(&dsa_tree_list, &dst->list);
+	list_add_tail(&dst->list, &dsa_tree_list);
 
 	kref_init(&dst->refcount);
 
@@ -258,78 +255,108 @@
 static int dsa_port_setup(struct dsa_port *dp)
 {
 	struct dsa_switch *ds = dp->ds;
+	struct dsa_switch_tree *dst = ds->dst;
+	const unsigned char *id = (const unsigned char *)&dst->index;
+	const unsigned char len = sizeof(dst->index);
+	struct devlink_port *dlp = &dp->devlink_port;
+	bool dsa_port_link_registered = false;
+	bool devlink_port_registered = false;
+	struct devlink *dl = ds->devlink;
+	bool dsa_port_enabled = false;
 	int err = 0;
 
-	memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
-
-	if (dp->type != DSA_PORT_TYPE_UNUSED)
-		err = devlink_port_register(ds->devlink, &dp->devlink_port,
-					    dp->index);
-	if (err)
-		return err;
-
 	switch (dp->type) {
 	case DSA_PORT_TYPE_UNUSED:
+		dsa_port_disable(dp);
 		break;
 	case DSA_PORT_TYPE_CPU:
-		/* dp->index is used now as port_number. However
-		 * CPU ports should have separate numbering
-		 * independent from front panel port numbers.
-		 */
-		devlink_port_attrs_set(&dp->devlink_port,
-				       DEVLINK_PORT_FLAVOUR_CPU,
-				       dp->index, false, 0);
+		memset(dlp, 0, sizeof(*dlp));
+		devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_CPU,
+				       dp->index, false, 0, id, len);
+		err = devlink_port_register(dl, dlp, dp->index);
+		if (err)
+			break;
+		devlink_port_registered = true;
+
 		err = dsa_port_link_register_of(dp);
-		if (err) {
-			dev_err(ds->dev, "failed to setup link for port %d.%d\n",
-				ds->index, dp->index);
-			return err;
-		}
+		if (err)
+			break;
+		dsa_port_link_registered = true;
+
+		err = dsa_port_enable(dp, NULL);
+		if (err)
+			break;
+		dsa_port_enabled = true;
+
 		break;
 	case DSA_PORT_TYPE_DSA:
-		/* dp->index is used now as port_number. However
-		 * DSA ports should have separate numbering
-		 * independent from front panel port numbers.
-		 */
-		devlink_port_attrs_set(&dp->devlink_port,
-				       DEVLINK_PORT_FLAVOUR_DSA,
-				       dp->index, false, 0);
+		memset(dlp, 0, sizeof(*dlp));
+		devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_DSA,
+				       dp->index, false, 0, id, len);
+		err = devlink_port_register(dl, dlp, dp->index);
+		if (err)
+			break;
+		devlink_port_registered = true;
+
 		err = dsa_port_link_register_of(dp);
-		if (err) {
-			dev_err(ds->dev, "failed to setup link for port %d.%d\n",
-				ds->index, dp->index);
-			return err;
-		}
+		if (err)
+			break;
+		dsa_port_link_registered = true;
+
+		err = dsa_port_enable(dp, NULL);
+		if (err)
+			break;
+		dsa_port_enabled = true;
+
 		break;
 	case DSA_PORT_TYPE_USER:
-		devlink_port_attrs_set(&dp->devlink_port,
-				       DEVLINK_PORT_FLAVOUR_PHYSICAL,
-				       dp->index, false, 0);
+		memset(dlp, 0, sizeof(*dlp));
+		devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+				       dp->index, false, 0, id, len);
+		err = devlink_port_register(dl, dlp, dp->index);
+		if (err)
+			break;
+		devlink_port_registered = true;
+
+		dp->mac = of_get_mac_address(dp->dn);
 		err = dsa_slave_create(dp);
 		if (err)
-			dev_err(ds->dev, "failed to create slave for port %d.%d\n",
-				ds->index, dp->index);
-		else
-			devlink_port_type_eth_set(&dp->devlink_port, dp->slave);
+			break;
+
+		devlink_port_type_eth_set(dlp, dp->slave);
 		break;
 	}
 
-	return 0;
+	if (err && dsa_port_enabled)
+		dsa_port_disable(dp);
+	if (err && dsa_port_link_registered)
+		dsa_port_link_unregister_of(dp);
+	if (err && devlink_port_registered)
+		devlink_port_unregister(dlp);
+
+	return err;
 }
 
 static void dsa_port_teardown(struct dsa_port *dp)
 {
-	if (dp->type != DSA_PORT_TYPE_UNUSED)
-		devlink_port_unregister(&dp->devlink_port);
+	struct devlink_port *dlp = &dp->devlink_port;
 
 	switch (dp->type) {
 	case DSA_PORT_TYPE_UNUSED:
 		break;
 	case DSA_PORT_TYPE_CPU:
+		dsa_port_disable(dp);
+		dsa_tag_driver_put(dp->tag_ops);
+		devlink_port_unregister(dlp);
+		dsa_port_link_unregister_of(dp);
+		break;
 	case DSA_PORT_TYPE_DSA:
+		dsa_port_disable(dp);
+		devlink_port_unregister(dlp);
 		dsa_port_link_unregister_of(dp);
 		break;
 	case DSA_PORT_TYPE_USER:
+		devlink_port_unregister(dlp);
 		if (dp->slave) {
 			dsa_slave_destroy(dp->slave);
 			dp->slave = NULL;
@@ -340,7 +367,7 @@
 
 static int dsa_switch_setup(struct dsa_switch *ds)
 {
-	int err;
+	int err = 0;
 
 	/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
 	 * driver and before ops->setup() has run, since the switch drivers and
@@ -358,29 +385,41 @@
 
 	err = devlink_register(ds->devlink, ds->dev);
 	if (err)
-		return err;
-
-	err = ds->ops->setup(ds);
-	if (err < 0)
-		return err;
+		goto free_devlink;
 
 	err = dsa_switch_register_notifier(ds);
 	if (err)
-		return err;
+		goto unregister_devlink;
+
+	err = ds->ops->setup(ds);
+	if (err < 0)
+		goto unregister_notifier;
 
 	if (!ds->slave_mii_bus && ds->ops->phy_read) {
 		ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
-		if (!ds->slave_mii_bus)
-			return -ENOMEM;
+		if (!ds->slave_mii_bus) {
+			err = -ENOMEM;
+			goto unregister_notifier;
+		}
 
 		dsa_slave_mii_bus_init(ds);
 
 		err = mdiobus_register(ds->slave_mii_bus);
 		if (err < 0)
-			return err;
+			goto unregister_notifier;
 	}
 
 	return 0;
+
+unregister_notifier:
+	dsa_switch_unregister_notifier(ds);
+unregister_devlink:
+	devlink_unregister(ds->devlink);
+free_devlink:
+	devlink_free(ds->devlink);
+	ds->devlink = NULL;
+
+	return err;
 }
 
 static void dsa_switch_teardown(struct dsa_switch *ds)
@@ -390,6 +429,9 @@
 
 	dsa_switch_unregister_notifier(ds);
 
+	if (ds->ops->teardown)
+		ds->ops->teardown(ds);
+
 	if (ds->devlink) {
 		devlink_unregister(ds->devlink);
 		devlink_free(ds->devlink);
@@ -402,8 +444,8 @@
 {
 	struct dsa_switch *ds;
 	struct dsa_port *dp;
-	int device, port;
-	int err;
+	int device, port, i;
+	int err = 0;
 
 	for (device = 0; device < DSA_MAX_SWITCHES; device++) {
 		ds = dst->ds[device];
@@ -412,18 +454,41 @@
 
 		err = dsa_switch_setup(ds);
 		if (err)
-			return err;
+			goto switch_teardown;
 
 		for (port = 0; port < ds->num_ports; port++) {
 			dp = &ds->ports[port];
 
 			err = dsa_port_setup(dp);
 			if (err)
-				return err;
+				goto ports_teardown;
 		}
 	}
 
 	return 0;
+
+ports_teardown:
+	for (i = 0; i < port; i++)
+		dsa_port_teardown(&ds->ports[i]);
+
+	dsa_switch_teardown(ds);
+
+switch_teardown:
+	for (i = 0; i < device; i++) {
+		ds = dst->ds[i];
+		if (!ds)
+			continue;
+
+		for (port = 0; port < ds->num_ports; port++) {
+			dp = &ds->ports[port];
+
+			dsa_port_teardown(dp);
+		}
+
+		dsa_switch_teardown(ds);
+	}
+
+	return err;
 }
 
 static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
@@ -485,17 +550,24 @@
 
 	err = dsa_tree_setup_switches(dst);
 	if (err)
-		return err;
+		goto teardown_default_cpu;
 
 	err = dsa_tree_setup_master(dst);
 	if (err)
-		return err;
+		goto teardown_switches;
 
 	dst->setup = true;
 
 	pr_info("DSA: tree %d setup\n", dst->index);
 
 	return 0;
+
+teardown_switches:
+	dsa_tree_teardown_switches(dst);
+teardown_default_cpu:
+	dsa_tree_teardown_default_cpu(dst);
+
+	return err;
 }
 
 static void dsa_tree_teardown(struct dsa_switch_tree *dst)
@@ -536,8 +608,10 @@
 	dst->ds[index] = ds;
 
 	err = dsa_tree_setup(dst);
-	if (err)
-		dsa_tree_remove_switch(dst, index);
+	if (err) {
+		dst->ds[index] = NULL;
+		dsa_tree_put(dst);
+	}
 
 	return err;
 }
@@ -568,13 +642,16 @@
 	enum dsa_tag_protocol tag_protocol;
 
 	tag_protocol = ds->ops->get_tag_protocol(ds, dp->index);
-	tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+	tag_ops = dsa_tag_driver_get(tag_protocol);
 	if (IS_ERR(tag_ops)) {
+		if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
+			return -EPROBE_DEFER;
 		dev_warn(ds->dev, "No tagger for this switch\n");
 		return PTR_ERR(tag_ops);
 	}
 
 	dp->type = DSA_PORT_TYPE_CPU;
+	dp->filter = tag_ops->filter;
 	dp->rcv = tag_ops->rcv;
 	dp->tag_ops = tag_ops;
 	dp->master = master;
@@ -612,8 +689,8 @@
 {
 	struct device_node *ports, *port;
 	struct dsa_port *dp;
+	int err = 0;
 	u32 reg;
-	int err;
 
 	ports = of_get_child_by_name(dn, "ports");
 	if (!ports) {
@@ -624,19 +701,23 @@
 	for_each_available_child_of_node(ports, port) {
 		err = of_property_read_u32(port, "reg", &reg);
 		if (err)
-			return err;
+			goto out_put_node;
 
-		if (reg >= ds->num_ports)
-			return -EINVAL;
+		if (reg >= ds->num_ports) {
+			err = -EINVAL;
+			goto out_put_node;
+		}
 
 		dp = &ds->ports[reg];
 
 		err = dsa_port_parse_of(dp, port);
 		if (err)
-			return err;
+			goto out_put_node;
 	}
 
-	return 0;
+out_put_node:
+	of_node_put(ports);
+	return err;
 }
 
 static int dsa_switch_parse_member_of(struct dsa_switch *ds,
@@ -767,28 +848,13 @@
 
 struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
 {
-	size_t size = sizeof(struct dsa_switch) + n * sizeof(struct dsa_port);
 	struct dsa_switch *ds;
 	int i;
 
-	ds = devm_kzalloc(dev, size, GFP_KERNEL);
+	ds = devm_kzalloc(dev, struct_size(ds, ports, n), GFP_KERNEL);
 	if (!ds)
 		return NULL;
 
-	/* We avoid allocating memory outside dsa_switch
-	 * if it is not needed.
-	 */
-	if (n <= sizeof(ds->_bitmap) * 8) {
-		ds->bitmap = &ds->_bitmap;
-	} else {
-		ds->bitmap = devm_kcalloc(dev,
-					  BITS_TO_LONGS(n),
-					  sizeof(unsigned long),
-					  GFP_KERNEL);
-		if (unlikely(!ds->bitmap))
-			return NULL;
-	}
-
 	ds->dev = dev;
 	ds->num_ports = n;
 
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 3964c6f..12f8c7e 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -1,11 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * net/dsa/dsa_priv.h - Hardware switch handling
  * Copyright (c) 2008-2009 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __DSA_PRIV_H
@@ -84,25 +80,17 @@
 };
 
 /* dsa.c */
-const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
+const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol);
+void dsa_tag_driver_put(const struct dsa_device_ops *ops);
+
 bool dsa_schedule_work(struct work_struct *work);
+const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
 
-/* legacy.c */
-#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
-int dsa_legacy_register(void);
-void dsa_legacy_unregister(void);
-#else
-static inline int dsa_legacy_register(void)
-{
-	return 0;
-}
-
-static inline void dsa_legacy_unregister(void) { }
-#endif
 int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		       struct net_device *dev,
 		       const unsigned char *addr, u16 vid,
-		       u16 flags);
+		       u16 flags,
+		       struct netlink_ext_ack *extack);
 int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 		       struct net_device *dev,
 		       const unsigned char *addr, u16 vid);
@@ -141,7 +129,7 @@
 int dsa_port_set_state(struct dsa_port *dp, u8 state,
 		       struct switchdev_trans *trans);
 int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
-void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable(struct dsa_port *dp);
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
@@ -158,13 +146,38 @@
 		     struct switchdev_trans *trans);
 int dsa_port_mdb_del(const struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags,
+			      struct switchdev_trans *trans);
+int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags,
+			  struct switchdev_trans *trans);
+int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
+		     struct switchdev_trans *trans);
 int dsa_port_vlan_add(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan,
 		      struct switchdev_trans *trans);
 int dsa_port_vlan_del(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags);
+int dsa_port_vid_del(struct dsa_port *dp, u16 vid);
 int dsa_port_link_register_of(struct dsa_port *dp);
 void dsa_port_link_unregister_of(struct dsa_port *dp);
+void dsa_port_phylink_validate(struct phylink_config *config,
+			       unsigned long *supported,
+			       struct phylink_link_state *state);
+int dsa_port_phylink_mac_link_state(struct phylink_config *config,
+				    struct phylink_link_state *state);
+void dsa_port_phylink_mac_config(struct phylink_config *config,
+				 unsigned int mode,
+				 const struct phylink_link_state *state);
+void dsa_port_phylink_mac_an_restart(struct phylink_config *config);
+void dsa_port_phylink_mac_link_down(struct phylink_config *config,
+				    unsigned int mode,
+				    phy_interface_t interface);
+void dsa_port_phylink_mac_link_up(struct phylink_config *config,
+				  unsigned int mode,
+				  phy_interface_t interface,
+				  struct phy_device *phydev);
+extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
@@ -176,6 +189,8 @@
 int dsa_slave_register_notifier(void);
 void dsa_slave_unregister_notifier(void);
 
+void *dsa_defer_xmit(struct sk_buff *skb, struct net_device *dev);
+
 static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -194,30 +209,4 @@
 /* switch.c */
 int dsa_switch_register_notifier(struct dsa_switch *ds);
 void dsa_switch_unregister_notifier(struct dsa_switch *ds);
-
-/* tag_brcm.c */
-extern const struct dsa_device_ops brcm_netdev_ops;
-extern const struct dsa_device_ops brcm_prepend_netdev_ops;
-
-/* tag_dsa.c */
-extern const struct dsa_device_ops dsa_netdev_ops;
-
-/* tag_edsa.c */
-extern const struct dsa_device_ops edsa_netdev_ops;
-
-/* tag_ksz.c */
-extern const struct dsa_device_ops ksz_netdev_ops;
-
-/* tag_lan9303.c */
-extern const struct dsa_device_ops lan9303_netdev_ops;
-
-/* tag_mtk.c */
-extern const struct dsa_device_ops mtk_netdev_ops;
-
-/* tag_qca.c */
-extern const struct dsa_device_ops qca_netdev_ops;
-
-/* tag_trailer.c */
-extern const struct dsa_device_ops trailer_netdev_ops;
-
 #endif
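
The phylink MAC callbacks declared above are shared between user ports (slave.c) and CPU/DSA ports (port.c). A hedged sketch of how a consumer hands the common ops table to phylink; example_port_phylink_register() is illustrative, and the real code derives the interface mode from the device tree rather than using PHY_INTERFACE_MODE_NA:

#include <linux/err.h>
#include <linux/of.h>
#include <linux/phy.h>
#include <linux/phylink.h>

static int example_port_phylink_register(struct dsa_port *dp)
{
	struct dsa_switch *ds = dp->ds;

	dp->pl_config.dev = ds->dev;
	dp->pl_config.type = PHYLINK_DEV;

	/* one ops table serves user, CPU and DSA ports alike */
	dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
				PHY_INTERFACE_MODE_NA,
				&dsa_port_phylink_mac_ops);
	return PTR_ERR_OR_ZERO(dp->pl);
}
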
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
deleted file mode 100644
index 42a7b85..0000000
--- a/net/dsa/legacy.c
+++ /dev/null
@@ -1,748 +0,0 @@
-/*
- * net/dsa/legacy.c - Hardware switch handling
- * Copyright (c) 2008-2009 Marvell Semiconductor
- * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_mdio.h>
-#include <linux/of_platform.h>
-#include <linux/of_net.h>
-#include <linux/netdevice.h>
-#include <linux/sysfs.h>
-#include <linux/phy_fixed.h>
-#include <linux/etherdevice.h>
-
-#include "dsa_priv.h"
-
-/* switch driver registration ***********************************************/
-static DEFINE_MUTEX(dsa_switch_drivers_mutex);
-static LIST_HEAD(dsa_switch_drivers);
-
-void register_switch_driver(struct dsa_switch_driver *drv)
-{
-	mutex_lock(&dsa_switch_drivers_mutex);
-	list_add_tail(&drv->list, &dsa_switch_drivers);
-	mutex_unlock(&dsa_switch_drivers_mutex);
-}
-EXPORT_SYMBOL_GPL(register_switch_driver);
-
-void unregister_switch_driver(struct dsa_switch_driver *drv)
-{
-	mutex_lock(&dsa_switch_drivers_mutex);
-	list_del_init(&drv->list);
-	mutex_unlock(&dsa_switch_drivers_mutex);
-}
-EXPORT_SYMBOL_GPL(unregister_switch_driver);
-
-static const struct dsa_switch_ops *
-dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
-		 const char **_name, void **priv)
-{
-	const struct dsa_switch_ops *ret;
-	struct list_head *list;
-	const char *name;
-
-	ret = NULL;
-	name = NULL;
-
-	mutex_lock(&dsa_switch_drivers_mutex);
-	list_for_each(list, &dsa_switch_drivers) {
-		const struct dsa_switch_ops *ops;
-		struct dsa_switch_driver *drv;
-
-		drv = list_entry(list, struct dsa_switch_driver, list);
-		ops = drv->ops;
-
-		name = ops->probe(parent, host_dev, sw_addr, priv);
-		if (name != NULL) {
-			ret = ops;
-			break;
-		}
-	}
-	mutex_unlock(&dsa_switch_drivers_mutex);
-
-	*_name = name;
-
-	return ret;
-}
-
-/* basic switch operations **************************************************/
-static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
-{
-	int ret, port;
-
-	for (port = 0; port < ds->num_ports; port++) {
-		if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
-			continue;
-
-		ret = dsa_port_link_register_of(&ds->ports[port]);
-		if (ret)
-			return ret;
-	}
-	return 0;
-}
-
-static int dsa_switch_setup_one(struct dsa_switch *ds,
-				struct net_device *master)
-{
-	const struct dsa_switch_ops *ops = ds->ops;
-	struct dsa_switch_tree *dst = ds->dst;
-	struct dsa_chip_data *cd = ds->cd;
-	bool valid_name_found = false;
-	int index = ds->index;
-	struct dsa_port *dp;
-	int i, ret;
-
-	/*
-	 * Validate supplied switch configuration.
-	 */
-	for (i = 0; i < ds->num_ports; i++) {
-		char *name;
-
-		dp = &ds->ports[i];
-
-		name = cd->port_names[i];
-		if (name == NULL)
-			continue;
-		dp->name = name;
-
-		if (!strcmp(name, "cpu")) {
-			if (dst->cpu_dp) {
-				netdev_err(master,
-					   "multiple cpu ports?!\n");
-				return -EINVAL;
-			}
-			dst->cpu_dp = &ds->ports[i];
-			dst->cpu_dp->master = master;
-			dp->type = DSA_PORT_TYPE_CPU;
-		} else if (!strcmp(name, "dsa")) {
-			dp->type = DSA_PORT_TYPE_DSA;
-		} else {
-			dp->type = DSA_PORT_TYPE_USER;
-		}
-		valid_name_found = true;
-	}
-
-	if (!valid_name_found && i == ds->num_ports)
-		return -EINVAL;
-
-	/* Make the built-in MII bus mask match the number of ports,
-	 * switch drivers can override this later
-	 */
-	ds->phys_mii_mask |= dsa_user_ports(ds);
-
-	/*
-	 * If the CPU connects to this switch, set the switch tree
-	 * tagging protocol to the preferred tagging format of this
-	 * switch.
-	 */
-	if (dst->cpu_dp->ds == ds) {
-		const struct dsa_device_ops *tag_ops;
-		enum dsa_tag_protocol tag_protocol;
-
-		tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index);
-		tag_ops = dsa_resolve_tag_protocol(tag_protocol);
-		if (IS_ERR(tag_ops))
-			return PTR_ERR(tag_ops);
-
-		dst->cpu_dp->tag_ops = tag_ops;
-
-		/* Few copies for faster access in master receive hot path */
-		dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
-		dst->cpu_dp->dst = dst;
-	}
-
-	memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
-
-	/*
-	 * Do basic register setup.
-	 */
-	ret = ops->setup(ds);
-	if (ret < 0)
-		return ret;
-
-	ret = dsa_switch_register_notifier(ds);
-	if (ret)
-		return ret;
-
-	if (!ds->slave_mii_bus && ops->phy_read) {
-		ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
-		if (!ds->slave_mii_bus)
-			return -ENOMEM;
-		dsa_slave_mii_bus_init(ds);
-
-		ret = mdiobus_register(ds->slave_mii_bus);
-		if (ret < 0)
-			return ret;
-	}
-
-	/*
-	 * Create network devices for physical switch ports.
-	 */
-	for (i = 0; i < ds->num_ports; i++) {
-		ds->ports[i].dn = cd->port_dn[i];
-		ds->ports[i].cpu_dp = dst->cpu_dp;
-
-		if (!dsa_is_user_port(ds, i))
-			continue;
-
-		ret = dsa_slave_create(&ds->ports[i]);
-		if (ret < 0)
-			netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
-				   index, i, cd->port_names[i], ret);
-	}
-
-	/* Perform configuration of the CPU and DSA ports */
-	ret = dsa_cpu_dsa_setups(ds);
-	if (ret < 0)
-		netdev_err(master, "[%d] : can't configure CPU and DSA ports\n",
-			   index);
-
-	return 0;
-}
-
-static struct dsa_switch *
-dsa_switch_setup(struct dsa_switch_tree *dst, struct net_device *master,
-		 int index, struct device *parent, struct device *host_dev)
-{
-	struct dsa_chip_data *cd = dst->pd->chip + index;
-	const struct dsa_switch_ops *ops;
-	struct dsa_switch *ds;
-	int ret;
-	const char *name;
-	void *priv;
-
-	/*
-	 * Probe for switch model.
-	 */
-	ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
-	if (!ops) {
-		netdev_err(master, "[%d]: could not detect attached switch\n",
-			   index);
-		return ERR_PTR(-EINVAL);
-	}
-	netdev_info(master, "[%d]: detected a %s switch\n",
-		    index, name);
-
-
-	/*
-	 * Allocate and initialise switch state.
-	 */
-	ds = dsa_switch_alloc(parent, DSA_MAX_PORTS);
-	if (!ds)
-		return ERR_PTR(-ENOMEM);
-
-	ds->dst = dst;
-	ds->index = index;
-	ds->cd = cd;
-	ds->ops = ops;
-	ds->priv = priv;
-
-	ret = dsa_switch_setup_one(ds, master);
-	if (ret)
-		return ERR_PTR(ret);
-
-	return ds;
-}
-
-static void dsa_switch_destroy(struct dsa_switch *ds)
-{
-	int port;
-
-	/* Destroy network devices for physical switch ports. */
-	for (port = 0; port < ds->num_ports; port++) {
-		if (!dsa_is_user_port(ds, port))
-			continue;
-
-		if (!ds->ports[port].slave)
-			continue;
-
-		dsa_slave_destroy(ds->ports[port].slave);
-	}
-
-	/* Disable configuration of the CPU and DSA ports */
-	for (port = 0; port < ds->num_ports; port++) {
-		if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
-			continue;
-		dsa_port_link_unregister_of(&ds->ports[port]);
-	}
-
-	if (ds->slave_mii_bus && ds->ops->phy_read)
-		mdiobus_unregister(ds->slave_mii_bus);
-
-	dsa_switch_unregister_notifier(ds);
-}
-
-/* platform driver init and cleanup *****************************************/
-static int dev_is_class(struct device *dev, void *class)
-{
-	if (dev->class != NULL && !strcmp(dev->class->name, class))
-		return 1;
-
-	return 0;
-}
-
-static struct device *dev_find_class(struct device *parent, char *class)
-{
-	if (dev_is_class(parent, class)) {
-		get_device(parent);
-		return parent;
-	}
-
-	return device_find_child(parent, class, dev_is_class);
-}
-
-struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
-{
-	struct device *d;
-
-	d = dev_find_class(dev, "mdio_bus");
-	if (d != NULL) {
-		struct mii_bus *bus;
-
-		bus = to_mii_bus(d);
-		put_device(d);
-
-		return bus;
-	}
-
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus);
-
-#ifdef CONFIG_OF
-static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
-					struct dsa_chip_data *cd,
-					int chip_index, int port_index,
-					struct device_node *link)
-{
-	const __be32 *reg;
-	int link_sw_addr;
-	struct device_node *parent_sw;
-	int len;
-
-	parent_sw = of_get_parent(link);
-	if (!parent_sw)
-		return -EINVAL;
-
-	reg = of_get_property(parent_sw, "reg", &len);
-	if (!reg || (len != sizeof(*reg) * 2))
-		return -EINVAL;
-
-	/*
-	 * Get the destination switch number from the second field of its 'reg'
-	 * property, i.e. for "reg = <0x19 1>" sw_addr is '1'.
-	 */
-	link_sw_addr = be32_to_cpup(reg + 1);
-
-	if (link_sw_addr >= pd->nr_chips)
-		return -EINVAL;
-
-	cd->rtable[link_sw_addr] = port_index;
-
-	return 0;
-}
-
-static int dsa_of_probe_links(struct dsa_platform_data *pd,
-			      struct dsa_chip_data *cd,
-			      int chip_index, int port_index,
-			      struct device_node *port,
-			      const char *port_name)
-{
-	struct device_node *link;
-	int link_index;
-	int ret;
-
-	for (link_index = 0;; link_index++) {
-		link = of_parse_phandle(port, "link", link_index);
-		if (!link)
-			break;
-
-		if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) {
-			ret = dsa_of_setup_routing_table(pd, cd, chip_index,
-							 port_index, link);
-			if (ret)
-				return ret;
-		}
-	}
-	return 0;
-}
-
-static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
-{
-	int i;
-	int port_index;
-
-	for (i = 0; i < pd->nr_chips; i++) {
-		port_index = 0;
-		while (port_index < DSA_MAX_PORTS) {
-			kfree(pd->chip[i].port_names[port_index]);
-			port_index++;
-		}
-
-		/* Drop our reference to the MDIO bus device */
-		if (pd->chip[i].host_dev)
-			put_device(pd->chip[i].host_dev);
-	}
-	kfree(pd->chip);
-}
-
-static int dsa_of_probe(struct device *dev)
-{
-	struct device_node *np = dev->of_node;
-	struct device_node *child, *mdio, *ethernet, *port;
-	struct mii_bus *mdio_bus, *mdio_bus_switch;
-	struct net_device *ethernet_dev;
-	struct dsa_platform_data *pd;
-	struct dsa_chip_data *cd;
-	const char *port_name;
-	int chip_index, port_index;
-	const unsigned int *sw_addr, *port_reg;
-	u32 eeprom_len;
-	int ret;
-
-	mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
-	if (!mdio)
-		return -EINVAL;
-
-	mdio_bus = of_mdio_find_bus(mdio);
-	if (!mdio_bus)
-		return -EPROBE_DEFER;
-
-	ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
-	if (!ethernet) {
-		ret = -EINVAL;
-		goto out_put_mdio;
-	}
-
-	ethernet_dev = of_find_net_device_by_node(ethernet);
-	if (!ethernet_dev) {
-		ret = -EPROBE_DEFER;
-		goto out_put_mdio;
-	}
-
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd) {
-		ret = -ENOMEM;
-		goto out_put_ethernet;
-	}
-
-	dev->platform_data = pd;
-	pd->of_netdev = ethernet_dev;
-	pd->nr_chips = of_get_available_child_count(np);
-	if (pd->nr_chips > DSA_MAX_SWITCHES)
-		pd->nr_chips = DSA_MAX_SWITCHES;
-
-	pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data),
-			   GFP_KERNEL);
-	if (!pd->chip) {
-		ret = -ENOMEM;
-		goto out_free;
-	}
-
-	chip_index = -1;
-	for_each_available_child_of_node(np, child) {
-		int i;
-
-		chip_index++;
-		cd = &pd->chip[chip_index];
-
-		cd->of_node = child;
-
-		/* Initialize the routing table */
-		for (i = 0; i < DSA_MAX_SWITCHES; ++i)
-			cd->rtable[i] = DSA_RTABLE_NONE;
-
-		/* When assigning the host device, increment its refcount */
-		cd->host_dev = get_device(&mdio_bus->dev);
-
-		sw_addr = of_get_property(child, "reg", NULL);
-		if (!sw_addr)
-			continue;
-
-		cd->sw_addr = be32_to_cpup(sw_addr);
-		if (cd->sw_addr >= PHY_MAX_ADDR)
-			continue;
-
-		if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
-			cd->eeprom_len = eeprom_len;
-
-		mdio = of_parse_phandle(child, "mii-bus", 0);
-		if (mdio) {
-			mdio_bus_switch = of_mdio_find_bus(mdio);
-			if (!mdio_bus_switch) {
-				ret = -EPROBE_DEFER;
-				goto out_free_chip;
-			}
-
-			/* Drop the mdio_bus device ref, replacing the host
-			 * device with the mdio_bus_switch device, keeping
-			 * the refcount from of_mdio_find_bus() above.
-			 */
-			put_device(cd->host_dev);
-			cd->host_dev = &mdio_bus_switch->dev;
-		}
-
-		for_each_available_child_of_node(child, port) {
-			port_reg = of_get_property(port, "reg", NULL);
-			if (!port_reg)
-				continue;
-
-			port_index = be32_to_cpup(port_reg);
-			if (port_index >= DSA_MAX_PORTS)
-				break;
-
-			port_name = of_get_property(port, "label", NULL);
-			if (!port_name)
-				continue;
-
-			cd->port_dn[port_index] = port;
-
-			cd->port_names[port_index] = kstrdup(port_name,
-					GFP_KERNEL);
-			if (!cd->port_names[port_index]) {
-				ret = -ENOMEM;
-				goto out_free_chip;
-			}
-
-			ret = dsa_of_probe_links(pd, cd, chip_index,
-						 port_index, port, port_name);
-			if (ret)
-				goto out_free_chip;
-
-		}
-	}
-
-	/* The individual chips hold their own refcount on the mdio bus,
-	 * so drop ours */
-	put_device(&mdio_bus->dev);
-
-	return 0;
-
-out_free_chip:
-	dsa_of_free_platform_data(pd);
-out_free:
-	kfree(pd);
-	dev->platform_data = NULL;
-out_put_ethernet:
-	put_device(&ethernet_dev->dev);
-out_put_mdio:
-	put_device(&mdio_bus->dev);
-	return ret;
-}
-
-static void dsa_of_remove(struct device *dev)
-{
-	struct dsa_platform_data *pd = dev->platform_data;
-
-	if (!dev->of_node)
-		return;
-
-	dsa_of_free_platform_data(pd);
-	put_device(&pd->of_netdev->dev);
-	kfree(pd);
-}
-#else
-static inline int dsa_of_probe(struct device *dev)
-{
-	return 0;
-}
-
-static inline void dsa_of_remove(struct device *dev)
-{
-}
-#endif
-
-static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
-			 struct device *parent, struct dsa_platform_data *pd)
-{
-	int i;
-	unsigned configured = 0;
-
-	dst->pd = pd;
-
-	for (i = 0; i < pd->nr_chips; i++) {
-		struct dsa_switch *ds;
-
-		ds = dsa_switch_setup(dst, dev, i, parent, pd->chip[i].host_dev);
-		if (IS_ERR(ds)) {
-			netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n",
-				   i, PTR_ERR(ds));
-			continue;
-		}
-
-		dst->ds[i] = ds;
-
-		++configured;
-	}
-
-	/*
-	 * If no switch was found, exit cleanly
-	 */
-	if (!configured)
-		return -EPROBE_DEFER;
-
-	return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp);
-}
-
-static int dsa_probe(struct platform_device *pdev)
-{
-	struct dsa_platform_data *pd = pdev->dev.platform_data;
-	struct net_device *dev;
-	struct dsa_switch_tree *dst;
-	int ret;
-
-	if (pdev->dev.of_node) {
-		ret = dsa_of_probe(&pdev->dev);
-		if (ret)
-			return ret;
-
-		pd = pdev->dev.platform_data;
-	}
-
-	if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL))
-		return -EINVAL;
-
-	if (pd->of_netdev) {
-		dev = pd->of_netdev;
-		dev_hold(dev);
-	} else {
-		dev = dsa_dev_to_net_device(pd->netdev);
-	}
-	if (dev == NULL) {
-		ret = -EPROBE_DEFER;
-		goto out;
-	}
-
-	if (dev->dsa_ptr != NULL) {
-		dev_put(dev);
-		ret = -EEXIST;
-		goto out;
-	}
-
-	dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL);
-	if (dst == NULL) {
-		dev_put(dev);
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	platform_set_drvdata(pdev, dst);
-
-	ret = dsa_setup_dst(dst, dev, &pdev->dev, pd);
-	if (ret) {
-		dev_put(dev);
-		goto out;
-	}
-
-	return 0;
-
-out:
-	dsa_of_remove(&pdev->dev);
-
-	return ret;
-}
-
-static void dsa_remove_dst(struct dsa_switch_tree *dst)
-{
-	int i;
-
-	dsa_master_teardown(dst->cpu_dp->master);
-
-	for (i = 0; i < dst->pd->nr_chips; i++) {
-		struct dsa_switch *ds = dst->ds[i];
-
-		if (ds)
-			dsa_switch_destroy(ds);
-	}
-
-	dev_put(dst->cpu_dp->master);
-}
-
-static int dsa_remove(struct platform_device *pdev)
-{
-	struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
-
-	dsa_remove_dst(dst);
-	dsa_of_remove(&pdev->dev);
-
-	return 0;
-}
-
-static void dsa_shutdown(struct platform_device *pdev)
-{
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int dsa_suspend(struct device *d)
-{
-	struct platform_device *pdev = to_platform_device(d);
-	struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
-	int i, ret = 0;
-
-	for (i = 0; i < dst->pd->nr_chips; i++) {
-		struct dsa_switch *ds = dst->ds[i];
-
-		if (ds != NULL)
-			ret = dsa_switch_suspend(ds);
-	}
-
-	return ret;
-}
-
-static int dsa_resume(struct device *d)
-{
-	struct platform_device *pdev = to_platform_device(d);
-	struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
-	int i, ret = 0;
-
-	for (i = 0; i < dst->pd->nr_chips; i++) {
-		struct dsa_switch *ds = dst->ds[i];
-
-		if (ds != NULL)
-			ret = dsa_switch_resume(ds);
-	}
-
-	return ret;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
-
-static const struct of_device_id dsa_of_match_table[] = {
-	{ .compatible = "marvell,dsa", },
-	{}
-};
-MODULE_DEVICE_TABLE(of, dsa_of_match_table);
-
-static struct platform_driver dsa_driver = {
-	.probe		= dsa_probe,
-	.remove		= dsa_remove,
-	.shutdown	= dsa_shutdown,
-	.driver = {
-		.name	= "dsa",
-		.of_match_table = dsa_of_match_table,
-		.pm	= &dsa_pm_ops,
-	},
-};
-
-int dsa_legacy_register(void)
-{
-	return platform_driver_register(&dsa_driver);
-}
-
-void dsa_legacy_unregister(void)
-{
-	platform_driver_unregister(&dsa_driver);
-}
diff --git a/net/dsa/master.c b/net/dsa/master.c
index c90ee32..3255dfc 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -1,17 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Handling of a master device, switching frames via its switch fabric CPU port
  *
  * Copyright (c) 2017 Savoir-faire Linux Inc.
  *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include "dsa_priv.h"
 
+static int dsa_master_get_regs_len(struct net_device *dev)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+	const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+	struct dsa_switch *ds = cpu_dp->ds;
+	int port = cpu_dp->index;
+	int ret = 0;
+	int len;
+
+	if (ops->get_regs_len) {
+		len = ops->get_regs_len(dev);
+		if (len < 0)
+			return len;
+		ret += len;
+	}
+
+	ret += sizeof(struct ethtool_drvinfo);
+	ret += sizeof(struct ethtool_regs);
+
+	if (ds->ops->get_regs_len) {
+		len = ds->ops->get_regs_len(ds, port);
+		if (len < 0)
+			return len;
+		ret += len;
+	}
+
+	return ret;
+}
+
+static void dsa_master_get_regs(struct net_device *dev,
+				struct ethtool_regs *regs, void *data)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+	const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+	struct dsa_switch *ds = cpu_dp->ds;
+	struct ethtool_drvinfo *cpu_info;
+	struct ethtool_regs *cpu_regs;
+	int port = cpu_dp->index;
+	int len;
+
+	if (ops->get_regs_len && ops->get_regs) {
+		len = ops->get_regs_len(dev);
+		if (len < 0)
+			return;
+		regs->len = len;
+		ops->get_regs(dev, regs, data);
+		data += regs->len;
+	}
+
+	cpu_info = (struct ethtool_drvinfo *)data;
+	strlcpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver));
+	data += sizeof(*cpu_info);
+	cpu_regs = (struct ethtool_regs *)data;
+	data += sizeof(*cpu_regs);
+
+	if (ds->ops->get_regs_len && ds->ops->get_regs) {
+		len = ds->ops->get_regs_len(ds, port);
+		if (len < 0)
+			return;
+		cpu_regs->len = len;
+		ds->ops->get_regs(ds, port, cpu_regs, data);
+	}
+}
+
 static void dsa_master_get_ethtool_stats(struct net_device *dev,
 					 struct ethtool_stats *stats,
 					 uint64_t *data)
@@ -126,6 +186,17 @@
 	}
 }
 
+static int dsa_master_get_phys_port_name(struct net_device *dev,
+					 char *name, size_t len)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+	if (snprintf(name, len, "p%d", cpu_dp->index) >= len)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int dsa_master_ethtool_setup(struct net_device *dev)
 {
 	struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -140,6 +211,8 @@
 	if (cpu_dp->orig_ethtool_ops)
 		memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops));
 
+	ops->get_regs_len = dsa_master_get_regs_len;
+	ops->get_regs = dsa_master_get_regs;
 	ops->get_sset_count = dsa_master_get_sset_count;
 	ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
 	ops->get_strings = dsa_master_get_strings;
@@ -158,8 +231,91 @@
 	cpu_dp->orig_ethtool_ops = NULL;
 }
 
+static int dsa_master_ndo_setup(struct net_device *dev)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+	struct dsa_switch *ds = cpu_dp->ds;
+	struct net_device_ops *ops;
+
+	if (dev->netdev_ops->ndo_get_phys_port_name)
+		return 0;
+
+	ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return -ENOMEM;
+
+	cpu_dp->orig_ndo_ops = dev->netdev_ops;
+	if (cpu_dp->orig_ndo_ops)
+		memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops));
+
+	ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name;
+
+	dev->netdev_ops = ops;
+
+	return 0;
+}
+
+static void dsa_master_ndo_teardown(struct net_device *dev)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+	dev->netdev_ops = cpu_dp->orig_ndo_ops;
+	cpu_dp->orig_ndo_ops = NULL;
+}
+
+static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
+			    char *buf)
+{
+	struct net_device *dev = to_net_dev(d);
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+	return sprintf(buf, "%s\n",
+		       dsa_tag_protocol_to_str(cpu_dp->tag_ops));
+}
+static DEVICE_ATTR_RO(tagging);
+
+static struct attribute *dsa_slave_attrs[] = {
+	&dev_attr_tagging.attr,
+	NULL
+};
+
+static const struct attribute_group dsa_group = {
+	.name	= "dsa",
+	.attrs	= dsa_slave_attrs,
+};
+
+static void dsa_master_set_mtu(struct net_device *dev, struct dsa_port *cpu_dp)
+{
+	unsigned int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead;
+	int err;
+
+	rtnl_lock();
+	if (mtu <= dev->max_mtu) {
+		err = dev_set_mtu(dev, mtu);
+		if (err)
+			netdev_dbg(dev, "Unable to set MTU to include DSA overheads\n");
+	}
+	rtnl_unlock();
+}
+
+static void dsa_master_reset_mtu(struct net_device *dev)
+{
+	int err;
+
+	rtnl_lock();
+	err = dev_set_mtu(dev, ETH_DATA_LEN);
+	if (err)
+		netdev_dbg(dev,
+			   "Unable to reset MTU to exclude DSA overheads\n");
+	rtnl_unlock();
+}
+
 int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
 {
+	int ret;
+
+	dsa_master_set_mtu(dev, cpu_dp);
+
 	/* If we use a tagging format that doesn't have an ethertype
 	 * field, make sure that all packets from this point on get
 	 * sent to the tag format's receive function.
@@ -167,13 +323,33 @@
 	wmb();
 
 	dev->dsa_ptr = cpu_dp;
+	ret = dsa_master_ethtool_setup(dev);
+	if (ret)
+		return ret;
 
-	return dsa_master_ethtool_setup(dev);
+	ret = dsa_master_ndo_setup(dev);
+	if (ret)
+		goto out_err_ethtool_teardown;
+
+	ret = sysfs_create_group(&dev->dev.kobj, &dsa_group);
+	if (ret)
+		goto out_err_ndo_teardown;
+
+	return ret;
+
+out_err_ndo_teardown:
+	dsa_master_ndo_teardown(dev);
+out_err_ethtool_teardown:
+	dsa_master_ethtool_teardown(dev);
+	return ret;
 }
 
 void dsa_master_teardown(struct net_device *dev)
 {
+	sysfs_remove_group(&dev->dev.kobj, &dsa_group);
+	dsa_master_ndo_teardown(dev);
 	dsa_master_ethtool_teardown(dev);
+	dsa_master_reset_mtu(dev);
 
 	dev->dsa_ptr = NULL;
 
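
dsa_master_get_regs() concatenates the master's own register dump with an ethtool_drvinfo marker, a second ethtool_regs header, and the switch's dump. A hedged userspace sketch of walking that blob (walk_master_regs() is illustrative):

#include <linux/ethtool.h>
#include <stdio.h>

/* layout: [master regs][ethtool_drvinfo "dsa"][ethtool_regs][switch regs] */
static void walk_master_regs(const void *blob, size_t master_len)
{
	const char *p = (const char *)blob + master_len;
	const struct ethtool_drvinfo *info =
		(const struct ethtool_drvinfo *)p;
	const struct ethtool_regs *sw_regs =
		(const struct ethtool_regs *)(p + sizeof(*info));

	printf("embedded driver '%s', switch dump of %u bytes\n",
	       info->driver, sw_regs->len);
}

The tagging attribute created alongside is readable at /sys/class/net/<master>/dsa/tagging, and dsa_master_set_mtu() raises the master's MTU by the tagger's overhead (for example ETH_DATA_LEN + 8 for EDSA) so full-sized tagged frames still fit.
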
diff --git a/net/dsa/port.c b/net/dsa/port.c
index ed05954..9b54e5a 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Handling of a single switch port
  *
  * Copyright (c) 2017 Savoir-faire Linux Inc.
  *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/if_bridge.h>
@@ -69,7 +65,6 @@
 
 int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
 {
-	u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
 	struct dsa_switch *ds = dp->ds;
 	int port = dp->index;
 	int err;
@@ -80,20 +75,22 @@
 			return err;
 	}
 
-	dsa_port_set_state_now(dp, stp_state);
+	if (!dp->bridge_dev)
+		dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
 
 	return 0;
 }
 
-void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
+void dsa_port_disable(struct dsa_port *dp)
 {
 	struct dsa_switch *ds = dp->ds;
 	int port = dp->index;
 
-	dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+	if (!dp->bridge_dev)
+		dsa_port_set_state_now(dp, BR_STATE_DISABLED);
 
 	if (ds->ops->port_disable)
-		ds->ops->port_disable(ds, port, phy);
+		ds->ops->port_disable(ds, port);
 }
 
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
@@ -105,16 +102,23 @@
 	};
 	int err;
 
-	/* Here the port is already bridged. Reflect the current configuration
-	 * so that drivers can program their chips accordingly.
+	/* Set the flooding mode before joining the port in the switch */
+	err = dsa_port_bridge_flags(dp, BR_FLOOD | BR_MCAST_FLOOD, NULL);
+	if (err)
+		return err;
+
+	/* Here the interface is already bridged. Reflect the current
+	 * configuration so that drivers can program their chips accordingly.
 	 */
 	dp->bridge_dev = br;
 
 	err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info);
 
 	/* The bridging is rolled back on error */
-	if (err)
+	if (err) {
+		dsa_port_bridge_flags(dp, 0, NULL);
 		dp->bridge_dev = NULL;
+	}
 
 	return err;
 }
@@ -137,25 +141,76 @@
 	if (err)
 		pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
 
+	/* Port is leaving the bridge, disable flooding */
+	dsa_port_bridge_flags(dp, 0, NULL);
+
 	/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
 	 * so allow it to be in BR_STATE_FORWARDING to be kept functional
 	 */
 	dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
 }
 
+static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
+					      bool vlan_filtering)
+{
+	struct dsa_switch *ds = dp->ds;
+	int i;
+
+	if (!ds->vlan_filtering_is_global)
+		return true;
+
+	/* For cases where enabling/disabling VLAN awareness is global to the
+	 * switch, we need to handle the case where multiple bridges span
+	 * different ports of the same switch device and one of them has a
+	 * different setting than what is being requested.
+	 */
+	for (i = 0; i < ds->num_ports; i++) {
+		struct net_device *other_bridge;
+
+		other_bridge = dsa_to_port(ds, i)->bridge_dev;
+		if (!other_bridge)
+			continue;
+		/* If it's the same bridge, it also has same
+		 * vlan_filtering setting => no need to check
+		 */
+		if (other_bridge == dp->bridge_dev)
+			continue;
+		if (br_vlan_enabled(other_bridge) != vlan_filtering) {
+			dev_err(ds->dev, "VLAN filtering is a global setting\n");
+			return false;
+		}
+	}
+	return true;
+}
+
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 			    struct switchdev_trans *trans)
 {
 	struct dsa_switch *ds = dp->ds;
+	int err;
 
 	/* bridge skips -EOPNOTSUPP, so skip the prepare phase */
 	if (switchdev_trans_ph_prepare(trans))
 		return 0;
 
-	if (ds->ops->port_vlan_filtering)
-		return ds->ops->port_vlan_filtering(ds, dp->index,
-						    vlan_filtering);
+	if (!ds->ops->port_vlan_filtering)
+		return 0;
 
+	if (!dsa_port_can_apply_vlan_filtering(dp, vlan_filtering))
+		return -EINVAL;
+
+	if (dsa_port_is_vlan_filtering(dp) == vlan_filtering)
+		return 0;
+
+	err = ds->ops->port_vlan_filtering(ds, dp->index,
+					   vlan_filtering);
+	if (err)
+		return err;
+
+	if (ds->vlan_filtering_is_global)
+		ds->vlan_filtering = vlan_filtering;
+	else
+		dp->vlan_filtering = vlan_filtering;
 	return 0;
 }
 
@@ -177,6 +232,47 @@
 	return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
 }
 
+int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags,
+			      struct switchdev_trans *trans)
+{
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->port_egress_floods ||
+	    (flags & ~(BR_FLOOD | BR_MCAST_FLOOD)))
+		return -EINVAL;
+
+	return 0;
+}
+
+int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags,
+			  struct switchdev_trans *trans)
+{
+	struct dsa_switch *ds = dp->ds;
+	int port = dp->index;
+	int err = 0;
+
+	if (switchdev_trans_ph_prepare(trans))
+		return 0;
+
+	if (ds->ops->port_egress_floods)
+		err = ds->ops->port_egress_floods(ds, port, flags & BR_FLOOD,
+						  flags & BR_MCAST_FLOOD);
+
+	return err;
+}
+
+int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
+		     struct switchdev_trans *trans)
+{
+	struct dsa_switch *ds = dp->ds;
+	int port = dp->index;
+
+	if (switchdev_trans_ph_prepare(trans))
+		return ds->ops->port_egress_floods ? 0 : -EOPNOTSUPP;
+
+	return ds->ops->port_egress_floods(ds, port, true, mrouter);
+}
+
 int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
 		     u16 vid)
 {
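
The pre_bridge_flags/bridge_flags pair above funnels into a single driver hook. An assumed driver-side counterpart (a sketch only; the register helpers and bit names are hypothetical, but the signature matches the ds->ops->port_egress_floods calls above):

#include <linux/bits.h>
#include <net/dsa.h>

#define EXAMPLE_UC_FLOOD	BIT(0)	/* hypothetical per-port flood bits */
#define EXAMPLE_MC_FLOOD	BIT(1)

/* hypothetical register accessors standing in for real switch I/O */
u32 example_read_flood_reg(struct dsa_switch *ds, int port);
int example_write_flood_reg(struct dsa_switch *ds, int port, u32 val);

static int example_port_egress_floods(struct dsa_switch *ds, int port,
				      bool unicast, bool multicast)
{
	u32 reg = example_read_flood_reg(ds, port);

	reg &= ~(EXAMPLE_UC_FLOOD | EXAMPLE_MC_FLOOD);
	if (unicast)
		reg |= EXAMPLE_UC_FLOOD;
	if (multicast)
		reg |= EXAMPLE_MC_FLOOD;

	return example_write_flood_reg(ds, port, reg);
}

A driver exposes this via .port_egress_floods in its dsa_switch_ops; dsa_port_mrouter() reuses the same hook with unicast forced on.
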
@@ -252,13 +348,7 @@
 		.vlan = vlan,
 	};
 
-	if (netif_is_bridge_master(vlan->obj.orig_dev))
-		return -EOPNOTSUPP;
-
-	if (br_vlan_enabled(dp->bridge_dev))
-		return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
-
-	return 0;
+	return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
 }
 
 int dsa_port_vlan_del(struct dsa_port *dp,
@@ -270,15 +360,42 @@
 		.vlan = vlan,
 	};
 
-	if (netif_is_bridge_master(vlan->obj.orig_dev))
-		return -EOPNOTSUPP;
-
-	if (br_vlan_enabled(dp->bridge_dev))
-		return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
-
-	return 0;
+	return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
 }
 
+int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags)
+{
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.flags = flags,
+		.vid_begin = vid,
+		.vid_end = vid,
+	};
+	struct switchdev_trans trans;
+	int err;
+
+	trans.ph_prepare = true;
+	err = dsa_port_vlan_add(dp, &vlan, &trans);
+	if (err)
+		return err;
+
+	trans.ph_prepare = false;
+	return dsa_port_vlan_add(dp, &vlan, &trans);
+}
+EXPORT_SYMBOL(dsa_port_vid_add);
+
+int dsa_port_vid_del(struct dsa_port *dp, u16 vid)
+{
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.vid_begin = vid,
+		.vid_end = vid,
+	};
+
+	return dsa_port_vlan_del(dp, &vlan);
+}
+EXPORT_SYMBOL(dsa_port_vid_del);
+
 static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
 {
 	struct device_node *phy_dn;
@@ -294,9 +411,112 @@
 		return ERR_PTR(-EPROBE_DEFER);
 	}
 
+	of_node_put(phy_dn);
 	return phydev;
 }
 
+void dsa_port_phylink_validate(struct phylink_config *config,
+			       unsigned long *supported,
+			       struct phylink_link_state *state)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->phylink_validate)
+		return;
+
+	ds->ops->phylink_validate(ds, dp->index, supported, state);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_validate);
+
+int dsa_port_phylink_mac_link_state(struct phylink_config *config,
+				    struct phylink_link_state *state)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct dsa_switch *ds = dp->ds;
+
+	/* Only called for SGMII and 802.3z */
+	if (!ds->ops->phylink_mac_link_state)
+		return -EOPNOTSUPP;
+
+	return ds->ops->phylink_mac_link_state(ds, dp->index, state);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_state);
+
+void dsa_port_phylink_mac_config(struct phylink_config *config,
+				 unsigned int mode,
+				 const struct phylink_link_state *state)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->phylink_mac_config)
+		return;
+
+	ds->ops->phylink_mac_config(ds, dp->index, mode, state);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_config);
+
+void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->phylink_mac_an_restart)
+		return;
+
+	ds->ops->phylink_mac_an_restart(ds, dp->index);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_an_restart);
+
+void dsa_port_phylink_mac_link_down(struct phylink_config *config,
+				    unsigned int mode,
+				    phy_interface_t interface)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct phy_device *phydev = NULL;
+	struct dsa_switch *ds = dp->ds;
+
+	if (dsa_is_user_port(ds, dp->index))
+		phydev = dp->slave->phydev;
+
+	if (!ds->ops->phylink_mac_link_down) {
+		if (ds->ops->adjust_link && phydev)
+			ds->ops->adjust_link(ds, dp->index, phydev);
+		return;
+	}
+
+	ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_down);
+
+void dsa_port_phylink_mac_link_up(struct phylink_config *config,
+				  unsigned int mode,
+				  phy_interface_t interface,
+				  struct phy_device *phydev)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->phylink_mac_link_up) {
+		if (ds->ops->adjust_link && phydev)
+			ds->ops->adjust_link(ds, dp->index, phydev);
+		return;
+	}
+
+	ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_up);
+
+const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
+	.validate = dsa_port_phylink_validate,
+	.mac_link_state = dsa_port_phylink_mac_link_state,
+	.mac_config = dsa_port_phylink_mac_config,
+	.mac_an_restart = dsa_port_phylink_mac_an_restart,
+	.mac_link_down = dsa_port_phylink_mac_link_down,
+	.mac_link_up = dsa_port_phylink_mac_link_up,
+};
+
 static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
 {
 	struct dsa_switch *ds = dp->ds;
@@ -312,10 +532,6 @@
 		return PTR_ERR(phydev);
 
 	if (enable) {
-		err = genphy_config_init(phydev);
-		if (err < 0)
-			goto err_put_dev;
-
 		err = genphy_resume(phydev);
 		if (err < 0)
 			goto err_put_dev;
@@ -363,7 +579,6 @@
 		mode = PHY_INTERFACE_MODE_NA;
 	phydev->interface = mode;
 
-	genphy_config_init(phydev);
 	genphy_read_status(phydev);
 
 	if (ds->ops->adjust_link)
@@ -374,8 +589,53 @@
 	return 0;
 }
 
+static int dsa_port_phylink_register(struct dsa_port *dp)
+{
+	struct dsa_switch *ds = dp->ds;
+	struct device_node *port_dn = dp->dn;
+	int mode, err;
+
+	mode = of_get_phy_mode(port_dn);
+	if (mode < 0)
+		mode = PHY_INTERFACE_MODE_NA;
+
+	dp->pl_config.dev = ds->dev;
+	dp->pl_config.type = PHYLINK_DEV;
+
+	dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn),
+				mode, &dsa_port_phylink_mac_ops);
+	if (IS_ERR(dp->pl)) {
+		pr_err("error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
+		return PTR_ERR(dp->pl);
+	}
+
+	err = phylink_of_phy_connect(dp->pl, port_dn, 0);
+	if (err && err != -ENODEV) {
+		pr_err("could not attach to PHY: %d\n", err);
+		goto err_phy_connect;
+	}
+
+	rtnl_lock();
+	phylink_start(dp->pl);
+	rtnl_unlock();
+
+	return 0;
+
+err_phy_connect:
+	phylink_destroy(dp->pl);
+	return err;
+}
+
 int dsa_port_link_register_of(struct dsa_port *dp)
 {
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->adjust_link)
+		return dsa_port_phylink_register(dp);
+
+	dev_warn(ds->dev,
+		 "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
+
 	if (of_phy_is_fixed_link(dp->dn))
 		return dsa_port_fixed_link_register_of(dp);
 	else
@@ -384,6 +644,16 @@
 
 void dsa_port_link_unregister_of(struct dsa_port *dp)
 {
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->adjust_link) {
+		rtnl_lock();
+		phylink_disconnect_phy(dp->pl);
+		rtnl_unlock();
+		phylink_destroy(dp->pl);
+		return;
+	}
+
 	if (of_phy_is_fixed_link(dp->dn))
 		of_phy_deregister_fixed_link(dp->dn);
 	else
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1c45c1d..028e65f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/dsa/slave.c - Slave device handling
  * Copyright (c) 2008-2009 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/list.h>
@@ -26,7 +22,7 @@
 
 #include "dsa_priv.h"
 
-static bool dsa_slave_dev_check(struct net_device *dev);
+static bool dsa_slave_dev_check(const struct net_device *dev);
 
 /* slave mii_bus handling ***************************************************/
 static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
@@ -120,9 +116,12 @@
 	struct net_device *master = dsa_slave_to_master(dev);
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 
+	cancel_work_sync(&dp->xmit_work);
+	skb_queue_purge(&dp->xmit_queue);
+
 	phylink_stop(dp->pl);
 
-	dsa_port_disable(dp, dev->phydev);
+	dsa_port_disable(dp);
 
 	dev_mc_unsync(master, dev);
 	dev_uc_unsync(master, dev);
@@ -140,11 +139,14 @@
 static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
 {
 	struct net_device *master = dsa_slave_to_master(dev);
-
-	if (change & IFF_ALLMULTI)
-		dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
-	if (change & IFF_PROMISC)
-		dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1);
+	if (dev->flags & IFF_UP) {
+		if (change & IFF_ALLMULTI)
+			dev_set_allmulti(master,
+					 dev->flags & IFF_ALLMULTI ? 1 : -1);
+		if (change & IFF_PROMISC)
+			dev_set_promiscuity(master,
+					    dev->flags & IFF_PROMISC ? 1 : -1);
+	}
 }
 
 static void dsa_slave_set_rx_mode(struct net_device *dev)
@@ -292,6 +294,16 @@
 	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
 		ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans);
 		break;
+	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+		ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags,
+						trans);
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+		ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, trans);
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
+		ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, trans);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
@@ -300,9 +312,43 @@
 	return ret;
 }
 
+static int dsa_slave_vlan_add(struct net_device *dev,
+			      const struct switchdev_obj *obj,
+			      struct switchdev_trans *trans)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct switchdev_obj_port_vlan vlan;
+	int err;
+
+	if (obj->orig_dev != dev)
+		return -EOPNOTSUPP;
+
+	if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev))
+		return 0;
+
+	vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
+
+	err = dsa_port_vlan_add(dp, &vlan, trans);
+	if (err)
+		return err;
+
+	/* We need the dedicated CPU port to be a member of the VLAN as well.
+	 * Even though drivers often handle CPU membership in special ways,
+	 * it doesn't make sense to program a PVID, so clear this flag.
+	 */
+	vlan.flags &= ~BRIDGE_VLAN_INFO_PVID;
+
+	err = dsa_port_vlan_add(dp->cpu_dp, &vlan, trans);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static int dsa_slave_port_obj_add(struct net_device *dev,
 				  const struct switchdev_obj *obj,
-				  struct switchdev_trans *trans)
+				  struct switchdev_trans *trans,
+				  struct netlink_ext_ack *extack)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	int err;
@@ -314,6 +360,8 @@
 
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
+		if (obj->orig_dev != dev)
+			return -EOPNOTSUPP;
 		err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
 		break;
 	case SWITCHDEV_OBJ_ID_HOST_MDB:
@@ -324,8 +372,7 @@
 				       trans);
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
-					trans);
+		err = dsa_slave_vlan_add(dev, obj, trans);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -335,6 +382,23 @@
 	return err;
 }
 
+static int dsa_slave_vlan_del(struct net_device *dev,
+			      const struct switchdev_obj *obj)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+
+	if (obj->orig_dev != dev)
+		return -EOPNOTSUPP;
+
+	if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev))
+		return 0;
+
+	/* Do not deprogram the CPU port as it may be shared with other user
+	 * ports which can be members of this VLAN as well.
+	 */
+	return dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
+}
+
 static int dsa_slave_port_obj_del(struct net_device *dev,
 				  const struct switchdev_obj *obj)
 {
@@ -343,6 +407,8 @@
 
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
+		if (obj->orig_dev != dev)
+			return -EOPNOTSUPP;
 		err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_HOST_MDB:
@@ -352,7 +418,7 @@
 		err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
+		err = dsa_slave_vlan_del(dev, obj);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -362,24 +428,22 @@
 	return err;
 }
 
-static int dsa_slave_port_attr_get(struct net_device *dev,
-				   struct switchdev_attr *attr)
+static int dsa_slave_get_port_parent_id(struct net_device *dev,
+					struct netdev_phys_item_id *ppid)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct dsa_switch *ds = dp->ds;
 	struct dsa_switch_tree *dst = ds->dst;
 
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = sizeof(dst->index);
-		memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len);
-		break;
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
-		attr->u.brport_flags_support = 0;
-		break;
-	default:
+	/* For non-legacy ports, devlink is used and it takes
+	 * care of the name generation. This ndo implementation
+	 * should be removed with legacy support.
+	 */
+	if (dp->ds->devlink)
 		return -EOPNOTSUPP;
-	}
+
+	ppid->id_len = sizeof(dst->index);
+	memcpy(&ppid->id, &dst->index, ppid->id_len);
 
 	return 0;
 }
@@ -416,12 +480,32 @@
 	if (!clone)
 		return;
 
+	DSA_SKB_CB(skb)->clone = clone;
+
 	if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type))
 		return;
 
 	kfree_skb(clone);
 }
 
+netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev)
+{
+	/* SKBs for netpoll still need to be mangled with the protocol-specific
+	 * tag to be successfully transmitted
+	 */
+	if (unlikely(netpoll_tx_running(dev)))
+		return dsa_slave_netpoll_send_skb(dev, skb);
+
+	/* Queue the SKB for transmission on the parent interface, but
+	 * do not modify its EtherType
+	 */
+	skb->dev = dsa_slave_to_master(dev);
+	dev_queue_xmit(skb);
+
+	return NETDEV_TX_OK;
+}
+EXPORT_SYMBOL_GPL(dsa_enqueue_skb);
+
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -434,6 +518,9 @@
 	s->tx_bytes += skb->len;
 	u64_stats_update_end(&s->syncp);
 
+	DSA_SKB_CB(skb)->deferred_xmit = false;
+	DSA_SKB_CB(skb)->clone = NULL;
+
 	/* Identify PTP protocol packets, clone them, and pass them to the
 	 * switch driver
 	 */
@@ -444,23 +531,37 @@
 	 */
 	nskb = p->xmit(skb, dev);
 	if (!nskb) {
-		kfree_skb(skb);
+		if (!DSA_SKB_CB(skb)->deferred_xmit)
+			kfree_skb(skb);
 		return NETDEV_TX_OK;
 	}
 
-	/* SKB for netpoll still need to be mangled with the protocol-specific
-	 * tag to be successfully transmitted
-	 */
-	if (unlikely(netpoll_tx_running(dev)))
-		return dsa_slave_netpoll_send_skb(dev, nskb);
+	return dsa_enqueue_skb(nskb, dev);
+}
 
-	/* Queue the SKB for transmission on the parent interface, but
-	 * do not modify its EtherType
-	 */
-	nskb->dev = dsa_slave_to_master(dev);
-	dev_queue_xmit(nskb);
+void *dsa_defer_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 
-	return NETDEV_TX_OK;
+	DSA_SKB_CB(skb)->deferred_xmit = true;
+
+	skb_queue_tail(&dp->xmit_queue, skb);
+	schedule_work(&dp->xmit_work);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(dsa_defer_xmit);
+
+static void dsa_port_xmit_work(struct work_struct *work)
+{
+	struct dsa_port *dp = container_of(work, struct dsa_port, xmit_work);
+	struct dsa_switch *ds = dp->ds;
+	struct sk_buff *skb;
+
+	if (unlikely(!ds->ops->port_deferred_xmit))
+		return;
+
+	while ((skb = skb_dequeue(&dp->xmit_queue)) != NULL)
+		ds->ops->port_deferred_xmit(ds, dp->index, skb);
 }
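
The deferral machinery above exists for taggers whose frames cannot simply
be handed to the master interface; in this kernel the sja1105 tagger uses
it for link-local frames that must be injected over a management route. A
hypothetical tagger xmit hook (predicate and tagging helper assumed)
signals deferral by returning NULL after queueing:

/* Sketch: defer management traffic to the worker above, tag and
 * transmit everything else through the master as usual.
 */
static struct sk_buff *example_xmit(struct sk_buff *skb,
				    struct net_device *netdev)
{
	if (example_is_management_frame(skb))	/* assumed predicate */
		return dsa_defer_xmit(skb, netdev);

	return example_insert_tag(skb, netdev);	/* assumed helper */
}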
 
 /* ethtool operations *******************************************************/
@@ -639,7 +740,7 @@
 	int ret;
 
 	/* Port's PHY and MAC both need to be EEE capable */
-	if (!dev->phydev && !dp->pl)
+	if (!dev->phydev || !dp->pl)
 		return -ENODEV;
 
 	if (!ds->ops->set_mac_eee)
@@ -659,7 +760,7 @@
 	int ret;
 
 	/* Port's PHY and MAC both need to be EEE capable */
-	if (!dev->phydev && !dp->pl)
+	if (!dev->phydev || !dp->pl)
 		return -ENODEV;
 
 	if (!ds->ops->get_mac_eee)
@@ -722,7 +823,7 @@
 
 	p->netpoll = NULL;
 
-	__netpoll_free_async(netpoll);
+	__netpoll_free(netpoll);
 }
 
 static void dsa_slave_poll_controller(struct net_device *dev)
@@ -735,6 +836,13 @@
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 
+	/* For non-legacy ports, devlink is used and it takes
+	 * care of the name generation. This ndo implementation
+	 * should be removed with legacy support.
+	 */
+	if (dp->ds->devlink)
+		return -EOPNOTSUPP;
+
 	if (snprintf(name, len, "p%d", dp->index) >= len)
 		return -EINVAL;
 
@@ -763,27 +871,25 @@
 	struct dsa_mall_tc_entry *mall_tc_entry;
 	__be16 protocol = cls->common.protocol;
 	struct dsa_switch *ds = dp->ds;
-	struct net_device *to_dev;
-	const struct tc_action *a;
+	struct flow_action_entry *act;
 	struct dsa_port *to_dp;
 	int err = -EOPNOTSUPP;
 
 	if (!ds->ops->port_mirror_add)
 		return err;
 
-	if (!tcf_exts_has_one_action(cls->exts))
+	if (!flow_offload_has_one_action(&cls->rule->action))
 		return err;
 
-	a = tcf_exts_first_action(cls->exts);
+	act = &cls->rule->action.entries[0];
 
-	if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
+	if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) {
 		struct dsa_mall_mirror_tc_entry *mirror;
 
-		to_dev = tcf_mirred_dev(a);
-		if (!to_dev)
+		if (!act->dev)
 			return -EINVAL;
 
-		if (!dsa_slave_dev_check(to_dev))
+		if (!dsa_slave_dev_check(act->dev))
 			return -EOPNOTSUPP;
 
 		mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL);
@@ -794,7 +900,7 @@
 		mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
 		mirror = &mall_tc_entry->mirror;
 
-		to_dp = dsa_slave_to_port(to_dev);
+		to_dp = dsa_slave_to_port(act->dev);
 
 		mirror->to_local_port = to_dp->index;
 		mirror->ingress = ingress;
@@ -884,23 +990,42 @@
 	return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false);
 }
 
-static int dsa_slave_setup_tc_block(struct net_device *dev,
-				    struct tc_block_offload *f)
-{
-	tc_setup_cb_t *cb;
+static LIST_HEAD(dsa_slave_block_cb_list);
 
-	if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+static int dsa_slave_setup_tc_block(struct net_device *dev,
+				    struct flow_block_offload *f)
+{
+	struct flow_block_cb *block_cb;
+	flow_setup_cb_t *cb;
+
+	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		cb = dsa_slave_setup_tc_block_cb_ig;
-	else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+	else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
 		cb = dsa_slave_setup_tc_block_cb_eg;
 	else
 		return -EOPNOTSUPP;
 
+	f->driver_block_list = &dsa_slave_block_cb_list;
+
 	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, cb, dev, dev, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, cb, dev);
+	case FLOW_BLOCK_BIND:
+		if (flow_block_cb_is_busy(cb, dev, &dsa_slave_block_cb_list))
+			return -EBUSY;
+
+		block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
+		if (IS_ERR(block_cb))
+			return PTR_ERR(block_cb);
+
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &dsa_slave_block_cb_list);
+		return 0;
+	case FLOW_BLOCK_UNBIND:
+		block_cb = flow_block_cb_lookup(f->block, cb, dev);
+		if (!block_cb)
+			return -ENOENT;
+
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
 		return 0;
 	default:
 		return -EOPNOTSUPP;
@@ -910,12 +1035,16 @@
 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			      void *type_data)
 {
-	switch (type) {
-	case TC_SETUP_BLOCK:
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
+
+	if (type == TC_SETUP_BLOCK)
 		return dsa_slave_setup_tc_block(dev, type_data);
-	default:
+
+	if (!ds->ops->port_setup_tc)
 		return -EOPNOTSUPP;
-	}
+
+	return ds->ops->port_setup_tc(ds, dp->index, type, type_data);
 }
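
With the rewritten dispatch, every offload other than TC_SETUP_BLOCK now
falls through to the switch driver. A hypothetical driver-side hook (in
this kernel, sja1105 wires up taprio offload through this path):

/* Hypothetical driver callback reached via the fallthrough above */
static int example_port_setup_tc(struct dsa_switch *ds, int port,
				 enum tc_setup_type type, void *type_data)
{
	switch (type) {
	case TC_SETUP_QDISC_TAPRIO:
		/* example_setup_taprio() is an assumed helper */
		return example_setup_taprio(ds, port, type_data);
	default:
		return -EOPNOTSUPP;
	}
}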
 
 static void dsa_slave_get_stats64(struct net_device *dev,
@@ -982,6 +1111,69 @@
 	return ds->ops->get_ts_info(ds, p->dp->index, ts);
 }
 
+static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
+				     u16 vid)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct bridge_vlan_info info;
+	int ret;
+
+	/* Check for a possible bridge VLAN entry now since there is no
+	 * need to emulate the switchdev prepare + commit phase.
+	 */
+	if (dp->bridge_dev) {
+		if (!br_vlan_enabled(dp->bridge_dev))
+			return 0;
+
+		/* br_vlan_get_info() returns -EINVAL or -ENOENT if the
+		 * device or the VID is not found. A return of 0 means the
+		 * VID already exists on the bridge, which for us here is
+		 * a failure.
+		 */
+		ret = br_vlan_get_info(dp->bridge_dev, vid, &info);
+		if (ret == 0)
+			return -EBUSY;
+	}
+
+	ret = dsa_port_vid_add(dp, vid, 0);
+	if (ret)
+		return ret;
+
+	ret = dsa_port_vid_add(dp->cpu_dp, vid, 0);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
+				      u16 vid)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct bridge_vlan_info info;
+	int ret;
+
+	/* Check for a possible bridge VLAN entry now since there is no
+	 * need to emulate the switchdev prepare + commit phase.
+	 */
+	if (dp->bridge_dev) {
+		if (!br_vlan_enabled(dp->bridge_dev))
+			return 0;
+
+		/* br_vlan_get_info() returns -EINVAL or -ENOENT if the
+		 * device or the VID is not found. A return of 0 means the
+		 * VID already exists on the bridge, which for us here is
+		 * a failure.
+		 */
+		ret = br_vlan_get_info(dp->bridge_dev, vid, &info);
+		if (ret == 0)
+			return -EBUSY;
+	}
+
+	/* Do not deprogram the CPU port as it may be shared with other user
+	 * ports which can be members of this VLAN as well.
+	 */
+	return dsa_port_vid_del(dp, vid);
+}
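
These two ndos are only reached because dsa_slave_create() below advertises
NETIF_F_HW_VLAN_CTAG_FILTER when the driver implements the VLAN ops; the
8021q core then invokes them whenever a VLAN upper claims or releases a
VID. Illustrative call chain (device names hypothetical):

/*   ip link add link swp0 name swp0.100 type vlan id 100
 *     -> vlan_vid_add(swp0, htons(ETH_P_8021Q), 100)
 *       -> ndo_vlan_rx_add_vid == dsa_slave_vlan_rx_add_vid
 */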
+
 static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.get_drvinfo		= dsa_slave_get_drvinfo,
 	.get_regs_len		= dsa_slave_get_regs_len,
@@ -1009,7 +1201,8 @@
 int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		       struct net_device *dev,
 		       const unsigned char *addr, u16 vid,
-		       u16 flags)
+		       u16 flags,
+		       struct netlink_ext_ack *extack)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 
@@ -1025,6 +1218,13 @@
 	return dsa_port_fdb_del(dp, addr, vid);
 }
 
+static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+
+	return dp->ds->devlink ? &dp->devlink_port : NULL;
+}
+
 static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_open	 	= dsa_slave_open,
 	.ndo_stop		= dsa_slave_close,
@@ -1045,111 +1245,16 @@
 	.ndo_get_phys_port_name	= dsa_slave_get_phys_port_name,
 	.ndo_setup_tc		= dsa_slave_setup_tc,
 	.ndo_get_stats64	= dsa_slave_get_stats64,
-};
-
-static const struct switchdev_ops dsa_slave_switchdev_ops = {
-	.switchdev_port_attr_get	= dsa_slave_port_attr_get,
-	.switchdev_port_attr_set	= dsa_slave_port_attr_set,
-	.switchdev_port_obj_add		= dsa_slave_port_obj_add,
-	.switchdev_port_obj_del		= dsa_slave_port_obj_del,
+	.ndo_get_port_parent_id	= dsa_slave_get_port_parent_id,
+	.ndo_vlan_rx_add_vid	= dsa_slave_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= dsa_slave_vlan_rx_kill_vid,
+	.ndo_get_devlink_port	= dsa_slave_get_devlink_port,
 };
 
 static struct device_type dsa_type = {
 	.name	= "dsa",
 };
 
-static void dsa_slave_phylink_validate(struct net_device *dev,
-				       unsigned long *supported,
-				       struct phylink_link_state *state)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (!ds->ops->phylink_validate)
-		return;
-
-	ds->ops->phylink_validate(ds, dp->index, supported, state);
-}
-
-static int dsa_slave_phylink_mac_link_state(struct net_device *dev,
-					    struct phylink_link_state *state)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	/* Only called for SGMII and 802.3z */
-	if (!ds->ops->phylink_mac_link_state)
-		return -EOPNOTSUPP;
-
-	return ds->ops->phylink_mac_link_state(ds, dp->index, state);
-}
-
-static void dsa_slave_phylink_mac_config(struct net_device *dev,
-					 unsigned int mode,
-					 const struct phylink_link_state *state)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (!ds->ops->phylink_mac_config)
-		return;
-
-	ds->ops->phylink_mac_config(ds, dp->index, mode, state);
-}
-
-static void dsa_slave_phylink_mac_an_restart(struct net_device *dev)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (!ds->ops->phylink_mac_an_restart)
-		return;
-
-	ds->ops->phylink_mac_an_restart(ds, dp->index);
-}
-
-static void dsa_slave_phylink_mac_link_down(struct net_device *dev,
-					    unsigned int mode,
-					    phy_interface_t interface)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (!ds->ops->phylink_mac_link_down) {
-		if (ds->ops->adjust_link && dev->phydev)
-			ds->ops->adjust_link(ds, dp->index, dev->phydev);
-		return;
-	}
-
-	ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
-}
-
-static void dsa_slave_phylink_mac_link_up(struct net_device *dev,
-					  unsigned int mode,
-					  phy_interface_t interface,
-					  struct phy_device *phydev)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (!ds->ops->phylink_mac_link_up) {
-		if (ds->ops->adjust_link && dev->phydev)
-			ds->ops->adjust_link(ds, dp->index, dev->phydev);
-		return;
-	}
-
-	ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev);
-}
-
-static const struct phylink_mac_ops dsa_slave_phylink_mac_ops = {
-	.validate = dsa_slave_phylink_validate,
-	.mac_link_state = dsa_slave_phylink_mac_link_state,
-	.mac_config = dsa_slave_phylink_mac_config,
-	.mac_an_restart = dsa_slave_phylink_mac_an_restart,
-	.mac_link_down = dsa_slave_phylink_mac_link_down,
-	.mac_link_up = dsa_slave_phylink_mac_link_up,
-};
-
 void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up)
 {
 	const struct dsa_port *dp = dsa_to_port(ds, port);
@@ -1197,8 +1302,11 @@
 	if (mode < 0)
 		mode = PHY_INTERFACE_MODE_NA;
 
-	dp->pl = phylink_create(slave_dev, of_fwnode_handle(port_dn), mode,
-				&dsa_slave_phylink_mac_ops);
+	dp->pl_config.dev = &slave_dev->dev;
+	dp->pl_config.type = PHYLINK_NETDEV;
+
+	dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn), mode,
+				&dsa_port_phylink_mac_ops);
 	if (IS_ERR(dp->pl)) {
 		netdev_err(slave_dev,
 			   "error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
@@ -1216,9 +1324,9 @@
 		phy_flags = ds->ops->get_phy_flags(ds, dp->index);
 
 	ret = phylink_of_phy_connect(dp->pl, port_dn, phy_flags);
-	if (ret == -ENODEV) {
-		/* We could not connect to a designated PHY or SFP, so use the
-		 * switch internal MDIO bus instead
+	if (ret == -ENODEV && ds->slave_mii_bus) {
+		/* We could not connect to a designated PHY or SFP, so try to
+		 * use the switch internal MDIO bus instead
 		 */
 		ret = dsa_slave_phy_connect(slave_dev, dp->index);
 		if (ret) {
@@ -1230,16 +1338,7 @@
 		}
 	}
 
-	return 0;
-}
-
-static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
-static void dsa_slave_set_lockdep_class_one(struct net_device *dev,
-					    struct netdev_queue *txq,
-					    void *_unused)
-{
-	lockdep_set_class(&txq->_xmit_lock,
-			  &dsa_slave_netdev_xmit_lock_key);
+	return ret;
 }
 
 int dsa_slave_suspend(struct net_device *slave_dev)
@@ -1249,6 +1348,9 @@
 	if (!netif_running(slave_dev))
 		return 0;
 
+	cancel_work_sync(&dp->xmit_work);
+	skb_queue_purge(&dp->xmit_queue);
+
 	netif_device_detach(slave_dev);
 
 	rtnl_lock();
@@ -1308,19 +1410,20 @@
 		return -ENOMEM;
 
 	slave_dev->features = master->vlan_features | NETIF_F_HW_TC;
+	if (ds->ops->port_vlan_add && ds->ops->port_vlan_del)
+		slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 	slave_dev->hw_features |= NETIF_F_HW_TC;
 	slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
-	eth_hw_addr_inherit(slave_dev, master);
+	if (!IS_ERR_OR_NULL(port->mac))
+		ether_addr_copy(slave_dev->dev_addr, port->mac);
+	else
+		eth_hw_addr_inherit(slave_dev, master);
 	slave_dev->priv_flags |= IFF_NO_QUEUE;
 	slave_dev->netdev_ops = &dsa_slave_netdev_ops;
-	slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
 	slave_dev->min_mtu = 0;
 	slave_dev->max_mtu = ETH_MAX_MTU;
 	SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
 
-	netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
-				 NULL);
-
 	SET_NETDEV_DEV(slave_dev, port->ds->dev);
 	slave_dev->dev.of_node = port->dn;
 	slave_dev->vlan_features = master->vlan_features;
@@ -1333,6 +1436,8 @@
 	}
 	p->dp = port;
 	INIT_LIST_HEAD(&p->mall_tc_list);
+	INIT_WORK(&port->xmit_work, dsa_port_xmit_work);
+	skb_queue_head_init(&port->xmit_queue);
 	p->xmit = cpu_dp->tag_ops->xmit;
 	port->slave = slave_dev;
 
@@ -1384,7 +1489,7 @@
 	free_netdev(slave_dev);
 }
 
-static bool dsa_slave_dev_check(struct net_device *dev)
+static bool dsa_slave_dev_check(const struct net_device *dev)
 {
 	return dev->netdev_ops == &dsa_slave_netdev_ops;
 }
@@ -1408,16 +1513,49 @@
 	return err;
 }
 
+static int dsa_slave_upper_vlan_check(struct net_device *dev,
+				      struct netdev_notifier_changeupper_info *
+				      info)
+{
+	struct netlink_ext_ack *ext_ack;
+	struct net_device *slave;
+	struct dsa_port *dp;
+
+	ext_ack = netdev_notifier_info_to_extack(&info->info);
+
+	if (!is_vlan_dev(dev))
+		return NOTIFY_DONE;
+
+	slave = vlan_dev_real_dev(dev);
+	if (!dsa_slave_dev_check(slave))
+		return NOTIFY_DONE;
+
+	dp = dsa_slave_to_port(slave);
+	if (!dp->bridge_dev)
+		return NOTIFY_DONE;
+
+	/* Deny enslaving a VLAN device into a VLAN-aware bridge */
+	if (br_vlan_enabled(dp->bridge_dev) &&
+	    netif_is_bridge_master(info->upper_dev) && info->linking) {
+		NL_SET_ERR_MSG_MOD(ext_ack,
+				   "Cannot enslave VLAN device into VLAN aware bridge");
+		return notifier_from_errno(-EINVAL);
+	}
+
+	return NOTIFY_DONE;
+}
+
 static int dsa_slave_netdevice_event(struct notifier_block *nb,
 				     unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
-	if (!dsa_slave_dev_check(dev))
-		return NOTIFY_DONE;
+	if (event == NETDEV_CHANGEUPPER) {
+		if (!dsa_slave_dev_check(dev))
+			return dsa_slave_upper_vlan_check(dev, ptr);
 
-	if (event == NETDEV_CHANGEUPPER)
 		return dsa_slave_changeupper(dev, ptr);
+	}
 
 	return NOTIFY_DONE;
 }
@@ -1450,8 +1588,9 @@
 			netdev_dbg(dev, "fdb add failed err=%d\n", err);
 			break;
 		}
+		fdb_info->offloaded = true;
 		call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
-					 &fdb_info->info);
+					 &fdb_info->info, NULL);
 		break;
 
 	case SWITCHDEV_FDB_DEL_TO_DEVICE:
@@ -1495,6 +1634,14 @@
 {
 	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
 	struct dsa_switchdev_event_work *switchdev_work;
+	int err;
+
+	if (event == SWITCHDEV_PORT_ATTR_SET) {
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     dsa_slave_dev_check,
+						     dsa_slave_port_attr_set);
+		return notifier_from_errno(err);
+	}
 
 	if (!dsa_slave_dev_check(dev))
 		return NOTIFY_DONE;
@@ -1528,6 +1675,33 @@
 	return NOTIFY_BAD;
 }
 
+static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
+					      unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	int err;
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+		err = switchdev_handle_port_obj_add(dev, ptr,
+						    dsa_slave_dev_check,
+						    dsa_slave_port_obj_add);
+		return notifier_from_errno(err);
+	case SWITCHDEV_PORT_OBJ_DEL:
+		err = switchdev_handle_port_obj_del(dev, ptr,
+						    dsa_slave_dev_check,
+						    dsa_slave_port_obj_del);
+		return notifier_from_errno(err);
+	case SWITCHDEV_PORT_ATTR_SET:
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     dsa_slave_dev_check,
+						     dsa_slave_port_attr_set);
+		return notifier_from_errno(err);
+	}
+
+	return NOTIFY_DONE;
+}
+
 static struct notifier_block dsa_slave_nb __read_mostly = {
 	.notifier_call  = dsa_slave_netdevice_event,
 };
@@ -1536,8 +1710,13 @@
 	.notifier_call = dsa_slave_switchdev_event,
 };
 
+static struct notifier_block dsa_slave_switchdev_blocking_notifier = {
+	.notifier_call = dsa_slave_switchdev_blocking_event,
+};
+
 int dsa_slave_register_notifier(void)
 {
+	struct notifier_block *nb;
 	int err;
 
 	err = register_netdevice_notifier(&dsa_slave_nb);
@@ -1548,8 +1727,15 @@
 	if (err)
 		goto err_switchdev_nb;
 
+	nb = &dsa_slave_switchdev_blocking_notifier;
+	err = register_switchdev_blocking_notifier(nb);
+	if (err)
+		goto err_switchdev_blocking_nb;
+
 	return 0;
 
+err_switchdev_blocking_nb:
+	unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
 err_switchdev_nb:
 	unregister_netdevice_notifier(&dsa_slave_nb);
 	return err;
@@ -1557,8 +1743,14 @@
 
 void dsa_slave_unregister_notifier(void)
 {
+	struct notifier_block *nb;
 	int err;
 
+	nb = &dsa_slave_switchdev_blocking_notifier;
+	err = unregister_switchdev_blocking_notifier(nb);
+	if (err)
+		pr_err("DSA: failed to unregister switchdev blocking notifier (%d)\n", err);
+
 	err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
 	if (err)
 		pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 142b294..6a96075 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -1,17 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Handling of a single switch chip, part of a switch fabric
  *
  * Copyright (c) 2017 Savoir-faire Linux Inc.
  *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
+#include <linux/if_bridge.h>
 #include <linux/netdevice.h>
 #include <linux/notifier.h>
+#include <linux/if_vlan.h>
 #include <net/switchdev.h>
 
 #include "dsa_priv.h"
@@ -70,6 +68,9 @@
 static int dsa_switch_bridge_leave(struct dsa_switch *ds,
 				   struct dsa_notifier_bridge_info *info)
 {
+	bool unset_vlan_filtering = br_vlan_enabled(info->br);
+	int err, i;
+
 	if (ds->index == info->sw_index && ds->ops->port_bridge_leave)
 		ds->ops->port_bridge_leave(ds, info->port, info->br);
 
@@ -77,6 +78,31 @@
 		ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port,
 						info->br);
 
+	/* If the bridge was vlan_filtering, the bridge core doesn't trigger an
+	 * event for changing vlan_filtering setting upon slave ports leaving
+	 * it. That is a good thing, because that lets us handle it and also
+	 * handle the case where the switch's vlan_filtering setting is global
+	 * (not per port). When that happens, the correct moment to trigger the
+	 * vlan_filtering callback is when the last port has left this bridge.
+	 */
+	if (unset_vlan_filtering && ds->vlan_filtering_is_global) {
+		for (i = 0; i < ds->num_ports; i++) {
+			if (i == info->port)
+				continue;
+			if (dsa_to_port(ds, i)->bridge_dev == info->br) {
+				unset_vlan_filtering = false;
+				break;
+			}
+		}
+	}
+	if (unset_vlan_filtering) {
+		struct switchdev_trans trans = {0};
+
+		err = dsa_port_vlan_filtering(&ds->ports[info->port],
+					      false, &trans);
+		if (err && err != -EOPNOTSUPP)
+			return err;
+	}
 	return 0;
 }
 
@@ -102,54 +128,51 @@
 	return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
 }
 
-static int
-dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds,
-			      const struct switchdev_obj_port_mdb *mdb,
-			      const unsigned long *bitmap)
+static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port,
+				 struct dsa_notifier_mdb_info *info)
+{
+	if (ds->index == info->sw_index && port == info->port)
+		return true;
+
+	if (dsa_is_dsa_port(ds, port))
+		return true;
+
+	return false;
+}
+
+static int dsa_switch_mdb_prepare(struct dsa_switch *ds,
+				  struct dsa_notifier_mdb_info *info)
 {
 	int port, err;
 
 	if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
 		return -EOPNOTSUPP;
 
-	for_each_set_bit(port, bitmap, ds->num_ports) {
-		err = ds->ops->port_mdb_prepare(ds, port, mdb);
-		if (err)
-			return err;
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_switch_mdb_match(ds, port, info)) {
+			err = ds->ops->port_mdb_prepare(ds, port, info->mdb);
+			if (err)
+				return err;
+		}
 	}
 
 	return 0;
 }
 
-static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds,
-				      const struct switchdev_obj_port_mdb *mdb,
-				      const unsigned long *bitmap)
-{
-	int port;
-
-	for_each_set_bit(port, bitmap, ds->num_ports)
-		ds->ops->port_mdb_add(ds, port, mdb);
-}
-
 static int dsa_switch_mdb_add(struct dsa_switch *ds,
 			      struct dsa_notifier_mdb_info *info)
 {
-	const struct switchdev_obj_port_mdb *mdb = info->mdb;
-	struct switchdev_trans *trans = info->trans;
 	int port;
 
-	/* Build a mask of Multicast group members */
-	bitmap_zero(ds->bitmap, ds->num_ports);
-	if (ds->index == info->sw_index)
-		set_bit(info->port, ds->bitmap);
+	if (switchdev_trans_ph_prepare(info->trans))
+		return dsa_switch_mdb_prepare(ds, info);
+
+	if (!ds->ops->port_mdb_add)
+		return 0;
+
 	for (port = 0; port < ds->num_ports; port++)
-		if (dsa_is_dsa_port(ds, port))
-			set_bit(port, ds->bitmap);
-
-	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap);
-
-	dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap);
+		if (dsa_switch_mdb_match(ds, port, info))
+			ds->ops->port_mdb_add(ds, port, info->mdb);
 
 	return 0;
 }
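
The match helper replaces the old bitmap plumbing with a per-port
predicate: the owning switch programs the target port, and every switch
programs its DSA links so the group stays reachable across the fabric.
Illustrative outcome for an assumed two-switch tree:

/* MDB add with sw_index = 0, port = 2:
 *   switch 0 matches port 2 (the target) plus its DSA link to switch 1;
 *   switch 1 matches only its DSA link back towards switch 0.
 */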
@@ -157,66 +180,101 @@
 static int dsa_switch_mdb_del(struct dsa_switch *ds,
 			      struct dsa_notifier_mdb_info *info)
 {
-	const struct switchdev_obj_port_mdb *mdb = info->mdb;
-
 	if (!ds->ops->port_mdb_del)
 		return -EOPNOTSUPP;
 
 	if (ds->index == info->sw_index)
-		return ds->ops->port_mdb_del(ds, info->port, mdb);
+		return ds->ops->port_mdb_del(ds, info->port, info->mdb);
 
 	return 0;
 }
 
-static int
-dsa_switch_vlan_prepare_bitmap(struct dsa_switch *ds,
-			       const struct switchdev_obj_port_vlan *vlan,
-			       const unsigned long *bitmap)
+static int dsa_port_vlan_device_check(struct net_device *vlan_dev,
+				      int vlan_dev_vid,
+				      void *arg)
+{
+	struct switchdev_obj_port_vlan *vlan = arg;
+	u16 vid;
+
+	for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+		if (vid == vlan_dev_vid)
+			return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int dsa_port_vlan_check(struct dsa_switch *ds, int port,
+			       const struct switchdev_obj_port_vlan *vlan)
+{
+	const struct dsa_port *dp = dsa_to_port(ds, port);
+	int err = 0;
+
+	/* Device is not bridged, let it proceed with the VLAN device
+	 * creation.
+	 */
+	if (!dp->bridge_dev)
+		return err;
+
+	/* dsa_slave_vlan_rx_{add,kill}_vid() cannot use the prepare phase and
+	 * already checks whether there is an overlapping bridge VLAN entry
+	 * with the same VID, so here we only need to check that if we are
+	 * adding a bridge VLAN entry there is not an overlapping VLAN device
+	 * claiming that VID.
+	 */
+	return vlan_for_each(dp->slave, dsa_port_vlan_device_check,
+			     (void *)vlan);
+}
+
+static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
+				  struct dsa_notifier_vlan_info *info)
+{
+	if (ds->index == info->sw_index && port == info->port)
+		return true;
+
+	if (dsa_is_dsa_port(ds, port))
+		return true;
+
+	return false;
+}
+
+static int dsa_switch_vlan_prepare(struct dsa_switch *ds,
+				   struct dsa_notifier_vlan_info *info)
 {
 	int port, err;
 
 	if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
 		return -EOPNOTSUPP;
 
-	for_each_set_bit(port, bitmap, ds->num_ports) {
-		err = ds->ops->port_vlan_prepare(ds, port, vlan);
-		if (err)
-			return err;
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_switch_vlan_match(ds, port, info)) {
+			err = dsa_port_vlan_check(ds, port, info->vlan);
+			if (err)
+				return err;
+
+			err = ds->ops->port_vlan_prepare(ds, port, info->vlan);
+			if (err)
+				return err;
+		}
 	}
 
 	return 0;
 }
 
-static void
-dsa_switch_vlan_add_bitmap(struct dsa_switch *ds,
-			   const struct switchdev_obj_port_vlan *vlan,
-			   const unsigned long *bitmap)
-{
-	int port;
-
-	for_each_set_bit(port, bitmap, ds->num_ports)
-		ds->ops->port_vlan_add(ds, port, vlan);
-}
-
 static int dsa_switch_vlan_add(struct dsa_switch *ds,
 			       struct dsa_notifier_vlan_info *info)
 {
-	const struct switchdev_obj_port_vlan *vlan = info->vlan;
-	struct switchdev_trans *trans = info->trans;
 	int port;
 
-	/* Build a mask of VLAN members */
-	bitmap_zero(ds->bitmap, ds->num_ports);
-	if (ds->index == info->sw_index)
-		set_bit(info->port, ds->bitmap);
+	if (switchdev_trans_ph_prepare(info->trans))
+		return dsa_switch_vlan_prepare(ds, info);
+
+	if (!ds->ops->port_vlan_add)
+		return 0;
+
 	for (port = 0; port < ds->num_ports; port++)
-		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
-			set_bit(port, ds->bitmap);
-
-	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap);
-
-	dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap);
+		if (dsa_switch_vlan_match(ds, port, info))
+			ds->ops->port_vlan_add(ds, port, info->vlan);
 
 	return 0;
 }
@@ -224,14 +282,15 @@
 static int dsa_switch_vlan_del(struct dsa_switch *ds,
 			       struct dsa_notifier_vlan_info *info)
 {
-	const struct switchdev_obj_port_vlan *vlan = info->vlan;
-
 	if (!ds->ops->port_vlan_del)
 		return -EOPNOTSUPP;
 
 	if (ds->index == info->sw_index)
-		return ds->ops->port_vlan_del(ds, info->port, vlan);
+		return ds->ops->port_vlan_del(ds, info->port, info->vlan);
 
+	/* Do not deprogram the DSA links as they may be used as conduit
+	 * for other VLAN members in the fabric.
+	 */
 	return 0;
 }
 
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
new file mode 100644
index 0000000..9e5a883
--- /dev/null
+++ b/net/dsa/tag_8021q.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+ *
+ * This module is not a complete tagger implementation. It only provides
+ * primitives for taggers that rely on 802.1Q VLAN tags to use. The
+ * dsa_8021q_netdev_ops is registered for API compliance and not used
+ * directly by callers.
+ */
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+
+#include "dsa_priv.h"
+
+/* Binary structure of the fake 12-bit VID field (when the TPID is
+ * ETH_P_DSA_8021Q):
+ *
+ * | 11  | 10  |  9  |  8  |  7  |  6  |  5  |  4  |  3  |  2  |  1  |  0  |
+ * +-----------+-----+-----------------+-----------+-----------------------+
+ * |    DIR    | RSV |    SWITCH_ID    |    RSV    |          PORT         |
+ * +-----------+-----+-----------------+-----------+-----------------------+
+ *
+ * DIR - VID[11:10]:
+ *	Direction flags.
+ *	* 1 (0b01) for RX VLAN,
+ *	* 2 (0b10) for TX VLAN.
+ *	These values leave the special VIDs 0, 1 and 4095 unused by
+ *	this coding scheme.
+ *
+ * RSV - VID[9]:
+ *	To be used for further expansion of SWITCH_ID or for other purposes.
+ *	Must be transmitted as zero and ignored on receive.
+ *
+ * SWITCH_ID - VID[8:6]:
+ *	Index of switch within DSA tree. Must be between 0 and
+ *	DSA_MAX_SWITCHES - 1.
+ *
+ * RSV - VID[5:4]:
+ *	To be used for further expansion of PORT or for other purposes.
+ *	Must be transmitted as zero and ignored on receive.
+ *
+ * PORT - VID[3:0]:
+ *	Index of switch port. Must be between 0 and DSA_MAX_PORTS - 1.
+ */
+
+#define DSA_8021Q_DIR_SHIFT		10
+#define DSA_8021Q_DIR_MASK		GENMASK(11, 10)
+#define DSA_8021Q_DIR(x)		(((x) << DSA_8021Q_DIR_SHIFT) & \
+						 DSA_8021Q_DIR_MASK)
+#define DSA_8021Q_DIR_RX		DSA_8021Q_DIR(1)
+#define DSA_8021Q_DIR_TX		DSA_8021Q_DIR(2)
+
+#define DSA_8021Q_SWITCH_ID_SHIFT	6
+#define DSA_8021Q_SWITCH_ID_MASK	GENMASK(8, 6)
+#define DSA_8021Q_SWITCH_ID(x)		(((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \
+						 DSA_8021Q_SWITCH_ID_MASK)
+
+#define DSA_8021Q_PORT_SHIFT		0
+#define DSA_8021Q_PORT_MASK		GENMASK(3, 0)
+#define DSA_8021Q_PORT(x)		(((x) << DSA_8021Q_PORT_SHIFT) & \
+						 DSA_8021Q_PORT_MASK)
+
+/* Returns the VID to be inserted into the frame from xmit for switch steering
+ * instructions on egress. Encodes switch ID and port ID.
+ */
+u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port)
+{
+	return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(ds->index) |
+	       DSA_8021Q_PORT(port);
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid);
+
+/* Returns the VID that will be installed as pvid for this switch port, sent as
+ * tagged egress towards the CPU port and decoded by the rcv function.
+ */
+u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
+{
+	return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) |
+	       DSA_8021Q_PORT(port);
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid);
+
+/* Returns the decoded switch ID from the RX VID. */
+int dsa_8021q_rx_switch_id(u16 vid)
+{
+	return (vid & DSA_8021Q_SWITCH_ID_MASK) >> DSA_8021Q_SWITCH_ID_SHIFT;
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_rx_switch_id);
+
+/* Returns the decoded port ID from the RX VID. */
+int dsa_8021q_rx_source_port(u16 vid)
+{
+	return (vid & DSA_8021Q_PORT_MASK) >> DSA_8021Q_PORT_SHIFT;
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
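
A worked example of the encoding, following the field layout documented at
the top of this file, for switch index 1 and port 3:

/* rx_vid = DSA_8021Q_DIR_RX | SWITCH_ID(1) | PORT(3)
 *        = 0x400            | 0x040        | 0x003    = 0x443 (VID 1091)
 * tx_vid = DSA_8021Q_DIR_TX | SWITCH_ID(1) | PORT(3)
 *        = 0x800            | 0x040        | 0x003    = 0x843 (VID 2115)
 *
 * Decoding 0x443: (0x443 & DSA_8021Q_SWITCH_ID_MASK) >> 6 = 1 (switch)
 *                 (0x443 & DSA_8021Q_PORT_MASK)           = 3 (port)
 */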
+
+static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
+{
+	struct bridge_vlan_info vinfo;
+	struct net_device *slave;
+	u16 pvid;
+	int err;
+
+	if (!dsa_is_user_port(ds, port))
+		return 0;
+
+	slave = ds->ports[port].slave;
+
+	err = br_vlan_get_pvid(slave, &pvid);
+	if (err < 0 || !pvid)
+		/* There is no pvid on the bridge for this port, which is
+		 * perfectly valid. Nothing to restore, bye-bye!
+		 */
+		return 0;
+
+	err = br_vlan_get_info(slave, pvid, &vinfo);
+	if (err < 0) {
+		dev_err(ds->dev, "Couldn't determine PVID attributes\n");
+		return err;
+	}
+
+	return dsa_port_vid_add(&ds->ports[port], pvid, vinfo.flags);
+}
+
+/* If @enabled is true, installs @vid with @flags into the switch port's HW
+ * filter.
+ * If @enabled is false, deletes @vid (ignores @flags) from the port. If the
+ * user had explicitly configured this @vid through the bridge core, then the
+ * @vid is installed again, but this time with the flags from the bridge
+ * layer.
+ */
+static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid,
+			       u16 flags, bool enabled)
+{
+	struct dsa_port *dp = &ds->ports[port];
+	struct bridge_vlan_info vinfo;
+	int err;
+
+	if (enabled)
+		return dsa_port_vid_add(dp, vid, flags);
+
+	err = dsa_port_vid_del(dp, vid);
+	if (err < 0)
+		return err;
+
+	/* Nothing to restore from the bridge for a non-user port.
+	 * The CPU port VLANs are restored implicitly with the user ports,
+	 * similar to how the bridge does in dsa_slave_vlan_add and
+	 * dsa_slave_vlan_del.
+	 */
+	if (!dsa_is_user_port(ds, port))
+		return 0;
+
+	err = br_vlan_get_info(dp->slave, vid, &vinfo);
+	/* Couldn't determine bridge attributes for this vid, which
+	 * means the bridge had not configured it.
+	 */
+	if (err < 0)
+		return 0;
+
+	/* Restore the VID from the bridge */
+	err = dsa_port_vid_add(dp, vid, vinfo.flags);
+	if (err < 0)
+		return err;
+
+	vinfo.flags &= ~BRIDGE_VLAN_INFO_PVID;
+
+	return dsa_port_vid_add(dp->cpu_dp, vid, vinfo.flags);
+}
+
+/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single
+ * front-panel switch port (here swp0).
+ *
+ * Port identification through VLAN (802.1Q) tags has different requirements
+ * for it to work effectively:
+ *  - On RX (ingress from network): each front-panel port must have a pvid
+ *    that uniquely identifies it, and the egress of this pvid must be tagged
+ *    towards the CPU port, so that software can recover the source port based
+ *    on the VID in the frame. But this would only work for standalone ports;
+ *    if bridged, this VLAN setup would break autonomous forwarding and would
+ *    force all switched traffic to pass through the CPU. So we must also make
+ *    the other front-panel ports members of this VID we're adding, albeit
+ *    we're not making it their PVID (they'll still have their own).
+ *    By the way - just because we're installing the same VID in multiple
+ *    switch ports doesn't mean that they'll start to talk to one another, even
+ *    while not bridged: the final forwarding decision is still an AND between
+ *    the L2 forwarding information (which is limiting forwarding in this case)
+ *    and the VLAN-based restrictions (of which there are none in this case,
+ *    since all ports are members).
+ *  - On TX (ingress from CPU and towards network) we are faced with a problem.
+ *    If we were to tag traffic (from within DSA) with the port's pvid, all
+ *    would be well, assuming the switch ports were standalone. Frames would
+ *    have no choice but to be directed towards the correct front-panel port.
+ *    But because we also want the RX VLAN to not break bridging, then
+ *    inevitably that means that we have to give them a choice (of what
+ *    front-panel port to go out on), and therefore we cannot steer traffic
+ *    based on the RX VID. So what we do is simply install one more VID on the
+ *    front-panel and CPU ports, and profit off of the fact that steering will
+ *    work just by virtue of the fact that there is only one other port that's
+ *    a member of the VID we're tagging the traffic with - the desired one.
+ *
+ * So at the end, each front-panel port will have one RX VID (also the PVID),
+ * the RX VID of all other front-panel ports, and one TX VID. Whereas the CPU
+ * port will have the RX and TX VIDs of all front-panel ports, and on top of
+ * that, is also tagged-input and tagged-output (VLAN trunk).
+ *
+ *               CPU port                               CPU port
+ * +-------------+-----+-------------+    +-------------+-----+-------------+
+ * |  RX VID     |     |             |    |  TX VID     |     |             |
+ * |  of swp0    |     |             |    |  of swp0    |     |             |
+ * |             +-----+             |    |             +-----+             |
+ * |                ^ T              |    |                | Tagged         |
+ * |                |                |    |                | ingress        |
+ * |    +-------+---+---+-------+    |    |    +-----------+                |
+ * |    |       |       |       |    |    |    | Untagged                   |
+ * |    |     U v     U v     U v    |    |    v egress                     |
+ * | +-----+ +-----+ +-----+ +-----+ |    | +-----+ +-----+ +-----+ +-----+ |
+ * | |     | |     | |     | |     | |    | |     | |     | |     | |     | |
+ * | |PVID | |     | |     | |     | |    | |     | |     | |     | |     | |
+ * +-+-----+-+-----+-+-----+-+-----+-+    +-+-----+-+-----+-+-----+-+-----+-+
+ *   swp0    swp1    swp2    swp3           swp0    swp1    swp2    swp3
+ */
+int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled)
+{
+	int upstream = dsa_upstream_port(ds, port);
+	u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+	u16 tx_vid = dsa_8021q_tx_vid(ds, port);
+	int i, err;
+
+	/* The CPU port is implicitly configured by
+	 * configuring the front-panel ports
+	 */
+	if (!dsa_is_user_port(ds, port))
+		return 0;
+
+	/* Add this user port's RX VID to the membership list of all others
+	 * (including itself). This is so that bridging will not be hindered.
+	 * L2 forwarding rules still take precedence when there are no VLAN
+	 * restrictions, so there are no concerns about leaking traffic.
+	 */
+	for (i = 0; i < ds->num_ports; i++) {
+		u16 flags;
+
+		if (i == upstream)
+			continue;
+		else if (i == port)
+			/* The RX VID is pvid on this port */
+			flags = BRIDGE_VLAN_INFO_UNTAGGED |
+				BRIDGE_VLAN_INFO_PVID;
+		else
+			/* The RX VID is a regular VLAN on all others */
+			flags = BRIDGE_VLAN_INFO_UNTAGGED;
+
+		err = dsa_8021q_vid_apply(ds, i, rx_vid, flags, enabled);
+		if (err) {
+			dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n",
+				rx_vid, port, err);
+			return err;
+		}
+	}
+
+	/* CPU port needs to see this port's RX VID
+	 * as tagged egress.
+	 */
+	err = dsa_8021q_vid_apply(ds, upstream, rx_vid, 0, enabled);
+	if (err) {
+		dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n",
+			rx_vid, port, err);
+		return err;
+	}
+
+	/* Finally apply the TX VID on this port and on the CPU port */
+	err = dsa_8021q_vid_apply(ds, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED,
+				  enabled);
+	if (err) {
+		dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n",
+			tx_vid, port, err);
+		return err;
+	}
+	err = dsa_8021q_vid_apply(ds, upstream, tx_vid, 0, enabled);
+	if (err) {
+		dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n",
+			tx_vid, upstream, err);
+		return err;
+	}
+
+	if (!enabled)
+		err = dsa_8021q_restore_pvid(ds, port);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging);
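
A driver opts into this scheme by running the helper over all of its
ports, typically from its setup path; a sketch along the lines of what the
sja1105 driver does:

/* Hypothetical driver sketch enabling 802.1Q tagging on every port */
static int example_setup_8021q_tagging(struct dsa_switch *ds)
{
	int port, err;

	for (port = 0; port < ds->num_ports; port++) {
		err = dsa_port_setup_8021q_tagging(ds, port, true);
		if (err < 0)
			return err;
	}

	return 0;
}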
+
+struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
+			       u16 tpid, u16 tci)
+{
+	/* skb->data points at skb_mac_header, which
+	 * is fine for vlan_insert_tag.
+	 */
+	return vlan_insert_tag(skb, htons(tpid), tci);
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
+
+/* In the DSA packet_type handler, skb->data points in the middle of the VLAN
+ * tag, after tpid and before tci. This is because so far, ETH_HLEN
+ * (DMAC, SMAC, EtherType) bytes were pulled.
+ * There are 2 bytes of VLAN tag left in skb->data, and upper
+ * layers expect the 'real' EtherType to be consumed as well.
+ * Coincidentally, a VLAN header is also of the same size as
+ * the number of bytes that need to be pulled.
+ *
+ * skb_mac_header                                      skb->data
+ * |                                                       |
+ * v                                                       v
+ * |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
+ * +-----------------------+-----------------------+-------+-------+-------+
+ * |    Destination MAC    |      Source MAC       |  TPID |  TCI  | EType |
+ * +-----------------------+-----------------------+-------+-------+-------+
+ * ^                                               |               |
+ * |<--VLAN_HLEN-->to                              <---VLAN_HLEN--->
+ * from            |
+ *       >>>>>>>   v
+ *       >>>>>>>   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
+ *       >>>>>>>   +-----------------------+-----------------------+-------+
+ *       >>>>>>>   |    Destination MAC    |      Source MAC       | EType |
+ *                 +-----------------------+-----------------------+-------+
+ *                 ^                                                       ^
+ * (now part of    |                                                       |
+ *  skb->head)     skb_mac_header                                  skb->data
+ */
+struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb)
+{
+	u8 *from = skb_mac_header(skb);
+	u8 *dest = from + VLAN_HLEN;
+
+	memmove(dest, from, 2 * ETH_ALEN);
+	skb_pull(skb, VLAN_HLEN);
+	skb_push(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+	skb_pull_rcsum(skb, ETH_HLEN);
+
+	return skb;
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_remove_header);
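
The same shift can be reproduced on a plain byte buffer; a self-contained
sketch of just the memmove step (illustrative only, no skb accounting):

#include <string.h>

/* Strip the 4-byte tag sitting between the MAC addresses and the
 * EtherType: slide DMAC + SMAC (2 * ETH_ALEN = 12 bytes) right by
 * VLAN_HLEN, so the untagged frame now begins at buf + 4.
 */
static void example_untag(unsigned char *buf)
{
	memmove(buf + 4, buf, 12);
}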
+
+static const struct dsa_device_ops dsa_8021q_netdev_ops = {
+	.name		= "8021q",
+	.proto		= DSA_TAG_PROTO_8021Q,
+	.overhead	= VLAN_HLEN,
+};
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_8021Q);
+
+module_dsa_tag_driver(dsa_8021q_netdev_ops);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 2b06bb9..9c31141 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Broadcom tag support
  *
  * Copyright (C) 2014 Broadcom Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/etherdevice.h>
@@ -59,6 +55,9 @@
 #define BRCM_EG_TC_MASK		0x7
 #define BRCM_EG_PID_MASK	0x1f
 
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) || \
+	IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND)
+
 static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
 					struct net_device *dev,
 					unsigned int offset)
@@ -143,8 +142,9 @@
 
 	return skb;
 }
+#endif
 
-#ifdef CONFIG_NET_DSA_TAG_BRCM
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM)
 static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
 				     struct net_device *dev)
 {
@@ -171,13 +171,19 @@
 	return nskb;
 }
 
-const struct dsa_device_ops brcm_netdev_ops = {
+static const struct dsa_device_ops brcm_netdev_ops = {
+	.name	= "brcm",
+	.proto	= DSA_TAG_PROTO_BRCM,
 	.xmit	= brcm_tag_xmit,
 	.rcv	= brcm_tag_rcv,
+	.overhead = BRCM_TAG_LEN,
 };
+
+DSA_TAG_DRIVER(brcm_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM);
 #endif
 
-#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND)
 static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
 					     struct net_device *dev)
 {
@@ -193,8 +199,27 @@
 	return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
 }
 
-const struct dsa_device_ops brcm_prepend_netdev_ops = {
+static const struct dsa_device_ops brcm_prepend_netdev_ops = {
+	.name	= "brcm-prepend",
+	.proto	= DSA_TAG_PROTO_BRCM_PREPEND,
 	.xmit	= brcm_tag_xmit_prepend,
 	.rcv	= brcm_tag_rcv_prepend,
+	.overhead = BRCM_TAG_LEN,
 };
+
+DSA_TAG_DRIVER(brcm_prepend_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_PREPEND);
 #endif
+
+static struct dsa_tag_driver *dsa_tag_driver_array[] =	{
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM)
+	&DSA_TAG_DRIVER_NAME(brcm_netdev_ops),
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND)
+	&DSA_TAG_DRIVER_NAME(brcm_prepend_netdev_ops),
+#endif
+};
+
+module_dsa_tag_drivers(dsa_tag_driver_array);
+
+MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index cd13cfc..7ddec97 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging
  * Copyright (c) 2008-2009 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/etherdevice.h>
@@ -146,7 +142,24 @@
 	return skb;
 }
 
-const struct dsa_device_ops dsa_netdev_ops = {
+static int dsa_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+				int *offset)
+{
+	*offset = 4;
+	*proto = ((__be16 *)skb->data)[1];
+	return 0;
+}
+
+static const struct dsa_device_ops dsa_netdev_ops = {
+	.name	= "dsa",
+	.proto	= DSA_TAG_PROTO_DSA,
 	.xmit	= dsa_xmit,
 	.rcv	= dsa_rcv,
+	.flow_dissect   = dsa_tag_flow_dissect,
+	.overhead = DSA_HLEN,
 };
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_DSA);
+
+module_dsa_tag_driver(dsa_netdev_ops);
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 4083326..e8eaa80 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * net/dsa/tag_edsa.c - Ethertype DSA tagging
  * Copyright (c) 2008-2009 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/etherdevice.h>
@@ -165,7 +161,24 @@
 	return skb;
 }
 
-const struct dsa_device_ops edsa_netdev_ops = {
+static int edsa_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+				 int *offset)
+{
+	*offset = 8;
+	*proto = ((__be16 *)skb->data)[3];
+	return 0;
+}
+
+static const struct dsa_device_ops edsa_netdev_ops = {
+	.name	= "edsa",
+	.proto	= DSA_TAG_PROTO_EDSA,
 	.xmit	= edsa_xmit,
 	.rcv	= edsa_rcv,
+	.flow_dissect   = edsa_tag_flow_dissect,
+	.overhead = EDSA_HLEN,
 };
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_EDSA);
+
+module_dsa_tag_driver(edsa_netdev_ops);
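Editor's note: the two flow_dissect callbacks above encode the same idea with different geometry. By the time the flow dissector runs on the DSA master, eth_type_trans() has already pulled ETH_HLEN bytes, so skb->data sits two bytes into the Marvell header; the callback reports where the encapsulated EtherType lives and how many more bytes to skip. A paraphrased sketch of the consuming side (see __skb_flow_dissect(); names abbreviated):

	__be16 proto = skb->protocol;
	int offset = 0;

	if (ops->flow_dissect && !ops->flow_dissect(skb, &proto, &offset)) {
		hlen -= offset;		/* fewer header bytes remain */
		nhoff += offset;	/* network header begins past the tag */
	}
	/* For DSA: offset 4, proto at 16-bit word 1 of skb->data.
	 * For EDSA: offset 8, proto at 16-bit word 3, since the header is
	 * 4 bytes longer (EtherType 0xdada plus 2 reserved bytes up front).
	 */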
diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c
new file mode 100644
index 0000000..b678160
--- /dev/null
+++ b/net/dsa/tag_gswip.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel / Lantiq GSWIP V2.0 PMAC tag support
+ *
+ * Copyright (C) 2017 - 2018 Hauke Mehrtens <hauke@hauke-m.de>
+ */
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <net/dsa.h>
+
+#include "dsa_priv.h"
+
+#define GSWIP_TX_HEADER_LEN		4
+
+/* special tag in TX path header */
+/* Byte 0 */
+#define GSWIP_TX_SLPID_SHIFT		0	/* source port ID */
+#define  GSWIP_TX_SLPID_CPU		2
+#define  GSWIP_TX_SLPID_APP1		3
+#define  GSWIP_TX_SLPID_APP2		4
+#define  GSWIP_TX_SLPID_APP3		5
+#define  GSWIP_TX_SLPID_APP4		6
+#define  GSWIP_TX_SLPID_APP5		7
+
+/* Byte 1 */
+#define GSWIP_TX_CRCGEN_DIS		BIT(7)
+#define GSWIP_TX_DPID_SHIFT		0	/* destination group ID */
+#define  GSWIP_TX_DPID_ELAN		0
+#define  GSWIP_TX_DPID_EWAN		1
+#define  GSWIP_TX_DPID_CPU		2
+#define  GSWIP_TX_DPID_APP1		3
+#define  GSWIP_TX_DPID_APP2		4
+#define  GSWIP_TX_DPID_APP3		5
+#define  GSWIP_TX_DPID_APP4		6
+#define  GSWIP_TX_DPID_APP5		7
+
+/* Byte 2 */
+#define GSWIP_TX_PORT_MAP_EN		BIT(7)
+#define GSWIP_TX_PORT_MAP_SEL		BIT(6)
+#define GSWIP_TX_LRN_DIS		BIT(5)
+#define GSWIP_TX_CLASS_EN		BIT(4)
+#define GSWIP_TX_CLASS_SHIFT		0
+#define GSWIP_TX_CLASS_MASK		GENMASK(3, 0)
+
+/* Byte 3 */
+#define GSWIP_TX_DPID_EN		BIT(0)
+#define GSWIP_TX_PORT_MAP_SHIFT		1
+#define GSWIP_TX_PORT_MAP_MASK		GENMASK(6, 1)
+
+#define GSWIP_RX_HEADER_LEN	8
+
+/* special tag in RX path header */
+/* Byte 7 */
+#define GSWIP_RX_SPPID_SHIFT		4
+#define GSWIP_RX_SPPID_MASK		GENMASK(6, 4)
+
+static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	int err;
+	u8 *gswip_tag;
+
+	err = skb_cow_head(skb, GSWIP_TX_HEADER_LEN);
+	if (err)
+		return NULL;
+
+	skb_push(skb, GSWIP_TX_HEADER_LEN);
+
+	gswip_tag = skb->data;
+	gswip_tag[0] = GSWIP_TX_SLPID_CPU;
+	gswip_tag[1] = GSWIP_TX_DPID_ELAN;
+	gswip_tag[2] = GSWIP_TX_PORT_MAP_EN | GSWIP_TX_PORT_MAP_SEL;
+	gswip_tag[3] = BIT(dp->index + GSWIP_TX_PORT_MAP_SHIFT) & GSWIP_TX_PORT_MAP_MASK;
+	gswip_tag[3] |= GSWIP_TX_DPID_EN;
+
+	return skb;
+}
+
+static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
+				     struct net_device *dev,
+				     struct packet_type *pt)
+{
+	int port;
+	u8 *gswip_tag;
+
+	if (unlikely(!pskb_may_pull(skb, GSWIP_RX_HEADER_LEN)))
+		return NULL;
+
+	gswip_tag = skb->data - ETH_HLEN;
+
+	/* Get source port information */
+	port = (gswip_tag[7] & GSWIP_RX_SPPID_MASK) >> GSWIP_RX_SPPID_SHIFT;
+	skb->dev = dsa_master_find_slave(dev, 0, port);
+	if (!skb->dev)
+		return NULL;
+
+	/* remove GSWIP tag */
+	skb_pull_rcsum(skb, GSWIP_RX_HEADER_LEN);
+
+	return skb;
+}
+
+static const struct dsa_device_ops gswip_netdev_ops = {
+	.name = "gwsip",
+	.proto	= DSA_TAG_PROTO_GSWIP,
+	.xmit = gswip_tag_xmit,
+	.rcv = gswip_tag_rcv,
+	.overhead = GSWIP_RX_HEADER_LEN,
+};
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_GSWIP);
+
+module_dsa_tag_driver(gswip_netdev_ops);
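Editor's note: as a worked example of the TX tag layout above (port number illustrative), transmitting out switch port 2 produces these four tag bytes:

	/* gswip_tag[0] = GSWIP_TX_SLPID_CPU                           = 0x02
	 * gswip_tag[1] = GSWIP_TX_DPID_ELAN                           = 0x00
	 * gswip_tag[2] = GSWIP_TX_PORT_MAP_EN | GSWIP_TX_PORT_MAP_SEL = 0xc0
	 * gswip_tag[3] = (BIT(2 + 1) & GENMASK(6, 1)) | GSWIP_TX_DPID_EN
	 *              = 0x08 | 0x01                                  = 0x09
	 */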
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 0f62eff..73605bc 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * net/dsa/tag_ksz.c - Microchip KSZ Switch tag format handling
  * Copyright (c) 2017 Microchip Technology
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/etherdevice.h>
@@ -14,34 +10,19 @@
 #include <net/dsa.h>
 #include "dsa_priv.h"
 
-/* For Ingress (Host -> KSZ), 2 bytes are added before FCS.
- * ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
- * ---------------------------------------------------------------------------
- * tag0 : Prioritization (not used now)
- * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
- *
- * For Egress (KSZ -> Host), 1 byte is added before FCS.
- * ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
- * ---------------------------------------------------------------------------
- * tag0 : zero-based value represents port
- *	  (eg, 0x00=port1, 0x02=port3, 0x06=port7)
- */
+/* Typically only one byte is used for tail tag. */
+#define KSZ_EGRESS_TAG_LEN		1
+#define KSZ_INGRESS_TAG_LEN		1
 
-#define	KSZ_INGRESS_TAG_LEN	2
-#define	KSZ_EGRESS_TAG_LEN	1
-
-static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *ksz_common_xmit(struct sk_buff *skb,
+				       struct net_device *dev, int len)
 {
-	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct sk_buff *nskb;
 	int padlen;
-	u8 *tag;
 
 	padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
 
-	if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
+	if (skb_tailroom(skb) >= padlen + len) {
 		/* Let dsa_slave_xmit() free skb */
 		if (__skb_put_padto(skb, skb->len + padlen, false))
 			return NULL;
@@ -49,7 +30,7 @@
 		nskb = skb;
 	} else {
 		nskb = alloc_skb(NET_IP_ALIGN + skb->len +
-				 padlen + KSZ_INGRESS_TAG_LEN, GFP_ATOMIC);
+				 padlen + len, GFP_ATOMIC);
 		if (!nskb)
 			return NULL;
 		skb_reserve(nskb, NET_IP_ALIGN);
@@ -70,33 +51,204 @@
 		consume_skb(skb);
 	}
 
-	tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
-	tag[0] = 0;
-	tag[1] = 1 << dp->index; /* destination port */
-
 	return nskb;
 }
 
-static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
-			       struct packet_type *pt)
+static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
+				      struct net_device *dev,
+				      unsigned int port, unsigned int len)
 {
-	u8 *tag;
-	int source_port;
-
-	tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
-
-	source_port = tag[0] & 7;
-
-	skb->dev = dsa_master_find_slave(dev, 0, source_port);
+	skb->dev = dsa_master_find_slave(dev, 0, port);
 	if (!skb->dev)
 		return NULL;
 
-	pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
+	pskb_trim_rcsum(skb, skb->len - len);
+
+	skb->offload_fwd_mark = true;
 
 	return skb;
 }
 
-const struct dsa_device_ops ksz_netdev_ops = {
-	.xmit	= ksz_xmit,
-	.rcv	= ksz_rcv,
+/*
+ * For Ingress (Host -> KSZ8795), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
+ *
+ * For Egress (KSZ8795 -> Host), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : zero-based value represents port
+ *	  (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ */
+
+#define KSZ8795_INGRESS_TAG_LEN		1
+
+#define KSZ8795_TAIL_TAG_OVERRIDE	BIT(6)
+#define KSZ8795_TAIL_TAG_LOOKUP		BIT(7)
+
+static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct sk_buff *nskb;
+	u8 *tag;
+	u8 *addr;
+
+	nskb = ksz_common_xmit(skb, dev, KSZ8795_INGRESS_TAG_LEN);
+	if (!nskb)
+		return NULL;
+
+	/* Tag encoding */
+	tag = skb_put(nskb, KSZ8795_INGRESS_TAG_LEN);
+	addr = skb_mac_header(nskb);
+
+	*tag = 1 << dp->index;
+	if (is_link_local_ether_addr(addr))
+		*tag |= KSZ8795_TAIL_TAG_OVERRIDE;
+
+	return nskb;
+}
+
+static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev,
+				  struct packet_type *pt)
+{
+	u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+
+	return ksz_common_rcv(skb, dev, tag[0] & 7, KSZ_EGRESS_TAG_LEN);
+}
+
+static const struct dsa_device_ops ksz8795_netdev_ops = {
+	.name	= "ksz8795",
+	.proto	= DSA_TAG_PROTO_KSZ8795,
+	.xmit	= ksz8795_xmit,
+	.rcv	= ksz8795_rcv,
+	.overhead = KSZ8795_INGRESS_TAG_LEN,
 };
+
+DSA_TAG_DRIVER(ksz8795_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795);
+
+/*
+ * For Ingress (Host -> KSZ9477), 2 bytes are added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : Prioritization (not used now)
+ * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
+ *
+ * For Egress (KSZ9477 -> Host), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : zero-based value represents port
+ *	  (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ */
+
+#define KSZ9477_INGRESS_TAG_LEN		2
+#define KSZ9477_PTP_TAG_LEN		4
+#define KSZ9477_PTP_TAG_INDICATION	0x80
+
+#define KSZ9477_TAIL_TAG_OVERRIDE	BIT(9)
+#define KSZ9477_TAIL_TAG_LOOKUP		BIT(10)
+
+static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
+				    struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct sk_buff *nskb;
+	u16 *tag;
+	u8 *addr;
+
+	nskb = ksz_common_xmit(skb, dev, KSZ9477_INGRESS_TAG_LEN);
+	if (!nskb)
+		return NULL;
+
+	/* Tag encoding */
+	tag = skb_put(nskb, KSZ9477_INGRESS_TAG_LEN);
+	addr = skb_mac_header(nskb);
+
+	*tag = BIT(dp->index);
+
+	if (is_link_local_ether_addr(addr))
+		*tag |= KSZ9477_TAIL_TAG_OVERRIDE;
+
+	*tag = cpu_to_be16(*tag);
+
+	return nskb;
+}
+
+static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
+				   struct packet_type *pt)
+{
+	/* Tag decoding */
+	u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+	unsigned int port = tag[0] & 7;
+	unsigned int len = KSZ_EGRESS_TAG_LEN;
+
+	/* Extra 4-bytes PTP timestamp */
+	if (tag[0] & KSZ9477_PTP_TAG_INDICATION)
+		len += KSZ9477_PTP_TAG_LEN;
+
+	return ksz_common_rcv(skb, dev, port, len);
+}
+
+static const struct dsa_device_ops ksz9477_netdev_ops = {
+	.name	= "ksz9477",
+	.proto	= DSA_TAG_PROTO_KSZ9477,
+	.xmit	= ksz9477_xmit,
+	.rcv	= ksz9477_rcv,
+	.overhead = KSZ9477_INGRESS_TAG_LEN,
+};
+
+DSA_TAG_DRIVER(ksz9477_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9477);
+
+#define KSZ9893_TAIL_TAG_OVERRIDE	BIT(5)
+#define KSZ9893_TAIL_TAG_LOOKUP		BIT(6)
+
+static struct sk_buff *ksz9893_xmit(struct sk_buff *skb,
+				    struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct sk_buff *nskb;
+	u8 *addr;
+	u8 *tag;
+
+	nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN);
+	if (!nskb)
+		return NULL;
+
+	/* Tag encoding */
+	tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
+	addr = skb_mac_header(nskb);
+
+	*tag = BIT(dp->index);
+
+	if (is_link_local_ether_addr(addr))
+		*tag |= KSZ9893_TAIL_TAG_OVERRIDE;
+
+	return nskb;
+}
+
+static const struct dsa_device_ops ksz9893_netdev_ops = {
+	.name	= "ksz9893",
+	.proto	= DSA_TAG_PROTO_KSZ9893,
+	.xmit	= ksz9893_xmit,
+	.rcv	= ksz9477_rcv,
+	.overhead = KSZ_INGRESS_TAG_LEN,
+};
+
+DSA_TAG_DRIVER(ksz9893_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893);
+
+static struct dsa_tag_driver *dsa_tag_driver_array[] = {
+	&DSA_TAG_DRIVER_NAME(ksz8795_netdev_ops),
+	&DSA_TAG_DRIVER_NAME(ksz9477_netdev_ops),
+	&DSA_TAG_DRIVER_NAME(ksz9893_netdev_ops),
+};
+
+module_dsa_tag_drivers(dsa_tag_driver_array);
+
+MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 548c002..eb0e7a3 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2017 Pengutronix, Juergen Borleis <jbe@pengutronix.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 #include <linux/dsa/lan9303.h>
 #include <linux/etherdevice.h>
@@ -137,7 +128,15 @@
 	return skb;
 }
 
-const struct dsa_device_ops lan9303_netdev_ops = {
+static const struct dsa_device_ops lan9303_netdev_ops = {
+	.name = "lan9303",
+	.proto	= DSA_TAG_PROTO_LAN9303,
 	.xmit = lan9303_xmit,
 	.rcv = lan9303_rcv,
+	.overhead = LAN9303_TAG_LEN,
 };
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_LAN9303);
+
+module_dsa_tag_driver(lan9303_netdev_ops);
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 11535bc..b5705cb 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -1,15 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Mediatek DSA Tag support
  * Copyright (C) 2017 Landen Chao <landen.chao@mediatek.com>
  *		      Sean Wang <sean.wang@mediatek.com>
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/etherdevice.h>
@@ -105,8 +98,16 @@
 	return 0;
 }
 
-const struct dsa_device_ops mtk_netdev_ops = {
+static const struct dsa_device_ops mtk_netdev_ops = {
+	.name		= "mtk",
+	.proto		= DSA_TAG_PROTO_MTK,
 	.xmit		= mtk_tag_xmit,
 	.rcv		= mtk_tag_rcv,
 	.flow_dissect	= mtk_tag_flow_dissect,
+	.overhead	= MTK_HDR_LEN,
 };
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_MTK);
+
+module_dsa_tag_driver(mtk_netdev_ops);
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 613f4ee..c958852 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/etherdevice.h>
@@ -98,7 +90,25 @@
 	return skb;
 }
 
-const struct dsa_device_ops qca_netdev_ops = {
+static int qca_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+                                int *offset)
+{
+	*offset = QCA_HDR_LEN;
+	*proto = ((__be16 *)skb->data)[0];
+
+	return 0;
+}
+
+static const struct dsa_device_ops qca_netdev_ops = {
+	.name	= "qca",
+	.proto	= DSA_TAG_PROTO_QCA,
 	.xmit	= qca_tag_xmit,
 	.rcv	= qca_tag_rcv,
+	.flow_dissect = qca_tag_flow_dissect,
+	.overhead = QCA_HDR_LEN,
 };
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_QCA);
+
+module_dsa_tag_driver(qca_netdev_ops);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
new file mode 100644
index 0000000..63ef2a1
--- /dev/null
+++ b/net/dsa/tag_sja1105.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+ */
+#include <linux/if_vlan.h>
+#include <linux/dsa/sja1105.h>
+#include <linux/dsa/8021q.h>
+#include <linux/packing.h>
+#include "dsa_priv.h"
+
+/* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
+static inline bool sja1105_is_link_local(const struct sk_buff *skb)
+{
+	const struct ethhdr *hdr = eth_hdr(skb);
+	u64 dmac = ether_addr_to_u64(hdr->h_dest);
+
+	if (ntohs(hdr->h_proto) == ETH_P_SJA1105_META)
+		return false;
+	if ((dmac & SJA1105_LINKLOCAL_FILTER_A_MASK) ==
+		    SJA1105_LINKLOCAL_FILTER_A)
+		return true;
+	if ((dmac & SJA1105_LINKLOCAL_FILTER_B_MASK) ==
+		    SJA1105_LINKLOCAL_FILTER_B)
+		return true;
+	return false;
+}
+
+struct sja1105_meta {
+	u64 tstamp;
+	u64 dmac_byte_4;
+	u64 dmac_byte_3;
+	u64 source_port;
+	u64 switch_id;
+};
+
+static void sja1105_meta_unpack(const struct sk_buff *skb,
+				struct sja1105_meta *meta)
+{
+	u8 *buf = skb_mac_header(skb) + ETH_HLEN;
+
+	/* UM10944.pdf section 4.2.17 AVB Parameters:
+	 * Structure of the meta-data follow-up frame.
+	 * It is in network byte order, so there are no quirks
+	 * while unpacking the meta frame.
+	 *
+	 * Also SJA1105 E/T only populates bits 23:0 of the timestamp
+	 * whereas P/Q/R/S does 32 bits. Since the structure is the
+	 * same and the E/T puts zeroes in the high-order byte, use
+	 * a unified unpacking command for both device series.
+	 */
+	packing(buf,     &meta->tstamp,     31, 0, 4, UNPACK, 0);
+	packing(buf + 4, &meta->dmac_byte_4, 7, 0, 1, UNPACK, 0);
+	packing(buf + 5, &meta->dmac_byte_3, 7, 0, 1, UNPACK, 0);
+	packing(buf + 6, &meta->source_port, 7, 0, 1, UNPACK, 0);
+	packing(buf + 7, &meta->switch_id,   7, 0, 1, UNPACK, 0);
+}
+
+static inline bool sja1105_is_meta_frame(const struct sk_buff *skb)
+{
+	const struct ethhdr *hdr = eth_hdr(skb);
+	u64 smac = ether_addr_to_u64(hdr->h_source);
+	u64 dmac = ether_addr_to_u64(hdr->h_dest);
+
+	if (smac != SJA1105_META_SMAC)
+		return false;
+	if (dmac != SJA1105_META_DMAC)
+		return false;
+	if (ntohs(hdr->h_proto) != ETH_P_SJA1105_META)
+		return false;
+	return true;
+}
+
+/* This is the first time the tagger sees the frame on RX.
+ * Figure out if we can decode it.
+ */
+static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev)
+{
+	if (!dsa_port_is_vlan_filtering(dev->dsa_ptr))
+		return true;
+	if (sja1105_is_link_local(skb))
+		return true;
+	if (sja1105_is_meta_frame(skb))
+		return true;
+	return false;
+}
+
+static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
+				    struct net_device *netdev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(netdev);
+	struct dsa_switch *ds = dp->ds;
+	u16 tx_vid = dsa_8021q_tx_vid(ds, dp->index);
+	u16 queue_mapping = skb_get_queue_mapping(skb);
+	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+
+	/* Transmitting management traffic does not rely upon switch tagging,
+	 * but instead on SPI-installed management routes. Part 2 of this
+	 * is the .port_deferred_xmit driver callback.
+	 */
+	if (unlikely(sja1105_is_link_local(skb)))
+		return dsa_defer_xmit(skb, netdev);
+
+	/* If we are under a vlan_filtering bridge, IP termination on
+	 * switch ports based on 802.1Q tags is simply too brittle to
+	 * be passable. So just defer to the dsa_slave_notag_xmit
+	 * implementation.
+	 */
+	if (dsa_port_is_vlan_filtering(dp))
+		return skb;
+
+	return dsa_8021q_xmit(skb, netdev, ETH_P_SJA1105,
+			     ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
+}
+
+static void sja1105_transfer_meta(struct sk_buff *skb,
+				  const struct sja1105_meta *meta)
+{
+	struct ethhdr *hdr = eth_hdr(skb);
+
+	hdr->h_dest[3] = meta->dmac_byte_3;
+	hdr->h_dest[4] = meta->dmac_byte_4;
+	SJA1105_SKB_CB(skb)->meta_tstamp = meta->tstamp;
+}
+
+/* This is a simple state machine which follows the hardware mechanism of
+ * generating RX timestamps:
+ *
+ * After each timestampable skb (all traffic for which send_meta1 and
+ * send_meta0 is true, aka all MAC-filtered link-local traffic) a meta frame
+ * containing a partial timestamp is immediately generated by the switch and
+ * sent as a follow-up to the link-local frame on the CPU port.
+ *
+ * The meta frames have no unique identifier (such as sequence number) by which
+ * one may pair them to the correct timestampable frame.
+ * Instead, the switch has internal logic that ensures no frames are sent on
+ * the CPU port between a link-local timestampable frame and its corresponding
+ * meta follow-up. It also ensures strict ordering between ports (lower ports
+ * have higher priority towards the CPU port). For this reason, a per-port
+ * data structure is not needed/desirable.
+ *
+ * This function pairs the link-local frame with its partial timestamp from the
+ * meta follow-up frame. The full timestamp will be reconstructed later in a
+ * work queue.
+ */
+static struct sk_buff
+*sja1105_rcv_meta_state_machine(struct sk_buff *skb,
+				struct sja1105_meta *meta,
+				bool is_link_local,
+				bool is_meta)
+{
+	struct sja1105_port *sp;
+	struct dsa_port *dp;
+
+	dp = dsa_slave_to_port(skb->dev);
+	sp = dp->priv;
+
+	/* Step 1: A timestampable frame was received.
+	 * Buffer it until we get its meta frame.
+	 */
+	if (is_link_local) {
+		if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state))
+			/* Do normal processing. */
+			return skb;
+
+		spin_lock(&sp->data->meta_lock);
+		/* Was this a link-local frame instead of the meta
+		 * that we were expecting?
+		 */
+		if (sp->data->stampable_skb) {
+			dev_err_ratelimited(dp->ds->dev,
+					    "Expected meta frame, is %12llx "
+					    "in the DSA master multicast filter?\n",
+					    SJA1105_META_DMAC);
+			kfree_skb(sp->data->stampable_skb);
+		}
+
+		/* Hold a reference to avoid dsa_switch_rcv
+		 * from freeing the skb.
+		 */
+		sp->data->stampable_skb = skb_get(skb);
+		spin_unlock(&sp->data->meta_lock);
+
+		/* Tell DSA we got nothing */
+		return NULL;
+
+	/* Step 2: The meta frame arrived.
+	 * Time to take the stampable skb out of the closet, annotate it
+	 * with the partial timestamp, and pretend that we received it
+	 * just now (basically masquerade the buffered frame as the meta
+	 * frame, which serves no further purpose).
+	 */
+	} else if (is_meta) {
+		struct sk_buff *stampable_skb;
+
+		/* Drop the meta frame if we're not in the right state
+		 * to process it.
+		 */
+		if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state))
+			return NULL;
+
+		spin_lock(&sp->data->meta_lock);
+
+		stampable_skb = sp->data->stampable_skb;
+		sp->data->stampable_skb = NULL;
+
+		/* Was this a meta frame instead of the link-local
+		 * that we were expecting?
+		 */
+		if (!stampable_skb) {
+			dev_err_ratelimited(dp->ds->dev,
+					    "Unexpected meta frame\n");
+			spin_unlock(&sp->data->meta_lock);
+			return NULL;
+		}
+
+		if (stampable_skb->dev != skb->dev) {
+			dev_err_ratelimited(dp->ds->dev,
+					    "Meta frame on wrong port\n");
+			spin_unlock(&sp->data->meta_lock);
+			return NULL;
+		}
+
+		/* Free the meta frame and give DSA the buffered stampable_skb
+		 * for further processing up the network stack.
+		 */
+		kfree_skb(skb);
+		skb = stampable_skb;
+		sja1105_transfer_meta(skb, meta);
+
+		spin_unlock(&sp->data->meta_lock);
+	}
+
+	return skb;
+}
+
+static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
+				   struct net_device *netdev,
+				   struct packet_type *pt)
+{
+	struct sja1105_meta meta = {0};
+	int source_port, switch_id;
+	struct vlan_ethhdr *hdr;
+	u16 tpid, vid, tci;
+	bool is_link_local;
+	bool is_tagged;
+	bool is_meta;
+
+	hdr = vlan_eth_hdr(skb);
+	tpid = ntohs(hdr->h_vlan_proto);
+	is_tagged = (tpid == ETH_P_SJA1105);
+	is_link_local = sja1105_is_link_local(skb);
+	is_meta = sja1105_is_meta_frame(skb);
+
+	skb->offload_fwd_mark = 1;
+
+	if (is_tagged) {
+		/* Normal traffic path. */
+		tci = ntohs(hdr->h_vlan_TCI);
+		vid = tci & VLAN_VID_MASK;
+		source_port = dsa_8021q_rx_source_port(vid);
+		switch_id = dsa_8021q_rx_switch_id(vid);
+		skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+	} else if (is_link_local) {
+		/* Management traffic path. Switch embeds the switch ID and
+		 * port ID into bytes of the destination MAC, courtesy of
+		 * the incl_srcpt options.
+		 */
+		source_port = hdr->h_dest[3];
+		switch_id = hdr->h_dest[4];
+		/* Clear the DMAC bytes that were mangled by the switch */
+		hdr->h_dest[3] = 0;
+		hdr->h_dest[4] = 0;
+	} else if (is_meta) {
+		sja1105_meta_unpack(skb, &meta);
+		source_port = meta.source_port;
+		switch_id = meta.switch_id;
+	} else {
+		return NULL;
+	}
+
+	skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+	if (!skb->dev) {
+		netdev_warn(netdev, "Couldn't decode source port\n");
+		return NULL;
+	}
+
+	/* Delete/overwrite fake VLAN header, DSA expects to not find
+	 * it there, see dsa_switch_rcv: skb_push(skb, ETH_HLEN).
+	 */
+	if (is_tagged)
+		skb = dsa_8021q_remove_header(skb);
+
+	return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
+					      is_meta);
+}
+
+static struct dsa_device_ops sja1105_netdev_ops = {
+	.name = "sja1105",
+	.proto = DSA_TAG_PROTO_SJA1105,
+	.xmit = sja1105_xmit,
+	.rcv = sja1105_rcv,
+	.filter = sja1105_filter,
+	.overhead = VLAN_HLEN,
+};
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1105);
+
+module_dsa_tag_driver(sja1105_netdev_ops);
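Editor's note: the packing() calls in sja1105_meta_unpack() come from lib/packing.c; below is a minimal sketch of unpacking just the timestamp field under the same assumptions (network byte order, quirks == 0, bit positions counted within the 4-byte group). The helper name is hypothetical, not part of the driver:

	#include <linux/packing.h>

	/* Hypothetical helper for illustration only */
	static u64 demo_unpack_meta_tstamp(const u8 *buf)
	{
		u64 tstamp = 0;

		/* bits 31..0 of the first four bytes of the meta payload */
		packing((void *)buf, &tstamp, 31, 0, 4, UNPACK, 0);

		return tstamp;
	}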
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 56197f0..4f8ab62 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * net/dsa/tag_trailer.c - Trailer tag format handling
  * Copyright (c) 2008-2009 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/etherdevice.h>
@@ -81,7 +77,15 @@
 	return skb;
 }
 
-const struct dsa_device_ops trailer_netdev_ops = {
+static const struct dsa_device_ops trailer_netdev_ops = {
+	.name	= "trailer",
+	.proto	= DSA_TAG_PROTO_TRAILER,
 	.xmit	= trailer_xmit,
 	.rcv	= trailer_rcv,
+	.overhead = 4,
 };
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_TRAILER);
+
+module_dsa_tag_driver(trailer_netdev_ops);
diff --git a/net/ethernet/Makefile b/net/ethernet/Makefile
index 3231775..e03eff9 100644
--- a/net/ethernet/Makefile
+++ b/net/ethernet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux Ethernet layer.
 #
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index fd8faa0..17374af 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -31,11 +32,6 @@
  *				  older network drivers and IFF_ALLMULTI.
  *	Christer Weinigel	: Better rebuild header message.
  *             Andrew Morton    : 26Feb01: kill ether_setup() - use netdev_boot_setup().
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 #include <linux/module.h>
 #include <linux/types.h>
@@ -47,6 +43,7 @@
 #include <linux/inet.h>
 #include <linux/ip.h>
 #include <linux/netdevice.h>
+#include <linux/nvmem-consumer.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
@@ -118,13 +115,14 @@
 
 /**
  * eth_get_headlen - determine the length of header for an ethernet frame
+ * @dev: pointer to network device
  * @data: pointer to start of frame
  * @len: total length of frame
  *
  * Make a best effort attempt to pull the length for all of the headers for
  * a given frame in a linear buffer.
  */
-u32 eth_get_headlen(void *data, unsigned int len)
+u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len)
 {
 	const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
 	const struct ethhdr *eth = (const struct ethhdr *)data;
@@ -135,8 +133,9 @@
 		return len;
 
 	/* parse any remaining L2/L3 headers, check for L4 */
-	if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data, eth->h_proto,
-					      sizeof(*eth), len, flags))
+	if (!skb_flow_dissect_flow_keys_basic(dev_net(dev), NULL, &keys, data,
+					      eth->h_proto, sizeof(*eth),
+					      len, flags))
 		return max_t(u32, keys.control.thoff, sizeof(*eth));
 
 	/* parse for any L4 headers */
@@ -165,23 +164,29 @@
 	eth = (struct ethhdr *)skb->data;
 	skb_pull_inline(skb, ETH_HLEN);
 
-	if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
-		if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
-			skb->pkt_type = PACKET_BROADCAST;
-		else
-			skb->pkt_type = PACKET_MULTICAST;
+	if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
+					      dev->dev_addr))) {
+		if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
+			if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
+				skb->pkt_type = PACKET_BROADCAST;
+			else
+				skb->pkt_type = PACKET_MULTICAST;
+		} else {
+			skb->pkt_type = PACKET_OTHERHOST;
+		}
 	}
-	else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
-						   dev->dev_addr)))
-		skb->pkt_type = PACKET_OTHERHOST;
 
 	/*
 	 * Some variants of DSA tagging don't have an ethertype field
 	 * at all, so we check here whether one of those tagging
 	 * variants has been configured on the receiving interface,
 	 * and if so, set skb->protocol without looking at the packet.
+	 * The DSA tagging protocol may be able to decode some but not all
+	 * traffic (for example only for management). In that case give it the
+	 * option to filter the packets from which it can decode source port
+	 * information.
 	 */
-	if (unlikely(netdev_uses_dsa(dev)))
+	if (unlikely(netdev_uses_dsa(dev)) && dsa_can_decode(skb, dev))
 		return htons(ETH_P_XDSA);
 
 	if (likely(eth_proto_is_802_3(eth->h_proto)))
@@ -262,6 +267,18 @@
 EXPORT_SYMBOL(eth_header_cache_update);
 
 /**
+ * eth_header_parse_protocol - extract protocol from L2 header
+ * @skb: packet to extract protocol from
+ */
+__be16 eth_header_parse_protocol(const struct sk_buff *skb)
+{
+	const struct ethhdr *eth = eth_hdr(skb);
+
+	return eth->h_proto;
+}
+EXPORT_SYMBOL(eth_header_parse_protocol);
+
+/**
  * eth_prepare_mac_addr_change - prepare for mac change
  * @dev: network device
  * @p: socket address
@@ -343,6 +360,7 @@
 	.parse		= eth_header_parse,
 	.cache		= eth_header_cache,
 	.cache_update	= eth_header_cache_update,
+	.parse_protocol	= eth_header_parse_protocol,
 };
 
 /**
@@ -527,24 +545,55 @@
 
 int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr)
 {
-	const unsigned char *addr;
-	struct device_node *dp;
+	const unsigned char *addr = NULL;
 
-	if (dev_is_pci(dev))
-		dp = pci_device_to_OF_node(to_pci_dev(dev));
-	else
-		dp = dev->of_node;
-
-	addr = NULL;
-	if (dp)
-		addr = of_get_mac_address(dp);
-	if (!addr)
+	if (dev->of_node)
+		addr = of_get_mac_address(dev->of_node);
+	if (IS_ERR_OR_NULL(addr))
 		addr = arch_get_platform_mac_address();
 
 	if (!addr)
 		return -ENODEV;
 
 	ether_addr_copy(mac_addr, addr);
+
 	return 0;
 }
 EXPORT_SYMBOL(eth_platform_get_mac_address);
+
+/**
+ * Obtain the MAC address from an nvmem cell named 'mac-address' associated
+ * with given device.
+ *
+ * @dev:	Device with which the mac-address cell is associated.
+ * @addrbuf:	Buffer to which the MAC address will be copied on success.
+ *
+ * Returns 0 on success or a negative error number on failure.
+ */
+int nvmem_get_mac_address(struct device *dev, void *addrbuf)
+{
+	struct nvmem_cell *cell;
+	const void *mac;
+	size_t len;
+
+	cell = nvmem_cell_get(dev, "mac-address");
+	if (IS_ERR(cell))
+		return PTR_ERR(cell);
+
+	mac = nvmem_cell_read(cell, &len);
+	nvmem_cell_put(cell);
+
+	if (IS_ERR(mac))
+		return PTR_ERR(mac);
+
+	if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+		kfree(mac);
+		return -EINVAL;
+	}
+
+	ether_addr_copy(addrbuf, mac);
+	kfree(mac);
+
+	return 0;
+}
+EXPORT_SYMBOL(nvmem_get_mac_address);
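Editor's note: a hedged sketch of how a driver probe path might consume the new helper (the function name and the random-MAC fallback are illustrative, not taken from this patch):

	/* Hypothetical caller, for illustration only */
	static void demo_set_mac(struct platform_device *pdev,
				 struct net_device *ndev)
	{
		u8 addr[ETH_ALEN];

		if (!nvmem_get_mac_address(&pdev->dev, addr))
			ether_addr_copy(ndev->dev_addr, addr);
		else
			eth_hw_addr_random(ndev);	/* no usable cell: randomize */
	}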
diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig
index 4b683fd..9c58f87 100644
--- a/net/hsr/Kconfig
+++ b/net/hsr/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # IEC 62439-3 High-availability Seamless Redundancy
 #
diff --git a/net/hsr/Makefile b/net/hsr/Makefile
index 9ae972a..75df90d 100644
--- a/net/hsr/Makefile
+++ b/net/hsr/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for HSR
 #
@@ -6,3 +7,4 @@
 
 hsr-y			:= hsr_main.o hsr_framereg.o hsr_device.o \
 			   hsr_netlink.o hsr_slave.o hsr_forward.o
+hsr-$(CONFIG_DEBUG_FS) += hsr_debugfs.o
diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
new file mode 100644
index 0000000..9444797
--- /dev/null
+++ b/net/hsr/hsr_debugfs.c
@@ -0,0 +1,119 @@
+/*
+ * hsr_debugfs code
+ * Copyright (C) 2019 Texas Instruments Incorporated
+ *
+ * Author(s):
+ *	Murali Karicheri <m-karicheri2@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/debugfs.h>
+#include "hsr_main.h"
+#include "hsr_framereg.h"
+
+static void print_mac_address(struct seq_file *sfp, unsigned char *mac)
+{
+	seq_printf(sfp, "%02x:%02x:%02x:%02x:%02x:%02x:",
+		   mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+}
+
+/* hsr_node_table_show - Formats and prints node_table entries */
+static int
+hsr_node_table_show(struct seq_file *sfp, void *data)
+{
+	struct hsr_priv *priv = (struct hsr_priv *)sfp->private;
+	struct hsr_node *node;
+
+	seq_puts(sfp, "Node Table entries\n");
+	seq_puts(sfp, "MAC-Address-A,   MAC-Address-B, time_in[A], ");
+	seq_puts(sfp, "time_in[B], Address-B port\n");
+	rcu_read_lock();
+	list_for_each_entry_rcu(node, &priv->node_db, mac_list) {
+		/* skip self node */
+		if (hsr_addr_is_self(priv, node->macaddress_A))
+			continue;
+		print_mac_address(sfp, &node->macaddress_A[0]);
+		seq_puts(sfp, " ");
+		print_mac_address(sfp, &node->macaddress_B[0]);
+		seq_printf(sfp, "0x%lx, ", node->time_in[HSR_PT_SLAVE_A]);
+		seq_printf(sfp, "0x%lx ", node->time_in[HSR_PT_SLAVE_B]);
+		seq_printf(sfp, "0x%x\n", node->addr_B_port);
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+/* hsr_node_table_open - Open the node_table file
+ *
+ * Description:
+ * This routine opens the debugfs node_table file of a specific hsr device
+ */
+static int
+hsr_node_table_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, hsr_node_table_show, inode->i_private);
+}
+
+static const struct file_operations hsr_fops = {
+	.owner	= THIS_MODULE,
+	.open	= hsr_node_table_open,
+	.read	= seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/* hsr_debugfs_init - create hsr node_table file for dumping
+ * the node table
+ *
+ * Description:
+ * When debugfs is configured this routine sets up the node_table file per
+ * hsr device for dumping the node_table entries
+ */
+int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
+{
+	int rc = -1;
+	struct dentry *de = NULL;
+
+	de = debugfs_create_dir(hsr_dev->name, NULL);
+	if (!de) {
+		pr_err("Cannot create hsr debugfs root\n");
+		return rc;
+	}
+
+	priv->node_tbl_root = de;
+
+	de = debugfs_create_file("node_table", S_IFREG | 0444,
+				 priv->node_tbl_root, priv,
+				 &hsr_fops);
+	if (!de) {
+		pr_err("Cannot create hsr node_table directory\n");
+		return rc;
+	}
+	priv->node_tbl_file = de;
+
+	return 0;
+}
+
+/* hsr_debugfs_term - Tear down debugfs infrastructure
+ *
+ * Description:
+ * When debugfs is configured this routine removes the debugfs file system
+ * elements that are specific to hsr
+ */
+void
+hsr_debugfs_term(struct hsr_priv *priv)
+{
+	debugfs_remove(priv->node_tbl_file);
+	priv->node_tbl_file = NULL;
+	debugfs_remove(priv->node_tbl_root);
+	priv->node_tbl_root = NULL;
+}
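Editor's note: once an hsr device is up, the table can be read from user space; assuming debugfs is mounted at /sys/kernel/debug and the interface is named hsr0, running "cat /sys/kernel/debug/hsr0/node_table" prints one line per learned peer in the format assembled by hsr_node_table_show() above, along these lines (addresses and jiffies values illustrative):

	Node Table entries
	MAC-Address-A,   MAC-Address-B, time_in[A], time_in[B], Address-B port
	00:0a:0b:0c:0d:01: 00:0a:0b:0c:0d:02: 0xfff0a3c1, 0xfff0a3c5 0x0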
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index b8cd43c..f509b49 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  *
@@ -23,7 +19,6 @@
 #include "hsr_main.h"
 #include "hsr_forward.h"
 
-
 static bool is_admin_up(struct net_device *dev)
 {
 	return dev && (dev->flags & IFF_UP);
@@ -68,7 +63,7 @@
 
 	rcu_read_lock();
 	hsr_for_each_port(master->hsr, port)
-		if ((port->type != HSR_PT_MASTER) && is_slave_up(port->dev)) {
+		if (port->type != HSR_PT_MASTER && is_slave_up(port->dev)) {
 			has_carrier = true;
 			break;
 		}
@@ -82,7 +77,6 @@
 	return has_carrier;
 }
 
-
 static void hsr_check_announce(struct net_device *hsr_dev,
 			       unsigned char old_operstate)
 {
@@ -90,16 +84,14 @@
 
 	hsr = netdev_priv(hsr_dev);
 
-	if ((hsr_dev->operstate == IF_OPER_UP)
-			&& (old_operstate != IF_OPER_UP)) {
+	if (hsr_dev->operstate == IF_OPER_UP && old_operstate != IF_OPER_UP) {
 		/* Went up */
 		hsr->announce_count = 0;
-		hsr->announce_timer.expires = jiffies +
-				msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
-		add_timer(&hsr->announce_timer);
+		mod_timer(&hsr->announce_timer,
+			  jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
 	}
 
-	if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP))
+	if (hsr_dev->operstate != IF_OPER_UP && old_operstate == IF_OPER_UP)
 		/* Went down */
 		del_timer(&hsr->announce_timer);
 }
@@ -137,7 +129,6 @@
 	return mtu_max - HSR_HLEN;
 }
 
-
 static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct hsr_priv *hsr;
@@ -192,14 +183,12 @@
 	return 0;
 }
 
-
 static int hsr_dev_close(struct net_device *dev)
 {
 	/* Nothing to do here. */
 	return 0;
 }
 
-
 static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr,
 						netdev_features_t features)
 {
@@ -232,7 +221,6 @@
 	return hsr_features_recompute(hsr, features);
 }
 
-
 static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct hsr_priv *hsr = netdev_priv(dev);
@@ -241,18 +229,16 @@
 	master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
 	skb->dev = master->dev;
 	hsr_forward_skb(skb, master);
-
 	return NETDEV_TX_OK;
 }
 
-
 static const struct header_ops hsr_header_ops = {
 	.create	 = eth_header,
 	.parse	 = eth_header_parse,
 };
 
 static void send_hsr_supervision_frame(struct hsr_port *master,
-		u8 type, u8 hsrVer)
+				       u8 type, u8 hsr_ver)
 {
 	struct sk_buff *skb;
 	int hlen, tlen;
@@ -263,39 +249,38 @@
 
 	hlen = LL_RESERVED_SPACE(master->dev);
 	tlen = master->dev->needed_tailroom;
-	skb = dev_alloc_skb(
-			sizeof(struct hsr_tag) +
-			sizeof(struct hsr_sup_tag) +
-			sizeof(struct hsr_sup_payload) + hlen + tlen);
+	skb = dev_alloc_skb(sizeof(struct hsr_tag) +
+			    sizeof(struct hsr_sup_tag) +
+			    sizeof(struct hsr_sup_payload) + hlen + tlen);
 
-	if (skb == NULL)
+	if (!skb)
 		return;
 
 	skb_reserve(skb, hlen);
 
 	skb->dev = master->dev;
-	skb->protocol = htons(hsrVer ? ETH_P_HSR : ETH_P_PRP);
+	skb->protocol = htons(hsr_ver ? ETH_P_HSR : ETH_P_PRP);
 	skb->priority = TC_PRIO_CONTROL;
 
-	if (dev_hard_header(skb, skb->dev, (hsrVer ? ETH_P_HSR : ETH_P_PRP),
+	if (dev_hard_header(skb, skb->dev, (hsr_ver ? ETH_P_HSR : ETH_P_PRP),
 			    master->hsr->sup_multicast_addr,
 			    skb->dev->dev_addr, skb->len) <= 0)
 		goto out;
 	skb_reset_mac_header(skb);
 
-	if (hsrVer > 0) {
+	if (hsr_ver > 0) {
 		hsr_tag = skb_put(skb, sizeof(struct hsr_tag));
 		hsr_tag->encap_proto = htons(ETH_P_PRP);
 		set_hsr_tag_LSDU_size(hsr_tag, HSR_V1_SUP_LSDUSIZE);
 	}
 
 	hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag));
-	set_hsr_stag_path(hsr_stag, (hsrVer ? 0x0 : 0xf));
-	set_hsr_stag_HSR_Ver(hsr_stag, hsrVer);
+	set_hsr_stag_path(hsr_stag, (hsr_ver ? 0x0 : 0xf));
+	set_hsr_stag_HSR_ver(hsr_stag, hsr_ver);
 
 	/* From HSRv1 on we have separate supervision sequence numbers. */
 	spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags);
-	if (hsrVer > 0) {
+	if (hsr_ver > 0) {
 		hsr_stag->sequence_nr = htons(master->hsr->sup_sequence_nr);
 		hsr_tag->sequence_nr = htons(master->hsr->sequence_nr);
 		master->hsr->sup_sequence_nr++;
@@ -306,13 +291,14 @@
 	}
 	spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
 
-	hsr_stag->HSR_TLV_Type = type;
+	hsr_stag->HSR_TLV_type = type;
 	/* TODO: Why 12 in HSRv0? */
-	hsr_stag->HSR_TLV_Length = hsrVer ? sizeof(struct hsr_sup_payload) : 12;
+	hsr_stag->HSR_TLV_length =
+				hsr_ver ? sizeof(struct hsr_sup_payload) : 12;
 
 	/* Payload: MacAddressA */
 	hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
-	ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr);
+	ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
 
 	if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN))
 		return;
@@ -325,60 +311,60 @@
 	kfree_skb(skb);
 }
 
-
 /* Announce (supervision frame) timer function
  */
 static void hsr_announce(struct timer_list *t)
 {
 	struct hsr_priv *hsr;
 	struct hsr_port *master;
+	unsigned long interval;
 
 	hsr = from_timer(hsr, t, announce_timer);
 
 	rcu_read_lock();
 	master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
 
-	if (hsr->announce_count < 3 && hsr->protVersion == 0) {
+	if (hsr->announce_count < 3 && hsr->prot_version == 0) {
 		send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE,
-				hsr->protVersion);
+					   hsr->prot_version);
 		hsr->announce_count++;
 
-		hsr->announce_timer.expires = jiffies +
-				msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+		interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
 	} else {
 		send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK,
-				hsr->protVersion);
+					   hsr->prot_version);
 
-		hsr->announce_timer.expires = jiffies +
-				msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+		interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
 	}
 
 	if (is_admin_up(master->dev))
-		add_timer(&hsr->announce_timer);
+		mod_timer(&hsr->announce_timer, jiffies + interval);
 
 	rcu_read_unlock();
 }
 
-
-/* According to comments in the declaration of struct net_device, this function
- * is "Called from unregister, can be used to call free_netdev". Ok then...
+/* This has to be called after all the readers are gone.
+ * Otherwise we would have to check the return value of
+ * hsr_port_get_hsr().
  */
 static void hsr_dev_destroy(struct net_device *hsr_dev)
 {
 	struct hsr_priv *hsr;
 	struct hsr_port *port;
+	struct hsr_port *tmp;
 
 	hsr = netdev_priv(hsr_dev);
 
-	rtnl_lock();
-	hsr_for_each_port(hsr, port)
+	hsr_debugfs_term(hsr);
+
+	list_for_each_entry_safe(port, tmp, &hsr->ports, port_list)
 		hsr_del_port(port);
-	rtnl_unlock();
 
 	del_timer_sync(&hsr->prune_timer);
 	del_timer_sync(&hsr->announce_timer);
 
-	synchronize_rcu();
+	hsr_del_self_node(&hsr->self_node_db);
+	hsr_del_nodes(&hsr->node_db);
 }
 
 static const struct net_device_ops hsr_device_ops = {
@@ -387,6 +373,7 @@
 	.ndo_stop = hsr_dev_close,
 	.ndo_start_xmit = hsr_dev_xmit,
 	.ndo_fix_features = hsr_fix_features,
+	.ndo_uninit = hsr_dev_destroy,
 };
 
 static struct device_type hsr_type = {
@@ -405,7 +392,6 @@
 	dev->priv_flags |= IFF_NO_QUEUE;
 
 	dev->needs_free_netdev = true;
-	dev->priv_destructor = hsr_dev_destroy;
 
 	dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
 			   NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
@@ -425,7 +411,6 @@
 	dev->features |= NETIF_F_NETNS_LOCAL;
 }
 
-
 /* Return true if dev is a HSR master; return false otherwise.
  */
 inline bool is_hsr_master(struct net_device *dev)
@@ -443,6 +428,7 @@
 {
 	struct hsr_priv *hsr;
 	struct hsr_port *port;
+	struct hsr_port *tmp;
 	int res;
 
 	hsr = netdev_priv(hsr_dev);
@@ -469,7 +455,7 @@
 	ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
 	hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
 
-	hsr->protVersion = protocol_version;
+	hsr->prot_version = protocol_version;
 
 	/* FIXME: should I modify the value of these?
 	 *
@@ -486,7 +472,7 @@
 
 	res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
 	if (res)
-		return res;
+		goto err_add_port;
 
 	res = register_netdevice(hsr_dev);
 	if (res)
@@ -500,12 +486,17 @@
 		goto fail;
 
 	mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD));
+	res = hsr_debugfs_init(hsr, hsr_dev);
+	if (res)
+		goto fail;
 
 	return 0;
 
 fail:
-	hsr_for_each_port(hsr, port)
+	list_for_each_entry_safe(port, tmp, &hsr->ports, port_list)
 		hsr_del_port(port);
+err_add_port:
+	hsr_del_self_node(&hsr->self_node_db);
 
 	return res;
 }
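Editor's note: the announce-timer rework above folds the open-coded expires/add_timer pairs into mod_timer(); the two forms relate as in this sketch, the new one also being safe when the timer is already pending (add_timer() on a pending timer is a bug):

	/* old shape (must never run against an already-pending timer): */
	hsr->announce_timer.expires = jiffies +
			msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
	add_timer(&hsr->announce_timer);

	/* new shape ((re)arms atomically whether or not it is pending): */
	mod_timer(&hsr->announce_timer,
		  jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));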
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
index 9975e31..6d7759c 100644
--- a/net/hsr/hsr_device.h
+++ b/net/hsr/hsr_device.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  */
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 04b5450..ddd9605 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  */
@@ -17,7 +13,6 @@
 #include "hsr_main.h"
 #include "hsr_framereg.h"
 
-
 struct hsr_node;
 
 struct hsr_frame_info {
@@ -32,7 +27,6 @@
 	bool is_local_exclusive;
 };
 
-
 /* The uses I can see for these HSR supervision frames are:
  * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type =
  *    22") to reset any sequence_nr counters belonging to that node. Useful if
@@ -50,46 +44,45 @@
  */
 static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
 {
-	struct ethhdr *ethHdr;
-	struct hsr_sup_tag *hsrSupTag;
-	struct hsrv1_ethhdr_sp *hsrV1Hdr;
+	struct ethhdr *eth_hdr;
+	struct hsr_sup_tag *hsr_sup_tag;
+	struct hsrv1_ethhdr_sp *hsr_V1_hdr;
 
 	WARN_ON_ONCE(!skb_mac_header_was_set(skb));
-	ethHdr = (struct ethhdr *) skb_mac_header(skb);
+	eth_hdr = (struct ethhdr *)skb_mac_header(skb);
 
 	/* Correct addr? */
-	if (!ether_addr_equal(ethHdr->h_dest,
+	if (!ether_addr_equal(eth_hdr->h_dest,
 			      hsr->sup_multicast_addr))
 		return false;
 
 	/* Correct ether type?. */
-	if (!(ethHdr->h_proto == htons(ETH_P_PRP)
-			|| ethHdr->h_proto == htons(ETH_P_HSR)))
+	if (!(eth_hdr->h_proto == htons(ETH_P_PRP) ||
+	      eth_hdr->h_proto == htons(ETH_P_HSR)))
 		return false;
 
 	/* Get the supervision header from correct location. */
-	if (ethHdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */
-		hsrV1Hdr = (struct hsrv1_ethhdr_sp *) skb_mac_header(skb);
-		if (hsrV1Hdr->hsr.encap_proto != htons(ETH_P_PRP))
+	if (eth_hdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */
+		hsr_V1_hdr = (struct hsrv1_ethhdr_sp *)skb_mac_header(skb);
+		if (hsr_V1_hdr->hsr.encap_proto != htons(ETH_P_PRP))
 			return false;
 
-		hsrSupTag = &hsrV1Hdr->hsr_sup;
+		hsr_sup_tag = &hsr_V1_hdr->hsr_sup;
 	} else {
-		hsrSupTag = &((struct hsrv0_ethhdr_sp *) skb_mac_header(skb))->hsr_sup;
+		hsr_sup_tag =
+		     &((struct hsrv0_ethhdr_sp *)skb_mac_header(skb))->hsr_sup;
 	}
 
-	if ((hsrSupTag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) &&
-	    (hsrSupTag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK))
+	if (hsr_sup_tag->HSR_TLV_type != HSR_TLV_ANNOUNCE &&
+	    hsr_sup_tag->HSR_TLV_type != HSR_TLV_LIFE_CHECK)
 		return false;
-	if ((hsrSupTag->HSR_TLV_Length != 12) &&
-			(hsrSupTag->HSR_TLV_Length !=
-					sizeof(struct hsr_sup_payload)))
+	if (hsr_sup_tag->HSR_TLV_length != 12 &&
+	    hsr_sup_tag->HSR_TLV_length != sizeof(struct hsr_sup_payload))
 		return false;
 
 	return true;
 }
 
-
 static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in,
 					   struct hsr_frame_info *frame)
 {
@@ -100,7 +93,7 @@
 	skb_pull(skb_in, HSR_HLEN);
 	skb = __pskb_copy(skb_in, skb_headroom(skb_in) - HSR_HLEN, GFP_ATOMIC);
 	skb_push(skb_in, HSR_HLEN);
-	if (skb == NULL)
+	if (!skb)
 		return NULL;
 
 	skb_reset_mac_header(skb);
@@ -108,7 +101,7 @@
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		skb->csum_start -= HSR_HLEN;
 
-	copylen = 2*ETH_ALEN;
+	copylen = 2 * ETH_ALEN;
 	if (frame->is_vlan)
 		copylen += VLAN_HLEN;
 	src = skb_mac_header(skb_in);
@@ -127,9 +120,8 @@
 	return skb_clone(frame->skb_std, GFP_ATOMIC);
 }
 
-
 static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame,
-			 struct hsr_port *port, u8 protoVersion)
+			 struct hsr_port *port, u8 proto_version)
 {
 	struct hsr_ethhdr *hsr_ethhdr;
 	int lane_id;
@@ -144,13 +136,13 @@
 	if (frame->is_vlan)
 		lsdu_size -= 4;
 
-	hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb);
+	hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb);
 
 	set_hsr_tag_path(&hsr_ethhdr->hsr_tag, lane_id);
 	set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size);
 	hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr);
 	hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto;
-	hsr_ethhdr->ethhdr.h_proto = htons(protoVersion ?
+	hsr_ethhdr->ethhdr.h_proto = htons(proto_version ?
 			ETH_P_HSR : ETH_P_PRP);
 }
 
@@ -164,7 +156,7 @@
 
 	/* Create the new skb with enough headroom to fit the HSR tag */
 	skb = __pskb_copy(skb_o, skb_headroom(skb_o) + HSR_HLEN, GFP_ATOMIC);
-	if (skb == NULL)
+	if (!skb)
 		return NULL;
 	skb_reset_mac_header(skb);
 
@@ -180,7 +172,7 @@
 	memmove(dst, src, movelen);
 	skb_reset_mac_header(skb);
 
-	hsr_fill_tag(skb, frame, port, port->hsr->protVersion);
+	hsr_fill_tag(skb, frame, port, port->hsr->prot_version);
 
 	return skb;
 }
@@ -194,7 +186,7 @@
 	if (frame->skb_hsr)
 		return skb_clone(frame->skb_hsr, GFP_ATOMIC);
 
-	if ((port->type != HSR_PT_SLAVE_A) && (port->type != HSR_PT_SLAVE_B)) {
+	if (port->type != HSR_PT_SLAVE_A && port->type != HSR_PT_SLAVE_B) {
 		WARN_ONCE(1, "HSR: Bug: trying to create a tagged frame for a non-ring port");
 		return NULL;
 	}
@@ -202,7 +194,6 @@
 	return create_tagged_skb(frame->skb_std, frame, port);
 }
 
-
 static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev,
 			       struct hsr_node *node_src)
 {
@@ -237,7 +228,6 @@
 	return dev_queue_xmit(skb);
 }
 
-
 /* Forward the frame through all devices except:
  * - Back through the receiving device
  * - If it's a HSR frame: through a device where it has passed before
@@ -260,11 +250,11 @@
 			continue;
 
 		/* Don't deliver locally unless we should */
-		if ((port->type == HSR_PT_MASTER) && !frame->is_local_dest)
+		if (port->type == HSR_PT_MASTER && !frame->is_local_dest)
 			continue;
 
 		/* Deliver frames directly addressed to us to master only */
-		if ((port->type != HSR_PT_MASTER) && frame->is_local_exclusive)
+		if (port->type != HSR_PT_MASTER && frame->is_local_exclusive)
 			continue;
 
 		/* Don't send frame over port where it has been sent before */
@@ -272,7 +262,7 @@
 					   frame->sequence_nr))
 			continue;
 
-		if (frame->is_supervision && (port->type == HSR_PT_MASTER)) {
+		if (frame->is_supervision && port->type == HSR_PT_MASTER) {
 			hsr_handle_sup_frame(frame->skb_hsr,
 					     frame->node_src,
 					     frame->port_rcv);
@@ -283,7 +273,7 @@
 			skb = frame_get_tagged_skb(frame, port);
 		else
 			skb = frame_get_stripped_skb(frame, port);
-		if (skb == NULL) {
+		if (!skb) {
 			/* FIXME: Record the dropped frame? */
 			continue;
 		}
@@ -296,7 +286,6 @@
 	}
 }
 
-
 static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb,
 			     struct hsr_frame_info *frame)
 {
@@ -307,16 +296,15 @@
 		frame->is_local_exclusive = false;
 	}
 
-	if ((skb->pkt_type == PACKET_HOST) ||
-	    (skb->pkt_type == PACKET_MULTICAST) ||
-	    (skb->pkt_type == PACKET_BROADCAST)) {
+	if (skb->pkt_type == PACKET_HOST ||
+	    skb->pkt_type == PACKET_MULTICAST ||
+	    skb->pkt_type == PACKET_BROADCAST) {
 		frame->is_local_dest = true;
 	} else {
 		frame->is_local_dest = false;
 	}
 }
 
-
 static int hsr_fill_frame_info(struct hsr_frame_info *frame,
 			       struct sk_buff *skb, struct hsr_port *port)
 {
@@ -325,18 +313,18 @@
 
 	frame->is_supervision = is_supervision_frame(port->hsr, skb);
 	frame->node_src = hsr_get_node(port, skb, frame->is_supervision);
-	if (frame->node_src == NULL)
+	if (!frame->node_src)
 		return -1; /* Unknown node and !is_supervision, or no mem */
 
-	ethhdr = (struct ethhdr *) skb_mac_header(skb);
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
 	frame->is_vlan = false;
 	if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
 		frame->is_vlan = true;
 		/* FIXME: */
 		WARN_ONCE(1, "HSR: VLAN not yet supported");
 	}
-	if (ethhdr->h_proto == htons(ETH_P_PRP)
-			|| ethhdr->h_proto == htons(ETH_P_HSR)) {
+	if (ethhdr->h_proto == htons(ETH_P_PRP) ||
+	    ethhdr->h_proto == htons(ETH_P_HSR)) {
 		frame->skb_std = NULL;
 		frame->skb_hsr = skb;
 		frame->sequence_nr = hsr_get_skb_sequence_nr(skb);
@@ -371,10 +359,17 @@
 		goto out_drop;
 	hsr_register_frame_in(frame.node_src, port, frame.sequence_nr);
 	hsr_forward_do(&frame);
+	/* Gets called for ingress frames as well as egress from master port.
+	 * So check and increment stats for master port only here.
+	 */
+	if (port->type == HSR_PT_MASTER) {
+		port->dev->stats.tx_packets++;
+		port->dev->stats.tx_bytes += skb->len;
+	}
 
-	if (frame.skb_hsr != NULL)
+	if (frame.skb_hsr)
 		kfree_skb(frame.skb_hsr);
-	if (frame.skb_std != NULL)
+	if (frame.skb_std)
 		kfree_skb(frame.skb_std);
 	return;
 
diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h
index 5c5bc4b..51a6929 100644
--- a/net/hsr/hsr_forward.h
+++ b/net/hsr/hsr_forward.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  */
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 286ceb4..292be44 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  *
@@ -22,23 +18,8 @@
 #include "hsr_framereg.h"
 #include "hsr_netlink.h"
 
-
-struct hsr_node {
-	struct list_head	mac_list;
-	unsigned char		MacAddressA[ETH_ALEN];
-	unsigned char		MacAddressB[ETH_ALEN];
-	/* Local slave through which AddrB frames are received from this node */
-	enum hsr_port_type	AddrB_port;
-	unsigned long		time_in[HSR_PT_PORTS];
-	bool			time_in_stale[HSR_PT_PORTS];
-	u16			seq_out[HSR_PT_PORTS];
-	struct rcu_head		rcu_head;
-};
-
-
 /*	TODO: use hash lists for mac addresses (linux/jhash.h)?    */
 
-
 /* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b,
  * false otherwise.
  */
@@ -47,16 +28,16 @@
 	/* Remove inconsistency where
 	 * seq_nr_after(a, b) == seq_nr_before(a, b)
 	 */
-	if ((int) b - a == 32768)
+	if ((int)b - a == 32768)
 		return false;
 
-	return (((s16) (b - a)) < 0);
+	return (((s16)(b - a)) < 0);
 }
+
 #define seq_nr_before(a, b)		seq_nr_after((b), (a))
 #define seq_nr_after_or_eq(a, b)	(!seq_nr_before((a), (b)))
 #define seq_nr_before_or_eq(a, b)	(!seq_nr_after((a), (b)))
 
-
 bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
 {
 	struct hsr_node *node;
@@ -68,9 +49,9 @@
 		return false;
 	}
 
-	if (ether_addr_equal(addr, node->MacAddressA))
+	if (ether_addr_equal(addr, node->macaddress_A))
 		return true;
-	if (ether_addr_equal(addr, node->MacAddressB))
+	if (ether_addr_equal(addr, node->macaddress_B))
 		return true;
 
 	return false;
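The (s16) cast above is the usual serial-number trick: interpreting the 16-bit difference as signed makes the comparison survive the 65535 -> 0 wrap, and the explicit 32768 check resolves the one ambiguous half-way case consistently for after() and before(). A self-contained userspace sketch of the same logic:

	#include <stdbool.h>
	#include <stdint.h>

	static bool demo_seq_nr_after(uint16_t a, uint16_t b)
	{
		/* A distance of exactly half the sequence space is ambiguous;
		 * answer "not after" so after() and before() stay consistent.
		 */
		if ((int)b - a == 32768)
			return false;

		/* Signed 16-bit difference: negative means a is ahead of b,
		 * even across the wrap (e.g. a = 2 is "after" b = 65530).
		 */
		return (int16_t)(b - a) < 0;
	}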
@@ -78,20 +59,19 @@
 
 /* Search for mac entry. Caller must hold rcu read lock.
  */
-static struct hsr_node *find_node_by_AddrA(struct list_head *node_db,
-					   const unsigned char addr[ETH_ALEN])
+static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
+					    const unsigned char addr[ETH_ALEN])
 {
 	struct hsr_node *node;
 
 	list_for_each_entry_rcu(node, node_db, mac_list) {
-		if (ether_addr_equal(node->MacAddressA, addr))
+		if (ether_addr_equal(node->macaddress_A, addr))
 			return node;
 	}
 
 	return NULL;
 }
 
-
 /* Helper for device init; the self_node_db is used in hsr_rcv() to recognize
 * frames from ourselves that have been looped over the HSR ring.
  */
@@ -105,12 +85,12 @@
 	if (!node)
 		return -ENOMEM;
 
-	ether_addr_copy(node->MacAddressA, addr_a);
-	ether_addr_copy(node->MacAddressB, addr_b);
+	ether_addr_copy(node->macaddress_A, addr_a);
+	ether_addr_copy(node->macaddress_B, addr_b);
 
 	rcu_read_lock();
 	oldnode = list_first_or_null_rcu(self_node_db,
-						struct hsr_node, mac_list);
+					 struct hsr_node, mac_list);
 	if (oldnode) {
 		list_replace_rcu(&oldnode->mac_list, &node->mac_list);
 		rcu_read_unlock();
@@ -124,8 +104,29 @@
 	return 0;
 }
 
+void hsr_del_self_node(struct list_head *self_node_db)
+{
+	struct hsr_node *node;
 
-/* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA;
+	rcu_read_lock();
+	node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
+	rcu_read_unlock();
+	if (node) {
+		list_del_rcu(&node->mac_list);
+		kfree(node);
+	}
+}
+
+void hsr_del_nodes(struct list_head *node_db)
+{
+	struct hsr_node *node;
+	struct hsr_node *tmp;
+
+	list_for_each_entry_safe(node, tmp, node_db, mac_list)
+		kfree(node);
+}
+
+/* Allocate an hsr_node and add it to node_db. 'addr' is the node's address_A;
  * seq_out is used to initialize filtering of outgoing duplicate frames
  * originating from the newly added node.
  */
@@ -140,7 +141,7 @@
 	if (!node)
 		return NULL;
 
-	ether_addr_copy(node->MacAddressA, addr);
+	ether_addr_copy(node->macaddress_A, addr);
 
 	/* We are only interested in time diffs here, so use current jiffies
 	 * as initialization. (0 could trigger a spurious ring error warning).
@@ -169,19 +170,19 @@
 	if (!skb_mac_header_was_set(skb))
 		return NULL;
 
-	ethhdr = (struct ethhdr *) skb_mac_header(skb);
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
 
 	list_for_each_entry_rcu(node, node_db, mac_list) {
-		if (ether_addr_equal(node->MacAddressA, ethhdr->h_source))
+		if (ether_addr_equal(node->macaddress_A, ethhdr->h_source))
 			return node;
-		if (ether_addr_equal(node->MacAddressB, ethhdr->h_source))
+		if (ether_addr_equal(node->macaddress_B, ethhdr->h_source))
 			return node;
 	}
 
 	/* Anyone may create a node entry for a node connected to an HSR device. */
 
-	if (ethhdr->h_proto == htons(ETH_P_PRP)
-			|| ethhdr->h_proto == htons(ETH_P_HSR)) {
+	if (ethhdr->h_proto == htons(ETH_P_PRP) ||
+	    ethhdr->h_proto == htons(ETH_P_HSR)) {
 		/* Use the existing sequence_nr from the tag as starting point
 		 * for filtering duplicate frames.
 		 */
@@ -198,8 +199,8 @@
 	return hsr_add_node(node_db, ethhdr->h_source, seq_out);
 }
 
-/* Use the Supervision frame's info about an eventual MacAddressB for merging
- * nodes that has previously had their MacAddressB registered as a separate
+/* Use the Supervision frame's info about a possible macaddress_B for merging
+ * nodes that have previously had their macaddress_B registered as a separate
  * node.
  */
 void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
@@ -211,7 +212,7 @@
 	struct list_head *node_db;
 	int i;
 
-	ethhdr = (struct ethhdr *) skb_mac_header(skb);
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
 
 	/* Leave the ethernet header. */
 	skb_pull(skb, sizeof(struct ethhdr));
@@ -223,14 +224,14 @@
 	/* And leave the HSR sup tag. */
 	skb_pull(skb, sizeof(struct hsr_sup_tag));
 
-	hsr_sp = (struct hsr_sup_payload *) skb->data;
+	hsr_sp = (struct hsr_sup_payload *)skb->data;
 
-	/* Merge node_curr (registered on MacAddressB) into node_real */
+	/* Merge node_curr (registered on macaddress_B) into node_real */
 	node_db = &port_rcv->hsr->node_db;
-	node_real = find_node_by_AddrA(node_db, hsr_sp->MacAddressA);
+	node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A);
 	if (!node_real)
 		/* No frame received from AddrA of this node yet */
-		node_real = hsr_add_node(node_db, hsr_sp->MacAddressA,
+		node_real = hsr_add_node(node_db, hsr_sp->macaddress_A,
 					 HSR_SEQNR_START - 1);
 	if (!node_real)
 		goto done; /* No mem */
@@ -238,17 +239,18 @@
 		/* Node has already been merged */
 		goto done;
 
-	ether_addr_copy(node_real->MacAddressB, ethhdr->h_source);
+	ether_addr_copy(node_real->macaddress_B, ethhdr->h_source);
 	for (i = 0; i < HSR_PT_PORTS; i++) {
 		if (!node_curr->time_in_stale[i] &&
 		    time_after(node_curr->time_in[i], node_real->time_in[i])) {
 			node_real->time_in[i] = node_curr->time_in[i];
-			node_real->time_in_stale[i] = node_curr->time_in_stale[i];
+			node_real->time_in_stale[i] =
+						node_curr->time_in_stale[i];
 		}
 		if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i]))
 			node_real->seq_out[i] = node_curr->seq_out[i];
 	}
-	node_real->AddrB_port = port_rcv->type;
+	node_real->addr_B_port = port_rcv->type;
 
 	list_del_rcu(&node_curr->mac_list);
 	kfree_rcu(node_curr, rcu_head);
@@ -257,11 +259,10 @@
 	skb_push(skb, sizeof(struct hsrv1_ethhdr_sp));
 }
 
-
 /* 'skb' is a frame meant for this host, that is to be passed to upper layers.
  *
  * If the frame was sent by a node's B interface, replace the source
- * address with that node's "official" address (MacAddressA) so that upper
+ * address with that node's "official" address (macaddress_A) so that upper
  * layers recognize where it came from.
  */
 void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb)
@@ -271,7 +272,7 @@
 		return;
 	}
 
-	memcpy(&eth_hdr(skb)->h_source, node->MacAddressA, ETH_ALEN);
+	memcpy(&eth_hdr(skb)->h_source, node->macaddress_A, ETH_ALEN);
 }
 
 /* 'skb' is a frame meant for another host.
@@ -296,18 +297,18 @@
 	if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest))
 		return;
 
-	node_dst = find_node_by_AddrA(&port->hsr->node_db, eth_hdr(skb)->h_dest);
+	node_dst = find_node_by_addr_A(&port->hsr->node_db,
+				       eth_hdr(skb)->h_dest);
 	if (!node_dst) {
 		WARN_ONCE(1, "%s: Unknown node\n", __func__);
 		return;
 	}
-	if (port->type != node_dst->AddrB_port)
+	if (port->type != node_dst->addr_B_port)
 		return;
 
-	ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->MacAddressB);
+	ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B);
 }
 
-
 void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
 			   u16 sequence_nr)
 {
@@ -340,7 +341,6 @@
 	return 0;
 }
 
-
 static struct hsr_port *get_late_port(struct hsr_priv *hsr,
 				      struct hsr_node *node)
 {
@@ -361,7 +361,6 @@
 	return NULL;
 }
 
-
 /* Remove stale sequence_nr records. Called by timer every
  * HSR_LIFE_CHECK_INTERVAL (two seconds or so).
  */
@@ -375,14 +374,22 @@
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
+		/* Don't prune our own node. Neither time_in[HSR_PT_SLAVE_A]
+		 * nor time_in[HSR_PT_SLAVE_B] will ever be updated for
+		 * the master port. Thus the master node would be repeatedly
+		 * pruned, leading to packet loss.
+		 */
+		if (hsr_addr_is_self(hsr, node->macaddress_A))
+			continue;
+
 		/* Shorthand */
 		time_a = node->time_in[HSR_PT_SLAVE_A];
 		time_b = node->time_in[HSR_PT_SLAVE_B];
 
 		/* Check for timestamps old enough to risk wrap-around */
-		if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET/2))
+		if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2))
 			node->time_in_stale[HSR_PT_SLAVE_A] = true;
-		if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET/2))
+		if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2))
 			node->time_in_stale[HSR_PT_SLAVE_B] = true;
 
 		/* Get age of newest frame from node.
@@ -397,26 +404,29 @@
 
 		/* Warn of ring error only as long as we get frames at all */
 		if (time_is_after_jiffies(timestamp +
-					msecs_to_jiffies(1.5*MAX_SLAVE_DIFF))) {
+				msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) {
 			rcu_read_lock();
 			port = get_late_port(hsr, node);
-			if (port != NULL)
-				hsr_nl_ringerror(hsr, node->MacAddressA, port);
+			if (port)
+				hsr_nl_ringerror(hsr, node->macaddress_A, port);
 			rcu_read_unlock();
 		}
 
 		/* Prune old entries */
 		if (time_is_before_jiffies(timestamp +
-					msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
-			hsr_nl_nodedown(hsr, node->MacAddressA);
+				msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
+			hsr_nl_nodedown(hsr, node->macaddress_A);
 			list_del_rcu(&node->mac_list);
 			/* Note that we need to free this entry later: */
 			kfree_rcu(node, rcu_head);
 		}
 	}
 	rcu_read_unlock();
-}
 
+	/* Restart timer */
+	mod_timer(&hsr->prune_timer,
+		  jiffies + msecs_to_jiffies(PRUNE_PERIOD));
+}
 
 void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
 			unsigned char addr[ETH_ALEN])
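The prune timer now re-arms itself at the end of its own callback instead of being re-armed elsewhere, which keeps pruning ticking every PRUNE_PERIOD for the lifetime of the device. The usual shape of that pattern, assuming the prune_timer field of hsr_priv as declared later in this patch (sketch only):

	static void demo_prune_nodes(struct timer_list *t)
	{
		struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);

		/* ... walk hsr->node_db under RCU and drop stale entries ... */

		/* Re-arm: the callback schedules its own next run. */
		mod_timer(&hsr->prune_timer,
			  jiffies + msecs_to_jiffies(PRUNE_PERIOD));
	}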
@@ -427,20 +437,19 @@
 		node = list_first_or_null_rcu(&hsr->node_db,
 					      struct hsr_node, mac_list);
 		if (node)
-			ether_addr_copy(addr, node->MacAddressA);
+			ether_addr_copy(addr, node->macaddress_A);
 		return node;
 	}
 
 	node = _pos;
 	list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) {
-		ether_addr_copy(addr, node->MacAddressA);
+		ether_addr_copy(addr, node->macaddress_A);
 		return node;
 	}
 
 	return NULL;
 }
 
-
 int hsr_get_node_data(struct hsr_priv *hsr,
 		      const unsigned char *addr,
 		      unsigned char addr_b[ETH_ALEN],
@@ -454,15 +463,14 @@
 	struct hsr_port *port;
 	unsigned long tdiff;
 
-
 	rcu_read_lock();
-	node = find_node_by_AddrA(&hsr->node_db, addr);
+	node = find_node_by_addr_A(&hsr->node_db, addr);
 	if (!node) {
 		rcu_read_unlock();
 		return -ENOENT;	/* No such entry */
 	}
 
-	ether_addr_copy(addr_b, node->MacAddressB);
+	ether_addr_copy(addr_b, node->macaddress_B);
 
 	tdiff = jiffies - node->time_in[HSR_PT_SLAVE_A];
 	if (node->time_in_stale[HSR_PT_SLAVE_A])
@@ -488,8 +496,8 @@
 	*if1_seq = node->seq_out[HSR_PT_SLAVE_B];
 	*if2_seq = node->seq_out[HSR_PT_SLAVE_A];
 
-	if (node->AddrB_port != HSR_PT_NONE) {
-		port = hsr_port_get_hsr(hsr, node->AddrB_port);
+	if (node->addr_B_port != HSR_PT_NONE) {
+		port = hsr_port_get_hsr(hsr, node->addr_B_port);
 		*addr_b_ifindex = port->dev->ifindex;
 	} else {
 		*addr_b_ifindex = -1;
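find_node_by_addr_A() walks an RCU list, so it is only valid under the RCU read lock, as hsr_get_node_data() above demonstrates. A caller sketch with the locking made explicit (illustrative only; the pointer must not be dereferenced after the unlock):

	static bool demo_node_known(struct hsr_priv *hsr,
				    const unsigned char *addr)
	{
		struct hsr_node *node;
		bool known;

		rcu_read_lock();
		node = find_node_by_addr_A(&hsr->node_db, addr);
		known = node != NULL;
		rcu_read_unlock();

		return known;
	}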
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 370b459..89a3ce3 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  */
@@ -16,6 +12,8 @@
 
 struct hsr_node;
 
+void hsr_del_self_node(struct list_head *self_node_db);
+void hsr_del_nodes(struct list_head *node_db);
 struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
 			      u16 seq_out);
 struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
@@ -51,4 +49,16 @@
 		      int *if2_age,
 		      u16 *if2_seq);
 
+struct hsr_node {
+	struct list_head	mac_list;
+	unsigned char		macaddress_A[ETH_ALEN];
+	unsigned char		macaddress_B[ETH_ALEN];
+	/* Local slave through which AddrB frames are received from this node */
+	enum hsr_port_type	addr_B_port;
+	unsigned long		time_in[HSR_PT_PORTS];
+	bool			time_in_stale[HSR_PT_PORTS];
+	u16			seq_out[HSR_PT_PORTS];
+	struct rcu_head		rcu_head;
+};
+
 #endif /* __HSR_FRAMEREG_H */
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index cd37d00..b9988a6 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  */
@@ -19,7 +15,6 @@
 #include "hsr_framereg.h"
 #include "hsr_slave.h"
 
-
 static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
 			     void *ptr)
 {
@@ -31,12 +26,12 @@
 
 	dev = netdev_notifier_info_to_dev(ptr);
 	port = hsr_port_get_rtnl(dev);
-	if (port == NULL) {
+	if (!port) {
 		if (!is_hsr_master(dev))
 			return NOTIFY_DONE;	/* Not an HSR device */
 		hsr = netdev_priv(dev);
 		port = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
-		if (port == NULL) {
+		if (!port) {
 			/* Resend of notification concerning removed device? */
 			return NOTIFY_DONE;
 		}
@@ -63,7 +58,8 @@
 
 		if (port->type == HSR_PT_SLAVE_A) {
 			ether_addr_copy(master->dev->dev_addr, dev->dev_addr);
-			call_netdevice_notifiers(NETDEV_CHANGEADDR, master->dev);
+			call_netdevice_notifiers(NETDEV_CHANGEADDR,
+						 master->dev);
 		}
 
 		/* Make sure we recognize frames from ourselves in hsr_rcv() */
@@ -97,7 +93,6 @@
 	return NOTIFY_DONE;
 }
 
-
 struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt)
 {
 	struct hsr_port *port;
@@ -112,7 +107,6 @@
 	.notifier_call = hsr_netdev_notify,	/* Slave event notifications */
 };
 
-
 static int __init hsr_init(void)
 {
 	int res;
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 9b9909e..96fac69 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  */
@@ -15,7 +11,6 @@
 #include <linux/netdevice.h>
 #include <linux/list.h>
 
-
 /* Time constants as specified in the HSR specification (IEC-62439-3 2010)
  * Table 8.
  * All values in milliseconds.
@@ -24,7 +19,6 @@
 #define HSR_NODE_FORGET_TIME		60000 /* ms */
 #define HSR_ANNOUNCE_INTERVAL		  100 /* ms */
 
-
 /* By how much may slave1 and slave2 timestamps of latest received frame from
  * each node differ before we notify of communication problem?
  */
@@ -32,17 +26,14 @@
 #define HSR_SEQNR_START			(USHRT_MAX - 1024)
 #define HSR_SUP_SEQNR_START		(HSR_SEQNR_START / 2)
 
-
 /* How often shall we check for broken ring and remove node entries older than
  * HSR_NODE_FORGET_TIME?
  */
 #define PRUNE_PERIOD			 3000 /* ms */
 
-
 #define HSR_TLV_ANNOUNCE		   22
 #define HSR_TLV_LIFE_CHECK		   23
 
-
 /* HSR Tag.
  * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB,
  * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest,
@@ -83,15 +74,14 @@
 
 static inline void set_hsr_tag_path(struct hsr_tag *ht, u16 path)
 {
-	ht->path_and_LSDU_size = htons(
-			(ntohs(ht->path_and_LSDU_size) & 0x0FFF) | (path << 12));
+	ht->path_and_LSDU_size =
+		htons((ntohs(ht->path_and_LSDU_size) & 0x0FFF) | (path << 12));
 }
 
 static inline void set_hsr_tag_LSDU_size(struct hsr_tag *ht, u16 LSDU_size)
 {
-	ht->path_and_LSDU_size = htons(
-			(ntohs(ht->path_and_LSDU_size) & 0xF000) |
-			(LSDU_size & 0x0FFF));
+	ht->path_and_LSDU_size = htons((ntohs(ht->path_and_LSDU_size) &
+				       0xF000) | (LSDU_size & 0x0FFF));
 }
 
 struct hsr_ethhdr {
@@ -99,39 +89,38 @@
 	struct hsr_tag	hsr_tag;
 } __packed;
 
-
 /* HSR Supervision Frame data types.
  * Field names as defined in the IEC:2010 standard for HSR.
  */
 struct hsr_sup_tag {
-	__be16		path_and_HSR_Ver;
+	__be16		path_and_HSR_ver;
 	__be16		sequence_nr;
-	__u8		HSR_TLV_Type;
-	__u8		HSR_TLV_Length;
+	__u8		HSR_TLV_type;
+	__u8		HSR_TLV_length;
 } __packed;
 
 struct hsr_sup_payload {
-	unsigned char	MacAddressA[ETH_ALEN];
+	unsigned char	macaddress_A[ETH_ALEN];
 } __packed;
 
 static inline u16 get_hsr_stag_path(struct hsr_sup_tag *hst)
 {
-	return get_hsr_tag_path((struct hsr_tag *) hst);
+	return get_hsr_tag_path((struct hsr_tag *)hst);
 }
 
 static inline u16 get_hsr_stag_HSR_ver(struct hsr_sup_tag *hst)
 {
-	return get_hsr_tag_LSDU_size((struct hsr_tag *) hst);
+	return get_hsr_tag_LSDU_size((struct hsr_tag *)hst);
 }
 
 static inline void set_hsr_stag_path(struct hsr_sup_tag *hst, u16 path)
 {
-	set_hsr_tag_path((struct hsr_tag *) hst, path);
+	set_hsr_tag_path((struct hsr_tag *)hst, path);
 }
 
-static inline void set_hsr_stag_HSR_Ver(struct hsr_sup_tag *hst, u16 HSR_Ver)
+static inline void set_hsr_stag_HSR_ver(struct hsr_sup_tag *hst, u16 HSR_ver)
 {
-	set_hsr_tag_LSDU_size((struct hsr_tag *) hst, HSR_Ver);
+	set_hsr_tag_LSDU_size((struct hsr_tag *)hst, HSR_ver);
 }
 
 struct hsrv0_ethhdr_sp {
@@ -145,7 +134,6 @@
 	struct hsr_sup_tag	hsr_sup;
 } __packed;
 
-
 enum hsr_port_type {
 	HSR_PT_NONE = 0,	/* Must be 0, used by framereg */
 	HSR_PT_SLAVE_A,
@@ -171,10 +159,14 @@
 	struct timer_list	prune_timer;
 	int announce_count;
 	u16 sequence_nr;
-	u16 sup_sequence_nr;			/* For HSRv1 separate seq_nr for supervision */
-	u8 protVersion;					/* Indicate if HSRv0 or HSRv1. */
+	u16 sup_sequence_nr;	/* For HSRv1 separate seq_nr for supervision */
+	u8 prot_version;		/* Indicates HSRv0 or HSRv1. */
 	spinlock_t seqnr_lock;			/* locking for sequence_nr */
 	unsigned char		sup_multicast_addr[ETH_ALEN];
+#ifdef	CONFIG_DEBUG_FS
+	struct dentry *node_tbl_root;
+	struct dentry *node_tbl_file;
+#endif
 };
 
 #define hsr_for_each_port(hsr, port) \
@@ -187,8 +179,22 @@
 {
 	struct hsr_ethhdr *hsr_ethhdr;
 
-	hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb);
+	hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb);
 	return ntohs(hsr_ethhdr->hsr_tag.sequence_nr);
 }
 
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
+void hsr_debugfs_term(struct hsr_priv *priv);
+#else
+static inline int hsr_debugfs_init(struct hsr_priv *priv,
+				   struct net_device *hsr_dev)
+{
+	return 0;
+}
+
+static inline void hsr_debugfs_term(struct hsr_priv *priv)
+{}
+#endif
+
 #endif /*  __HSR_PRIVATE_H */
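path_and_LSDU_size packs two fields into a single big-endian u16: the 4-bit path in the top nibble and the 12-bit LSDU size below it, which is what the 0x0FFF/0xF000 masks and the shift by 12 in the helpers above implement (the htons/ntohs pair keeps the field big-endian on the wire). In host byte order the packing reduces to this sketch:

	#include <stdint.h>

	static uint16_t demo_pack_path_and_lsdu(uint16_t path, uint16_t lsdu_size)
	{
		/* path: top 4 bits; LSDU size: low 12 bits */
		return (uint16_t)((path << 12) | (lsdu_size & 0x0FFF));
	}

	static uint16_t demo_unpack_path(uint16_t v)      { return v >> 12; }
	static uint16_t demo_unpack_lsdu_size(uint16_t v) { return v & 0x0FFF; }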
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index b9cce0f..8f8337f 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  *
@@ -28,7 +24,6 @@
 	[IFLA_HSR_SEQ_NR]		= { .type = NLA_U16 },
 };
 
-
 /* Here, it seems a netdevice has already been allocated for us, and the
  * hsr_dev_setup routine has been executed. Nice!
  */
@@ -47,12 +42,14 @@
 		netdev_info(dev, "HSR: Slave1 device not specified\n");
 		return -EINVAL;
 	}
-	link[0] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE1]));
+	link[0] = __dev_get_by_index(src_net,
+				     nla_get_u32(data[IFLA_HSR_SLAVE1]));
 	if (!data[IFLA_HSR_SLAVE2]) {
 		netdev_info(dev, "HSR: Slave2 device not specified\n");
 		return -EINVAL;
 	}
-	link[1] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE2]));
+	link[1] = __dev_get_by_index(src_net,
+				     nla_get_u32(data[IFLA_HSR_SLAVE2]));
 
 	if (!link[0] || !link[1])
 		return -ENODEV;
@@ -119,8 +116,6 @@
 	.fill_info	= hsr_fill_info,
 };
 
-
-
 /* attribute policy */
 static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = {
 	[HSR_A_NODE_ADDR] = { .len = ETH_ALEN },
@@ -138,8 +133,6 @@
 	{ .name = "hsr-network", },
 };
 
-
-
 /* This is called if for some node with MAC address addr, we only get frames
  * over one of the slave interfaces. This would indicate an open network ring
  * (i.e. a link has failed somewhere).
@@ -156,7 +149,8 @@
 	if (!skb)
 		goto fail;
 
-	msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, HSR_C_RING_ERROR);
+	msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0,
+			       HSR_C_RING_ERROR);
 	if (!msg_head)
 		goto nla_put_failure;
 
@@ -201,7 +195,6 @@
 	if (!msg_head)
 		goto nla_put_failure;
 
-
 	res = nla_put(skb, HSR_A_NODE_ADDR, ETH_ALEN, addr);
 	if (res < 0)
 		goto nla_put_failure;
@@ -221,7 +214,6 @@
 	rcu_read_unlock();
 }
 
-
 /* HSR_C_GET_NODE_STATUS lets userspace query the internal HSR node table
  * about the status of a specific node in the network, defined by its MAC
  * address.
@@ -260,15 +252,13 @@
 		goto invalid;
 
 	hsr_dev = __dev_get_by_index(genl_info_net(info),
-					nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+				     nla_get_u32(info->attrs[HSR_A_IFINDEX]));
 	if (!hsr_dev)
 		goto invalid;
 	if (!is_hsr_master(hsr_dev))
 		goto invalid;
 
-
 	/* Send reply */
-
 	skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb_out) {
 		res = -ENOMEM;
@@ -276,8 +266,8 @@
 	}
 
 	msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid,
-				info->snd_seq, &hsr_genl_family, 0,
-				HSR_C_SET_NODE_STATUS);
+			       info->snd_seq, &hsr_genl_family, 0,
+			       HSR_C_SET_NODE_STATUS);
 	if (!msg_head) {
 		res = -ENOMEM;
 		goto nla_put_failure;
@@ -289,28 +279,30 @@
 
 	hsr = netdev_priv(hsr_dev);
 	res = hsr_get_node_data(hsr,
-			(unsigned char *) nla_data(info->attrs[HSR_A_NODE_ADDR]),
-			hsr_node_addr_b,
-			&addr_b_ifindex,
-			&hsr_node_if1_age,
-			&hsr_node_if1_seq,
-			&hsr_node_if2_age,
-			&hsr_node_if2_seq);
+				(unsigned char *)
+				nla_data(info->attrs[HSR_A_NODE_ADDR]),
+					 hsr_node_addr_b,
+					 &addr_b_ifindex,
+					 &hsr_node_if1_age,
+					 &hsr_node_if1_seq,
+					 &hsr_node_if2_age,
+					 &hsr_node_if2_seq);
 	if (res < 0)
 		goto nla_put_failure;
 
 	res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN,
-					nla_data(info->attrs[HSR_A_NODE_ADDR]));
+		      nla_data(info->attrs[HSR_A_NODE_ADDR]));
 	if (res < 0)
 		goto nla_put_failure;
 
 	if (addr_b_ifindex > -1) {
 		res = nla_put(skb_out, HSR_A_NODE_ADDR_B, ETH_ALEN,
-								hsr_node_addr_b);
+			      hsr_node_addr_b);
 		if (res < 0)
 			goto nla_put_failure;
 
-		res = nla_put_u32(skb_out, HSR_A_ADDR_B_IFINDEX, addr_b_ifindex);
+		res = nla_put_u32(skb_out, HSR_A_ADDR_B_IFINDEX,
+				  addr_b_ifindex);
 		if (res < 0)
 			goto nla_put_failure;
 	}
@@ -392,9 +384,7 @@
 	if (!is_hsr_master(hsr_dev))
 		goto invalid;
 
-
 	/* Send reply */
-
 	skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb_out) {
 		res = -ENOMEM;
@@ -402,8 +392,8 @@
 	}
 
 	msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid,
-				info->snd_seq, &hsr_genl_family, 0,
-				HSR_C_SET_NODE_LIST);
+			       info->snd_seq, &hsr_genl_family, 0,
+			       HSR_C_SET_NODE_LIST);
 	if (!msg_head) {
 		res = -ENOMEM;
 		goto nla_put_failure;
@@ -444,19 +434,18 @@
 	return res;
 }
 
-
 static const struct genl_ops hsr_ops[] = {
 	{
 		.cmd = HSR_C_GET_NODE_STATUS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = 0,
-		.policy = hsr_genl_policy,
 		.doit = hsr_get_node_status,
 		.dumpit = NULL,
 	},
 	{
 		.cmd = HSR_C_GET_NODE_LIST,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = 0,
-		.policy = hsr_genl_policy,
 		.doit = hsr_get_node_list,
 		.dumpit = NULL,
 	},
@@ -467,6 +456,7 @@
 	.name = "HSR",
 	.version = 1,
 	.maxattr = HSR_A_MAX,
+	.policy = hsr_genl_policy,
 	.module = THIS_MODULE,
 	.ops = hsr_ops,
 	.n_ops = ARRAY_SIZE(hsr_ops),
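Moving .policy from each genl_ops entry up to the genl_family follows the generic-netlink rework in which the core validates attributes for the whole family; pre-existing commands that userspace may call loosely opt out of the new strict validation via .validate flags, exactly as the two HSR ops do above. The resulting registration shape, condensed from the hunks above (sketch):

	static const struct genl_ops demo_ops[] = {
		{
			.cmd      = HSR_C_GET_NODE_STATUS,
			/* keep the pre-strict-validation behaviour */
			.validate = GENL_DONT_VALIDATE_STRICT |
				    GENL_DONT_VALIDATE_DUMP,
			.doit     = hsr_get_node_status,
		},
	};

	static struct genl_family demo_family __ro_after_init = {
		.name    = "HSR",
		.version = 1,
		.maxattr = HSR_A_MAX,
		.policy  = hsr_genl_policy,	/* family-wide, parsed by core */
		.module  = THIS_MODULE,
		.ops     = demo_ops,
		.n_ops   = ARRAY_SIZE(demo_ops),
	};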
diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h
index 3f6b95b..1121bb1 100644
--- a/net/hsr/hsr_netlink.h
+++ b/net/hsr/hsr_netlink.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  */
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index 56080da..ee56129 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  */
@@ -18,7 +14,6 @@
 #include "hsr_forward.h"
 #include "hsr_framereg.h"
 
-
 static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
 {
 	struct sk_buff *skb = *pskb;
@@ -61,12 +56,11 @@
 	return rcu_access_pointer(dev->rx_handler) == hsr_handle_frame;
 }
 
-
 static int hsr_check_dev_ok(struct net_device *dev)
 {
 	/* Don't allow HSR on non-ethernet like devices */
-	if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) ||
-	    (dev->addr_len != ETH_ALEN)) {
+	if ((dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+	    dev->addr_len != ETH_ALEN) {
 		netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n");
 		return -EINVAL;
 	}
@@ -99,7 +93,6 @@
 	return 0;
 }
 
-
 /* Setup device to be added to the HSR bridge. */
 static int hsr_portdev_setup(struct net_device *dev, struct hsr_port *port)
 {
@@ -143,11 +136,11 @@
 	}
 
 	port = hsr_port_get_hsr(hsr, type);
-	if (port != NULL)
+	if (port)
 		return -EBUSY;	/* This port already exists */
 
 	port = kzalloc(sizeof(*port), GFP_KERNEL);
-	if (port == NULL)
+	if (!port)
 		return -ENOMEM;
 
 	if (type != HSR_PT_MASTER) {
@@ -184,7 +177,7 @@
 	list_del_rcu(&port->port_list);
 
 	if (port != master) {
-		if (master != NULL) {
+		if (master) {
 			netdev_update_features(master->dev);
 			dev_set_mtu(master->dev, hsr_get_max_mtu(hsr));
 		}
@@ -200,4 +193,5 @@
 
 	if (port != master)
 		dev_put(port->dev);
+	kfree(port);
 }
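The new kfree(port) closes a leak: hsr_del_port() unlinked the port from the RCU list and dropped the device reference, but never freed the hsr_port allocated with kzalloc() in hsr_add_port(). The teardown order that makes the free safe follows the standard RCU pattern (sketch only; the real function also special-cases the master port):

	static void demo_del_port(struct hsr_port *port)
	{
		list_del_rcu(&port->port_list);	/* 1. unlink: no new readers */
		synchronize_rcu();		/* 2. wait out existing readers */
		dev_put(port->dev);		/* 3. drop the netdev reference */
		kfree(port);			/* 4. only now is the free safe */
	}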
diff --git a/net/hsr/hsr_slave.h b/net/hsr/hsr_slave.h
index 3ccfbf7..64b5495 100644
--- a/net/hsr/hsr_slave.h
+++ b/net/hsr/hsr_slave.h
@@ -1,11 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright 2011-2014 Autronica Fire and Security AS
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * Author(s):
  *	2011-2014 Arvid Brodin, arvid.brodin@alten.se
  */
 
diff --git a/net/ieee802154/6lowpan/Kconfig b/net/ieee802154/6lowpan/Kconfig
index d24f985..d1b4655 100644
--- a/net/ieee802154/6lowpan/Kconfig
+++ b/net/ieee802154/6lowpan/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config IEEE802154_6LOWPAN
 	tristate "6lowpan support over IEEE 802.15.4"
 	depends on 6LOWPAN
diff --git a/net/ieee802154/6lowpan/Makefile b/net/ieee802154/6lowpan/Makefile
index 6bfb270..f11d637 100644
--- a/net/ieee802154/6lowpan/Makefile
+++ b/net/ieee802154/6lowpan/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o
 
 ieee802154_6lowpan-y := core.o rx.o reassembly.o tx.o
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 3297e7f..c0b107c 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -58,13 +58,6 @@
 	.create	= lowpan_header_create,
 };
 
-static int lowpan_dev_init(struct net_device *ldev)
-{
-	netdev_lockdep_set_classes(ldev);
-
-	return 0;
-}
-
 static int lowpan_open(struct net_device *dev)
 {
 	if (!open_count)
@@ -96,7 +89,6 @@
 }
 
 static const struct net_device_ops lowpan_netdev_ops = {
-	.ndo_init		= lowpan_dev_init,
 	.ndo_start_xmit		= lowpan_xmit,
 	.ndo_open		= lowpan_open,
 	.ndo_stop		= lowpan_stop,
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index e7857a8..bbe9b3b 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*	6LoWPAN fragment reassembly
  *
- *
  *	Authors:
  *	Alexander Aring		<aar@pengutronix.de>
  *
  *	Based on: net/ipv6/reassembly.c
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) "6LoWPAN: " fmt
@@ -27,6 +22,7 @@
 #include <net/6lowpan.h>
 #include <net/ipv6_frag.h>
 #include <net/inet_frag.h>
+#include <net/ip.h>
 
 #include "6lowpan_i.h"
 
@@ -34,8 +30,8 @@
 
 static struct inet_frags lowpan_frags;
 
-static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
-			     struct sk_buff *prev, struct net_device *ldev);
+static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb,
+			     struct sk_buff *prev,  struct net_device *ldev);
 
 static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
 {
@@ -78,7 +74,7 @@
 	key.src = *src;
 	key.dst = *dst;
 
-	q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+	q = inet_frag_find(ieee802154_lowpan->fqdir, &key);
 	if (!q)
 		return NULL;
 
@@ -88,9 +84,15 @@
 static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
 			     struct sk_buff *skb, u8 frag_type)
 {
-	struct sk_buff *prev, *next;
+	struct sk_buff *prev_tail;
 	struct net_device *ldev;
-	int end, offset;
+	int end, offset, err;
+
+	/* inet_frag_queue_* functions use skb->cb; see struct ipfrag_skb_cb
+	 * in inet_fragment.c
+	 */
+	BUILD_BUG_ON(sizeof(struct lowpan_802154_cb) > sizeof(struct inet_skb_parm));
+	BUILD_BUG_ON(sizeof(struct lowpan_802154_cb) > sizeof(struct inet6_skb_parm));
 
 	if (fq->q.flags & INET_FRAG_COMPLETE)
 		goto err;
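The two BUILD_BUG_ON() lines above are compile-time asserts: the shared inet_frag_queue_* helpers scribble over skb->cb (see struct ipfrag_skb_cb), so the build must fail if lowpan_802154_cb ever outgrows the space the IPv4/IPv6 control blocks guarantee. For reference, the classic form of such an assert is a sketch like this (the kernel's actual macro differs in detail):

	/* Negative array size on failure => compile error, zero runtime cost. */
	#define DEMO_BUILD_BUG_ON(cond) \
		((void)sizeof(char[1 - 2 * !!(cond)]))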
@@ -117,45 +119,22 @@
 		}
 	}
 
-	/* Find out which fragments are in front and at the back of us
-	 * in the chain of fragments so far.  We must know where to put
-	 * this fragment, right?
-	 */
-	prev = fq->q.fragments_tail;
-	if (!prev ||
-	    lowpan_802154_cb(prev)->d_offset <
-	    lowpan_802154_cb(skb)->d_offset) {
-		next = NULL;
-		goto found;
-	}
-	prev = NULL;
-	for (next = fq->q.fragments; next != NULL; next = next->next) {
-		if (lowpan_802154_cb(next)->d_offset >=
-		    lowpan_802154_cb(skb)->d_offset)
-			break;	/* bingo! */
-		prev = next;
-	}
-
-found:
-	/* Insert this fragment in the chain of fragments. */
-	skb->next = next;
-	if (!next)
-		fq->q.fragments_tail = skb;
-	if (prev)
-		prev->next = skb;
-	else
-		fq->q.fragments = skb;
-
 	ldev = skb->dev;
 	if (ldev)
 		skb->dev = NULL;
+	barrier();
+
+	prev_tail = fq->q.fragments_tail;
+	err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+	if (err)
+		goto err;
 
 	fq->q.stamp = skb->tstamp;
 	if (frag_type == LOWPAN_DISPATCH_FRAG1)
 		fq->q.flags |= INET_FRAG_FIRST_IN;
 
 	fq->q.meat += skb->len;
-	add_frag_mem_limit(fq->q.net, skb->truesize);
+	add_frag_mem_limit(fq->q.fqdir, skb->truesize);
 
 	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 	    fq->q.meat == fq->q.len) {
@@ -163,10 +142,11 @@
 		unsigned long orefdst = skb->_skb_refdst;
 
 		skb->_skb_refdst = 0UL;
-		res = lowpan_frag_reasm(fq, prev, ldev);
+		res = lowpan_frag_reasm(fq, skb, prev_tail, ldev);
 		skb->_skb_refdst = orefdst;
 		return res;
 	}
+	skb_dst_drop(skb);
 
 	return -1;
 err:
@@ -175,97 +155,28 @@
 }
 
 /*	Check if this packet is complete.
- *	Returns NULL on failure by any reason, and pointer
- *	to current nexthdr field in reassembled frame.
  *
  *	It is called with the fq locked, and the caller must check that the
  *	queue is eligible for reassembly, i.e. it is not COMPLETE,
  *	the last and the first frames have arrived, and all the bits are here.
  */
-static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
-			     struct net_device *ldev)
+static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb,
+			     struct sk_buff *prev_tail, struct net_device *ldev)
 {
-	struct sk_buff *fp, *head = fq->q.fragments;
-	int sum_truesize;
+	void *reasm_data;
 
 	inet_frag_kill(&fq->q);
 
-	/* Make the one we just received the head. */
-	if (prev) {
-		head = prev->next;
-		fp = skb_clone(head, GFP_ATOMIC);
-
-		if (!fp)
-			goto out_oom;
-
-		fp->next = head->next;
-		if (!fp->next)
-			fq->q.fragments_tail = fp;
-		prev->next = fp;
-
-		skb_morph(head, fq->q.fragments);
-		head->next = fq->q.fragments->next;
-
-		consume_skb(fq->q.fragments);
-		fq->q.fragments = head;
-	}
-
-	/* Head of list must not be cloned. */
-	if (skb_unclone(head, GFP_ATOMIC))
+	reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+	if (!reasm_data)
 		goto out_oom;
+	inet_frag_reasm_finish(&fq->q, skb, reasm_data, false);
 
-	/* If the first fragment is fragmented itself, we split
-	 * it to two chunks: the first with data and paged part
-	 * and the second, holding only fragments.
-	 */
-	if (skb_has_frag_list(head)) {
-		struct sk_buff *clone;
-		int i, plen = 0;
-
-		clone = alloc_skb(0, GFP_ATOMIC);
-		if (!clone)
-			goto out_oom;
-		clone->next = head->next;
-		head->next = clone;
-		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
-		skb_frag_list_init(head);
-		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
-			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
-		clone->len = head->data_len - plen;
-		clone->data_len = clone->len;
-		head->data_len -= clone->len;
-		head->len -= clone->len;
-		add_frag_mem_limit(fq->q.net, clone->truesize);
-	}
-
-	WARN_ON(head == NULL);
-
-	sum_truesize = head->truesize;
-	for (fp = head->next; fp;) {
-		bool headstolen;
-		int delta;
-		struct sk_buff *next = fp->next;
-
-		sum_truesize += fp->truesize;
-		if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
-			kfree_skb_partial(fp, headstolen);
-		} else {
-			if (!skb_shinfo(head)->frag_list)
-				skb_shinfo(head)->frag_list = fp;
-			head->data_len += fp->len;
-			head->len += fp->len;
-			head->truesize += fp->truesize;
-		}
-		fp = next;
-	}
-	sub_frag_mem_limit(fq->q.net, sum_truesize);
-
-	head->next = NULL;
-	head->dev = ldev;
-	head->tstamp = fq->q.stamp;
-
-	fq->q.fragments = NULL;
+	skb->dev = ldev;
+	skb->tstamp = fq->q.stamp;
+	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
+	fq->q.last_run_head = NULL;
 
 	return 1;
 out_oom:
@@ -410,23 +321,18 @@
 static struct ctl_table lowpan_frags_ns_ctl_table[] = {
 	{
 		.procname	= "6lowpanfrag_high_thresh",
-		.data		= &init_net.ieee802154_lowpan.frags.high_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &init_net.ieee802154_lowpan.frags.low_thresh
 	},
 	{
 		.procname	= "6lowpanfrag_low_thresh",
-		.data		= &init_net.ieee802154_lowpan.frags.low_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra2		= &init_net.ieee802154_lowpan.frags.high_thresh
 	},
 	{
 		.procname	= "6lowpanfrag_time",
-		.data		= &init_net.ieee802154_lowpan.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -461,18 +367,17 @@
 		if (table == NULL)
 			goto err_alloc;
 
-		table[0].data = &ieee802154_lowpan->frags.high_thresh;
-		table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
-		table[0].extra2 = &init_net.ieee802154_lowpan.frags.high_thresh;
-		table[1].data = &ieee802154_lowpan->frags.low_thresh;
-		table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
-		table[2].data = &ieee802154_lowpan->frags.timeout;
-
 		/* Don't export sysctls to unprivileged users */
 		if (net->user_ns != &init_user_ns)
 			table[0].procname = NULL;
 	}
 
+	table[0].data	= &ieee802154_lowpan->fqdir->high_thresh;
+	table[0].extra1	= &ieee802154_lowpan->fqdir->low_thresh;
+	table[1].data	= &ieee802154_lowpan->fqdir->low_thresh;
+	table[1].extra2	= &ieee802154_lowpan->fqdir->high_thresh;
+	table[2].data	= &ieee802154_lowpan->fqdir->timeout;
+
 	hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
 	if (hdr == NULL)
 		goto err_reg;
@@ -539,32 +444,42 @@
 		net_ieee802154_lowpan(net);
 	int res;
 
-	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
-	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
-	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
-	ieee802154_lowpan->frags.f = &lowpan_frags;
 
-	res = inet_frags_init_net(&ieee802154_lowpan->frags);
+	res = fqdir_init(&ieee802154_lowpan->fqdir, &lowpan_frags, net);
 	if (res < 0)
 		return res;
+
+	ieee802154_lowpan->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+	ieee802154_lowpan->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+	ieee802154_lowpan->fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
 	res = lowpan_frags_ns_sysctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&ieee802154_lowpan->frags);
+		fqdir_exit(ieee802154_lowpan->fqdir);
 	return res;
 }
 
+static void __net_exit lowpan_frags_pre_exit_net(struct net *net)
+{
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
+
+	fqdir_pre_exit(ieee802154_lowpan->fqdir);
+}
+
 static void __net_exit lowpan_frags_exit_net(struct net *net)
 {
 	struct netns_ieee802154_lowpan *ieee802154_lowpan =
 		net_ieee802154_lowpan(net);
 
 	lowpan_frags_ns_sysctl_unregister(net);
-	inet_frags_exit_net(&ieee802154_lowpan->frags);
+	fqdir_exit(ieee802154_lowpan->fqdir);
 }
 
 static struct pernet_operations lowpan_frags_ops = {
-	.init = lowpan_frags_init_net,
-	.exit = lowpan_frags_exit_net,
+	.init		= lowpan_frags_init_net,
+	.pre_exit	= lowpan_frags_pre_exit_net,
+	.exit		= lowpan_frags_exit_net,
 };
 
 static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
@@ -629,7 +544,7 @@
 
 void lowpan_net_frag_exit(void)
 {
-	inet_frags_fini(&lowpan_frags);
 	lowpan_frags_sysctl_unregister();
 	unregister_pernet_subsys(&lowpan_frags_ops);
+	inet_frags_fini(&lowpan_frags);
 }
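The reassembly rewrite above replaces the hand-rolled singly linked fragment chain with the shared rbtree-based helpers: insert each fragment with inet_frag_queue_insert(), and once the first and last fragments are in and the byte counts match, build the full packet with inet_frag_reasm_prepare()/inet_frag_reasm_finish(). The common calling shape, condensed from the hunks above (sketch only; note prev_tail is sampled before the insert, as the kernel code does):

	static int demo_reasm_step(struct inet_frag_queue *q, struct sk_buff *skb,
				   int offset, int end, struct net_device *ldev)
	{
		struct sk_buff *prev_tail = q->fragments_tail;
		void *reasm_data;

		if (inet_frag_queue_insert(q, skb, offset, end))
			return -EINVAL;			/* e.g. overlap */

		if (q->flags != (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) ||
		    q->meat != q->len)
			return -EINPROGRESS;		/* more fragments due */

		reasm_data = inet_frag_reasm_prepare(q, skb, prev_tail);
		if (!reasm_data)
			return -ENOMEM;
		inet_frag_reasm_finish(q, skb, reasm_data, false);

		skb->dev = ldev;
		skb->tstamp = q->stamp;
		return 0;
	}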
diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c
index 649e7d4..ee17938 100644
--- a/net/ieee802154/6lowpan/rx.c
+++ b/net/ieee802154/6lowpan/rx.c
@@ -1,12 +1,4 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/if_arp.h>
 
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index ca53efa..0c07662 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -1,12 +1,4 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0-only
 
 #include <net/6lowpan.h>
 #include <net/ndisc.h>
@@ -48,6 +40,9 @@
 	const struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct neighbour *n;
 
+	if (!daddr)
+		return -EINVAL;
+
 	/* TODO:
 	 * if this packet isn't an IPv6 one, where should it be routed?
 	 */
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index 188135b..5dbbc2c 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 menuconfig IEEE802154
 	tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support"
 	---help---
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index fe225d9..de259b5 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2007, 2008, 2009 Siemens AG
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/slab.h>
@@ -32,11 +23,6 @@
 LIST_HEAD(cfg802154_rdev_list);
 int cfg802154_rdev_list_generation;
 
-static int wpan_phy_match(struct device *dev, const void *data)
-{
-	return !strcmp(dev_name(dev), (const char *)data);
-}
-
 struct wpan_phy *wpan_phy_find(const char *str)
 {
 	struct device *dev;
@@ -44,7 +30,7 @@
 	if (WARN_ON(!str))
 		return NULL;
 
-	dev = class_find_device(&wpan_phy_class, NULL, str, wpan_phy_match);
+	dev = class_find_device_by_name(&wpan_phy_class, str);
 	if (!dev)
 		return NULL;
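class_find_device_by_name() is the generic replacement for the open-coded dev_name()/strcmp matcher deleted above; the lookup stays a find-then-container_of, and the helper takes a reference on the device it returns. A sketch of the resulting wpan_phy_find() shape:

	struct wpan_phy *demo_wpan_phy_find(const char *str)
	{
		struct device *dev;

		dev = class_find_device_by_name(&wpan_phy_class, str);
		if (!dev)
			return NULL;

		/* The class lookup took a reference on dev for us. */
		return container_of(dev, struct wpan_phy, dev);
	}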
 
diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c
index c7439f0..af337cf 100644
--- a/net/ieee802154/header_ops.c
+++ b/net/ieee802154/header_ops.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2014 Fraunhofer ITWM
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
  */
diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h
index a5d7515..c5d91f7 100644
--- a/net/ieee802154/ieee802154.h
+++ b/net/ieee802154/ieee802154.h
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2007, 2008, 2009 Siemens AG
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 #ifndef IEEE_802154_LOCAL_H
 #define IEEE_802154_LOCAL_H
@@ -20,7 +11,6 @@
 #define IEEE802154_OP(_cmd, _func)			\
 	{						\
 		.cmd	= _cmd,				\
-		.policy	= ieee802154_policy,		\
 		.doit	= _func,			\
 		.dumpit	= NULL,				\
 		.flags	= GENL_ADMIN_PERM,		\
@@ -29,7 +19,6 @@
 #define IEEE802154_DUMP(_cmd, _func, _dump)		\
 	{						\
 		.cmd	= _cmd,				\
-		.policy	= ieee802154_policy,		\
 		.doit	= _func,			\
 		.dumpit	= _dump,			\
 	}
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 96636e3..7fe3b6b 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Netlink interface for IEEE 802.15.4 stack
  *
  * Copyright 2007, 2008 Siemens AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Sergey Lapin <slapin@ossfans.org>
  * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
@@ -136,6 +128,7 @@
 	.name		= IEEE802154_NL_NAME,
 	.version	= 1,
 	.maxattr	= IEEE802154_ATTR_MAX,
+	.policy		= ieee802154_policy,
 	.module		= THIS_MODULE,
 	.ops		= ieee802154_ops,
 	.n_ops		= ARRAY_SIZE(ieee802154_ops),
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index d3cbb32..6d091e4 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Netlink interface for IEEE 802.15.4 stack
  *
  * Copyright 2007, 2008 Siemens AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Sergey Lapin <slapin@ossfans.org>
  * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index b231e40..2cdc7e6 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Netlink interface for IEEE 802.15.4 stack
  *
  * Copyright 2007, 2008 Siemens AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Sergey Lapin <slapin@ossfans.org>
  * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
@@ -242,7 +234,7 @@
 		 * dev_set_mac_address require RTNL_LOCK
 		 */
 		rtnl_lock();
-		rc = dev_set_mac_address(dev, &addr);
+		rc = dev_set_mac_address(dev, &addr, NULL);
 		rtnl_unlock();
 		if (rc)
 			goto dev_unregister;
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 99f6c25..ffcfcef 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1,11 +1,5 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0-only
+/*
  *
  * Authors:
  * Alexander Aring <aar@pengutronix.de>
@@ -247,9 +241,11 @@
 	rtnl_lock();
 
 	if (!cb->args[0]) {
-		err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
-				  genl_family_attrbuf(&nl802154_fam),
-				  nl802154_fam.maxattr, nl802154_policy, NULL);
+		err = nlmsg_parse_deprecated(cb->nlh,
+					     GENL_HDRLEN + nl802154_fam.hdrsize,
+					     genl_family_attrbuf(&nl802154_fam),
+					     nl802154_fam.maxattr,
+					     nl802154_policy, NULL);
 		if (err)
 			goto out_unlock;
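nlmsg_parse_deprecated() keeps the permissive, pre-strict-validation parsing semantics that plain nlmsg_parse() used to provide, so existing userspace is not broken by attributes the policy does not know. The call shape used throughout this file, wrapped for illustration (sketch):

	static int demo_parse(struct netlink_callback *cb, struct nlattr **tb)
	{
		return nlmsg_parse_deprecated(cb->nlh,
					      GENL_HDRLEN + nl802154_fam.hdrsize,
					      tb, nl802154_fam.maxattr,
					      nl802154_policy,
					      NULL /* no extack */);
	}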
 
@@ -312,7 +308,7 @@
 static int
 nl802154_put_flags(struct sk_buff *msg, int attr, u32 mask)
 {
-	struct nlattr *nl_flags = nla_nest_start(msg, attr);
+	struct nlattr *nl_flags = nla_nest_start_noflag(msg, attr);
 	int i;
 
 	if (!nl_flags)
@@ -338,7 +334,7 @@
 	struct nlattr *nl_page;
 	unsigned long page;
 
-	nl_page = nla_nest_start(msg, NL802154_ATTR_CHANNELS_SUPPORTED);
+	nl_page = nla_nest_start_noflag(msg, NL802154_ATTR_CHANNELS_SUPPORTED);
 	if (!nl_page)
 		return -ENOBUFS;
 
@@ -360,11 +356,11 @@
 	struct nlattr *nl_caps, *nl_channels;
 	int i;
 
-	nl_caps = nla_nest_start(msg, NL802154_ATTR_WPAN_PHY_CAPS);
+	nl_caps = nla_nest_start_noflag(msg, NL802154_ATTR_WPAN_PHY_CAPS);
 	if (!nl_caps)
 		return -ENOBUFS;
 
-	nl_channels = nla_nest_start(msg, NL802154_CAP_ATTR_CHANNELS);
+	nl_channels = nla_nest_start_noflag(msg, NL802154_CAP_ATTR_CHANNELS);
 	if (!nl_channels)
 		return -ENOBUFS;
 
@@ -380,8 +376,8 @@
 	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) {
 		struct nlattr *nl_ed_lvls;
 
-		nl_ed_lvls = nla_nest_start(msg,
-					    NL802154_CAP_ATTR_CCA_ED_LEVELS);
+		nl_ed_lvls = nla_nest_start_noflag(msg,
+						   NL802154_CAP_ATTR_CCA_ED_LEVELS);
 		if (!nl_ed_lvls)
 			return -ENOBUFS;
 
@@ -396,7 +392,8 @@
 	if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) {
 		struct nlattr *nl_tx_pwrs;
 
-		nl_tx_pwrs = nla_nest_start(msg, NL802154_CAP_ATTR_TX_POWERS);
+		nl_tx_pwrs = nla_nest_start_noflag(msg,
+						   NL802154_CAP_ATTR_TX_POWERS);
 		if (!nl_tx_pwrs)
 			return -ENOBUFS;
 
@@ -504,7 +501,7 @@
 	if (nl802154_put_capabilities(msg, rdev))
 		goto nla_put_failure;
 
-	nl_cmds = nla_nest_start(msg, NL802154_ATTR_SUPPORTED_COMMANDS);
+	nl_cmds = nla_nest_start_noflag(msg, NL802154_ATTR_SUPPORTED_COMMANDS);
 	if (!nl_cmds)
 		goto nla_put_failure;
 
@@ -561,8 +558,10 @@
 					struct nl802154_dump_wpan_phy_state *state)
 {
 	struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);
-	int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb,
-			      nl802154_fam.maxattr, nl802154_policy, NULL);
+	int ret = nlmsg_parse_deprecated(cb->nlh,
+					 GENL_HDRLEN + nl802154_fam.hdrsize,
+					 tb, nl802154_fam.maxattr,
+					 nl802154_policy, NULL);
 
 	/* TODO: check whether we can handle an error here;
 	 * we have no backward compatibility
@@ -693,7 +692,8 @@
 
 	switch (desc->mode) {
 	case NL802154_KEY_ID_MODE_IMPLICIT:
-		nl_dev_addr = nla_nest_start(msg, NL802154_KEY_ID_ATTR_IMPLICIT);
+		nl_dev_addr = nla_nest_start_noflag(msg,
+						    NL802154_KEY_ID_ATTR_IMPLICIT);
 		if (!nl_dev_addr)
 			return -ENOBUFS;
 
@@ -768,7 +768,7 @@
 			 params.frame_counter))
 		return -ENOBUFS;
 
-	nl_key_id = nla_nest_start(msg, NL802154_ATTR_SEC_OUT_KEY_ID);
+	nl_key_id = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_OUT_KEY_ID);
 	if (!nl_key_id)
 		return -ENOBUFS;
 
@@ -1306,8 +1306,7 @@
 {
 	struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1];
 
-	if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla,
-				     nl802154_dev_addr_policy, NULL))
+	if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, nl802154_dev_addr_policy, NULL))
 		return -EINVAL;
 
 	if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] ||
@@ -1346,8 +1345,7 @@
 {
 	struct nlattr *attrs[NL802154_KEY_ID_ATTR_MAX + 1];
 
-	if (!nla || nla_parse_nested(attrs, NL802154_KEY_ID_ATTR_MAX, nla,
-				     nl802154_key_id_policy, NULL))
+	if (!nla || nla_parse_nested_deprecated(attrs, NL802154_KEY_ID_ATTR_MAX, nla, nl802154_key_id_policy, NULL))
 		return -EINVAL;
 
 	if (!attrs[NL802154_KEY_ID_ATTR_MODE])
@@ -1455,11 +1453,11 @@
 	if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
 		goto nla_put_failure;
 
-	nl_key = nla_nest_start(msg, NL802154_ATTR_SEC_KEY);
+	nl_key = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_KEY);
 	if (!nl_key)
 		goto nla_put_failure;
 
-	nl_key_id = nla_nest_start(msg, NL802154_KEY_ATTR_ID);
+	nl_key_id = nla_nest_start_noflag(msg, NL802154_KEY_ATTR_ID);
 	if (!nl_key_id)
 		goto nla_put_failure;
 
@@ -1562,9 +1560,7 @@
 	struct ieee802154_llsec_key_id id = { };
 	u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { };
 
-	if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
-			     info->attrs[NL802154_ATTR_SEC_KEY],
-			     nl802154_key_policy, info->extack))
+	if (nla_parse_nested_deprecated(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack))
 		return -EINVAL;
 
 	if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] ||
@@ -1612,9 +1608,7 @@
 	struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1];
 	struct ieee802154_llsec_key_id id;
 
-	if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
-			     info->attrs[NL802154_ATTR_SEC_KEY],
-			     nl802154_key_policy, info->extack))
+	if (nla_parse_nested_deprecated(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack))
 		return -EINVAL;
 
 	if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0)
@@ -1639,7 +1633,7 @@
 	if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
 		goto nla_put_failure;
 
-	nl_device = nla_nest_start(msg, NL802154_ATTR_SEC_DEVICE);
+	nl_device = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_DEVICE);
 	if (!nl_device)
 		goto nla_put_failure;
 
@@ -1728,8 +1722,7 @@
 {
 	struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1];
 
-	if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
-				     nla, nl802154_dev_policy, NULL))
+	if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ATTR_MAX, nla, nl802154_dev_policy, NULL))
 		return -EINVAL;
 
 	memset(dev, 0, sizeof(*dev));
@@ -1780,9 +1773,7 @@
 	struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1];
 	__le64 extended_addr;
 
-	if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
-			     info->attrs[NL802154_ATTR_SEC_DEVICE],
-			     nl802154_dev_policy, info->extack))
+	if (nla_parse_nested_deprecated(attrs, NL802154_DEV_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVICE], nl802154_dev_policy, info->extack))
 		return -EINVAL;
 
 	if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR])
@@ -1808,7 +1799,7 @@
 	if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
 		goto nla_put_failure;
 
-	nl_devkey = nla_nest_start(msg, NL802154_ATTR_SEC_DEVKEY);
+	nl_devkey = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_DEVKEY);
 	if (!nl_devkey)
 		goto nla_put_failure;
 
@@ -1818,7 +1809,7 @@
 			devkey->frame_counter))
 		goto nla_put_failure;
 
-	nl_key_id = nla_nest_start(msg, NL802154_DEVKEY_ATTR_ID);
+	nl_key_id = nla_nest_start_noflag(msg, NL802154_DEVKEY_ATTR_ID);
 	if (!nl_key_id)
 		goto nla_put_failure;
 
@@ -1908,9 +1899,7 @@
 	__le64 extended_addr;
 
 	if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] ||
-	    nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
-			     info->attrs[NL802154_ATTR_SEC_DEVKEY],
-			     nl802154_devkey_policy, info->extack) < 0)
+	    nla_parse_nested_deprecated(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack) < 0)
 		return -EINVAL;
 
 	if (!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER] ||
@@ -1940,9 +1929,7 @@
 	struct ieee802154_llsec_device_key key;
 	__le64 extended_addr;
 
-	if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
-			     info->attrs[NL802154_ATTR_SEC_DEVKEY],
-			     nl802154_devkey_policy, info->extack))
+	if (nla_parse_nested_deprecated(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack))
 		return -EINVAL;
 
 	if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR])
@@ -1976,7 +1963,7 @@
 	if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
 		goto nla_put_failure;
 
-	nl_seclevel = nla_nest_start(msg, NL802154_ATTR_SEC_LEVEL);
+	nl_seclevel = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_LEVEL);
 	if (!nl_seclevel)
 		goto nla_put_failure;
 
@@ -2062,8 +2049,7 @@
 {
 	struct nlattr *attrs[NL802154_SECLEVEL_ATTR_MAX + 1];
 
-	if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX,
-				     nla, nl802154_seclevel_policy, NULL))
+	if (!nla || nla_parse_nested_deprecated(attrs, NL802154_SECLEVEL_ATTR_MAX, nla, nl802154_seclevel_policy, NULL))
 		return -EINVAL;
 
 	memset(sl, 0, sizeof(*sl));
@@ -2217,131 +2203,131 @@
 static const struct genl_ops nl802154_ops[] = {
 	{
 		.cmd = NL802154_CMD_GET_WPAN_PHY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_get_wpan_phy,
 		.dumpit = nl802154_dump_wpan_phy,
 		.done = nl802154_dump_wpan_phy_done,
-		.policy = nl802154_policy,
 		/* can be retrieved by unprivileged users */
 		.internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_GET_INTERFACE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_get_interface,
 		.dumpit = nl802154_dump_interface,
-		.policy = nl802154_policy,
 		/* can be retrieved by unprivileged users */
 		.internal_flags = NL802154_FLAG_NEED_WPAN_DEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_NEW_INTERFACE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_new_interface,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_DEL_INTERFACE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_del_interface,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_WPAN_DEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_CHANNEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_channel,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_CCA_MODE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_cca_mode,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_CCA_ED_LEVEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_cca_ed_level,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_TX_POWER,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_tx_power,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_WPAN_PHY_NETNS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_wpan_phy_netns,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_PAN_ID,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_pan_id,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_SHORT_ADDR,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_short_addr,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_BACKOFF_EXPONENT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_backoff_exponent,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_MAX_CSMA_BACKOFFS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_max_csma_backoffs,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_MAX_FRAME_RETRIES,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_max_frame_retries,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_LBT_MODE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_lbt_mode,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_SET_ACKREQ_DEFAULT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_ackreq_default,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
@@ -2349,33 +2335,33 @@
 #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
 	{
 		.cmd = NL802154_CMD_SET_SEC_PARAMS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_set_llsec_params,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_GET_SEC_KEY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		/* TODO .doit by matching key id? */
 		.dumpit = nl802154_dump_llsec_key,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_NEW_SEC_KEY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_add_llsec_key,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_DEL_SEC_KEY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_del_llsec_key,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
@@ -2383,25 +2369,25 @@
 	/* TODO unique identifier must short+pan OR extended_addr */
 	{
 		.cmd = NL802154_CMD_GET_SEC_DEV,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		/* TODO .doit by matching extended_addr? */
 		.dumpit = nl802154_dump_llsec_dev,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_NEW_SEC_DEV,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_add_llsec_dev,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_DEL_SEC_DEV,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_del_llsec_dev,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
@@ -2409,51 +2395,51 @@
 	/* TODO remove complete devkey, put it as nested? */
 	{
 		.cmd = NL802154_CMD_GET_SEC_DEVKEY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		/* TODO doit by matching ??? */
 		.dumpit = nl802154_dump_llsec_devkey,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_NEW_SEC_DEVKEY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_add_llsec_devkey,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_DEL_SEC_DEVKEY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_del_llsec_devkey,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_GET_SEC_LEVEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		/* TODO .doit by matching frame_type? */
 		.dumpit = nl802154_dump_llsec_seclevel,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_NEW_SEC_LEVEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl802154_add_llsec_seclevel,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL802154_CMD_DEL_SEC_LEVEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		/* TODO match frame_type only? */
 		.doit = nl802154_del_llsec_seclevel,
-		.policy = nl802154_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL802154_FLAG_NEED_NETDEV |
 				  NL802154_FLAG_NEED_RTNL,
@@ -2466,6 +2452,7 @@
 	.hdrsize = 0,			/* no private header */
 	.version = 1,			/* no particular meaning now */
 	.maxattr = NL802154_ATTR_MAX,
+	.policy = nl802154_policy,
 	.netnsok = true,
 	.pre_doit = nl802154_pre_doit,
 	.post_doit = nl802154_post_doit,
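
Note on the nl802154.c conversion above: the per-op .policy pointers are removed in favor of a single family-wide .policy, which the genetlink core (since v5.2) applies to every command, and each op gains .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP to opt out of the strict attribute validation that is now the default. The nla_parse_nested_deprecated() and nla_nest_start_noflag() renames in the earlier hunks are mechanical: they preserve the old liberal behavior (no NLA_F_NESTED flag, unknown attributes tolerated) under an explicitly "deprecated" name, while the strict variants take over the plain names. A minimal sketch of the resulting pattern for a hypothetical family; all demo_* identifiers are invented for illustration:

	static const struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
		[DEMO_ATTR_IFINDEX]	= { .type = NLA_U32 },
	};

	static const struct genl_ops demo_ops[] = {
		{
			.cmd = DEMO_CMD_GET,
			/* keep pre-strict semantics for existing userspace */
			.validate = GENL_DONT_VALIDATE_STRICT |
				    GENL_DONT_VALIDATE_DUMP,
			.doit = demo_get_doit,
			/* no per-op .policy field any more */
		},
	};

	static struct genl_family demo_family __ro_after_init = {
		.name = "demo",
		.version = 1,
		.maxattr = DEMO_ATTR_MAX,
		.policy = demo_policy,	/* applied by the core to all ops */
		.ops = demo_ops,
		.n_ops = ARRAY_SIZE(demo_ops),
	};
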
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index 78f6f12..2c7a38d 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * nl802154.h
  *
  * Copyright (C) 2007, 2008 Siemens AG
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/kernel.h>
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index bc6b912..d93d453 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IEEE802154.4 socket interface
  *
  * Copyright 2007, 2008 Siemens AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Sergey Lapin <slapin@ossfans.org>
  * Maxim Gorbachyov <maxim.gorbachev@siemens.com>
@@ -164,10 +156,6 @@
 	struct sock *sk = sock->sk;
 
 	switch (cmd) {
-	case SIOCGSTAMP:
-		return sock_get_timestamp(sk, (struct timeval __user *)arg);
-	case SIOCGSTAMPNS:
-		return sock_get_timestampns(sk, (struct timespec __user *)arg);
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 		return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg,
@@ -426,6 +414,7 @@
 	.getname	   = sock_no_getname,
 	.poll		   = datagram_poll,
 	.ioctl		   = ieee802154_sock_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = sock_no_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
@@ -988,6 +977,7 @@
 	.getname	   = sock_no_getname,
 	.poll		   = datagram_poll,
 	.ioctl		   = ieee802154_sock_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = sock_no_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
@@ -1018,6 +1008,9 @@
 
 	switch (sock->type) {
 	case SOCK_RAW:
+		rc = -EPERM;
+		if (!capable(CAP_NET_RAW))
+			goto out;
 		proto = &ieee802154_raw_prot;
 		ops = &ieee802154_raw_ops;
 		break;
@@ -1102,7 +1095,7 @@
 
 static int __init af_ieee802154_init(void)
 {
-	int rc = -EINVAL;
+	int rc;
 
 	rc = proto_register(&ieee802154_raw_prot, 1);
 	if (rc)
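
Two behavioral notes on the socket.c hunks above. First, the SIOCGSTAMP/SIOCGSTAMPNS cases move out of the per-protocol ioctl handler: proto_ops gained a gettstamp hook in v5.1, and wiring it to the common sock_gettstamp() provides both ioctls (and their time64 variants) in one place. Second, creating a SOCK_RAW AF_IEEE802154 socket is now gated on CAP_NET_RAW, matching every other raw socket family. A hedged userspace sketch of the second change; the AF_IEEE802154 fallback value is the one from linux/socket.h:

	#include <errno.h>
	#include <stdio.h>
	#include <sys/socket.h>

	#ifndef AF_IEEE802154
	#define AF_IEEE802154 36	/* from linux/socket.h */
	#endif

	int main(void)
	{
		/* Without CAP_NET_RAW this now fails with EPERM;
		 * previously any user could open a raw 802.15.4 socket. */
		int fd = socket(AF_IEEE802154, SOCK_RAW, 0);

		if (fd < 0)
			perror("socket(AF_IEEE802154, SOCK_RAW)");
		return fd < 0 ? 1 : 0;
	}
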
diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c
index bd88525..d290393 100644
--- a/net/ieee802154/sysfs.c
+++ b/net/ieee802154/sysfs.c
@@ -1,11 +1,5 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0-only
+/*
  *
  * Authors:
  * Alexander Aring <aar@pengutronix.de>
diff --git a/net/ife/Kconfig b/net/ife/Kconfig
index 31e48b6..bcf6505 100644
--- a/net/ife/Kconfig
+++ b/net/ife/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # IFE subsystem configuration
 #
 
 menuconfig NET_IFE
 	depends on NET
-        tristate "Inter-FE based on IETF ForCES InterFE LFB"
+	tristate "Inter-FE based on IETF ForCES InterFE LFB"
 	default n
 	help
 	  Say Y here to add support of IFE encapsulation protocol
diff --git a/net/ife/Makefile b/net/ife/Makefile
index 2a90d97..1258fcb 100644
--- a/net/ife/Makefile
+++ b/net/ife/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the IFE encapsulation protocol
 #
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 32cae39..03381f3 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # IP configuration
 #
@@ -304,7 +305,7 @@
 	tristate "Virtual (secure) IP: tunneling"
 	select INET_TUNNEL
 	select NET_IP_TUNNEL
-	depends on INET_XFRM_MODE_TUNNEL
+	select XFRM
 	---help---
 	  Tunneling means encapsulating data of one protocol type within
 	  another protocol and sending it over a channel that understands the
@@ -396,33 +397,6 @@
 	tristate
 	default n
 
-config INET_XFRM_MODE_TRANSPORT
-	tristate "IP: IPsec transport mode"
-	default y
-	select XFRM
-	---help---
-	  Support for IPsec transport mode.
-
-	  If unsure, say Y.
-
-config INET_XFRM_MODE_TUNNEL
-	tristate "IP: IPsec tunnel mode"
-	default y
-	select XFRM
-	---help---
-	  Support for IPsec tunnel mode.
-
-	  If unsure, say Y.
-
-config INET_XFRM_MODE_BEET
-	tristate "IP: IPsec BEET mode"
-	default y
-	select XFRM
-	---help---
-	  Support for IPsec BEET mode.
-
-	  If unsure, say Y.
-
 config INET_DIAG
 	tristate "INET: socket monitoring interface"
 	default y
@@ -518,8 +492,8 @@
 	wired networks and throughput over wireless links.
 
 config TCP_CONG_HTCP
-        tristate "H-TCP"
-        default m
+	tristate "H-TCP"
+	default m
 	---help---
 	H-TCP is a send-side only modifications of the TCP Reno
 	protocol stack that optimizes the performance of TCP
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 7446b98..d57ecfa 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -14,7 +14,7 @@
 	     udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
 	     fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
 	     inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
-	     metrics.o netlink.o
+	     metrics.o netlink.o nexthop.o
 
 obj-$(CONFIG_BPFILTER) += bpfilter/
 
@@ -37,10 +37,7 @@
 obj-$(CONFIG_INET_ESP_OFFLOAD) += esp4_offload.o
 obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
 obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
-obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
 obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
-obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
-obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
 obj-$(CONFIG_IP_PNP) += ipconfig.o
 obj-$(CONFIG_NETFILTER)	+= netfilter.o netfilter/
 obj-$(CONFIG_INET_DIAG) += inet_diag.o
@@ -63,6 +60,7 @@
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1fbe2f8..70f92aa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -58,11 +59,6 @@
  *					Some other random speedups.
  *		Cyrus Durgin	:	Cleaned up file for kmod hacks.
  *		Andi Kleen	:	Fix inet_stream_connect TCP race.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) "IPv4: " fmt
@@ -136,6 +132,10 @@
 	struct inet_sock *inet = inet_sk(sk);
 
 	__skb_queue_purge(&sk->sk_receive_queue);
+	if (sk->sk_rx_skb_cache) {
+		__kfree_skb(sk->sk_rx_skb_cache);
+		sk->sk_rx_skb_cache = NULL;
+	}
 	__skb_queue_purge(&sk->sk_error_queue);
 
 	sk_mem_reclaim(sk);
@@ -156,7 +156,7 @@
 	WARN_ON(sk->sk_forward_alloc);
 
 	kfree(rcu_dereference_protected(inet->inet_opt, 1));
-	dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
+	dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
 	dst_release(sk->sk_rx_dst);
 	sk_refcnt_debug_dec(sk);
 }
@@ -208,6 +208,7 @@
 	if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
 		goto out;
 
+	sk->sk_max_ack_backlog = backlog;
 	/* Really, if the socket is already in listen state
 	 * we can only allow the backlog to be adjusted.
 	 */
@@ -231,7 +232,6 @@
 			goto out;
 		tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
 	}
-	sk->sk_max_ack_backlog = backlog;
 	err = 0;
 
 out:
@@ -424,8 +424,8 @@
 		if (sock_flag(sk, SOCK_LINGER) &&
 		    !(current->flags & PF_EXITING))
 			timeout = sk->sk_lingertime;
-		sock->sk = NULL;
 		sk->sk_prot->close(sk, timeout);
+		sock->sk = NULL;
 	}
 	return 0;
 }
@@ -784,10 +784,8 @@
 }
 EXPORT_SYMBOL(inet_getname);
 
-int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+int inet_send_prepare(struct sock *sk)
 {
-	struct sock *sk = sock->sk;
-
 	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
@@ -795,7 +793,19 @@
 	    inet_autobind(sk))
 		return -EAGAIN;
 
-	return sk->sk_prot->sendmsg(sk, msg, size);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(inet_send_prepare);
+
+int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+{
+	struct sock *sk = sock->sk;
+
+	if (unlikely(inet_send_prepare(sk)))
+		return -EAGAIN;
+
+	return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg,
+			       sk, msg, size);
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
@@ -804,11 +814,7 @@
 {
 	struct sock *sk = sock->sk;
 
-	sock_rps_record_flow(sk);
-
-	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
-	    inet_autobind(sk))
+	if (unlikely(inet_send_prepare(sk)))
 		return -EAGAIN;
 
 	if (sk->sk_prot->sendpage)
@@ -817,6 +823,8 @@
 }
 EXPORT_SYMBOL(inet_sendpage);
 
+INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *,
+					  size_t, int, int, int *));
 int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		 int flags)
 {
@@ -827,8 +835,9 @@
 	if (likely(!(flags & MSG_ERRQUEUE)))
 		sock_rps_record_flow(sk);
 
-	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
-				   flags & ~MSG_DONTWAIT, &addr_len);
+	err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
+			      sk, msg, size, flags & MSG_DONTWAIT,
+			      flags & ~MSG_DONTWAIT, &addr_len);
 	if (err >= 0)
 		msg->msg_namelen = addr_len;
 	return err;
@@ -911,12 +920,6 @@
 	struct rtentry rt;
 
 	switch (cmd) {
-	case SIOCGSTAMP:
-		err = sock_get_timestamp(sk, (struct timeval __user *)arg);
-		break;
-	case SIOCGSTAMPNS:
-		err = sock_get_timestampns(sk, (struct timespec __user *)arg);
-		break;
 	case SIOCADDRT:
 	case SIOCDELRT:
 		if (copy_from_user(&rt, p, sizeof(struct rtentry)))
@@ -988,6 +991,7 @@
 	.getname	   = inet_getname,
 	.poll		   = tcp_poll,
 	.ioctl		   = inet_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = inet_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
@@ -1023,6 +1027,7 @@
 	.getname	   = inet_getname,
 	.poll		   = udp_poll,
 	.ioctl		   = inet_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
@@ -1055,6 +1060,7 @@
 	.getname	   = inet_getname,
 	.poll		   = datagram_poll,
 	.ioctl		   = inet_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
@@ -1385,6 +1391,19 @@
 }
 EXPORT_SYMBOL(inet_gso_segment);
 
+static struct sk_buff *ipip_gso_segment(struct sk_buff *skb,
+					netdev_features_t features)
+{
+	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4))
+		return ERR_PTR(-EINVAL);
+
+	return inet_gso_segment(skb, features);
+}
+
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *,
+							   struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
+							   struct sk_buff *));
 struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	const struct net_offload *ops;
@@ -1494,7 +1513,8 @@
 	skb_gro_pull(skb, sizeof(*iph));
 	skb_set_transport_header(skb, skb_gro_offset(skb));
 
-	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
+				       ops->callbacks.gro_receive, head, skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -1556,6 +1576,8 @@
 	return -EINVAL;
 }
 
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
 int inet_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	__be16 newlen = htons(skb->len - nhoff);
@@ -1581,7 +1603,9 @@
 	 * because any hdr with option will have been flushed in
 	 * inet_gro_receive().
 	 */
-	err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
+	err = INDIRECT_CALL_2(ops->callbacks.gro_complete,
+			      tcp4_gro_complete, udp4_gro_complete,
+			      skb, nhoff + sizeof(*iph));
 
 out_unlock:
 	rcu_read_unlock();
@@ -1821,13 +1845,8 @@
 	return 0;
 }
 
-static __net_exit void inet_exit_net(struct net *net)
-{
-}
-
 static __net_initdata struct pernet_operations af_inet_ops = {
 	.init = inet_init_net,
-	.exit = inet_exit_net,
 };
 
 static int __init init_inet_pernet_ops(void)
@@ -1852,7 +1871,7 @@
 
 static const struct net_offload ipip_offload = {
 	.callbacks = {
-		.gso_segment	= inet_gso_segment,
+		.gso_segment	= ipip_gso_segment,
 		.gro_receive	= ipip_gro_receive,
 		.gro_complete	= ipip_gro_complete,
 	},
@@ -1964,6 +1983,8 @@
 	/* Add UDP-Lite (RFC 3828) */
 	udplite4_register();
 
+	raw_init();
+
 	ping_init();
 
 	/*
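
The af_inet.c conversion above is the retpoline-mitigation pattern: sk->sk_prot->sendmsg/->recvmsg and the GRO callbacks are dispatched through INDIRECT_CALL_2(), which compares the function pointer against the two overwhelmingly likely targets (TCP and UDP) and calls them directly, avoiding a retpoline-thunked indirect branch in the common case; the autobind preamble is also factored into inet_send_prepare() so sendmsg and sendpage share it. Conceptually, the helper expands to something like the sketch below; this is a rendering for illustration, the real macros live in include/linux/indirect_call_wrapper.h and compile to a plain indirect call when CONFIG_RETPOLINE is off:

	#define SKETCH_INDIRECT_CALL_1(f, f1, ...)			\
		(likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__))

	#define SKETCH_INDIRECT_CALL_2(f, f2, f1, ...)			\
		(likely(f == f2) ? f2(__VA_ARGS__) :			\
			SKETCH_INDIRECT_CALL_1(f, f1, __VA_ARGS__))
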
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 4dd95cd..974179b 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #define pr_fmt(fmt) "IPsec: " fmt
 
 #include <crypto/algapi.h>
@@ -461,9 +462,9 @@
 		return 0;
 
 	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
-		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
+		ipv4_update_pmtu(skb, net, info, 0, IPPROTO_AH);
 	else
-		ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
+		ipv4_redirect(skb, net, 0, IPPROTO_AH);
 	xfrm_state_put(x);
 
 	return 0;
@@ -589,8 +590,7 @@
 {
 	if (xfrm4_protocol_deregister(&ah4_protocol, IPPROTO_AH) < 0)
 		pr_info("%s: can't remove protocol\n", __func__);
-	if (xfrm_unregister_type(&ah_type, AF_INET) < 0)
-		pr_info("%s: can't remove xfrm type\n", __func__);
+	xfrm_unregister_type(&ah_type, AF_INET);
 }
 
 module_init(ah4_init);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index e90c89e..05eb42f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* linux/net/ipv4/arp.c
  *
  * Copyright (C) 1994 by Florian  La Roche
@@ -7,11 +8,6 @@
  * high-level addresses) into a low-level hardware address (like an Ethernet
  * address).
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
  * Fixes:
  *		Alan Cox	:	Removed the Ethernet assumptions in
  *					Florian's code
@@ -1255,6 +1251,8 @@
 		change_info = ptr;
 		if (change_info->flags_changed & IFF_NOARP)
 			neigh_changeaddr(&arp_tbl, dev);
+		if (!netif_carrier_ok(dev))
+			neigh_carrier_down(&arp_tbl, dev);
 		break;
 	default:
 		break;
diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile
index e9e42f9..00af530 100644
--- a/net/ipv4/bpfilter/Makefile
+++ b/net/ipv4/bpfilter/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_BPFILTER) += sockopt.o
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 5e04ed2..0480918 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -1,28 +1,52 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/module.h>
 #include <linux/uaccess.h>
 #include <linux/bpfilter.h>
 #include <uapi/linux/bpf.h>
 #include <linux/wait.h>
 #include <linux/kmod.h>
+#include <linux/fs.h>
+#include <linux/file.h>
 
-int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
-				char __user *optval,
-				unsigned int optlen, bool is_set);
-EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
+struct bpfilter_umh_ops bpfilter_ops;
+EXPORT_SYMBOL_GPL(bpfilter_ops);
+
+static void bpfilter_umh_cleanup(struct umh_info *info)
+{
+	mutex_lock(&bpfilter_ops.lock);
+	bpfilter_ops.stop = true;
+	fput(info->pipe_to_umh);
+	fput(info->pipe_from_umh);
+	info->pid = 0;
+	mutex_unlock(&bpfilter_ops.lock);
+}
 
 static int bpfilter_mbox_request(struct sock *sk, int optname,
 				 char __user *optval,
 				 unsigned int optlen, bool is_set)
 {
-	if (!bpfilter_process_sockopt) {
-		int err = request_module("bpfilter");
+	int err;
+	mutex_lock(&bpfilter_ops.lock);
+	if (!bpfilter_ops.sockopt) {
+		mutex_unlock(&bpfilter_ops.lock);
+		request_module("bpfilter");
+		mutex_lock(&bpfilter_ops.lock);
 
-		if (err)
-			return err;
-		if (!bpfilter_process_sockopt)
-			return -ECHILD;
+		if (!bpfilter_ops.sockopt) {
+			err = -ENOPROTOOPT;
+			goto out;
+		}
 	}
-	return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set);
+	if (bpfilter_ops.stop) {
+		err = bpfilter_ops.start();
+		if (err)
+			goto out;
+	}
+	err = bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set);
+out:
+	mutex_unlock(&bpfilter_ops.lock);
+	return err;
 }
 
 int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
@@ -41,3 +65,14 @@
 
 	return bpfilter_mbox_request(sk, optname, optval, len, false);
 }
+
+static int __init bpfilter_sockopt_init(void)
+{
+	mutex_init(&bpfilter_ops.lock);
+	bpfilter_ops.stop = true;
+	bpfilter_ops.info.cmdline = "bpfilter_umh";
+	bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup;
+
+	return 0;
+}
+device_initcall(bpfilter_sockopt_init);
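
The bpfilter rework above replaces a bare exported function pointer with a mutex-protected bpfilter_umh_ops, tracks whether the user-mode helper has exited (the stop flag is set from the umh cleanup callback), and restarts it on the next sockopt. The locking shape in bpfilter_mbox_request() is the point to notice: the mutex is dropped around request_module(), because modprobe sleeps and the bpfilter module's init path takes the same lock to install its sockopt handler, and the state is re-checked after reacquiring. A distilled sketch of that idiom, with invented demo_* names:

	struct demo_ops {
		struct mutex lock;
		int (*handler)(void);
	};

	static int demo_call_handler(struct demo_ops *ops)
	{
		int err;

		mutex_lock(&ops->lock);
		if (!ops->handler) {
			/* Drop the lock: the loaded module's init will
			 * take ops->lock to register ->handler. */
			mutex_unlock(&ops->lock);
			request_module("bpfilter");
			mutex_lock(&ops->lock);
			if (!ops->handler) {
				err = -ENOPROTOOPT;
				goto out;
			}
		}
		err = ops->handler();
	out:
		mutex_unlock(&ops->lock);
		return err;
	}
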
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 777fa3b..3768822 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * CIPSO - Commercial IP Security Option
  *
@@ -14,25 +15,10 @@
  *   http://www.itl.nist.gov/fipspubs/fip188.htm
  *
  * Author: Paul Moore <paul.moore@hp.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/init.h>
@@ -667,7 +653,8 @@
 	case CIPSO_V4_MAP_PASS:
 		return 0;
 	case CIPSO_V4_MAP_TRANS:
-		if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
+		if ((level < doi_def->map.std->lvl.cipso_size) &&
+		    (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL))
 			return 0;
 		break;
 	}
@@ -1735,13 +1722,26 @@
  */
 void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 {
+	unsigned char optbuf[sizeof(struct ip_options) + 40];
+	struct ip_options *opt = (struct ip_options *)optbuf;
+
 	if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
 		return;
 
+	/*
+	 * We might be called above the IP layer,
+	 * so we can not use icmp_send and IPCB here.
+	 */
+
+	memset(opt, 0, sizeof(struct ip_options));
+	opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+	if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+		return;
+
 	if (gateway)
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
 	else
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
 }
 
 /**
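
The cipso_ipv4.c hunks carry two fixes. cipso_v4_map_lvl_valid() now bounds-checks the on-the-wire level against lvl.cipso_size before indexing the translation table, closing an out-of-bounds read. And cipso_v4_error() can run above the IP layer, where IPCB(skb) has not been set up, so it compiles the header options into a local buffer and passes them to __icmp_send() rather than relying on icmp_send()'s use of the control block. The buffer sizing is worth spelling out; below is an annotated copy of the lines added above:

	/* ihl is a 4-bit field, so an IPv4 header is at most
	 * 15 * 4 = 60 bytes; 20 bytes are the fixed header, leaving at
	 * most 40 bytes of options, which struct ip_options stores
	 * inline after itself (__data[]).
	 */
	unsigned char optbuf[sizeof(struct ip_options) + 40];
	struct ip_options *opt = (struct ip_options *)optbuf;

	memset(opt, 0, sizeof(struct ip_options));
	opt->optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
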
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index f915abf..4a8550c 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	common UDP/RAW code
  *	Linux INET implementation
  *
  * Authors:
  * 	Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
- *
- * 	This program is free software; you can redistribute it and/or
- * 	modify it under the terms of the GNU General Public License
- * 	as published by the Free Software Foundation; either version
- * 	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/types.h>
@@ -19,6 +15,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/sock_reuseport.h>
 
 int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -42,7 +39,7 @@
 	oif = sk->sk_bound_dev_if;
 	saddr = inet->inet_saddr;
 	if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
-		if (!oif)
+		if (!oif || netif_index_is_l3_master(sock_net(sk), oif))
 			oif = inet->mc_index;
 		if (!saddr)
 			saddr = inet->mc_addr;
@@ -73,9 +70,10 @@
 	}
 	inet->inet_daddr = fl4->daddr;
 	inet->inet_dport = usin->sin_port;
+	reuseport_has_conns(sk, true);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
-	inet->inet_id = jiffies;
+	inet->inet_id = prandom_u32();
 
 	sk_dst_set(sk, &rt->dst);
 	err = 0;
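
Three small user-visible fixes land in __ip4_datagram_connect() above: a socket bound to an L3 master (VRF) device now falls back to the IP_MULTICAST_IF index for multicast, just as an unbound socket does; reuseport_has_conns() flags the SO_REUSEPORT group as containing connected sockets so reuseport scoring does not steal their packets; and inet_id is seeded from prandom_u32() instead of jiffies, since a clock-derived start made the 16-bit IP ID sequence guessable. A hedged sketch of the first fix, with invented demo_* names:

	static int demo_pick_mcast_oif(struct net *net,
				       struct inet_sock *inet,
				       int bound_oif)
	{
		/* A VRF device cannot itself be the multicast egress;
		 * defer to the IP_MULTICAST_IF setting as if unbound. */
		if (!bound_oif || netif_index_is_l3_master(net, bound_oif))
			return inet->mc_index;
		return bound_oif;
	}
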
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ea4bd8a..a4b5bd4 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	NET3	IP device support routines.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	Derived from the IP parts of dev.c 1.0.19
  * 		Authors:	Ross Biro
  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -66,6 +62,11 @@
 #include <net/net_namespace.h>
 #include <net/addrconf.h>
 
+#define IPV6ONLY_FLAGS	\
+		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
+		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
+		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
+
 static struct ipv4_devconf ipv4_devconf = {
 	.data = {
 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
@@ -100,6 +101,16 @@
 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
 	[IFA_FLAGS]		= { .type = NLA_U32 },
 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
+	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
+};
+
+struct inet_fill_args {
+	u32 portid;
+	u32 seq;
+	int event;
+	unsigned int flags;
+	int netnsid;
+	int ifindex;
 };
 
 #define IN4_ADDR_HSIZE_SHIFT	8
@@ -184,7 +195,8 @@
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+static void inet_del_ifa(struct in_device *in_dev,
+			 struct in_ifaddr __rcu **ifap,
 			 int destroy);
 #ifdef CONFIG_SYSCTL
 static int devinet_sysctl_register(struct in_device *idev);
@@ -290,8 +302,8 @@
 
 static void inetdev_destroy(struct in_device *in_dev)
 {
-	struct in_ifaddr *ifa;
 	struct net_device *dev;
+	struct in_ifaddr *ifa;
 
 	ASSERT_RTNL();
 
@@ -301,7 +313,7 @@
 
 	ip_mc_destroy_dev(in_dev);
 
-	while ((ifa = in_dev->ifa_list) != NULL) {
+	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 		inet_free_ifa(ifa);
 	}
@@ -317,30 +329,35 @@
 
 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 {
+	const struct in_ifaddr *ifa;
+
 	rcu_read_lock();
-	for_primary_ifa(in_dev) {
+	in_dev_for_each_ifa_rcu(ifa, in_dev) {
 		if (inet_ifa_match(a, ifa)) {
 			if (!b || inet_ifa_match(b, ifa)) {
 				rcu_read_unlock();
 				return 1;
 			}
 		}
-	} endfor_ifa(in_dev);
+	}
 	rcu_read_unlock();
 	return 0;
 }
 
-static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
-			 int destroy, struct nlmsghdr *nlh, u32 portid)
+static void __inet_del_ifa(struct in_device *in_dev,
+			   struct in_ifaddr __rcu **ifap,
+			   int destroy, struct nlmsghdr *nlh, u32 portid)
 {
 	struct in_ifaddr *promote = NULL;
-	struct in_ifaddr *ifa, *ifa1 = *ifap;
-	struct in_ifaddr *last_prim = in_dev->ifa_list;
+	struct in_ifaddr *ifa, *ifa1;
+	struct in_ifaddr *last_prim;
 	struct in_ifaddr *prev_prom = NULL;
 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 
 	ASSERT_RTNL();
 
+	ifa1 = rtnl_dereference(*ifap);
+	last_prim = rtnl_dereference(in_dev->ifa_list);
 	if (in_dev->dead)
 		goto no_promotions;
 
@@ -349,9 +366,9 @@
 	 **/
 
 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
-		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
+		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
 
-		while ((ifa = *ifap1) != NULL) {
+		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
 			    ifa1->ifa_scope <= ifa->ifa_scope)
 				last_prim = ifa;
@@ -384,7 +401,7 @@
 	 * and later to add them back with new prefsrc. Do this
 	 * while all addresses are on the device list.
 	 */
-	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
+	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
 		if (ifa1->ifa_mask == ifa->ifa_mask &&
 		    inet_ifa_match(ifa1->ifa_address, ifa))
 			fib_del_ifaddr(ifa, ifa1);
@@ -410,19 +427,25 @@
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 
 	if (promote) {
-		struct in_ifaddr *next_sec = promote->ifa_next;
+		struct in_ifaddr *next_sec;
 
+		next_sec = rtnl_dereference(promote->ifa_next);
 		if (prev_prom) {
-			prev_prom->ifa_next = promote->ifa_next;
-			promote->ifa_next = last_prim->ifa_next;
-			last_prim->ifa_next = promote;
+			struct in_ifaddr *last_sec;
+
+			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
+
+			last_sec = rtnl_dereference(last_prim->ifa_next);
+			rcu_assign_pointer(promote->ifa_next, last_sec);
+			rcu_assign_pointer(last_prim->ifa_next, promote);
 		}
 
 		promote->ifa_flags &= ~IFA_F_SECONDARY;
 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
 		blocking_notifier_call_chain(&inetaddr_chain,
 				NETDEV_UP, promote);
-		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
+		for (ifa = next_sec; ifa;
+		     ifa = rtnl_dereference(ifa->ifa_next)) {
 			if (ifa1->ifa_mask != ifa->ifa_mask ||
 			    !inet_ifa_match(ifa1->ifa_address, ifa))
 					continue;
@@ -434,7 +457,8 @@
 		inet_free_ifa(ifa1);
 }
 
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+static void inet_del_ifa(struct in_device *in_dev,
+			 struct in_ifaddr __rcu **ifap,
 			 int destroy)
 {
 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
@@ -447,9 +471,10 @@
 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 			     u32 portid, struct netlink_ext_ack *extack)
 {
+	struct in_ifaddr __rcu **last_primary, **ifap;
 	struct in_device *in_dev = ifa->ifa_dev;
-	struct in_ifaddr *ifa1, **ifap, **last_primary;
 	struct in_validator_info ivi;
+	struct in_ifaddr *ifa1;
 	int ret;
 
 	ASSERT_RTNL();
@@ -462,8 +487,13 @@
 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
 	last_primary = &in_dev->ifa_list;
 
-	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
-	     ifap = &ifa1->ifa_next) {
+	/* Don't set IPv6 only flags to IPv4 addresses */
+	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
+
+	ifap = &in_dev->ifa_list;
+	ifa1 = rtnl_dereference(*ifap);
+
+	while (ifa1) {
 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 		    ifa->ifa_scope <= ifa1->ifa_scope)
 			last_primary = &ifa1->ifa_next;
@@ -479,6 +509,9 @@
 			}
 			ifa->ifa_flags |= IFA_F_SECONDARY;
 		}
+
+		ifap = &ifa1->ifa_next;
+		ifa1 = rtnl_dereference(*ifap);
 	}
 
 	/* Allow any devices that wish to register ifaddr validtors to weigh
@@ -504,8 +537,8 @@
 		ifap = last_primary;
 	}
 
-	ifa->ifa_next = *ifap;
-	*ifap = ifa;
+	rcu_assign_pointer(ifa->ifa_next, *ifap);
+	rcu_assign_pointer(*ifap, ifa);
 
 	inet_hash_insert(dev_net(in_dev->dev), ifa);
 
@@ -570,12 +603,14 @@
 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 				    __be32 mask)
 {
+	struct in_ifaddr *ifa;
+
 	ASSERT_RTNL();
 
-	for_primary_ifa(in_dev) {
+	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 			return ifa;
-	} endfor_ifa(in_dev);
+	}
 	return NULL;
 }
 
@@ -603,16 +638,18 @@
 			    struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
+	struct in_ifaddr __rcu **ifap;
 	struct nlattr *tb[IFA_MAX+1];
 	struct in_device *in_dev;
 	struct ifaddrmsg *ifm;
-	struct in_ifaddr *ifa, **ifap;
+	struct in_ifaddr *ifa;
+
 	int err = -EINVAL;
 
 	ASSERT_RTNL();
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+				     ifa_ipv4_policy, extack);
 	if (err < 0)
 		goto errout;
 
@@ -623,7 +660,7 @@
 		goto errout;
 	}
 
-	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
 	     ifap = &ifa->ifa_next) {
 		if (tb[IFA_LOCAL] &&
 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
@@ -711,15 +748,19 @@
 
 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
 			    age >= ifa->ifa_valid_lft) {
-				struct in_ifaddr **ifap;
+				struct in_ifaddr __rcu **ifap;
+				struct in_ifaddr *tmp;
 
-				for (ifap = &ifa->ifa_dev->ifa_list;
-				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
-					if (*ifap == ifa) {
+				ifap = &ifa->ifa_dev->ifa_list;
+				tmp = rtnl_dereference(*ifap);
+				while (tmp) {
+					if (tmp == ifa) {
 						inet_del_ifa(ifa->ifa_dev,
 							     ifap, 1);
 						break;
 					}
+					ifap = &tmp->ifa_next;
+					tmp = rtnl_dereference(*ifap);
 				}
 			} else if (ifa->ifa_preferred_lft !=
 				   INFINITY_LIFE_TIME &&
@@ -773,7 +814,8 @@
 }
 
 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
-				       __u32 *pvalid_lft, __u32 *pprefered_lft)
+				       __u32 *pvalid_lft, __u32 *pprefered_lft,
+				       struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[IFA_MAX+1];
 	struct in_ifaddr *ifa;
@@ -782,8 +824,8 @@
 	struct in_device *in_dev;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
-			  NULL);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+				     ifa_ipv4_policy, extack);
 	if (err < 0)
 		goto errout;
 
@@ -862,13 +904,12 @@
 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
-	struct in_ifaddr *ifa1, **ifap;
+	struct in_ifaddr *ifa1;
 
 	if (!ifa->ifa_local)
 		return NULL;
 
-	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
-	     ifap = &ifa1->ifa_next) {
+	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
 		if (ifa1->ifa_mask == ifa->ifa_mask &&
 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
 		    ifa1->ifa_local == ifa->ifa_local)
@@ -888,7 +929,7 @@
 
 	ASSERT_RTNL();
 
-	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
+	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
 	if (IS_ERR(ifa))
 		return PTR_ERR(ifa);
 
@@ -941,17 +982,18 @@
 {
 	int rc = -1;	/* Something else, probably a multicast. */
 
-	if (ipv4_is_zeronet(addr))
+	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
 		rc = 0;
 	else {
 		__u32 haddr = ntohl(addr);
-
 		if (IN_CLASSA(haddr))
 			rc = 8;
 		else if (IN_CLASSB(haddr))
 			rc = 16;
 		else if (IN_CLASSC(haddr))
 			rc = 24;
+		else if (IN_CLASSE(haddr))
+			rc = 32;
 	}
 
 	return rc;
@@ -962,8 +1004,8 @@
 {
 	struct sockaddr_in sin_orig;
 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
+	struct in_ifaddr __rcu **ifap = NULL;
 	struct in_device *in_dev;
-	struct in_ifaddr **ifap = NULL;
 	struct in_ifaddr *ifa = NULL;
 	struct net_device *dev;
 	char *colon;
@@ -1034,7 +1076,9 @@
 			/* note: we only do this for a limited set of ioctls
 			   and only if the original address family was AF_INET.
 			   This is checked above. */
-			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+
+			for (ifap = &in_dev->ifa_list;
+			     (ifa = rtnl_dereference(*ifap)) != NULL;
 			     ifap = &ifa->ifa_next) {
 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
 				    sin_orig.sin_addr.s_addr ==
@@ -1047,7 +1091,8 @@
 		   4.3BSD-style and passed in junk so we fall back to
 		   comparing just the label */
 		if (!ifa) {
-			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+			for (ifap = &in_dev->ifa_list;
+			     (ifa = rtnl_dereference(*ifap)) != NULL;
 			     ifap = &ifa->ifa_next)
 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
 					break;
@@ -1089,7 +1134,7 @@
 				inet_del_ifa(in_dev, ifap, 1);
 			break;
 		}
-		ret = dev_change_flags(dev, ifr->ifr_flags);
+		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
 		break;
 
 	case SIOCSIFADDR:	/* Set interface address (and family) */
@@ -1196,7 +1241,7 @@
 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
 {
 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
-	struct in_ifaddr *ifa;
+	const struct in_ifaddr *ifa;
 	struct ifreq ifr;
 	int done = 0;
 
@@ -1206,7 +1251,7 @@
 	if (!in_dev)
 		goto out;
 
-	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
 		if (!buf) {
 			done += size;
 			continue;
@@ -1234,18 +1279,24 @@
 static __be32 in_dev_select_addr(const struct in_device *in_dev,
 				 int scope)
 {
-	for_primary_ifa(in_dev) {
+	const struct in_ifaddr *ifa;
+
+	in_dev_for_each_ifa_rcu(ifa, in_dev) {
+		if (ifa->ifa_flags & IFA_F_SECONDARY)
+			continue;
 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
 		    ifa->ifa_scope <= scope)
 			return ifa->ifa_local;
-	} endfor_ifa(in_dev);
+	}
 
 	return 0;
 }
 
 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 {
+	const struct in_ifaddr *ifa;
 	__be32 addr = 0;
+	unsigned char localnet_scope = RT_SCOPE_HOST;
 	struct in_device *in_dev;
 	struct net *net = dev_net(dev);
 	int master_idx;
@@ -1255,8 +1306,13 @@
 	if (!in_dev)
 		goto no_in_dev;
 
-	for_primary_ifa(in_dev) {
-		if (ifa->ifa_scope > scope)
+	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
+		localnet_scope = RT_SCOPE_LINK;
+
+	in_dev_for_each_ifa_rcu(ifa, in_dev) {
+		if (ifa->ifa_flags & IFA_F_SECONDARY)
+			continue;
+		if (min(ifa->ifa_scope, localnet_scope) > scope)
 			continue;
 		if (!dst || inet_ifa_match(dst, ifa)) {
 			addr = ifa->ifa_local;
@@ -1264,7 +1320,7 @@
 		}
 		if (!addr)
 			addr = ifa->ifa_local;
-	} endfor_ifa(in_dev);
+	}
 
 	if (addr)
 		goto out_unlock;
@@ -1309,13 +1365,20 @@
 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
 			      __be32 local, int scope)
 {
-	int same = 0;
+	unsigned char localnet_scope = RT_SCOPE_HOST;
+	const struct in_ifaddr *ifa;
 	__be32 addr = 0;
+	int same = 0;
 
-	for_ifa(in_dev) {
+	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
+		localnet_scope = RT_SCOPE_LINK;
+
+	in_dev_for_each_ifa_rcu(ifa, in_dev) {
+		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
+
 		if (!addr &&
 		    (local == ifa->ifa_local || !local) &&
-		    ifa->ifa_scope <= scope) {
+		    min_scope <= scope) {
 			addr = ifa->ifa_local;
 			if (same)
 				break;
@@ -1330,7 +1393,7 @@
 				if (inet_ifa_match(addr, ifa))
 					break;
 				/* No, then can we use new local src? */
-				if (ifa->ifa_scope <= scope) {
+				if (min_scope <= scope) {
 					addr = ifa->ifa_local;
 					break;
 				}
@@ -1338,7 +1401,7 @@
 				same = 0;
 			}
 		}
-	} endfor_ifa(in_dev);
+	}
 
 	return same ? addr : 0;
 }
@@ -1412,7 +1475,7 @@
 	struct in_ifaddr *ifa;
 	int named = 0;
 
-	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
 		char old[IFNAMSIZ], *dot;
 
 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
@@ -1442,10 +1505,9 @@
 					struct in_device *in_dev)
 
 {
-	struct in_ifaddr *ifa;
+	const struct in_ifaddr *ifa;
 
-	for (ifa = in_dev->ifa_list; ifa;
-	     ifa = ifa->ifa_next) {
+	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
 			 ifa->ifa_local, dev,
 			 ifa->ifa_local, NULL,
@@ -1584,13 +1646,14 @@
 }
 
 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
-			    u32 portid, u32 seq, int event, unsigned int flags)
+			    struct inet_fill_args *args)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
 	u32 preferred, valid;
 
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
+	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
+			args->flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
@@ -1601,6 +1664,10 @@
 	ifm->ifa_scope = ifa->ifa_scope;
 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
 
+	if (args->netnsid >= 0 &&
+	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+		goto nla_put_failure;
+
 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
 		preferred = ifa->ifa_preferred_lft;
 		valid = ifa->ifa_valid_lft;
@@ -1645,27 +1712,144 @@
 	return -EMSGSIZE;
 }
 
+static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
+				      struct inet_fill_args *fillargs,
+				      struct net **tgt_net, struct sock *sk,
+				      struct netlink_callback *cb)
+{
+	struct netlink_ext_ack *extack = cb->extack;
+	struct nlattr *tb[IFA_MAX+1];
+	struct ifaddrmsg *ifm;
+	int err, i;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
+		return -EINVAL;
+	}
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
+		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
+		return -EINVAL;
+	}
+
+	fillargs->ifindex = ifm->ifa_index;
+	if (fillargs->ifindex) {
+		cb->answer_flags |= NLM_F_DUMP_FILTERED;
+		fillargs->flags |= NLM_F_DUMP_FILTERED;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+					    ifa_ipv4_policy, extack);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i <= IFA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		if (i == IFA_TARGET_NETNSID) {
+			struct net *net;
+
+			fillargs->netnsid = nla_get_s32(tb[i]);
+
+			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
+			if (IS_ERR(net)) {
+				fillargs->netnsid = -1;
+				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
+				return PTR_ERR(net);
+			}
+			*tgt_net = net;
+		} else {
+			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
+			    struct netlink_callback *cb, int s_ip_idx,
+			    struct inet_fill_args *fillargs)
+{
+	struct in_ifaddr *ifa;
+	int ip_idx = 0;
+	int err;
+
+	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
+		if (ip_idx < s_ip_idx) {
+			ip_idx++;
+			continue;
+		}
+		err = inet_fill_ifaddr(skb, ifa, fillargs);
+		if (err < 0)
+			goto done;
+
+		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+		ip_idx++;
+	}
+	err = 0;
+
+done:
+	cb->args[2] = ip_idx;
+
+	return err;
+}
+
 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
+	struct inet_fill_args fillargs = {
+		.portid = NETLINK_CB(cb->skb).portid,
+		.seq = nlh->nlmsg_seq,
+		.event = RTM_NEWADDR,
+		.flags = NLM_F_MULTI,
+		.netnsid = -1,
+	};
 	struct net *net = sock_net(skb->sk);
+	struct net *tgt_net = net;
 	int h, s_h;
 	int idx, s_idx;
-	int ip_idx, s_ip_idx;
+	int s_ip_idx;
 	struct net_device *dev;
 	struct in_device *in_dev;
-	struct in_ifaddr *ifa;
 	struct hlist_head *head;
+	int err = 0;
 
 	s_h = cb->args[0];
 	s_idx = idx = cb->args[1];
-	s_ip_idx = ip_idx = cb->args[2];
+	s_ip_idx = cb->args[2];
+
+	if (cb->strict_check) {
+		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
+						 skb->sk, cb);
+		if (err < 0)
+			goto put_tgt_net;
+
+		err = 0;
+		if (fillargs.ifindex) {
+			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
+			if (!dev) {
+				err = -ENODEV;
+				goto put_tgt_net;
+			}
+
+			in_dev = __in_dev_get_rtnl(dev);
+			if (in_dev) {
+				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
+						       &fillargs);
+			}
+			goto put_tgt_net;
+		}
+	}
 
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
-		head = &net->dev_index_head[h];
+		head = &tgt_net->dev_index_head[h];
 		rcu_read_lock();
-		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
-			  net->dev_base_seq;
+		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
+			  tgt_net->dev_base_seq;
 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
@@ -1675,18 +1859,11 @@
 			if (!in_dev)
 				goto cont;
 
-			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
-			     ifa = ifa->ifa_next, ip_idx++) {
-				if (ip_idx < s_ip_idx)
-					continue;
-				if (inet_fill_ifaddr(skb, ifa,
-					     NETLINK_CB(cb->skb).portid,
-					     cb->nlh->nlmsg_seq,
-					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
-					rcu_read_unlock();
-					goto done;
-				}
-				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
+					       &fillargs);
+			if (err < 0) {
+				rcu_read_unlock();
+				goto done;
 			}
 cont:
 			idx++;
@@ -1697,16 +1874,24 @@
 done:
 	cb->args[0] = h;
 	cb->args[1] = idx;
-	cb->args[2] = ip_idx;
+put_tgt_net:
+	if (fillargs.netnsid >= 0)
+		put_net(tgt_net);
 
-	return skb->len;
+	return skb->len ? : err;
 }
 
 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 		      u32 portid)
 {
+	struct inet_fill_args fillargs = {
+		.portid = portid,
+		.seq = nlh ? nlh->nlmsg_seq : 0,
+		.event = event,
+		.flags = 0,
+		.netnsid = -1,
+	};
 	struct sk_buff *skb;
-	u32 seq = nlh ? nlh->nlmsg_seq : 0;
 	int err = -ENOBUFS;
 	struct net *net;
 
@@ -1715,7 +1900,7 @@
 	if (!skb)
 		goto errout;
 
-	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
+	err = inet_fill_ifaddr(skb, ifa, &fillargs);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -1773,7 +1958,8 @@
 	if (dev && !__in_dev_get_rcu(dev))
 		return -EAFNOSUPPORT;
 
-	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
+					  inet_af_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -1801,7 +1987,7 @@
 	if (!in_dev)
 		return -EAFNOSUPPORT;
 
-	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
+	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
 		BUG();
 
 	if (tb[IFLA_INET_CONF]) {
@@ -1930,13 +2116,51 @@
 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
 };
 
+static int inet_netconf_valid_get_req(struct sk_buff *skb,
+				      const struct nlmsghdr *nlh,
+				      struct nlattr **tb,
+				      struct netlink_ext_ack *extack)
+{
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
+		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
+		return -EINVAL;
+	}
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
+					      tb, NETCONFA_MAX,
+					      devconf_ipv4_policy, extack);
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
+					    tb, NETCONFA_MAX,
+					    devconf_ipv4_policy, extack);
+	if (err)
+		return err;
+
+	for (i = 0; i <= NETCONFA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NETCONFA_IFINDEX:
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
 				    struct nlmsghdr *nlh,
 				    struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(in_skb->sk);
 	struct nlattr *tb[NETCONFA_MAX+1];
-	struct netconfmsg *ncm;
 	struct sk_buff *skb;
 	struct ipv4_devconf *devconf;
 	struct in_device *in_dev;
@@ -1944,9 +2168,8 @@
 	int ifindex;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
-			  devconf_ipv4_policy, extack);
-	if (err < 0)
+	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
+	if (err)
 		goto errout;
 
 	err = -EINVAL;
@@ -1995,6 +2218,7 @@
 static int inet_netconf_dump_devconf(struct sk_buff *skb,
 				     struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	int h, s_h;
 	int idx, s_idx;
@@ -2002,6 +2226,21 @@
 	struct in_device *in_dev;
 	struct hlist_head *head;
 
+	if (cb->strict_check) {
+		struct netlink_ext_ack *extack = cb->extack;
+		struct netconfmsg *ncm;
+
+		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
+			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
+			return -EINVAL;
+		}
+
+		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
+			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
+			return -EINVAL;
+		}
+	}
+
 	s_h = cb->args[0];
 	s_idx = idx = cb->args[1];
 
@@ -2021,7 +2260,7 @@
 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
 						      &in_dev->cnf,
 						      NETLINK_CB(cb->skb).portid,
-						      cb->nlh->nlmsg_seq,
+						      nlh->nlmsg_seq,
 						      RTM_NEWNETCONF,
 						      NLM_F_MULTI,
 						      NETCONFA_ALL) < 0) {
@@ -2038,7 +2277,7 @@
 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
 					      net->ipv4.devconf_all,
 					      NETLINK_CB(cb->skb).portid,
-					      cb->nlh->nlmsg_seq,
+					      nlh->nlmsg_seq,
 					      RTM_NEWNETCONF, NLM_F_MULTI,
 					      NETCONFA_ALL) < 0)
 			goto done;
@@ -2049,7 +2288,7 @@
 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
 					      net->ipv4.devconf_dflt,
 					      NETLINK_CB(cb->skb).portid,
-					      cb->nlh->nlmsg_seq,
+					      nlh->nlmsg_seq,
 					      RTM_NEWNETCONF, NLM_F_MULTI,
 					      NETCONFA_ALL) < 0)
 			goto done;
@@ -2407,32 +2646,34 @@
 	int err;
 	struct ipv4_devconf *all, *dflt;
 #ifdef CONFIG_SYSCTL
-	struct ctl_table *tbl = ctl_forward_entry;
+	struct ctl_table *tbl;
 	struct ctl_table_header *forw_hdr;
 #endif
 
 	err = -ENOMEM;
-	all = &ipv4_devconf;
-	dflt = &ipv4_devconf_dflt;
+	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
+	if (!all)
+		goto err_alloc_all;
 
-	if (!net_eq(net, &init_net)) {
-		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
-		if (!all)
-			goto err_alloc_all;
-
-		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
-		if (!dflt)
-			goto err_alloc_dflt;
+	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
+	if (!dflt)
+		goto err_alloc_dflt;
 
 #ifdef CONFIG_SYSCTL
-		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
-		if (!tbl)
-			goto err_alloc_ctl;
+	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
+	if (!tbl)
+		goto err_alloc_ctl;
 
-		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
-		tbl[0].extra1 = all;
-		tbl[0].extra2 = net;
+	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
+	tbl[0].extra1 = all;
+	tbl[0].extra2 = net;
 #endif
+
+	if ((!IS_ENABLED(CONFIG_SYSCTL) ||
+	     sysctl_devconf_inherit_init_net != 2) &&
+	    !net_eq(net, &init_net)) {
+		memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
+		memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
 	}
 
 #ifdef CONFIG_SYSCTL
@@ -2462,15 +2703,12 @@
 err_reg_dflt:
 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
 err_reg_all:
-	if (tbl != ctl_forward_entry)
-		kfree(tbl);
+	kfree(tbl);
 err_alloc_ctl:
 #endif
-	if (dflt != &ipv4_devconf_dflt)
-		kfree(dflt);
+	kfree(dflt);
 err_alloc_dflt:
-	if (all != &ipv4_devconf)
-		kfree(all);
+	kfree(all);
 err_alloc_all:
 	return err;
 }
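
Note on the devinet_init_net() rewrite above: every namespace now gets its own
kmemdup()'ed devconf templates unconditionally, and inheriting values from
init_net becomes a separate memcpy step gated by sysctl_devconf_inherit_init_net.
A minimal standalone model of that decision (the meaning of knob value 2,
"keep compiled-in defaults", is read off the hunk; the function and enum names
here are illustrative only, not kernel API):

#include <stdbool.h>
#include <stdio.h>

enum devconf_src { COMPILED_DEFAULTS, INHERITED_FROM_INIT_NET };

/* Models the post-kmemdup copy step: init_net keeps the compiled-in
 * template; other namespaces inherit from init_net unless the inherit
 * knob is set to 2. */
static enum devconf_src ipv4_devconf_source(bool is_init_net, int inherit_knob)
{
	if (is_init_net || inherit_knob == 2)
		return COMPILED_DEFAULTS;
	return INHERITED_FROM_INIT_NET;
}

int main(void)
{
	printf("%d\n", ipv4_devconf_source(false, 0));	/* 1: inherit */
	printf("%d\n", ipv4_devconf_source(false, 2));	/* 0: defaults */
	return 0;
}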
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 9768901..5c96776 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #define pr_fmt(fmt) "IPsec: " fmt
 
 #include <crypto/aead.h>
@@ -32,8 +33,6 @@
 
 #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
 
-static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
-
 /*
  * Allocate an AEAD request structure with extra space for SG and IV.
  *
@@ -125,10 +124,13 @@
 	void *tmp;
 	struct xfrm_state *x;
 
-	if (xo && (xo->flags & XFRM_DEV_RESUME))
-		x = skb->sp->xvec[skb->sp->len - 1];
-	else
+	if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+		struct sec_path *sp = skb_sec_path(skb);
+
+		x = sp->xvec[sp->len - 1];
+	} else {
 		x = skb_dst(skb)->xfrm;
+	}
 
 	tmp = ESP_SKB_CB(skb)->tmp;
 	esp_ssg_unref(x, tmp);
@@ -223,7 +225,7 @@
 	tail[plen - 1] = proto;
 }
 
-static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
 {
 	int encap_type;
 	struct udphdr *uh;
@@ -231,6 +233,7 @@
 	__be16 sport, dport;
 	struct xfrm_encap_tmpl *encap = x->encap;
 	struct ip_esp_hdr *esph = esp->esph;
+	unsigned int len;
 
 	spin_lock_bh(&x->lock);
 	sport = encap->encap_sport;
@@ -238,11 +241,14 @@
 	encap_type = encap->encap_type;
 	spin_unlock_bh(&x->lock);
 
+	len = skb->len + esp->tailen - skb_transport_offset(skb);
+	if (len + sizeof(struct iphdr) >= IP_MAX_MTU)
+		return -EMSGSIZE;
+
 	uh = (struct udphdr *)esph;
 	uh->source = sport;
 	uh->dest = dport;
-	uh->len = htons(skb->len + esp->tailen
-		  - skb_transport_offset(skb));
+	uh->len = htons(len);
 	uh->check = 0;
 
 	switch (encap_type) {
@@ -259,6 +265,8 @@
 
 	*skb_mac_header(skb) = IPPROTO_UDP;
 	esp->esph = esph;
+
+	return 0;
 }
 
 int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
@@ -272,8 +280,12 @@
 	int tailen = esp->tailen;
 
 	/* this is non-NULL only with UDP Encapsulation */
-	if (x->encap)
-		esp_output_udp_encap(x, skb, esp);
+	if (x->encap) {
+		int err = esp_output_udp_encap(x, skb, esp);
+
+		if (err < 0)
+			return err;
+	}
 
 	if (!skb_cloned(skb)) {
 		if (tailen <= skb_tailroom(skb)) {
@@ -325,7 +337,7 @@
 			skb->len += tailen;
 			skb->data_len += tailen;
 			skb->truesize += tailen;
-			if (sk)
+			if (sk && sk_fullsock(sk))
 				refcount_add(tailen, &sk->sk_wmem_alloc);
 
 			goto out;
@@ -492,7 +504,7 @@
 		struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
 		u32 padto;
 
-		padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached));
+		padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
 		if (skb->len < padto)
 			esp.tfclen = padto - skb->len;
 	}
@@ -683,12 +695,11 @@
  */
 static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead = x->data;
 	struct aead_request *req;
 	struct sk_buff *trailer;
 	int ivlen = crypto_aead_ivsize(aead);
-	int elen = skb->len - sizeof(*esph) - ivlen;
+	int elen = skb->len - sizeof(struct ip_esp_hdr) - ivlen;
 	int nfrags;
 	int assoclen;
 	int seqhilen;
@@ -698,13 +709,13 @@
 	struct scatterlist *sg;
 	int err = -EINVAL;
 
-	if (!pskb_may_pull(skb, sizeof(*esph) + ivlen))
+	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + ivlen))
 		goto out;
 
 	if (elen <= 0)
 		goto out;
 
-	assoclen = sizeof(*esph);
+	assoclen = sizeof(struct ip_esp_hdr);
 	seqhilen = 0;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
@@ -775,28 +786,6 @@
 	return err;
 }
 
-static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
-{
-	struct crypto_aead *aead = x->data;
-	u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
-	unsigned int net_adj;
-
-	switch (x->props.mode) {
-	case XFRM_MODE_TRANSPORT:
-	case XFRM_MODE_BEET:
-		net_adj = sizeof(struct iphdr);
-		break;
-	case XFRM_MODE_TUNNEL:
-		net_adj = 0;
-		break;
-	default:
-		BUG();
-	}
-
-	return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
-		 net_adj) & ~(blksize - 1)) + net_adj - 2;
-}
-
 static int esp4_err(struct sk_buff *skb, u32 info)
 {
 	struct net *net = dev_net(skb->dev);
@@ -820,9 +809,9 @@
 		return 0;
 
 	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
-		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
+		ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ESP);
 	else
-		ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
+		ipv4_redirect(skb, net, 0, IPPROTO_ESP);
 	xfrm_state_put(x);
 
 	return 0;
@@ -1022,7 +1011,6 @@
 	.flags		= XFRM_TYPE_REPLAY_PROT,
 	.init_state	= esp_init_state,
 	.destructor	= esp_destroy,
-	.get_mtu	= esp4_get_mtu,
 	.input		= esp_input,
 	.output		= esp_output,
 };
@@ -1053,8 +1041,7 @@
 {
 	if (xfrm4_protocol_deregister(&esp4_protocol, IPPROTO_ESP) < 0)
 		pr_info("%s: can't remove protocol\n", __func__);
-	if (xfrm_unregister_type(&esp_type, AF_INET) < 0)
-		pr_info("%s: can't remove xfrm type\n", __func__);
+	xfrm_unregister_type(&esp_type, AF_INET);
 }
 
 module_init(esp4_init);
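
Note on the esp4_get_mtu() removal above: the computation moves into
xfrm_state_mtu(), which the tfcpad clamp now calls. The arithmetic being moved
is worth seeing standalone: round the usable payload down to the cipher block
size, then reserve the two mandatory ESP trailer bytes (pad length + next
header). A self-contained sketch with illustrative numbers (1500-byte MTU,
16-byte ESP header plus IV, 16-byte ICV, 16-byte block size, transport mode
over IPv4); the function name is made up for the example:

#include <stdio.h>

/* Same formula as the removed esp4_get_mtu(); net_adj is
 * sizeof(struct iphdr) in transport/BEET mode and 0 in tunnel mode. */
static unsigned int esp_payload_mtu(unsigned int mtu, unsigned int header_len,
				    unsigned int authsize, unsigned int net_adj,
				    unsigned int blksize)
{
	return ((mtu - header_len - authsize - net_adj) & ~(blksize - 1))
	       + net_adj - 2;
}

int main(void)
{
	/* transport mode, AES-GCM-like sizes: expect 1458 */
	printf("%u\n", esp_payload_mtu(1500, 16, 16, 20, 16));
	return 0;
}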
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 58834a1..0e4a7cf 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IPV4 GSO/GRO offload support
  * Linux INET implementation
@@ -5,10 +6,6 @@
  * Copyright (C) 2016 secunet Security Networks AG
  * Author: Steffen Klassert <steffen.klassert@secunet.com>
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
  * ESP GRO support
  */
 
@@ -46,26 +43,27 @@
 
 	xo = xfrm_offload(skb);
 	if (!xo || !(xo->flags & CRYPTO_DONE)) {
-		err = secpath_set(skb);
-		if (err)
+		struct sec_path *sp = secpath_set(skb);
+
+		if (!sp)
 			goto out;
 
-		if (skb->sp->len == XFRM_MAX_DEPTH)
-			goto out;
+		if (sp->len == XFRM_MAX_DEPTH)
+			goto out_reset;
 
 		x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
 				      (xfrm_address_t *)&ip_hdr(skb)->daddr,
 				      spi, IPPROTO_ESP, AF_INET);
 		if (!x)
-			goto out;
+			goto out_reset;
 
-		skb->sp->xvec[skb->sp->len++] = x;
-		skb->sp->olen++;
+		sp->xvec[sp->len++] = x;
+		sp->olen++;
 
 		xo = xfrm_offload(skb);
 		if (!xo) {
 			xfrm_state_put(x);
-			goto out;
+			goto out_reset;
 		}
 	}
 
@@ -81,6 +79,8 @@
 	xfrm_input(skb, IPPROTO_ESP, spi, -2);
 
 	return ERR_PTR(-EINPROGRESS);
+out_reset:
+	secpath_reset(skb);
 out:
 	skb_push(skb, offset);
 	NAPI_GRO_CB(skb)->same_flow = 0;
@@ -106,6 +106,44 @@
 	xo->proto = proto;
 }
 
+static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x,
+						struct sk_buff *skb,
+						netdev_features_t features)
+{
+	__skb_push(skb, skb->mac_len);
+	return skb_mac_gso_segment(skb, features);
+}
+
+static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
+						   struct sk_buff *skb,
+						   netdev_features_t features)
+{
+	const struct net_offload *ops;
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct xfrm_offload *xo = xfrm_offload(skb);
+
+	skb->transport_header += x->props.header_len;
+	ops = rcu_dereference(inet_offloads[xo->proto]);
+	if (likely(ops && ops->callbacks.gso_segment))
+		segs = ops->callbacks.gso_segment(skb, features);
+
+	return segs;
+}
+
+static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x,
+						    struct sk_buff *skb,
+						    netdev_features_t features)
+{
+	switch (x->outer_mode.encap) {
+	case XFRM_MODE_TUNNEL:
+		return xfrm4_tunnel_gso_segment(x, skb, features);
+	case XFRM_MODE_TRANSPORT:
+		return xfrm4_transport_gso_segment(x, skb, features);
+	}
+
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
 				        netdev_features_t features)
 {
@@ -114,6 +152,7 @@
 	struct crypto_aead *aead;
 	netdev_features_t esp_features = features;
 	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct sec_path *sp;
 
 	if (!xo)
 		return ERR_PTR(-EINVAL);
@@ -121,7 +160,8 @@
 	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
 		return ERR_PTR(-EINVAL);
 
-	x = skb->sp->xvec[skb->sp->len - 1];
+	sp = skb_sec_path(skb);
+	x = sp->xvec[sp->len - 1];
 	aead = x->data;
 	esph = ip_esp_hdr(skb);
 
@@ -135,14 +175,16 @@
 
 	skb->encap_hdr_csum = 1;
 
-	if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev)
+	if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) &&
+	     !(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev)
 		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
-	else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+	else if (!(features & NETIF_F_HW_ESP_TX_CSUM) &&
+		 !(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM))
 		esp_features = features & ~NETIF_F_CSUM_MASK;
 
 	xo->flags |= XFRM_GSO_SEGMENT;
 
-	return x->outer_mode->gso_segment(x, skb, esp_features);
+	return xfrm4_outer_mode_gso_segment(x, skb, esp_features);
 }
 
 static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -178,7 +220,9 @@
 	if (!xo)
 		return -EINVAL;
 
-	if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) {
+	if ((!(features & NETIF_F_HW_ESP) &&
+	     !(skb->dev->gso_partial_features & NETIF_F_HW_ESP)) ||
+	    x->xso.dev != skb->dev) {
 		xo->flags |= CRYPTO_FALLBACK;
 		hw_offload = false;
 	}
@@ -268,9 +312,7 @@
 
 static void __exit esp4_offload_exit(void)
 {
-	if (xfrm_unregister_type_offload(&esp_type_offload, AF_INET) < 0)
-		pr_info("%s: can't remove xfrm type offload\n", __func__);
-
+	xfrm_unregister_type_offload(&esp_type_offload, AF_INET);
 	inet_del_offload(&esp4_offload, IPPROTO_ESP);
 }
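
Note on the two feature-check hunks above: devices advertising ESP support
only via gso_partial_features are no longer forced into the software fallback.
A hedged standalone model of the new decision (HW_ESP below is a stand-in bit;
in the real code NETIF_F_HW_ESP is a netdev_features_t flag and the device
comparison is x->xso.dev != skb->dev):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t features_t;		/* stand-in for netdev_features_t */
#define HW_ESP ((features_t)1 << 0)	/* stand-in for NETIF_F_HW_ESP */

/* Mirrors the widened checks in esp4_gso_segment()/esp_xmit(): fall
 * back to software crypto only if neither the offered features nor the
 * device's gso_partial_features advertise ESP offload, or the xfrm
 * state is bound to a different device than the skb. */
static bool esp_needs_sw_fallback(features_t features, features_t gso_partial,
				  bool state_dev_matches)
{
	return (!(features & HW_ESP) && !(gso_partial & HW_ESP)) ||
	       !state_dev_matches;
}

int main(void)
{
	/* ESP advertised only via gso_partial_features: no fallback */
	printf("%d\n", esp_needs_sw_fallback(0, HW_ESP, true));	/* 0 */
	return 0;
}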
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0113993..71c78d2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -6,11 +7,6 @@
  *		IPv4 Forwarding Information Base: FIB frontend.
  *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -43,6 +39,7 @@
 #include <net/sock.h>
 #include <net/arp.h>
 #include <net/ip_fib.h>
+#include <net/nexthop.h>
 #include <net/rtnetlink.h>
 #include <net/xfrm.h>
 #include <net/l3mdev.h>
@@ -127,7 +124,8 @@
 	h = id & (FIB_TABLE_HASHSZ - 1);
 
 	head = &net->ipv4.fib_table_hash[h];
-	hlist_for_each_entry_rcu(tb, head, tb_hlist) {
+	hlist_for_each_entry_rcu(tb, head, tb_hlist,
+				 lockdep_rtnl_is_held()) {
 		if (tb->tb_id == id)
 			return tb;
 	}
@@ -192,7 +190,7 @@
 	return 0;
 }
 
-static void fib_flush(struct net *net)
+void fib_flush(struct net *net)
 {
 	int flushed = 0;
 	unsigned int h;
@@ -203,7 +201,7 @@
 		struct fib_table *tb;
 
 		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
-			flushed += fib_table_flush(net, tb);
+			flushed += fib_table_flush(net, tb, false);
 	}
 
 	if (flushed)
@@ -234,7 +232,9 @@
 	if (table) {
 		ret = RTN_UNICAST;
 		if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
-			if (!dev || dev == res.fi->fib_dev)
+			struct fib_nh_common *nhc = fib_info_nhc(res.fi, 0);
+
+			if (!dev || dev == nhc->nhc_dev)
 				ret = res.type;
 		}
 	}
@@ -307,7 +307,7 @@
 			.flowi4_mark = vmark ? skb->mark : 0,
 		};
 		if (!fib_lookup(net, &fl4, &res, 0))
-			return FIB_RES_PREFSRC(net, res);
+			return fib_result_prefsrc(net, &res);
 	} else {
 		scope = RT_SCOPE_LINK;
 	}
@@ -315,6 +315,32 @@
 	return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
 }
 
+bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
+{
+	bool dev_match = false;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	int ret;
+
+	for (ret = 0; ret < fib_info_num_path(fi); ret++) {
+		const struct fib_nh_common *nhc = fib_info_nhc(fi, ret);
+
+		if (nhc->nhc_dev == dev) {
+			dev_match = true;
+			break;
+		} else if (l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) {
+			dev_match = true;
+			break;
+		}
+	}
+#else
+	if (fib_info_nhc(fi, 0)->nhc_dev == dev)
+		dev_match = true;
+#endif
+
+	return dev_match;
+}
+EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev);
+
 /* Given (packet source, input interface) and optional (dst, oif, tos):
  * - (main) check, that source is valid i.e. not broadcast or our local
  *   address.
@@ -361,26 +387,15 @@
 	    (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
 		goto e_inval;
 	fib_combine_itag(itag, &res);
-	dev_match = false;
 
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-	for (ret = 0; ret < res.fi->fib_nhs; ret++) {
-		struct fib_nh *nh = &res.fi->fib_nh[ret];
-
-		if (nh->nh_dev == dev) {
-			dev_match = true;
-			break;
-		} else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
-			dev_match = true;
-			break;
-		}
-	}
-#else
-	if (FIB_RES_DEV(res) == dev)
-		dev_match = true;
-#endif
+	dev_match = fib_info_nh_uses_dev(res.fi, dev);
+	/* This is not common, loopback packets retain skb_dst so normally they
+	 * would not even hit this slow path.
+	 */
+	dev_match = dev_match || (res.type == RTN_LOCAL &&
+				  dev == net->loopback_dev);
 	if (dev_match) {
-		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+		ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
 		return ret;
 	}
 	if (no_addr)
@@ -392,7 +407,7 @@
 	ret = 0;
 	if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
 		if (res.type == RTN_UNICAST)
-			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+			ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
 	}
 	return ret;
 
@@ -530,14 +545,22 @@
 		cfg->fc_oif = dev->ifindex;
 		cfg->fc_table = l3mdev_fib_table(dev);
 		if (colon) {
-			struct in_ifaddr *ifa;
-			struct in_device *in_dev = __in_dev_get_rtnl(dev);
+			const struct in_ifaddr *ifa;
+			struct in_device *in_dev;
+
+			in_dev = __in_dev_get_rtnl(dev);
 			if (!in_dev)
 				return -ENODEV;
+
 			*colon = ':';
-			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
+
+			rcu_read_lock();
+			in_dev_for_each_ifa_rcu(ifa, in_dev) {
 				if (strcmp(ifa->ifa_label, devname) == 0)
 					break;
+			}
+			rcu_read_unlock();
+
 			if (!ifa)
 				return -ENODEV;
 			cfg->fc_prefsrc = ifa->ifa_local;
@@ -548,7 +571,8 @@
 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
 		unsigned int addr_type;
 
-		cfg->fc_gw = addr;
+		cfg->fc_gw4 = addr;
+		cfg->fc_gw_family = AF_INET;
 		addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
 		if (rt->rt_flags & RTF_GATEWAY &&
 		    addr_type == RTN_UNICAST)
@@ -558,7 +582,7 @@
 	if (cmd == SIOCDELRT)
 		return 0;
 
-	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
+	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family)
 		return -EINVAL;
 
 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
@@ -634,6 +658,7 @@
 }
 
 const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
+	[RTA_UNSPEC]		= { .strict_start_type = RTA_DPORT + 1 },
 	[RTA_DST]		= { .type = NLA_U32 },
 	[RTA_SRC]		= { .type = NLA_U32 },
 	[RTA_IIF]		= { .type = NLA_U32 },
@@ -652,18 +677,64 @@
 	[RTA_IP_PROTO]		= { .type = NLA_U8 },
 	[RTA_SPORT]		= { .type = NLA_U16 },
 	[RTA_DPORT]		= { .type = NLA_U16 },
+	[RTA_NH_ID]		= { .type = NLA_U32 },
 };
 
+int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
+		    struct netlink_ext_ack *extack)
+{
+	struct rtvia *via;
+	int alen;
+
+	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
+		NL_SET_ERR_MSG(extack, "Invalid attribute length for RTA_VIA");
+		return -EINVAL;
+	}
+
+	via = nla_data(nla);
+	alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr);
+
+	switch (via->rtvia_family) {
+	case AF_INET:
+		if (alen != sizeof(__be32)) {
+			NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_VIA");
+			return -EINVAL;
+		}
+		cfg->fc_gw_family = AF_INET;
+		cfg->fc_gw4 = *((__be32 *)via->rtvia_addr);
+		break;
+	case AF_INET6:
+#ifdef CONFIG_IPV6
+		if (alen != sizeof(struct in6_addr)) {
+			NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA");
+			return -EINVAL;
+		}
+		cfg->fc_gw_family = AF_INET6;
+		cfg->fc_gw6 = *((struct in6_addr *)via->rtvia_addr);
+#else
+		NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
+		return -EINVAL;
+#endif
+		break;
+	default:
+		NL_SET_ERR_MSG(extack, "Unsupported address family in RTA_VIA");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 			     struct nlmsghdr *nlh, struct fib_config *cfg,
 			     struct netlink_ext_ack *extack)
 {
+	bool has_gw = false, has_via = false;
 	struct nlattr *attr;
 	int err, remaining;
 	struct rtmsg *rtm;
 
-	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy,
-			     extack);
+	err = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
+					rtm_ipv4_policy, extack);
 	if (err < 0)
 		goto errout;
 
@@ -698,7 +769,16 @@
 			cfg->fc_oif = nla_get_u32(attr);
 			break;
 		case RTA_GATEWAY:
-			cfg->fc_gw = nla_get_be32(attr);
+			has_gw = true;
+			cfg->fc_gw4 = nla_get_be32(attr);
+			if (cfg->fc_gw4)
+				cfg->fc_gw_family = AF_INET;
+			break;
+		case RTA_VIA:
+			has_via = true;
+			err = fib_gw_from_via(cfg, attr, extack);
+			if (err)
+				goto errout;
 			break;
 		case RTA_PRIORITY:
 			cfg->fc_priority = nla_get_u32(attr);
@@ -735,9 +815,27 @@
 			if (err < 0)
 				goto errout;
 			break;
+		case RTA_NH_ID:
+			cfg->fc_nh_id = nla_get_u32(attr);
+			break;
 		}
 	}
 
+	if (cfg->fc_nh_id) {
+		if (cfg->fc_oif || cfg->fc_gw_family ||
+		    cfg->fc_encap || cfg->fc_mp) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop specification and nexthop id are mutually exclusive");
+			return -EINVAL;
+		}
+	}
+
+	if (has_gw && has_via) {
+		NL_SET_ERR_MSG(extack,
+			       "Nexthop configuration can not contain both GATEWAY and VIA");
+		err = -EINVAL;
+		goto errout;
+	}
+
 	return 0;
 errout:
 	return err;
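
Note on fib_gw_from_via() above: the RTA_VIA payload is validated purely by
length. The attribute must carry at least the 2-byte family field, and the
remainder must exactly match the address size for that family. A standalone
sketch of the same length check (userspace types; struct name is illustrative,
AF_* values as on Linux):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/socket.h>		/* AF_INET, AF_INET6 */

struct rtvia_sketch {
	uint16_t rtvia_family;
	uint8_t  rtvia_addr[];	/* 4 bytes for AF_INET, 16 for AF_INET6 */
};

/* nla_payload_len is what nla_len() would return for the attribute */
static bool rta_via_len_ok(uint16_t family, size_t nla_payload_len)
{
	size_t alen;

	if (nla_payload_len < offsetof(struct rtvia_sketch, rtvia_addr))
		return false;
	alen = nla_payload_len - offsetof(struct rtvia_sketch, rtvia_addr);

	switch (family) {
	case AF_INET:  return alen == 4;
	case AF_INET6: return alen == 16;
	default:       return false;
	}
}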
@@ -755,6 +853,12 @@
 	if (err < 0)
 		goto errout;
 
+	if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) {
+		NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+		err = -EINVAL;
+		goto errout;
+	}
+
 	tb = fib_get_table(net, cfg.fc_table);
 	if (!tb) {
 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
@@ -792,8 +896,86 @@
 	return err;
 }
 
+int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
+			  struct fib_dump_filter *filter,
+			  struct netlink_callback *cb)
+{
+	struct netlink_ext_ack *extack = cb->extack;
+	struct nlattr *tb[RTA_MAX + 1];
+	struct rtmsg *rtm;
+	int err, i;
+
+	ASSERT_RTNL();
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+		NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
+		return -EINVAL;
+	}
+
+	rtm = nlmsg_data(nlh);
+	if (rtm->rtm_dst_len || rtm->rtm_src_len  || rtm->rtm_tos   ||
+	    rtm->rtm_scope) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
+		return -EINVAL;
+	}
+
+	if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) {
+		NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request");
+		return -EINVAL;
+	}
+	if (rtm->rtm_flags & RTM_F_CLONED)
+		filter->dump_routes = false;
+	else
+		filter->dump_exceptions = false;
+
+	filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC);
+	filter->flags    = rtm->rtm_flags;
+	filter->protocol = rtm->rtm_protocol;
+	filter->rt_type  = rtm->rtm_type;
+	filter->table_id = rtm->rtm_table;
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+					    rtm_ipv4_policy, extack);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i <= RTA_MAX; ++i) {
+		int ifindex;
+
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case RTA_TABLE:
+			filter->table_id = nla_get_u32(tb[i]);
+			break;
+		case RTA_OIF:
+			ifindex = nla_get_u32(tb[i]);
+			filter->dev = __dev_get_by_index(net, ifindex);
+			if (!filter->dev)
+				return -ENODEV;
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
+			return -EINVAL;
+		}
+	}
+
+	if (filter->flags || filter->protocol || filter->rt_type ||
+	    filter->table_id || filter->dev) {
+		filter->filter_set = 1;
+		cb->answer_flags = NLM_F_DUMP_FILTERED;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
+
 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct fib_dump_filter filter = { .dump_routes = true,
+					  .dump_exceptions = true };
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
@@ -801,10 +983,34 @@
 	struct hlist_head *head;
 	int dumped = 0, err;
 
-	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
-	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
+	if (cb->strict_check) {
+		err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
+		if (err < 0)
+			return err;
+	} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
+		struct rtmsg *rtm = nlmsg_data(nlh);
+
+		filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED);
+	}
+
+	/* ipv4 does not use prefix flag */
+	if (filter.flags & RTM_F_PREFIX)
 		return skb->len;
 
+	if (filter.table_id) {
+		tb = fib_get_table(net, filter.table_id);
+		if (!tb) {
+			if (filter.dump_all_families)
+				return skb->len;
+
+			NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
+			return -ENOENT;
+		}
+
+		err = fib_table_dump(tb, skb, cb, &filter);
+		return skb->len ? : err;
+	}
+
 	s_h = cb->args[0];
 	s_e = cb->args[1];
 
@@ -819,7 +1025,7 @@
 			if (dumped)
 				memset(&cb->args[2], 0, sizeof(cb->args) -
 						 2 * sizeof(cb->args[0]));
-			err = fib_table_dump(tb, skb, cb);
+			err = fib_table_dump(tb, skb, cb, &filter);
 			if (err < 0) {
 				if (likely(skb->len))
 					goto out;
@@ -942,7 +1148,7 @@
 	if (!(dev->flags & IFF_UP) ||
 	    ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
 	    ipv4_is_zeronet(prefix) ||
-	    prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
+	    (prefix == ifa->ifa_local && ifa->ifa_prefixlen == 32))
 		return;
 
 	/* add the new */
@@ -1009,8 +1215,8 @@
 	 *
 	 * Scan address list to be sure that addresses are really gone.
 	 */
-
-	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+	rcu_read_lock();
+	in_dev_for_each_ifa_rcu(ifa1, in_dev) {
 		if (ifa1 == ifa) {
 			/* promotion, keep the IP */
 			gone = 0;
@@ -1078,6 +1284,7 @@
 			}
 		}
 	}
+	rcu_read_unlock();
 
 no_promotions:
 	if (!(ok & BRD_OK))
@@ -1247,6 +1454,7 @@
 	struct netdev_notifier_info_ext *info_ext = ptr;
 	struct in_device *in_dev;
 	struct net *net = dev_net(dev);
+	struct in_ifaddr *ifa;
 	unsigned int flags;
 
 	if (event == NETDEV_UNREGISTER) {
@@ -1261,9 +1469,9 @@
 
 	switch (event) {
 	case NETDEV_UP:
-		for_ifa(in_dev) {
+		in_dev_for_each_ifa_rtnl(ifa, in_dev) {
 			fib_add_ifaddr(ifa);
-		} endfor_ifa(in_dev);
+		}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		fib_sync_up(dev, RTNH_F_DEAD);
 #endif
@@ -1357,7 +1565,7 @@
 
 		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
 			hlist_del(&tb->tb_hlist);
-			fib_table_flush(net, tb);
+			fib_table_flush(net, tb, true);
 			fib_free_table(tb);
 		}
 	}
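
Note on ip_valid_fib_dump_req() above: for strict-checking callers it defines
the only acceptable shape of an RTM_GETROUTE dump: rtm_dst_len, rtm_src_len,
rtm_tos and rtm_scope zeroed, flags limited to RTM_F_CLONED | RTM_F_PREFIX, and
at most RTA_TABLE and RTA_OIF appended. A hedged userspace sketch of a
conforming header (field meanings taken from the hunk; the surrounding netlink
socket plumbing is omitted):

#include <linux/rtnetlink.h>
#include <string.h>
#include <sys/socket.h>

/* Fills an rtmsg that passes ip_valid_fib_dump_req() under
 * cb->strict_check. Any nonzero dst_len/src_len/tos/scope, any other
 * flag bit, or any attribute other than RTA_TABLE/RTA_OIF is rejected
 * with an extack error. */
static void build_strict_fib_dump_hdr(struct rtmsg *rtm)
{
	memset(rtm, 0, sizeof(*rtm));
	rtm->rtm_family = AF_INET;	/* AF_UNSPEC would mean "all families" */
	rtm->rtm_flags  = RTM_F_CLONED;	/* optional: dump exceptions, not routes */
	/* rtm_table, rtm_protocol and rtm_type act as pre-parse filters */
}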
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index e6ff282..a68b5e2 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -5,6 +5,7 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <net/ip_fib.h>
+#include <net/nexthop.h>
 
 struct fib_alias {
 	struct hlist_node	fa_list;
@@ -45,6 +46,7 @@
 {
 	/* we used to play games with refcounts, but we now use RCU */
 	res->fi = fi;
+	res->nhc = fib_info_nhc(fi, 0);
 }
 
 struct fib_prop {
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f8eb78d..b43a7ba 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -8,11 +9,6 @@
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *		Thomas Graf <tgraf@suug.ch>
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Fixes:
  *		Rani Assaf	:	local_rule cannot be deleted
  *		Marc Boucher	:	routing by fwmark
@@ -31,6 +27,7 @@
 #include <net/route.h>
 #include <net/tcp.h>
 #include <net/ip_fib.h>
+#include <net/nexthop.h>
 #include <net/fib_rules.h>
 
 struct fib4_rule {
@@ -145,8 +142,11 @@
 	struct fib_result *result = (struct fib_result *) arg->result;
 	struct net_device *dev = NULL;
 
-	if (result->fi)
-		dev = result->fi->fib_dev;
+	if (result->fi) {
+		struct fib_nh_common *nhc = fib_info_nhc(result->fi, 0);
+
+		dev = nhc->nhc_dev;
+	}
 
 	/* do not accept result if the route does
 	 * not meet the required prefix length
@@ -198,11 +198,15 @@
 
 static struct fib_table *fib_empty_table(struct net *net)
 {
-	u32 id;
+	u32 id = 1;
 
-	for (id = 1; id <= RT_TABLE_MAX; id++)
+	while (1) {
 		if (!fib_get_table(net, id))
 			return fib_new_table(net, id);
+
+		if (id++ == RT_TABLE_MAX)
+			break;
+	}
 	return NULL;
 }
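
Note on the fib_empty_table() rewrite above: it is an unsigned-overflow fix.
RT_TABLE_MAX is 0xFFFFFFFF, so the old condition in
for (id = 1; id <= RT_TABLE_MAX; id++) is always true for a u32, and once id
wrapped the loop could never terminate on its own. A two-line demonstration of
the wrap:

#include <stdint.h>
#include <stdio.h>

#define RT_TABLE_MAX 0xFFFFFFFFu

int main(void)
{
	uint32_t id = RT_TABLE_MAX;

	printf("%d\n", id <= RT_TABLE_MAX);	/* 1: condition never fails */
	id++;					/* wraps around to 0 */
	printf("%u %d\n", (unsigned)id, id <= RT_TABLE_MAX);	/* 0 1 */
	return 0;
}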
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 446204c..f1888c6 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -6,11 +7,6 @@
  *		IPv4 Forwarding Information Base: semantics.
  *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #include <linux/uaccess.h>
@@ -41,10 +37,13 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
-#include <net/netlink.h>
+#include <net/ip6_fib.h>
 #include <net/nexthop.h>
+#include <net/netlink.h>
+#include <net/rtnh.h>
 #include <net/lwtunnel.h>
 #include <net/fib_notifier.h>
+#include <net/addrconf.h>
 
 #include "fib_lookup.h"
 
@@ -58,18 +57,21 @@
 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
 
+/* for_nexthops and change_nexthops only used when nexthop object
+ * is not set in a fib_info. The logic within can reference fib_nh.
+ */
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
 #define for_nexthops(fi) {						\
 	int nhsel; const struct fib_nh *nh;				\
 	for (nhsel = 0, nh = (fi)->fib_nh;				\
-	     nhsel < (fi)->fib_nhs;					\
+	     nhsel < fib_info_num_path((fi));				\
 	     nh++, nhsel++)
 
 #define change_nexthops(fi) {						\
 	int nhsel; struct fib_nh *nexthop_nh;				\
 	for (nhsel = 0,	nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
-	     nhsel < (fi)->fib_nhs;					\
+	     nhsel < fib_info_num_path((fi));				\
 	     nexthop_nh++, nhsel++)
 
 #else /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -157,12 +159,12 @@
 	dst_release_immediate(&rt->dst);
 }
 
-static void free_nh_exceptions(struct fib_nh *nh)
+static void free_nh_exceptions(struct fib_nh_common *nhc)
 {
 	struct fnhe_hash_bucket *hash;
 	int i;
 
-	hash = rcu_dereference_protected(nh->nh_exceptions, 1);
+	hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
 	if (!hash)
 		return;
 	for (i = 0; i < FNHE_HASH_SIZE; i++) {
@@ -204,24 +206,42 @@
 	free_percpu(rtp);
 }
 
+void fib_nh_common_release(struct fib_nh_common *nhc)
+{
+	if (nhc->nhc_dev)
+		dev_put(nhc->nhc_dev);
+
+	lwtstate_put(nhc->nhc_lwtstate);
+	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
+	rt_fibinfo_free(&nhc->nhc_rth_input);
+	free_nh_exceptions(nhc);
+}
+EXPORT_SYMBOL_GPL(fib_nh_common_release);
+
+void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
+{
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	if (fib_nh->nh_tclassid)
+		net->ipv4.fib_num_tclassid_users--;
+#endif
+	fib_nh_common_release(&fib_nh->nh_common);
+}
+
 /* Release a nexthop info record */
 static void free_fib_info_rcu(struct rcu_head *head)
 {
 	struct fib_info *fi = container_of(head, struct fib_info, rcu);
-	struct dst_metrics *m;
 
-	change_nexthops(fi) {
-		if (nexthop_nh->nh_dev)
-			dev_put(nexthop_nh->nh_dev);
-		lwtstate_put(nexthop_nh->nh_lwtstate);
-		free_nh_exceptions(nexthop_nh);
-		rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
-		rt_fibinfo_free(&nexthop_nh->nh_rth_input);
-	} endfor_nexthops(fi);
+	if (fi->nh) {
+		nexthop_put(fi->nh);
+	} else {
+		change_nexthops(fi) {
+			fib_nh_release(fi->fib_net, nexthop_nh);
+		} endfor_nexthops(fi);
+	}
 
-	m = fi->fib_metrics;
-	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
-		kfree(m);
+	ip_fib_metrics_put(fi->fib_metrics);
+
 	kfree(fi);
 }
 
@@ -232,12 +252,7 @@
 		return;
 	}
 	fib_info_cnt--;
-#ifdef CONFIG_IP_ROUTE_CLASSID
-	change_nexthops(fi) {
-		if (nexthop_nh->nh_tclassid)
-			fi->fib_net->ipv4.fib_num_tclassid_users--;
-	} endfor_nexthops(fi);
-#endif
+
 	call_rcu(&fi->rcu, free_fib_info_rcu);
 }
 EXPORT_SYMBOL_GPL(free_fib_info);
@@ -249,35 +264,54 @@
 		hlist_del(&fi->fib_hash);
 		if (fi->fib_prefsrc)
 			hlist_del(&fi->fib_lhash);
-		change_nexthops(fi) {
-			if (!nexthop_nh->nh_dev)
-				continue;
-			hlist_del(&nexthop_nh->nh_hash);
-		} endfor_nexthops(fi)
+		if (fi->nh) {
+			list_del(&fi->nh_list);
+		} else {
+			change_nexthops(fi) {
+				if (!nexthop_nh->fib_nh_dev)
+					continue;
+				hlist_del(&nexthop_nh->nh_hash);
+			} endfor_nexthops(fi)
+		}
 		fi->fib_dead = 1;
 		fib_info_put(fi);
 	}
 	spin_unlock_bh(&fib_info_lock);
 }
 
-static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
+static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
 {
-	const struct fib_nh *onh = ofi->fib_nh;
+	const struct fib_nh *onh;
+
+	if (fi->nh || ofi->nh)
+		return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1;
+
+	if (ofi->fib_nhs == 0)
+		return 0;
 
 	for_nexthops(fi) {
-		if (nh->nh_oif != onh->nh_oif ||
-		    nh->nh_gw  != onh->nh_gw ||
-		    nh->nh_scope != onh->nh_scope ||
+		onh = fib_info_nh(ofi, nhsel);
+
+		if (nh->fib_nh_oif != onh->fib_nh_oif ||
+		    nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
+		    nh->fib_nh_scope != onh->fib_nh_scope ||
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		    nh->nh_weight != onh->nh_weight ||
+		    nh->fib_nh_weight != onh->fib_nh_weight ||
 #endif
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		    nh->nh_tclassid != onh->nh_tclassid ||
 #endif
-		    lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) ||
-		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
+		    lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) ||
+		    ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK))
 			return -1;
-		onh++;
+
+		if (nh->fib_nh_gw_family == AF_INET &&
+		    nh->fib_nh_gw4 != onh->fib_nh_gw4)
+			return -1;
+
+		if (nh->fib_nh_gw_family == AF_INET6 &&
+		    ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
+			return -1;
 	} endfor_nexthops(fi);
 	return 0;
 }
@@ -291,22 +325,78 @@
 		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
 }
 
-static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
+static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
+				      u32 prefsrc, u32 priority)
+{
+	unsigned int val = init_val;
+
+	val ^= (protocol << 8) | scope;
+	val ^= prefsrc;
+	val ^= priority;
+
+	return val;
+}
+
+static unsigned int fib_info_hashfn_result(unsigned int val)
 {
 	unsigned int mask = (fib_info_hash_size - 1);
-	unsigned int val = fi->fib_nhs;
-
-	val ^= (fi->fib_protocol << 8) | fi->fib_scope;
-	val ^= (__force u32)fi->fib_prefsrc;
-	val ^= fi->fib_priority;
-	for_nexthops(fi) {
-		val ^= fib_devindex_hashfn(nh->nh_oif);
-	} endfor_nexthops(fi)
 
 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
 }
 
-static struct fib_info *fib_find_info(const struct fib_info *nfi)
+static inline unsigned int fib_info_hashfn(struct fib_info *fi)
+{
+	unsigned int val;
+
+	val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol,
+				fi->fib_scope, (__force u32)fi->fib_prefsrc,
+				fi->fib_priority);
+
+	if (fi->nh) {
+		val ^= fib_devindex_hashfn(fi->nh->id);
+	} else {
+		for_nexthops(fi) {
+			val ^= fib_devindex_hashfn(nh->fib_nh_oif);
+		} endfor_nexthops(fi)
+	}
+
+	return fib_info_hashfn_result(val);
+}
+
+/* no metrics, only nexthop id */
+static struct fib_info *fib_find_info_nh(struct net *net,
+					 const struct fib_config *cfg)
+{
+	struct hlist_head *head;
+	struct fib_info *fi;
+	unsigned int hash;
+
+	hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id),
+				 cfg->fc_protocol, cfg->fc_scope,
+				 (__force u32)cfg->fc_prefsrc,
+				 cfg->fc_priority);
+	hash = fib_info_hashfn_result(hash);
+	head = &fib_info_hash[hash];
+
+	hlist_for_each_entry(fi, head, fib_hash) {
+		if (!net_eq(fi->fib_net, net))
+			continue;
+		if (!fi->nh || fi->nh->id != cfg->fc_nh_id)
+			continue;
+		if (cfg->fc_protocol == fi->fib_protocol &&
+		    cfg->fc_scope == fi->fib_scope &&
+		    cfg->fc_prefsrc == fi->fib_prefsrc &&
+		    cfg->fc_priority == fi->fib_priority &&
+		    cfg->fc_type == fi->fib_type &&
+		    cfg->fc_table == fi->fib_tb_id &&
+		    !((cfg->fc_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK))
+			return fi;
+	}
+
+	return NULL;
+}
+
+static struct fib_info *fib_find_info(struct fib_info *nfi)
 {
 	struct hlist_head *head;
 	struct fib_info *fi;
@@ -328,7 +418,7 @@
 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
 			   sizeof(u32) * RTAX_MAX) == 0 &&
 		    !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
-		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
+		    nh_comp(fi, nfi) == 0)
 			return fi;
 	}
 
@@ -349,9 +439,9 @@
 	hash = fib_devindex_hashfn(dev->ifindex);
 	head = &fib_info_devhash[hash];
 	hlist_for_each_entry(nh, head, nh_hash) {
-		if (nh->nh_dev == dev &&
-		    nh->nh_gw == gw &&
-		    !(nh->nh_flags & RTNH_F_DEAD)) {
+		if (nh->fib_nh_dev == dev &&
+		    nh->fib_nh_gw4 == gw &&
+		    !(nh->fib_nh_flags & RTNH_F_DEAD)) {
 			spin_unlock(&fib_info_lock);
 			return 0;
 		}
@@ -370,34 +460,40 @@
 			 + nla_total_size(4) /* RTA_PRIORITY */
 			 + nla_total_size(4) /* RTA_PREFSRC */
 			 + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+	unsigned int nhs = fib_info_num_path(fi);
 
 	/* space for nested metrics */
 	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
 
-	if (fi->fib_nhs) {
+	if (fi->nh)
+		payload += nla_total_size(4); /* RTA_NH_ID */
+
+	if (nhs) {
 		size_t nh_encapsize = 0;
-		/* Also handles the special case fib_nhs == 1 */
+		/* Also handles the special case nhs == 1 */
 
 		/* each nexthop is packed in an attribute */
 		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+		unsigned int i;
 
 		/* may contain flow and gateway attribute */
 		nhsize += 2 * nla_total_size(4);
 
 		/* grab encap info */
-		for_nexthops(fi) {
-			if (nh->nh_lwtstate) {
+		for (i = 0; i < fib_info_num_path(fi); i++) {
+			struct fib_nh_common *nhc = fib_info_nhc(fi, i);
+
+			if (nhc->nhc_lwtstate) {
 				/* RTA_ENCAP_TYPE */
 				nh_encapsize += lwtunnel_get_encap_size(
-						nh->nh_lwtstate);
+						nhc->nhc_lwtstate);
 				/* RTA_ENCAP */
 				nh_encapsize +=  nla_total_size(2);
 			}
-		} endfor_nexthops(fi);
+		}
 
 		/* all nexthops are packed in a nested attribute */
-		payload += nla_total_size((fi->fib_nhs * nhsize) +
-					  nh_encapsize);
+		payload += nla_total_size((nhs * nhsize) + nh_encapsize);
 
 	}
 
@@ -437,10 +533,18 @@
 			    struct fib_info **last_resort, int *last_idx,
 			    int dflt)
 {
+	const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
 	struct neighbour *n;
 	int state = NUD_NONE;
 
-	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
+	if (likely(nhc->nhc_gw_family == AF_INET))
+		n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev);
+	else if (nhc->nhc_gw_family == AF_INET6)
+		n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6,
+				 nhc->nhc_dev);
+	else
+		n = NULL;
+
 	if (n) {
 		state = n->nud_state;
 		neigh_release(n);
@@ -459,6 +563,75 @@
 	return 1;
 }
 
+int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
+		       u16 encap_type, void *cfg, gfp_t gfp_flags,
+		       struct netlink_ext_ack *extack)
+{
+	int err;
+
+	nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *,
+						    gfp_flags);
+	if (!nhc->nhc_pcpu_rth_output)
+		return -ENOMEM;
+
+	if (encap) {
+		struct lwtunnel_state *lwtstate;
+
+		if (encap_type == LWTUNNEL_ENCAP_NONE) {
+			NL_SET_ERR_MSG(extack, "LWT encap type not specified");
+			err = -EINVAL;
+			goto lwt_failure;
+		}
+		err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family,
+					   cfg, &lwtstate, extack);
+		if (err)
+			goto lwt_failure;
+
+		nhc->nhc_lwtstate = lwtstate_get(lwtstate);
+	}
+
+	return 0;
+
+lwt_failure:
+	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
+	nhc->nhc_pcpu_rth_output = NULL;
+	return err;
+}
+EXPORT_SYMBOL_GPL(fib_nh_common_init);
+
+int fib_nh_init(struct net *net, struct fib_nh *nh,
+		struct fib_config *cfg, int nh_weight,
+		struct netlink_ext_ack *extack)
+{
+	int err;
+
+	nh->fib_nh_family = AF_INET;
+
+	err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap,
+				 cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
+	if (err)
+		return err;
+
+	nh->fib_nh_oif = cfg->fc_oif;
+	nh->fib_nh_gw_family = cfg->fc_gw_family;
+	if (cfg->fc_gw_family == AF_INET)
+		nh->fib_nh_gw4 = cfg->fc_gw4;
+	else if (cfg->fc_gw_family == AF_INET6)
+		nh->fib_nh_gw6 = cfg->fc_gw6;
+
+	nh->fib_nh_flags = cfg->fc_flags;
+
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	nh->nh_tclassid = cfg->fc_flow;
+	if (nh->nh_tclassid)
+		net->ipv4.fib_num_tclassid_users++;
+#endif
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	nh->fib_nh_weight = nh_weight;
+#endif
+	return 0;
+}
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
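
Note on fib_nh_common_init() above: the percpu output-route cache is allocated
before the lwtunnel state is built, so the lwt_failure path must free that
cache and clear the pointer before returning. A minimal standalone model of
the same init-then-unwind shape (struct and function names are illustrative;
errno values inlined for self-containment):

#include <stdbool.h>
#include <stdlib.h>

struct nhc_model {
	void *pcpu_rth_output;	/* stands in for the percpu route cache */
	void *lwtstate;
};

static int nhc_init_model(struct nhc_model *nhc, bool want_encap,
			  bool encap_valid)
{
	nhc->pcpu_rth_output = calloc(1, 64);
	if (!nhc->pcpu_rth_output)
		return -12;			/* -ENOMEM */

	if (want_encap && !encap_valid) {
		/* lwt_failure: undo the earlier allocation */
		free(nhc->pcpu_rth_output);
		nhc->pcpu_rth_output = NULL;
		return -22;			/* -EINVAL */
	}
	return 0;
}

int main(void)
{
	struct nhc_model nhc;

	return nhc_init_model(&nhc, true, false) == -22 ? 0 : 1;
}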
@@ -481,15 +654,21 @@
 	return nhs;
 }
 
+/* only called when fib_nh is integrated into fib_info */
 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 		       int remaining, struct fib_config *cfg,
 		       struct netlink_ext_ack *extack)
 {
+	struct net *net = fi->fib_net;
+	struct fib_config fib_cfg;
+	struct fib_nh *nh;
 	int ret;
 
 	change_nexthops(fi) {
 		int attrlen;
 
+		memset(&fib_cfg, 0, sizeof(fib_cfg));
+
 		if (!rtnh_ok(rtnh, remaining)) {
 			NL_SET_ERR_MSG(extack,
 				       "Invalid nexthop configuration - extra data after nexthop");
@@ -502,107 +681,128 @@
 			return -EINVAL;
 		}
 
-		nexthop_nh->nh_flags =
-			(cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
-		nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
-		nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
+		fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
+		fib_cfg.fc_oif = rtnh->rtnh_ifindex;
 
 		attrlen = rtnh_attrlen(rtnh);
 		if (attrlen > 0) {
-			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+			struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
-			nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0;
-#ifdef CONFIG_IP_ROUTE_CLASSID
-			nla = nla_find(attrs, attrlen, RTA_FLOW);
-			nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
-			if (nexthop_nh->nh_tclassid)
-				fi->fib_net->ipv4.fib_num_tclassid_users++;
-#endif
-			nla = nla_find(attrs, attrlen, RTA_ENCAP);
+			nlav = nla_find(attrs, attrlen, RTA_VIA);
+			if (nla && nlav) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop configuration can not contain both GATEWAY and VIA");
+				return -EINVAL;
+			}
 			if (nla) {
-				struct lwtunnel_state *lwtstate;
-				struct nlattr *nla_entype;
-
-				nla_entype = nla_find(attrs, attrlen,
-						      RTA_ENCAP_TYPE);
-				if (!nla_entype) {
-					NL_SET_BAD_ATTR(extack, nla);
-					NL_SET_ERR_MSG(extack,
-						       "Encap type is missing");
-					goto err_inval;
-				}
-
-				ret = lwtunnel_build_state(nla_get_u16(
-							   nla_entype),
-							   nla,  AF_INET, cfg,
-							   &lwtstate, extack);
+				fib_cfg.fc_gw4 = nla_get_in_addr(nla);
+				if (fib_cfg.fc_gw4)
+					fib_cfg.fc_gw_family = AF_INET;
+			} else if (nlav) {
+				ret = fib_gw_from_via(&fib_cfg, nlav, extack);
 				if (ret)
 					goto errout;
-				nexthop_nh->nh_lwtstate =
-					lwtstate_get(lwtstate);
 			}
+
+			nla = nla_find(attrs, attrlen, RTA_FLOW);
+			if (nla)
+				fib_cfg.fc_flow = nla_get_u32(nla);
+
+			fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
+			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+			if (nla)
+				fib_cfg.fc_encap_type = nla_get_u16(nla);
 		}
 
+		ret = fib_nh_init(net, nexthop_nh, &fib_cfg,
+				  rtnh->rtnh_hops + 1, extack);
+		if (ret)
+			goto errout;
+
 		rtnh = rtnh_next(rtnh, &remaining);
 	} endfor_nexthops(fi);
 
-	return 0;
-
-err_inval:
 	ret = -EINVAL;
-
+	nh = fib_info_nh(fi, 0);
+	if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) {
+		NL_SET_ERR_MSG(extack,
+			       "Nexthop device index does not match RTA_OIF");
+		goto errout;
+	}
+	if (cfg->fc_gw_family) {
+		if (cfg->fc_gw_family != nh->fib_nh_gw_family ||
+		    (cfg->fc_gw_family == AF_INET &&
+		     nh->fib_nh_gw4 != cfg->fc_gw4) ||
+		    (cfg->fc_gw_family == AF_INET6 &&
+		     ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
+			goto errout;
+		}
+	}
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) {
+		NL_SET_ERR_MSG(extack,
+			       "Nexthop class id does not match RTA_FLOW");
+		goto errout;
+	}
+#endif
+	ret = 0;
 errout:
 	return ret;
 }
 
+/* only called when fib_nh is integrated into fib_info */
 static void fib_rebalance(struct fib_info *fi)
 {
 	int total;
 	int w;
-	struct in_device *in_dev;
 
-	if (fi->fib_nhs < 2)
+	if (fib_info_num_path(fi) < 2)
 		return;
 
 	total = 0;
 	for_nexthops(fi) {
-		if (nh->nh_flags & RTNH_F_DEAD)
+		if (nh->fib_nh_flags & RTNH_F_DEAD)
 			continue;
 
-		in_dev = __in_dev_get_rtnl(nh->nh_dev);
-
-		if (in_dev &&
-		    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-		    nh->nh_flags & RTNH_F_LINKDOWN)
+		if (ip_ignore_linkdown(nh->fib_nh_dev) &&
+		    nh->fib_nh_flags & RTNH_F_LINKDOWN)
 			continue;
 
-		total += nh->nh_weight;
+		total += nh->fib_nh_weight;
 	} endfor_nexthops(fi);
 
 	w = 0;
 	change_nexthops(fi) {
 		int upper_bound;
 
-		in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev);
-
-		if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
+		if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) {
 			upper_bound = -1;
-		} else if (in_dev &&
-			   IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-			   nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
+		} else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) &&
+			   nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
 			upper_bound = -1;
 		} else {
-			w += nexthop_nh->nh_weight;
+			w += nexthop_nh->fib_nh_weight;
 			upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
 							    total) - 1;
 		}
 
-		atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
+		atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound);
 	} endfor_nexthops(fi);
 }
 #else /* CONFIG_IP_ROUTE_MULTIPATH */
 
+static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+		       int remaining, struct fib_config *cfg,
+		       struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel");
+
+	return -EINVAL;
+}
+
 #define fib_rebalance(fi) do { } while (0)
 
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
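
Note on the fib_rebalance() hunk above: the established multipath algorithm is
kept while the fields are renamed. Each live nexthop gets an upper bound of
DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1 on what is effectively a 31-bit
hash, where w is the running sum of live weights; in the kernel,
DIV_ROUND_CLOSEST_ULL(x, d) is round-half-up division, (x + d/2) / d. A
standalone computation for two nexthops with weights 1 and 2, giving a 1/3 vs
2/3 split of the hash space:

#include <stdint.h>
#include <stdio.h>

static int64_t upper_bound(uint64_t w, uint64_t total)
{
	/* DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1 */
	return (int64_t)(((w << 31) + total / 2) / total) - 1;
}

int main(void)
{
	uint64_t total = 1 + 2;	/* two live nexthops, weights 1 and 2 */

	printf("%lld\n", (long long)upper_bound(1, total));	/* 715827882 */
	printf("%lld\n", (long long)upper_bound(3, total));	/* 2147483647 */
	return 0;
}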
@@ -622,7 +822,7 @@
 	ret = lwtunnel_build_state(encap_type, encap, AF_INET,
 				   cfg, &lwtstate, extack);
 	if (!ret) {
-		result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
+		result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws);
 		lwtstate_free(lwtstate);
 	}
 
@@ -640,21 +840,39 @@
 	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
 		return 1;
 
-	if (cfg->fc_oif || cfg->fc_gw) {
+	if (cfg->fc_nh_id) {
+		if (fi->nh && cfg->fc_nh_id == fi->nh->id)
+			return 0;
+		return 1;
+	}
+
+	if (cfg->fc_oif || cfg->fc_gw_family) {
+		struct fib_nh *nh = fib_info_nh(fi, 0);
+
 		if (cfg->fc_encap) {
 			if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
-					    fi->fib_nh, cfg, extack))
+					    nh, cfg, extack))
 				return 1;
 		}
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		if (cfg->fc_flow &&
-		    cfg->fc_flow != fi->fib_nh->nh_tclassid)
+		    cfg->fc_flow != nh->nh_tclassid)
 			return 1;
 #endif
-		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
-		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
-			return 0;
-		return 1;
+		if ((cfg->fc_oif && cfg->fc_oif != nh->fib_nh_oif) ||
+		    (cfg->fc_gw_family &&
+		     cfg->fc_gw_family != nh->fib_nh_gw_family))
+			return 1;
+
+		if (cfg->fc_gw_family == AF_INET &&
+		    cfg->fc_gw4 != nh->fib_nh_gw4)
+			return 1;
+
+		if (cfg->fc_gw_family == AF_INET6 &&
+		    ipv6_addr_cmp(&cfg->fc_gw6, &nh->fib_nh_gw6))
+			return 1;
+
+		return 0;
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -670,16 +888,48 @@
 		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
 
-		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
+		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif)
 			return 1;
 
 		attrlen = rtnh_attrlen(rtnh);
 		if (attrlen > 0) {
-			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+			struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
-			if (nla && nla_get_in_addr(nla) != nh->nh_gw)
-				return 1;
+			nlav = nla_find(attrs, attrlen, RTA_VIA);
+			if (nla && nlav) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop configuration can not contain both GATEWAY and VIA");
+				return -EINVAL;
+			}
+
+			if (nla) {
+				if (nh->fib_nh_gw_family != AF_INET ||
+				    nla_get_in_addr(nla) != nh->fib_nh_gw4)
+					return 1;
+			} else if (nlav) {
+				struct fib_config cfg2;
+				int err;
+
+				err = fib_gw_from_via(&cfg2, nlav, extack);
+				if (err)
+					return err;
+
+				switch (nh->fib_nh_gw_family) {
+				case AF_INET:
+					if (cfg2.fc_gw_family != AF_INET ||
+					    cfg2.fc_gw4 != nh->fib_nh_gw4)
+						return 1;
+					break;
+				case AF_INET6:
+					if (cfg2.fc_gw_family != AF_INET6 ||
+					    ipv6_addr_cmp(&cfg2.fc_gw6,
+							  &nh->fib_nh_gw6))
+						return 1;
+					break;
+				}
+			}
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
 			nla = nla_find(attrs, attrlen, RTA_FLOW);
 			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
@@ -733,6 +983,30 @@
 	return true;
 }
 
+static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh,
+			      u32 table, struct netlink_ext_ack *extack)
+{
+	struct fib6_config cfg = {
+		.fc_table = table,
+		.fc_flags = nh->fib_nh_flags | RTF_GATEWAY,
+		.fc_ifindex = nh->fib_nh_oif,
+		.fc_gateway = nh->fib_nh_gw6,
+	};
+	struct fib6_nh fib6_nh = {};
+	int err;
+
+	err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack);
+	if (!err) {
+		nh->fib_nh_dev = fib6_nh.fib_nh_dev;
+		dev_hold(nh->fib_nh_dev);
+		nh->fib_nh_oif = nh->fib_nh_dev->ifindex;
+		nh->fib_nh_scope = RT_SCOPE_LINK;
+
+		ipv6_stub->fib6_nh_release(&fib6_nh);
+	}
+
+	return err;
+}
 
 /*
  * Picture
@@ -777,131 +1051,150 @@
  *					|
  *					|-> {local prefix} (terminal node)
  */
-static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
-			struct netlink_ext_ack *extack)
+static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
+			      u8 scope, struct netlink_ext_ack *extack)
 {
-	int err = 0;
-	struct net *net;
 	struct net_device *dev;
+	struct fib_result res;
+	int err = 0;
 
-	net = cfg->fc_nlinfo.nl_net;
-	if (nh->nh_gw) {
-		struct fib_result res;
+	if (nh->fib_nh_flags & RTNH_F_ONLINK) {
+		unsigned int addr_type;
 
-		if (nh->nh_flags & RTNH_F_ONLINK) {
-			unsigned int addr_type;
-
-			if (cfg->fc_scope >= RT_SCOPE_LINK) {
-				NL_SET_ERR_MSG(extack,
-					       "Nexthop has invalid scope");
-				return -EINVAL;
-			}
-			dev = __dev_get_by_index(net, nh->nh_oif);
-			if (!dev)
-				return -ENODEV;
-			if (!(dev->flags & IFF_UP)) {
-				NL_SET_ERR_MSG(extack,
-					       "Nexthop device is not up");
-				return -ENETDOWN;
-			}
-			addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
-			if (addr_type != RTN_UNICAST) {
-				NL_SET_ERR_MSG(extack,
-					       "Nexthop has invalid gateway");
-				return -EINVAL;
-			}
-			if (!netif_carrier_ok(dev))
-				nh->nh_flags |= RTNH_F_LINKDOWN;
-			nh->nh_dev = dev;
-			dev_hold(dev);
-			nh->nh_scope = RT_SCOPE_LINK;
-			return 0;
+		if (scope >= RT_SCOPE_LINK) {
+			NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
+			return -EINVAL;
 		}
-		rcu_read_lock();
-		{
-			struct fib_table *tbl = NULL;
-			struct flowi4 fl4 = {
-				.daddr = nh->nh_gw,
-				.flowi4_scope = cfg->fc_scope + 1,
-				.flowi4_oif = nh->nh_oif,
-				.flowi4_iif = LOOPBACK_IFINDEX,
-			};
-
-			/* It is not necessary, but requires a bit of thinking */
-			if (fl4.flowi4_scope < RT_SCOPE_LINK)
-				fl4.flowi4_scope = RT_SCOPE_LINK;
-
-			if (cfg->fc_table)
-				tbl = fib_get_table(net, cfg->fc_table);
-
-			if (tbl)
-				err = fib_table_lookup(tbl, &fl4, &res,
-						       FIB_LOOKUP_IGNORE_LINKSTATE |
-						       FIB_LOOKUP_NOREF);
-
-			/* on error or if no table given do full lookup. This
-			 * is needed for example when nexthops are in the local
-			 * table rather than the given table
-			 */
-			if (!tbl || err) {
-				err = fib_lookup(net, &fl4, &res,
-						 FIB_LOOKUP_IGNORE_LINKSTATE);
-			}
-
-			if (err) {
-				NL_SET_ERR_MSG(extack,
-					       "Nexthop has invalid gateway");
-				rcu_read_unlock();
-				return err;
-			}
+		dev = __dev_get_by_index(net, nh->fib_nh_oif);
+		if (!dev) {
+			NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
+			return -ENODEV;
 		}
-		err = -EINVAL;
-		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
+		if (!(dev->flags & IFF_UP)) {
+			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+			return -ENETDOWN;
+		}
+		addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4);
+		if (addr_type != RTN_UNICAST) {
+			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+			return -EINVAL;
+		}
+		if (!netif_carrier_ok(dev))
+			nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+		nh->fib_nh_dev = dev;
+		dev_hold(dev);
+		nh->fib_nh_scope = RT_SCOPE_LINK;
+		return 0;
+	}
+	rcu_read_lock();
+	{
+		struct fib_table *tbl = NULL;
+		struct flowi4 fl4 = {
+			.daddr = nh->fib_nh_gw4,
+			.flowi4_scope = scope + 1,
+			.flowi4_oif = nh->fib_nh_oif,
+			.flowi4_iif = LOOPBACK_IFINDEX,
+		};
+
+		/* It is not necessary, but requires a bit of thinking */
+		if (fl4.flowi4_scope < RT_SCOPE_LINK)
+			fl4.flowi4_scope = RT_SCOPE_LINK;
+
+		if (table)
+			tbl = fib_get_table(net, table);
+
+		if (tbl)
+			err = fib_table_lookup(tbl, &fl4, &res,
+					       FIB_LOOKUP_IGNORE_LINKSTATE |
+					       FIB_LOOKUP_NOREF);
+
+		/* on error or if no table given do full lookup. This
+		 * is needed for example when nexthops are in the local
+		 * table rather than the given table
+		 */
+		if (!tbl || err) {
+			err = fib_lookup(net, &fl4, &res,
+					 FIB_LOOKUP_IGNORE_LINKSTATE);
+		}
+
+		if (err) {
 			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
 			goto out;
 		}
-		nh->nh_scope = res.scope;
-		nh->nh_oif = FIB_RES_OIF(res);
-		nh->nh_dev = dev = FIB_RES_DEV(res);
-		if (!dev) {
-			NL_SET_ERR_MSG(extack,
-				       "No egress device for nexthop gateway");
-			goto out;
-		}
-		dev_hold(dev);
-		if (!netif_carrier_ok(dev))
-			nh->nh_flags |= RTNH_F_LINKDOWN;
-		err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
-	} else {
-		struct in_device *in_dev;
-
-		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
-			NL_SET_ERR_MSG(extack,
-				       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
-			return -EINVAL;
-		}
-		rcu_read_lock();
-		err = -ENODEV;
-		in_dev = inetdev_by_index(net, nh->nh_oif);
-		if (!in_dev)
-			goto out;
-		err = -ENETDOWN;
-		if (!(in_dev->dev->flags & IFF_UP)) {
-			NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
-			goto out;
-		}
-		nh->nh_dev = in_dev->dev;
-		dev_hold(nh->nh_dev);
-		nh->nh_scope = RT_SCOPE_HOST;
-		if (!netif_carrier_ok(nh->nh_dev))
-			nh->nh_flags |= RTNH_F_LINKDOWN;
-		err = 0;
 	}
+
+	err = -EINVAL;
+	if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
+		NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+		goto out;
+	}
+	nh->fib_nh_scope = res.scope;
+	nh->fib_nh_oif = FIB_RES_OIF(res);
+	nh->fib_nh_dev = dev = FIB_RES_DEV(res);
+	if (!dev) {
+		NL_SET_ERR_MSG(extack,
+			       "No egress device for nexthop gateway");
+		goto out;
+	}
+	dev_hold(dev);
+	if (!netif_carrier_ok(dev))
+		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+	err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
 out:
 	rcu_read_unlock();
 	return err;
 }
 
+static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
+			      struct netlink_ext_ack *extack)
+{
+	struct in_device *in_dev;
+	int err;
+
+	if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
+		return -EINVAL;
+	}
+
+	rcu_read_lock();
+
+	err = -ENODEV;
+	in_dev = inetdev_by_index(net, nh->fib_nh_oif);
+	if (!in_dev)
+		goto out;
+	err = -ENETDOWN;
+	if (!(in_dev->dev->flags & IFF_UP)) {
+		NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
+		goto out;
+	}
+
+	nh->fib_nh_dev = in_dev->dev;
+	dev_hold(nh->fib_nh_dev);
+	nh->fib_nh_scope = RT_SCOPE_HOST;
+	if (!netif_carrier_ok(nh->fib_nh_dev))
+		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+	err = 0;
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
+		 struct netlink_ext_ack *extack)
+{
+	int err;
+
+	if (nh->fib_nh_gw_family == AF_INET)
+		err = fib_check_nh_v4_gw(net, nh, table, scope, extack);
+	else if (nh->fib_nh_gw_family == AF_INET6)
+		err = fib_check_nh_v6_gw(net, nh, table, extack);
+	else
+		err = fib_check_nh_nongw(net, nh, extack);
+
+	return err;
+}
+
 static inline unsigned int fib_laddr_hashfn(__be32 val)
 {
 	unsigned int mask = (fib_info_hash_size - 1);
@@ -984,16 +1277,39 @@
 	fib_info_hash_free(old_laddrhash, bytes);
 }
 
-__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
+__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
+				 unsigned char scope)
 {
-	nh->nh_saddr = inet_select_addr(nh->nh_dev,
-					nh->nh_gw,
-					nh->nh_parent->fib_scope);
+	struct fib_nh *nh;
+
+	if (nhc->nhc_family != AF_INET)
+		return inet_select_addr(nhc->nhc_dev, 0, scope);
+
+	nh = container_of(nhc, struct fib_nh, nh_common);
+	nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
 	nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
 
 	return nh->nh_saddr;
 }
 
+__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
+{
+	struct fib_nh_common *nhc = res->nhc;
+
+	if (res->fi->fib_prefsrc)
+		return res->fi->fib_prefsrc;
+
+	if (nhc->nhc_family == AF_INET) {
+		struct fib_nh *nh;
+
+		nh = container_of(nhc, struct fib_nh, nh_common);
+		if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
+			return nh->nh_saddr;
+	}
+
+	return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
+}
+
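+
The saddr/prefsrc helpers above now take the family-agnostic struct fib_nh_common and use container_of() to get back to the IPv4-specific struct fib_nh when the family is AF_INET. A self-contained sketch of that embed-and-recover pattern (simplified stand-in structs, not the kernel's full definitions):

#include <stddef.h>
#include <stdio.h>

/* simplified stand-ins for the kernel's fib_nh_common / fib_nh */
struct fib_nh_common {
	int nhc_family;			/* AF_INET, AF_INET6, ... */
	int nhc_oif;
};

struct fib_nh {
	struct fib_nh_common nh_common;	/* embedded, family-agnostic part */
	unsigned int nh_saddr;		/* IPv4-only state */
};

/* container_of(): recover the enclosing struct from a member pointer */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct fib_nh nh = { .nh_common = { .nhc_family = 2 }, .nh_saddr = 42 };
	struct fib_nh_common *nhc = &nh.nh_common;	/* what lookup code passes around */

	/* IPv4-specific callers walk back to the wrapper, as fib_info_update_nhc_saddr() does */
	struct fib_nh *back = container_of(nhc, struct fib_nh, nh_common);
	printf("saddr=%u\n", back->nh_saddr);		/* prints 42 */
	return 0;
}
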
 static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
 {
 	if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
@@ -1018,18 +1334,12 @@
 	return true;
 }
 
-static int
-fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
-{
-	return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
-				  fi->fib_metrics->metrics);
-}
-
 struct fib_info *fib_create_info(struct fib_config *cfg,
 				 struct netlink_ext_ack *extack)
 {
 	int err;
 	struct fib_info *fi = NULL;
+	struct nexthop *nh = NULL;
 	struct fib_info *ofi;
 	int nhs = 1;
 	struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1049,6 +1359,23 @@
 		goto err_inval;
 	}
 
+	if (cfg->fc_nh_id) {
+		if (!cfg->fc_mx) {
+			fi = fib_find_info_nh(net, cfg);
+			if (fi) {
+				fi->fib_treeref++;
+				return fi;
+			}
+		}
+
+		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+		if (!nh) {
+			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+			goto err_inval;
+		}
+		nhs = 0;
+	}
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (cfg->fc_mp) {
 		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
@@ -1079,19 +1406,17 @@
 			goto failure;
 	}
 
-	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+	fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
 	if (!fi)
 		goto failure;
-	if (cfg->fc_mx) {
-		fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
-		if (unlikely(!fi->fib_metrics)) {
-			kfree(fi);
-			return ERR_PTR(err);
-		}
-		refcount_set(&fi->fib_metrics->refcnt, 1);
-	} else {
-		fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
+	fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
+					      cfg->fc_mx_len, extack);
+	if (IS_ERR(fi->fib_metrics)) {
+		err = PTR_ERR(fi->fib_metrics);
+		kfree(fi);
+		return ERR_PTR(err);
 	}
+
 	fib_info_cnt++;
 	fi->fib_net = net;
 	fi->fib_protocol = cfg->fc_protocol;
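
The allocation above switches to struct_size(fi, fib_nh, nhs), the linux/overflow.h helper for sizing a struct with a trailing flexible array; unlike the open-coded sizeof arithmetic it replaces, the kernel macro saturates instead of wrapping on overflow. A rough userspace approximation of what it computes (without the saturation semantics):

#include <stdlib.h>

struct item { int v; };

struct holder {
	int count;
	struct item items[];	/* flexible array member, like fi->fib_nh */
};

/* roughly sizeof(*p) + n * sizeof(p->member[0]); the kernel macro
 * additionally clamps to SIZE_MAX if the multiply or add overflows */
#define STRUCT_SIZE(p, member, n) \
	(sizeof(*(p)) + (n) * sizeof((p)->member[0]))

struct holder *holder_alloc(int n)
{
	struct holder *h = calloc(1, STRUCT_SIZE(h, items, (size_t)n));

	if (h)
		h->count = n;
	return h;
}
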
@@ -1103,78 +1428,31 @@
 	fi->fib_tb_id = cfg->fc_table;
 
 	fi->fib_nhs = nhs;
-	change_nexthops(fi) {
-		nexthop_nh->nh_parent = fi;
-		nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
-		if (!nexthop_nh->nh_pcpu_rth_output)
-			goto failure;
-	} endfor_nexthops(fi)
-
-	err = fib_convert_metrics(fi, cfg);
-	if (err)
-		goto failure;
-
-	if (cfg->fc_mp) {
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
-		if (err != 0)
-			goto failure;
-		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) {
-			NL_SET_ERR_MSG(extack,
-				       "Nexthop device index does not match RTA_OIF");
-			goto err_inval;
+	if (nh) {
+		if (!nexthop_get(nh)) {
+			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+			err = -EINVAL;
+		} else {
+			err = 0;
+			fi->nh = nh;
 		}
-		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) {
-			NL_SET_ERR_MSG(extack,
-				       "Nexthop gateway does not match RTA_GATEWAY");
-			goto err_inval;
-		}
-#ifdef CONFIG_IP_ROUTE_CLASSID
-		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
-			NL_SET_ERR_MSG(extack,
-				       "Nexthop class id does not match RTA_FLOW");
-			goto err_inval;
-		}
-#endif
-#else
-		NL_SET_ERR_MSG(extack,
-			       "Multipath support not enabled in kernel");
-		goto err_inval;
-#endif
 	} else {
-		struct fib_nh *nh = fi->fib_nh;
+		change_nexthops(fi) {
+			nexthop_nh->nh_parent = fi;
+		} endfor_nexthops(fi)
 
-		if (cfg->fc_encap) {
-			struct lwtunnel_state *lwtstate;
-
-			if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) {
-				NL_SET_ERR_MSG(extack,
-					       "LWT encap type not specified");
-				goto err_inval;
-			}
-			err = lwtunnel_build_state(cfg->fc_encap_type,
-						   cfg->fc_encap, AF_INET, cfg,
-						   &lwtstate, extack);
-			if (err)
-				goto failure;
-
-			nh->nh_lwtstate = lwtstate_get(lwtstate);
-		}
-		nh->nh_oif = cfg->fc_oif;
-		nh->nh_gw = cfg->fc_gw;
-		nh->nh_flags = cfg->fc_flags;
-#ifdef CONFIG_IP_ROUTE_CLASSID
-		nh->nh_tclassid = cfg->fc_flow;
-		if (nh->nh_tclassid)
-			fi->fib_net->ipv4.fib_num_tclassid_users++;
-#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-		nh->nh_weight = 1;
-#endif
+		if (cfg->fc_mp)
+			err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg,
+					  extack);
+		else
+			err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
 	}
 
+	if (err != 0)
+		goto failure;
+
 	if (fib_props[cfg->fc_type].error) {
-		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
+		if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
 			NL_SET_ERR_MSG(extack,
 				       "Gateway, device and multipath can not be specified for this route type");
 			goto err_inval;
@@ -1199,7 +1477,11 @@
 		goto err_inval;
 	}
 
-	if (cfg->fc_scope == RT_SCOPE_HOST) {
+	if (fi->nh) {
+		err = fib_check_nexthop(fi->nh, cfg->fc_scope, extack);
+		if (err)
+			goto failure;
+	} else if (cfg->fc_scope == RT_SCOPE_HOST) {
 		struct fib_nh *nh = fi->fib_nh;
 
 		/* Local address is added. */
@@ -1208,24 +1490,26 @@
 				       "Route with host scope can not have multiple nexthops");
 			goto err_inval;
 		}
-		if (nh->nh_gw) {
+		if (nh->fib_nh_gw_family) {
 			NL_SET_ERR_MSG(extack,
 				       "Route with host scope can not have a gateway");
 			goto err_inval;
 		}
-		nh->nh_scope = RT_SCOPE_NOWHERE;
-		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
+		nh->fib_nh_scope = RT_SCOPE_NOWHERE;
+		nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif);
 		err = -ENODEV;
-		if (!nh->nh_dev)
+		if (!nh->fib_nh_dev)
 			goto failure;
 	} else {
 		int linkdown = 0;
 
 		change_nexthops(fi) {
-			err = fib_check_nh(cfg, nexthop_nh, extack);
+			err = fib_check_nh(cfg->fc_nlinfo.nl_net, nexthop_nh,
+					   cfg->fc_table, cfg->fc_scope,
+					   extack);
 			if (err != 0)
 				goto failure;
-			if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
+			if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN)
 				linkdown++;
 		} endfor_nexthops(fi)
 		if (linkdown == fi->fib_nhs)
@@ -1237,11 +1521,16 @@
 		goto err_inval;
 	}
 
-	change_nexthops(fi) {
-		fib_info_update_nh_saddr(net, nexthop_nh);
-	} endfor_nexthops(fi)
+	if (!fi->nh) {
+		change_nexthops(fi) {
+			fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common,
+						  fi->fib_scope);
+			if (nexthop_nh->fib_nh_gw_family == AF_INET6)
+				fi->fib_nh_is_v6 = true;
+		} endfor_nexthops(fi)
 
-	fib_rebalance(fi);
+		fib_rebalance(fi);
+	}
 
 link_it:
 	ofi = fib_find_info(fi);
@@ -1263,16 +1552,20 @@
 		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
 		hlist_add_head(&fi->fib_lhash, head);
 	}
-	change_nexthops(fi) {
-		struct hlist_head *head;
-		unsigned int hash;
+	if (fi->nh) {
+		list_add(&fi->nh_list, &nh->fi_list);
+	} else {
+		change_nexthops(fi) {
+			struct hlist_head *head;
+			unsigned int hash;
 
-		if (!nexthop_nh->nh_dev)
-			continue;
-		hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
-		head = &fib_info_devhash[hash];
-		hlist_add_head(&nexthop_nh->nh_hash, head);
-	} endfor_nexthops(fi)
+			if (!nexthop_nh->fib_nh_dev)
+				continue;
+			hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
+			head = &fib_info_devhash[hash];
+			hlist_add_head(&nexthop_nh->nh_hash, head);
+		} endfor_nexthops(fi)
+	}
 	spin_unlock_bh(&fib_info_lock);
 	return fi;
 
@@ -1288,10 +1581,154 @@
 	return ERR_PTR(err);
 }
 
+int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
+		     u8 rt_family, unsigned char *flags, bool skip_oif)
+{
+	if (nhc->nhc_flags & RTNH_F_DEAD)
+		*flags |= RTNH_F_DEAD;
+
+	if (nhc->nhc_flags & RTNH_F_LINKDOWN) {
+		*flags |= RTNH_F_LINKDOWN;
+
+		rcu_read_lock();
+		switch (nhc->nhc_family) {
+		case AF_INET:
+			if (ip_ignore_linkdown(nhc->nhc_dev))
+				*flags |= RTNH_F_DEAD;
+			break;
+		case AF_INET6:
+			if (ip6_ignore_linkdown(nhc->nhc_dev))
+				*flags |= RTNH_F_DEAD;
+			break;
+		}
+		rcu_read_unlock();
+	}
+
+	switch (nhc->nhc_gw_family) {
+	case AF_INET:
+		if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
+			goto nla_put_failure;
+		break;
+	case AF_INET6:
+		/* If the gateway family does not match the nexthop
+		 * family, the gateway is encoded as RTA_VIA.
+		 */
+		if (rt_family != nhc->nhc_gw_family) {
+			int alen = sizeof(struct in6_addr);
+			struct nlattr *nla;
+			struct rtvia *via;
+
+			nla = nla_reserve(skb, RTA_VIA, alen + 2);
+			if (!nla)
+				goto nla_put_failure;
+
+			via = nla_data(nla);
+			via->rtvia_family = AF_INET6;
+			memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen);
+		} else if (nla_put_in6_addr(skb, RTA_GATEWAY,
+					    &nhc->nhc_gw.ipv6) < 0) {
+			goto nla_put_failure;
+		}
+		break;
+	}
+
+	*flags |= (nhc->nhc_flags & RTNH_F_ONLINK);
+	if (nhc->nhc_flags & RTNH_F_OFFLOAD)
+		*flags |= RTNH_F_OFFLOAD;
+
+	if (!skip_oif && nhc->nhc_dev &&
+	    nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex))
+		goto nla_put_failure;
+
+	if (nhc->nhc_lwtstate &&
+	    lwtunnel_fill_encap(skb, nhc->nhc_lwtstate,
+				RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(fib_nexthop_info);
+
+#if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
+int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
+		    int nh_weight, u8 rt_family)
+{
+	const struct net_device *dev = nhc->nhc_dev;
+	struct rtnexthop *rtnh;
+	unsigned char flags = 0;
+
+	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
+	if (!rtnh)
+		goto nla_put_failure;
+
+	rtnh->rtnh_hops = nh_weight - 1;
+	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
+
+	if (fib_nexthop_info(skb, nhc, rt_family, &flags, true) < 0)
+		goto nla_put_failure;
+
+	rtnh->rtnh_flags = flags;
+
+	/* length of rtnetlink header + attributes */
+	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(fib_add_nexthop);
+#endif
+
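+
fib_add_nexthop() above uses the usual netlink "reserve, append, backfill" idiom: the rtnexthop header is reserved first, nested attributes are appended after it, and rtnh_len is then computed as the distance from the header to the current tail. The same idiom in a standalone buffer sketch (hypothetical types, not the skb API):

#include <stdint.h>
#include <string.h>

struct buf { unsigned char data[256]; size_t len; };

struct rtnh_hdr { uint16_t len; uint16_t ifindex; };	/* stand-in for struct rtnexthop */

static void *buf_reserve(struct buf *b, size_t n)
{
	void *p = b->data + b->len;

	b->len += n;
	return p;
}

static void emit_nexthop(struct buf *b, uint16_t ifindex,
			 const void *attrs, size_t attrs_len)
{
	/* 1. reserve the header before its final length is known */
	struct rtnh_hdr *h = buf_reserve(b, sizeof(*h));

	h->ifindex = ifindex;

	/* 2. append the nested attributes */
	memcpy(buf_reserve(b, attrs_len), attrs, attrs_len);

	/* 3. backfill: the length covers the header plus everything appended since */
	h->len = (uint16_t)((b->data + b->len) - (unsigned char *)h);
}
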
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
+{
+	struct nlattr *mp;
+
+	mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
+	if (!mp)
+		goto nla_put_failure;
+
+	if (unlikely(fi->nh)) {
+		if (nexthop_mpath_fill_node(skb, fi->nh, AF_INET) < 0)
+			goto nla_put_failure;
+		goto mp_end;
+	}
+
+	for_nexthops(fi) {
+		if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
+				    AF_INET) < 0)
+			goto nla_put_failure;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+		if (nh->nh_tclassid &&
+		    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
+			goto nla_put_failure;
+#endif
+	} endfor_nexthops(fi);
+
+mp_end:
+	nla_nest_end(skb, mp);
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+#else
+static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
+{
+	return 0;
+}
+#endif
+
 int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		  u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
 		  struct fib_info *fi, unsigned int flags)
 {
+	unsigned int nhs = fib_info_num_path(fi);
 	struct nlmsghdr *nlh;
 	struct rtmsg *rtm;
 
@@ -1327,81 +1764,37 @@
 	if (fi->fib_prefsrc &&
 	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
 		goto nla_put_failure;
-	if (fi->fib_nhs == 1) {
-		if (fi->fib_nh->nh_gw &&
-		    nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
-			goto nla_put_failure;
-		if (fi->fib_nh->nh_oif &&
-		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
-			goto nla_put_failure;
-		if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
-			struct in_device *in_dev;
 
-			rcu_read_lock();
-			in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
-			if (in_dev &&
-			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
-				rtm->rtm_flags |= RTNH_F_DEAD;
-			rcu_read_unlock();
-		}
-		if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
-			rtm->rtm_flags |= RTNH_F_OFFLOAD;
-#ifdef CONFIG_IP_ROUTE_CLASSID
-		if (fi->fib_nh[0].nh_tclassid &&
-		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
+	if (fi->nh) {
+		if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id))
 			goto nla_put_failure;
-#endif
-		if (fi->fib_nh->nh_lwtstate &&
-		    lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0)
-			goto nla_put_failure;
+		if (nexthop_is_blackhole(fi->nh))
+			rtm->rtm_type = RTN_BLACKHOLE;
 	}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (fi->fib_nhs > 1) {
-		struct rtnexthop *rtnh;
-		struct nlattr *mp;
 
-		mp = nla_nest_start(skb, RTA_MULTIPATH);
-		if (!mp)
+	if (nhs == 1) {
+		const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
+		unsigned char flags = 0;
+
+		if (fib_nexthop_info(skb, nhc, AF_INET, &flags, false) < 0)
 			goto nla_put_failure;
 
-		for_nexthops(fi) {
-			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
-			if (!rtnh)
-				goto nla_put_failure;
-
-			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
-			if (nh->nh_flags & RTNH_F_LINKDOWN) {
-				struct in_device *in_dev;
-
-				rcu_read_lock();
-				in_dev = __in_dev_get_rcu(nh->nh_dev);
-				if (in_dev &&
-				    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
-					rtnh->rtnh_flags |= RTNH_F_DEAD;
-				rcu_read_unlock();
-			}
-			rtnh->rtnh_hops = nh->nh_weight - 1;
-			rtnh->rtnh_ifindex = nh->nh_oif;
-
-			if (nh->nh_gw &&
-			    nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw))
-				goto nla_put_failure;
+		rtm->rtm_flags = flags;
 #ifdef CONFIG_IP_ROUTE_CLASSID
+		if (nhc->nhc_family == AF_INET) {
+			struct fib_nh *nh;
+
+			nh = container_of(nhc, struct fib_nh, nh_common);
 			if (nh->nh_tclassid &&
 			    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
 				goto nla_put_failure;
+		}
 #endif
-			if (nh->nh_lwtstate &&
-			    lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0)
-				goto nla_put_failure;
-
-			/* length of rtnetlink header + attributes */
-			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
-		} endfor_nexthops(fi);
-
-		nla_nest_end(skb, mp);
+	} else {
+		if (fib_add_multipath(skb, fi) < 0)
+			goto nla_put_failure;
 	}
-#endif
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -1421,8 +1814,8 @@
 	int ret = 0;
 	unsigned int hash = fib_laddr_hashfn(local);
 	struct hlist_head *head = &fib_info_laddrhash[hash];
+	int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
 	struct net *net = dev_net(dev);
-	int tb_id = l3mdev_fib_table(dev);
 	struct fib_info *fi;
 
 	if (!fib_info_laddrhash || local == 0)
@@ -1440,28 +1833,26 @@
 	return ret;
 }
 
-static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
+static int call_fib_nh_notifiers(struct fib_nh *nh,
 				 enum fib_event_type event_type)
 {
-	struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev);
+	bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev);
 	struct fib_nh_notifier_info info = {
-		.fib_nh = fib_nh,
+		.fib_nh = nh,
 	};
 
 	switch (event_type) {
 	case FIB_EVENT_NH_ADD:
-		if (fib_nh->nh_flags & RTNH_F_DEAD)
+		if (nh->fib_nh_flags & RTNH_F_DEAD)
 			break;
-		if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-		    fib_nh->nh_flags & RTNH_F_LINKDOWN)
+		if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN)
 			break;
-		return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
+		return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type,
 					   &info.info);
 	case FIB_EVENT_NH_DEL:
-		if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-		     fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
-		    (fib_nh->nh_flags & RTNH_F_DEAD))
-			return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
+		if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) ||
+		    (nh->fib_nh_flags & RTNH_F_DEAD))
+			return call_fib4_notifiers(dev_net(nh->fib_nh_dev),
 						   event_type, &info.info);
 	default:
 		break;
@@ -1480,12 +1871,12 @@
  * - if the new MTU is greater than the PMTU, don't make any change
  * - otherwise, unlock and set PMTU
  */
-static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
+void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
 {
 	struct fnhe_hash_bucket *bucket;
 	int i;
 
-	bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
+	bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
 	if (!bucket)
 		return;
 
@@ -1515,8 +1906,8 @@
 	struct fib_nh *nh;
 
 	hlist_for_each_entry(nh, head, nh_hash) {
-		if (nh->nh_dev == dev)
-			nh_update_mtu(nh, dev->mtu, orig_mtu);
+		if (nh->fib_nh_dev == dev)
+			fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
 	}
 }
 
@@ -1525,6 +1916,8 @@
  * NETDEV_DOWN        0     LINKDOWN|DEAD   Link down, not for scope host
  * NETDEV_DOWN        1     LINKDOWN|DEAD   Last address removed
  * NETDEV_UNREGISTER  1     LINKDOWN|DEAD   Device removed
+ *
+ * Only used when the fib_nh is built into the fib_info.
  */
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
 {
@@ -1543,22 +1936,22 @@
 		int dead;
 
 		BUG_ON(!fi->fib_nhs);
-		if (nh->nh_dev != dev || fi == prev_fi)
+		if (nh->fib_nh_dev != dev || fi == prev_fi)
 			continue;
 		prev_fi = fi;
 		dead = 0;
 		change_nexthops(fi) {
-			if (nexthop_nh->nh_flags & RTNH_F_DEAD)
+			if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD)
 				dead++;
-			else if (nexthop_nh->nh_dev == dev &&
-				 nexthop_nh->nh_scope != scope) {
+			else if (nexthop_nh->fib_nh_dev == dev &&
+				 nexthop_nh->fib_nh_scope != scope) {
 				switch (event) {
 				case NETDEV_DOWN:
 				case NETDEV_UNREGISTER:
-					nexthop_nh->nh_flags |= RTNH_F_DEAD;
+					nexthop_nh->fib_nh_flags |= RTNH_F_DEAD;
 					/* fall through */
 				case NETDEV_CHANGE:
-					nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
+					nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
 					break;
 				}
 				call_fib_nh_notifiers(nexthop_nh,
@@ -1567,7 +1960,7 @@
 			}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 			if (event == NETDEV_UNREGISTER &&
-			    nexthop_nh->nh_dev == dev) {
+			    nexthop_nh->fib_nh_dev == dev) {
 				dead = fi->fib_nhs;
 				break;
 			}
@@ -1606,6 +1999,7 @@
 
 	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
 		struct fib_info *next_fi = fa->fa_info;
+		struct fib_nh *nh;
 
 		if (fa->fa_slen != slen)
 			continue;
@@ -1627,8 +2021,9 @@
 		if (next_fi->fib_scope != res->scope ||
 		    fa->fa_type != RTN_UNICAST)
 			continue;
-		if (!next_fi->fib_nh[0].nh_gw ||
-		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
+
+		nh = fib_info_nh(next_fi, 0);
+		if (!nh->fib_nh_gw4 || nh->fib_nh_scope != RT_SCOPE_LINK)
 			continue;
 
 		fib_alias_accessed(fa);
@@ -1670,8 +2065,10 @@
 /*
  * Dead device goes up. We wake up dead nexthops.
  * It makes sense only for multipath routes.
+ *
+ * Only used when the fib_nh is built into the fib_info.
  */
-int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
+int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
 {
 	struct fib_info *prev_fi;
 	unsigned int hash;
@@ -1699,24 +2096,24 @@
 		int alive;
 
 		BUG_ON(!fi->fib_nhs);
-		if (nh->nh_dev != dev || fi == prev_fi)
+		if (nh->fib_nh_dev != dev || fi == prev_fi)
 			continue;
 
 		prev_fi = fi;
 		alive = 0;
 		change_nexthops(fi) {
-			if (!(nexthop_nh->nh_flags & nh_flags)) {
+			if (!(nexthop_nh->fib_nh_flags & nh_flags)) {
 				alive++;
 				continue;
 			}
-			if (!nexthop_nh->nh_dev ||
-			    !(nexthop_nh->nh_dev->flags & IFF_UP))
+			if (!nexthop_nh->fib_nh_dev ||
+			    !(nexthop_nh->fib_nh_dev->flags & IFF_UP))
 				continue;
-			if (nexthop_nh->nh_dev != dev ||
+			if (nexthop_nh->fib_nh_dev != dev ||
 			    !__in_dev_get_rtnl(dev))
 				continue;
 			alive++;
-			nexthop_nh->nh_flags &= ~nh_flags;
+			nexthop_nh->fib_nh_flags &= ~nh_flags;
 			call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
 		} endfor_nexthops(fi)
 
@@ -1736,13 +2133,19 @@
 {
 	int state = NUD_REACHABLE;
 
-	if (nh->nh_scope == RT_SCOPE_LINK) {
+	if (nh->fib_nh_scope == RT_SCOPE_LINK) {
 		struct neighbour *n;
 
 		rcu_read_lock_bh();
 
-		n = __ipv4_neigh_lookup_noref(nh->nh_dev,
-					      (__force u32)nh->nh_gw);
+		if (likely(nh->fib_nh_gw_family == AF_INET))
+			n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
+						   (__force u32)nh->fib_nh_gw4);
+		else if (nh->fib_nh_gw_family == AF_INET6)
+			n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
+							   &nh->fib_nh_gw6);
+		else
+			n = NULL;
 		if (n)
 			state = n->nud_state;
 
@@ -1758,20 +2161,27 @@
 	struct net *net = fi->fib_net;
 	bool first = false;
 
-	for_nexthops(fi) {
+	if (unlikely(res->fi->nh)) {
+		nexthop_path_fib_result(res, hash);
+		return;
+	}
+
+	change_nexthops(fi) {
 		if (net->ipv4.sysctl_fib_multipath_use_neigh) {
-			if (!fib_good_nh(nh))
+			if (!fib_good_nh(nexthop_nh))
 				continue;
 			if (!first) {
 				res->nh_sel = nhsel;
+				res->nhc = &nexthop_nh->nh_common;
 				first = true;
 			}
 		}
 
-		if (hash > atomic_read(&nh->nh_upper_bound))
+		if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound))
 			continue;
 
 		res->nh_sel = nhsel;
+		res->nhc = &nexthop_nh->nh_common;
 		return;
 	} endfor_nexthops(fi);
 }
@@ -1784,7 +2194,7 @@
 		goto check_saddr;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res->fi->fib_nhs > 1) {
+	if (fib_info_num_path(res->fi) > 1) {
 		int h = fib_multipath_hash(net, fl4, skb, NULL);
 
 		fib_select_multipath(res, h);
@@ -1798,5 +2208,5 @@
 
 check_saddr:
 	if (!fl4->saddr)
-		fl4->saddr = FIB_RES_PREFSRC(net, *res);
+		fl4->saddr = fib_result_prefsrc(net, res);
 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5bc0c89..1ab2fb6 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- *   This program is free software; you can redistribute it and/or
- *   modify it under the terms of the GNU General Public License
- *   as published by the Free Software Foundation; either version
- *   2 of the License, or (at your option) any later version.
  *
  *   Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
  *     & Swedish University of Agricultural Sciences.
@@ -18,28 +15,19 @@
  * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
  * http://www.csc.kth.se/~snilsson/software/dyntrie2/
  *
- *
  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
  *
- *
  * Code from fib_hash has been reused which includes the following header:
  *
- *
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
  *		interface as the means of communication with the user level.
  *
  *		IPv4 FIB: lookup engine and maintenance routines.
  *
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Substantial contributions to this work comes from:
  *
  *		David S. Miller, <davem@davemloft.net>
@@ -183,14 +171,16 @@
 };
 
 static struct key_vector *resize(struct trie *t, struct key_vector *tn);
-static size_t tnode_free_size;
+static unsigned int tnode_free_size;
 
 /*
- * synchronize_rcu after call_rcu for that many pages; it should be especially
- * useful before resizing the root node with PREEMPT_NONE configs; the value was
- * obtained experimentally, aiming to avoid visible slowdown.
+ * synchronize_rcu after call_rcu for outstanding dirty memory; it should be
+ * especially useful before resizing the root node with PREEMPT_NONE configs;
+ * the value was obtained experimentally, aiming to avoid visible slowdown.
  */
-static const int sync_pages = 128;
+unsigned int sysctl_fib_sync_mem = 512 * 1024;
+unsigned int sysctl_fib_sync_mem_min = 64 * 1024;
+unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024;
 
 static struct kmem_cache *fn_alias_kmem __ro_after_init;
 static struct kmem_cache *trie_leaf_kmem __ro_after_init;
@@ -348,12 +338,18 @@
 
 static inline void empty_child_inc(struct key_vector *n)
 {
-	++tn_info(n)->empty_children ? : ++tn_info(n)->full_children;
+	tn_info(n)->empty_children++;
+
+	if (!tn_info(n)->empty_children)
+		tn_info(n)->full_children++;
 }
 
 static inline void empty_child_dec(struct key_vector *n)
 {
-	tn_info(n)->empty_children-- ? : tn_info(n)->full_children--;
+	if (!tn_info(n)->empty_children)
+		tn_info(n)->full_children--;
+
+	tn_info(n)->empty_children--;
 }
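
The empty_child_inc()/empty_child_dec() rewrite above keeps the carry semantics of the removed ternary one-liners while making them legible: empty_children wrapping to zero means "all 2^bits children are empty", with the carry or borrow absorbed by full_children. A small demonstration of the carry using an 8-bit counter (illustrative widths only):

#include <stdio.h>

struct tn { unsigned char empty_children; unsigned char full_children; };

static void inc(struct tn *t)
{
	t->empty_children++;
	if (!t->empty_children)		/* wrapped 255 -> 0: carry into full_children */
		t->full_children++;
}

static void dec(struct tn *t)
{
	if (!t->empty_children)		/* about to wrap 0 -> 255: borrow */
		t->full_children--;
	t->empty_children--;
}

int main(void)
{
	struct tn t = { 0, 0 };

	for (int i = 0; i < 256; i++)
		inc(&t);
	/* 256 empties encode as empty_children == 0 with the carry in full_children */
	printf("%u %u\n", t.empty_children, t.full_children);	/* 0 1 */
	dec(&t);
	printf("%u %u\n", t.empty_children, t.full_children);	/* 255 0 */
	return 0;
}
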
 
 static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
@@ -504,7 +500,7 @@
 		tn = container_of(head, struct tnode, rcu)->kv;
 	}
 
-	if (tnode_free_size >= PAGE_SIZE * sync_pages) {
+	if (tnode_free_size >= sysctl_fib_sync_mem) {
 		tnode_free_size = 0;
 		synchronize_rcu();
 	}
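
The fixed sync_pages page-count threshold above becomes a byte-based knob with min/max clamps; per the upstream patch it is exposed as a sysctl named fib_sync_mem. A small sketch that reads the current value from procfs (the path is assumed from the sysctl name, not taken from this diff):

#include <stdio.h>

int main(void)
{
	/* assumed procfs path for the new knob */
	FILE *f = fopen("/proc/sys/net/ipv4/fib_sync_mem", "r");
	unsigned int bytes;

	if (f && fscanf(f, "%u", &bytes) == 1)
		printf("fib_sync_mem = %u bytes\n", bytes);
	if (f)
		fclose(f);
	return 0;
}
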
@@ -1459,6 +1455,7 @@
 		fib_alias_accessed(fa);
 		err = fib_props[fa->fa_type].error;
 		if (unlikely(err < 0)) {
+out_reject:
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 			this_cpu_inc(stats->semantic_match_passed);
 #endif
@@ -1467,20 +1464,24 @@
 		}
 		if (fi->fib_flags & RTNH_F_DEAD)
 			continue;
-		for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
-			const struct fib_nh *nh = &fi->fib_nh[nhsel];
-			struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
 
-			if (nh->nh_flags & RTNH_F_DEAD)
+		if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) {
+			err = fib_props[RTN_BLACKHOLE].error;
+			goto out_reject;
+		}
+
+		for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
+			struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
+
+			if (nhc->nhc_flags & RTNH_F_DEAD)
 				continue;
-			if (in_dev &&
-			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-			    nh->nh_flags & RTNH_F_LINKDOWN &&
+			if (ip_ignore_linkdown(nhc->nhc_dev) &&
+			    nhc->nhc_flags & RTNH_F_LINKDOWN &&
 			    !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
 				continue;
 			if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
 				if (flp->flowi4_oif &&
-				    flp->flowi4_oif != nh->nh_oif)
+				    flp->flowi4_oif != nhc->nhc_oif)
 					continue;
 			}
 
@@ -1490,6 +1491,7 @@
 			res->prefix = htonl(n->key);
 			res->prefixlen = KEYLENGTH - fa->fa_slen;
 			res->nh_sel = nhsel;
+			res->nhc = nhc;
 			res->type = fa->fa_type;
 			res->scope = fi->fib_scope;
 			res->fi = fi;
@@ -1498,7 +1500,7 @@
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 			this_cpu_inc(stats->semantic_match_passed);
 #endif
-			trace_fib_table_lookup(tb->tb_id, flp, nh, err);
+			trace_fib_table_lookup(tb->tb_id, flp, nhc, err);
 
 			return err;
 		}
@@ -1856,7 +1858,7 @@
 }
 
 /* Caller must hold RTNL. */
-int fib_table_flush(struct net *net, struct fib_table *tb)
+int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
 {
 	struct trie *t = (struct trie *)tb->tb_data;
 	struct key_vector *pn = t->kv;
@@ -1904,8 +1906,17 @@
 		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
 			struct fib_info *fi = fa->fa_info;
 
-			if (!fi || !(fi->fib_flags & RTNH_F_DEAD) ||
-			    tb->tb_id != fa->tb_id) {
+			if (!fi || tb->tb_id != fa->tb_id ||
+			    (!(fi->fib_flags & RTNH_F_DEAD) &&
+			     !fib_props[fa->fa_type].error)) {
+				slen = fa->fa_slen;
+				continue;
+			}
+
+			/* Do not flush error routes if the network namespace
+			 * is not being dismantled
+			 */
+			if (!flush_all && fib_props[fa->fa_type].error) {
 				slen = fa->fa_slen;
 				continue;
 			}
@@ -1933,6 +1944,77 @@
 	return found;
 }
 
+/* derived from fib_trie_free */
+static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
+				     struct nl_info *info)
+{
+	struct trie *t = (struct trie *)tb->tb_data;
+	struct key_vector *pn = t->kv;
+	unsigned long cindex = 1;
+	struct fib_alias *fa;
+
+	for (;;) {
+		struct key_vector *n;
+
+		if (!(cindex--)) {
+			t_key pkey = pn->key;
+
+			if (IS_TRIE(pn))
+				break;
+
+			pn = node_parent(pn);
+			cindex = get_index(pkey, pn);
+			continue;
+		}
+
+		/* grab the next available node */
+		n = get_child(pn, cindex);
+		if (!n)
+			continue;
+
+		if (IS_TNODE(n)) {
+			/* record pn and cindex for leaf walking */
+			pn = n;
+			cindex = 1ul << n->bits;
+
+			continue;
+		}
+
+		hlist_for_each_entry(fa, &n->leaf, fa_list) {
+			struct fib_info *fi = fa->fa_info;
+
+			if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
+				continue;
+
+			rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
+				  KEYLENGTH - fa->fa_slen, tb->tb_id,
+				  info, NLM_F_REPLACE);
+
+			/* call_fib_entry_notifiers will be removed when
+			 * an in-kernel notifier is implemented and
+			 * supported for nexthop objects.
+			 */
+			call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
+						 n->key,
+						 KEYLENGTH - fa->fa_slen, fa,
+						 NULL);
+		}
+	}
+}
+
+void fib_info_notify_update(struct net *net, struct nl_info *info)
+{
+	unsigned int h;
+
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+		struct fib_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb_hlist)
+			__fib_info_notify_update(net, tb, info);
+	}
+}
+
 static void fib_leaf_notify(struct net *net, struct key_vector *l,
 			    struct fib_table *tb, struct notifier_block *nb)
 {
@@ -2003,48 +2085,87 @@
 }
 
 static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
-			     struct sk_buff *skb, struct netlink_callback *cb)
+			     struct sk_buff *skb, struct netlink_callback *cb,
+			     struct fib_dump_filter *filter)
 {
+	unsigned int flags = NLM_F_MULTI;
 	__be32 xkey = htonl(l->key);
+	int i, s_i, i_fa, s_fa, err;
 	struct fib_alias *fa;
-	int i, s_i;
+
+	if (filter->filter_set ||
+	    !filter->dump_exceptions || !filter->dump_routes)
+		flags |= NLM_F_DUMP_FILTERED;
 
 	s_i = cb->args[4];
+	s_fa = cb->args[5];
 	i = 0;
 
 	/* rcu_read_lock is held by the caller */
 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
-		int err;
+		struct fib_info *fi = fa->fa_info;
 
-		if (i < s_i) {
-			i++;
-			continue;
+		if (i < s_i)
+			goto next;
+
+		i_fa = 0;
+
+		if (tb->tb_id != fa->tb_id)
+			goto next;
+
+		if (filter->filter_set) {
+			if (filter->rt_type && fa->fa_type != filter->rt_type)
+				goto next;
+
+			if ((filter->protocol &&
+			     fi->fib_protocol != filter->protocol))
+				goto next;
+
+			if (filter->dev &&
+			    !fib_info_nh_uses_dev(fi, filter->dev))
+				goto next;
 		}
 
-		if (tb->tb_id != fa->tb_id) {
-			i++;
-			continue;
+		if (filter->dump_routes) {
+			if (!s_fa) {
+				err = fib_dump_info(skb,
+						    NETLINK_CB(cb->skb).portid,
+						    cb->nlh->nlmsg_seq,
+						    RTM_NEWROUTE,
+						    tb->tb_id, fa->fa_type,
+						    xkey,
+						    KEYLENGTH - fa->fa_slen,
+						    fa->fa_tos, fi, flags);
+				if (err < 0)
+					goto stop;
+			}
+
+			i_fa++;
 		}
 
-		err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
-				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
-				    tb->tb_id, fa->fa_type,
-				    xkey, KEYLENGTH - fa->fa_slen,
-				    fa->fa_tos, fa->fa_info, NLM_F_MULTI);
-		if (err < 0) {
-			cb->args[4] = i;
-			return err;
+		if (filter->dump_exceptions) {
+			err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
+						 &i_fa, s_fa, flags);
+			if (err < 0)
+				goto stop;
 		}
+
+next:
 		i++;
 	}
 
 	cb->args[4] = i;
 	return skb->len;
+
+stop:
+	cb->args[4] = i;
+	cb->args[5] = i_fa;
+	return err;
 }
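
fn_trie_dump_leaf() now resumes interrupted dumps with two cursors: cb->args[4] indexes the alias within the leaf and cb->args[5] indexes the entry (route plus exceptions) within that alias, so a partially dumped alias is re-entered at the right offset. The same resumable nested iteration, reduced to a sketch:

/* Resumable nested walk in the style of fn_trie_dump_leaf(): the outer
 * cursor (cb->args[4] in the kernel) indexes the alias, the inner cursor
 * (cb->args[5]) indexes the entry within a partially dumped alias.
 */
struct cursor { int i, i_fa; };

/* returns 0 when the walk completed, -1 when output space ran out */
static int dump(struct cursor *c, int n_outer, int n_inner,
		int (*emit)(int outer, int inner))
{
	for (int i = 0; i < n_outer; i++) {
		if (i < c->i)
			continue;		/* fully dumped on an earlier call */
		for (int i_fa = 0; i_fa < n_inner; i_fa++) {
			if (i == c->i && i_fa < c->i_fa)
				continue;	/* partially dumped alias: skip ahead */
			if (emit(i, i_fa) < 0) {
				c->i = i;	/* remember both cursors, resume here */
				c->i_fa = i_fa;
				return -1;
			}
		}
	}
	return 0;
}
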
 
 /* rcu_read_lock needs to be held by the caller from the read side */
 int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
-		   struct netlink_callback *cb)
+		   struct netlink_callback *cb, struct fib_dump_filter *filter)
 {
 	struct trie *t = (struct trie *)tb->tb_data;
 	struct key_vector *l, *tp = t->kv;
@@ -2057,7 +2178,7 @@
 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
 		int err;
 
-		err = fn_trie_dump_leaf(l, tb, skb, cb);
+		err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
 		if (err < 0) {
 			cb->args[3] = key;
 			cb->args[2] = count;
@@ -2621,14 +2742,18 @@
 	rcu_read_unlock();
 }
 
-static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
+static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
 {
 	unsigned int flags = 0;
 
 	if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
 		flags = RTF_REJECT;
-	if (fi && fi->fib_nh->nh_gw)
-		flags |= RTF_GATEWAY;
+	if (fi) {
+		const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
+
+		if (nhc->nhc_gw.ipv4)
+			flags |= RTF_GATEWAY;
+	}
 	if (mask == htonl(0xFFFFFFFF))
 		flags |= RTF_HOST;
 	flags |= RTF_UP;
@@ -2659,7 +2784,7 @@
 	prefix = htonl(l->key);
 
 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
-		const struct fib_info *fi = fa->fa_info;
+		struct fib_info *fi = fa->fa_info;
 		__be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
 		unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
 
@@ -2672,26 +2797,31 @@
 
 		seq_setwidth(seq, 127);
 
-		if (fi)
+		if (fi) {
+			struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
+			__be32 gw = 0;
+
+			if (nhc->nhc_gw_family == AF_INET)
+				gw = nhc->nhc_gw.ipv4;
+
 			seq_printf(seq,
 				   "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
 				   "%d\t%08X\t%d\t%u\t%u",
-				   fi->fib_dev ? fi->fib_dev->name : "*",
-				   prefix,
-				   fi->fib_nh->nh_gw, flags, 0, 0,
+				   nhc->nhc_dev ? nhc->nhc_dev->name : "*",
+				   prefix, gw, flags, 0, 0,
 				   fi->fib_priority,
 				   mask,
 				   (fi->fib_advmss ?
 				    fi->fib_advmss + 40 : 0),
 				   fi->fib_window,
 				   fi->fib_rtt >> 3);
-		else
+		} else {
 			seq_printf(seq,
 				   "*\t%08X\t%08X\t%04X\t%d\t%u\t"
 				   "%d\t%08X\t%d\t%u\t%u",
 				   prefix, 0, flags, 0, 0, 0,
 				   mask, 0, 0, 0);
-
+		}
 		seq_pad(seq, '\n');
 	}
 
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 500a599..30fa771 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -1,8 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <linux/icmp.h>
 #include <linux/udp.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -120,6 +122,7 @@
 	struct guehdr *guehdr;
 	void *data;
 	u16 doffset = 0;
+	u8 proto_ctype;
 
 	if (!fou)
 		return 1;
@@ -135,7 +138,7 @@
 		break;
 
 	case 1: {
-		/* Direct encasulation of IPv4 or IPv6 */
+		/* Direct encapsulation of IPv4 or IPv6 */
 
 		int prot;
 
@@ -169,9 +172,7 @@
 	/* guehdr may change after pull */
 	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
 
-	hdrlen = sizeof(struct guehdr) + optlen;
-
-	if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
+	if (validate_gue_flags(guehdr, optlen))
 		goto drop;
 
 	hdrlen = sizeof(struct guehdr) + optlen;
@@ -211,13 +212,14 @@
 	if (unlikely(guehdr->control))
 		return gue_control_message(skb, guehdr);
 
+	proto_ctype = guehdr->proto_ctype;
 	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
 	skb_reset_transport_header(skb);
 
 	if (iptunnel_pull_offloads(skb))
 		goto drop;
 
-	return -guehdr->proto_ctype;
+	return -proto_ctype;
 
 drop:
 	kfree_skb(skb);
@@ -498,15 +500,45 @@
 	return err;
 }
 
-static int fou_add_to_port_list(struct net *net, struct fou *fou)
+static bool fou_cfg_cmp(struct fou *fou, struct fou_cfg *cfg)
+{
+	struct sock *sk = fou->sock->sk;
+	struct udp_port_cfg *udp_cfg = &cfg->udp_config;
+
+	if (fou->family != udp_cfg->family ||
+	    fou->port != udp_cfg->local_udp_port ||
+	    sk->sk_dport != udp_cfg->peer_udp_port ||
+	    sk->sk_bound_dev_if != udp_cfg->bind_ifindex)
+		return false;
+
+	if (fou->family == AF_INET) {
+		if (sk->sk_rcv_saddr != udp_cfg->local_ip.s_addr ||
+		    sk->sk_daddr != udp_cfg->peer_ip.s_addr)
+			return false;
+		else
+			return true;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, &udp_cfg->local_ip6) ||
+		    ipv6_addr_cmp(&sk->sk_v6_daddr, &udp_cfg->peer_ip6))
+			return false;
+		else
+			return true;
+#endif
+	}
+
+	return false;
+}
+
+static int fou_add_to_port_list(struct net *net, struct fou *fou,
+				struct fou_cfg *cfg)
 {
 	struct fou_net *fn = net_generic(net, fou_net_id);
 	struct fou *fout;
 
 	mutex_lock(&fn->fou_lock);
 	list_for_each_entry(fout, &fn->fou_list, list) {
-		if (fou->port == fout->port &&
-		    fou->family == fout->family) {
+		if (fou_cfg_cmp(fout, cfg)) {
 			mutex_unlock(&fn->fou_lock);
 			return -EALREADY;
 		}
@@ -584,7 +616,7 @@
 
 	sk->sk_allocation = GFP_ATOMIC;
 
-	err = fou_add_to_port_list(net, fou);
+	err = fou_add_to_port_list(net, fou, cfg);
 	if (err)
 		goto error;
 
@@ -604,14 +636,12 @@
 static int fou_destroy(struct net *net, struct fou_cfg *cfg)
 {
 	struct fou_net *fn = net_generic(net, fou_net_id);
-	__be16 port = cfg->udp_config.local_udp_port;
-	u8 family = cfg->udp_config.family;
 	int err = -EINVAL;
 	struct fou *fou;
 
 	mutex_lock(&fn->fou_lock);
 	list_for_each_entry(fou, &fn->fou_list, list) {
-		if (fou->port == port && fou->family == family) {
+		if (fou_cfg_cmp(fou, cfg)) {
 			fou_release(fou);
 			err = 0;
 			break;
@@ -625,16 +655,27 @@
 static struct genl_family fou_nl_family;
 
 static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
-	[FOU_ATTR_PORT] = { .type = NLA_U16, },
-	[FOU_ATTR_AF] = { .type = NLA_U8, },
-	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
-	[FOU_ATTR_TYPE] = { .type = NLA_U8, },
-	[FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
+	[FOU_ATTR_PORT]			= { .type = NLA_U16, },
+	[FOU_ATTR_AF]			= { .type = NLA_U8, },
+	[FOU_ATTR_IPPROTO]		= { .type = NLA_U8, },
+	[FOU_ATTR_TYPE]			= { .type = NLA_U8, },
+	[FOU_ATTR_REMCSUM_NOPARTIAL]	= { .type = NLA_FLAG, },
+	[FOU_ATTR_LOCAL_V4]		= { .type = NLA_U32, },
+	[FOU_ATTR_PEER_V4]		= { .type = NLA_U32, },
+	[FOU_ATTR_LOCAL_V6]		= { .type = sizeof(struct in6_addr), },
+	[FOU_ATTR_PEER_V6]		= { .type = sizeof(struct in6_addr), },
+	[FOU_ATTR_PEER_PORT]		= { .type = NLA_U16, },
+	[FOU_ATTR_IFINDEX]		= { .type = NLA_S32, },
 };
 
 static int parse_nl_config(struct genl_info *info,
 			   struct fou_cfg *cfg)
 {
+	bool has_local = false, has_peer = false;
+	struct nlattr *attr;
+	int ifindex;
+	__be16 port;
+
 	memset(cfg, 0, sizeof(*cfg));
 
 	cfg->udp_config.family = AF_INET;
@@ -656,8 +697,7 @@
 	}
 
 	if (info->attrs[FOU_ATTR_PORT]) {
-		__be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
-
+		port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
 		cfg->udp_config.local_udp_port = port;
 	}
 
@@ -670,6 +710,52 @@
 	if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
 		cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;
 
+	if (cfg->udp_config.family == AF_INET) {
+		if (info->attrs[FOU_ATTR_LOCAL_V4]) {
+			attr = info->attrs[FOU_ATTR_LOCAL_V4];
+			cfg->udp_config.local_ip.s_addr = nla_get_in_addr(attr);
+			has_local = true;
+		}
+
+		if (info->attrs[FOU_ATTR_PEER_V4]) {
+			attr = info->attrs[FOU_ATTR_PEER_V4];
+			cfg->udp_config.peer_ip.s_addr = nla_get_in_addr(attr);
+			has_peer = true;
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		if (info->attrs[FOU_ATTR_LOCAL_V6]) {
+			attr = info->attrs[FOU_ATTR_LOCAL_V6];
+			cfg->udp_config.local_ip6 = nla_get_in6_addr(attr);
+			has_local = true;
+		}
+
+		if (info->attrs[FOU_ATTR_PEER_V6]) {
+			attr = info->attrs[FOU_ATTR_PEER_V6];
+			cfg->udp_config.peer_ip6 = nla_get_in6_addr(attr);
+			has_peer = true;
+		}
+#endif
+	}
+
+	if (has_peer) {
+		if (info->attrs[FOU_ATTR_PEER_PORT]) {
+			port = nla_get_be16(info->attrs[FOU_ATTR_PEER_PORT]);
+			cfg->udp_config.peer_udp_port = port;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	if (info->attrs[FOU_ATTR_IFINDEX]) {
+		if (!has_local)
+			return -EINVAL;
+
+		ifindex = nla_get_s32(info->attrs[FOU_ATTR_IFINDEX]);
+
+		cfg->udp_config.bind_ifindex = ifindex;
+	}
+
 	return 0;
 }
 
@@ -701,15 +787,37 @@
 
 static int fou_fill_info(struct fou *fou, struct sk_buff *msg)
 {
+	struct sock *sk = fou->sock->sk;
+
 	if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) ||
 	    nla_put_be16(msg, FOU_ATTR_PORT, fou->port) ||
+	    nla_put_be16(msg, FOU_ATTR_PEER_PORT, sk->sk_dport) ||
 	    nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) ||
-	    nla_put_u8(msg, FOU_ATTR_TYPE, fou->type))
+	    nla_put_u8(msg, FOU_ATTR_TYPE, fou->type) ||
+	    nla_put_s32(msg, FOU_ATTR_IFINDEX, sk->sk_bound_dev_if))
 		return -1;
 
 	if (fou->flags & FOU_F_REMCSUM_NOPARTIAL)
 		if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL))
 			return -1;
+
+	if (fou->sock->sk->sk_family == AF_INET) {
+		if (nla_put_in_addr(msg, FOU_ATTR_LOCAL_V4, sk->sk_rcv_saddr))
+			return -1;
+
+		if (nla_put_in_addr(msg, FOU_ATTR_PEER_V4, sk->sk_daddr))
+			return -1;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		if (nla_put_in6_addr(msg, FOU_ATTR_LOCAL_V6,
+				     &sk->sk_v6_rcv_saddr))
+			return -1;
+
+		if (nla_put_in6_addr(msg, FOU_ATTR_PEER_V6, &sk->sk_v6_daddr))
+			return -1;
+#endif
+	}
+
 	return 0;
 }
 
@@ -762,7 +870,7 @@
 	ret = -ESRCH;
 	mutex_lock(&fn->fou_lock);
 	list_for_each_entry(fout, &fn->fou_list, list) {
-		if (port == fout->port && family == fout->family) {
+		if (fou_cfg_cmp(fout, &cfg)) {
 			ret = fou_dump_info(fout, info->snd_portid,
 					    info->snd_seq, 0, msg,
 					    info->genlhdr->cmd);
@@ -806,21 +914,21 @@
 static const struct genl_ops fou_nl_ops[] = {
 	{
 		.cmd = FOU_CMD_ADD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = fou_nl_cmd_add_port,
-		.policy = fou_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = FOU_CMD_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = fou_nl_cmd_rm_port,
-		.policy = fou_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = FOU_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = fou_nl_cmd_get_port,
 		.dumpit = fou_nl_dump,
-		.policy = fou_nl_policy,
 	},
 };
 
@@ -829,6 +937,7 @@
 	.name		= FOU_GENL_NAME,
 	.version	= FOU_GENL_VERSION,
 	.maxattr	= FOU_ATTR_MAX,
+	.policy		= fou_nl_policy,
 	.netnsok	= true,
 	.module		= THIS_MODULE,
 	.ops		= fou_nl_ops,
@@ -1003,15 +1112,95 @@
 	return 0;
 }
 
+static int gue_err_proto_handler(int proto, struct sk_buff *skb, u32 info)
+{
+	const struct net_protocol *ipprot = rcu_dereference(inet_protos[proto]);
+
+	if (ipprot && ipprot->err_handler) {
+		if (!ipprot->err_handler(skb, info))
+			return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int gue_err(struct sk_buff *skb, u32 info)
+{
+	int transport_offset = skb_transport_offset(skb);
+	struct guehdr *guehdr;
+	size_t len, optlen;
+	int ret;
+
+	len = sizeof(struct udphdr) + sizeof(struct guehdr);
+	if (!pskb_may_pull(skb, transport_offset + len))
+		return -EINVAL;
+
+	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+	switch (guehdr->version) {
+	case 0: /* Full GUE header present */
+		break;
+	case 1: {
+		/* Direct encapsulation of IPv4 or IPv6 */
+		skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
+
+		switch (((struct iphdr *)guehdr)->version) {
+		case 4:
+			ret = gue_err_proto_handler(IPPROTO_IPIP, skb, info);
+			goto out;
+#if IS_ENABLED(CONFIG_IPV6)
+		case 6:
+			ret = gue_err_proto_handler(IPPROTO_IPV6, skb, info);
+			goto out;
+#endif
+		default:
+			ret = -EOPNOTSUPP;
+			goto out;
+		}
+	}
+	default: /* Undefined version */
+		return -EOPNOTSUPP;
+	}
+
+	if (guehdr->control)
+		return -ENOENT;
+
+	optlen = guehdr->hlen << 2;
+
+	if (!pskb_may_pull(skb, transport_offset + len + optlen))
+		return -EINVAL;
+
+	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+	if (validate_gue_flags(guehdr, optlen))
+		return -EINVAL;
+
+	/* Handling exceptions for direct UDP encapsulation in GUE would lead to
+	 * recursion. Besides, this kind of encapsulation can't even be
+	 * configured currently. Discard this.
+	 */
+	if (guehdr->proto_ctype == IPPROTO_UDP ||
+	    guehdr->proto_ctype == IPPROTO_UDPLITE)
+		return -EOPNOTSUPP;
+
+	skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
+	ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info);
+
+out:
+	skb_set_transport_header(skb, transport_offset);
+	return ret;
+}
+
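+
gue_err() above follows a save/adjust/restore discipline on the transport header: it rewinds the header so the inner protocol's error handler sees the embedded packet as if it had arrived directly, then unconditionally restores the original offset for any later handlers. The shape of that pattern, reduced to a standalone sketch (hypothetical struct, not the skb accessors):

/* save/adjust/restore: run a callback against a temporarily shifted view */
struct pkt { unsigned char *data; int transport_off; };

static int with_inner_view(struct pkt *p, int inner_off,
			   int (*handler)(struct pkt *))
{
	int saved = p->transport_off;	/* remember the outer offset */
	int ret;

	p->transport_off = inner_off;	/* point at the embedded packet */
	ret = handler(p);
	p->transport_off = saved;	/* always restore before returning */
	return ret;
}
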
 
 static const struct ip_tunnel_encap_ops fou_iptun_ops = {
 	.encap_hlen = fou_encap_hlen,
 	.build_header = fou_build_header,
+	.err_handler = gue_err,
 };
 
 static const struct ip_tunnel_encap_ops gue_iptun_ops = {
 	.encap_hlen = gue_encap_hlen,
 	.build_header = gue_build_header,
+	.err_handler = gue_err,
 };
 
 static int ip_tunnel_encap_add_fou_ops(void)
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index b798862..44bfeec 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	GRE over IPv4 demultiplexer driver
  *
  *	Authors: Dmitry Kozlov (xeb@mail.ru)
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -25,6 +20,7 @@
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
+#include <net/erspan.h>
 
 #include <net/icmp.h>
 #include <net/route.h>
@@ -86,13 +82,14 @@
 
 	options = (__be32 *)(greh + 1);
 	if (greh->flags & GRE_CSUM) {
-		if (skb_checksum_simple_validate(skb)) {
+		if (!skb_checksum_simple_validate(skb)) {
+			skb_checksum_try_convert(skb, IPPROTO_GRE,
+						 null_compute_pseudo);
+		} else if (csum_err) {
 			*csum_err = true;
 			return -EINVAL;
 		}
 
-		skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
-					 null_compute_pseudo);
 		options++;
 	}
 
@@ -118,6 +115,22 @@
 			hdr_len += 4;
 	}
 	tpi->hdr_len = hdr_len;
+
+	/* The ERSPAN version 1 and 2 protocols set the GRE key field
+	 * to 0 and carry the configured key in the session-ID field
+	 * of the inner ERSPAN header
+	 */
+	if (greh->protocol == htons(ETH_P_ERSPAN) ||
+	    greh->protocol == htons(ETH_P_ERSPAN2)) {
+		struct erspan_base_hdr *ershdr;
+
+		if (!pskb_may_pull(skb, nhs + hdr_len + sizeof(*ershdr)))
+			return -EINVAL;
+
+		ershdr = (struct erspan_base_hdr *)options;
+		tpi->key = cpu_to_be32(get_session_id(ershdr));
+	}
+
 	return hdr_len;
 }
 EXPORT_SYMBOL(gre_parse_header);
@@ -150,20 +163,25 @@
 	return NET_RX_DROP;
 }
 
-static void gre_err(struct sk_buff *skb, u32 info)
+static int gre_err(struct sk_buff *skb, u32 info)
 {
 	const struct gre_protocol *proto;
 	const struct iphdr *iph = (const struct iphdr *)skb->data;
 	u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f;
+	int err = 0;
 
 	if (ver >= GREPROTO_MAX)
-		return;
+		return -EINVAL;
 
 	rcu_read_lock();
 	proto = rcu_dereference(gre_proto[ver]);
 	if (proto && proto->err_handler)
 		proto->err_handler(skb, info);
+	else
+		err = -EPROTONOSUPPORT;
 	rcu_read_unlock();
+
+	return err;
 }
 
 static const struct net_protocol net_gre_protocol = {
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 6c63524..4de7e96 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPV4 GSO/GRO offload support
  *	Linux INET implementation
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	GRE GSO support
  */
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 695979b..4298aae 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	NET3:	Implementation of the ICMP protocol layer.
  *
  *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	Some of the function names and the icmp unreach table for this
  *	module were derived from [icmp.c 1.0.11 06/02/93] by
  *	Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting.
@@ -59,7 +55,6 @@
  *
  *	- Should use skb_pull() instead of all the manual checking.
  *	  This would also greatly simplify some upper layer error handlers. --AK
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -206,7 +201,7 @@
  */
 static struct sock *icmp_sk(struct net *net)
 {
-	return *this_cpu_ptr(net->ipv4.icmp_sk);
+	return this_cpu_read(*net->ipv4.icmp_sk);
 }
 
 /* Called with BH disabled */
@@ -570,7 +565,8 @@
  *			MUST reply to only the first fragment.
  */
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt)
 {
 	struct iphdr *iph;
 	int room;
@@ -586,7 +582,13 @@
 
 	if (!rt)
 		goto out;
-	net = dev_net(rt->dst.dev);
+
+	if (rt->dst.dev)
+		net = dev_net(rt->dst.dev);
+	else if (skb_in->dev)
+		net = dev_net(skb_in->dev);
+	else
+		goto out;
 
 	/*
 	 *	Find the original header. It is expected to be valid, of course.
@@ -691,7 +693,7 @@
 					  iph->tos;
 	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
-	if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
+	if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
 		goto out_unlock;
 
 
@@ -742,7 +744,7 @@
 	local_bh_enable();
 out:;
 }
-EXPORT_SYMBOL(icmp_send);
+EXPORT_SYMBOL(__icmp_send);
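+
Callers of the old four-argument icmp_send() are unchanged: upstream pairs this export with a static inline wrapper in include/net/icmp.h that forwards the skb's own IP options. A sketch of that wrapper (kernel-context code from the upstream __icmp_send series, not shown in this diff):

/* compatibility wrapper added in include/net/icmp.h by the same series */
static inline void icmp_send(struct sk_buff *skb_in, int type, int code,
			     __be32 info)
{
	__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
}
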
 
 
 static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
@@ -906,7 +908,7 @@
 		return false;
 	}
 
-	icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway);
+	icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
 	return true;
 }
 
@@ -1079,7 +1081,7 @@
 	goto drop;
 }
 
-void icmp_err(struct sk_buff *skb, u32 info)
+int icmp_err(struct sk_buff *skb, u32 info)
 {
 	struct iphdr *iph = (struct iphdr *)skb->data;
 	int offset = iph->ihl<<2;
@@ -1094,13 +1096,15 @@
 	 */
 	if (icmph->type != ICMP_ECHOREPLY) {
 		ping_err(skb, offset, info);
-		return;
+		return 0;
 	}
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
-		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ICMP, 0);
+		ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ICMP);
 	else if (type == ICMP_REDIRECT)
-		ipv4_redirect(skb, net, 0, 0, IPPROTO_ICMP, 0);
+		ipv4_redirect(skb, net, 0, IPPROTO_ICMP);
+
+	return 0;
 }
 
 /*
@@ -1243,9 +1247,7 @@
 	return 0;
 
 fail:
-	for_each_possible_cpu(i)
-		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
-	free_percpu(net->ipv4.icmp_sk);
+	icmp_sk_exit(net);
 	return err;
 }
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4da3944..480d0b2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Linux NET3:	Internet Group Management Protocol  [IGMP]
  *
@@ -11,11 +12,6 @@
  *	Authors:
  *		Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	Fixes:
  *
  *		Alan Cox	:	Added lots of __inline__ to optimise
@@ -111,13 +107,10 @@
 #ifdef CONFIG_IP_MULTICAST
 /* Parameter names and values are taken from igmp-v2-06 draft */
 
-#define IGMP_V1_ROUTER_PRESENT_TIMEOUT		(400*HZ)
-#define IGMP_V2_ROUTER_PRESENT_TIMEOUT		(400*HZ)
 #define IGMP_V2_UNSOLICITED_REPORT_INTERVAL	(10*HZ)
 #define IGMP_V3_UNSOLICITED_REPORT_INTERVAL	(1*HZ)
+#define IGMP_QUERY_INTERVAL			(125*HZ)
 #define IGMP_QUERY_RESPONSE_INTERVAL		(10*HZ)
-#define IGMP_QUERY_ROBUSTNESS_VARIABLE		2
-
 
 #define IGMP_INITIAL_REPORT_DELAY		(1)
 
@@ -162,7 +155,8 @@
 	return interval_jiffies;
 }
 
-static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
+static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
+			      gfp_t gfp);
 static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im);
 static void igmpv3_clear_delrec(struct in_device *in_dev);
 static int sf_setstate(struct ip_mc_list *pmc);
@@ -190,6 +184,17 @@
 	     pmc != NULL;					\
 	     pmc = rtnl_dereference(pmc->next_rcu))
 
+static void ip_sf_list_clear_all(struct ip_sf_list *psf)
+{
+	struct ip_sf_list *next;
+
+	while (psf) {
+		next = psf->sf_next;
+		kfree(psf);
+		psf = next;
+	}
+}
+
 #ifdef CONFIG_IP_MULTICAST
 
 /*
@@ -327,14 +332,15 @@
 				 const struct flowi4 *fl4)
 {
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	const struct in_ifaddr *ifa;
 
 	if (!in_dev)
 		return htonl(INADDR_ANY);
 
-	for_ifa(in_dev) {
+	in_dev_for_each_ifa_rcu(ifa, in_dev) {
 		if (fl4->saddr == ifa->ifa_local)
 			return fl4->saddr;
-	} endfor_ifa(in_dev);
+	}
 
 	return htonl(INADDR_ANY);
 }
@@ -635,6 +641,13 @@
 	}
 }
 
+static void kfree_pmc(struct ip_mc_list *pmc)
+{
+	ip_sf_list_clear_all(pmc->sources);
+	ip_sf_list_clear_all(pmc->tomb);
+	kfree(pmc);
+}
+
 static void igmpv3_send_cr(struct in_device *in_dev)
 {
 	struct ip_mc_list *pmc, *pmc_prev, *pmc_next;
@@ -671,7 +684,7 @@
 			else
 				in_dev->mc_tomb = pmc_next;
 			in_dev_put(pmc->interface);
-			kfree(pmc);
+			kfree_pmc(pmc);
 		} else
 			pmc_prev = pmc;
 	}
@@ -935,13 +948,15 @@
 
 			max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
 			in_dev->mr_v1_seen = jiffies +
-				IGMP_V1_ROUTER_PRESENT_TIMEOUT;
+				(in_dev->mr_qrv * in_dev->mr_qi) +
+				in_dev->mr_qri;
 			group = 0;
 		} else {
 			/* v2 router present */
 			max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
 			in_dev->mr_v2_seen = jiffies +
-				IGMP_V2_ROUTER_PRESENT_TIMEOUT;
+				(in_dev->mr_qrv * in_dev->mr_qi) +
+				in_dev->mr_qri;
 		}
 		/* cancel the interface change timer */
 		in_dev->mr_ifc_count = 0;
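
The fixed 400 s IGMP_V1/V2_ROUTER_PRESENT_TIMEOUT constants give way to RFC 3376's Older Version Querier Present Timeout: Robustness Variable times Query Interval plus Query Response Interval. With the defaults visible in this file (QRV 2, QI 125 s, QRI 10 s) that comes to 260 s rather than 400 s; a one-liner to check the arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int qrv = 2;	/* robustness variable (sysctl_igmp_qrv default) */
	unsigned int qi  = 125;	/* IGMP_QUERY_INTERVAL, seconds */
	unsigned int qri = 10;	/* IGMP_QUERY_RESPONSE_INTERVAL, seconds */

	/* RFC 3376, 8.12: Older Version Querier Present Timeout */
	printf("%u s\n", qrv * qi + qri);	/* 260 s */
	return 0;
}
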
@@ -981,8 +996,21 @@
 		if (!max_delay)
 			max_delay = 1;	/* can't mod w/ 0 */
 		in_dev->mr_maxdelay = max_delay;
-		if (ih3->qrv)
-			in_dev->mr_qrv = ih3->qrv;
+
+		/* RFC3376, 4.1.6. QRV and 4.1.7. QQIC: when the most recently
+		 * received value was zero, use the default or statically
+		 * configured value.
+		 */
+		in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+		in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+		/* RFC3376, 8.3. Query Response Interval:
+		 * The number of seconds represented by the [Query Response
+		 * Interval] must be less than the [Query Interval].
+		 */
+		if (in_dev->mr_qri >= in_dev->mr_qi)
+			in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ;
+
 		if (!group) { /* general query */
 			if (ih3->nsrcs)
 				return true;	/* no sources allowed */
@@ -1133,7 +1161,8 @@
 /*
  * deleted ip_mc_list manipulation
  */
-static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
+static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
+			      gfp_t gfp)
 {
 	struct ip_mc_list *pmc;
 	struct net *net = dev_net(in_dev->dev);
@@ -1144,7 +1173,7 @@
 	 * for deleted items allows change reports to use common code with
 	 * non-deleted or query-response MCA's.
 	 */
-	pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
+	pmc = kzalloc(sizeof(*pmc), gfp);
 	if (!pmc)
 		return;
 	spin_lock_init(&pmc->lock);
@@ -1200,15 +1229,15 @@
 	if (pmc) {
 		im->interface = pmc->interface;
 		if (im->sfmode == MCAST_INCLUDE) {
-			im->tomb = pmc->tomb;
-			im->sources = pmc->sources;
+			swap(im->tomb, pmc->tomb);
+			swap(im->sources, pmc->sources);
 			for (psf = im->sources; psf; psf = psf->sf_next)
 				psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
 		} else {
 			im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
 		}
 		in_dev_put(pmc->interface);
-		kfree(pmc);
+		kfree_pmc(pmc);
 	}
 	spin_unlock_bh(&im->lock);
 }
@@ -1229,27 +1258,24 @@
 		nextpmc = pmc->next;
 		ip_mc_clear_src(pmc);
 		in_dev_put(pmc->interface);
-		kfree(pmc);
+		kfree_pmc(pmc);
 	}
 	/* clear dead sources, too */
 	rcu_read_lock();
 	for_each_pmc_rcu(in_dev, pmc) {
-		struct ip_sf_list *psf, *psf_next;
+		struct ip_sf_list *psf;
 
 		spin_lock_bh(&pmc->lock);
 		psf = pmc->tomb;
 		pmc->tomb = NULL;
 		spin_unlock_bh(&pmc->lock);
-		for (; psf; psf = psf_next) {
-			psf_next = psf->sf_next;
-			kfree(psf);
-		}
+		ip_sf_list_clear_all(psf);
 	}
 	rcu_read_unlock();
 }
 #endif
 
-static void igmp_group_dropped(struct ip_mc_list *im)
+static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp)
 {
 	struct in_device *in_dev = im->interface;
 #ifdef CONFIG_IP_MULTICAST
@@ -1280,13 +1306,18 @@
 			return;
 		}
 		/* IGMPv3 */
-		igmpv3_add_delrec(in_dev, im);
+		igmpv3_add_delrec(in_dev, im, gfp);
 
 		igmp_ifc_event(in_dev);
 	}
 #endif
 }
 
+static void igmp_group_dropped(struct ip_mc_list *im)
+{
+	__igmp_group_dropped(im, GFP_KERNEL);
+}
+
 static void igmp_group_added(struct ip_mc_list *im)
 {
 	struct in_device *in_dev = im->interface;
@@ -1388,8 +1419,8 @@
 /*
  *	A socket has joined a multicast group on device dev.
  */
-static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
-			      unsigned int mode)
+static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
+				unsigned int mode, gfp_t gfp)
 {
 	struct ip_mc_list *im;
 
@@ -1403,7 +1434,7 @@
 		}
 	}
 
-	im = kzalloc(sizeof(*im), GFP_KERNEL);
+	im = kzalloc(sizeof(*im), gfp);
 	if (!im)
 		goto out;
 
@@ -1436,9 +1467,15 @@
 	return;
 }
 
+void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, gfp_t gfp)
+{
+	____ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE, gfp);
+}
+EXPORT_SYMBOL(__ip_mc_inc_group);
+
 void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 {
-	__ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE);
+	__ip_mc_inc_group(in_dev, addr, GFP_KERNEL);
 }
 EXPORT_SYMBOL(ip_mc_inc_group);
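Threading a gfp_t through ____ip_mc_inc_group() lets callers that cannot
sleep join a group: ip_mc_inc_group() keeps its GFP_KERNEL behaviour, while
the newly exported __ip_mc_inc_group() takes the flag explicitly. A
hypothetical caller in an atomic section (priv and its lock are illustrative,
not from this patch):

/* hypothetical driver path holding a spinlock, hence GFP_ATOMIC */
spin_lock_bh(&priv->lock);
__ip_mc_inc_group(in_dev, htonl(INADDR_ALLHOSTS_GROUP), GFP_ATOMIC);
spin_unlock_bh(&priv->lock);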
 
@@ -1481,22 +1518,22 @@
 
 	len += sizeof(struct igmpv3_report);
 
-	return pskb_may_pull(skb, len) ? 0 : -EINVAL;
+	return ip_mc_may_pull(skb, len) ? 0 : -EINVAL;
 }
 
 static int ip_mc_check_igmp_query(struct sk_buff *skb)
 {
-	unsigned int len = skb_transport_offset(skb);
-
-	len += sizeof(struct igmphdr);
-	if (skb->len < len)
-		return -EINVAL;
+	unsigned int transport_len = ip_transport_len(skb);
+	unsigned int len;
 
 	/* IGMPv{1,2}? */
-	if (skb->len != len) {
+	if (transport_len != sizeof(struct igmphdr)) {
 		/* or IGMPv3? */
-		len += sizeof(struct igmpv3_query) - sizeof(struct igmphdr);
-		if (skb->len < len || !pskb_may_pull(skb, len))
+		if (transport_len < sizeof(struct igmpv3_query))
+			return -EINVAL;
+
+		len = skb_transport_offset(skb) + sizeof(struct igmpv3_query);
+		if (!ip_mc_may_pull(skb, len))
 			return -EINVAL;
 	}
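ip_transport_len() and ip_mc_may_pull() are small helpers introduced
alongside this rework (in include/net/ip.h and include/linux/igmp.h);
roughly:

static inline unsigned int ip_transport_len(const struct sk_buff *skb)
{
	return ntohs(ip_hdr(skb)->tot_len) - skb_network_header_len(skb);
}

static inline bool ip_mc_may_pull(struct sk_buff *skb, unsigned int len)
{
	if (skb_transport_offset(skb) + ip_transport_len(skb) < len)
		return false;

	return pskb_may_pull(skb, len);
}

The explicit length check matters because pskb_may_pull() alone would happily
pull bytes that lie beyond the IP payload, e.g. trailing layer-2 padding.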
 
@@ -1516,7 +1553,6 @@
 	case IGMP_HOST_LEAVE_MESSAGE:
 	case IGMP_HOST_MEMBERSHIP_REPORT:
 	case IGMPV2_HOST_MEMBERSHIP_REPORT:
-		/* fall through */
 		return 0;
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
 		return ip_mc_check_igmp_reportv3(skb);
@@ -1532,47 +1568,29 @@
 	return skb_checksum_simple_validate(skb);
 }
 
-static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
-
+static int ip_mc_check_igmp_csum(struct sk_buff *skb)
 {
-	struct sk_buff *skb_chk;
-	unsigned int transport_len;
 	unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr);
-	int ret = -EINVAL;
+	unsigned int transport_len = ip_transport_len(skb);
+	struct sk_buff *skb_chk;
 
-	transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb);
+	if (!ip_mc_may_pull(skb, len))
+		return -EINVAL;
 
 	skb_chk = skb_checksum_trimmed(skb, transport_len,
 				       ip_mc_validate_checksum);
 	if (!skb_chk)
-		goto err;
+		return -EINVAL;
 
-	if (!pskb_may_pull(skb_chk, len))
-		goto err;
-
-	ret = ip_mc_check_igmp_msg(skb_chk);
-	if (ret)
-		goto err;
-
-	if (skb_trimmed)
-		*skb_trimmed = skb_chk;
-	/* free now unneeded clone */
-	else if (skb_chk != skb)
+	if (skb_chk != skb)
 		kfree_skb(skb_chk);
 
-	ret = 0;
-
-err:
-	if (ret && skb_chk && skb_chk != skb)
-		kfree_skb(skb_chk);
-
-	return ret;
+	return 0;
 }
 
 /**
  * ip_mc_check_igmp - checks whether this is a sane IGMP packet
  * @skb: the skb to validate
- * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional)
  *
  * Checks whether an IPv4 packet is a valid IGMP packet. If so sets
  * skb transport header accordingly and returns zero.
@@ -1582,18 +1600,10 @@
  * -ENOMSG: IP header validation succeeded but it is not an IGMP packet.
  * -ENOMEM: A memory allocation failure happened.
  *
- * Optionally, an skb pointer might be provided via skb_trimmed (or set it
- * to NULL): After parsing an IGMP packet successfully it will point to
- * an skb which has its tail aligned to the IP packet end. This might
- * either be the originally provided skb or a trimmed, cloned version if
- * the skb frame had data beyond the IP packet. A cloned skb allows us
- * to leave the original skb and its full frame unchanged (which might be
- * desirable for layer 2 frame jugglers).
- *
  * Caller needs to set the skb network header. No trimmed skb is returned
  * any more, so there is nothing extra for the caller to free.
  */
-int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+int ip_mc_check_igmp(struct sk_buff *skb)
 {
 	int ret = ip_mc_check_iphdr(skb);
 
@@ -1603,7 +1613,11 @@
 	if (ip_hdr(skb)->protocol != IPPROTO_IGMP)
 		return -ENOMSG;
 
-	return __ip_mc_check_igmp(skb, skb_trimmed);
+	ret = ip_mc_check_igmp_csum(skb);
+	if (ret < 0)
+		return ret;
+
+	return ip_mc_check_igmp_msg(skb);
 }
 EXPORT_SYMBOL(ip_mc_check_igmp);
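With the skb_trimmed out-parameter gone, ip_mc_check_igmp_csum() clones the
skb internally for the checksum check and frees the clone itself, so a caller
such as the bridge multicast snooper reduces to something like:

int err = ip_mc_check_igmp(skb);

if (err == -ENOMSG)
	return 0;	/* sane IP datagram, just not IGMP */
if (err < 0)
	return err;	/* malformed, caller drops */
/* IGMP message is at skb_transport_header(skb); nothing extra to free */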
 
@@ -1644,7 +1658,7 @@
  *	A socket has left a multicast group on device dev
  */
 
-void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
+void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp)
 {
 	struct ip_mc_list *i;
 	struct ip_mc_list __rcu **ip;
@@ -1659,7 +1673,7 @@
 				ip_mc_hash_remove(in_dev, i);
 				*ip = i->next_rcu;
 				in_dev->mc_count--;
-				igmp_group_dropped(i);
+				__igmp_group_dropped(i, gfp);
 				ip_mc_clear_src(i);
 
 				if (!in_dev->dead)
@@ -1672,7 +1686,7 @@
 		}
 	}
 }
-EXPORT_SYMBOL(ip_mc_dec_group);
+EXPORT_SYMBOL(__ip_mc_dec_group);
 
 /* Device changing type */
 
@@ -1723,18 +1737,30 @@
 	ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
 }
 
+#ifdef CONFIG_IP_MULTICAST
+static void ip_mc_reset(struct in_device *in_dev)
+{
+	struct net *net = dev_net(in_dev->dev);
+
+	in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+	in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+}
+#else
+static void ip_mc_reset(struct in_device *in_dev)
+{
+}
+#endif
+
 void ip_mc_init_dev(struct in_device *in_dev)
 {
-#ifdef CONFIG_IP_MULTICAST
-	struct net *net = dev_net(in_dev->dev);
-#endif
 	ASSERT_RTNL();
 
 #ifdef CONFIG_IP_MULTICAST
 	timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0);
 	timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0);
-	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
 #endif
+	ip_mc_reset(in_dev);
 
 	spin_lock_init(&in_dev->mc_tomb_lock);
 }
@@ -1744,15 +1770,10 @@
 void ip_mc_up(struct in_device *in_dev)
 {
 	struct ip_mc_list *pmc;
-#ifdef CONFIG_IP_MULTICAST
-	struct net *net = dev_net(in_dev->dev);
-#endif
 
 	ASSERT_RTNL();
 
-#ifdef CONFIG_IP_MULTICAST
-	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
-#endif
+	ip_mc_reset(in_dev);
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
 	for_each_pmc_rtnl(in_dev, pmc) {
@@ -2114,7 +2135,7 @@
 
 static void ip_mc_clear_src(struct ip_mc_list *pmc)
 {
-	struct ip_sf_list *psf, *nextpsf, *tomb, *sources;
+	struct ip_sf_list *tomb, *sources;
 
 	spin_lock_bh(&pmc->lock);
 	tomb = pmc->tomb;
@@ -2126,14 +2147,8 @@
 	pmc->sfcount[MCAST_EXCLUDE] = 1;
 	spin_unlock_bh(&pmc->lock);
 
-	for (psf = tomb; psf; psf = nextpsf) {
-		nextpsf = psf->sf_next;
-		kfree(psf);
-	}
-	for (psf = sources; psf; psf = nextpsf) {
-		nextpsf = psf->sf_next;
-		kfree(psf);
-	}
+	ip_sf_list_clear_all(tomb);
+	ip_sf_list_clear_all(sources);
 }
 
 /* Join a multicast group
@@ -2182,7 +2197,7 @@
 	iml->sflist = NULL;
 	iml->sfmode = mode;
 	rcu_assign_pointer(inet->mc_list, iml);
-	__ip_mc_inc_group(in_dev, addr, mode);
+	____ip_mc_inc_group(in_dev, addr, mode, GFP_KERNEL);
 	err = 0;
 done:
 	return err;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 15e7f79..eb30fc1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -6,11 +7,6 @@
  *		Support for INET connection oriented protocols.
  *
  * Authors:	See the TCP sources
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or(at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -183,7 +179,9 @@
 	int i, low, high, attempt_half;
 	struct inet_bind_bucket *tb;
 	u32 remaining, offset;
+	int l3mdev;
 
+	l3mdev = inet_sk_bound_l3mdev(sk);
 	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
 other_half_scan:
 	inet_get_local_port_range(net, &low, &high);
@@ -219,7 +217,8 @@
 						  hinfo->bhash_size)];
 		spin_lock_bh(&head->lock);
 		inet_bind_bucket_for_each(tb, &head->chain)
-			if (net_eq(ib_net(tb), net) && tb->port == port) {
+			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+			    tb->port == port) {
 				if (!inet_csk_bind_conflict(sk, tb, false, false))
 					goto success;
 				goto next_port;
@@ -293,6 +292,9 @@
 	struct net *net = sock_net(sk);
 	struct inet_bind_bucket *tb = NULL;
 	kuid_t uid = sock_i_uid(sk);
+	int l3mdev;
+
+	l3mdev = inet_sk_bound_l3mdev(sk);
 
 	if (!port) {
 		head = inet_csk_find_open_port(sk, &tb, &port);
@@ -306,11 +308,12 @@
 					  hinfo->bhash_size)];
 	spin_lock_bh(&head->lock);
 	inet_bind_bucket_for_each(tb, &head->chain)
-		if (net_eq(ib_net(tb), net) && tb->port == port)
+		if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+		    tb->port == port)
 			goto tb_found;
 tb_not_found:
 	tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
-				     net, head, port);
+				     net, head, port, l3mdev);
 	if (!tb)
 		goto fail_unlock;
 tb_found:
@@ -646,8 +649,7 @@
 EXPORT_SYMBOL(inet_rtx_syn_ack);
 
 /* return true if req was found in the ehash table */
-static bool reqsk_queue_unlink(struct request_sock_queue *queue,
-			       struct request_sock *req)
+static bool reqsk_queue_unlink(struct request_sock *req)
 {
 	struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
 	bool found = false;
@@ -666,7 +668,7 @@
 
 void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
 {
-	if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
+	if (reqsk_queue_unlink(req)) {
 		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
 		reqsk_put(req);
 	}
@@ -749,10 +751,6 @@
 static void reqsk_queue_hash_req(struct request_sock *req,
 				 unsigned long timeout)
 {
-	req->num_retrans = 0;
-	req->num_timeout = 0;
-	req->sk = NULL;
-
 	timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
 	mod_timer(&req->rsk_timer, jiffies + timeout);
 
@@ -874,7 +872,6 @@
 
 	reqsk_queue_alloc(&icsk->icsk_accept_queue);
 
-	sk->sk_max_ack_backlog = backlog;
 	sk->sk_ack_backlog = 0;
 	inet_csk_delack_init(sk);
 
@@ -909,7 +906,7 @@
 	percpu_counter_inc(sk->sk_prot->orphan_count);
 
 	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
-		BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+		BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
 		BUG_ON(sk != req->rsk_listener);
 
 		/* Paranoid, to prevent race condition if
@@ -918,7 +915,7 @@
 		 * Also to satisfy an assertion in
 		 * tcp_v4_destroy_sock().
 		 */
-		tcp_sk(child)->fastopen_rsk = NULL;
+		RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
 	}
 	inet_csk_destroy_sock(child);
 }
@@ -937,7 +934,7 @@
 		req->sk = child;
 		req->dl_next = NULL;
 		if (queue->rskq_accept_head == NULL)
-			queue->rskq_accept_head = req;
+			WRITE_ONCE(queue->rskq_accept_head, req);
 		else
 			queue->rskq_accept_tail->dl_next = req;
 		queue->rskq_accept_tail = req;
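Bind buckets are now keyed by (net, l3mdev, port), so the same port can be
bound independently in different VRF domains. inet_sk_bound_l3mdev() resolves
a socket's binding to its VRF master ifindex, or 0 when the socket is not
scoped to one; roughly (assuming the v5.4 inline in
include/net/inet_hashtables.h):

static inline int inet_sk_bound_l3mdev(const struct sock *sk)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	struct net *net = sock_net(sk);

	if (!net->ipv4.sysctl_tcp_l3mdev_accept)
		return l3mdev_master_ifindex_by_index(net,
						      sk->sk_bound_dev_if);
#endif
	return 0;
}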
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 4e5bc4b..7dc79b9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * inet_diag.c	Module for monitoring INET transport protocols sockets.
  *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -108,6 +104,7 @@
 		+ nla_total_size(1) /* INET_DIAG_TOS */
 		+ nla_total_size(1) /* INET_DIAG_TCLASS */
 		+ nla_total_size(4) /* INET_DIAG_MARK */
+		+ nla_total_size(4) /* INET_DIAG_CLASS_ID */
 		+ nla_total_size(sizeof(struct inet_diag_meminfo))
 		+ nla_total_size(sizeof(struct inet_diag_msg))
 		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
@@ -196,7 +193,7 @@
 	if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
 		struct inet_diag_meminfo minfo = {
 			.idiag_rmem = sk_rmem_alloc_get(sk),
-			.idiag_wmem = sk->sk_wmem_queued,
+			.idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
 			.idiag_fmem = sk->sk_forward_alloc,
 			.idiag_tmem = sk_wmem_alloc_get(sk),
 		};
@@ -287,12 +284,19 @@
 			goto errout;
 	}
 
-	if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) {
+	if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
+	    ext & (1 << (INET_DIAG_TCLASS - 1))) {
 		u32 classid = 0;
 
 #ifdef CONFIG_SOCK_CGROUP_DATA
 		classid = sock_cgroup_classid(&sk->sk_cgrp_data);
 #endif
+		/* Fall back to socket priority if class id isn't set.
+		 * Classful qdiscs use it as direct reference to class.
+		 * For cgroup2 classid is always zero.
+		 */
+		if (!classid)
+			classid = sk->sk_priority;
 
 		if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
 			goto errout;
@@ -998,7 +1002,9 @@
 			if (!inet_diag_bc_sk(bc, sk))
 				goto next_normal;
 
-			sock_hold(sk);
+			if (!refcount_inc_not_zero(&sk->sk_refcnt))
+				goto next_normal;
+
 			num_arr[accum] = num;
 			sk_arr[accum] = sk;
 			if (++accum == SKARR_SZ)
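The sock_hold() -> refcount_inc_not_zero() change closes a race with sockets
whose last reference is dropped while the dump walks the hash: an
unconditional hold would resurrect a refcount that already hit zero. Ignoring
refcount_t's saturation handling, the primitive behaves like this CAS loop:

static bool inc_not_zero(atomic_t *r)
{
	int old, cur = atomic_read(r);

	while (cur) {
		old = atomic_cmpxchg(r, cur, cur + 1);
		if (old == cur)
			return true;	/* took a reference */
		cur = old;		/* raced with someone, retry */
	}
	return false;			/* already dead, skip this socket */
}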
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 760a9e5..10d3173 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * inet fragments management
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * 		Authors:	Pavel Emelyanov <xemul@openvz.org>
  *				Started as consolidation of ipv4/ip_fragment.c,
  *				ipv6/reassembly. and ipv6 nf conntrack reassembly
@@ -25,6 +21,62 @@
 #include <net/sock.h>
 #include <net/inet_frag.h>
 #include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
+	union {
+		struct inet_skb_parm	h4;
+		struct inet6_skb_parm	h6;
+	};
+	struct sk_buff		*next_frag;
+	int			frag_run_len;
+};
+
+#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void fragcb_clear(struct sk_buff *skb)
+{
+	RB_CLEAR_NODE(&skb->rbnode);
+	FRAG_CB(skb)->next_frag = NULL;
+	FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void fragrun_append_to_last(struct inet_frag_queue *q,
+				   struct sk_buff *skb)
+{
+	fragcb_clear(skb);
+
+	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+	FRAG_CB(q->fragments_tail)->next_frag = skb;
+	q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+	fragcb_clear(skb);
+
+	if (q->last_run_head)
+		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+			     &q->last_run_head->rbnode.rb_right);
+	else
+		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+	rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+	q->fragments_tail = skb;
+	q->last_run_head = skb;
+}
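Each rb-tree node therefore stands for a half-open byte range
[offset, offset + frag_run_len) of the datagram, keyed by the run head; later
skbs of the run hang off next_frag. The duplicate test in
inet_frag_queue_insert() further down is containment in that range; as a
sketch:

/* does [offset, end) fall entirely inside the run headed by @head? */
static bool run_contains(const struct sk_buff *head, int offset, int end)
{
	int run_start = head->ip_defrag_offset;
	int run_end = run_start + FRAG_CB(head)->frag_run_len;

	return offset >= run_start && end <= run_end;
}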
 
 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
  * Value : 0xff if frame should be dropped.
@@ -54,48 +106,90 @@
 	if (!f->frags_cachep)
 		return -ENOMEM;
 
+	refcount_set(&f->refcnt, 1);
+	init_completion(&f->completion);
 	return 0;
 }
 EXPORT_SYMBOL(inet_frags_init);
 
 void inet_frags_fini(struct inet_frags *f)
 {
-	/* We must wait that all inet_frag_destroy_rcu() have completed. */
-	rcu_barrier();
+	if (refcount_dec_and_test(&f->refcnt))
+		complete(&f->completion);
+
+	wait_for_completion(&f->completion);
 
 	kmem_cache_destroy(f->frags_cachep);
 	f->frags_cachep = NULL;
 }
 EXPORT_SYMBOL(inet_frags_fini);
 
+/* called from rhashtable_free_and_destroy() at fqdir dismantle */
 static void inet_frags_free_cb(void *ptr, void *arg)
 {
 	struct inet_frag_queue *fq = ptr;
+	int count;
 
-	/* If we can not cancel the timer, it means this frag_queue
-	 * is already disappearing, we have nothing to do.
-	 * Otherwise, we own a refcount until the end of this function.
-	 */
-	if (!del_timer(&fq->timer))
-		return;
+	count = del_timer_sync(&fq->timer) ? 1 : 0;
 
 	spin_lock_bh(&fq->lock);
 	if (!(fq->flags & INET_FRAG_COMPLETE)) {
 		fq->flags |= INET_FRAG_COMPLETE;
-		refcount_dec(&fq->refcnt);
+		count++;
+	} else if (fq->flags & INET_FRAG_HASH_DEAD) {
+		count++;
 	}
 	spin_unlock_bh(&fq->lock);
 
-	inet_frag_put(fq);
+	if (refcount_sub_and_test(count, &fq->refcnt))
+		inet_frag_destroy(fq);
 }
 
-void inet_frags_exit_net(struct netns_frags *nf)
+static void fqdir_work_fn(struct work_struct *work)
 {
-	nf->high_thresh = 0; /* prevent creation of new frags */
+	struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work);
+	struct inet_frags *f = fqdir->f;
 
-	rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
+	rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);
+
+	/* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu)
+	 * calls have completed, since they need to dereference fqdir.
+	 * Would it not be nice to have kfree_rcu_barrier() ? :)
+	 */
+	rcu_barrier();
+
+	if (refcount_dec_and_test(&f->refcnt))
+		complete(&f->completion);
+
+	kfree(fqdir);
 }
-EXPORT_SYMBOL(inet_frags_exit_net);
+
+int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
+{
+	struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
+	int res;
+
+	if (!fqdir)
+		return -ENOMEM;
+	fqdir->f = f;
+	fqdir->net = net;
+	res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
+	if (res < 0) {
+		kfree(fqdir);
+		return res;
+	}
+	refcount_inc(&f->refcnt);
+	*fqdirp = fqdir;
+	return 0;
+}
+EXPORT_SYMBOL(fqdir_init);
+
+void fqdir_exit(struct fqdir *fqdir)
+{
+	INIT_WORK(&fqdir->destroy_work, fqdir_work_fn);
+	queue_work(system_wq, &fqdir->destroy_work);
+}
+EXPORT_SYMBOL(fqdir_exit);
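The refcount/completion pair added above gives the inet_frags descriptor a
lifetime that outlasts every per-netns fqdir: inet_frags_init() holds the
initial reference, each fqdir_init() takes one more, and each deferred
fqdir_work_fn() drops one after its rcu_barrier(). In timeline form:

/*
 * inet_frags_init(f)        refcnt = 1, completion armed
 * fqdir_init(&d1, f, net1)  refcnt = 2
 * fqdir_exit(d1)            queues fqdir_work_fn(d1)
 *   fqdir_work_fn(d1)       free hash, rcu_barrier(), refcnt = 1
 * inet_frags_fini(f)        refcnt = 0 -> complete(); then
 *                           wait_for_completion() returns and
 *                           frags_cachep can be destroyed safely
 */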
 
 void inet_frag_kill(struct inet_frag_queue *fq)
 {
@@ -103,11 +197,23 @@
 		refcount_dec(&fq->refcnt);
 
 	if (!(fq->flags & INET_FRAG_COMPLETE)) {
-		struct netns_frags *nf = fq->net;
+		struct fqdir *fqdir = fq->fqdir;
 
 		fq->flags |= INET_FRAG_COMPLETE;
-		rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
-		refcount_dec(&fq->refcnt);
+		rcu_read_lock();
+		/* The RCU read lock provides a memory barrier
+		 * guaranteeing that if fqdir->dead is false then
+		 * the hash table destruction will not start until
+		 * after we unlock.  Paired with fqdir_pre_exit().
+		 */
+		if (!fqdir->dead) {
+			rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
+					       fqdir->f->rhash_params);
+			refcount_dec(&fq->refcnt);
+		} else {
+			fq->flags |= INET_FRAG_HASH_DEAD;
+		}
+		rcu_read_unlock();
 	}
 }
 EXPORT_SYMBOL(inet_frag_kill);
@@ -116,17 +222,38 @@
 {
 	struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
 						 rcu);
-	struct inet_frags *f = q->net->f;
+	struct inet_frags *f = q->fqdir->f;
 
 	if (f->destructor)
 		f->destructor(q);
 	kmem_cache_free(f->frags_cachep, q);
 }
 
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+	struct rb_node *p = rb_first(root);
+	unsigned int sum = 0;
+
+	while (p) {
+		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+		p = rb_next(p);
+		rb_erase(&skb->rbnode, root);
+		while (skb) {
+			struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+			sum += skb->truesize;
+			kfree_skb(skb);
+			skb = next;
+		}
+	}
+	return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
 void inet_frag_destroy(struct inet_frag_queue *q)
 {
-	struct sk_buff *fp;
-	struct netns_frags *nf;
+	struct fqdir *fqdir;
 	unsigned int sum, sum_truesize = 0;
 	struct inet_frags *f;
 
@@ -134,29 +261,18 @@
 	WARN_ON(del_timer(&q->timer) != 0);
 
 	/* Release all fragment data. */
-	fp = q->fragments;
-	nf = q->net;
-	f = nf->f;
-	if (fp) {
-		do {
-			struct sk_buff *xp = fp->next;
-
-			sum_truesize += fp->truesize;
-			kfree_skb(fp);
-			fp = xp;
-		} while (fp);
-	} else {
-		sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
-	}
+	fqdir = q->fqdir;
+	f = fqdir->f;
+	sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
 	sum = sum_truesize + f->qsize;
 
 	call_rcu(&q->rcu, inet_frag_destroy_rcu);
 
-	sub_frag_mem_limit(nf, sum);
+	sub_frag_mem_limit(fqdir, sum);
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
-static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
 					       struct inet_frags *f,
 					       void *arg)
 {
@@ -166,9 +282,9 @@
 	if (!q)
 		return NULL;
 
-	q->net = nf;
+	q->fqdir = fqdir;
 	f->constructor(q, arg);
-	add_frag_mem_limit(nf, f->qsize);
+	add_frag_mem_limit(fqdir, f->qsize);
 
 	timer_setup(&q->timer, f->frag_expire, 0);
 	spin_lock_init(&q->lock);
@@ -177,21 +293,21 @@
 	return q;
 }
 
-static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
+static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
 						void *arg,
 						struct inet_frag_queue **prev)
 {
-	struct inet_frags *f = nf->f;
+	struct inet_frags *f = fqdir->f;
 	struct inet_frag_queue *q;
 
-	q = inet_frag_alloc(nf, f, arg);
+	q = inet_frag_alloc(fqdir, f, arg);
 	if (!q) {
 		*prev = ERR_PTR(-ENOMEM);
 		return NULL;
 	}
-	mod_timer(&q->timer, jiffies + nf->timeout);
+	mod_timer(&q->timer, jiffies + fqdir->timeout);
 
-	*prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
+	*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
 						 &q->node, f->rhash_params);
 	if (*prev) {
 		q->flags |= INET_FRAG_COMPLETE;
@@ -203,19 +319,19 @@
 }
 
 /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
+struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
 {
 	struct inet_frag_queue *fq = NULL, *prev;
 
-	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
+	if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
 		return NULL;
 
 	rcu_read_lock();
 
-	prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+	prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
 	if (!prev)
-		fq = inet_frag_create(nf, key, &prev);
-	if (prev && !IS_ERR(prev)) {
+		fq = inet_frag_create(fqdir, key, &prev);
+	if (!IS_ERR_OR_NULL(prev)) {
 		fq = prev;
 		if (!refcount_inc_not_zero(&fq->refcnt))
 			fq = NULL;
@@ -224,3 +340,229 @@
 	return fq;
 }
 EXPORT_SYMBOL(inet_frag_find);
+
+int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+			   int offset, int end)
+{
+	struct sk_buff *last = q->fragments_tail;
+
+	/* RFC5722, Section 4, amended by Errata ID : 3089
+	 *                          When reassembling an IPv6 datagram, if
+	 *   one or more its constituent fragments is determined to be an
+	 *   overlapping fragment, the entire datagram (and any constituent
+	 *   fragments) MUST be silently discarded.
+	 *
+	 * Duplicates, however, should be ignored (i.e. skb dropped, but the
+	 * queue/fragments kept for later reassembly).
+	 */
+	if (!last)
+		fragrun_create(q, skb);  /* First fragment. */
+	else if (last->ip_defrag_offset + last->len < end) {
+		/* This is the common case: skb goes to the end. */
+		/* Detect and discard overlaps. */
+		if (offset < last->ip_defrag_offset + last->len)
+			return IPFRAG_OVERLAP;
+		if (offset == last->ip_defrag_offset + last->len)
+			fragrun_append_to_last(q, skb);
+		else
+			fragrun_create(q, skb);
+	} else {
+		/* Binary search. Note that skb can become the first fragment,
+		 * but not the last (covered above).
+		 */
+		struct rb_node **rbn, *parent;
+
+		rbn = &q->rb_fragments.rb_node;
+		do {
+			struct sk_buff *curr;
+			int curr_run_end;
+
+			parent = *rbn;
+			curr = rb_to_skb(parent);
+			curr_run_end = curr->ip_defrag_offset +
+					FRAG_CB(curr)->frag_run_len;
+			if (end <= curr->ip_defrag_offset)
+				rbn = &parent->rb_left;
+			else if (offset >= curr_run_end)
+				rbn = &parent->rb_right;
+			else if (offset >= curr->ip_defrag_offset &&
+				 end <= curr_run_end)
+				return IPFRAG_DUP;
+			else
+				return IPFRAG_OVERLAP;
+		} while (*rbn);
+		/* Here we have parent properly set, and rbn pointing to
+		 * one of its NULL left/right children. Insert skb.
+		 */
+		fragcb_clear(skb);
+		rb_link_node(&skb->rbnode, parent, rbn);
+		rb_insert_color(&skb->rbnode, &q->rb_fragments);
+	}
+
+	skb->ip_defrag_offset = offset;
+
+	return IPFRAG_OK;
+}
+EXPORT_SYMBOL(inet_frag_queue_insert);
+
+void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+			      struct sk_buff *parent)
+{
+	struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
+	struct sk_buff **nextp;
+	int delta;
+
+	if (head != skb) {
+		fp = skb_clone(skb, GFP_ATOMIC);
+		if (!fp)
+			return NULL;
+		FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+		if (RB_EMPTY_NODE(&skb->rbnode))
+			FRAG_CB(parent)->next_frag = fp;
+		else
+			rb_replace_node(&skb->rbnode, &fp->rbnode,
+					&q->rb_fragments);
+		if (q->fragments_tail == skb)
+			q->fragments_tail = fp;
+		skb_morph(skb, head);
+		FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+		rb_replace_node(&head->rbnode, &skb->rbnode,
+				&q->rb_fragments);
+		consume_skb(head);
+		head = skb;
+	}
+	WARN_ON(head->ip_defrag_offset != 0);
+
+	delta = -head->truesize;
+
+	/* Head of list must not be cloned. */
+	if (skb_unclone(head, GFP_ATOMIC))
+		return NULL;
+
+	delta += head->truesize;
+	if (delta)
+		add_frag_mem_limit(q->fqdir, delta);
+
+	/* If the first fragment is fragmented itself, we split
+	 * it to two chunks: the first with data and paged part
+	 * and the second, holding only fragments.
+	 */
+	if (skb_has_frag_list(head)) {
+		struct sk_buff *clone;
+		int i, plen = 0;
+
+		clone = alloc_skb(0, GFP_ATOMIC);
+		if (!clone)
+			return NULL;
+		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+		skb_frag_list_init(head);
+		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+		clone->data_len = head->data_len - plen;
+		clone->len = clone->data_len;
+		head->truesize += clone->truesize;
+		clone->csum = 0;
+		clone->ip_summed = head->ip_summed;
+		add_frag_mem_limit(q->fqdir, clone->truesize);
+		skb_shinfo(head)->frag_list = clone;
+		nextp = &clone->next;
+	} else {
+		nextp = &skb_shinfo(head)->frag_list;
+	}
+
+	return nextp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_prepare);
+
+void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+			    void *reasm_data, bool try_coalesce)
+{
+	struct sk_buff **nextp = (struct sk_buff **)reasm_data;
+	struct rb_node *rbn;
+	struct sk_buff *fp;
+	int sum_truesize;
+
+	skb_push(head, head->data - skb_network_header(head));
+
+	/* Traverse the tree in order, to build frag_list. */
+	fp = FRAG_CB(head)->next_frag;
+	rbn = rb_next(&head->rbnode);
+	rb_erase(&head->rbnode, &q->rb_fragments);
+
+	sum_truesize = head->truesize;
+	while (rbn || fp) {
+		/* fp points to the next sk_buff in the current run;
+		 * rbn points to the next run.
+		 */
+		/* Go through the current run. */
+		while (fp) {
+			struct sk_buff *next_frag = FRAG_CB(fp)->next_frag;
+			bool stolen;
+			int delta;
+
+			sum_truesize += fp->truesize;
+			if (head->ip_summed != fp->ip_summed)
+				head->ip_summed = CHECKSUM_NONE;
+			else if (head->ip_summed == CHECKSUM_COMPLETE)
+				head->csum = csum_add(head->csum, fp->csum);
+
+			if (try_coalesce && skb_try_coalesce(head, fp, &stolen,
+							     &delta)) {
+				kfree_skb_partial(fp, stolen);
+			} else {
+				fp->prev = NULL;
+				memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+				fp->sk = NULL;
+
+				head->data_len += fp->len;
+				head->len += fp->len;
+				head->truesize += fp->truesize;
+
+				*nextp = fp;
+				nextp = &fp->next;
+			}
+
+			fp = next_frag;
+		}
+		/* Move to the next run. */
+		if (rbn) {
+			struct rb_node *rbnext = rb_next(rbn);
+
+			fp = rb_to_skb(rbn);
+			rb_erase(rbn, &q->rb_fragments);
+			rbn = rbnext;
+		}
+	}
+	sub_frag_mem_limit(q->fqdir, sum_truesize);
+
+	*nextp = NULL;
+	skb_mark_not_on_list(head);
+	head->prev = NULL;
+	head->tstamp = q->stamp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_finish);
+
+struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
+{
+	struct sk_buff *head, *skb;
+
+	head = skb_rb_first(&q->rb_fragments);
+	if (!head)
+		return NULL;
+	skb = FRAG_CB(head)->next_frag;
+	if (skb)
+		rb_replace_node(&head->rbnode, &skb->rbnode,
+				&q->rb_fragments);
+	else
+		rb_erase(&head->rbnode, &q->rb_fragments);
+	memset(&head->rbnode, 0, sizeof(head->rbnode));
+	barrier();
+
+	if (head == q->fragments_tail)
+		q->fragments_tail = NULL;
+
+	sub_frag_mem_limit(q->fqdir, head->truesize);
+
+	return head;
+}
+EXPORT_SYMBOL(inet_frag_pull_head);
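inet_frag_queue_insert() reports its outcome with small constants defined
next to the fqdir declarations in include/net/inet_frag.h; ip_frag_queue()
maps IPFRAG_DUP to a silent drop of just the skb and IPFRAG_OVERLAP to
killing the whole queue:

/* Return values of inet_frag_queue_insert() */
#define IPFRAG_OK	0
#define IPFRAG_DUP	1	/* fully inside an existing run: drop skb only */
#define IPFRAG_OVERLAP	2	/* partial overlap: discard the datagram */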
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f5c9ef2..83fb001 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the BSD Socket
@@ -6,11 +7,6 @@
  *		Generic INET transport hashtables
  *
  * Authors:	Lotsa people, from code originally in tcp
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -19,7 +15,7 @@
 #include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/vmalloc.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <net/addrconf.h>
 #include <net/inet_connection_sock.h>
@@ -65,12 +61,14 @@
 struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
 						 struct net *net,
 						 struct inet_bind_hashbucket *head,
-						 const unsigned short snum)
+						 const unsigned short snum,
+						 int l3mdev)
 {
 	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
 
 	if (tb) {
 		write_pnet(&tb->ib_net, net);
+		tb->l3mdev    = l3mdev;
 		tb->port      = snum;
 		tb->fastreuse = 0;
 		tb->fastreuseport = 0;
@@ -135,6 +133,7 @@
 			table->bhash_size);
 	struct inet_bind_hashbucket *head = &table->bhash[bhash];
 	struct inet_bind_bucket *tb;
+	int l3mdev;
 
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
@@ -143,6 +142,8 @@
 		return -ENOENT;
 	}
 	if (tb->port != port) {
+		l3mdev = inet_sk_bound_l3mdev(sk);
+
 		/* NOTE: using tproxy and redirecting skbs to a proxy
 		 * on a different listener port breaks the assumption
 		 * that the listener socket's icsk_bind_hash is the same
@@ -150,12 +151,13 @@
 		 * create a new bind bucket for the child here. */
 		inet_bind_bucket_for_each(tb, &head->chain) {
 			if (net_eq(ib_net(tb), sock_net(sk)) &&
-			    tb->port == port)
+			    tb->l3mdev == l3mdev && tb->port == port)
 				break;
 		}
 		if (!tb) {
 			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
-						     sock_net(sk), head, port);
+						     sock_net(sk), head, port,
+						     l3mdev);
 			if (!tb) {
 				spin_unlock(&head->lock);
 				return -ENOMEM;
@@ -228,27 +230,17 @@
 				const int dif, const int sdif, bool exact_dif)
 {
 	int score = -1;
-	struct inet_sock *inet = inet_sk(sk);
 
-	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
+	if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
 			!ipv6_only_sock(sk)) {
-		__be32 rcv_saddr = inet->inet_rcv_saddr;
-		score = sk->sk_family == PF_INET ? 2 : 1;
-		if (rcv_saddr) {
-			if (rcv_saddr != daddr)
-				return -1;
-			score += 4;
-		}
-		if (sk->sk_bound_dev_if || exact_dif) {
-			bool dev_match = (sk->sk_bound_dev_if == dif ||
-					  sk->sk_bound_dev_if == sdif);
+		if (sk->sk_rcv_saddr != daddr)
+			return -1;
 
-			if (!dev_match)
-				return -1;
-			if (sk->sk_bound_dev_if)
-				score += 4;
-		}
-		if (sk->sk_incoming_cpu == raw_smp_processor_id())
+		if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
+			return -1;
+
+		score = sk->sk_family == PF_INET ? 2 : 1;
+		if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
 			score++;
 	}
 	return score;
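compute_score() can now insist on an exact address match because lhash2
buckets are hashed by (port, address); the wildcard case is covered by the
second INADDR_ANY lookup in ___inet_lookup_listener() below. The device test
collapses into inet_sk_bound_dev_eq(), whose core reduces to roughly:

static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if,
				     int dif, int sdif)
{
	if (!bound_dev_if)
		return !sdif || l3mdev_accept;	/* unbound socket */
	return bound_dev_if == dif || bound_dev_if == sdif;
}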
@@ -303,26 +295,12 @@
 				    const __be32 daddr, const unsigned short hnum,
 				    const int dif, const int sdif)
 {
-	unsigned int hash = inet_lhashfn(net, hnum);
-	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
-	bool exact_dif = inet_exact_dif_match(net, skb);
 	struct inet_listen_hashbucket *ilb2;
-	struct sock *sk, *result = NULL;
-	int score, hiscore = 0;
+	struct sock *result = NULL;
 	unsigned int hash2;
-	u32 phash = 0;
-
-	if (ilb->count <= 10 || !hashinfo->lhash2)
-		goto port_lookup;
-
-	/* Too many sk in the ilb bucket (which is hashed by port alone).
-	 * Try lhash2 (which is hashed by port and addr) instead.
-	 */
 
 	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
 	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
-	if (ilb2->count > ilb->count)
-		goto port_lookup;
 
 	result = inet_lhash2_lookup(net, ilb2, skb, doff,
 				    saddr, sport, daddr, hnum,
@@ -331,36 +309,14 @@
 		goto done;
 
 	/* Lookup lhash2 with INADDR_ANY */
-
 	hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
 	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
-	if (ilb2->count > ilb->count)
-		goto port_lookup;
 
 	result = inet_lhash2_lookup(net, ilb2, skb, doff,
-				    saddr, sport, daddr, hnum,
+				    saddr, sport, htonl(INADDR_ANY), hnum,
 				    dif, sdif);
-	goto done;
-
-port_lookup:
-	sk_for_each_rcu(sk, &ilb->head) {
-		score = compute_score(sk, net, hnum, daddr,
-				      dif, sdif, exact_dif);
-		if (score > hiscore) {
-			if (sk->sk_reuseport) {
-				phash = inet_ehashfn(net, daddr, hnum,
-						     saddr, sport);
-				result = reuseport_select_sock(sk, phash,
-							       skb, doff);
-				if (result)
-					goto done;
-			}
-			result = sk;
-			hiscore = score;
-		}
-	}
 done:
-	if (unlikely(IS_ERR(result)))
+	if (IS_ERR(result))
 		return NULL;
 	return result;
 }
@@ -675,6 +631,7 @@
 	u32 remaining, offset;
 	int ret, i, low, high;
 	static u32 hint;
+	int l3mdev;
 
 	if (port) {
 		head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -693,6 +650,8 @@
 		return ret;
 	}
 
+	l3mdev = inet_sk_bound_l3mdev(sk);
+
 	inet_get_local_port_range(net, &low, &high);
 	high++; /* [32768, 60999] -> [32768, 61000[ */
 	remaining = high - low;
@@ -719,7 +678,8 @@
 		 * the established check is already unique enough.
 		 */
 		inet_bind_bucket_for_each(tb, &head->chain) {
-			if (net_eq(ib_net(tb), net) && tb->port == port) {
+			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+			    tb->port == port) {
 				if (tb->fastreuse >= 0 ||
 				    tb->fastreuseport >= 0)
 					goto next_port;
@@ -732,7 +692,7 @@
 		}
 
 		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
-					     net, head, port);
+					     net, head, port, l3mdev);
 		if (!tb) {
 			spin_unlock_bh(&head->lock);
 			return -ENOMEM;
@@ -798,13 +758,22 @@
 }
 EXPORT_SYMBOL_GPL(inet_hashinfo_init);
 
+static void init_hashinfo_lhash2(struct inet_hashinfo *h)
+{
+	int i;
+
+	for (i = 0; i <= h->lhash2_mask; i++) {
+		spin_lock_init(&h->lhash2[i].lock);
+		INIT_HLIST_HEAD(&h->lhash2[i].head);
+		h->lhash2[i].count = 0;
+	}
+}
+
 void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
 				unsigned long numentries, int scale,
 				unsigned long low_limit,
 				unsigned long high_limit)
 {
-	unsigned int i;
-
 	h->lhash2 = alloc_large_system_hash(name,
 					    sizeof(*h->lhash2),
 					    numentries,
@@ -814,14 +783,24 @@
 					    &h->lhash2_mask,
 					    low_limit,
 					    high_limit);
-
-	for (i = 0; i <= h->lhash2_mask; i++) {
-		spin_lock_init(&h->lhash2[i].lock);
-		INIT_HLIST_HEAD(&h->lhash2[i].head);
-		h->lhash2[i].count = 0;
-	}
+	init_hashinfo_lhash2(h);
 }
 
+int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
+{
+	h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2), GFP_KERNEL);
+	if (!h->lhash2)
+		return -ENOMEM;
+
+	h->lhash2_mask = INET_LHTABLE_SIZE - 1;
+	/* INET_LHTABLE_SIZE must be a power of 2 */
+	BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask);
+
+	init_hashinfo_lhash2(h);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod);
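The BUG_ON() works because lhash2_mask is INET_LHTABLE_SIZE - 1: a power of
two has exactly one bit set, so size & (size - 1) is zero precisely when
"hash & lhash2_mask" can index every slot uniformly. The kernel's generic
form of the test lives in <linux/log2.h>:

static inline bool is_power_of_2(unsigned long n)
{
	return n != 0 && ((n & (n - 1)) == 0);
}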
+
 int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
 {
 	unsigned int locksz = sizeof(spinlock_t);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 88c5069..c411c87 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index d757b96..be77859 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -216,6 +216,7 @@
 			atomic_set(&p->rid, 0);
 			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 			p->rate_tokens = 0;
+			p->n_redirects = 0;
 			/* 60*HZ is arbitrary, but chosen enough high so that the first
 			 * calculation of tokens is at its maximum.
 			 */
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 32662e9..00ec819 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -69,9 +69,17 @@
 	__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
 	__IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
+#ifdef CONFIG_NET_SWITCHDEV
+	if (skb->offload_l3_fwd_mark) {
+		consume_skb(skb);
+		return 0;
+	}
+#endif
+
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
+	skb->tstamp = 0;
 	return dst_output(net, sk, skb);
 }
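Two independent fixes land in this hunk: packets that the switch ASIC already
forwarded (offload_l3_fwd_mark) are consumed rather than transmitted a second
time, and skb->tstamp is cleared because EDT-aware qdiscs such as sch_fq read
a non-zero skb->tstamp as the earliest transmit time, so a stale receive
timestamp carried across forwarding would stall or misspace the packet. A
hypothetical helper naming the second idea:

/* hypothetical: rx timestamps are meaningless as egress launch times */
static inline void skb_clear_delivery_time(struct sk_buff *skb)
{
	skb->tstamp = 0;
}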
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index f686d77..cfeb889 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,57 +57,6 @@
  */
 static const char ip_frag_cache_name[] = "ip4-frags";
 
-/* Use skb->cb to track consecutive/adjacent fragments coming at
- * the end of the queue. Nodes in the rb-tree queue will
- * contain "runs" of one or more adjacent fragments.
- *
- * Invariants:
- * - next_frag is NULL at the tail of a "run";
- * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
- */
-struct ipfrag_skb_cb {
-	struct inet_skb_parm	h;
-	struct sk_buff		*next_frag;
-	int			frag_run_len;
-};
-
-#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))
-
-static void ip4_frag_init_run(struct sk_buff *skb)
-{
-	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
-
-	FRAG_CB(skb)->next_frag = NULL;
-	FRAG_CB(skb)->frag_run_len = skb->len;
-}
-
-/* Append skb to the last "run". */
-static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
-					struct sk_buff *skb)
-{
-	RB_CLEAR_NODE(&skb->rbnode);
-	FRAG_CB(skb)->next_frag = NULL;
-
-	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
-	FRAG_CB(q->fragments_tail)->next_frag = skb;
-	q->fragments_tail = skb;
-}
-
-/* Create a new "run" with the skb. */
-static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
-{
-	if (q->last_run_head)
-		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
-			     &q->last_run_head->rbnode.rb_right);
-	else
-		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
-	rb_insert_color(&skb->rbnode, &q->rb_fragments);
-
-	ip4_frag_init_run(skb);
-	q->fragments_tail = skb;
-	q->last_run_head = skb;
-}
-
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
 	struct inet_frag_queue q;
@@ -133,15 +82,13 @@
 static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
 {
 	struct ipq *qp = container_of(q, struct ipq, q);
-	struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
-					       frags);
-	struct net *net = container_of(ipv4, struct net, ipv4);
+	struct net *net = q->fqdir->net;
 
 	const struct frag_v4_compare_key *key = a;
 
 	q->key.v4 = *key;
 	qp->ecn = 0;
-	qp->peer = q->net->max_dist ?
+	qp->peer = q->fqdir->max_dist ?
 		inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
 		NULL;
 }
@@ -193,9 +140,13 @@
 	int err;
 
 	qp = container_of(frag, struct ipq, q);
-	net = container_of(qp->q.net, struct net, ipv4.frags);
+	net = qp->q.fqdir->net;
 
 	rcu_read_lock();
+
+	if (qp->q.fqdir->dead)
+		goto out_rcu_unlock;
+
 	spin_lock(&qp->q.lock);
 
 	if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -212,27 +163,9 @@
 	 * pull the head out of the tree in order to be able to
 	 * deal with head->dev.
 	 */
-	if (qp->q.fragments) {
-		head = qp->q.fragments;
-		qp->q.fragments = head->next;
-	} else {
-		head = skb_rb_first(&qp->q.rb_fragments);
-		if (!head)
-			goto out;
-		if (FRAG_CB(head)->next_frag)
-			rb_replace_node(&head->rbnode,
-					&FRAG_CB(head)->next_frag->rbnode,
-					&qp->q.rb_fragments);
-		else
-			rb_erase(&head->rbnode, &qp->q.rb_fragments);
-		memset(&head->rbnode, 0, sizeof(head->rbnode));
-		barrier();
-	}
-	if (head == qp->q.fragments_tail)
-		qp->q.fragments_tail = NULL;
-
-	sub_frag_mem_limit(qp->q.net, head->truesize);
-
+	head = inet_frag_pull_head(&qp->q);
+	if (!head)
+		goto out;
 	head->dev = dev_get_by_index_rcu(net, qp->iif);
 	if (!head->dev)
 		goto out;
@@ -260,8 +193,7 @@
 	spin_unlock(&qp->q.lock);
 out_rcu_unlock:
 	rcu_read_unlock();
-	if (head)
-		kfree_skb(head);
+	kfree_skb(head);
 	ipq_put(qp);
 }
 
@@ -281,7 +213,7 @@
 	};
 	struct inet_frag_queue *q;
 
-	q = inet_frag_find(&net->ipv4.frags, &key);
+	q = inet_frag_find(net->ipv4.fqdir, &key);
 	if (!q)
 		return NULL;
 
@@ -292,7 +224,7 @@
 static int ip_frag_too_far(struct ipq *qp)
 {
 	struct inet_peer *peer = qp->peer;
-	unsigned int max = qp->q.net->max_dist;
+	unsigned int max = qp->q.fqdir->max_dist;
 	unsigned int start, end;
 
 	int rc;
@@ -306,12 +238,8 @@
 
 	rc = qp->q.fragments_tail && (end - start) > max;
 
-	if (rc) {
-		struct net *net;
-
-		net = container_of(qp->q.net, struct net, ipv4.frags);
-		__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
-	}
+	if (rc)
+		__IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS);
 
 	return rc;
 }
@@ -320,18 +248,17 @@
 {
 	unsigned int sum_truesize = 0;
 
-	if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
+	if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
 		refcount_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
 
 	sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
-	sub_frag_mem_limit(qp->q.net, sum_truesize);
+	sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
 
 	qp->q.flags = 0;
 	qp->q.len = 0;
 	qp->q.meat = 0;
-	qp->q.fragments = NULL;
 	qp->q.rb_fragments = RB_ROOT;
 	qp->q.fragments_tail = NULL;
 	qp->q.last_run_head = NULL;
@@ -344,13 +271,11 @@
 /* Add new segment to existing queue. */
 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
-	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
-	struct rb_node **rbn, *parent;
-	struct sk_buff *skb1, *prev_tail;
+	struct net *net = qp->q.fqdir->net;
+	int ihl, end, flags, offset;
+	struct sk_buff *prev_tail;
 	struct net_device *dev;
 	unsigned int fragsize;
-	int flags, offset;
-	int ihl, end;
 	int err = -ENOENT;
 	u8 ecn;
 
@@ -382,7 +307,7 @@
 		 */
 		if (end < qp->q.len ||
 		    ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
-			goto err;
+			goto discard_qp;
 		qp->q.flags |= INET_FRAG_LAST_IN;
 		qp->q.len = end;
 	} else {
@@ -394,80 +319,38 @@
 		if (end > qp->q.len) {
 			/* Some bits beyond end -> corruption. */
 			if (qp->q.flags & INET_FRAG_LAST_IN)
-				goto err;
+				goto discard_qp;
 			qp->q.len = end;
 		}
 	}
 	if (end == offset)
-		goto err;
+		goto discard_qp;
 
 	err = -ENOMEM;
 	if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
-		goto err;
+		goto discard_qp;
 
 	err = pskb_trim_rcsum(skb, end - offset);
 	if (err)
-		goto err;
+		goto discard_qp;
 
 	/* Note : skb->rbnode and skb->dev share the same location. */
 	dev = skb->dev;
 	/* Makes sure compiler wont do silly aliasing games */
 	barrier();
 
-	/* RFC5722, Section 4, amended by Errata ID : 3089
-	 *                          When reassembling an IPv6 datagram, if
-	 *   one or more its constituent fragments is determined to be an
-	 *   overlapping fragment, the entire datagram (and any constituent
-	 *   fragments) MUST be silently discarded.
-	 *
-	 * We do the same here for IPv4 (and increment an snmp counter).
-	 */
-
-	/* Find out where to put this fragment.  */
 	prev_tail = qp->q.fragments_tail;
-	if (!prev_tail)
-		ip4_frag_create_run(&qp->q, skb);  /* First fragment. */
-	else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
-		/* This is the common case: skb goes to the end. */
-		/* Detect and discard overlaps. */
-		if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
-			goto discard_qp;
-		if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
-			ip4_frag_append_to_last_run(&qp->q, skb);
-		else
-			ip4_frag_create_run(&qp->q, skb);
-	} else {
-		/* Binary search. Note that skb can become the first fragment,
-		 * but not the last (covered above).
-		 */
-		rbn = &qp->q.rb_fragments.rb_node;
-		do {
-			parent = *rbn;
-			skb1 = rb_to_skb(parent);
-			if (end <= skb1->ip_defrag_offset)
-				rbn = &parent->rb_left;
-			else if (offset >= skb1->ip_defrag_offset +
-						FRAG_CB(skb1)->frag_run_len)
-				rbn = &parent->rb_right;
-			else /* Found an overlap with skb1. */
-				goto discard_qp;
-		} while (*rbn);
-		/* Here we have parent properly set, and rbn pointing to
-		 * one of its NULL left/right children. Insert skb.
-		 */
-		ip4_frag_init_run(skb);
-		rb_link_node(&skb->rbnode, parent, rbn);
-		rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
-	}
+	err = inet_frag_queue_insert(&qp->q, skb, offset, end);
+	if (err)
+		goto insert_error;
 
 	if (dev)
 		qp->iif = dev->ifindex;
-	skb->ip_defrag_offset = offset;
 
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
 	qp->ecn |= ecn;
-	add_frag_mem_limit(qp->q.net, skb->truesize);
+	add_frag_mem_limit(qp->q.fqdir, skb->truesize);
 	if (offset == 0)
 		qp->q.flags |= INET_FRAG_FIRST_IN;
 
@@ -487,34 +370,42 @@
 		skb->_skb_refdst = 0UL;
 		err = ip_frag_reasm(qp, skb, prev_tail, dev);
 		skb->_skb_refdst = orefdst;
+		if (err)
+			inet_frag_kill(&qp->q);
 		return err;
 	}
 
 	skb_dst_drop(skb);
 	return -EINPROGRESS;
 
-discard_qp:
-	inet_frag_kill(&qp->q);
+insert_error:
+	if (err == IPFRAG_DUP) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
 	err = -EINVAL;
 	__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
+discard_qp:
+	inet_frag_kill(&qp->q);
+	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 err:
 	kfree_skb(skb);
 	return err;
 }
 
+static bool ip_frag_coalesce_ok(const struct ipq *qp)
+{
+	return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER;
+}
+
 /* Build a new IP datagram from all its fragments. */
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 			 struct sk_buff *prev_tail, struct net_device *dev)
 {
-	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+	struct net *net = qp->q.fqdir->net;
 	struct iphdr *iph;
-	struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
-	struct sk_buff **nextp; /* To build frag_list. */
-	struct rb_node *rbn;
-	int len;
-	int ihlen;
-	int delta;
-	int err;
+	void *reasm_data;
+	int len, err;
 	u8 ecn;
 
 	ipq_kill(qp);
@@ -524,117 +415,24 @@
 		err = -EINVAL;
 		goto out_fail;
 	}
+
 	/* Make the one we just received the head. */
-	if (head != skb) {
-		fp = skb_clone(skb, GFP_ATOMIC);
-		if (!fp)
-			goto out_nomem;
-		FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
-		if (RB_EMPTY_NODE(&skb->rbnode))
-			FRAG_CB(prev_tail)->next_frag = fp;
-		else
-			rb_replace_node(&skb->rbnode, &fp->rbnode,
-					&qp->q.rb_fragments);
-		if (qp->q.fragments_tail == skb)
-			qp->q.fragments_tail = fp;
-		skb_morph(skb, head);
-		FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
-		rb_replace_node(&head->rbnode, &skb->rbnode,
-				&qp->q.rb_fragments);
-		consume_skb(head);
-		head = skb;
-	}
+	reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
+	if (!reasm_data)
+		goto out_nomem;
 
-	WARN_ON(head->ip_defrag_offset != 0);
-
-	/* Allocate a new buffer for the datagram. */
-	ihlen = ip_hdrlen(head);
-	len = ihlen + qp->q.len;
-
+	len = ip_hdrlen(skb) + qp->q.len;
 	err = -E2BIG;
 	if (len > 65535)
 		goto out_oversize;
 
-	delta = - head->truesize;
+	inet_frag_reasm_finish(&qp->q, skb, reasm_data,
+			       ip_frag_coalesce_ok(qp));
 
-	/* Head of list must not be cloned. */
-	if (skb_unclone(head, GFP_ATOMIC))
-		goto out_nomem;
+	skb->dev = dev;
+	IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
 
-	delta += head->truesize;
-	if (delta)
-		add_frag_mem_limit(qp->q.net, delta);
-
-	/* If the first fragment is fragmented itself, we split
-	 * it to two chunks: the first with data and paged part
-	 * and the second, holding only fragments. */
-	if (skb_has_frag_list(head)) {
-		struct sk_buff *clone;
-		int i, plen = 0;
-
-		clone = alloc_skb(0, GFP_ATOMIC);
-		if (!clone)
-			goto out_nomem;
-		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
-		skb_frag_list_init(head);
-		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
-			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
-		clone->len = clone->data_len = head->data_len - plen;
-		head->truesize += clone->truesize;
-		clone->csum = 0;
-		clone->ip_summed = head->ip_summed;
-		add_frag_mem_limit(qp->q.net, clone->truesize);
-		skb_shinfo(head)->frag_list = clone;
-		nextp = &clone->next;
-	} else {
-		nextp = &skb_shinfo(head)->frag_list;
-	}
-
-	skb_push(head, head->data - skb_network_header(head));
-
-	/* Traverse the tree in order, to build frag_list. */
-	fp = FRAG_CB(head)->next_frag;
-	rbn = rb_next(&head->rbnode);
-	rb_erase(&head->rbnode, &qp->q.rb_fragments);
-	while (rbn || fp) {
-		/* fp points to the next sk_buff in the current run;
-		 * rbn points to the next run.
-		 */
-		/* Go through the current run. */
-		while (fp) {
-			*nextp = fp;
-			nextp = &fp->next;
-			fp->prev = NULL;
-			memset(&fp->rbnode, 0, sizeof(fp->rbnode));
-			fp->sk = NULL;
-			head->data_len += fp->len;
-			head->len += fp->len;
-			if (head->ip_summed != fp->ip_summed)
-				head->ip_summed = CHECKSUM_NONE;
-			else if (head->ip_summed == CHECKSUM_COMPLETE)
-				head->csum = csum_add(head->csum, fp->csum);
-			head->truesize += fp->truesize;
-			fp = FRAG_CB(fp)->next_frag;
-		}
-		/* Move to the next run. */
-		if (rbn) {
-			struct rb_node *rbnext = rb_next(rbn);
-
-			fp = rb_to_skb(rbn);
-			rb_erase(rbn, &qp->q.rb_fragments);
-			rbn = rbnext;
-		}
-	}
-	sub_frag_mem_limit(qp->q.net, head->truesize);
-
-	*nextp = NULL;
-	head->next = NULL;
-	head->prev = NULL;
-	head->dev = dev;
-	head->tstamp = qp->q.stamp;
-	IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
-
-	iph = ip_hdr(head);
+	iph = ip_hdr(skb);
 	iph->tot_len = htons(len);
 	iph->tos |= ecn;
 
@@ -647,7 +445,7 @@
 	 * from one very small df-fragment and one large non-df frag.
 	 */
 	if (qp->max_df_size == qp->q.max_size) {
-		IPCB(head)->flags |= IPSKB_FRAG_PMTU;
+		IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
 		iph->frag_off = htons(IP_DF);
 	} else {
 		iph->frag_off = 0;
@@ -656,7 +454,6 @@
 	ip_send_check(iph);
 
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
-	qp->q.fragments = NULL;
 	qp->q.rb_fragments = RB_ROOT;
 	qp->q.fragments_tail = NULL;
 	qp->q.last_run_head = NULL;
@@ -745,58 +542,30 @@
 }
 EXPORT_SYMBOL(ip_check_defrag);
 
-unsigned int inet_frag_rbtree_purge(struct rb_root *root)
-{
-	struct rb_node *p = rb_first(root);
-	unsigned int sum = 0;
-
-	while (p) {
-		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
-
-		p = rb_next(p);
-		rb_erase(&skb->rbnode, root);
-		while (skb) {
-			struct sk_buff *next = FRAG_CB(skb)->next_frag;
-
-			sum += skb->truesize;
-			kfree_skb(skb);
-			skb = next;
-		}
-	}
-	return sum;
-}
-EXPORT_SYMBOL(inet_frag_rbtree_purge);
-
 #ifdef CONFIG_SYSCTL
 static int dist_min;
 
 static struct ctl_table ip4_frags_ns_ctl_table[] = {
 	{
 		.procname	= "ipfrag_high_thresh",
-		.data		= &init_net.ipv4.frags.high_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &init_net.ipv4.frags.low_thresh
 	},
 	{
 		.procname	= "ipfrag_low_thresh",
-		.data		= &init_net.ipv4.frags.low_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra2		= &init_net.ipv4.frags.high_thresh
 	},
 	{
 		.procname	= "ipfrag_time",
-		.data		= &init_net.ipv4.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ipfrag_max_dist",
-		.data		= &init_net.ipv4.frags.max_dist,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -829,14 +598,13 @@
 		if (!table)
 			goto err_alloc;
 
-		table[0].data = &net->ipv4.frags.high_thresh;
-		table[0].extra1 = &net->ipv4.frags.low_thresh;
-		table[0].extra2 = &init_net.ipv4.frags.high_thresh;
-		table[1].data = &net->ipv4.frags.low_thresh;
-		table[1].extra2 = &net->ipv4.frags.high_thresh;
-		table[2].data = &net->ipv4.frags.timeout;
-		table[3].data = &net->ipv4.frags.max_dist;
 	}
+	table[0].data	= &net->ipv4.fqdir->high_thresh;
+	table[0].extra1	= &net->ipv4.fqdir->low_thresh;
+	table[1].data	= &net->ipv4.fqdir->low_thresh;
+	table[1].extra2	= &net->ipv4.fqdir->high_thresh;
+	table[2].data	= &net->ipv4.fqdir->timeout;
+	table[3].data	= &net->ipv4.fqdir->max_dist;
 
 	hdr = register_net_sysctl(net, "net/ipv4", table);
 	if (!hdr)
@@ -884,6 +652,9 @@
 {
 	int res;
 
+	res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
+	if (res < 0)
+		return res;
 	/* Fragment cache limits.
 	 *
 	 * The fragment memory accounting code, (tries to) account for
@@ -898,36 +669,38 @@
 	 * we will prune down to 3MB, making room for approx 8 big 64K
 	 * fragments 8x128k.
 	 */
-	net->ipv4.frags.high_thresh = 4 * 1024 * 1024;
-	net->ipv4.frags.low_thresh  = 3 * 1024 * 1024;
+	net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
+	net->ipv4.fqdir->low_thresh  = 3 * 1024 * 1024;
 	/*
 	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
 	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
 	 * by TTL.
 	 */
-	net->ipv4.frags.timeout = IP_FRAG_TIME;
+	net->ipv4.fqdir->timeout = IP_FRAG_TIME;
 
-	net->ipv4.frags.max_dist = 64;
-	net->ipv4.frags.f = &ip4_frags;
+	net->ipv4.fqdir->max_dist = 64;
 
-	res = inet_frags_init_net(&net->ipv4.frags);
-	if (res < 0)
-		return res;
 	res = ip4_frags_ns_ctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&net->ipv4.frags);
+		fqdir_exit(net->ipv4.fqdir);
 	return res;
 }
 
+static void __net_exit ipv4_frags_pre_exit_net(struct net *net)
+{
+	fqdir_pre_exit(net->ipv4.fqdir);
+}
+
 static void __net_exit ipv4_frags_exit_net(struct net *net)
 {
 	ip4_frags_ns_ctl_unregister(net);
-	inet_frags_exit_net(&net->ipv4.frags);
+	fqdir_exit(net->ipv4.fqdir);
 }
 
 static struct pernet_operations ip4_frags_ops = {
-	.init = ipv4_frags_init_net,
-	.exit = ipv4_frags_exit_net,
+	.init		= ipv4_frags_init_net,
+	.pre_exit	= ipv4_frags_pre_exit_net,
+	.exit		= ipv4_frags_exit_net,
 };
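fqdir_pre_exit() is what the new pre_exit hook runs before the exit phase
tears the rhashtable down; it arms the ->dead flag tested in ip_expire() and
inet_frag_kill(). It is added to include/net/inet_frag.h by this series and
is roughly:

static inline void fqdir_pre_exit(struct fqdir *fqdir)
{
	fqdir->high_thresh = 0; /* prevent creation of new frags */
	fqdir->dead = true;
}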
 
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8cce0e9..10636fb 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Linux NET3:	GRE over IP protocol decoder.
  *
  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -121,8 +116,8 @@
 static unsigned int gre_tap_net_id __read_mostly;
 static unsigned int erspan_net_id __read_mostly;
 
-static void ipgre_err(struct sk_buff *skb, u32 info,
-		      const struct tnl_ptk_info *tpi)
+static int ipgre_err(struct sk_buff *skb, u32 info,
+		     const struct tnl_ptk_info *tpi)
 {
 
 	/* All the routers (except for Linux) return only
@@ -146,36 +141,6 @@
 	unsigned int data_len = 0;
 	struct ip_tunnel *t;
 
-	switch (type) {
-	default:
-	case ICMP_PARAMETERPROB:
-		return;
-
-	case ICMP_DEST_UNREACH:
-		switch (code) {
-		case ICMP_SR_FAILED:
-		case ICMP_PORT_UNREACH:
-			/* Impossible event. */
-			return;
-		default:
-			/* All others are translated to HOST_UNREACH.
-			   rfc2003 contains "deep thoughts" about NET_UNREACH,
-			   I believe they are just ether pollution. --ANK
-			 */
-			break;
-		}
-		break;
-
-	case ICMP_TIME_EXCEEDED:
-		if (code != ICMP_EXC_TTL)
-			return;
-		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
-		break;
-
-	case ICMP_REDIRECT:
-		break;
-	}
-
 	if (tpi->proto == htons(ETH_P_TEB))
 		itn = net_generic(net, gre_tap_net_id);
 	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
@@ -189,27 +154,59 @@
 			     iph->daddr, iph->saddr, tpi->key);
 
 	if (!t)
-		return;
+		return -ENOENT;
+
+	switch (type) {
+	default:
+	case ICMP_PARAMETERPROB:
+		return 0;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return 0;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			   rfc2003 contains "deep thoughts" about NET_UNREACH,
+			   I believe they are just ether pollution. --ANK
+			 */
+			break;
+		}
+		break;
+
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return 0;
+		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
+		break;
+
+	case ICMP_REDIRECT:
+		break;
+	}
 
 #if IS_ENABLED(CONFIG_IPV6)
        if (tpi->proto == htons(ETH_P_IPV6) &&
            !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
 				       type, data_len))
-               return;
+               return 0;
 #endif
 
 	if (t->parms.iph.daddr == 0 ||
 	    ipv4_is_multicast(t->parms.iph.daddr))
-		return;
+		return 0;
 
 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
-		return;
+		return 0;
 
 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 		t->err_count++;
 	else
 		t->err_count = 1;
 	t->err_time = jiffies;
+
+	return 0;
 }
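
Hoisting the tunnel lookup above the ICMP type switch changes more than ordering: ipgre_err() now returns an int, with -ENOENT meaning "no tunnel matched, let the caller try another handler" and 0 meaning the error was consumed (possibly by deliberately ignoring it). A hedged sketch of the convention; the lookup helper here is hypothetical:

static int example_err(struct sk_buff *skb, u32 info,
		       const struct tnl_ptk_info *tpi)
{
	struct ip_tunnel *t = example_tunnel_lookup(skb, tpi); /* hypothetical */

	if (!t)
		return -ENOENT;	/* not ours: caller may keep looking */

	/* classify the ICMP type/code only for a known tunnel ... */
	return 0;		/* handled */
}
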
 
 static void gre_err(struct sk_buff *skb, u32 info)
@@ -232,22 +229,19 @@
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
 	struct tnl_ptk_info tpi;
-	bool csum_err = false;
 
-	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
-			     iph->ihl * 4) < 0) {
-		if (!csum_err)		/* ignore csum errors. */
-			return;
-	}
+	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
+			     iph->ihl * 4) < 0)
+		return;
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
+				 skb->dev->ifindex, IPPROTO_GRE);
 		return;
 	}
 	if (type == ICMP_REDIRECT) {
-		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
-			      IPPROTO_GRE, 0);
+		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
+			      IPPROTO_GRE);
 		return;
 	}
 
@@ -260,7 +254,6 @@
 	struct net *net = dev_net(skb->dev);
 	struct metadata_dst *tun_dst = NULL;
 	struct erspan_base_hdr *ershdr;
-	struct erspan_metadata *pkt_md;
 	struct ip_tunnel_net *itn;
 	struct ip_tunnel *tunnel;
 	const struct iphdr *iph;
@@ -269,20 +262,11 @@
 	int len;
 
 	itn = net_generic(net, erspan_net_id);
-	len = gre_hdr_len + sizeof(*ershdr);
-
-	/* Check based hdr len */
-	if (unlikely(!pskb_may_pull(skb, len)))
-		return PACKET_REJECT;
 
 	iph = ip_hdr(skb);
 	ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
 	ver = ershdr->ver;
 
-	/* The original GRE header does not have key field,
-	 * Use ERSPAN 10-bit session ID as key.
-	 */
-	tpi->key = cpu_to_be32(get_session_id(ershdr));
 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
 				  tpi->flags | TUNNEL_KEY,
 				  iph->saddr, iph->daddr, tpi->key);
@@ -292,9 +276,6 @@
 		if (unlikely(!pskb_may_pull(skb, len)))
 			return PACKET_REJECT;
 
-		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
-		pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
 		if (__iptunnel_pull_header(skb,
 					   len,
 					   htons(ETH_P_TEB),
@@ -302,8 +283,9 @@
 			goto drop;
 
 		if (tunnel->collect_md) {
+			struct erspan_metadata *pkt_md, *md;
 			struct ip_tunnel_info *info;
-			struct erspan_metadata *md;
+			unsigned char *gh;
 			__be64 tun_id;
 			__be16 flags;
 
@@ -316,6 +298,14 @@
 			if (!tun_dst)
 				return PACKET_REJECT;
 
+			/* skb can be uncloned in __iptunnel_pull_header, so
+			 * old pkt_md is no longer valid and we need to reset
+			 * it
+			 */
+			gh = skb_network_header(skb) +
+			     skb_network_header_len(skb);
+			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+							    sizeof(*ershdr));
 			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
 			md->version = ver;
 			md2 = &md->u.md2;
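
The recomputation of pkt_md above illustrates a general skb rule: any helper that may unclone or reallocate skb->head (here __iptunnel_pull_header(), likewise pskb_may_pull() or pskb_expand_head()) invalidates every pointer previously derived from skb->data. The safe pattern, with a hypothetical header type:

	if (unlikely(!pskb_may_pull(skb, need)))
		goto drop;
	/* only now derive packet pointers; anything cached before is stale */
	hdr = (struct example_hdr *)skb->data;
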
@@ -459,81 +449,14 @@
 	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
 
-static struct rtable *gre_get_rt(struct sk_buff *skb,
-				 struct net_device *dev,
-				 struct flowi4 *fl,
-				 const struct ip_tunnel_key *key)
-{
-	struct net *net = dev_net(dev);
-
-	memset(fl, 0, sizeof(*fl));
-	fl->daddr = key->u.ipv4.dst;
-	fl->saddr = key->u.ipv4.src;
-	fl->flowi4_tos = RT_TOS(key->tos);
-	fl->flowi4_mark = skb->mark;
-	fl->flowi4_proto = IPPROTO_GRE;
-
-	return ip_route_output_key(net, fl);
-}
-
-static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
-				      struct net_device *dev,
-				      struct flowi4 *fl,
-				      int tunnel_hlen)
-{
-	struct ip_tunnel_info *tun_info;
-	const struct ip_tunnel_key *key;
-	struct rtable *rt = NULL;
-	int min_headroom;
-	bool use_cache;
-	int err;
-
-	tun_info = skb_tunnel_info(skb);
-	key = &tun_info->key;
-	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
-
-	if (use_cache)
-		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
-	if (!rt) {
-		rt = gre_get_rt(skb, dev, fl, key);
-		if (IS_ERR(rt))
-			goto err_free_skb;
-		if (use_cache)
-			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
-					  fl->saddr);
-	}
-
-	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
-			+ tunnel_hlen + sizeof(struct iphdr);
-	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
-		int head_delta = SKB_DATA_ALIGN(min_headroom -
-						skb_headroom(skb) +
-						16);
-		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
-				       0, GFP_ATOMIC);
-		if (unlikely(err))
-			goto err_free_rt;
-	}
-	return rt;
-
-err_free_rt:
-	ip_rt_put(rt);
-err_free_skb:
-	kfree_skb(skb);
-	dev->stats.tx_dropped++;
-	return NULL;
-}
-
 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 			__be16 proto)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct ip_tunnel_info *tun_info;
 	const struct ip_tunnel_key *key;
-	struct rtable *rt = NULL;
-	struct flowi4 fl;
 	int tunnel_hlen;
-	__be16 df, flags;
+	__be16 flags;
 
 	tun_info = skb_tunnel_info(skb);
 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -543,13 +466,12 @@
 	key = &tun_info->key;
 	tunnel_hlen = gre_calc_hlen(key->tun_flags);
 
-	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
-	if (!rt)
-		return;
+	if (skb_cow_head(skb, dev->needed_headroom))
+		goto err_free_skb;
 
 	/* Push Tunnel header. */
 	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
-		goto err_free_rt;
+		goto err_free_skb;
 
 	flags = tun_info->key.tun_flags &
 		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
@@ -557,32 +479,25 @@
 			 tunnel_id_to_key32(tun_info->key.tun_id),
 			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
 
-	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
+	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
 
-	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
-		      key->tos, key->ttl, df, false);
 	return;
 
-err_free_rt:
-	ip_rt_put(rt);
 err_free_skb:
 	kfree_skb(skb);
 	dev->stats.tx_dropped++;
 }
 
-static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
-			   __be16 proto)
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct ip_tunnel_info *tun_info;
 	const struct ip_tunnel_key *key;
 	struct erspan_metadata *md;
-	struct rtable *rt = NULL;
 	bool truncate = false;
-	struct flowi4 fl;
+	__be16 proto;
 	int tunnel_hlen;
 	int version;
-	__be16 df;
 	int nhoff;
 	int thoff;
 
@@ -593,21 +508,20 @@
 
 	key = &tun_info->key;
 	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
-		goto err_free_rt;
+		goto err_free_skb;
+	if (tun_info->options_len < sizeof(*md))
+		goto err_free_skb;
 	md = ip_tunnel_info_opts(tun_info);
-	if (!md)
-		goto err_free_rt;
 
 	/* ERSPAN has fixed 8 byte GRE header */
 	version = md->version;
 	tunnel_hlen = 8 + erspan_hdr_len(version);
 
-	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
-	if (!rt)
-		return;
+	if (skb_cow_head(skb, dev->needed_headroom))
+		goto err_free_skb;
 
 	if (gre_handle_offloads(skb, false))
-		goto err_free_rt;
+		goto err_free_skb;
 
 	if (skb->len > dev->mtu + dev->hard_header_len) {
 		pskb_trim(skb, dev->mtu + dev->hard_header_len);
@@ -627,27 +541,25 @@
 	if (version == 1) {
 		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
 				    ntohl(md->u.index), truncate, true);
+		proto = htons(ETH_P_ERSPAN);
 	} else if (version == 2) {
 		erspan_build_header_v2(skb,
 				       ntohl(tunnel_id_to_key32(key->tun_id)),
 				       md->u.md2.dir,
 				       get_hwid(&md->u.md2),
 				       truncate, true);
+		proto = htons(ETH_P_ERSPAN2);
 	} else {
-		goto err_free_rt;
+		goto err_free_skb;
 	}
 
 	gre_build_header(skb, 8, TUNNEL_SEQ,
-			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
+			 proto, 0, htonl(tunnel->o_seqno++));
 
-	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
+	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
 
-	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
-		      key->tos, key->ttl, df, false);
 	return;
 
-err_free_rt:
-	ip_rt_put(rt);
 err_free_skb:
 	kfree_skb(skb);
 	dev->stats.tx_dropped++;
@@ -656,13 +568,18 @@
 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
+	const struct ip_tunnel_key *key;
 	struct rtable *rt;
 	struct flowi4 fl4;
 
 	if (ip_tunnel_info_af(info) != AF_INET)
 		return -EINVAL;
 
-	rt = gre_get_rt(skb, dev, &fl4, &info->key);
+	key = &info->key;
+	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
+			    tunnel_id_to_key32(key->tun_id), key->tos, 0,
+			    skb->mark, skb_get_hash(skb));
+	rt = ip_route_output_key(dev_net(dev), &fl4);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
@@ -677,6 +594,9 @@
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr *tnl_params;
 
+	if (!pskb_inet_may_pull(skb))
+		goto free_skb;
+
 	if (tunnel->collect_md) {
 		gre_fb_xmit(skb, dev, skb->protocol);
 		return NETDEV_TX_OK;
@@ -719,9 +639,13 @@
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	bool truncate = false;
+	__be16 proto;
+
+	if (!pskb_inet_may_pull(skb))
+		goto free_skb;
 
 	if (tunnel->collect_md) {
-		erspan_fb_xmit(skb, dev, skb->protocol);
+		erspan_fb_xmit(skb, dev);
 		return NETDEV_TX_OK;
 	}
 
@@ -737,19 +661,22 @@
 	}
 
 	/* Push ERSPAN header */
-	if (tunnel->erspan_ver == 1)
+	if (tunnel->erspan_ver == 1) {
 		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
 				    tunnel->index,
 				    truncate, true);
-	else if (tunnel->erspan_ver == 2)
+		proto = htons(ETH_P_ERSPAN);
+	} else if (tunnel->erspan_ver == 2) {
 		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
 				       tunnel->dir, tunnel->hwid,
 				       truncate, true);
-	else
+		proto = htons(ETH_P_ERSPAN2);
+	} else {
 		goto free_skb;
+	}
 
 	tunnel->parms.o_flags &= ~TUNNEL_KEY;
-	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
+	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
 	return NETDEV_TX_OK;
 
 free_skb:
@@ -763,6 +690,9 @@
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 
+	if (!pskb_inet_may_pull(skb))
+		goto free_skb;
+
 	if (tunnel->collect_md) {
 		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
 		return NETDEV_TX_OK;
@@ -1342,12 +1272,6 @@
 	ip_tunnel_setup(dev, gre_tap_net_id);
 }
 
-bool is_gretap_dev(const struct net_device *dev)
-{
-	return dev->netdev_ops == &gre_tap_netdev_ops;
-}
-EXPORT_SYMBOL_GPL(is_gretap_dev);
-
 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[],
 			 struct netlink_ext_ack *extack)
@@ -1457,12 +1381,31 @@
 {
 	struct ip_tunnel *t = netdev_priv(dev);
 	struct ip_tunnel_parm *p = &t->parms;
+	__be16 o_flags = p->o_flags;
+
+	if (t->erspan_ver == 1 || t->erspan_ver == 2) {
+		if (!t->collect_md)
+			o_flags |= TUNNEL_KEY;
+
+		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+			goto nla_put_failure;
+
+		if (t->erspan_ver == 1) {
+			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+				goto nla_put_failure;
+		} else {
+			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+				goto nla_put_failure;
+			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+				goto nla_put_failure;
+		}
+	}
 
 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
 	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
 			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
 	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
-			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
+			 gre_tnl_flags_to_gre_flags(o_flags)) ||
 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
@@ -1492,19 +1435,6 @@
 			goto nla_put_failure;
 	}
 
-	if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
-		goto nla_put_failure;
-
-	if (t->erspan_ver == 1) {
-		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
-			goto nla_put_failure;
-	} else if (t->erspan_ver == 2) {
-		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
-			goto nla_put_failure;
-		if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
-			goto nla_put_failure;
-	}
-
 	return 0;
 
 nla_put_failure:
@@ -1516,6 +1446,7 @@
 	struct ip_tunnel *t = netdev_priv(dev);
 
 	ether_setup(dev);
+	dev->max_mtu = 0;
 	dev->netdev_ops = &erspan_netdev_ops;
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
@@ -1604,7 +1535,7 @@
 	memset(&tb, 0, sizeof(tb));
 
 	dev = rtnl_create_link(net, name, name_assign_type,
-			       &ipgre_tap_ops, tb);
+			       &ipgre_tap_ops, tb, NULL);
 	if (IS_ERR(dev))
 		return dev;
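
With IFLA_GRE_ERSPAN_VER and friends now emitted only for ERSPAN tunnels (and TUNNEL_KEY folded back into the dumped o_flags), the netlink view matches what iproute2 configures; a plausible v2 invocation would be `ip link add dev erspan1 type erspan local 192.0.2.1 remote 192.0.2.2 seq key 100 erspan_ver 2 erspan_dir egress erspan_hwid 4` (addresses and IDs invented for illustration; exact spelling depends on the installed iproute2).
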
 
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 27c863f..c59a78a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -14,7 +15,6 @@
  *		Jorge Cwik, <jorge@laser.satlink.net>
  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  *
- *
  * Fixes:
  *		Alan Cox	:	Commented a couple of minor bits of surplus code
  *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
@@ -96,8 +96,6 @@
  *		Jos Vos		:	Do accounting *before* call_in_firewall
  *	Willy Konynenberg	:	Transparent proxying support
  *
- *
- *
  * To Fix:
  *		IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
  *		and could be made very efficient with the addition of some virtual memory hacks to permit
@@ -106,11 +104,6 @@
  *		interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
  *		output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
  *		fragmentation anyway.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) "IPv4: " fmt
@@ -130,6 +123,7 @@
 #include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include <net/snmp.h>
 #include <net/ip.h>
@@ -188,51 +182,53 @@
 	return false;
 }
 
+INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *));
+void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
+{
+	const struct net_protocol *ipprot;
+	int raw, ret;
+
+resubmit:
+	raw = raw_local_deliver(skb, protocol);
+
+	ipprot = rcu_dereference(inet_protos[protocol]);
+	if (ipprot) {
+		if (!ipprot->no_policy) {
+			if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+				kfree_skb(skb);
+				return;
+			}
+			nf_reset_ct(skb);
+		}
+		ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv,
+				      skb);
+		if (ret < 0) {
+			protocol = -ret;
+			goto resubmit;
+		}
+		__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
+	} else {
+		if (!raw) {
+			if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+				__IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
+				icmp_send(skb, ICMP_DEST_UNREACH,
+					  ICMP_PROT_UNREACH, 0);
+			}
+			kfree_skb(skb);
+		} else {
+			__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
+			consume_skb(skb);
+		}
+	}
+}
+
 static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	__skb_pull(skb, skb_network_header_len(skb));
 
 	rcu_read_lock();
-	{
-		int protocol = ip_hdr(skb)->protocol;
-		const struct net_protocol *ipprot;
-		int raw;
-
-	resubmit:
-		raw = raw_local_deliver(skb, protocol);
-
-		ipprot = rcu_dereference(inet_protos[protocol]);
-		if (ipprot) {
-			int ret;
-
-			if (!ipprot->no_policy) {
-				if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-					kfree_skb(skb);
-					goto out;
-				}
-				nf_reset(skb);
-			}
-			ret = ipprot->handler(skb);
-			if (ret < 0) {
-				protocol = -ret;
-				goto resubmit;
-			}
-			__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
-		} else {
-			if (!raw) {
-				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-					__IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
-					icmp_send(skb, ICMP_DEST_UNREACH,
-						  ICMP_PROT_UNREACH, 0);
-				}
-				kfree_skb(skb);
-			} else {
-				__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
-				consume_skb(skb);
-			}
-		}
-	}
- out:
+	ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol);
 	rcu_read_unlock();
 
 	return 0;
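
ip_protocol_deliver_rcu() is split out for reuse, and the handler dispatch is wrapped in INDIRECT_CALL_2() so retpoline builds can turn the overwhelmingly common TCP/UDP cases into direct calls. The resubmit convention is unchanged: a handler returning a negative value asks for redelivery as protocol -ret. Roughly, simplified from include/linux/indirect_call_wrapper.h (the real macros are conditional on CONFIG_RETPOLINE):

#define INDIRECT_CALL_1(f, f1, ...)				\
	(likely((f) == (f1)) ? f1(__VA_ARGS__) : (f)(__VA_ARGS__))

#define INDIRECT_CALL_2(f, f2, f1, ...)				\
	(likely((f) == (f2)) ? f2(__VA_ARGS__) :		\
			       INDIRECT_CALL_1(f, f1, __VA_ARGS__))
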
@@ -258,11 +254,10 @@
 		       ip_local_deliver_finish);
 }
 
-static inline bool ip_rcv_options(struct sk_buff *skb)
+static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_options *opt;
 	const struct iphdr *iph;
-	struct net_device *dev = skb->dev;
 
 	/* It looks as overkill, because not all
 	   IP options require packet mangling.
@@ -298,7 +293,7 @@
 			}
 		}
 
-		if (ip_options_rcv_srr(skb))
+		if (ip_options_rcv_srr(skb, dev))
 			goto drop;
 	}
 
@@ -307,12 +302,13 @@
 	return true;
 }
 
+INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
 static int ip_rcv_finish_core(struct net *net, struct sock *sk,
-			      struct sk_buff *skb)
+			      struct sk_buff *skb, struct net_device *dev)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	int (*edemux)(struct sk_buff *skb);
-	struct net_device *dev = skb->dev;
 	struct rtable *rt;
 	int err;
 
@@ -325,7 +321,8 @@
 
 		ipprot = rcu_dereference(inet_protos[protocol]);
 		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
-			err = edemux(skb);
+			err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
+					      udp_v4_early_demux, skb);
 			if (unlikely(err))
 				goto drop_error;
 			/* must reload iph, skb->head might have changed */
@@ -355,7 +352,7 @@
 	}
 #endif
 
-	if (iph->ihl > 5 && ip_rcv_options(skb))
+	if (iph->ihl > 5 && ip_rcv_options(skb, dev))
 		goto drop;
 
 	rt = skb_rtable(skb);
@@ -401,6 +398,7 @@
 
 static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	struct net_device *dev = skb->dev;
 	int ret;
 
 	/* if ingress device is enslaved to an L3 master device pass the
@@ -410,7 +408,7 @@
 	if (!skb)
 		return NET_RX_SUCCESS;
 
-	ret = ip_rcv_finish_core(net, sk, skb);
+	ret = ip_rcv_finish_core(net, sk, skb, dev);
 	if (ret != NET_RX_DROP)
 		ret = dst_input(skb);
 	return ret;
@@ -430,7 +428,6 @@
 	if (skb->pkt_type == PACKET_OTHERHOST)
 		goto drop;
 
-
 	__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
@@ -489,6 +486,7 @@
 		goto drop;
 	}
 
+	iph = ip_hdr(skb);
 	skb->transport_header = skb->network_header + iph->ihl*4;
 
 	/* Remove any debris in the socket control block */
@@ -521,6 +519,7 @@
 	skb = ip_rcv_core(skb, net);
 	if (skb == NULL)
 		return NET_RX_DROP;
+
 	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
 		       net, NULL, skb, dev, NULL,
 		       ip_rcv_finish);
@@ -531,11 +530,7 @@
 	struct sk_buff *skb, *next;
 
 	list_for_each_entry_safe(skb, next, head, list) {
-		list_del(&skb->list);
-		/* Handle ip{6}_forward case, as sch_direct_xmit have
-		 * another kind of SKB-list usage (see validate_xmit_skb_list)
-		 */
-		skb->next = NULL;
+		skb_list_del_init(skb);
 		dst_input(skb);
 	}
 }
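
The open-coded list_del() plus skb->next reset collapses into skb_list_del_init(), which in this kernel is roughly the following (sketch trimmed from include/linux/skbuff.h):

static inline void skb_mark_not_on_list(struct sk_buff *skb)
{
	skb->next = NULL;
}

static inline void skb_list_del_init(struct sk_buff *skb)
{
	__list_del_entry(&skb->list);
	skb_mark_not_on_list(skb);
}
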
@@ -549,6 +544,7 @@
 
 	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
+		struct net_device *dev = skb->dev;
 		struct dst_entry *dst;
 
 		skb_list_del_init(skb);
@@ -558,7 +554,7 @@
 		skb = l3mdev_ip_rcv(skb);
 		if (!skb)
 			continue;
-		if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP)
+		if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
 			continue;
 
 		dst = skb_dst(skb);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ed194d4..ddaa01e 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -251,8 +251,9 @@
  * If opt == NULL, then skb->data should point to IP header.
  */
 
-int ip_options_compile(struct net *net,
-		       struct ip_options *opt, struct sk_buff *skb)
+int __ip_options_compile(struct net *net,
+			 struct ip_options *opt, struct sk_buff *skb,
+			 __be32 *info)
 {
 	__be32 spec_dst = htonl(INADDR_ANY);
 	unsigned char *pp_ptr = NULL;
@@ -468,11 +469,23 @@
 		return 0;
 
 error:
-	if (skb) {
-		icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24));
-	}
+	if (info)
+		*info = htonl((pp_ptr-iph)<<24);
 	return -EINVAL;
 }
+EXPORT_SYMBOL(__ip_options_compile);
+
+int ip_options_compile(struct net *net,
+		       struct ip_options *opt, struct sk_buff *skb)
+{
+	int ret;
+	__be32 info;
+
+	ret = __ip_options_compile(net, opt, skb, &info);
+	if (ret != 0 && skb)
+		icmp_send(skb, ICMP_PARAMETERPROB, 0, info);
+	return ret;
+}
 EXPORT_SYMBOL(ip_options_compile);
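
Splitting out __ip_options_compile() lets callers that must not transmit (or that run before a route is attached) compile options and receive the failure detail instead of triggering icmp_send() as a side effect; the wrapper keeps the old behaviour. The info word packs the RFC 792 Parameter Problem pointer into the first byte on the wire. A standalone illustration with an invented offset:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t offset = 20;			/* assumed bad-octet offset */
	uint32_t info = htonl(offset << 24);	/* pointer lands in byte 0 */

	printf("icmp info field: 0x%08x\n", ntohl(info));
	return 0;
}
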
 
 /*
@@ -600,7 +613,7 @@
 	}
 }
 
-int ip_options_rcv_srr(struct sk_buff *skb)
+int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_options *opt = &(IPCB(skb)->opt);
 	int srrspace, srrptr;
@@ -635,7 +648,7 @@
 
 		orefdst = skb->_skb_refdst;
 		skb_dst_set(skb, NULL);
-		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
+		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev);
 		rt2 = skb_rtable(skb);
 		if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
 			skb_dst_drop(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9c4e72e..3d8baaa 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -188,7 +189,7 @@
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
 	struct neighbour *neigh;
-	u32 nexthop;
+	bool is_v6gw = false;
 
 	if (rt->rt_type == RTN_MULTICAST) {
 		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
@@ -218,16 +219,13 @@
 	}
 
 	rcu_read_lock_bh();
-	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
-	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
-	if (unlikely(!neigh))
-		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
 	if (!IS_ERR(neigh)) {
 		int res;
 
 		sock_confirm_neigh(skb, neigh);
-		res = neigh_output(neigh, skb);
-
+		/* if crossing protocols, can not use the cached header */
+		res = neigh_output(neigh, skb, is_v6gw);
 		rcu_read_unlock_bh();
 		return res;
 	}
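
ip_neigh_for_gw() replaces the open-coded ARP lookup so that an IPv4 route may resolve through an IPv6 gateway (RFC 5549 style nexthops); is_v6gw then tells neigh_output() to bypass the cached hardware header, which was built for the other protocol. A simplified sketch of the selection, assuming hypothetical lookup helpers (the real inline lives in include/net/route.h):

static struct neighbour *example_neigh_for_gw(struct rtable *rt,
					      struct sk_buff *skb,
					      bool *is_v6gw)
{
	if (rt->rt_gw_family == AF_INET6) {	/* v6 nexthop on a v4 route */
		*is_v6gw = true;
		return example_v6_lookup(skb->dev, &rt->rt_gw6);	/* hypothetical */
	}
	return example_v4_lookup(skb->dev,				/* hypothetical */
				 rt_nexthop(rt, ip_hdr(skb)->daddr));
}
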
@@ -278,7 +276,7 @@
 		struct sk_buff *nskb = segs->next;
 		int err;
 
-		segs->next = NULL;
+		skb_mark_not_on_list(segs);
 		err = ip_fragment(net, sk, segs, mtu, ip_finish_output2);
 
 		if (err && ret == 0)
@@ -289,16 +287,9 @@
 	return ret;
 }
 
-static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	unsigned int mtu;
-	int ret;
-
-	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
-	if (ret) {
-		kfree_skb(skb);
-		return ret;
-	}
 
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
@@ -317,18 +308,54 @@
 	return ip_finish_output2(net, sk, skb);
 }
 
-static int ip_mc_finish_output(struct net *net, struct sock *sk,
-			       struct sk_buff *skb)
+static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	int ret;
 
 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
-	if (ret) {
+	switch (ret) {
+	case NET_XMIT_SUCCESS:
+		return __ip_finish_output(net, sk, skb);
+	case NET_XMIT_CN:
+		return __ip_finish_output(net, sk, skb) ? : ret;
+	default:
+		kfree_skb(skb);
+		return ret;
+	}
+}
+
+static int ip_mc_finish_output(struct net *net, struct sock *sk,
+			       struct sk_buff *skb)
+{
+	struct rtable *new_rt;
+	bool do_cn = false;
+	int ret, err;
+
+	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+	switch (ret) {
+	case NET_XMIT_CN:
+		do_cn = true;
+		/* fall through */
+	case NET_XMIT_SUCCESS:
+		break;
+	default:
 		kfree_skb(skb);
 		return ret;
 	}
 
-	return dev_loopback_xmit(net, sk, skb);
+	/* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting
+	 * this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten,
+	 * see ipv4_pktinfo_prepare().
+	 */
+	new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb));
+	if (new_rt) {
+		new_rt->rt_iif = 0;
+		skb_dst_drop(skb);
+		skb_dst_set(skb, &new_rt->dst);
+	}
+
+	err = dev_loopback_xmit(net, sk, skb);
+	return (do_cn && err) ? ret : err;
 }
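
The BPF egress verdict is now folded in with GCC's binary conditional `a ?: b`, which yields a when it is nonzero and b otherwise: a real error from __ip_finish_output() wins, while success preserves the hook's NET_XMIT_CN. A standalone demonstration with invented values:

#include <stdio.h>

int main(void)
{
	int ret = 2;	/* stand-in for NET_XMIT_CN */
	int err = 0;

	printf("%d\n", err ?: ret);	/* output succeeded: prints 2, CN kept */
	err = -1;
	printf("%d\n", err ?: ret);	/* output failed: prints -1 */
	return 0;
}
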
 
 int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -519,6 +546,7 @@
 	to->pkt_type = from->pkt_type;
 	to->priority = from->priority;
 	to->protocol = from->protocol;
+	to->skb_iif = from->skb_iif;
 	skb_dst_drop(to);
 	skb_dst_copy(to, from);
 	to->dev = from->dev;
@@ -526,13 +554,11 @@
 
 	skb_copy_hash(to, from);
 
-	/* Copy the flags to each fragment. */
-	IPCB(to)->flags = IPCB(from)->flags;
-
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif
 	nf_copy(to, from);
+	skb_ext_copy(to, from);
 #if IS_ENABLED(CONFIG_IP_VS)
 	to->ipvs_property = from->ipvs_property;
 #endif
@@ -561,6 +587,175 @@
 	return ip_do_fragment(net, sk, skb, output);
 }
 
+void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
+		      unsigned int hlen, struct ip_fraglist_iter *iter)
+{
+	unsigned int first_len = skb_pagelen(skb);
+
+	iter->frag = skb_shinfo(skb)->frag_list;
+	skb_frag_list_init(skb);
+
+	iter->offset = 0;
+	iter->iph = iph;
+	iter->hlen = hlen;
+
+	skb->data_len = first_len - skb_headlen(skb);
+	skb->len = first_len;
+	iph->tot_len = htons(first_len);
+	iph->frag_off = htons(IP_MF);
+	ip_send_check(iph);
+}
+EXPORT_SYMBOL(ip_fraglist_init);
+
+static void ip_fraglist_ipcb_prepare(struct sk_buff *skb,
+				     struct ip_fraglist_iter *iter)
+{
+	struct sk_buff *to = iter->frag;
+
+	/* Copy the flags to each fragment. */
+	IPCB(to)->flags = IPCB(skb)->flags;
+
+	if (iter->offset == 0)
+		ip_options_fragment(to);
+}
+
+void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
+{
+	unsigned int hlen = iter->hlen;
+	struct iphdr *iph = iter->iph;
+	struct sk_buff *frag;
+
+	frag = iter->frag;
+	frag->ip_summed = CHECKSUM_NONE;
+	skb_reset_transport_header(frag);
+	__skb_push(frag, hlen);
+	skb_reset_network_header(frag);
+	memcpy(skb_network_header(frag), iph, hlen);
+	iter->iph = ip_hdr(frag);
+	iph = iter->iph;
+	iph->tot_len = htons(frag->len);
+	ip_copy_metadata(frag, skb);
+	iter->offset += skb->len - hlen;
+	iph->frag_off = htons(iter->offset >> 3);
+	if (frag->next)
+		iph->frag_off |= htons(IP_MF);
+	/* Ready, complete checksum */
+	ip_send_check(iph);
+}
+EXPORT_SYMBOL(ip_fraglist_prepare);
+
+void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
+		  unsigned int ll_rs, unsigned int mtu, bool DF,
+		  struct ip_frag_state *state)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	state->DF = DF;
+	state->hlen = hlen;
+	state->ll_rs = ll_rs;
+	state->mtu = mtu;
+
+	state->left = skb->len - hlen;	/* Space per frame */
+	state->ptr = hlen;		/* Where to start from */
+
+	state->offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
+	state->not_last_frag = iph->frag_off & htons(IP_MF);
+}
+EXPORT_SYMBOL(ip_frag_init);
+
+static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
+			 bool first_frag, struct ip_frag_state *state)
+{
+	/* Copy the flags to each fragment. */
+	IPCB(to)->flags = IPCB(from)->flags;
+
+	/* ANK: dirty, but effective trick. Upgrade options only if
+	 * the segment to be fragmented was THE FIRST (otherwise,
+	 * options are already fixed) and make it ONCE
+	 * on the initial skb, so that all the following fragments
+	 * will inherit fixed options.
+	 */
+	if (first_frag)
+		ip_options_fragment(from);
+}
+
+struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state)
+{
+	unsigned int len = state->left;
+	struct sk_buff *skb2;
+	struct iphdr *iph;
+
+	len = state->left;
+	/* IF: it doesn't fit, use 'mtu' - the data space left */
+	if (len > state->mtu)
+		len = state->mtu;
+	/* IF: we are not sending up to and including the packet end
+	   then align the next start on an eight byte boundary */
+	if (len < state->left)	{
+		len &= ~7;
+	}
+
+	/* Allocate buffer */
+	skb2 = alloc_skb(len + state->hlen + state->ll_rs, GFP_ATOMIC);
+	if (!skb2)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 *	Set up data on packet
+	 */
+
+	ip_copy_metadata(skb2, skb);
+	skb_reserve(skb2, state->ll_rs);
+	skb_put(skb2, len + state->hlen);
+	skb_reset_network_header(skb2);
+	skb2->transport_header = skb2->network_header + state->hlen;
+
+	/*
+	 *	Charge the memory for the fragment to any owner
+	 *	it might possess
+	 */
+
+	if (skb->sk)
+		skb_set_owner_w(skb2, skb->sk);
+
+	/*
+	 *	Copy the packet header into the new buffer.
+	 */
+
+	skb_copy_from_linear_data(skb, skb_network_header(skb2), state->hlen);
+
+	/*
+	 *	Copy a block of the IP datagram.
+	 */
+	if (skb_copy_bits(skb, state->ptr, skb_transport_header(skb2), len))
+		BUG();
+	state->left -= len;
+
+	/*
+	 *	Fill in the new header fields.
+	 */
+	iph = ip_hdr(skb2);
+	iph->frag_off = htons((state->offset >> 3));
+	if (state->DF)
+		iph->frag_off |= htons(IP_DF);
+
+	/*
+	 *	Added AC : If we are fragmenting a fragment that's not the
+	 *		   last fragment then keep MF on each bit
+	 */
+	if (state->left > 0 || state->not_last_frag)
+		iph->frag_off |= htons(IP_MF);
+	state->ptr += len;
+	state->offset += len;
+
+	iph->tot_len = htons(len + state->hlen);
+
+	ip_send_check(iph);
+
+	return skb2;
+}
+EXPORT_SYMBOL(ip_frag_next);
+
 /*
  *	This IP datagram is too large to be sent in one piece.  Break it up into
  *	smaller pieces (each of size equal to IP header plus
@@ -572,12 +767,12 @@
 		   int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct iphdr *iph;
-	int ptr;
 	struct sk_buff *skb2;
-	unsigned int mtu, hlen, left, len, ll_rs;
-	int offset;
-	__be16 not_last_frag;
 	struct rtable *rt = skb_rtable(skb);
+	unsigned int mtu, hlen, ll_rs;
+	struct ip_fraglist_iter iter;
+	ktime_t tstamp = skb->tstamp;
+	struct ip_frag_state state;
 	int err = 0;
 
 	/* for offloaded checksums cleanup checksum before fragmentation */
@@ -642,49 +837,25 @@
 		}
 
 		/* Everything is OK. Generate! */
-
-		err = 0;
-		offset = 0;
-		frag = skb_shinfo(skb)->frag_list;
-		skb_frag_list_init(skb);
-		skb->data_len = first_len - skb_headlen(skb);
-		skb->len = first_len;
-		iph->tot_len = htons(first_len);
-		iph->frag_off = htons(IP_MF);
-		ip_send_check(iph);
+		ip_fraglist_init(skb, iph, hlen, &iter);
 
 		for (;;) {
 			/* Prepare header of the next frame,
 			 * before previous one went down. */
-			if (frag) {
-				frag->ip_summed = CHECKSUM_NONE;
-				skb_reset_transport_header(frag);
-				__skb_push(frag, hlen);
-				skb_reset_network_header(frag);
-				memcpy(skb_network_header(frag), iph, hlen);
-				iph = ip_hdr(frag);
-				iph->tot_len = htons(frag->len);
-				ip_copy_metadata(frag, skb);
-				if (offset == 0)
-					ip_options_fragment(frag);
-				offset += skb->len - hlen;
-				iph->frag_off = htons(offset>>3);
-				if (frag->next)
-					iph->frag_off |= htons(IP_MF);
-				/* Ready, complete checksum */
-				ip_send_check(iph);
+			if (iter.frag) {
+				ip_fraglist_ipcb_prepare(skb, &iter);
+				ip_fraglist_prepare(skb, &iter);
 			}
 
+			skb->tstamp = tstamp;
 			err = output(net, sk, skb);
 
 			if (!err)
 				IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
-			if (err || !frag)
+			if (err || !iter.frag)
 				break;
 
-			skb = frag;
-			frag = skb->next;
-			skb->next = NULL;
+			skb = ip_fraglist_next(&iter);
 		}
 
 		if (err == 0) {
@@ -692,11 +863,8 @@
 			return 0;
 		}
 
-		while (frag) {
-			skb = frag->next;
-			kfree_skb(frag);
-			frag = skb;
-		}
+		kfree_skb_list(iter.frag);
+
 		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
 		return err;
 
@@ -711,105 +879,31 @@
 	}
 
 slow_path:
-	iph = ip_hdr(skb);
-
-	left = skb->len - hlen;		/* Space per frame */
-	ptr = hlen;		/* Where to start from */
-
 	/*
 	 *	Fragment the datagram.
 	 */
 
-	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
-	not_last_frag = iph->frag_off & htons(IP_MF);
+	ip_frag_init(skb, hlen, ll_rs, mtu, IPCB(skb)->flags & IPSKB_FRAG_PMTU,
+		     &state);
 
 	/*
 	 *	Keep copying data until we run out.
 	 */
 
-	while (left > 0) {
-		len = left;
-		/* IF: it doesn't fit, use 'mtu' - the data space left */
-		if (len > mtu)
-			len = mtu;
-		/* IF: we are not sending up to and including the packet end
-		   then align the next start on an eight byte boundary */
-		if (len < left)	{
-			len &= ~7;
-		}
+	while (state.left > 0) {
+		bool first_frag = (state.offset == 0);
 
-		/* Allocate buffer */
-		skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
-		if (!skb2) {
-			err = -ENOMEM;
+		skb2 = ip_frag_next(skb, &state);
+		if (IS_ERR(skb2)) {
+			err = PTR_ERR(skb2);
 			goto fail;
 		}
-
-		/*
-		 *	Set up data on packet
-		 */
-
-		ip_copy_metadata(skb2, skb);
-		skb_reserve(skb2, ll_rs);
-		skb_put(skb2, len + hlen);
-		skb_reset_network_header(skb2);
-		skb2->transport_header = skb2->network_header + hlen;
-
-		/*
-		 *	Charge the memory for the fragment to any owner
-		 *	it might possess
-		 */
-
-		if (skb->sk)
-			skb_set_owner_w(skb2, skb->sk);
-
-		/*
-		 *	Copy the packet header into the new buffer.
-		 */
-
-		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
-
-		/*
-		 *	Copy a block of the IP datagram.
-		 */
-		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
-			BUG();
-		left -= len;
-
-		/*
-		 *	Fill in the new header fields.
-		 */
-		iph = ip_hdr(skb2);
-		iph->frag_off = htons((offset >> 3));
-
-		if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
-			iph->frag_off |= htons(IP_DF);
-
-		/* ANK: dirty, but effective trick. Upgrade options only if
-		 * the segment to be fragmented was THE FIRST (otherwise,
-		 * options are already fixed) and make it ONCE
-		 * on the initial skb, so that all the following fragments
-		 * will inherit fixed options.
-		 */
-		if (offset == 0)
-			ip_options_fragment(skb);
-
-		/*
-		 *	Added AC : If we are fragmenting a fragment that's not the
-		 *		   last fragment then keep MF on each bit
-		 */
-		if (left > 0 || not_last_frag)
-			iph->frag_off |= htons(IP_MF);
-		ptr += len;
-		offset += len;
+		ip_frag_ipcb(skb, skb2, first_frag, &state);
 
 		/*
 		 *	Put this fragment into the sending queue.
 		 */
-		iph->tot_len = htons(len + hlen);
-
-		ip_send_check(iph);
-
+		skb2->tstamp = tstamp;
 		err = output(net, sk, skb2);
 		if (err)
 			goto fail;
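
The slow path now drives ip_frag_init()/ip_frag_next() instead of hand-rolling the arithmetic, but the sizing rules are unchanged: each fragment carries at most the data space left by the header, every fragment except the last is trimmed to a multiple of 8 (offsets travel in 8-byte units), and IP_MF (0x2000 before htons()) marks all but the final piece. A standalone worked example with invented sizes:

#include <stdio.h>

int main(void)
{
	unsigned int mtu = 1500, hlen = 20;	/* assumed link MTU, bare IPv4 header */
	unsigned int space = mtu - hlen;	/* payload per fragment: 1480 */
	unsigned int left = 4000, offset = 0;	/* hypothetical 4000-byte payload */

	while (left > 0) {
		unsigned int len = left > space ? space : left;

		if (len < left)
			len &= ~7;	/* align all but the last fragment */

		printf("frag: offset=%u len=%u frag_off=0x%04x\n",
		       offset, len, (offset >> 3) | (len < left ? 0x2000u : 0u));
		offset += len;
		left -= len;
	}
	return 0;
}
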
@@ -867,6 +961,7 @@
 			    unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct ubuf_info *uarg = NULL;
 	struct sk_buff *skb;
 
 	struct ip_options *opt = cork->opt;
@@ -880,8 +975,8 @@
 	int csummode = CHECKSUM_NONE;
 	struct rtable *rt = (struct rtable *)cork->dst;
 	unsigned int wmem_alloc_delta = 0;
+	bool paged, extra_uref = false;
 	u32 tskey = 0;
-	bool paged;
 
 	skb = skb_peek_tail(queue);
 
@@ -916,6 +1011,20 @@
 	    (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM)))
 		csummode = CHECKSUM_PARTIAL;
 
+	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
+		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+		if (!uarg)
+			return -ENOBUFS;
+		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
+		if (rt->dst.dev->features & NETIF_F_SG &&
+		    csummode == CHECKSUM_PARTIAL) {
+			paged = true;
+		} else {
+			uarg->zerocopy = 0;
+			skb_zcopy_set(skb, uarg, &extra_uref);
+		}
+	}
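
This wires MSG_ZEROCOPY into ip_append_data() for UDP and raw sockets: the pages are pinned rather than copied when the device can do scatter-gather with partial checksums, and uarg->zerocopy is cleared (falling back to copying) otherwise. From userspace the feature stays opt-in; the usual shape, with completion handling elided, is roughly:

#include <sys/types.h>
#include <sys/socket.h>

/* sketch: enable SO_ZEROCOPY once, then send with MSG_ZEROCOPY; each
 * notification is later read via recvmsg(fd, ..., MSG_ERRQUEUE) once
 * the kernel has released the pages. */
static ssize_t zerocopy_send(int fd, const void *buf, size_t len)
{
	int one = 1;

	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)) < 0)
		return -1;
	return send(fd, buf, len, MSG_ZEROCOPY);
}
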
+
 	cork->length += length;
 
 	/* So, what's going on in the loop below?
@@ -939,7 +1048,7 @@
 			unsigned int fraglen;
 			unsigned int fraggap;
 			unsigned int alloclen;
-			unsigned int pagedlen = 0;
+			unsigned int pagedlen;
 			struct sk_buff *skb_prev;
 alloc_new_skb:
 			skb_prev = skb;
@@ -956,6 +1065,7 @@
 			if (datalen > mtu - fragheaderlen)
 				datalen = maxfraglen - fragheaderlen;
 			fraglen = datalen + fragheaderlen;
+			pagedlen = 0;
 
 			if ((flags & MSG_MORE) &&
 			    !(rt->dst.dev->features&NETIF_F_SG))
@@ -1000,12 +1110,6 @@
 			skb->csum = 0;
 			skb_reserve(skb, hh_len);
 
-			/* only the initial fragment is time stamped */
-			skb_shinfo(skb)->tx_flags = cork->tx_flags;
-			cork->tx_flags = 0;
-			skb_shinfo(skb)->tskey = tskey;
-			tskey = 0;
-
 			/*
 			 *	Find where to start putting bytes.
 			 */
@@ -1038,6 +1142,13 @@
 			exthdrlen = 0;
 			csummode = CHECKSUM_NONE;
 
+			/* only the initial fragment is time stamped */
+			skb_shinfo(skb)->tx_flags = cork->tx_flags;
+			cork->tx_flags = 0;
+			skb_shinfo(skb)->tskey = tskey;
+			tskey = 0;
+			skb_zcopy_set(skb, uarg, &extra_uref);
+
 			if ((flags & MSG_CONFIRM) && !skb_prev)
 				skb_set_dst_pending_confirm(skb, 1);
 
@@ -1067,7 +1178,7 @@
 				err = -EFAULT;
 				goto error;
 			}
-		} else {
+		} else if (!uarg || !uarg->zerocopy) {
 			int i = skb_shinfo(skb)->nr_frags;
 
 			err = -ENOMEM;
@@ -1097,6 +1208,10 @@
 			skb->data_len += copy;
 			skb->truesize += copy;
 			wmem_alloc_delta += copy;
+		} else {
+			err = skb_zerocopy_iter_dgram(skb, from, copy);
+			if (err < 0)
+				goto error;
 		}
 		offset += copy;
 		length -= copy;
@@ -1109,6 +1224,8 @@
 error_efault:
 	err = -EFAULT;
 error:
+	if (uarg)
+		sock_zerocopy_put_abort(uarg, extra_uref);
 	cork->length -= length;
 	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
 	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
@@ -1153,6 +1270,7 @@
 	cork->length = 0;
 	cork->ttl = ipc->ttl;
 	cork->tos = ipc->tos;
+	cork->mark = ipc->sockc.mark;
 	cork->priority = ipc->priority;
 	cork->transmit_time = ipc->sockc.transmit_time;
 	cork->tx_flags = 0;
@@ -1416,7 +1534,7 @@
 	}
 
 	skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
-	skb->mark = sk->sk_mark;
+	skb->mark = cork->mark;
 	skb->tstamp = cork->transmit_time;
 	/*
 	 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
@@ -1536,7 +1654,7 @@
 			   const struct ip_options *sopt,
 			   __be32 daddr, __be32 saddr,
 			   const struct ip_reply_arg *arg,
-			   unsigned int len)
+			   unsigned int len, u64 transmit_time)
 {
 	struct ip_options_data replyopts;
 	struct ipcm_cookie ipc;
@@ -1552,6 +1670,7 @@
 
 	ipcm_init(&ipc);
 	ipc.addr = daddr;
+	ipc.sockc.transmit_time = transmit_time;
 
 	if (replyopts.opt.opt.optlen) {
 		ipc.opt = &replyopts.opt;
@@ -1579,7 +1698,6 @@
 
 	inet_sk(sk)->tos = arg->tos;
 
-	sk->sk_priority = skb->priority;
 	sk->sk_protocol = ip_hdr(skb)->protocol;
 	sk->sk_bound_dev_if = arg->bound_dev_if;
 	sk->sk_sndbuf = sysctl_wmem_default;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 26c36cc..aa3fd61 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -148,19 +148,17 @@
 
 static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
 {
+	__be16 _ports[2], *ports;
 	struct sockaddr_in sin;
-	__be16 *ports;
-	int end;
-
-	end = skb_transport_offset(skb) + 4;
-	if (end > 0 && !pskb_may_pull(skb, end))
-		return;
 
 	/* All current transport protocols have the port numbers in the
 	 * first four bytes of the transport header and this function is
 	 * written with this assumption in mind.
 	 */
-	ports = (__be16 *)skb_transport_header(skb);
+	ports = skb_header_pointer(skb, skb_transport_offset(skb),
+				   sizeof(_ports), &_ports);
+	if (!ports)
+		return;
 
 	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
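
skb_header_pointer() either returns a pointer into the linear area or copies the requested range into the caller's buffer, so the result is always safe to dereference and, unlike the old pskb_may_pull() dance, it never reallocates skb->head behind other cached pointers. The general pattern, with a hypothetical header type:

	struct example_hdr _hdr;
	const struct example_hdr *hdr;

	hdr = skb_header_pointer(skb, offset, sizeof(_hdr), &_hdr);
	if (!hdr)
		return;		/* packet too short */
	/* hdr is valid whether or not the skb was linear */
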
@@ -345,6 +343,8 @@
 		return -EINVAL;
 
 	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
+	if (on && !new_ra)
+		return -ENOMEM;
 
 	mutex_lock(&net->ipv4.ra_mutex);
 	for (rap = &net->ipv4.ra_chain;
@@ -1246,7 +1246,7 @@
 		return -ENOPROTOOPT;
 
 	err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
 	if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
 	    optname < BPFILTER_IPT_SET_MAX)
 		err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
@@ -1559,7 +1559,7 @@
 	int err;
 
 	err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
 	if (optname >= BPFILTER_IPT_SO_GET_INFO &&
 	    optname < BPFILTER_IPT_GET_MAX)
 		err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
@@ -1596,7 +1596,7 @@
 	err = do_ip_getsockopt(sk, level, optname, optval, optlen,
 		MSG_CMSG_COMPAT);
 
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
 	if (optname >= BPFILTER_IPT_SO_GET_INFO &&
 	    optname < BPFILTER_IPT_GET_MAX)
 		err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 284a221..38c02bb 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -310,7 +297,7 @@
 		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
 				    iph->saddr, tunnel->parms.o_key,
 				    RT_TOS(iph->tos), tunnel->parms.link,
-				    tunnel->fwmark);
+				    tunnel->fwmark, 0);
 		rt = ip_route_output_key(tunnel->net, &fl4);
 
 		if (!IS_ERR(rt)) {
@@ -501,19 +488,24 @@
 
 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 			    struct rtable *rt, __be16 df,
-			    const struct iphdr *inner_iph)
+			    const struct iphdr *inner_iph,
+			    int tunnel_hlen, __be32 dst, bool md)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
+	int pkt_size;
 	int mtu;
 
+	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
+	pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;
+
 	if (df)
 		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
-					- sizeof(struct iphdr) - tunnel->hlen;
+					- sizeof(struct iphdr) - tunnel_hlen;
 	else
-		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
-	skb_dst_update_pmtu(skb, mtu);
+	if (skb_valid_dst(skb))
+		skb_dst_update_pmtu(skb, mtu);
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		if (!skb_is_gso(skb) &&
@@ -526,12 +518,16 @@
 	}
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
-		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+		struct rt6_info *rt6;
+		__be32 daddr;
+
+		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
+					   NULL;
+		daddr = md ? dst : tunnel->parms.iph.daddr;
 
 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
 			   mtu >= IPV6_MIN_MTU) {
-			if ((tunnel->parms.iph.daddr &&
-			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
+			if ((daddr && !ipv4_is_multicast(daddr)) ||
 			    rt6->rt6i_dst.plen == 128) {
 				rt6->rt6i_flags |= RTF_MODIFIED;
 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
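
Because collect_md tunnels have a per-packet header length, tnl_update_pmtu() now takes tunnel_hlen (and the metadata destination) explicitly instead of reading tunnel->hlen. A worked example of the df branch with invented values:

#include <stdio.h>

int main(void)
{
	int route_mtu = 1500;		/* dst_mtu(&rt->dst), assumed */
	int hard_header_len = 0;	/* typical for an ipgre netdev */
	int tunnel_hlen = 4;		/* bare GRE header: no key/csum/seq */
	int ip_hdr_len = 20;

	printf("inner mtu = %d\n",
	       route_mtu - hard_header_len - ip_hdr_len - tunnel_hlen);
	return 0;			/* prints 1476, the classic GRE MTU */
}
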
@@ -548,17 +544,19 @@
 	return 0;
 }
 
-void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+		       u8 proto, int tunnel_hlen)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	u32 headroom = sizeof(struct iphdr);
 	struct ip_tunnel_info *tun_info;
 	const struct ip_tunnel_key *key;
 	const struct iphdr *inner_iph;
-	struct rtable *rt;
+	struct rtable *rt = NULL;
 	struct flowi4 fl4;
 	__be16 df = 0;
 	u8 tos, ttl;
+	bool use_cache;
 
 	tun_info = skb_tunnel_info(skb);
 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -574,20 +572,39 @@
 		else if (skb->protocol == htons(ETH_P_IPV6))
 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 	}
-	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
-			    RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
+			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
+			    0, skb->mark, skb_get_hash(skb));
 	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
 		goto tx_error;
-	rt = ip_route_output_key(tunnel->net, &fl4);
-	if (IS_ERR(rt)) {
-		dev->stats.tx_carrier_errors++;
-		goto tx_error;
+
+	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+	if (use_cache)
+		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
+	if (!rt) {
+		rt = ip_route_output_key(tunnel->net, &fl4);
+		if (IS_ERR(rt)) {
+			dev->stats.tx_carrier_errors++;
+			goto tx_error;
+		}
+		if (use_cache)
+			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
+					  fl4.saddr);
 	}
 	if (rt->dst.dev == dev) {
 		ip_rt_put(rt);
 		dev->stats.collisions++;
 		goto tx_error;
 	}
+
+	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
+		df = htons(IP_DF);
+	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
+			    key->u.ipv4.dst, true)) {
+		ip_rt_put(rt);
+		goto tx_error;
+	}
+
 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 	ttl = key->ttl;
 	if (ttl == 0) {
@@ -598,10 +615,10 @@
 		else
 			ttl = ip4_dst_hoplimit(&rt->dst);
 	}
-	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
-		df = htons(IP_DF);
-	else if (skb->protocol == htons(ETH_P_IP))
+
+	if (!df && skb->protocol == htons(ETH_P_IP))
 		df = inner_iph->frag_off & htons(IP_DF);
+
 	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
 	if (headroom > dev->needed_headroom)
 		dev->needed_headroom = headroom;
@@ -627,23 +644,17 @@
 		    const struct iphdr *tnl_params, u8 protocol)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	unsigned int inner_nhdr_len = 0;
+	struct ip_tunnel_info *tun_info = NULL;
 	const struct iphdr *inner_iph;
-	struct flowi4 fl4;
-	u8     tos, ttl;
-	__be16 df;
-	struct rtable *rt;		/* Route to the other host */
 	unsigned int max_headroom;	/* The extra header space needed */
-	__be32 dst;
+	struct rtable *rt = NULL;		/* Route to the other host */
+	bool use_cache = false;
+	struct flowi4 fl4;
+	bool md = false;
 	bool connected;
-
-	/* ensure we can access the inner net header, for several users below */
-	if (skb->protocol == htons(ETH_P_IP))
-		inner_nhdr_len = sizeof(struct iphdr);
-	else if (skb->protocol == htons(ETH_P_IPV6))
-		inner_nhdr_len = sizeof(struct ipv6hdr);
-	if (unlikely(!pskb_may_pull(skb, inner_nhdr_len)))
-		goto tx_error;
+	u8 tos, ttl;
+	__be32 dst;
+	__be16 df;
 
 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 	connected = (tunnel->parms.iph.daddr != 0);
@@ -659,7 +670,15 @@
 			goto tx_error;
 		}
 
-		if (skb->protocol == htons(ETH_P_IP)) {
+		tun_info = skb_tunnel_info(skb);
+		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
+		    ip_tunnel_info_af(tun_info) == AF_INET &&
+		    tun_info->key.u.ipv4.dst) {
+			dst = tun_info->key.u.ipv4.dst;
+			md = true;
+			connected = true;
+		}
+		else if (skb->protocol == htons(ETH_P_IP)) {
 			rt = skb_rtable(skb);
 			dst = rt_nexthop(rt, inner_iph->daddr);
 		}
@@ -697,7 +716,8 @@
 		else
 			goto tx_error;
 
-		connected = false;
+		if (!md)
+			connected = false;
 	}
 
 	tos = tnl_params->tos;
@@ -714,13 +734,20 @@
 
 	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
 			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
-			    tunnel->fwmark);
+			    tunnel->fwmark, skb_get_hash(skb));
 
 	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
 		goto tx_error;
 
-	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
-			 NULL;
+	if (connected && md) {
+		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+		if (use_cache)
+			rt = dst_cache_get_ip4(&tun_info->dst_cache,
+					       &fl4.saddr);
+	} else {
+		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
+						&fl4.saddr) : NULL;
+	}
 
 	if (!rt) {
 		rt = ip_route_output_key(tunnel->net, &fl4);
@@ -729,7 +756,10 @@
 			dev->stats.tx_carrier_errors++;
 			goto tx_error;
 		}
-		if (connected)
+		if (use_cache)
+			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
+					  fl4.saddr);
+		else if (!md && connected)
 			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
 					  fl4.saddr);
 	}
@@ -740,7 +770,8 @@
 		goto tx_error;
 	}
 
-	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
+	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
+			    0, 0, false)) {
 		ip_rt_put(rt);
 		goto tx_error;
 	}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index c248e0d..1452a97 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -89,9 +76,12 @@
 	__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
 
 	err = ip_local_out(net, sk, skb);
-	if (unlikely(net_xmit_eval(err)))
-		pkt_len = 0;
-	iptunnel_xmit_stats(dev, pkt_len);
+
+	if (dev) {
+		if (unlikely(net_xmit_eval(err)))
+			pkt_len = 0;
+		iptunnel_xmit_stats(dev, pkt_len);
+	}
 }
 EXPORT_SYMBOL_GPL(iptunnel_xmit);
 
@@ -120,7 +110,7 @@
 	}
 
 	skb_clear_hash_if_not_l4(skb);
-	skb->vlan_tci = 0;
+	__vlan_hwaccel_clear_tag(skb);
 	skb_set_queue_mapping(skb, 0);
 	skb_scrub_packet(skb, xnet);
 
@@ -151,6 +141,7 @@
 		       sizeof(struct in6_addr));
 	else
 		dst->key.u.ipv4.dst = src->key.u.ipv4.src;
+	dst->key.tun_flags = src->key.tun_flags;
 	dst->mode = src->mode | IP_TUNNEL_INFO_TX;
 
 	return res;
@@ -238,8 +229,8 @@
 	struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy,
-			       extack);
+	err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_MAX, attr,
+					  ip_tun_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -251,6 +242,14 @@
 
 	tun_info = lwt_tun_info(new_state);
 
+#ifdef CONFIG_DST_CACHE
+	err = dst_cache_init(&tun_info->dst_cache, GFP_KERNEL);
+	if (err) {
+		lwtstate_free(new_state);
+		return err;
+	}
+#endif
+
 	if (tb[LWTUNNEL_IP_ID])
 		tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]);
 
@@ -277,6 +276,15 @@
 	return 0;
 }
 
+static void ip_tun_destroy_state(struct lwtunnel_state *lwtstate)
+{
+#ifdef CONFIG_DST_CACHE
+	struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
+
+	dst_cache_destroy(&tun_info->dst_cache);
+#endif
+}
+
 static int ip_tun_fill_encap_info(struct sk_buff *skb,
 				  struct lwtunnel_state *lwtstate)
 {
@@ -312,6 +320,7 @@
 
 static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
 	.build_state = ip_tun_build_state,
+	.destroy_state = ip_tun_destroy_state,
 	.fill_encap = ip_tun_fill_encap_info,
 	.get_encap_size = ip_tun_encap_nlsize,
 	.cmp_encap = ip_tun_cmp_encap,
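
ip_tun_build_state() now initialises a per-lwtunnel dst_cache, and the new .destroy_state callback is what keeps that percpu allocation from leaking, since lwtstate_free() only frees the outer lwtunnel_state. The pairing, sketched under the same CONFIG_DST_CACHE guard:

static void example_destroy_state(struct lwtunnel_state *lwt)
{
	/* mirror of the dst_cache_init() done in build_state */
	dst_cache_destroy(&lwt_tun_info(lwt)->dst_cache);
}
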
@@ -337,8 +346,8 @@
 	struct nlattr *tb[LWTUNNEL_IP6_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy,
-			       extack);
+	err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP6_MAX, attr,
+					  ip6_tun_policy, extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index f38cb21..cfb0256 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Linux NET3: IP/IP protocol decoder modified to support
  *		    virtual tunnel interface
  *
  *	Authors:
  *		Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  */
 
 /*
@@ -50,7 +45,7 @@
 static int vti_tunnel_init(struct net_device *dev);
 
 static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
-		     int encap_type)
+		     int encap_type, bool update_skb_dev)
 {
 	struct ip_tunnel *tunnel;
 	const struct iphdr *iph = ip_hdr(skb);
@@ -65,6 +60,9 @@
 
 		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
 
+		if (update_skb_dev)
+			skb->dev = tunnel->dev;
+
 		return xfrm_input(skb, nexthdr, spi, encap_type);
 	}
 
@@ -74,12 +72,28 @@
 	return 0;
 }
 
-static int vti_rcv(struct sk_buff *skb)
+static int vti_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi,
+			   int encap_type)
+{
+	return vti_input(skb, nexthdr, spi, encap_type, false);
+}
+
+static int vti_rcv(struct sk_buff *skb, __be32 spi, bool update_skb_dev)
 {
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
 
-	return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
+	return vti_input(skb, ip_hdr(skb)->protocol, spi, 0, update_skb_dev);
+}
+
+static int vti_rcv_proto(struct sk_buff *skb)
+{
+	return vti_rcv(skb, 0, false);
+}
+
+static int vti_rcv_tunnel(struct sk_buff *skb)
+{
+	return vti_rcv(skb, ip_hdr(skb)->saddr, true);
 }
 
 static int vti_rcv_cb(struct sk_buff *skb, int err)
@@ -88,7 +102,7 @@
 	struct net_device *dev;
 	struct pcpu_sw_netstats *tstats;
 	struct xfrm_state *x;
-	struct xfrm_mode *inner_mode;
+	const struct xfrm_mode *inner_mode;
 	struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
 	u32 orig_mark = skb->mark;
 	int ret;
@@ -107,7 +121,7 @@
 
 	x = xfrm_input_state(skb);
 
-	inner_mode = x->inner_mode;
+	inner_mode = &x->inner_mode;
 
 	if (x->sel.family == AF_UNSPEC) {
 		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@ -118,7 +132,7 @@
 		}
 	}
 
-	family = inner_mode->afinfo->family;
+	family = inner_mode->family;
 
 	skb->mark = be32_to_cpu(tunnel->parms.i_key);
 	ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
@@ -241,6 +255,9 @@
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct flowi fl;
 
+	if (!pskb_inet_may_pull(skb))
+		goto tx_err;
+
 	memset(&fl, 0, sizeof(fl));
 
 	switch (skb->protocol) {
@@ -253,15 +270,18 @@
 		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 		break;
 	default:
-		dev->stats.tx_errors++;
-		dev_kfree_skb(skb);
-		return NETDEV_TX_OK;
+		goto tx_err;
 	}
 
 	/* override mark with tunnel output key */
 	fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key);
 
 	return vti_xmit(skb, dev, &fl);
+
+tx_err:
+	dev->stats.tx_errors++;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
 }
 
 static int vti4_err(struct sk_buff *skb, u32 info)
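
vti_tunnel_xmit() now funnels every failure (the new pskb_inet_may_pull() check and the unknown-protocol case) through a single tx_err label that counts the error and frees the packet with kfree_skb(), the error-drop variant, instead of dev_kfree_skb(), the normal-consume variant. A compact sketch of the single-exit error-label idiom, with stand-in types:

#include <stdio.h>
#include <stdlib.h>

struct pkt { void *data; };

static unsigned long tx_errors;

static int xmit(struct pkt *p, int proto)
{
        if (!p->data)                   /* header pull failed */
                goto tx_err;

        switch (proto) {
        case 4:                         /* recognised protocols */
        case 6:
                break;
        default:
                goto tx_err;            /* unknown protocol: same exit */
        }

        printf("transmitted\n");
        free(p->data);
        p->data = NULL;
        return 0;

tx_err:                                 /* one place to count and free */
        tx_errors++;
        free(p->data);
        p->data = NULL;
        return 0;                       /* NETDEV_TX_OK-style: packet consumed */
}

int main(void)
{
        struct pkt p = { .data = malloc(16) };

        xmit(&p, 4);                    /* prints "transmitted" */
        p.data = malloc(16);
        xmit(&p, 99);                   /* unknown proto: counted and freed */
        printf("tx_errors=%lu\n", tx_errors);
        return 0;
}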
@@ -318,9 +338,9 @@
 		return 0;
 
 	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
-		ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+		ipv4_update_pmtu(skb, net, info, 0, protocol);
 	else
-		ipv4_redirect(skb, net, 0, 0, protocol, 0);
+		ipv4_redirect(skb, net, 0, protocol);
 	xfrm_state_put(x);
 
 	return 0;
@@ -406,29 +426,35 @@
 }
 
 static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
-	.handler	=	vti_rcv,
-	.input_handler	=	vti_input,
+	.handler	=	vti_rcv_proto,
+	.input_handler	=	vti_input_proto,
 	.cb_handler	=	vti_rcv_cb,
 	.err_handler	=	vti4_err,
 	.priority	=	100,
 };
 
 static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
-	.handler	=	vti_rcv,
-	.input_handler	=	vti_input,
+	.handler	=	vti_rcv_proto,
+	.input_handler	=	vti_input_proto,
 	.cb_handler	=	vti_rcv_cb,
 	.err_handler	=	vti4_err,
 	.priority	=	100,
 };
 
 static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
-	.handler	=	vti_rcv,
-	.input_handler	=	vti_input,
+	.handler	=	vti_rcv_proto,
+	.input_handler	=	vti_input_proto,
 	.cb_handler	=	vti_rcv_cb,
 	.err_handler	=	vti4_err,
 	.priority	=	100,
 };
 
+static struct xfrm_tunnel ipip_handler __read_mostly = {
+	.handler	=	vti_rcv_tunnel,
+	.err_handler	=	vti4_err,
+	.priority	=	0,
+};
+
 static int __net_init vti_init_net(struct net *net)
 {
 	int err;
@@ -597,6 +623,11 @@
 	if (err < 0)
 		goto xfrm_proto_comp_failed;
 
+	msg = "ipip tunnel";
+	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
+	if (err < 0)
+		goto xfrm_tunnel_failed;
+
 	msg = "netlink interface";
 	err = rtnl_link_register(&vti_link_ops);
 	if (err < 0)
@@ -605,6 +636,8 @@
 	return err;
 
 rtnl_link_failed:
+	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+xfrm_tunnel_failed:
 	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
 xfrm_proto_comp_failed:
 	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
@@ -620,6 +653,7 @@
 static void __exit vti_fini(void)
 {
 	rtnl_link_unregister(&vti_link_ops);
+	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
 	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
 	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
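
Registering the new ipip_handler extends the module-init ladder: each failure label deregisters exactly what was registered before it, in reverse order, and vti_fini() mirrors the same reverse order. A sketch of the goto-unwind idiom with placeholder subsystems:

#include <stdio.h>

static int reg_a(void) { puts("reg a"); return 0; }
static int reg_b(void) { puts("reg b"); return 0; }
static int reg_c(void) { puts("reg c"); return -1; }    /* simulate failure */
static void unreg_a(void) { puts("unreg a"); }
static void unreg_b(void) { puts("unreg b"); }

static int mod_init(void)
{
        int err;

        err = reg_a();
        if (err < 0)
                goto a_failed;
        err = reg_b();
        if (err < 0)
                goto b_failed;
        err = reg_c();
        if (err < 0)
                goto c_failed;
        return 0;

c_failed:                       /* unwind only what succeeded, in reverse */
        unreg_b();
b_failed:
        unreg_a();
a_failed:
        return err;
}

int main(void)
{
        return mod_init() ? 1 : 0;      /* prints: reg a, reg b, reg c, unreg b, unreg a */
}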
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index d97f4f2..59bfa38 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IP Payload Compression Protocol (IPComp) - RFC3173.
  *
  * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Todo:
  *   - Tunable compression parameters.
  *   - Compression stats.
@@ -48,9 +44,9 @@
 		return 0;
 
 	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
-		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
+		ipv4_update_pmtu(skb, net, info, 0, IPPROTO_COMP);
 	else
-		ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
+		ipv4_redirect(skb, net, 0, IPPROTO_COMP);
 	xfrm_state_put(x);
 
 	return 0;
@@ -190,8 +186,7 @@
 {
 	if (xfrm4_protocol_deregister(&ipcomp4_protocol, IPPROTO_COMP) < 0)
 		pr_info("%s: can't remove protocol\n", __func__);
-	if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0)
-		pr_info("%s: can't remove xfrm type\n", __func__);
+	xfrm_unregister_type(&ipcomp_type, AF_INET);
 }
 
 module_init(ipcomp4_init);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 8821261..9bcca08 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -85,7 +85,6 @@
 
 /* Define the friendly delay before and after opening net devices */
 #define CONF_POST_OPEN		10	/* After opening: 10 msecs */
-#define CONF_CARRIER_TIMEOUT	120000	/* Wait for carrier timeout */
 
 /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */
 #define CONF_OPEN_RETRIES 	2	/* (Re)open devices twice */
@@ -101,6 +100,9 @@
 #define NONE cpu_to_be32(INADDR_NONE)
 #define ANY cpu_to_be32(INADDR_ANY)
 
+/* Wait for carrier timeout default in seconds */
+static unsigned int carrier_timeout = 120;
+
 /*
  * Public IP configuration
  */
@@ -220,7 +222,7 @@
 	for_each_netdev(&init_net, dev) {
 		if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev))
 			continue;
-		if (dev_change_flags(dev, dev->flags | IFF_UP) < 0)
+		if (dev_change_flags(dev, dev->flags | IFF_UP, NULL) < 0)
 			pr_err("IP-Config: Failed to open %s\n", dev->name);
 	}
 
@@ -238,7 +240,7 @@
 			if (ic_proto_enabled && !able)
 				continue;
 			oflags = dev->flags;
-			if (dev_change_flags(dev, oflags | IFF_UP) < 0) {
+			if (dev_change_flags(dev, oflags | IFF_UP, NULL) < 0) {
 				pr_err("IP-Config: Failed to open %s\n",
 				       dev->name);
 				continue;
@@ -268,9 +270,9 @@
 
 	/* wait for a carrier on at least one device */
 	start = jiffies;
-	next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
+	next_msg = start + msecs_to_jiffies(20000);
 	while (time_before(jiffies, start +
-			   msecs_to_jiffies(CONF_CARRIER_TIMEOUT))) {
+			   msecs_to_jiffies(carrier_timeout * 1000))) {
 		int wait, elapsed;
 
 		for_each_netdev(&init_net, dev)
@@ -283,9 +285,9 @@
 			continue;
 
 		elapsed = jiffies_to_msecs(jiffies - start);
-		wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000;
+		wait = (carrier_timeout * 1000 - elapsed + 500) / 1000;
 		pr_info("Waiting up to %d more seconds for network.\n", wait);
-		next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
+		next_msg = jiffies + msecs_to_jiffies(20000);
 	}
 have_carrier:
 	rtnl_unlock();
@@ -315,7 +317,7 @@
 		dev = d->dev;
 		if (d != ic_dev && !netdev_uses_dsa(dev)) {
 			pr_debug("IP-Config: Downing %s\n", dev->name);
-			dev_change_flags(dev, d->flags);
+			dev_change_flags(dev, d->flags, NULL);
 		}
 		kfree(d);
 	}
@@ -429,6 +431,8 @@
 			ic_netmask = htonl(IN_CLASSB_NET);
 		else if (IN_CLASSC(ntohl(ic_myaddr)))
 			ic_netmask = htonl(IN_CLASSC_NET);
+		else if (IN_CLASSE(ntohl(ic_myaddr)))
+			ic_netmask = htonl(IN_CLASSE_NET);
 		else {
 			pr_err("IP-Config: Unable to guess netmask for address %pI4\n",
 			       &ic_myaddr);
@@ -1361,18 +1365,7 @@
 	}
 	return 0;
 }
-
-static int ntp_servers_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ntp_servers_seq_show, NULL);
-}
-
-static const struct file_operations ntp_servers_seq_fops = {
-	.open		= ntp_servers_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ntp_servers_seq);
 #endif /* CONFIG_PROC_FS */
 
 /*
@@ -1789,3 +1782,18 @@
 	return 1;
 }
 __setup("dhcpclass=", vendor_class_identifier_setup);
+
+static int __init set_carrier_timeout(char *str)
+{
+	ssize_t ret;
+
+	if (!str)
+		return 0;
+
+	ret = kstrtouint(str, 0, &carrier_timeout);
+	if (ret)
+		return 0;
+
+	return 1;
+}
+__setup("carrier_timeout=", set_carrier_timeout);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c891235..2f01cf6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Linux NET3:	IP/IP protocol decoder.
  *
@@ -16,12 +17,6 @@
  *              Carlos Picoto   :       GRE over IP support
  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  *					I do not want to merge them together.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  */
 
 /* tunnel.c: an IP tunnel driver
@@ -140,6 +135,13 @@
 	struct ip_tunnel *t;
 	int err = 0;
 
+	t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+			     iph->daddr, iph->saddr, 0);
+	if (!t) {
+		err = -ENOENT;
+		goto out;
+	}
+
 	switch (type) {
 	case ICMP_DEST_UNREACH:
 		switch (code) {
@@ -167,21 +169,13 @@
 		goto out;
 	}
 
-	t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
-			     iph->daddr, iph->saddr, 0);
-	if (!t) {
-		err = -ENOENT;
-		goto out;
-	}
-
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
-				 iph->protocol, 0);
+		ipv4_update_pmtu(skb, net, info, t->parms.link, iph->protocol);
 		goto out;
 	}
 
 	if (type == ICMP_REDIRECT) {
-		ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
+		ipv4_redirect(skb, net, t->parms.link, iph->protocol);
 		goto out;
 	}
 
@@ -281,6 +275,9 @@
 	const struct iphdr  *tiph = &tunnel->parms.iph;
 	u8 ipproto;
 
+	if (!pskb_inet_may_pull(skb))
+		goto tx_error;
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
 		ipproto = IPPROTO_IPIP;
@@ -303,7 +300,7 @@
 	skb_set_inner_ipproto(skb, ipproto);
 
 	if (tunnel->collect_md)
-		ip_md_tunnel_xmit(skb, dev, ipproto);
+		ip_md_tunnel_xmit(skb, dev, ipproto, 0);
 	else
 		ip_tunnel_xmit(skb, dev, tiph, ipproto);
 	return NETDEV_TX_OK;
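
ipip_err now resolves the tunnel before inspecting the ICMP type, so a missing tunnel yields -ENOENT uniformly and the PMTU/redirect branches below can assume t is valid. A sketch of that look-up-first ordering (toy types, not the kernel lookup API):

#include <errno.h>
#include <stdio.h>

struct tunnel { int link; };

static struct tunnel *lookup(int key)
{
        static struct tunnel t = { .link = 1 };

        return key == 42 ? &t : NULL;
}

static int err_handler(int key, int type)
{
        struct tunnel *t;

        t = lookup(key);        /* resolve the object first... */
        if (!t)
                return -ENOENT; /* ...so every branch below may use it */

        switch (type) {
        case 3:                 /* unreachable-like case */
                printf("pmtu update on link %d\n", t->link);
                break;
        case 5:                 /* redirect-like case */
                printf("redirect on link %d\n", t->link);
                break;
        }
        return 0;
}

int main(void)
{
        printf("%d\n", err_handler(42, 3));     /* 0 */
        printf("%d\n", err_handler(7, 3));      /* -ENOENT */
        return 0;
}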
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5660adc..5800743 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IP multicast routing support for mrouted 3.6/3.8
  *
  *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *	  Linux Consultancy and Custom Driver Development
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	Fixes:
  *	Michael Chastain	:	Incorrect size of copying.
  *	Alan Cox		:	Added the cache manager code
@@ -23,7 +19,6 @@
  *      Carlos Picoto           :       PIMv1 Support
  *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
  *					Relax this requirement to work with older peers.
- *
  */
 
 #include <linux/uaccess.h>
@@ -66,8 +61,9 @@
 #include <net/netlink.h>
 #include <net/fib_rules.h>
 #include <linux/netconf.h>
-#include <net/nexthop.h>
-#include <net/switchdev.h>
+#include <net/rtnh.h>
+
+#include <linux/nospec.h>
 
 struct ipmr_rule {
 	struct fib_rule		common;
@@ -109,7 +105,7 @@
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 				 int cmd);
 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
-static void mroute_clean_tables(struct mr_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, int flags);
 static void ipmr_expire_process(struct timer_list *t);
 
 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -372,7 +368,6 @@
 	.key_offset = offsetof(struct mfc_cache, cmparg),
 	.key_len = sizeof(struct mfc_cache_cmp_arg),
 	.nelem_hint = 3,
-	.locks_mul = 1,
 	.obj_cmpfn = ipmr_hash_cmp,
 	.automatic_shrinking = true,
 };
@@ -414,7 +409,8 @@
 static void ipmr_free_table(struct mr_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt, true);
+	mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
+				 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC);
 	rhltable_destroy(&mrt->mfc_hash);
 	kfree(mrt);
 }
@@ -506,7 +502,7 @@
 			dev->flags |= IFF_MULTICAST;
 			if (!ipmr_init_vif_indev(dev))
 				goto failure;
-			if (dev_open(dev))
+			if (dev_open(dev, NULL))
 				goto failure;
 			dev_hold(dev);
 		}
@@ -589,7 +585,7 @@
 
 	if (!ipmr_init_vif_indev(dev))
 		goto failure;
-	if (dev_open(dev))
+	if (dev_open(dev, NULL))
 		goto failure;
 
 	dev_hold(dev);
@@ -835,10 +831,8 @@
 static int vif_add(struct net *net, struct mr_table *mrt,
 		   struct vifctl *vifc, int mrtsock)
 {
+	struct netdev_phys_item_id ppid = { };
 	int vifi = vifc->vifc_vifi;
-	struct switchdev_attr attr = {
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-	};
 	struct vif_device *v = &mrt->vif_table[vifi];
 	struct net_device *dev;
 	struct in_device *in_dev;
@@ -917,10 +911,10 @@
 			vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
 			(VIFF_TUNNEL | VIFF_REGISTER));
 
-	attr.orig_dev = dev;
-	if (!switchdev_port_attr_get(dev, &attr)) {
-		memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
-		v->dev_parent_id.id_len = attr.u.ppid.id_len;
+	err = dev_get_port_parent_id(dev, &ppid, true);
+	if (err == 0) {
+		memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
+		v->dev_parent_id.id_len = ppid.id_len;
 	} else {
 		v->dev_parent_id.id_len = 0;
 	}
@@ -1140,8 +1134,8 @@
 
 	if (!found) {
 		/* Create a new entry if allowable */
-		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
-		    (c = ipmr_cache_alloc_unres()) == NULL) {
+		c = ipmr_cache_alloc_unres();
+		if (!c) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -1297,7 +1291,7 @@
 }
 
 /* Close the multicast socket, and clear the vif tables etc */
-static void mroute_clean_tables(struct mr_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, int flags)
 {
 	struct net *net = read_pnet(&mrt->net);
 	struct mr_mfc *c, *tmp;
@@ -1306,35 +1300,44 @@
 	int i;
 
 	/* Shut down all active vif entries */
-	for (i = 0; i < mrt->maxvif; i++) {
-		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
-			continue;
-		vif_delete(mrt, i, 0, &list);
+	if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) {
+		for (i = 0; i < mrt->maxvif; i++) {
+			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
+			     !(flags & MRT_FLUSH_VIFS_STATIC)) ||
+			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS)))
+				continue;
+			vif_delete(mrt, i, 0, &list);
+		}
+		unregister_netdevice_many(&list);
 	}
-	unregister_netdevice_many(&list);
 
 	/* Wipe the cache */
-	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
-		if (!all && (c->mfc_flags & MFC_STATIC))
-			continue;
-		rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
-		list_del_rcu(&c->list);
-		cache = (struct mfc_cache *)c;
-		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
-					      mrt->id);
-		mroute_netlink_event(mrt, cache, RTM_DELROUTE);
-		mr_cache_put(c);
+	if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
+		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
+			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
+				continue;
+			rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
+			list_del_rcu(&c->list);
+			cache = (struct mfc_cache *)c;
+			call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
+						      mrt->id);
+			mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+			mr_cache_put(c);
+		}
 	}
 
-	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
-		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
-			list_del(&c->list);
-			cache = (struct mfc_cache *)c;
-			mroute_netlink_event(mrt, cache, RTM_DELROUTE);
-			ipmr_destroy_unres(mrt, cache);
+	if (flags & MRT_FLUSH_MFC) {
+		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+			spin_lock_bh(&mfc_unres_lock);
+			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
+				list_del(&c->list);
+				cache = (struct mfc_cache *)c;
+				mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+				ipmr_destroy_unres(mrt, cache);
+			}
+			spin_unlock_bh(&mfc_unres_lock);
 		}
-		spin_unlock_bh(&mfc_unres_lock);
 	}
 }
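
mroute_clean_tables() trades the old bool "all" for a flags bitmask, which is what lets the new MRT_FLUSH setsockopt flush static and dynamic VIFs/MFC entries independently. A sketch of bitmask-selective cleanup with stand-in flag names (mirroring, not reproducing, the MRT_FLUSH_* values):

#include <stdio.h>

#define FLUSH_DYN       0x1     /* dynamic entries */
#define FLUSH_STATIC    0x2     /* static entries */

struct entry {
        const char *name;
        int is_static;
};

static void clean_entries(struct entry *e, int n, int flags)
{
        int i;

        for (i = 0; i < n; i++) {
                /* Skip entries whose class wasn't requested. */
                if ((e[i].is_static && !(flags & FLUSH_STATIC)) ||
                    (!e[i].is_static && !(flags & FLUSH_DYN)))
                        continue;
                printf("flush %s\n", e[i].name);
        }
}

int main(void)
{
        struct entry tbl[] = {
                { "dyn0", 0 }, { "static0", 1 }, { "dyn1", 0 },
        };

        clean_entries(tbl, 3, FLUSH_DYN);                /* dyn0, dyn1 */
        clean_entries(tbl, 3, FLUSH_DYN | FLUSH_STATIC); /* everything */
        return 0;
}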
 
@@ -1355,7 +1358,7 @@
 						    NETCONFA_IFINDEX_ALL,
 						    net->ipv4.devconf_all);
 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
-			mroute_clean_tables(mrt, false);
+			mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC);
 		}
 	}
 	rtnl_unlock();
@@ -1480,6 +1483,17 @@
 					   sk == rtnl_dereference(mrt->mroute_sk),
 					   parent);
 		break;
+	case MRT_FLUSH:
+		if (optlen != sizeof(val)) {
+			ret = -EINVAL;
+			break;
+		}
+		if (get_user(val, (int __user *)optval)) {
+			ret = -EFAULT;
+			break;
+		}
+		mroute_clean_tables(mrt, val);
+		break;
 	/* Control PIM assert. */
 	case MRT_ASSERT:
 		if (optlen != sizeof(val)) {
@@ -1612,6 +1626,7 @@
 			return -EFAULT;
 		if (vr.vifi >= mrt->maxvif)
 			return -EINVAL;
+		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
 		read_lock(&mrt_lock);
 		vif = &mrt->vif_table[vr.vifi];
 		if (VIF_EXISTS(mrt, vr.vifi)) {
@@ -1686,6 +1701,7 @@
 			return -EFAULT;
 		if (vr.vifi >= mrt->maxvif)
 			return -EINVAL;
+		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
 		read_lock(&mrt_lock);
 		vif = &mrt->vif_table[vr.vifi];
 		if (VIF_EXISTS(mrt, vr.vifi)) {
@@ -1778,7 +1794,7 @@
 	ip_send_check(iph);
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	nf_reset(skb);
+	nf_reset_ct(skb);
 }
 
 static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
@@ -1802,7 +1818,7 @@
 	struct vif_device *out_vif = &mrt->vif_table[out_vifi];
 	struct vif_device *in_vif = &mrt->vif_table[in_vifi];
 
-	if (!skb->offload_mr_fwd_mark)
+	if (!skb->offload_l3_fwd_mark)
 		return false;
 	if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
 		return false;
@@ -1820,8 +1836,7 @@
 /* Processing handlers for ipmr_forward */
 
 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
-			    int in_vifi, struct sk_buff *skb,
-			    struct mfc_cache *c, int vifi)
+			    int in_vifi, struct sk_buff *skb, int vifi)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct vif_device *vif = &mrt->vif_table[vifi];
@@ -2027,7 +2042,7 @@
 
 				if (skb2)
 					ipmr_queue_xmit(net, mrt, true_vifi,
-							skb2, c, psend);
+							skb2, psend);
 			}
 			psend = ct;
 		}
@@ -2039,9 +2054,9 @@
 
 			if (skb2)
 				ipmr_queue_xmit(net, mrt, true_vifi, skb2,
-						c, psend);
+						psend);
 		} else {
-			ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
+			ipmr_queue_xmit(net, mrt, true_vifi, skb, psend);
 			return;
 		}
 	}
@@ -2125,7 +2140,7 @@
 
 			mroute_sk = rcu_dereference(mrt->mroute_sk);
 			if (mroute_sk) {
-				nf_reset(skb);
+				nf_reset_ct(skb);
 				raw_rcv(mroute_sk, skb);
 				return 0;
 			}
@@ -2274,7 +2289,8 @@
 			rcu_read_unlock();
 			return -ENODEV;
 		}
-		skb2 = skb_clone(skb, GFP_ATOMIC);
+
+		skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
 		if (!skb2) {
 			read_unlock(&mrt_lock);
 			rcu_read_unlock();
@@ -2464,6 +2480,61 @@
 	rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
 }
 
+static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
+				       const struct nlmsghdr *nlh,
+				       struct nlattr **tb,
+				       struct netlink_ext_ack *extack)
+{
+	struct rtmsg *rtm;
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request");
+		return -EINVAL;
+	}
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+					      rtm_ipv4_policy, extack);
+
+	rtm = nlmsg_data(nlh);
+	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
+	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
+	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
+	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
+		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+					    rtm_ipv4_policy, extack);
+	if (err)
+		return err;
+
+	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
+	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
+		NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
+		return -EINVAL;
+	}
+
+	for (i = 0; i <= RTA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case RTA_SRC:
+		case RTA_DST:
+		case RTA_TABLE:
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 			     struct netlink_ext_ack *extack)
 {
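
The new strict getroute validator rejects any nonzero rtm header field and whitelists only RTA_SRC, RTA_DST, and RTA_TABLE, while non-strict sockets keep the lenient deprecated parse. A userspace sketch of the attribute-whitelist loop:

#include <stdio.h>

enum { ATTR_SRC, ATTR_DST, ATTR_TABLE, ATTR_OIF, ATTR_MAX };

/* Returns 0 if only whitelisted attributes are present, -1 otherwise. */
static int validate_strict(const int *tb)
{
        int i;

        for (i = 0; i < ATTR_MAX; i++) {
                if (!tb[i])
                        continue;       /* attribute absent */

                switch (i) {
                case ATTR_SRC:
                case ATTR_DST:
                case ATTR_TABLE:
                        break;          /* allowed */
                default:
                        fprintf(stderr, "unsupported attribute %d\n", i);
                        return -1;
                }
        }
        return 0;
}

int main(void)
{
        int ok[ATTR_MAX]  = { [ATTR_SRC] = 1, [ATTR_TABLE] = 1 };
        int bad[ATTR_MAX] = { [ATTR_OIF] = 1 };

        printf("%d\n", validate_strict(ok));    /* 0 */
        printf("%d\n", validate_strict(bad));   /* -1 */
        return 0;
}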
@@ -2472,18 +2543,14 @@
 	struct sk_buff *skb = NULL;
 	struct mfc_cache *cache;
 	struct mr_table *mrt;
-	struct rtmsg *rtm;
 	__be32 src, grp;
 	u32 tableid;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
-			  rtm_ipv4_policy, extack);
+	err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
 	if (err < 0)
 		goto errout;
 
-	rtm = nlmsg_data(nlh);
-
 	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
 	grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
 	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
@@ -2527,8 +2594,34 @@
 
 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct fib_dump_filter filter = {};
+	int err;
+
+	if (cb->strict_check) {
+		err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh,
+					    &filter, cb);
+		if (err < 0)
+			return err;
+	}
+
+	if (filter.table_id) {
+		struct mr_table *mrt;
+
+		mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
+		if (!mrt) {
+			if (filter.dump_all_families)
+				return skb->len;
+
+			NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
+			return -ENOENT;
+		}
+		err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute,
+				    &mfc_unres_lock, &filter);
+		return skb->len ? : err;
+	}
+
 	return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
-				_ipmr_fill_mroute, &mfc_unres_lock);
+				_ipmr_fill_mroute, &mfc_unres_lock, &filter);
 }
 
 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
@@ -2577,8 +2670,8 @@
 	struct rtmsg *rtm;
 	int ret, rem;
 
-	ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy,
-			     extack);
+	ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
+					rtm_ipmr_policy, extack);
 	if (ret < 0)
 		goto out;
 	rtm = nlmsg_data(nlh);
@@ -2686,7 +2779,7 @@
 		return true;
 
 	vif = &mrt->vif_table[vifid];
-	vif_nest = nla_nest_start(skb, IPMRA_VIF);
+	vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF);
 	if (!vif_nest)
 		return false;
 	if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
@@ -2710,6 +2803,31 @@
 	return true;
 }
 
+static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
+			       struct netlink_ext_ack *extack)
+{
+	struct ifinfomsg *ifm;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump");
+		return -EINVAL;
+	}
+
+	if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
+		NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump");
+		return -EINVAL;
+	}
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
+	    ifm->ifi_change || ifm->ifi_index) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -2718,6 +2836,13 @@
 	unsigned int e = 0, s_e;
 	struct mr_table *mrt;
 
+	if (cb->strict_check) {
+		int err = ipmr_valid_dumplink(cb->nlh, cb->extack);
+
+		if (err < 0)
+			return err;
+	}
+
 	s_t = cb->args[0];
 	s_e = cb->args[1];
 
@@ -2738,7 +2863,7 @@
 		memset(hdr, 0, sizeof(*hdr));
 		hdr->ifi_family = RTNL_FAMILY_IPMR;
 
-		af = nla_nest_start(skb, IFLA_AF_SPEC);
+		af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
 		if (!af) {
 			nlmsg_cancel(skb, nlh);
 			goto out;
@@ -2749,7 +2874,7 @@
 			goto out;
 		}
 
-		vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS);
+		vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS);
 		if (!vifs) {
 			nla_nest_end(skb, af);
 			nlmsg_end(skb, nlh);
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index eab8cd5..ea48bd1 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -228,7 +228,7 @@
 	if (c->mfc_flags & MFC_OFFLOAD)
 		rtm->rtm_flags |= RTNH_F_OFFLOAD;
 
-	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+	mp_attr = nla_nest_start_noflag(skb, RTA_MULTIPATH);
 	if (!mp_attr)
 		return -EMSGSIZE;
 
@@ -268,6 +268,79 @@
 }
 EXPORT_SYMBOL(mr_fill_mroute);
 
+static bool mr_mfc_uses_dev(const struct mr_table *mrt,
+			    const struct mr_mfc *c,
+			    const struct net_device *dev)
+{
+	int ct;
+
+	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+			const struct vif_device *vif;
+
+			vif = &mrt->vif_table[ct];
+			if (vif->dev == dev)
+				return true;
+		}
+	}
+	return false;
+}
+
+int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
+		  struct netlink_callback *cb,
+		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
+			      u32 portid, u32 seq, struct mr_mfc *c,
+			      int cmd, int flags),
+		  spinlock_t *lock, struct fib_dump_filter *filter)
+{
+	unsigned int e = 0, s_e = cb->args[1];
+	unsigned int flags = NLM_F_MULTI;
+	struct mr_mfc *mfc;
+	int err;
+
+	if (filter->filter_set)
+		flags |= NLM_F_DUMP_FILTERED;
+
+	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+		if (e < s_e)
+			goto next_entry;
+		if (filter->dev &&
+		    !mr_mfc_uses_dev(mrt, mfc, filter->dev))
+			goto next_entry;
+
+		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
+		if (err < 0)
+			goto out;
+next_entry:
+		e++;
+	}
+
+	spin_lock_bh(lock);
+	list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+		if (e < s_e)
+			goto next_entry2;
+		if (filter->dev &&
+		    !mr_mfc_uses_dev(mrt, mfc, filter->dev))
+			goto next_entry2;
+
+		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
+		if (err < 0) {
+			spin_unlock_bh(lock);
+			goto out;
+		}
+next_entry2:
+		e++;
+	}
+	spin_unlock_bh(lock);
+	err = 0;
+out:
+	cb->args[1] = e;
+	return err;
+}
+EXPORT_SYMBOL(mr_table_dump);
+
 int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 		     struct mr_table *(*iter)(struct net *net,
 					      struct mr_table *mrt),
@@ -275,51 +348,36 @@
 				 struct sk_buff *skb,
 				 u32 portid, u32 seq, struct mr_mfc *c,
 				 int cmd, int flags),
-		     spinlock_t *lock)
+		     spinlock_t *lock, struct fib_dump_filter *filter)
 {
-	unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+	unsigned int t = 0, s_t = cb->args[0];
 	struct net *net = sock_net(skb->sk);
 	struct mr_table *mrt;
-	struct mr_mfc *mfc;
+	int err;
+
+	/* multicast does not track protocol or have route type other
+	 * than RTN_MULTICAST
+	 */
+	if (filter->filter_set) {
+		if (filter->protocol || filter->flags ||
+		    (filter->rt_type && filter->rt_type != RTN_MULTICAST))
+			return skb->len;
+	}
 
 	rcu_read_lock();
 	for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
 		if (t < s_t)
 			goto next_table;
-		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
-			if (e < s_e)
-				goto next_entry;
-			if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
-				 cb->nlh->nlmsg_seq, mfc,
-				 RTM_NEWROUTE, NLM_F_MULTI) < 0)
-				goto done;
-next_entry:
-			e++;
-		}
 
-		spin_lock_bh(lock);
-		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
-			if (e < s_e)
-				goto next_entry2;
-			if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
-				 cb->nlh->nlmsg_seq, mfc,
-				 RTM_NEWROUTE, NLM_F_MULTI) < 0) {
-				spin_unlock_bh(lock);
-				goto done;
-			}
-next_entry2:
-			e++;
-		}
-		spin_unlock_bh(lock);
-		e = 0;
-		s_e = 0;
+		err = mr_table_dump(mrt, skb, cb, fill, lock, filter);
+		if (err < 0)
+			break;
+		cb->args[1] = 0;
 next_table:
 		t++;
 	}
-done:
 	rcu_read_unlock();
 
-	cb->args[1] = e;
 	cb->args[0] = t;
 
 	return skb->len;
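
The factored-out mr_table_dump() keeps the standard netlink dump contract: cb->args[1] carries a cursor between invocations, entries below the cursor are skipped, and the dump returns early when the buffer fills. A sketch of a resumable dump loop over a plain array:

#include <stdio.h>

#define BATCH   2       /* pretend the buffer fits two records per call */

/* Dump entries from *args onward, at most BATCH per call; returns count. */
static int table_dump(const char **tbl, int n, unsigned int *args)
{
        unsigned int e, s_e = *args;    /* resume cursor */
        int written = 0;

        for (e = 0; e < (unsigned int)n; e++) {
                if (e < s_e)
                        continue;       /* already sent last round */
                if (written == BATCH)
                        break;          /* "buffer full": stop early */
                printf("dump %s\n", tbl[e]);
                written++;
        }
        *args = e;                      /* remember where to resume */
        return written;
}

int main(void)
{
        const char *tbl[] = { "mfc0", "mfc1", "mfc2", "mfc3", "mfc4" };
        unsigned int args = 0;

        while (table_dump(tbl, 5, &args) > 0)
                puts("-- next callback --");
        return 0;
}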
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
index 04311f7..3205d5f 100644
--- a/net/ipv4/metrics.c
+++ b/net/ipv4/metrics.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 #include <linux/types.h>
@@ -5,8 +6,9 @@
 #include <net/net_namespace.h>
 #include <net/tcp.h>
 
-int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
-		       u32 *metrics)
+static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
+			      int fc_mx_len, u32 *metrics,
+			      struct netlink_ext_ack *extack)
 {
 	bool ecn_ca = false;
 	struct nlattr *nla;
@@ -21,19 +23,26 @@
 
 		if (!type)
 			continue;
-		if (type > RTAX_MAX)
+		if (type > RTAX_MAX) {
+			NL_SET_ERR_MSG(extack, "Invalid metric type");
 			return -EINVAL;
+		}
 
 		if (type == RTAX_CC_ALGO) {
 			char tmp[TCP_CA_NAME_MAX];
 
 			nla_strlcpy(tmp, nla, sizeof(tmp));
 			val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
-			if (val == TCP_CA_UNSPEC)
+			if (val == TCP_CA_UNSPEC) {
+				NL_SET_ERR_MSG(extack, "Unknown tcp congestion algorithm");
 				return -EINVAL;
+			}
 		} else {
-			if (nla_len(nla) != sizeof(u32))
+			if (nla_len(nla) != sizeof(u32)) {
+				NL_SET_ERR_MSG_ATTR(extack, nla,
+						    "Invalid attribute in metrics");
 				return -EINVAL;
+			}
 			val = nla_get_u32(nla);
 		}
 		if (type == RTAX_ADVMSS && val > 65535 - 40)
@@ -42,8 +51,10 @@
 			val = 65535 - 15;
 		if (type == RTAX_HOPLIMIT && val > 255)
 			val = 255;
-		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) {
+			NL_SET_ERR_MSG(extack, "Unknown flag set in feature mask in metrics attribute");
 			return -EINVAL;
+		}
 		metrics[type - 1] = val;
 	}
 
@@ -52,4 +63,30 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(ip_metrics_convert);
+
+struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
+					int fc_mx_len,
+					struct netlink_ext_ack *extack)
+{
+	struct dst_metrics *fib_metrics;
+	int err;
+
+	if (!fc_mx)
+		return (struct dst_metrics *)&dst_default_metrics;
+
+	fib_metrics = kzalloc(sizeof(*fib_metrics), GFP_KERNEL);
+	if (unlikely(!fib_metrics))
+		return ERR_PTR(-ENOMEM);
+
+	err = ip_metrics_convert(net, fc_mx, fc_mx_len, fib_metrics->metrics,
+				 extack);
+	if (!err) {
+		refcount_set(&fib_metrics->refcnt, 1);
+	} else {
+		kfree(fib_metrics);
+		fib_metrics = ERR_PTR(err);
+	}
+
+	return fib_metrics;
+}
+EXPORT_SYMBOL_GPL(ip_fib_metrics_init);
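
ip_fib_metrics_init() folds allocation, parsing, and refcount setup into one helper whose failures travel in the returned pointer (ERR_PTR()/IS_ERR()) rather than in a separate status out-parameter. A userspace re-implementation of that pointer-encoded-error convention (the real macros live in include/linux/err.h; these are simplified stand-ins):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO       4095

static inline void *ERR_PTR(long err)      { return (void *)err; }
static inline long  PTR_ERR(const void *p) { return (long)p; }
static inline int   IS_ERR(const void *p)
{
        return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

struct metrics { unsigned int vals[4]; };

static struct metrics *metrics_init(const unsigned int *src)
{
        struct metrics *m;

        if (!src)                       /* nothing to parse: caller keeps defaults */
                return NULL;

        m = calloc(1, sizeof(*m));
        if (!m)
                return ERR_PTR(-ENOMEM);        /* error travels in the pointer */

        m->vals[0] = src[0];
        return m;
}

int main(void)
{
        unsigned int src[4] = { 1460 };
        struct metrics *m = metrics_init(src);

        if (IS_ERR(m))
                return (int)-PTR_ERR(m);
        printf("mss metric: %u\n", m ? m->vals[0] : 0);
        free(m);
        return 0;
}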
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 8d2e5dc..a058213 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -80,24 +80,6 @@
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
-int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
-{
-	const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
-	if (entry->state.hook == NF_INET_LOCAL_OUT) {
-		const struct iphdr *iph = ip_hdr(skb);
-
-		if (!(iph->tos == rt_info->tos &&
-		      skb->mark == rt_info->mark &&
-		      iph->daddr == rt_info->daddr &&
-		      iph->saddr == rt_info->saddr))
-			return ip_route_me_harder(entry->state.net, skb,
-						  RTN_UNSPEC);
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_ip_reroute);
-
 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		bool strict __always_unused)
 {
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 184bf2e..f17b402 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # IP netfilter configuration
 #
@@ -27,14 +28,6 @@
 
 if NF_TABLES_IPV4
 
-config NFT_CHAIN_ROUTE_IPV4
-	tristate "IPv4 nf_tables route chain support"
-	help
-	  This option enables the "route" chain for IPv4 in nf_tables. This
-	  chain type is used to force packet re-routing after mangling header
-	  fields such as the source, destination, type of service and
-	  the packet mark.
-
 config NFT_REJECT_IPV4
 	select NF_REJECT_IPV4
 	default NFT_REJECT
@@ -94,50 +87,7 @@
 	tristate "IPv4 packet rejection"
 	default m if NETFILTER_ADVANCED=n
 
-config NF_NAT_IPV4
-	tristate "IPv4 NAT"
-	depends on NF_CONNTRACK
-	default m if NETFILTER_ADVANCED=n
-	select NF_NAT
-	help
-	  The IPv4 NAT option allows masquerading, port forwarding and other
-	  forms of full Network Address Port Translation. This can be
-	  controlled by iptables or nft.
-
-if NF_NAT_IPV4
-
-config NF_NAT_MASQUERADE_IPV4
-	bool
-
-if NF_TABLES
-config NFT_CHAIN_NAT_IPV4
-	depends on NF_TABLES_IPV4
-	tristate "IPv4 nf_tables nat chain support"
-	help
-	  This option enables the "nat" chain for IPv4 in nf_tables. This
-	  chain type is used to perform Network Address Translation (NAT)
-	  packet transformations such as the source, destination address and
-	  source and destination ports.
-
-config NFT_MASQ_IPV4
-	tristate "IPv4 masquerading support for nf_tables"
-	depends on NF_TABLES_IPV4
-	depends on NFT_MASQ
-	select NF_NAT_MASQUERADE_IPV4
-	help
-	  This is the expression that provides IPv4 masquerading support for
-	  nf_tables.
-
-config NFT_REDIR_IPV4
-	tristate "IPv4 redirect support for nf_tables"
-	depends on NF_TABLES_IPV4
-	depends on NFT_REDIR
-	select NF_NAT_REDIRECT
-	help
-	  This is the expression that provides IPv4 redirect support for
-	  nf_tables.
-endif # NF_TABLES
-
+if NF_NAT
 config NF_NAT_SNMP_BASIC
 	tristate "Basic SNMP-ALG support"
 	depends on NF_CONNTRACK_SNMP
@@ -156,22 +106,17 @@
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NF_NAT_PROTO_GRE
-	tristate
-	depends on NF_CT_PROTO_GRE
-
 config NF_NAT_PPTP
 	tristate
 	depends on NF_CONNTRACK
 	default NF_CONNTRACK_PPTP
-	select NF_NAT_PROTO_GRE
 
 config NF_NAT_H323
 	tristate
 	depends on NF_CONNTRACK
 	default NF_CONNTRACK_H323
 
-endif # NF_NAT_IPV4
+endif # NF_NAT
 
 config IP_NF_IPTABLES
 	tristate "IP tables support (required for filtering/masq/NAT)"
@@ -268,7 +213,6 @@
 	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	select NF_NAT
-	select NF_NAT_IPV4
 	select NETFILTER_XT_NAT
 	help
 	  This enables the `nat' table in iptables. This allows masquerading,
@@ -281,16 +225,10 @@
 
 config IP_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
-	select NF_NAT_MASQUERADE_IPV4
-	default m if NETFILTER_ADVANCED=n
+	select NETFILTER_XT_TARGET_MASQUERADE
 	help
-	  Masquerading is a special case of NAT: all outgoing connections are
-	  changed to seem to come from a particular interface's address, and
-	  if the interface goes down, those connections are lost.  This is
-	  only useful for dialup accounts with dynamic IP address (ie. your IP
-	  address will be different on next dialup).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	  This is a backwards-compat option for the user's convenience
+	  (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
 
 config IP_NF_TARGET_NETMAP
 	tristate "NETMAP target support"
@@ -334,7 +272,7 @@
 	  The CLUSTERIP target allows you to build load-balancing clusters of
 	  network servers without having a dedicated load-balancing
 	  router/server/switch.
-	
+
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP_NF_TARGET_ECN
@@ -343,7 +281,7 @@
 	depends on NETFILTER_ADVANCED
 	---help---
 	  This option adds a `ECN' target, which can be used in the iptables mangle
-	  table.  
+	  table.
 
 	  You can use this target to remove the ECN bits from the IPv4 header of
 	  an IP packet.  This is particularly useful, if you need to work around
@@ -368,9 +306,9 @@
 	  This option adds a `raw' table to iptables. This table is the very
 	  first in the netfilter framework and hooks in at the PREROUTING
 	  and OUTPUT chains.
-	
+
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 # security table for MAC policy
 config IP_NF_SECURITY
@@ -380,7 +318,7 @@
 	help
 	  This option adds a `security' table to iptables, for use
 	  with Mandatory Access Control (MAC) policy.
-	 
+
 	  If unsure, say N.
 
 endif # IP_NF_IPTABLES
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 367993a..7c497c7 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -3,10 +3,6 @@
 # Makefile for the netfilter modules on top of IPv4.
 #
 
-nf_nat_ipv4-y		:= nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
-nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
-obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
-
 # defrag
 obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
 
@@ -28,21 +24,14 @@
 $(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
 obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
 
-# NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-
-obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
-obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
-obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
-obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
 obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
 
 # flow table support
 obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
 
-# generic IP tables 
+# generic IP tables
 obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 
 # the three instances of ip_tables
@@ -59,7 +48,6 @@
 # targets
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
 obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2dc83de..214154b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Packet matching code for ARP packets.
  *
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index a5e52a9..a4e07e5 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* module that allows mangling of the arp payload */
 #include <linux/module.h>
 #include <linux/netfilter.h>
@@ -16,7 +17,7 @@
 	unsigned char *arpptr;
 	int pln, hln;
 
-	if (!skb_make_writable(skb, skb->len))
+	if (skb_ensure_writable(skb, skb->len))
 		return NF_DROP;
 
 	arp = arp_hdr(skb);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 8f8713b..c216b9a 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Filtering ARP tables module.
  *
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e77872c..10b91eb 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Packet matching code.
  *
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
  * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/cache.h>
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2c8d313..6bdb1ab 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Cluster IP hashmark target
  * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
  * based on ideas of Fabio Olive Leite <olive@unixforge.org>
  *
  * Development of this code funded by SuSE Linux AG, http://www.suse.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -56,18 +52,15 @@
 #endif
 	enum clusterip_hashmode hash_mode;	/* which hashing mode */
 	u_int32_t hash_initval;			/* hash initialization */
-	struct rcu_head rcu;
-
+	struct rcu_head rcu;			/* for call_rcu */
+	struct net *net;			/* netns for pernet list */
 	char ifname[IFNAMSIZ];			/* device ifname */
-	struct notifier_block notifier;		/* refresh c->ifindex in it */
 };
 
 #ifdef CONFIG_PROC_FS
 static const struct file_operations clusterip_proc_fops;
 #endif
 
-static unsigned int clusterip_net_id __read_mostly;
-
 struct clusterip_net {
 	struct list_head configs;
 	/* lock protects the configs list */
@@ -75,51 +68,66 @@
 
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *procdir;
+	/* mutex protects the config->pde*/
+	struct mutex mutex;
 #endif
 };
 
+static unsigned int clusterip_net_id __read_mostly;
+static inline struct clusterip_net *clusterip_pernet(struct net *net)
+{
+	return net_generic(net, clusterip_net_id);
+}
+
 static inline void
 clusterip_config_get(struct clusterip_config *c)
 {
 	refcount_inc(&c->refcount);
 }
 
-
 static void clusterip_config_rcu_free(struct rcu_head *head)
 {
-	kfree(container_of(head, struct clusterip_config, rcu));
+	struct clusterip_config *config;
+	struct net_device *dev;
+
+	config = container_of(head, struct clusterip_config, rcu);
+	dev = dev_get_by_name(config->net, config->ifname);
+	if (dev) {
+		dev_mc_del(dev, config->clustermac);
+		dev_put(dev);
+	}
+	kfree(config);
 }
 
 static inline void
 clusterip_config_put(struct clusterip_config *c)
 {
 	if (refcount_dec_and_test(&c->refcount))
-		call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
+		call_rcu(&c->rcu, clusterip_config_rcu_free);
 }
 
 /* decrease the count of entries using/referencing this config.  If last
  * entry(rule) is removed, remove the config from lists, but don't free it
  * yet, since proc-files could still be holding references */
 static inline void
-clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
+clusterip_config_entry_put(struct clusterip_config *c)
 {
-	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+	struct clusterip_net *cn = clusterip_pernet(c->net);
 
 	local_bh_disable();
 	if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
+		list_del_rcu(&c->list);
+		spin_unlock(&cn->lock);
+		local_bh_enable();
 		/* In case anyone still accesses the file, the open/close
 		 * functions are also incrementing the refcount on their own,
 		 * so it's safe to remove the entry even if it's in use. */
 #ifdef CONFIG_PROC_FS
+		mutex_lock(&cn->mutex);
 		if (cn->procdir)
 			proc_remove(c->pde);
+		mutex_unlock(&cn->mutex);
 #endif
-		list_del_rcu(&c->list);
-		spin_unlock(&cn->lock);
-		local_bh_enable();
-
-		unregister_netdevice_notifier(&c->notifier);
-
 		return;
 	}
 	local_bh_enable();
@@ -129,7 +137,7 @@
 __clusterip_config_find(struct net *net, __be32 clusterip)
 {
 	struct clusterip_config *c;
-	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+	struct clusterip_net *cn = clusterip_pernet(net);
 
 	list_for_each_entry_rcu(c, &cn->configs, list) {
 		if (c->clusterip == clusterip)
@@ -181,32 +189,37 @@
 		       void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+	struct clusterip_net *cn = clusterip_pernet(net);
 	struct clusterip_config *c;
 
-	c = container_of(this, struct clusterip_config, notifier);
-	switch (event) {
-	case NETDEV_REGISTER:
-		if (!strcmp(dev->name, c->ifname)) {
-			c->ifindex = dev->ifindex;
-			dev_mc_add(dev, c->clustermac);
+	spin_lock_bh(&cn->lock);
+	list_for_each_entry_rcu(c, &cn->configs, list) {
+		switch (event) {
+		case NETDEV_REGISTER:
+			if (!strcmp(dev->name, c->ifname)) {
+				c->ifindex = dev->ifindex;
+				dev_mc_add(dev, c->clustermac);
+			}
+			break;
+		case NETDEV_UNREGISTER:
+			if (dev->ifindex == c->ifindex) {
+				dev_mc_del(dev, c->clustermac);
+				c->ifindex = -1;
+			}
+			break;
+		case NETDEV_CHANGENAME:
+			if (!strcmp(dev->name, c->ifname)) {
+				c->ifindex = dev->ifindex;
+				dev_mc_add(dev, c->clustermac);
+			} else if (dev->ifindex == c->ifindex) {
+				dev_mc_del(dev, c->clustermac);
+				c->ifindex = -1;
+			}
+			break;
 		}
-		break;
-	case NETDEV_UNREGISTER:
-		if (dev->ifindex == c->ifindex) {
-			dev_mc_del(dev, c->clustermac);
-			c->ifindex = -1;
-		}
-		break;
-	case NETDEV_CHANGENAME:
-		if (!strcmp(dev->name, c->ifname)) {
-			c->ifindex = dev->ifindex;
-			dev_mc_add(dev, c->clustermac);
-		} else if (dev->ifindex == c->ifindex) {
-			dev_mc_del(dev, c->clustermac);
-			c->ifindex = -1;
-		}
-		break;
 	}
+	spin_unlock_bh(&cn->lock);
 
 	return NOTIFY_DONE;
 }
@@ -215,30 +228,44 @@
 clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
 		      __be32 ip, const char *iniface)
 {
-	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+	struct clusterip_net *cn = clusterip_pernet(net);
 	struct clusterip_config *c;
+	struct net_device *dev;
 	int err;
 
+	if (iniface[0] == '\0') {
+		pr_info("Please specify an interface name\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	c = kzalloc(sizeof(*c), GFP_ATOMIC);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
-	strcpy(c->ifname, iniface);
-	c->ifindex = -1;
-	c->clusterip = ip;
+	dev = dev_get_by_name(net, iniface);
+	if (!dev) {
+		pr_info("no such interface %s\n", iniface);
+		kfree(c);
+		return ERR_PTR(-ENOENT);
+	}
+	c->ifindex = dev->ifindex;
+	strcpy(c->ifname, dev->name);
 	memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
+	dev_mc_add(dev, c->clustermac);
+	dev_put(dev);
+
+	c->clusterip = ip;
 	c->num_total_nodes = i->num_total_nodes;
 	clusterip_config_init_nodelist(c, i);
 	c->hash_mode = i->hash_mode;
 	c->hash_initval = i->hash_initval;
+	c->net = net;
 	refcount_set(&c->refcount, 1);
 
 	spin_lock_bh(&cn->lock);
 	if (__clusterip_config_find(net, ip)) {
-		spin_unlock_bh(&cn->lock);
-		kfree(c);
-
-		return ERR_PTR(-EBUSY);
+		err = -EBUSY;
+		goto out_config_put;
 	}
 
 	list_add_rcu(&c->list, &cn->configs);
@@ -250,9 +277,11 @@
 
 		/* create proc dir entry */
 		sprintf(buffer, "%pI4", &ip);
+		mutex_lock(&cn->mutex);
 		c->pde = proc_create_data(buffer, 0600,
 					  cn->procdir,
 					  &clusterip_proc_fops, c);
+		mutex_unlock(&cn->mutex);
 		if (!c->pde) {
 			err = -ENOMEM;
 			goto err;
@@ -260,22 +289,17 @@
 	}
 #endif
 
-	c->notifier.notifier_call = clusterip_netdev_event;
-	err = register_netdevice_notifier(&c->notifier);
-	if (!err) {
-		refcount_set(&c->entries, 1);
-		return c;
-	}
+	refcount_set(&c->entries, 1);
+	return c;
 
 #ifdef CONFIG_PROC_FS
-	proc_remove(c->pde);
 err:
 #endif
 	spin_lock_bh(&cn->lock);
 	list_del_rcu(&c->list);
+out_config_put:
 	spin_unlock_bh(&cn->lock);
 	clusterip_config_put(c);
-
 	return ERR_PTR(err);
 }
 
@@ -392,8 +416,8 @@
 	     ctinfo == IP_CT_RELATED_REPLY))
 		return XT_CONTINUE;
 
-	/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
-	 * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
+	/* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
+	 * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
 	 * on, which all have an ID field [relevant for hashing]. */
 
 	hash = clusterip_hashfn(skb, cipinfo->config);
@@ -475,34 +499,20 @@
 				&e->ip.dst.s_addr);
 			return -EINVAL;
 		} else {
-			struct net_device *dev;
-
-			if (e->ip.iniface[0] == '\0') {
-				pr_info("Please specify an interface name\n");
-				return -EINVAL;
-			}
-
-			dev = dev_get_by_name(par->net, e->ip.iniface);
-			if (!dev) {
-				pr_info("no such interface %s\n",
-					e->ip.iniface);
-				return -ENOENT;
-			}
-			dev_put(dev);
-
 			config = clusterip_config_init(par->net, cipinfo,
 						       e->ip.dst.s_addr,
 						       e->ip.iniface);
 			if (IS_ERR(config))
 				return PTR_ERR(config);
 		}
-	}
+	} else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN))
+		return -EINVAL;
 
 	ret = nf_ct_netns_get(par->net, par->family);
 	if (ret < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		clusterip_config_entry_put(par->net, config);
+		clusterip_config_entry_put(config);
 		clusterip_config_put(config);
 		return ret;
 	}
@@ -524,7 +534,7 @@
 
 	/* if no more entries are referencing the config, remove it
 	 * from the list and destroy the proc entry */
-	clusterip_config_entry_put(par->net, cipinfo->config);
+	clusterip_config_entry_put(cipinfo->config);
 
 	clusterip_config_put(cipinfo->config);
 
@@ -806,7 +816,7 @@
 
 static int clusterip_net_init(struct net *net)
 {
-	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+	struct clusterip_net *cn = clusterip_pernet(net);
 	int ret;
 
 	INIT_LIST_HEAD(&cn->configs);
@@ -824,6 +834,7 @@
 		pr_err("Unable to proc dir entry\n");
 		return -ENOMEM;
 	}
+	mutex_init(&cn->mutex);
 #endif /* CONFIG_PROC_FS */
 
 	return 0;
@@ -831,13 +842,15 @@
 
 static void clusterip_net_exit(struct net *net)
 {
-	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
 #ifdef CONFIG_PROC_FS
+	struct clusterip_net *cn = clusterip_pernet(net);
+
+	mutex_lock(&cn->mutex);
 	proc_remove(cn->procdir);
 	cn->procdir = NULL;
+	mutex_unlock(&cn->mutex);
 #endif
 	nf_unregister_net_hook(net, &cip_arp_ops);
-	WARN_ON_ONCE(!list_empty(&cn->configs));
 }
 
 static struct pernet_operations clusterip_net_ops = {
@@ -847,6 +860,10 @@
 	.size = sizeof(struct clusterip_net),
 };
 
+static struct notifier_block cip_netdev_notifier = {
+	.notifier_call = clusterip_netdev_event
+};
+
 static int __init clusterip_tg_init(void)
 {
 	int ret;
@@ -859,11 +876,17 @@
 	if (ret < 0)
 		goto cleanup_subsys;
 
+	ret = register_netdevice_notifier(&cip_netdev_notifier);
+	if (ret < 0)
+		goto unregister_target;
+
 	pr_info("ClusterIP Version %s loaded successfully\n",
 		CLUSTERIP_VERSION);
 
 	return 0;
 
+unregister_target:
+	xt_unregister_target(&clusterip_tg_reg);
 cleanup_subsys:
 	unregister_pernet_subsys(&clusterip_net_ops);
 	return ret;
@@ -873,11 +896,12 @@
 {
 	pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
 
+	unregister_netdevice_notifier(&cip_netdev_notifier);
 	xt_unregister_target(&clusterip_tg_reg);
 	unregister_pernet_subsys(&clusterip_net_ops);
 
-	/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
-	rcu_barrier_bh();
+	/* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */
+	rcu_barrier();
 }
 
 module_init(clusterip_tg_init);
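
CLUSTERIP replaces the per-config netdevice notifier with one module-wide notifier registered at init time, which walks every config on the pernet list when a device event arrives. A sketch of the one-notifier, many-subscribers shape (a boolean event stands in for the real NETDEV_* codes):

#include <stdio.h>
#include <string.h>

struct config {
        char ifname[16];
        int  ifindex;
};

static struct config configs[2] = {
        { "eth0", -1 }, { "eth1", -1 },
};

/* One global event handler walks every registered config. */
static void netdev_event(const char *dev, int ifindex, int registered)
{
        int i;

        for (i = 0; i < 2; i++) {
                struct config *c = &configs[i];

                if (registered && !strcmp(dev, c->ifname))
                        c->ifindex = ifindex;   /* NETDEV_REGISTER-like */
                else if (!registered && c->ifindex == ifindex)
                        c->ifindex = -1;        /* NETDEV_UNREGISTER-like */
        }
}

int main(void)
{
        netdev_event("eth1", 7, 1);
        printf("eth1 ifindex: %d\n", configs[1].ifindex);       /* 7 */
        netdev_event("eth1", 7, 0);
        printf("eth1 ifindex: %d\n", configs[1].ifindex);       /* -1 */
        return 0;
}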
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index aaaf9a8..5930d3b 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* iptables module for the IPv4 and TCP ECN bits, Version 1.5
  *
  * (C) 2002 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
 */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
@@ -32,7 +29,7 @@
 
 	if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
 		__u8 oldtos;
-		if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		if (skb_ensure_writable(skb, sizeof(struct iphdr)))
 			return false;
 		iph = ip_hdr(skb);
 		oldtos = iph->tos;
@@ -61,7 +58,7 @@
 	     tcph->cwr == einfo->proto.tcp.cwr))
 		return true;
 
-	if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
+	if (skb_ensure_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
 		return false;
 	tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb);
 
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
deleted file mode 100644
index fd3f9e8..0000000
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Masquerade.  Simple mapping which alters range to a local IP address
-   (depending on route). */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/types.h>
-#include <linux/inetdevice.h>
-#include <linux/ip.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <net/protocol.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/ipv4/nf_nat_masquerade.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
-
-/* FIXME: Multiple targets. --RR */
-static int masquerade_tg_check(const struct xt_tgchk_param *par)
-{
-	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
-	if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
-		pr_debug("bad MAP_IPS.\n");
-		return -EINVAL;
-	}
-	if (mr->rangesize != 1) {
-		pr_debug("bad rangesize %u\n", mr->rangesize);
-		return -EINVAL;
-	}
-	return nf_ct_netns_get(par->net, par->family);
-}
-
-static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
-	struct nf_nat_range2 range;
-	const struct nf_nat_ipv4_multi_range_compat *mr;
-
-	mr = par->targinfo;
-	range.flags = mr->range[0].flags;
-	range.min_proto = mr->range[0].min;
-	range.max_proto = mr->range[0].max;
-
-	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
-				      xt_out(par));
-}
-
-static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
-{
-	nf_ct_netns_put(par->net, par->family);
-}
-
-static struct xt_target masquerade_tg_reg __read_mostly = {
-	.name		= "MASQUERADE",
-	.family		= NFPROTO_IPV4,
-	.target		= masquerade_tg,
-	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
-	.table		= "nat",
-	.hooks		= 1 << NF_INET_POST_ROUTING,
-	.checkentry	= masquerade_tg_check,
-	.destroy	= masquerade_tg_destroy,
-	.me		= THIS_MODULE,
-};
-
-static int __init masquerade_tg_init(void)
-{
-	int ret;
-
-	ret = xt_register_target(&masquerade_tg_reg);
-	if (ret)
-		return ret;
-
-	ret = nf_nat_masquerade_ipv4_register_notifier();
-	if (ret)
-		xt_unregister_target(&masquerade_tg_reg);
-
-	return ret;
-}
-
-static void __exit masquerade_tg_exit(void)
-{
-	xt_unregister_target(&masquerade_tg_reg);
-	nf_nat_masquerade_ipv4_unregister_notifier();
-}
-
-module_init(masquerade_tg_init);
-module_exit(masquerade_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index e8bed33..e16b98e 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is a module which is used for rejecting packets.
  */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 690b17e..748dc3c 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -1,263 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <net/tcp.h>
-
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_SYNPROXY.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_synproxy.h>
-#include <net/netfilter/nf_conntrack_ecache.h>
 
-static struct iphdr *
-synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
-		  __be32 daddr)
-{
-	struct iphdr *iph;
-
-	skb_reset_network_header(skb);
-	iph = skb_put(skb, sizeof(*iph));
-	iph->version	= 4;
-	iph->ihl	= sizeof(*iph) / 4;
-	iph->tos	= 0;
-	iph->id		= 0;
-	iph->frag_off	= htons(IP_DF);
-	iph->ttl	= net->ipv4.sysctl_ip_default_ttl;
-	iph->protocol	= IPPROTO_TCP;
-	iph->check	= 0;
-	iph->saddr	= saddr;
-	iph->daddr	= daddr;
-
-	return iph;
-}
-
-static void
-synproxy_send_tcp(struct net *net,
-		  const struct sk_buff *skb, struct sk_buff *nskb,
-		  struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
-		  struct iphdr *niph, struct tcphdr *nth,
-		  unsigned int tcp_hdr_size)
-{
-	nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
-	nskb->ip_summed   = CHECKSUM_PARTIAL;
-	nskb->csum_start  = (unsigned char *)nth - nskb->head;
-	nskb->csum_offset = offsetof(struct tcphdr, check);
-
-	skb_dst_set_noref(nskb, skb_dst(skb));
-	nskb->protocol = htons(ETH_P_IP);
-	if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
-		goto free_nskb;
-
-	if (nfct) {
-		nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo);
-		nf_conntrack_get(nfct);
-	}
-
-	ip_local_out(net, nskb->sk, nskb);
-	return;
-
-free_nskb:
-	kfree_skb(nskb);
-}
-
-static void
-synproxy_send_client_synack(struct net *net,
-			    const struct sk_buff *skb, const struct tcphdr *th,
-			    const struct synproxy_options *opts)
-{
-	struct sk_buff *nskb;
-	struct iphdr *iph, *niph;
-	struct tcphdr *nth;
-	unsigned int tcp_hdr_size;
-	u16 mss = opts->mss;
-
-	iph = ip_hdr(skb);
-
-	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
-	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
-			 GFP_ATOMIC);
-	if (nskb == NULL)
-		return;
-	skb_reserve(nskb, MAX_TCP_HEADER);
-
-	niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
-
-	skb_reset_transport_header(nskb);
-	nth = skb_put(nskb, tcp_hdr_size);
-	nth->source	= th->dest;
-	nth->dest	= th->source;
-	nth->seq	= htonl(__cookie_v4_init_sequence(iph, th, &mss));
-	nth->ack_seq	= htonl(ntohl(th->seq) + 1);
-	tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
-	if (opts->options & XT_SYNPROXY_OPT_ECN)
-		tcp_flag_word(nth) |= TCP_FLAG_ECE;
-	nth->doff	= tcp_hdr_size / 4;
-	nth->window	= 0;
-	nth->check	= 0;
-	nth->urg_ptr	= 0;
-
-	synproxy_build_options(nth, opts);
-
-	synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
-			  IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
-}
-
-static void
-synproxy_send_server_syn(struct net *net,
-			 const struct sk_buff *skb, const struct tcphdr *th,
-			 const struct synproxy_options *opts, u32 recv_seq)
-{
-	struct synproxy_net *snet = synproxy_pernet(net);
-	struct sk_buff *nskb;
-	struct iphdr *iph, *niph;
-	struct tcphdr *nth;
-	unsigned int tcp_hdr_size;
-
-	iph = ip_hdr(skb);
-
-	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
-	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
-			 GFP_ATOMIC);
-	if (nskb == NULL)
-		return;
-	skb_reserve(nskb, MAX_TCP_HEADER);
-
-	niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
-
-	skb_reset_transport_header(nskb);
-	nth = skb_put(nskb, tcp_hdr_size);
-	nth->source	= th->source;
-	nth->dest	= th->dest;
-	nth->seq	= htonl(recv_seq - 1);
-	/* ack_seq is used to relay our ISN to the synproxy hook to initialize
-	 * sequence number translation once a connection tracking entry exists.
-	 */
-	nth->ack_seq	= htonl(ntohl(th->ack_seq) - 1);
-	tcp_flag_word(nth) = TCP_FLAG_SYN;
-	if (opts->options & XT_SYNPROXY_OPT_ECN)
-		tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
-	nth->doff	= tcp_hdr_size / 4;
-	nth->window	= th->window;
-	nth->check	= 0;
-	nth->urg_ptr	= 0;
-
-	synproxy_build_options(nth, opts);
-
-	synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
-			  niph, nth, tcp_hdr_size);
-}
-
-static void
-synproxy_send_server_ack(struct net *net,
-			 const struct ip_ct_tcp *state,
-			 const struct sk_buff *skb, const struct tcphdr *th,
-			 const struct synproxy_options *opts)
-{
-	struct sk_buff *nskb;
-	struct iphdr *iph, *niph;
-	struct tcphdr *nth;
-	unsigned int tcp_hdr_size;
-
-	iph = ip_hdr(skb);
-
-	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
-	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
-			 GFP_ATOMIC);
-	if (nskb == NULL)
-		return;
-	skb_reserve(nskb, MAX_TCP_HEADER);
-
-	niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
-
-	skb_reset_transport_header(nskb);
-	nth = skb_put(nskb, tcp_hdr_size);
-	nth->source	= th->dest;
-	nth->dest	= th->source;
-	nth->seq	= htonl(ntohl(th->ack_seq));
-	nth->ack_seq	= htonl(ntohl(th->seq) + 1);
-	tcp_flag_word(nth) = TCP_FLAG_ACK;
-	nth->doff	= tcp_hdr_size / 4;
-	nth->window	= htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
-	nth->check	= 0;
-	nth->urg_ptr	= 0;
-
-	synproxy_build_options(nth, opts);
-
-	synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
-}
-
-static void
-synproxy_send_client_ack(struct net *net,
-			 const struct sk_buff *skb, const struct tcphdr *th,
-			 const struct synproxy_options *opts)
-{
-	struct sk_buff *nskb;
-	struct iphdr *iph, *niph;
-	struct tcphdr *nth;
-	unsigned int tcp_hdr_size;
-
-	iph = ip_hdr(skb);
-
-	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
-	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
-			 GFP_ATOMIC);
-	if (nskb == NULL)
-		return;
-	skb_reserve(nskb, MAX_TCP_HEADER);
-
-	niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
-
-	skb_reset_transport_header(nskb);
-	nth = skb_put(nskb, tcp_hdr_size);
-	nth->source	= th->source;
-	nth->dest	= th->dest;
-	nth->seq	= htonl(ntohl(th->seq) + 1);
-	nth->ack_seq	= th->ack_seq;
-	tcp_flag_word(nth) = TCP_FLAG_ACK;
-	nth->doff	= tcp_hdr_size / 4;
-	nth->window	= htons(ntohs(th->window) >> opts->wscale);
-	nth->check	= 0;
-	nth->urg_ptr	= 0;
-
-	synproxy_build_options(nth, opts);
-
-	synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
-			  IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
-}
-
-static bool
-synproxy_recv_client_ack(struct net *net,
-			 const struct sk_buff *skb, const struct tcphdr *th,
-			 struct synproxy_options *opts, u32 recv_seq)
-{
-	struct synproxy_net *snet = synproxy_pernet(net);
-	int mss;
-
-	mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
-	if (mss == 0) {
-		this_cpu_inc(snet->stats->cookie_invalid);
-		return false;
-	}
-
-	this_cpu_inc(snet->stats->cookie_valid);
-	opts->mss = mss;
-	opts->options |= XT_SYNPROXY_OPT_MSS;
-
-	if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
-		synproxy_check_timestamp_cookie(opts);
-
-	synproxy_send_server_syn(net, skb, th, opts, recv_seq);
-	return true;
-}
+#include <net/netfilter/nf_synproxy.h>
 
 static unsigned int
 synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
@@ -286,6 +36,8 @@
 			opts.options |= XT_SYNPROXY_OPT_ECN;
 
 		opts.options &= info->options;
+		opts.mss_encode = opts.mss_option;
+		opts.mss_option = info->mss;
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy_init_timestamp_cookie(info, &opts);
 		else
@@ -309,135 +61,6 @@
 	return XT_CONTINUE;
 }
 
-static unsigned int ipv4_synproxy_hook(void *priv,
-				       struct sk_buff *skb,
-				       const struct nf_hook_state *nhs)
-{
-	struct net *net = nhs->net;
-	struct synproxy_net *snet = synproxy_pernet(net);
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct;
-	struct nf_conn_synproxy *synproxy;
-	struct synproxy_options opts = {};
-	const struct ip_ct_tcp *state;
-	struct tcphdr *th, _th;
-	unsigned int thoff;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	if (ct == NULL)
-		return NF_ACCEPT;
-
-	synproxy = nfct_synproxy(ct);
-	if (synproxy == NULL)
-		return NF_ACCEPT;
-
-	if (nf_is_loopback_packet(skb) ||
-	    ip_hdr(skb)->protocol != IPPROTO_TCP)
-		return NF_ACCEPT;
-
-	thoff = ip_hdrlen(skb);
-	th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
-	if (th == NULL)
-		return NF_DROP;
-
-	state = &ct->proto.tcp;
-	switch (state->state) {
-	case TCP_CONNTRACK_CLOSE:
-		if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
-			nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
-						      ntohl(th->seq) + 1);
-			break;
-		}
-
-		if (!th->syn || th->ack ||
-		    CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
-			break;
-
-		/* Reopened connection - reset the sequence number and timestamp
-		 * adjustments, they will get initialized once the connection is
-		 * reestablished.
-		 */
-		nf_ct_seqadj_init(ct, ctinfo, 0);
-		synproxy->tsoff = 0;
-		this_cpu_inc(snet->stats->conn_reopened);
-
-		/* fall through */
-	case TCP_CONNTRACK_SYN_SENT:
-		if (!synproxy_parse_options(skb, thoff, th, &opts))
-			return NF_DROP;
-
-		if (!th->syn && th->ack &&
-		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
-			/* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
-			 * therefore we need to add 1 to make the SYN sequence
-			 * number match the one of first SYN.
-			 */
-			if (synproxy_recv_client_ack(net, skb, th, &opts,
-						     ntohl(th->seq) + 1)) {
-				this_cpu_inc(snet->stats->cookie_retrans);
-				consume_skb(skb);
-				return NF_STOLEN;
-			} else {
-				return NF_DROP;
-			}
-		}
-
-		synproxy->isn = ntohl(th->ack_seq);
-		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
-			synproxy->its = opts.tsecr;
-
-		nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
-		break;
-	case TCP_CONNTRACK_SYN_RECV:
-		if (!th->syn || !th->ack)
-			break;
-
-		if (!synproxy_parse_options(skb, thoff, th, &opts))
-			return NF_DROP;
-
-		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
-			synproxy->tsoff = opts.tsval - synproxy->its;
-			nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
-		}
-
-		opts.options &= ~(XT_SYNPROXY_OPT_MSS |
-				  XT_SYNPROXY_OPT_WSCALE |
-				  XT_SYNPROXY_OPT_SACK_PERM);
-
-		swap(opts.tsval, opts.tsecr);
-		synproxy_send_server_ack(net, state, skb, th, &opts);
-
-		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
-		nf_conntrack_event_cache(IPCT_SEQADJ, ct);
-
-		swap(opts.tsval, opts.tsecr);
-		synproxy_send_client_ack(net, skb, th, &opts);
-
-		consume_skb(skb);
-		return NF_STOLEN;
-	default:
-		break;
-	}
-
-	synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
-	return NF_ACCEPT;
-}
-
-static const struct nf_hook_ops ipv4_synproxy_ops[] = {
-	{
-		.hook		= ipv4_synproxy_hook,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
-	},
-	{
-		.hook		= ipv4_synproxy_hook,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
-	},
-};
-
 static int synproxy_tg4_check(const struct xt_tgchk_param *par)
 {
 	struct synproxy_net *snet = synproxy_pernet(par->net);
@@ -452,16 +75,12 @@
 	if (err)
 		return err;
 
-	if (snet->hook_ref4 == 0) {
-		err = nf_register_net_hooks(par->net, ipv4_synproxy_ops,
-					    ARRAY_SIZE(ipv4_synproxy_ops));
-		if (err) {
-			nf_ct_netns_put(par->net, par->family);
-			return err;
-		}
+	err = nf_synproxy_ipv4_init(snet, par->net);
+	if (err) {
+		nf_ct_netns_put(par->net, par->family);
+		return err;
 	}
 
-	snet->hook_ref4++;
 	return err;
 }
 
@@ -469,10 +88,7 @@
 {
 	struct synproxy_net *snet = synproxy_pernet(par->net);
 
-	snet->hook_ref4--;
-	if (snet->hook_ref4 == 0)
-		nf_unregister_net_hooks(par->net, ipv4_synproxy_ops,
-					ARRAY_SIZE(ipv4_synproxy_ops));
+	nf_synproxy_ipv4_fini(snet, par->net);
 	nf_ct_netns_put(par->net, par->family);
 }
 
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 7c6c20e..161ba41 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match AH parameters. */
 /* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 12843c9..cc23f1c 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -36,7 +33,6 @@
 				const struct net_device *dev, u8 flags)
 {
 	struct fib_result res;
-	bool dev_match;
 	int ret __maybe_unused;
 
 	if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
@@ -46,21 +42,7 @@
 		if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL))
 			return false;
 	}
-	dev_match = false;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-	for (ret = 0; ret < res.fi->fib_nhs; ret++) {
-		struct fib_nh *nh = &res.fi->fib_nh[ret];
-
-		if (nh->nh_dev == dev) {
-			dev_match = true;
-			break;
-		}
-	}
-#else
-	if (FIB_RES_DEV(res) == dev)
-		dev_match = true;
-#endif
-	return dev_match || flags & XT_RPFILTER_LOOSE;
+	return fib_info_nh_uses_dev(res.fi, dev) || flags & XT_RPFILTER_LOOSE;
 }
 
 static bool
@@ -96,6 +78,7 @@
 	flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
 	flow.flowi4_tos = RT_TOS(iph->tos);
 	flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+	flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));
 
 	return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
 }
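
Beyond the license cleanup, two functional changes in ipt_rpfilter.c: the hand-rolled walk over res.fi->fib_nh[] (with its CONFIG_IP_ROUTE_MULTIPATH special case) is replaced by the fib_info_nh_uses_dev() helper, and flowi4_oif is seeded from the packet's L3 master device so the reverse lookup lands in the right VRF table. What the helper answers, sketched with plain arrays (types are simplified stand-ins):

/* Sketch of the "does any nexthop of this route use this device?"
 * question that fib_info_nh_uses_dev() answers; simplified types. */
#include <stdbool.h>
#include <stdio.h>

struct nexthop { int ifindex; };
struct fib_info { int num_nh; struct nexthop nh[4]; };

static bool fib_uses_dev(const struct fib_info *fi, int ifindex)
{
	for (int i = 0; i < fi->num_nh; i++)
		if (fi->nh[i].ifindex == ifindex)
			return true;
	return false;
}

int main(void)
{
	struct fib_info fi = { .num_nh = 2, .nh = { { 3 }, { 7 } } };
	bool loose = false;	/* XT_RPFILTER_LOOSE accepts any route */

	printf("dev 7: %d\n", fib_uses_dev(&fi, 7) || loose);
	printf("dev 9: %d\n", fib_uses_dev(&fi, 9) || loose);
	return 0;
}
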
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 9ac92ea..9d54b40 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
  *
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/module.h>
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index dea138c..bb9266e 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
  *
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a317445..ad33687 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -15,8 +12,6 @@
 #include <net/ip.h>
 
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
 
 static int __net_init iptable_nat_table_init(struct net *net);
 
@@ -70,10 +65,10 @@
 	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++) {
-		ret = nf_nat_l3proto_ipv4_register_fn(net, &nf_nat_ipv4_ops[i]);
+		ret = nf_nat_ipv4_register_fn(net, &nf_nat_ipv4_ops[i]);
 		if (ret) {
 			while (i)
-				nf_nat_l3proto_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[--i]);
+				nf_nat_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[--i]);
 
 			return ret;
 		}
@@ -87,7 +82,7 @@
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++)
-		nf_nat_l3proto_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[i]);
+		nf_nat_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[i]);
 }
 
 static int __net_init iptable_nat_table_init(struct net *net)
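
The rename from nf_nat_l3proto_ipv4_register_fn() to nf_nat_ipv4_register_fn() tracks the removal of the l3proto layer; the registration logic itself is untouched. Note the unwind idiom it preserves: on a mid-array failure, `while (i)` unregisters exactly the hooks that had registered, in reverse order. The same pattern in a self-contained miniature:

/* The register-all-or-roll-back pattern used above, in miniature. */
#include <stdio.h>

#define N 4

static int fail_at = 2;		/* simulate the third registration failing */

static int register_one(int i)
{
	if (i == fail_at)
		return -1;
	printf("registered %d\n", i);
	return 0;
}

static void unregister_one(int i)
{
	printf("unregistered %d\n", i);
}

static int register_all(void)
{
	int i, ret;

	for (i = 0; i < N; i++) {
		ret = register_one(i);
		if (ret) {
			while (i)	/* unwind only what succeeded */
				unregister_one(--i);
			return ret;
		}
	}
	return 0;
}

int main(void)
{
	if (register_all())
		printf("registration failed, fully unwound\n");
	return 0;
}
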
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 960625a..69697eb 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -1,7 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT .
  *
- * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@netfilter.org>
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index e5379fe..ac633c1 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * "security" table
  *
@@ -10,10 +11,6 @@
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
  * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index a0d3ad6..8115611 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index 39895b9..6cc5743 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
  * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
  *
  * Extracted from xt_TEE.c
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 or later, as
- * published by the Free Software Foundation.
  */
 #include <linux/ip.h>
 #include <linux/module.h>
@@ -68,7 +65,7 @@
 
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	/* Avoid counting cloned packets towards the original connection. */
-	nf_reset(skb);
+	nf_reset_ct(skb);
 	nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
 #endif
 	/*
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index e1e56d7..012c404 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index df5c2a2..7a83f88 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org>
  *
@@ -5,10 +6,6 @@
  *
  * Bart De Schuymer <bdschuym@pandora.be>
  * Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 1e6f28c..4b2d49c 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 4e6b53a..3e2685c 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -1,13 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * H.323 extension for NAT alteration.
  *
  * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
  * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
- * This source code is licensed under General Public License version 2.
- *
  * Based on the 'brute force' H.323 NAT module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Jozsef Kadlecsik <kadlec@netfilter.org>
  */
 
 #include <linux/module.h>
@@ -59,7 +58,7 @@
 			net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n");
 			return -1;
 		}
-		/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
+		/* nf_nat_mangle_udp_packet uses skb_ensure_writable() to copy
 		 * or pull everything in a linear buffer, so we can safely
 		 * use the skb pointers now */
 		*data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
@@ -222,11 +221,11 @@
 		int ret;
 
 		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
-		ret = nf_ct_expect_related(rtp_exp);
+		ret = nf_ct_expect_related(rtp_exp, 0);
 		if (ret == 0) {
 			rtcp_exp->tuple.dst.u.udp.port =
 			    htons(nated_port + 1);
-			ret = nf_ct_expect_related(rtcp_exp);
+			ret = nf_ct_expect_related(rtcp_exp, 0);
 			if (ret == 0)
 				break;
 			else if (ret == -EBUSY) {
@@ -297,7 +296,7 @@
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -353,7 +352,7 @@
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -445,7 +444,7 @@
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -538,7 +537,7 @@
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -631,4 +630,4 @@
 MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
 MODULE_DESCRIPTION("H.323 NAT helper");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("ip_nat_h323");
+MODULE_ALIAS_NF_NAT_HELPER("h323");
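
Every nf_ct_expect_related() call gains a second argument here; that is the new flags parameter of the expectation API (passed as 0, so behaviour is unchanged — I believe the flag introduced alongside it relates to skipping the master-tuple check, but that is from memory). The surrounding code is a port-probing loop: try a candidate, advance on -EBUSY, abort on any other error. In isolation, with reserve_port() as a hypothetical stand-in:

/* Sketch of the try-a-port, retry-on-busy loop used by the H.323
 * helper above; reserve_port() is a hypothetical stand-in. */
#include <errno.h>
#include <stdio.h>

static int reserve_port(unsigned short port)
{
	return port < 1026 ? -EBUSY : 0;	/* pretend low ports are taken */
}

static int grab_port(unsigned short first, unsigned short last)
{
	for (unsigned short p = first; p <= last; p++) {
		int ret = reserve_port(p);

		if (ret == 0)
			return p;	/* got one */
		if (ret != -EBUSY)
			return ret;	/* hard error: give up */
	}
	return -EBUSY;
}

int main(void)
{
	printf("got port %d\n", grab_port(1024, 1030));
	return 0;
}
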
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
deleted file mode 100644
index 6115bf1..0000000
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ /dev/null
@@ -1,421 +0,0 @@
-/*
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/secure_seq.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <net/ip.h>
-
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
-
-#ifdef CONFIG_XFRM
-static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
-				       const struct nf_conn *ct,
-				       enum ip_conntrack_dir dir,
-				       unsigned long statusbit,
-				       struct flowi *fl)
-{
-	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
-	struct flowi4 *fl4 = &fl->u.ip4;
-
-	if (ct->status & statusbit) {
-		fl4->daddr = t->dst.u3.ip;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP ||
-		    t->dst.protonum == IPPROTO_UDPLITE ||
-		    t->dst.protonum == IPPROTO_DCCP ||
-		    t->dst.protonum == IPPROTO_SCTP)
-			fl4->fl4_dport = t->dst.u.all;
-	}
-
-	statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		fl4->saddr = t->src.u3.ip;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP ||
-		    t->dst.protonum == IPPROTO_UDPLITE ||
-		    t->dst.protonum == IPPROTO_DCCP ||
-		    t->dst.protonum == IPPROTO_SCTP)
-			fl4->fl4_sport = t->src.u.all;
-	}
-}
-#endif /* CONFIG_XFRM */
-
-static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
-				 const struct nf_nat_range2 *range)
-{
-	return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
-	       ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
-}
-
-static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
-				   __be16 dport)
-{
-	return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
-}
-
-static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
-				  unsigned int iphdroff,
-				  const struct nf_nat_l4proto *l4proto,
-				  const struct nf_conntrack_tuple *target,
-				  enum nf_nat_manip_type maniptype)
-{
-	struct iphdr *iph;
-	unsigned int hdroff;
-
-	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
-		return false;
-
-	iph = (void *)skb->data + iphdroff;
-	hdroff = iphdroff + iph->ihl * 4;
-
-	if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
-				target, maniptype))
-		return false;
-	iph = (void *)skb->data + iphdroff;
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
-		iph->saddr = target->src.u3.ip;
-	} else {
-		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
-		iph->daddr = target->dst.u3.ip;
-	}
-	return true;
-}
-
-static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
-				    unsigned int iphdroff, __sum16 *check,
-				    const struct nf_conntrack_tuple *t,
-				    enum nf_nat_manip_type maniptype)
-{
-	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
-	__be32 oldip, newip;
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		oldip = iph->saddr;
-		newip = t->src.u3.ip;
-	} else {
-		oldip = iph->daddr;
-		newip = t->dst.u3.ip;
-	}
-	inet_proto_csum_replace4(check, skb, oldip, newip, true);
-}
-
-static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
-				    u8 proto, void *data, __sum16 *check,
-				    int datalen, int oldlen)
-{
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		const struct iphdr *iph = ip_hdr(skb);
-
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
-			ip_hdrlen(skb);
-		skb->csum_offset = (void *)check - data;
-		*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
-					    proto, 0);
-	} else
-		inet_proto_csum_replace2(check, skb,
-					 htons(oldlen), htons(datalen), true);
-}
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
-				       struct nf_nat_range2 *range)
-{
-	if (tb[CTA_NAT_V4_MINIP]) {
-		range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
-		range->flags |= NF_NAT_RANGE_MAP_IPS;
-	}
-
-	if (tb[CTA_NAT_V4_MAXIP])
-		range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
-	else
-		range->max_addr.ip = range->min_addr.ip;
-
-	return 0;
-}
-#endif
-
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
-	.l3proto		= NFPROTO_IPV4,
-	.in_range		= nf_nat_ipv4_in_range,
-	.secure_port		= nf_nat_ipv4_secure_port,
-	.manip_pkt		= nf_nat_ipv4_manip_pkt,
-	.csum_update		= nf_nat_ipv4_csum_update,
-	.csum_recalc		= nf_nat_ipv4_csum_recalc,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_ipv4_nlattr_to_range,
-#endif
-#ifdef CONFIG_XFRM
-	.decode_session		= nf_nat_ipv4_decode_session,
-#endif
-};
-
-int nf_nat_icmp_reply_translation(struct sk_buff *skb,
-				  struct nf_conn *ct,
-				  enum ip_conntrack_info ctinfo,
-				  unsigned int hooknum)
-{
-	struct {
-		struct icmphdr	icmp;
-		struct iphdr	ip;
-	} *inside;
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
-	unsigned int hdrlen = ip_hdrlen(skb);
-	const struct nf_nat_l4proto *l4proto;
-	struct nf_conntrack_tuple target;
-	unsigned long statusbit;
-
-	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
-
-	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
-		return 0;
-	if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
-		return 0;
-
-	inside = (void *)skb->data + hdrlen;
-	if (inside->icmp.type == ICMP_REDIRECT) {
-		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
-			return 0;
-		if (ct->status & IPS_NAT_MASK)
-			return 0;
-	}
-
-	if (manip == NF_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply direction */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	if (!(ct->status & statusbit))
-		return 1;
-
-	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
-	if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
-				   l4proto, &ct->tuplehash[!dir].tuple, !manip))
-		return 0;
-
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		/* Reloading "inside" here since manip_pkt may reallocate */
-		inside = (void *)skb->data + hdrlen;
-		inside->icmp.checksum = 0;
-		inside->icmp.checksum =
-			csum_fold(skb_checksum(skb, hdrlen,
-					       skb->len - hdrlen, 0));
-	}
-
-	/* Change outer to look like the reply to an incoming packet */
-	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
-	if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
-		return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
-
-static unsigned int
-nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
-	       const struct nf_hook_state *state)
-{
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	if (!ct)
-		return NF_ACCEPT;
-
-	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
-		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
-			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
-							   state->hook))
-				return NF_DROP;
-			else
-				return NF_ACCEPT;
-		}
-	}
-
-	return nf_nat_inet_fn(priv, skb, state);
-}
-EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
-
-static unsigned int
-nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
-	       const struct nf_hook_state *state)
-{
-	unsigned int ret;
-	__be32 daddr = ip_hdr(skb)->daddr;
-
-	ret = nf_nat_ipv4_fn(priv, skb, state);
-	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    daddr != ip_hdr(skb)->daddr)
-		skb_dst_drop(skb);
-
-	return ret;
-}
-
-static unsigned int
-nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
-		const struct nf_hook_state *state)
-{
-#ifdef CONFIG_XFRM
-	const struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	int err;
-#endif
-	unsigned int ret;
-
-	ret = nf_nat_ipv4_fn(priv, skb, state);
-#ifdef CONFIG_XFRM
-	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
-	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if ((ct->tuplehash[dir].tuple.src.u3.ip !=
-		     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
-		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
-		     ct->tuplehash[dir].tuple.src.u.all !=
-		     ct->tuplehash[!dir].tuple.dst.u.all)) {
-			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
-			if (err < 0)
-				ret = NF_DROP_ERR(err);
-		}
-	}
-#endif
-	return ret;
-}
-
-static unsigned int
-nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
-		     const struct nf_hook_state *state)
-{
-	const struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	unsigned int ret;
-	int err;
-
-	ret = nf_nat_ipv4_fn(priv, skb, state);
-	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
-		    ct->tuplehash[!dir].tuple.src.u3.ip) {
-			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
-			if (err < 0)
-				ret = NF_DROP_ERR(err);
-		}
-#ifdef CONFIG_XFRM
-		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
-			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
-			 ct->tuplehash[dir].tuple.dst.u.all !=
-			 ct->tuplehash[!dir].tuple.src.u.all) {
-			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
-			if (err < 0)
-				ret = NF_DROP_ERR(err);
-		}
-#endif
-	}
-	return ret;
-}
-
-static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
-	/* Before packet filtering, change destination */
-	{
-		.hook		= nf_nat_ipv4_in,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP_PRI_NAT_DST,
-	},
-	/* After packet filtering, change source */
-	{
-		.hook		= nf_nat_ipv4_out,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP_PRI_NAT_SRC,
-	},
-	/* Before packet filtering, change destination */
-	{
-		.hook		= nf_nat_ipv4_local_fn,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP_PRI_NAT_DST,
-	},
-	/* After packet filtering, change source */
-	{
-		.hook		= nf_nat_ipv4_fn,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP_PRI_NAT_SRC,
-	},
-};
-
-int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
-{
-	return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_register_fn);
-
-void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
-{
-	nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn);
-
-static int __init nf_nat_l3proto_ipv4_init(void)
-{
-	int err;
-
-	err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
-	if (err < 0)
-		goto err1;
-	err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
-	if (err < 0)
-		goto err2;
-	return err;
-
-err2:
-	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
-err1:
-	return err;
-}
-
-static void __exit nf_nat_l3proto_ipv4_exit(void)
-{
-	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
-	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
-}
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("nf-nat-" __stringify(AF_INET));
-
-module_init(nf_nat_l3proto_ipv4_init);
-module_exit(nf_nat_l3proto_ipv4_exit);
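
This whole file disappears because the nf_nat_l3proto abstraction was folded into the common NAT core (only IPv4 and IPv6 ever implemented it). The arithmetic it leaned on is the durable part: csum_replace4() and inet_proto_csum_replace4() patch an existing one's-complement checksum incrementally when an address is rewritten, instead of re-summing the packet (RFC 1624). A standalone demonstration of that update rule, checked against a full recompute:

/* Incremental one's-complement checksum update (RFC 1624), the math
 * behind the csum_replace4() calls in the deleted manip_pkt above. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* full one's-complement checksum over 16-bit words */
static uint16_t csum16(const uint16_t *w, size_t n)
{
	uint32_t sum = 0;

	while (n--)
		sum += *w++;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ~sum & 0xffff;
}

/* RFC 1624: HC' = ~(~HC + ~m + m') for one word changing m -> m' */
static uint16_t csum_update16(uint16_t hc, uint16_t old, uint16_t new)
{
	uint32_t sum = (uint16_t)~hc + (uint16_t)~old + new;

	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ~sum & 0xffff;
}

int main(void)
{
	uint16_t pkt[4] = { 0x0a00, 0x0001, 0x1234, 0x0000 };
	uint16_t hc = csum16(pkt, 4);
	uint16_t hc2 = csum_update16(hc, pkt[2], 0x5678);

	pkt[2] = 0x5678;	/* rewrite one word, patch instead of re-sum */
	printf("incremental %#x vs full recompute %#x\n", hc2, csum16(pkt, 4));
	return 0;
}
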
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
deleted file mode 100644
index 4c7fcd3..0000000
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/atomic.h>
-#include <linux/inetdevice.h>
-#include <linux/ip.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <net/protocol.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/ipv4/nf_nat_masquerade.h>
-
-unsigned int
-nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
-		       const struct nf_nat_range2 *range,
-		       const struct net_device *out)
-{
-	struct nf_conn *ct;
-	struct nf_conn_nat *nat;
-	enum ip_conntrack_info ctinfo;
-	struct nf_nat_range2 newrange;
-	const struct rtable *rt;
-	__be32 newsrc, nh;
-
-	WARN_ON(hooknum != NF_INET_POST_ROUTING);
-
-	ct = nf_ct_get(skb, &ctinfo);
-
-	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			 ctinfo == IP_CT_RELATED_REPLY)));
-
-	/* Source address is 0.0.0.0 - locally generated packet that is
-	 * probably not supposed to be masqueraded.
-	 */
-	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
-		return NF_ACCEPT;
-
-	rt = skb_rtable(skb);
-	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
-	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
-	if (!newsrc) {
-		pr_info("%s ate my IP address\n", out->name);
-		return NF_DROP;
-	}
-
-	nat = nf_ct_nat_ext_add(ct);
-	if (nat)
-		nat->masq_index = out->ifindex;
-
-	/* Transfer from original range. */
-	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
-	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
-	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
-	newrange.min_addr.ip = newsrc;
-	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
-
-	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
-
-static int device_cmp(struct nf_conn *i, void *ifindex)
-{
-	const struct nf_conn_nat *nat = nfct_nat(i);
-
-	if (!nat)
-		return 0;
-	if (nf_ct_l3num(i) != NFPROTO_IPV4)
-		return 0;
-	return nat->masq_index == (int)(long)ifindex;
-}
-
-static int masq_device_event(struct notifier_block *this,
-			     unsigned long event,
-			     void *ptr)
-{
-	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct net *net = dev_net(dev);
-
-	if (event == NETDEV_DOWN) {
-		/* Device was downed.  Search entire table for
-		 * conntracks which were associated with that device,
-		 * and forget them.
-		 */
-		WARN_ON(dev->ifindex == 0);
-
-		nf_ct_iterate_cleanup_net(net, device_cmp,
-					  (void *)(long)dev->ifindex, 0, 0);
-	}
-
-	return NOTIFY_DONE;
-}
-
-static int masq_inet_event(struct notifier_block *this,
-			   unsigned long event,
-			   void *ptr)
-{
-	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
-	struct netdev_notifier_info info;
-
-	/* The masq_dev_notifier will catch the case of the device going
-	 * down.  So if the inetdev is dead and being destroyed we have
-	 * no work to do.  Otherwise this is an individual address removal
-	 * and we have to perform the flush.
-	 */
-	if (idev->dead)
-		return NOTIFY_DONE;
-
-	netdev_notifier_info_init(&info, idev->dev);
-	return masq_device_event(this, event, &info);
-}
-
-static struct notifier_block masq_dev_notifier = {
-	.notifier_call	= masq_device_event,
-};
-
-static struct notifier_block masq_inet_notifier = {
-	.notifier_call	= masq_inet_event,
-};
-
-static int masq_refcnt;
-static DEFINE_MUTEX(masq_mutex);
-
-int nf_nat_masquerade_ipv4_register_notifier(void)
-{
-	int ret = 0;
-
-	mutex_lock(&masq_mutex);
-	/* check if the notifier was already set */
-	if (++masq_refcnt > 1)
-		goto out_unlock;
-
-	/* Register for device down reports */
-	ret = register_netdevice_notifier(&masq_dev_notifier);
-	if (ret)
-		goto err_dec;
-	/* Register IP address change reports */
-	ret = register_inetaddr_notifier(&masq_inet_notifier);
-	if (ret)
-		goto err_unregister;
-
-	mutex_unlock(&masq_mutex);
-	return ret;
-
-err_unregister:
-	unregister_netdevice_notifier(&masq_dev_notifier);
-err_dec:
-	masq_refcnt--;
-out_unlock:
-	mutex_unlock(&masq_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
-
-void nf_nat_masquerade_ipv4_unregister_notifier(void)
-{
-	mutex_lock(&masq_mutex);
-	/* check if the notifier still has clients */
-	if (--masq_refcnt > 0)
-		goto out_unlock;
-
-	unregister_netdevice_notifier(&masq_dev_notifier);
-	unregister_inetaddr_notifier(&masq_inet_notifier);
-out_unlock:
-	mutex_unlock(&masq_mutex);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
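
The IPv4 masquerade core is likewise deleted; it was merged with the IPv6 version into a protocol-independent nf_nat_masquerade file, if I recall the series correctly. The registration pattern above is the part worth keeping in mind: masq_refcnt under masq_mutex makes notifier registration shared and idempotent across its several users (the xtables target, nft masquerading). The same pattern, self-contained with pthreads (build with -pthread):

/* Refcounted one-time registration, as masq_refcnt/masq_mutex above. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int refcnt;

static int do_register(void)    { printf("notifier registered\n");   return 0; }
static void do_unregister(void) { printf("notifier unregistered\n"); }

static int get_ref(void)
{
	int ret = 0;

	pthread_mutex_lock(&lock);
	if (++refcnt == 1) {		/* first user does the real work */
		ret = do_register();
		if (ret)
			refcnt--;
	}
	pthread_mutex_unlock(&lock);
	return ret;
}

static void put_ref(void)
{
	pthread_mutex_lock(&lock);
	if (--refcnt == 0)		/* last user tears down */
		do_unregister();
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	get_ref(); get_ref();		/* two users, one registration */
	put_ref(); put_ref();
	return 0;
}
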
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 5d259a1..b2aeb7b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * nf_nat_pptp.c
  *
@@ -37,7 +38,7 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
 MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
-MODULE_ALIAS("ip_nat_pptp");
+MODULE_ALIAS_NF_NAT_HELPER("pptp");
 
 static void pptp_nat_expected(struct nf_conn *ct,
 			      struct nf_conntrack_expect *exp)
@@ -299,8 +300,6 @@
 
 static int __init nf_nat_helper_pptp_init(void)
 {
-	nf_nat_need_gre();
-
 	BUG_ON(nf_nat_pptp_hook_outbound != NULL);
 	RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
 
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
deleted file mode 100644
index 00fda63..0000000
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * nf_nat_proto_gre.c
- *
- * NAT protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-#include <linux/netfilter/nf_conntrack_proto_gre.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
-
-/* generate unique tuple ... */
-static void
-gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
-		 struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range2 *range,
-		 enum nf_nat_manip_type maniptype,
-		 const struct nf_conn *ct)
-{
-	static u_int16_t key;
-	__be16 *keyptr;
-	unsigned int min, i, range_size;
-
-	/* If there is no master conntrack we are not PPTP,
-	   do not change tuples */
-	if (!ct->master)
-		return;
-
-	if (maniptype == NF_NAT_MANIP_SRC)
-		keyptr = &tuple->src.u.gre.key;
-	else
-		keyptr = &tuple->dst.u.gre.key;
-
-	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
-		pr_debug("%p: NATing GRE PPTP\n", ct);
-		min = 1;
-		range_size = 0xffff;
-	} else {
-		min = ntohs(range->min_proto.gre.key);
-		range_size = ntohs(range->max_proto.gre.key) - min + 1;
-	}
-
-	pr_debug("min = %u, range_size = %u\n", min, range_size);
-
-	for (i = 0; ; ++key) {
-		*keyptr = htons(min + key % range_size);
-		if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
-			return;
-	}
-
-	pr_debug("%p: no NAT mapping\n", ct);
-	return;
-}
-
-/* manipulate a GRE packet according to maniptype */
-static bool
-gre_manip_pkt(struct sk_buff *skb,
-	      const struct nf_nat_l3proto *l3proto,
-	      unsigned int iphdroff, unsigned int hdroff,
-	      const struct nf_conntrack_tuple *tuple,
-	      enum nf_nat_manip_type maniptype)
-{
-	const struct gre_base_hdr *greh;
-	struct pptp_gre_header *pgreh;
-
-	/* pgreh includes two optional 32bit fields which are not required
-	 * to be there.  That's where the magic '8' comes from */
-	if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
-		return false;
-
-	greh = (void *)skb->data + hdroff;
-	pgreh = (struct pptp_gre_header *)greh;
-
-	/* we only have destination manip of a packet, since 'source key'
-	 * is not present in the packet itself */
-	if (maniptype != NF_NAT_MANIP_DST)
-		return true;
-
-	switch (greh->flags & GRE_VERSION) {
-	case GRE_VERSION_0:
-		/* We do not currently NAT any GREv0 packets.
-		 * Try to behave like "nf_nat_proto_unknown" */
-		break;
-	case GRE_VERSION_1:
-		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
-		pgreh->call_id = tuple->dst.u.gre.key;
-		break;
-	default:
-		pr_debug("can't nat unknown GRE version\n");
-		return false;
-	}
-	return true;
-}
-
-static const struct nf_nat_l4proto gre = {
-	.l4proto		= IPPROTO_GRE,
-	.manip_pkt		= gre_manip_pkt,
-	.in_range		= nf_nat_l4proto_in_range,
-	.unique_tuple		= gre_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
-#endif
-};
-
-static int __init nf_nat_proto_gre_init(void)
-{
-	return nf_nat_l4proto_register(NFPROTO_IPV4, &gre);
-}
-
-static void __exit nf_nat_proto_gre_fini(void)
-{
-	nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre);
-}
-
-module_init(nf_nat_proto_gre_init);
-module_exit(nf_nat_proto_gre_fini);
-
-void nf_nat_need_gre(void)
-{
-	return;
-}
-EXPORT_SYMBOL_GPL(nf_nat_need_gre);
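
GRE NAT stops being a standalone module because the nf_nat_l4proto indirection it plugged into was removed; the PPTP Call-ID handling moved into the NAT core's protocol switch, as I understand the series. As the deleted comments explain, the Call-ID plays the role a port plays elsewhere, and gre_unique_tuple() picks one by a bounded round-robin probe over the configured range. That search loop in isolation, with in_use() as a hypothetical stand-in for nf_nat_used_tuple():

/* Round-robin search for an unused key in [min, min + range_size), as
 * the deleted gre_unique_tuple() does; in_use() is a stand-in. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool in_use(uint16_t key)
{
	return key < 1005;		/* pretend the low keys are taken */
}

static int pick_key(uint16_t min, uint32_t range_size)
{
	static uint16_t next;		/* rotates across calls, like 'key' */

	for (uint32_t i = 0; i < range_size; i++, next++) {
		uint16_t cand = min + next % range_size;

		if (!in_use(cand))
			return cand;
	}
	return -1;			/* range exhausted: no mapping */
}

int main(void)
{
	printf("picked %d\n", pick_key(1000, 10));
	return 0;
}
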
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
deleted file mode 100644
index 6d7cf1d..0000000
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static bool
-icmp_in_range(const struct nf_conntrack_tuple *tuple,
-	      enum nf_nat_manip_type maniptype,
-	      const union nf_conntrack_man_proto *min,
-	      const union nf_conntrack_man_proto *max)
-{
-	return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
-	       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
-}
-
-static void
-icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
-		  struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range2 *range,
-		  enum nf_nat_manip_type maniptype,
-		  const struct nf_conn *ct)
-{
-	static u_int16_t id;
-	unsigned int range_size;
-	unsigned int i;
-
-	range_size = ntohs(range->max_proto.icmp.id) -
-		     ntohs(range->min_proto.icmp.id) + 1;
-	/* If no range specified... */
-	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
-		range_size = 0xFFFF;
-
-	for (i = 0; ; ++id) {
-		tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
-					     (id % range_size));
-		if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
-			return;
-	}
-	return;
-}
-
-static bool
-icmp_manip_pkt(struct sk_buff *skb,
-	       const struct nf_nat_l3proto *l3proto,
-	       unsigned int iphdroff, unsigned int hdroff,
-	       const struct nf_conntrack_tuple *tuple,
-	       enum nf_nat_manip_type maniptype)
-{
-	struct icmphdr *hdr;
-
-	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
-		return false;
-
-	hdr = (struct icmphdr *)(skb->data + hdroff);
-	inet_proto_csum_replace2(&hdr->checksum, skb,
-				 hdr->un.echo.id, tuple->src.u.icmp.id, false);
-	hdr->un.echo.id = tuple->src.u.icmp.id;
-	return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
-	.l4proto		= IPPROTO_ICMP,
-	.manip_pkt		= icmp_manip_pkt,
-	.in_range		= icmp_in_range,
-	.unique_tuple		= icmp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
-#endif
-};
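
The ICMP counterpart goes the same way. The point of icmp_manip_pkt() is that ICMP queries have no ports, so the echo identifier is the quantity NAT rewrites, and the ICMP checksum must then be patched over that one 16-bit change — the same RFC 1624 update shown earlier, in its 16-bit form. Self-contained:

/* ICMP Echo has no ports; NAT rewrites the identifier instead, then
 * patches the checksum over the 16-bit change (cf. icmp_manip_pkt). */
#include <stdint.h>
#include <stdio.h>

struct icmp_echo {
	uint8_t  type, code;
	uint16_t checksum;
	uint16_t id, seq;
};

/* RFC 1624 incremental update for one 16-bit word */
static uint16_t csum_fixup(uint16_t hc, uint16_t old, uint16_t new)
{
	uint32_t sum = (uint16_t)~hc + (uint16_t)~old + new;

	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ~sum & 0xffff;
}

static void nat_echo_id(struct icmp_echo *h, uint16_t new_id)
{
	h->checksum = csum_fixup(h->checksum, h->id, new_id);
	h->id = new_id;
}

int main(void)
{
	struct icmp_echo h = { 8, 0, 0xbeef, 0x1111, 1 };

	nat_echo_id(&h, 0x2222);
	printf("id %#x csum %#x\n", h.id, h.checksum);
	return 0;
}
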
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
index ac110c1..717b726 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * nf_nat_snmp_basic.c
  *
@@ -25,17 +26,6 @@
  *
  * Copyright (c) 2000 RP Internet (www.rpi.net.au).
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Author: James Morris <jmorris@intercode.com.au>
  *
  * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
@@ -60,6 +50,7 @@
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
 MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
 MODULE_ALIAS("ip_nat_snmp_basic");
+MODULE_ALIAS_NFCT_HELPER("snmp_trap");
 
 #define SNMP_PORT 161
 #define SNMP_TRAP_PORT 162
@@ -104,6 +95,8 @@
 int snmp_version(void *context, size_t hdrlen, unsigned char tag,
 		 const void *data, size_t datalen)
 {
+	if (datalen != 1)
+		return -EINVAL;
 	if (*(unsigned char *)data > 1)
 		return -ENOTSUPP;
 	return 1;
@@ -113,8 +106,11 @@
 		const void *data, size_t datalen)
 {
 	struct snmp_ctx *ctx = (struct snmp_ctx *)context;
-	__be32 *pdata = (__be32 *)data;
+	__be32 *pdata;
 
+	if (datalen != 4)
+		return -EINVAL;
+	pdata = (__be32 *)data;
 	if (*pdata == ctx->from) {
 		pr_debug("%s: %pI4 to %pI4\n", __func__,
 			 (void *)&ctx->from, (void *)&ctx->to);
@@ -190,7 +186,7 @@
 		return NF_DROP;
 	}
 
-	if (!skb_make_writable(skb, skb->len)) {
+	if (skb_ensure_writable(skb, skb->len)) {
 		nf_ct_helper_log(skb, ct, "cannot mangle packet");
 		return NF_DROP;
 	}
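
Two independent fixes ride along in the SNMP ALG. First, snmp_version() and snmp_helper() now validate datalen before dereferencing fixed-size fields, so a malformed ASN.1 element cannot cause an out-of-bounds access (this looks like the hardening that shipped as CVE-2019-9162, though I have not re-verified the CVE mapping). Second, skb_make_writable() is replaced by skb_ensure_writable(), which flips the return convention — the old helper returned true on success, the new one returns 0 on success and negative on failure — hence the dropped `!`. The validate-before-parse discipline, in miniature:

/* Validate the encoded length before touching fixed-size fields, as
 * the snmp_version()/snmp_helper() callbacks above now do. */
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int parse_be32(const void *data, size_t datalen, uint32_t *out)
{
	uint8_t b[4];

	if (datalen != 4)	/* reject short/overlong encodings first */
		return -EINVAL;
	memcpy(b, data, 4);
	*out = (uint32_t)b[0] << 24 | b[1] << 16 | b[2] << 8 | b[3];
	return 0;
}

int main(void)
{
	const uint8_t buf[] = { 0x0a, 0x00, 0x00, 0x01 };
	uint32_t v;

	if (parse_be32(buf, sizeof(buf), &v) == 0)
		printf("addr %#x\n", v);
	printf("short element: %d\n", parse_be32(buf, 2, &v));	/* -EINVAL */
	return 0;
}
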
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 5cd06ba..2361fda 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -102,6 +99,7 @@
 /* Send RST reply */
 void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
 {
+	struct net_device *br_indev __maybe_unused;
 	struct sk_buff *nskb;
 	struct iphdr *niph;
 	const struct tcphdr *oth;
@@ -147,10 +145,11 @@
 	 * build the eth header using the original destination's MAC as the
 	 * source, and send the RST packet directly.
 	 */
-	if (oldskb->nf_bridge) {
+	br_indev = nf_bridge_get_physindev(oldskb);
+	if (br_indev) {
 		struct ethhdr *oeth = eth_hdr(oldskb);
 
-		nskb->dev = nf_bridge_get_physindev(oldskb);
+		nskb->dev = br_indev;
 		niph->tot_len = htons(nskb->len);
 		ip_send_check(niph);
 		if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
@@ -171,21 +170,16 @@
 void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
 {
 	struct iphdr *iph = ip_hdr(skb_in);
-	u8 proto;
+	u8 proto = iph->protocol;
 
 	if (iph->frag_off & htons(IP_OFFSET))
 		return;
 
-	if (skb_csum_unnecessary(skb_in)) {
+	if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) {
 		icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
 		return;
 	}
 
-	if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
-		proto = iph->protocol;
-	else
-		proto = 0;
-
 	if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
 		icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
 }
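
nf_send_unreach() previously verified checksums only for TCP and UDP and summed everything else with protocol 0; now the skb_csum_unnecessary() short-circuit is joined by nf_reject_verify_csum(), which skips verification for protocols whose checksum is absent, optional, or not a plain one's-complement sum the stack can recompute. Below is a plausible reconstruction of such a predicate; the exact protocol list is from memory and should be checked against the real helper before relying on it:

/* Plausible sketch of an nf_reject_verify_csum()-style predicate:
 * only verify when the protocol carries a mandatory ones-complement
 * checksum that nf_ip_checksum() knows how to compute. */
#include <netinet/in.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool reject_verify_csum(uint8_t proto)
{
	switch (proto) {
	case IPPROTO_AH:	/* integrity handled elsewhere */
	case IPPROTO_ESP:
	case IPPROTO_SCTP:	/* CRC32c, not a ones-complement sum */
	case IPPROTO_UDPLITE:	/* partial checksum coverage */
	case IPPROTO_DCCP:
	case IPPROTO_GRE:	/* checksum optional */
		return false;
	default:
		return true;
	}
}

int main(void)
{
	printf("tcp: %d  gre: %d\n",
	       reject_verify_csum(IPPROTO_TCP),
	       reject_verify_csum(IPPROTO_GRE));
	return 0;
}
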
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index 4824b1e..36a28d4 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2007-2008 BalaBit IT Ltd.
  * Author: Krisztian Kovacs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 1647141..b2bae0b 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2007-2008 BalaBit IT Ltd.
  * Author: Krisztian Kovacs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <net/netfilter/nf_tproxy.h>
@@ -53,6 +49,7 @@
 
 __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
 {
+	const struct in_ifaddr *ifa;
 	struct in_device *indev;
 	__be32 laddr;
 
@@ -61,10 +58,14 @@
 
 	laddr = 0;
 	indev = __in_dev_get_rcu(skb->dev);
-	for_primary_ifa(indev) {
+
+	in_dev_for_each_ifa_rcu(ifa, indev) {
+		if (ifa->ifa_flags & IFA_F_SECONDARY)
+			continue;
+
 		laddr = ifa->ifa_local;
 		break;
-	} endfor_ifa(indev);
+	}
 
 	return laddr ? laddr : daddr;
 }
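
The for_primary_ifa()/endfor_ifa() macro pair is retired in favor of the plain list walk in_dev_for_each_ifa_rcu(), which makes the "primary addresses only" filter an explicit IFA_F_SECONDARY test. The selection logic of nf_tproxy_laddr4() — first primary address on the device, else fall back to the original destination — standalone, with IFA_F_SECONDARY redefined locally for the demo:

/* Pick the first primary address on a device, else fall back to the
 * original destination -- the nf_tproxy_laddr4() logic above. */
#include <stdint.h>
#include <stdio.h>

#define IFA_F_SECONDARY 0x01

struct ifaddr { uint32_t local; unsigned flags; };

static uint32_t laddr4(const struct ifaddr *list, int n, uint32_t daddr)
{
	for (int i = 0; i < n; i++) {
		if (list[i].flags & IFA_F_SECONDARY)
			continue;		/* primaries only */
		return list[i].local;		/* first hit wins */
	}
	return daddr;				/* no primary: keep daddr */
}

int main(void)
{
	struct ifaddr a[] = { { 0x0a000002, IFA_F_SECONDARY },
			      { 0x0a000001, 0 } };

	printf("laddr %#x\n", laddr4(a, 2, 0xc0a80001));
	return 0;
}
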
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
deleted file mode 100644
index a3c4ea3..0000000
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
- * Copyright (c) 2012 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/ip.h>
-
-static unsigned int nft_nat_do_chain(void *priv,
-				     struct sk_buff *skb,
-				     const struct nf_hook_state *state)
-{
-	struct nft_pktinfo pkt;
-
-	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv4(&pkt, skb);
-
-	return nft_do_chain(&pkt, priv);
-}
-
-static int nft_nat_ipv4_reg(struct net *net, const struct nf_hook_ops *ops)
-{
-	return nf_nat_l3proto_ipv4_register_fn(net, ops);
-}
-
-static void nft_nat_ipv4_unreg(struct net *net, const struct nf_hook_ops *ops)
-{
-	nf_nat_l3proto_ipv4_unregister_fn(net, ops);
-}
-
-static const struct nft_chain_type nft_chain_nat_ipv4 = {
-	.name		= "nat",
-	.type		= NFT_CHAIN_T_NAT,
-	.family		= NFPROTO_IPV4,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_INET_PRE_ROUTING) |
-			  (1 << NF_INET_POST_ROUTING) |
-			  (1 << NF_INET_LOCAL_OUT) |
-			  (1 << NF_INET_LOCAL_IN),
-	.hooks		= {
-		[NF_INET_PRE_ROUTING]	= nft_nat_do_chain,
-		[NF_INET_POST_ROUTING]	= nft_nat_do_chain,
-		[NF_INET_LOCAL_OUT]	= nft_nat_do_chain,
-		[NF_INET_LOCAL_IN]	= nft_nat_do_chain,
-	},
-	.ops_register = nft_nat_ipv4_reg,
-	.ops_unregister = nft_nat_ipv4_unreg,
-};
-
-static int __init nft_chain_nat_init(void)
-{
-	nft_register_chain_type(&nft_chain_nat_ipv4);
-
-	return 0;
-}
-
-static void __exit nft_chain_nat_exit(void)
-{
-	nft_unregister_chain_type(&nft_chain_nat_ipv4);
-}
-
-module_init(nft_chain_nat_init);
-module_exit(nft_chain_nat_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
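This chain type does not disappear from nf_tables: it is folded, together with its IPv6 twin, into the family-independent net/netfilter/nft_chain_nat.c. A rough sketch of the consolidated hook, paraphrased from that file (treat the exact shape as approximate), which dispatches on the hook state's protocol family instead of being compiled once per family:

/* Paraphrased from net/netfilter/nft_chain_nat.c: one module now
 * serves both address families.
 */
static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);

	switch (state->pf) {
#ifdef CONFIG_NF_TABLES_IPV4
	case NFPROTO_IPV4:
		nft_set_pktinfo_ipv4(&pkt, skb);
		break;
#endif
#ifdef CONFIG_NF_TABLES_IPV6
	case NFPROTO_IPV6:
		nft_set_pktinfo_ipv6(&pkt, skb);
		break;
#endif
	default:
		break;
	}

	return nft_do_chain(&pkt, priv);
}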
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
deleted file mode 100644
index 7d82934..0000000
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/route.h>
-#include <net/ip.h>
-
-static unsigned int nf_route_table_hook(void *priv,
-					struct sk_buff *skb,
-					const struct nf_hook_state *state)
-{
-	unsigned int ret;
-	struct nft_pktinfo pkt;
-	u32 mark;
-	__be32 saddr, daddr;
-	u_int8_t tos;
-	const struct iphdr *iph;
-	int err;
-
-	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv4(&pkt, skb);
-
-	mark = skb->mark;
-	iph = ip_hdr(skb);
-	saddr = iph->saddr;
-	daddr = iph->daddr;
-	tos = iph->tos;
-
-	ret = nft_do_chain(&pkt, priv);
-	if (ret != NF_DROP && ret != NF_STOLEN) {
-		iph = ip_hdr(skb);
-
-		if (iph->saddr != saddr ||
-		    iph->daddr != daddr ||
-		    skb->mark != mark ||
-		    iph->tos != tos) {
-			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
-			if (err < 0)
-				ret = NF_DROP_ERR(err);
-		}
-	}
-	return ret;
-}
-
-static const struct nft_chain_type nft_chain_route_ipv4 = {
-	.name		= "route",
-	.type		= NFT_CHAIN_T_ROUTE,
-	.family		= NFPROTO_IPV4,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
-	.hooks		= {
-		[NF_INET_LOCAL_OUT]	= nf_route_table_hook,
-	},
-};
-
-static int __init nft_chain_route_init(void)
-{
-	nft_register_chain_type(&nft_chain_route_ipv4);
-
-	return 0;
-}
-
-static void __exit nft_chain_route_exit(void)
-{
-	nft_unregister_chain_type(&nft_chain_route_ipv4);
-}
-
-module_init(nft_chain_route_init);
-module_exit(nft_chain_route_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET, "route");
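The route chain type is consolidated the same way: net/netfilter/nft_chain_route.c now provides it for all families, keeping the IPv4 behavior shown above essentially unchanged — snapshot saddr, daddr, mark and tos, run the chain, and call ip_route_me_harder() when the verdict left any of them modified.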
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index 0af3d8d..abf89b9 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index e50976e..ce29411 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -1,8 +1,4 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -58,11 +54,6 @@
 }
 EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
 
-static int get_ifindex(const struct net_device *dev)
-{
-	return dev ? dev->ifindex : 0;
-}
-
 void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
 		   const struct nft_pktinfo *pkt)
 {
@@ -76,10 +67,7 @@
 		.flowi4_iif = LOOPBACK_IFINDEX,
 	};
 	const struct net_device *oif;
-	struct net_device *found;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-	int i;
-#endif
+	const struct net_device *found;
 
 	/*
 	 * Do not set flowi4_oif, it restricts results (for example, asking
@@ -97,8 +85,7 @@
 
 	if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
 	    nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
-		nft_fib_store_result(dest, priv, pkt,
-				     nft_in(pkt)->ifindex);
+		nft_fib_store_result(dest, priv, nft_in(pkt));
 		return;
 	}
 
@@ -111,8 +98,7 @@
 	if (ipv4_is_zeronet(iph->saddr)) {
 		if (ipv4_is_lbcast(iph->daddr) ||
 		    ipv4_is_local_multicast(iph->daddr)) {
-			nft_fib_store_result(dest, priv, pkt,
-					     get_ifindex(pkt->skb->dev));
+			nft_fib_store_result(dest, priv, pkt->skb->dev);
 			return;
 		}
 	}
@@ -146,36 +132,14 @@
 
        if (!oif) {
                found = FIB_RES_DEV(res);
-               goto ok;
-       }
+	} else {
+		if (!fib_info_nh_uses_dev(res.fi, oif))
+			return;
 
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-	for (i = 0; i < res.fi->fib_nhs; i++) {
-		struct fib_nh *nh = &res.fi->fib_nh[i];
+		found = oif;
+	}
 
-		if (nh->nh_dev == oif) {
-			found = nh->nh_dev;
-			goto ok;
-		}
-	}
-	return;
-#else
-	found = FIB_RES_DEV(res);
-	if (found != oif)
-		return;
-#endif
-ok:
-	switch (priv->result) {
-	case NFT_FIB_RESULT_OIF:
-		*dest = found->ifindex;
-		break;
-	case NFT_FIB_RESULT_OIFNAME:
-		strncpy((char *)dest, found->name, IFNAMSIZ);
-		break;
-	default:
-		WARN_ON_ONCE(1);
-		break;
-	}
+	nft_fib_store_result(dest, priv, found);
 }
 EXPORT_SYMBOL_GPL(nft_fib4_eval);
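Two simplifications meet in the hunk above: the verdict write-out moves into the shared nft_fib_store_result(), which now takes the device directly, and the removed CONFIG_IP_ROUTE_MULTIPATH special-casing is subsumed by fib_info_nh_uses_dev(). Roughly, that helper performs the old loop over every path of the fib_info; a paraphrased sketch (from net/ipv4/fib_frontend.c, simplified by omitting the non-multipath and nexthop-object branches):

/* true if any nexthop of @fi egresses via @dev */
bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
{
	int nhsel;

	for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
		const struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);

		if (nhc->nhc_dev == dev)
			return true;
	}
	return false;
}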
 
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
deleted file mode 100644
index 6847de1..0000000
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nft_masq.h>
-#include <net/netfilter/ipv4/nf_nat_masquerade.h>
-
-static void nft_masq_ipv4_eval(const struct nft_expr *expr,
-			       struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
-{
-	struct nft_masq *priv = nft_expr_priv(expr);
-	struct nf_nat_range2 range;
-
-	memset(&range, 0, sizeof(range));
-	range.flags = priv->flags;
-	if (priv->sreg_proto_min) {
-		range.min_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_min]);
-		range.max_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_max]);
-	}
-	regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
-						    &range, nft_out(pkt));
-}
-
-static void
-nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
-{
-	nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
-}
-
-static struct nft_expr_type nft_masq_ipv4_type;
-static const struct nft_expr_ops nft_masq_ipv4_ops = {
-	.type		= &nft_masq_ipv4_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
-	.eval		= nft_masq_ipv4_eval,
-	.init		= nft_masq_init,
-	.destroy	= nft_masq_ipv4_destroy,
-	.dump		= nft_masq_dump,
-	.validate	= nft_masq_validate,
-};
-
-static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
-	.family		= NFPROTO_IPV4,
-	.name		= "masq",
-	.ops		= &nft_masq_ipv4_ops,
-	.policy		= nft_masq_policy,
-	.maxattr	= NFTA_MASQ_MAX,
-	.owner		= THIS_MODULE,
-};
-
-static int __init nft_masq_ipv4_module_init(void)
-{
-	int ret;
-
-	ret = nft_register_expr(&nft_masq_ipv4_type);
-	if (ret < 0)
-		return ret;
-
-	ret = nf_nat_masquerade_ipv4_register_notifier();
-	if (ret)
-		nft_unregister_expr(&nft_masq_ipv4_type);
-
-	return ret;
-}
-
-static void __exit nft_masq_ipv4_module_exit(void)
-{
-	nft_unregister_expr(&nft_masq_ipv4_type);
-	nf_nat_masquerade_ipv4_unregister_notifier();
-}
-
-module_init(nft_masq_ipv4_module_init);
-module_exit(nft_masq_ipv4_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
deleted file mode 100644
index 5120be1..0000000
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_redirect.h>
-#include <net/netfilter/nft_redir.h>
-
-static void nft_redir_ipv4_eval(const struct nft_expr *expr,
-				struct nft_regs *regs,
-				const struct nft_pktinfo *pkt)
-{
-	struct nft_redir *priv = nft_expr_priv(expr);
-	struct nf_nat_ipv4_multi_range_compat mr;
-
-	memset(&mr, 0, sizeof(mr));
-	if (priv->sreg_proto_min) {
-		mr.range[0].min.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_min]);
-		mr.range[0].max.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_max]);
-		mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
-	}
-
-	mr.range[0].flags |= priv->flags;
-
-	regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt));
-}
-
-static void
-nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
-{
-	nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
-}
-
-static struct nft_expr_type nft_redir_ipv4_type;
-static const struct nft_expr_ops nft_redir_ipv4_ops = {
-	.type		= &nft_redir_ipv4_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_redir)),
-	.eval		= nft_redir_ipv4_eval,
-	.init		= nft_redir_init,
-	.destroy	= nft_redir_ipv4_destroy,
-	.dump		= nft_redir_dump,
-	.validate	= nft_redir_validate,
-};
-
-static struct nft_expr_type nft_redir_ipv4_type __read_mostly = {
-	.family		= NFPROTO_IPV4,
-	.name		= "redir",
-	.ops		= &nft_redir_ipv4_ops,
-	.policy		= nft_redir_policy,
-	.maxattr	= NFTA_REDIR_MAX,
-	.owner		= THIS_MODULE,
-};
-
-static int __init nft_redir_ipv4_module_init(void)
-{
-	return nft_register_expr(&nft_redir_ipv4_type);
-}
-
-static void __exit nft_redir_ipv4_module_exit(void)
-{
-	nft_unregister_expr(&nft_redir_ipv4_type);
-}
-
-module_init(nft_redir_ipv4_module_init);
-module_exit(nft_redir_ipv4_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
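As with nft_masq_ipv4.c above, this deletion is a consolidation rather than a removal: the common net/netfilter/nft_masq.c and nft_redir.c modules now register the expressions once for all supported families, so only the per-family wrapper boilerplate shown in these two files goes away.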
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index 517ce93..7e6fd5c 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  * Copyright (c) 2013 Eric Leblond <eric@regit.org>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
diff --git a/net/ipv4/netlink.c b/net/ipv4/netlink.c
index f86bb4f..b920e1b 100644
--- a/net/ipv4/netlink.c
+++ b/net/ipv4/netlink.c
@@ -1,11 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 #include <linux/types.h>
 #include <net/net_namespace.h>
 #include <net/netlink.h>
+#include <linux/in6.h>
 #include <net/ip.h>
 
-int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family,
 				struct netlink_ext_ack *extack)
 {
 	*ip_proto = nla_get_u8(attr);
@@ -13,11 +15,19 @@
 	switch (*ip_proto) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
-	case IPPROTO_ICMP:
 		return 0;
-	default:
-		NL_SET_ERR_MSG(extack, "Unsupported ip proto");
-		return -EOPNOTSUPP;
+	case IPPROTO_ICMP:
+		if (family != AF_INET)
+			break;
+		return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPPROTO_ICMPV6:
+		if (family != AF_INET6)
+			break;
+		return 0;
+#endif
 	}
+	NL_SET_ERR_MSG(extack, "Unsupported ip proto");
+	return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto);
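The new family argument lets one parser serve both the IPv4 and IPv6 getroute handlers while rejecting a mismatched ICMP variant. A reduced sketch of the IPv4 call-site shape (as in this kernel's inet_rtm_getroute(); tb[] is the parsed RTA_* attribute table and extack the netlink ack, both assumed from the surrounding handler):

	u8 ip_proto = IPPROTO_UDP;
	int err;

	if (tb[RTA_IP_PROTO]) {
		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
						  &ip_proto, AF_INET, extack);
		if (err)
			return err;
	}

An IPv6 caller passes AF_INET6 instead, so IPPROTO_ICMP is refused there and IPPROTO_ICMPV6 is refused for AF_INET.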
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
new file mode 100644
index 0000000..fc34fd1
--- /dev/null
+++ b/net/ipv4/nexthop.c
@@ -0,0 +1,1828 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Generic nexthop implementation
+ *
+ * Copyright (c) 2017-19 Cumulus Networks
+ * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
+ */
+
+#include <linux/nexthop.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <net/arp.h>
+#include <net/ipv6_stubs.h>
+#include <net/lwtunnel.h>
+#include <net/ndisc.h>
+#include <net/nexthop.h>
+#include <net/route.h>
+#include <net/sock.h>
+
+static void remove_nexthop(struct net *net, struct nexthop *nh,
+			   struct nl_info *nlinfo);
+
+#define NH_DEV_HASHBITS  8
+#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
+
+static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
+	[NHA_UNSPEC]		= { .strict_start_type = NHA_UNSPEC + 1 },
+	[NHA_ID]		= { .type = NLA_U32 },
+	[NHA_GROUP]		= { .type = NLA_BINARY },
+	[NHA_GROUP_TYPE]	= { .type = NLA_U16 },
+	[NHA_BLACKHOLE]		= { .type = NLA_FLAG },
+	[NHA_OIF]		= { .type = NLA_U32 },
+	[NHA_GATEWAY]		= { .type = NLA_BINARY },
+	[NHA_ENCAP_TYPE]	= { .type = NLA_U16 },
+	[NHA_ENCAP]		= { .type = NLA_NESTED },
+	[NHA_GROUPS]		= { .type = NLA_FLAG },
+	[NHA_MASTER]		= { .type = NLA_U32 },
+};
+
+static unsigned int nh_dev_hashfn(unsigned int val)
+{
+	unsigned int mask = NH_DEV_HASHSIZE - 1;
+
+	return (val ^
+		(val >> NH_DEV_HASHBITS) ^
+		(val >> (NH_DEV_HASHBITS * 2))) & mask;
+}
+
+static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
+{
+	struct net_device *dev = nhi->fib_nhc.nhc_dev;
+	struct hlist_head *head;
+	unsigned int hash;
+
+	WARN_ON(!dev);
+
+	hash = nh_dev_hashfn(dev->ifindex);
+	head = &net->nexthop.devhash[hash];
+	hlist_add_head(&nhi->dev_hash, head);
+}
+
+static void nexthop_free_mpath(struct nexthop *nh)
+{
+	struct nh_group *nhg;
+	int i;
+
+	nhg = rcu_dereference_raw(nh->nh_grp);
+	for (i = 0; i < nhg->num_nh; ++i)
+		WARN_ON(nhg->nh_entries[i].nh);
+
+	kfree(nhg);
+}
+
+static void nexthop_free_single(struct nexthop *nh)
+{
+	struct nh_info *nhi;
+
+	nhi = rcu_dereference_raw(nh->nh_info);
+	switch (nhi->family) {
+	case AF_INET:
+		fib_nh_release(nh->net, &nhi->fib_nh);
+		break;
+	case AF_INET6:
+		ipv6_stub->fib6_nh_release(&nhi->fib6_nh);
+		break;
+	}
+	kfree(nhi);
+}
+
+void nexthop_free_rcu(struct rcu_head *head)
+{
+	struct nexthop *nh = container_of(head, struct nexthop, rcu);
+
+	if (nh->is_group)
+		nexthop_free_mpath(nh);
+	else
+		nexthop_free_single(nh);
+
+	kfree(nh);
+}
+EXPORT_SYMBOL_GPL(nexthop_free_rcu);
+
+static struct nexthop *nexthop_alloc(void)
+{
+	struct nexthop *nh;
+
+	nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
+	if (nh) {
+		INIT_LIST_HEAD(&nh->fi_list);
+		INIT_LIST_HEAD(&nh->f6i_list);
+		INIT_LIST_HEAD(&nh->grp_list);
+	}
+	return nh;
+}
+
+static struct nh_group *nexthop_grp_alloc(u16 num_nh)
+{
+	size_t sz = offsetof(struct nexthop, nh_grp)
+		    + sizeof(struct nh_group)
+		    + sizeof(struct nh_grp_entry) * num_nh;
+	struct nh_group *nhg;
+
+	nhg = kzalloc(sz, GFP_KERNEL);
+	if (nhg)
+		nhg->num_nh = num_nh;
+
+	return nhg;
+}
+
+static void nh_base_seq_inc(struct net *net)
+{
+	while (++net->nexthop.seq == 0)
+		;
+}
+
+/* no reference taken; rcu lock or rtnl must be held */
+struct nexthop *nexthop_find_by_id(struct net *net, u32 id)
+{
+	struct rb_node **pp, *parent = NULL, *next;
+
+	pp = &net->nexthop.rb_root.rb_node;
+	while (1) {
+		struct nexthop *nh;
+
+		next = rcu_dereference_raw(*pp);
+		if (!next)
+			break;
+		parent = next;
+
+		nh = rb_entry(parent, struct nexthop, rb_node);
+		if (id < nh->id)
+			pp = &next->rb_left;
+		else if (id > nh->id)
+			pp = &next->rb_right;
+		else
+			return nh;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nexthop_find_by_id);
+
+/* used for auto id allocation; called with rtnl held */
+static u32 nh_find_unused_id(struct net *net)
+{
+	u32 id_start = net->nexthop.last_id_allocated;
+
+	while (1) {
+		net->nexthop.last_id_allocated++;
+		if (net->nexthop.last_id_allocated == id_start)
+			break;
+
+		if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated))
+			return net->nexthop.last_id_allocated;
+	}
+	return 0;
+}
+
+static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
+{
+	struct nexthop_grp *p;
+	size_t len = nhg->num_nh * sizeof(*p);
+	struct nlattr *nla;
+	u16 group_type = 0;
+	int i;
+
+	if (nhg->mpath)
+		group_type = NEXTHOP_GRP_TYPE_MPATH;
+
+	if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
+		goto nla_put_failure;
+
+	nla = nla_reserve(skb, NHA_GROUP, len);
+	if (!nla)
+		goto nla_put_failure;
+
+	p = nla_data(nla);
+	for (i = 0; i < nhg->num_nh; ++i) {
+		p->id = nhg->nh_entries[i].nh->id;
+		p->weight = nhg->nh_entries[i].weight - 1;
+		p += 1;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
+			int event, u32 portid, u32 seq, unsigned int nlflags)
+{
+	struct fib6_nh *fib6_nh;
+	struct fib_nh *fib_nh;
+	struct nlmsghdr *nlh;
+	struct nh_info *nhi;
+	struct nhmsg *nhm;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	nhm = nlmsg_data(nlh);
+	nhm->nh_family = AF_UNSPEC;
+	nhm->nh_flags = nh->nh_flags;
+	nhm->nh_protocol = nh->protocol;
+	nhm->nh_scope = 0;
+	nhm->resvd = 0;
+
+	if (nla_put_u32(skb, NHA_ID, nh->id))
+		goto nla_put_failure;
+
+	if (nh->is_group) {
+		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+
+		if (nla_put_nh_group(skb, nhg))
+			goto nla_put_failure;
+		goto out;
+	}
+
+	nhi = rtnl_dereference(nh->nh_info);
+	nhm->nh_family = nhi->family;
+	if (nhi->reject_nh) {
+		if (nla_put_flag(skb, NHA_BLACKHOLE))
+			goto nla_put_failure;
+		goto out;
+	} else {
+		const struct net_device *dev;
+
+		dev = nhi->fib_nhc.nhc_dev;
+		if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
+			goto nla_put_failure;
+	}
+
+	nhm->nh_scope = nhi->fib_nhc.nhc_scope;
+	switch (nhi->family) {
+	case AF_INET:
+		fib_nh = &nhi->fib_nh;
+		if (fib_nh->fib_nh_gw_family &&
+		    nla_put_u32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
+			goto nla_put_failure;
+		break;
+
+	case AF_INET6:
+		fib6_nh = &nhi->fib6_nh;
+		if (fib6_nh->fib_nh_gw_family &&
+		    nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6))
+			goto nla_put_failure;
+		break;
+	}
+
+	if (nhi->fib_nhc.nhc_lwtstate &&
+	    lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
+				NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
+		goto nla_put_failure;
+
+out:
+	nlmsg_end(skb, nlh);
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static size_t nh_nlmsg_size_grp(struct nexthop *nh)
+{
+	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+	size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
+
+	return nla_total_size(sz) +
+	       nla_total_size(2);  /* NHA_GROUP_TYPE */
+}
+
+static size_t nh_nlmsg_size_single(struct nexthop *nh)
+{
+	struct nh_info *nhi = rtnl_dereference(nh->nh_info);
+	size_t sz;
+
+	/* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
+	 * are mutually exclusive
+	 */
+	sz = nla_total_size(4);  /* NHA_OIF */
+
+	switch (nhi->family) {
+	case AF_INET:
+		if (nhi->fib_nh.fib_nh_gw_family)
+			sz += nla_total_size(4);  /* NHA_GATEWAY */
+		break;
+
+	case AF_INET6:
+		/* NHA_GATEWAY */
+		if (nhi->fib6_nh.fib_nh_gw_family)
+			sz += nla_total_size(sizeof(const struct in6_addr));
+		break;
+	}
+
+	if (nhi->fib_nhc.nhc_lwtstate) {
+		sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate);
+		sz += nla_total_size(2);  /* NHA_ENCAP_TYPE */
+	}
+
+	return sz;
+}
+
+static size_t nh_nlmsg_size(struct nexthop *nh)
+{
+	size_t sz = nla_total_size(4);    /* NHA_ID */
+
+	if (nh->is_group)
+		sz += nh_nlmsg_size_grp(nh);
+	else
+		sz += nh_nlmsg_size_single(nh);
+
+	return sz;
+}
+
+static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
+{
+	unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
+	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
+	if (!skb)
+		goto errout;
+
+	err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in nh_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP,
+		    info->nlh, gfp_any());
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
+}
+
+static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
+			   struct netlink_ext_ack *extack)
+{
+	if (nh->is_group) {
+		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+
+		/* nested multipath (group within a group) is not
+		 * supported
+		 */
+		if (nhg->mpath) {
+			NL_SET_ERR_MSG(extack,
+				       "Multipath group can not be a nexthop within a group");
+			return false;
+		}
+	} else {
+		struct nh_info *nhi = rtnl_dereference(nh->nh_info);
+
+		if (nhi->reject_nh && npaths > 1) {
+			NL_SET_ERR_MSG(extack,
+				       "Blackhole nexthop can not be used in a group with more than 1 path");
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
+			       struct netlink_ext_ack *extack)
+{
+	unsigned int len = nla_len(tb[NHA_GROUP]);
+	struct nexthop_grp *nhg;
+	unsigned int i, j;
+
+	if (len & (sizeof(struct nexthop_grp) - 1)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid length for nexthop group attribute");
+		return -EINVAL;
+	}
+
+	/* convert len to number of nexthop ids */
+	len /= sizeof(*nhg);
+
+	nhg = nla_data(tb[NHA_GROUP]);
+	for (i = 0; i < len; ++i) {
+		if (nhg[i].resvd1 || nhg[i].resvd2) {
+			NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
+			return -EINVAL;
+		}
+		if (nhg[i].weight > 254) {
+			NL_SET_ERR_MSG(extack, "Invalid value for weight");
+			return -EINVAL;
+		}
+		for (j = i + 1; j < len; ++j) {
+			if (nhg[i].id == nhg[j].id) {
+				NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
+				return -EINVAL;
+			}
+		}
+	}
+
+	nhg = nla_data(tb[NHA_GROUP]);
+	for (i = 0; i < len; ++i) {
+		struct nexthop *nh;
+
+		nh = nexthop_find_by_id(net, nhg[i].id);
+		if (!nh) {
+			NL_SET_ERR_MSG(extack, "Invalid nexthop id");
+			return -EINVAL;
+		}
+		if (!valid_group_nh(nh, len, extack))
+			return -EINVAL;
+	}
+	for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		NL_SET_ERR_MSG(extack,
+			       "No other attributes can be set in nexthop groups");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool ipv6_good_nh(const struct fib6_nh *nh)
+{
+	int state = NUD_REACHABLE;
+	struct neighbour *n;
+
+	rcu_read_lock_bh();
+
+	n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
+	if (n)
+		state = n->nud_state;
+
+	rcu_read_unlock_bh();
+
+	return !!(state & NUD_VALID);
+}
+
+static bool ipv4_good_nh(const struct fib_nh *nh)
+{
+	int state = NUD_REACHABLE;
+	struct neighbour *n;
+
+	rcu_read_lock_bh();
+
+	n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
+				      (__force u32)nh->fib_nh_gw4);
+	if (n)
+		state = n->nud_state;
+
+	rcu_read_unlock_bh();
+
+	return !!(state & NUD_VALID);
+}
+
+struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
+{
+	struct nexthop *rc = NULL;
+	struct nh_group *nhg;
+	int i;
+
+	if (!nh->is_group)
+		return nh;
+
+	nhg = rcu_dereference(nh->nh_grp);
+	for (i = 0; i < nhg->num_nh; ++i) {
+		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+		struct nh_info *nhi;
+
+		if (hash > atomic_read(&nhge->upper_bound))
+			continue;
+
+		/* nexthops always check neighbour validity and do not
+		 * rely on the fib_multipath_use_neigh sysctl for this
+		 */
+		nhi = rcu_dereference(nhge->nh->nh_info);
+		switch (nhi->family) {
+		case AF_INET:
+			if (ipv4_good_nh(&nhi->fib_nh))
+				return nhge->nh;
+			break;
+		case AF_INET6:
+			if (ipv6_good_nh(&nhi->fib6_nh))
+				return nhge->nh;
+			break;
+		}
+
+		if (!rc)
+			rc = nhge->nh;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(nexthop_select_path);
+
+int nexthop_for_each_fib6_nh(struct nexthop *nh,
+			     int (*cb)(struct fib6_nh *nh, void *arg),
+			     void *arg)
+{
+	struct nh_info *nhi;
+	int err;
+
+	if (nh->is_group) {
+		struct nh_group *nhg;
+		int i;
+
+		nhg = rcu_dereference_rtnl(nh->nh_grp);
+		for (i = 0; i < nhg->num_nh; i++) {
+			struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+			nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
+			err = cb(&nhi->fib6_nh, arg);
+			if (err)
+				return err;
+		}
+	} else {
+		nhi = rcu_dereference_rtnl(nh->nh_info);
+		err = cb(&nhi->fib6_nh, arg);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
+
+static int check_src_addr(const struct in6_addr *saddr,
+			  struct netlink_ext_ack *extack)
+{
+	if (!ipv6_addr_any(saddr)) {
+		NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
+		       struct netlink_ext_ack *extack)
+{
+	struct nh_info *nhi;
+
+	/* fib6_src is unique to a fib6_info and limits the ability to cache
+	 * routes in fib6_nh within a nexthop that is potentially shared
+	 * across multiple fib entries. If the config wants to use source
+	 * routing it can not use nexthop objects. mlxsw also does not allow
+	 * fib6_src on routes.
+	 */
+	if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
+		return -EINVAL;
+
+	if (nh->is_group) {
+		struct nh_group *nhg;
+
+		nhg = rtnl_dereference(nh->nh_grp);
+		if (nhg->has_v4)
+			goto no_v4_nh;
+	} else {
+		nhi = rtnl_dereference(nh->nh_info);
+		if (nhi->family == AF_INET)
+			goto no_v4_nh;
+	}
+
+	return 0;
+no_v4_nh:
+	NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(fib6_check_nexthop);
+
+/* if existing nexthop has ipv6 routes linked to it, need
+ * to verify this new spec works with ipv6
+ */
+static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
+			      struct netlink_ext_ack *extack)
+{
+	struct fib6_info *f6i;
+
+	if (list_empty(&old->f6i_list))
+		return 0;
+
+	list_for_each_entry(f6i, &old->f6i_list, nh_list) {
+		if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
+			return -EINVAL;
+	}
+
+	return fib6_check_nexthop(new, NULL, extack);
+}
+
+static int nexthop_check_scope(struct nexthop *nh, u8 scope,
+			       struct netlink_ext_ack *extack)
+{
+	struct nh_info *nhi;
+
+	nhi = rtnl_dereference(nh->nh_info);
+	if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
+		NL_SET_ERR_MSG(extack,
+			       "Route with host scope can not have a gateway");
+		return -EINVAL;
+	}
+
+	if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) {
+		NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Invoked by fib add code to verify nexthop by id is ok with
+ * config for prefix; parts of fib_check_nh not done when nexthop
+ * object is used.
+ */
+int fib_check_nexthop(struct nexthop *nh, u8 scope,
+		      struct netlink_ext_ack *extack)
+{
+	int err = 0;
+
+	if (nh->is_group) {
+		struct nh_group *nhg;
+
+		if (scope == RT_SCOPE_HOST) {
+			NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
+			err = -EINVAL;
+			goto out;
+		}
+
+		nhg = rtnl_dereference(nh->nh_grp);
+		/* all nexthops in a group have the same scope */
+		err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack);
+	} else {
+		err = nexthop_check_scope(nh, scope, extack);
+	}
+out:
+	return err;
+}
+
+static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
+			     struct netlink_ext_ack *extack)
+{
+	struct fib_info *fi;
+
+	list_for_each_entry(fi, &old->fi_list, nh_list) {
+		int err;
+
+		err = fib_check_nexthop(new, fi->fib_scope, extack);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static void nh_group_rebalance(struct nh_group *nhg)
+{
+	int total = 0;
+	int w = 0;
+	int i;
+
+	for (i = 0; i < nhg->num_nh; ++i)
+		total += nhg->nh_entries[i].weight;
+
+	for (i = 0; i < nhg->num_nh; ++i) {
+		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+		int upper_bound;
+
+		w += nhge->weight;
+		upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
+		atomic_set(&nhge->upper_bound, upper_bound);
+	}
+}
+
+static void remove_nh_grp_entry(struct nh_grp_entry *nhge,
+				struct nh_group *nhg,
+				struct nl_info *nlinfo)
+{
+	struct nexthop *nh = nhge->nh;
+	struct nh_grp_entry *nhges;
+	bool found = false;
+	int i;
+
+	WARN_ON(!nh);
+
+	nhges = nhg->nh_entries;
+	for (i = 0; i < nhg->num_nh; ++i) {
+		if (found) {
+			nhges[i-1].nh = nhges[i].nh;
+			nhges[i-1].weight = nhges[i].weight;
+			list_del(&nhges[i].nh_list);
+			list_add(&nhges[i-1].nh_list, &nhges[i-1].nh->grp_list);
+		} else if (nhg->nh_entries[i].nh == nh) {
+			found = true;
+		}
+	}
+
+	if (WARN_ON(!found))
+		return;
+
+	nhg->num_nh--;
+	nhg->nh_entries[nhg->num_nh].nh = NULL;
+
+	nh_group_rebalance(nhg);
+
+	nexthop_put(nh);
+
+	if (nlinfo)
+		nexthop_notify(RTM_NEWNEXTHOP, nhge->nh_parent, nlinfo);
+}
+
+static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
+				       struct nl_info *nlinfo)
+{
+	struct nh_grp_entry *nhge, *tmp;
+
+	list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) {
+		struct nh_group *nhg;
+
+		list_del(&nhge->nh_list);
+		nhg = rtnl_dereference(nhge->nh_parent->nh_grp);
+		remove_nh_grp_entry(nhge, nhg, nlinfo);
+
+		/* if this group has no more entries then remove it */
+		if (!nhg->num_nh)
+			remove_nexthop(net, nhge->nh_parent, nlinfo);
+	}
+}
+
+static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
+{
+	struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
+	int i, num_nh = nhg->num_nh;
+
+	for (i = 0; i < num_nh; ++i) {
+		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+		if (WARN_ON(!nhge->nh))
+			continue;
+
+		list_del(&nhge->nh_list);
+		nexthop_put(nhge->nh);
+		nhge->nh = NULL;
+		nhg->num_nh--;
+	}
+}
+
+/* not called for nexthop replace */
+static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
+{
+	struct fib6_info *f6i, *tmp;
+	bool do_flush = false;
+	struct fib_info *fi;
+
+	list_for_each_entry(fi, &nh->fi_list, nh_list) {
+		fi->fib_flags |= RTNH_F_DEAD;
+		do_flush = true;
+	}
+	if (do_flush)
+		fib_flush(net);
+
+	/* ip6_del_rt removes the entry from this list hence the _safe */
+	list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
+		/* __ip6_del_rt does a release, so do a hold here */
+		fib6_info_hold(f6i);
+		ipv6_stub->ip6_del_rt(net, f6i);
+	}
+}
+
+static void __remove_nexthop(struct net *net, struct nexthop *nh,
+			     struct nl_info *nlinfo)
+{
+	__remove_nexthop_fib(net, nh);
+
+	if (nh->is_group) {
+		remove_nexthop_group(nh, nlinfo);
+	} else {
+		struct nh_info *nhi;
+
+		nhi = rtnl_dereference(nh->nh_info);
+		if (nhi->fib_nhc.nhc_dev)
+			hlist_del(&nhi->dev_hash);
+
+		remove_nexthop_from_groups(net, nh, nlinfo);
+	}
+}
+
+static void remove_nexthop(struct net *net, struct nexthop *nh,
+			   struct nl_info *nlinfo)
+{
+	/* remove from the tree */
+	rb_erase(&nh->rb_node, &net->nexthop.rb_root);
+
+	if (nlinfo)
+		nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
+
+	__remove_nexthop(net, nh, nlinfo);
+	nh_base_seq_inc(net);
+
+	nexthop_put(nh);
+}
+
+/* if any FIB entries reference this nexthop, any dst entries
+ * need to be regenerated
+ */
+static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
+{
+	struct fib6_info *f6i;
+
+	if (!list_empty(&nh->fi_list))
+		rt_cache_flush(net);
+
+	list_for_each_entry(f6i, &nh->f6i_list, nh_list)
+		ipv6_stub->fib6_update_sernum(net, f6i);
+}
+
+static int replace_nexthop_grp(struct net *net, struct nexthop *old,
+			       struct nexthop *new,
+			       struct netlink_ext_ack *extack)
+{
+	struct nh_group *oldg, *newg;
+	int i;
+
+	if (!new->is_group) {
+		NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
+		return -EINVAL;
+	}
+
+	oldg = rtnl_dereference(old->nh_grp);
+	newg = rtnl_dereference(new->nh_grp);
+
+	/* update parents - used by nexthop code for cleanup */
+	for (i = 0; i < newg->num_nh; i++)
+		newg->nh_entries[i].nh_parent = old;
+
+	rcu_assign_pointer(old->nh_grp, newg);
+
+	for (i = 0; i < oldg->num_nh; i++)
+		oldg->nh_entries[i].nh_parent = new;
+
+	rcu_assign_pointer(new->nh_grp, oldg);
+
+	return 0;
+}
+
+static int replace_nexthop_single(struct net *net, struct nexthop *old,
+				  struct nexthop *new,
+				  struct netlink_ext_ack *extack)
+{
+	struct nh_info *oldi, *newi;
+
+	if (new->is_group) {
+		NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
+		return -EINVAL;
+	}
+
+	oldi = rtnl_dereference(old->nh_info);
+	newi = rtnl_dereference(new->nh_info);
+
+	newi->nh_parent = old;
+	oldi->nh_parent = new;
+
+	old->protocol = new->protocol;
+	old->nh_flags = new->nh_flags;
+
+	rcu_assign_pointer(old->nh_info, newi);
+	rcu_assign_pointer(new->nh_info, oldi);
+
+	return 0;
+}
+
+static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
+				     struct nl_info *info)
+{
+	struct fib6_info *f6i;
+
+	if (!list_empty(&nh->fi_list)) {
+		struct fib_info *fi;
+
+		/* expectation is a few fib_info per nexthop and then
+		 * a lot of routes per fib_info. So mark the fib_info
+		 * and then walk the fib tables once
+		 */
+		list_for_each_entry(fi, &nh->fi_list, nh_list)
+			fi->nh_updated = true;
+
+		fib_info_notify_update(net, info);
+
+		list_for_each_entry(fi, &nh->fi_list, nh_list)
+			fi->nh_updated = false;
+	}
+
+	list_for_each_entry(f6i, &nh->f6i_list, nh_list)
+		ipv6_stub->fib6_rt_update(net, f6i, info);
+}
+
+/* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
+ * linked to this nexthop and for all groups that the nexthop
+ * is a member of
+ */
+static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
+				   struct nl_info *info)
+{
+	struct nh_grp_entry *nhge;
+
+	__nexthop_replace_notify(net, nh, info);
+
+	list_for_each_entry(nhge, &nh->grp_list, nh_list)
+		__nexthop_replace_notify(net, nhge->nh_parent, info);
+}
+
+static int replace_nexthop(struct net *net, struct nexthop *old,
+			   struct nexthop *new, struct netlink_ext_ack *extack)
+{
+	bool new_is_reject = false;
+	struct nh_grp_entry *nhge;
+	int err;
+
+	/* check that existing FIB entries are ok with the
+	 * new nexthop definition
+	 */
+	err = fib_check_nh_list(old, new, extack);
+	if (err)
+		return err;
+
+	err = fib6_check_nh_list(old, new, extack);
+	if (err)
+		return err;
+
+	if (!new->is_group) {
+		struct nh_info *nhi = rtnl_dereference(new->nh_info);
+
+		new_is_reject = nhi->reject_nh;
+	}
+
+	list_for_each_entry(nhge, &old->grp_list, nh_list) {
+		/* if new nexthop is a blackhole, any groups using this
+		 * nexthop cannot have more than 1 path
+		 */
+		if (new_is_reject &&
+		    nexthop_num_path(nhge->nh_parent) > 1) {
+			NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
+			return -EINVAL;
+		}
+
+		err = fib_check_nh_list(nhge->nh_parent, new, extack);
+		if (err)
+			return err;
+
+		err = fib6_check_nh_list(nhge->nh_parent, new, extack);
+		if (err)
+			return err;
+	}
+
+	if (old->is_group)
+		err = replace_nexthop_grp(net, old, new, extack);
+	else
+		err = replace_nexthop_single(net, old, new, extack);
+
+	if (!err) {
+		nh_rt_cache_flush(net, old);
+
+		__remove_nexthop(net, new, NULL);
+		nexthop_put(new);
+	}
+
+	return err;
+}
+
+/* called with rtnl_lock held */
+static int insert_nexthop(struct net *net, struct nexthop *new_nh,
+			  struct nh_config *cfg, struct netlink_ext_ack *extack)
+{
+	struct rb_node **pp, *parent = NULL, *next;
+	struct rb_root *root = &net->nexthop.rb_root;
+	bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
+	bool create = !!(cfg->nlflags & NLM_F_CREATE);
+	u32 new_id = new_nh->id;
+	int replace_notify = 0;
+	int rc = -EEXIST;
+
+	pp = &root->rb_node;
+	while (1) {
+		struct nexthop *nh;
+
+		next = rtnl_dereference(*pp);
+		if (!next)
+			break;
+
+		parent = next;
+
+		nh = rb_entry(parent, struct nexthop, rb_node);
+		if (new_id < nh->id) {
+			pp = &next->rb_left;
+		} else if (new_id > nh->id) {
+			pp = &next->rb_right;
+		} else if (replace) {
+			rc = replace_nexthop(net, nh, new_nh, extack);
+			if (!rc) {
+				new_nh = nh; /* send notification with old nh */
+				replace_notify = 1;
+			}
+			goto out;
+		} else {
+			/* id already exists and not a replace */
+			goto out;
+		}
+	}
+
+	if (replace && !create) {
+		NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
+		rc = -ENOENT;
+		goto out;
+	}
+
+	rb_link_node_rcu(&new_nh->rb_node, parent, pp);
+	rb_insert_color(&new_nh->rb_node, root);
+	rc = 0;
+out:
+	if (!rc) {
+		nh_base_seq_inc(net);
+		nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
+		if (replace_notify)
+			nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
+	}
+
+	return rc;
+}
+
+/* rtnl */
+/* remove all nexthops tied to a device going down or being deleted */
+static void nexthop_flush_dev(struct net_device *dev)
+{
+	unsigned int hash = nh_dev_hashfn(dev->ifindex);
+	struct net *net = dev_net(dev);
+	struct hlist_head *head = &net->nexthop.devhash[hash];
+	struct hlist_node *n;
+	struct nh_info *nhi;
+
+	hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
+		if (nhi->fib_nhc.nhc_dev != dev)
+			continue;
+
+		remove_nexthop(net, nhi->nh_parent, NULL);
+	}
+}
+
+/* rtnl; called when net namespace is deleted */
+static void flush_all_nexthops(struct net *net)
+{
+	struct rb_root *root = &net->nexthop.rb_root;
+	struct rb_node *node;
+	struct nexthop *nh;
+
+	while ((node = rb_first(root))) {
+		nh = rb_entry(node, struct nexthop, rb_node);
+		remove_nexthop(net, nh, NULL);
+		cond_resched();
+	}
+}
+
+static struct nexthop *nexthop_create_group(struct net *net,
+					    struct nh_config *cfg)
+{
+	struct nlattr *grps_attr = cfg->nh_grp;
+	struct nexthop_grp *entry = nla_data(grps_attr);
+	struct nh_group *nhg;
+	struct nexthop *nh;
+	int i;
+
+	nh = nexthop_alloc();
+	if (!nh)
+		return ERR_PTR(-ENOMEM);
+
+	nh->is_group = 1;
+
+	nhg = nexthop_grp_alloc(nla_len(grps_attr) / sizeof(*entry));
+	if (!nhg) {
+		kfree(nh);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0; i < nhg->num_nh; ++i) {
+		struct nexthop *nhe;
+		struct nh_info *nhi;
+
+		nhe = nexthop_find_by_id(net, entry[i].id);
+		if (!nexthop_get(nhe))
+			goto out_no_nh;
+
+		nhi = rtnl_dereference(nhe->nh_info);
+		if (nhi->family == AF_INET)
+			nhg->has_v4 = true;
+
+		nhg->nh_entries[i].nh = nhe;
+		nhg->nh_entries[i].weight = entry[i].weight + 1;
+		list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
+		nhg->nh_entries[i].nh_parent = nh;
+	}
+
+	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
+		nhg->mpath = 1;
+		nh_group_rebalance(nhg);
+	}
+
+	rcu_assign_pointer(nh->nh_grp, nhg);
+
+	return nh;
+
+out_no_nh:
+	/* entry i never took a reference; unwind entries 0 .. i - 1 */
+	for (i--; i >= 0; --i) {
+		list_del(&nhg->nh_entries[i].nh_list);
+		nexthop_put(nhg->nh_entries[i].nh);
+	}
+
+	kfree(nhg);
+	kfree(nh);
+
+	return ERR_PTR(-ENOENT);
+}
+
+static int nh_create_ipv4(struct net *net, struct nexthop *nh,
+			  struct nh_info *nhi, struct nh_config *cfg,
+			  struct netlink_ext_ack *extack)
+{
+	struct fib_nh *fib_nh = &nhi->fib_nh;
+	struct fib_config fib_cfg = {
+		.fc_oif   = cfg->nh_ifindex,
+		.fc_gw4   = cfg->gw.ipv4,
+		.fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
+		.fc_flags = cfg->nh_flags,
+		.fc_encap = cfg->nh_encap,
+		.fc_encap_type = cfg->nh_encap_type,
+	};
+	u32 tb_id = l3mdev_fib_table(cfg->dev);
+	int err;
+
+	err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
+	if (err) {
+		fib_nh_release(net, fib_nh);
+		goto out;
+	}
+
+	/* sets nh_dev if successful */
+	err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
+	if (!err) {
+		nh->nh_flags = fib_nh->fib_nh_flags;
+		fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
+					  fib_nh->fib_nh_scope);
+	} else {
+		fib_nh_release(net, fib_nh);
+	}
+out:
+	return err;
+}
+
+static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
+			  struct nh_info *nhi, struct nh_config *cfg,
+			  struct netlink_ext_ack *extack)
+{
+	struct fib6_nh *fib6_nh = &nhi->fib6_nh;
+	struct fib6_config fib6_cfg = {
+		.fc_table = l3mdev_fib_table(cfg->dev),
+		.fc_ifindex = cfg->nh_ifindex,
+		.fc_gateway = cfg->gw.ipv6,
+		.fc_flags = cfg->nh_flags,
+		.fc_encap = cfg->nh_encap,
+		.fc_encap_type = cfg->nh_encap_type,
+	};
+	int err;
+
+	if (!ipv6_addr_any(&cfg->gw.ipv6))
+		fib6_cfg.fc_flags |= RTF_GATEWAY;
+
+	/* sets nh_dev if successful */
+	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
+				      extack);
+	if (err)
+		ipv6_stub->fib6_nh_release(fib6_nh);
+	else
+		nh->nh_flags = fib6_nh->fib_nh_flags;
+
+	return err;
+}
+
+static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
+				      struct netlink_ext_ack *extack)
+{
+	struct nh_info *nhi;
+	struct nexthop *nh;
+	int err = 0;
+
+	nh = nexthop_alloc();
+	if (!nh)
+		return ERR_PTR(-ENOMEM);
+
+	nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
+	if (!nhi) {
+		kfree(nh);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	nh->nh_flags = cfg->nh_flags;
+	nh->net = net;
+
+	nhi->nh_parent = nh;
+	nhi->family = cfg->nh_family;
+	nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
+
+	if (cfg->nh_blackhole) {
+		nhi->reject_nh = 1;
+		cfg->nh_ifindex = net->loopback_dev->ifindex;
+	}
+
+	switch (cfg->nh_family) {
+	case AF_INET:
+		err = nh_create_ipv4(net, nh, nhi, cfg, extack);
+		break;
+	case AF_INET6:
+		err = nh_create_ipv6(net, nh, nhi, cfg, extack);
+		break;
+	}
+
+	if (err) {
+		kfree(nhi);
+		kfree(nh);
+		return ERR_PTR(err);
+	}
+
+	/* add the entry to the device based hash */
+	nexthop_devhash_add(net, nhi);
+
+	rcu_assign_pointer(nh->nh_info, nhi);
+
+	return nh;
+}
+
+/* called with rtnl lock held */
+static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
+				   struct netlink_ext_ack *extack)
+{
+	struct nexthop *nh;
+	int err;
+
+	if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
+		NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!cfg->nh_id) {
+		cfg->nh_id = nh_find_unused_id(net);
+		if (!cfg->nh_id) {
+			NL_SET_ERR_MSG(extack, "No unused id");
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	if (cfg->nh_grp)
+		nh = nexthop_create_group(net, cfg);
+	else
+		nh = nexthop_create(net, cfg, extack);
+
+	if (IS_ERR(nh))
+		return nh;
+
+	refcount_set(&nh->refcnt, 1);
+	nh->id = cfg->nh_id;
+	nh->protocol = cfg->nh_protocol;
+	nh->net = net;
+
+	err = insert_nexthop(net, nh, cfg, extack);
+	if (err) {
+		__remove_nexthop(net, nh, NULL);
+		nexthop_put(nh);
+		nh = ERR_PTR(err);
+	}
+
+	return nh;
+}
+
+static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
+			    struct nlmsghdr *nlh, struct nh_config *cfg,
+			    struct netlink_ext_ack *extack)
+{
+	struct nhmsg *nhm = nlmsg_data(nlh);
+	struct nlattr *tb[NHA_MAX + 1];
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
+			  extack);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (nhm->resvd || nhm->nh_scope) {
+		NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
+		goto out;
+	}
+	if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
+		NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
+		goto out;
+	}
+
+	switch (nhm->nh_family) {
+	case AF_INET:
+	case AF_INET6:
+		break;
+	case AF_UNSPEC:
+		if (tb[NHA_GROUP])
+			break;
+		/* fallthrough */
+	default:
+		NL_SET_ERR_MSG(extack, "Invalid address family");
+		goto out;
+	}
+
+	if (tb[NHA_GROUPS] || tb[NHA_MASTER]) {
+		NL_SET_ERR_MSG(extack, "Invalid attributes in request");
+		goto out;
+	}
+
+	memset(cfg, 0, sizeof(*cfg));
+	cfg->nlflags = nlh->nlmsg_flags;
+	cfg->nlinfo.portid = NETLINK_CB(skb).portid;
+	cfg->nlinfo.nlh = nlh;
+	cfg->nlinfo.nl_net = net;
+
+	cfg->nh_family = nhm->nh_family;
+	cfg->nh_protocol = nhm->nh_protocol;
+	cfg->nh_flags = nhm->nh_flags;
+
+	if (tb[NHA_ID])
+		cfg->nh_id = nla_get_u32(tb[NHA_ID]);
+
+	if (tb[NHA_GROUP]) {
+		if (nhm->nh_family != AF_UNSPEC) {
+			NL_SET_ERR_MSG(extack, "Invalid family for group");
+			goto out;
+		}
+		cfg->nh_grp = tb[NHA_GROUP];
+
+		cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
+		if (tb[NHA_GROUP_TYPE])
+			cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
+
+		if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
+			NL_SET_ERR_MSG(extack, "Invalid group type");
+			goto out;
+		}
+		err = nh_check_attr_group(net, tb, extack);
+
+		/* no other attributes should be set */
+		goto out;
+	}
+
+	if (tb[NHA_BLACKHOLE]) {
+		if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
+		    tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE]) {
+			NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif");
+			goto out;
+		}
+
+		cfg->nh_blackhole = 1;
+		err = 0;
+		goto out;
+	}
+
+	if (!tb[NHA_OIF]) {
+		NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops");
+		goto out;
+	}
+
+	cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
+	if (cfg->nh_ifindex)
+		cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
+
+	if (!cfg->dev) {
+		NL_SET_ERR_MSG(extack, "Invalid device index");
+		goto out;
+	} else if (!(cfg->dev->flags & IFF_UP)) {
+		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+		err = -ENETDOWN;
+		goto out;
+	} else if (!netif_carrier_ok(cfg->dev)) {
+		NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
+		err = -ENETDOWN;
+		goto out;
+	}
+
+	err = -EINVAL;
+	if (tb[NHA_GATEWAY]) {
+		struct nlattr *gwa = tb[NHA_GATEWAY];
+
+		switch (cfg->nh_family) {
+		case AF_INET:
+			if (nla_len(gwa) != sizeof(u32)) {
+				NL_SET_ERR_MSG(extack, "Invalid gateway");
+				goto out;
+			}
+			cfg->gw.ipv4 = nla_get_be32(gwa);
+			break;
+		case AF_INET6:
+			if (nla_len(gwa) != sizeof(struct in6_addr)) {
+				NL_SET_ERR_MSG(extack, "Invalid gateway");
+				goto out;
+			}
+			cfg->gw.ipv6 = nla_get_in6_addr(gwa);
+			break;
+		default:
+			NL_SET_ERR_MSG(extack,
+				       "Unknown address family for gateway");
+			goto out;
+		}
+	} else {
+		/* device only nexthop (no gateway) */
+		if (cfg->nh_flags & RTNH_F_ONLINK) {
+			NL_SET_ERR_MSG(extack,
+				       "ONLINK flag can not be set for nexthop without a gateway");
+			goto out;
+		}
+	}
+
+	if (tb[NHA_ENCAP]) {
+		cfg->nh_encap = tb[NHA_ENCAP];
+
+		if (!tb[NHA_ENCAP_TYPE]) {
+			NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
+			goto out;
+		}
+
+		cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
+		err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
+		if (err < 0)
+			goto out;
+
+	} else if (tb[NHA_ENCAP_TYPE]) {
+		NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
+		goto out;
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
+/* rtnl */
+static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
+			   struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nh_config cfg;
+	struct nexthop *nh;
+	int err;
+
+	err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
+	if (!err) {
+		nh = nexthop_add(net, &cfg, extack);
+		if (IS_ERR(nh))
+			err = PTR_ERR(nh);
+	}
+
+	return err;
+}
+
+static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id,
+				struct netlink_ext_ack *extack)
+{
+	struct nhmsg *nhm = nlmsg_data(nlh);
+	struct nlattr *tb[NHA_MAX + 1];
+	int err, i;
+
+	err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
+			  extack);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	for (i = 0; i < __NHA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NHA_ID:
+			break;
+		default:
+			NL_SET_ERR_MSG_ATTR(extack, tb[i],
+					    "Unexpected attribute in request");
+			goto out;
+		}
+	}
+	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header");
+		goto out;
+	}
+
+	if (!tb[NHA_ID]) {
+		NL_SET_ERR_MSG(extack, "Nexthop id is missing");
+		goto out;
+	}
+
+	*id = nla_get_u32(tb[NHA_ID]);
+	if (!(*id))
+		NL_SET_ERR_MSG(extack, "Invalid nexthop id");
+	else
+		err = 0;
+out:
+	return err;
+}
+
+/* rtnl */
+static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
+			   struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nl_info nlinfo = {
+		.nlh = nlh,
+		.nl_net = net,
+		.portid = NETLINK_CB(skb).portid,
+	};
+	struct nexthop *nh;
+	int err;
+	u32 id;
+
+	err = nh_valid_get_del_req(nlh, &id, extack);
+	if (err)
+		return err;
+
+	nh = nexthop_find_by_id(net, id);
+	if (!nh)
+		return -ENOENT;
+
+	remove_nexthop(net, nh, &nlinfo);
+
+	return 0;
+}
+
+/* rtnl */
+static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+			   struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sk_buff *skb = NULL;
+	struct nexthop *nh;
+	int err;
+	u32 id;
+
+	err = nh_valid_get_del_req(nlh, &id, extack);
+	if (err)
+		return err;
+
+	err = -ENOBUFS;
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		goto out;
+
+	err = -ENOENT;
+	nh = nexthop_find_by_id(net, id);
+	if (!nh)
+		goto errout_free;
+
+	err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
+			   nlh->nlmsg_seq, 0);
+	if (err < 0) {
+		WARN_ON(err == -EMSGSIZE);
+		goto errout_free;
+	}
+
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+out:
+	return err;
+errout_free:
+	kfree_skb(skb);
+	goto out;
+}
+
+static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
+			     bool group_filter, u8 family)
+{
+	const struct net_device *dev;
+	const struct nh_info *nhi;
+
+	if (group_filter && !nh->is_group)
+		return true;
+
+	if (!dev_idx && !master_idx && !family)
+		return false;
+
+	if (nh->is_group)
+		return true;
+
+	nhi = rtnl_dereference(nh->nh_info);
+	if (family && nhi->family != family)
+		return true;
+
+	dev = nhi->fib_nhc.nhc_dev;
+	if (dev_idx && (!dev || dev->ifindex != dev_idx))
+		return true;
+
+	if (master_idx) {
+		struct net_device *master;
+
+		if (!dev)
+			return true;
+
+		master = netdev_master_upper_dev_get((struct net_device *)dev);
+		if (!master || master->ifindex != master_idx)
+			return true;
+	}
+
+	return false;
+}
+
+static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
+			     int *master_idx, bool *group_filter,
+			     struct netlink_callback *cb)
+{
+	struct netlink_ext_ack *extack = cb->extack;
+	struct nlattr *tb[NHA_MAX + 1];
+	struct nhmsg *nhm;
+	int err, i;
+	u32 idx;
+
+	err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
+			  NULL);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i <= NHA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NHA_OIF:
+			idx = nla_get_u32(tb[i]);
+			if (idx > INT_MAX) {
+				NL_SET_ERR_MSG(extack, "Invalid device index");
+				return -EINVAL;
+			}
+			*dev_idx = idx;
+			break;
+		case NHA_MASTER:
+			idx = nla_get_u32(tb[i]);
+			if (idx > INT_MAX) {
+				NL_SET_ERR_MSG(extack, "Invalid master device index");
+				return -EINVAL;
+			}
+			*master_idx = idx;
+			break;
+		case NHA_GROUPS:
+			*group_filter = true;
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
+			return -EINVAL;
+		}
+	}
+
+	nhm = nlmsg_data(nlh);
+	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* rtnl */
+static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nhmsg *nhm = nlmsg_data(cb->nlh);
+	int dev_filter_idx = 0, master_idx = 0;
+	struct net *net = sock_net(skb->sk);
+	struct rb_root *root = &net->nexthop.rb_root;
+	bool group_filter = false;
+	struct rb_node *node;
+	int idx = 0, s_idx;
+	int err;
+
+	err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
+				&group_filter, cb);
+	if (err < 0)
+		return err;
+
+	s_idx = cb->args[0];
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		struct nexthop *nh;
+
+		if (idx < s_idx)
+			goto cont;
+
+		nh = rb_entry(node, struct nexthop, rb_node);
+		if (nh_dump_filtered(nh, dev_filter_idx, master_idx,
+				     group_filter, nhm->nh_family))
+			goto cont;
+
+		err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
+				   NETLINK_CB(cb->skb).portid,
+				   cb->nlh->nlmsg_seq, NLM_F_MULTI);
+		if (err < 0) {
+			if (likely(skb->len))
+				goto out;
+
+			goto out_err;
+		}
+cont:
+		idx++;
+	}
+
+out:
+	err = skb->len;
+out_err:
+	cb->args[0] = idx;
+	cb->seq = net->nexthop.seq;
+	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+
+	return err;
+}
+
+static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
+{
+	unsigned int hash = nh_dev_hashfn(dev->ifindex);
+	struct net *net = dev_net(dev);
+	struct hlist_head *head = &net->nexthop.devhash[hash];
+	struct hlist_node *n;
+	struct nh_info *nhi;
+
+	hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
+		if (nhi->fib_nhc.nhc_dev == dev) {
+			if (nhi->family == AF_INET)
+				fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
+						   orig_mtu);
+		}
+	}
+}
+
+/* rtnl */
+static int nh_netdev_event(struct notifier_block *this,
+			   unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_info_ext *info_ext;
+
+	switch (event) {
+	case NETDEV_DOWN:
+	case NETDEV_UNREGISTER:
+		nexthop_flush_dev(dev);
+		break;
+	case NETDEV_CHANGE:
+		if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
+			nexthop_flush_dev(dev);
+		break;
+	case NETDEV_CHANGEMTU:
+		info_ext = ptr;
+		nexthop_sync_mtu(dev, info_ext->ext.mtu);
+		rt_cache_flush(dev_net(dev));
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block nh_netdev_notifier = {
+	.notifier_call = nh_netdev_event,
+};
+
+static void __net_exit nexthop_net_exit(struct net *net)
+{
+	rtnl_lock();
+	flush_all_nexthops(net);
+	rtnl_unlock();
+	kfree(net->nexthop.devhash);
+}
+
+static int __net_init nexthop_net_init(struct net *net)
+{
+	size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;
+
+	net->nexthop.rb_root = RB_ROOT;
+	net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
+	if (!net->nexthop.devhash)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static struct pernet_operations nexthop_net_ops = {
+	.init = nexthop_net_init,
+	.exit = nexthop_net_exit,
+};
+
+static int __init nexthop_init(void)
+{
+	register_pernet_subsys(&nexthop_net_ops);
+
+	register_netdevice_notifier(&nh_netdev_notifier);
+
+	rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
+		      rtm_dump_nexthop, 0);
+
+	rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
+	rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
+
+	rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
+	rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
+
+	return 0;
+}
+subsys_initcall(nexthop_init);
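Two numeric details in the file above are easy to miss: netlink carries each group entry's weight minus one (nexthop_create_group() adds 1 back, nla_put_nh_group() subtracts it), and nh_group_rebalance() converts cumulative weights into inclusive upper bounds on a 31-bit hash space that nexthop_select_path() compares against, skipping entries whose upper_bound is below the hash. A standalone userspace sketch of that arithmetic, with illustrative stored weights 1 and 3:

#include <stdio.h>

int main(void)
{
	long long weights[] = { 1, 3 }, total = 4, w = 0;
	int i;

	for (i = 0; i < 2; i++) {
		w += weights[i];
		/* DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1 */
		long long upper_bound = ((w << 31) + total / 2) / total - 1;
		printf("entry %d: upper_bound %lld\n", i, upper_bound);
	}
	return 0;
}

This prints 536870911 (2^29 - 1) and 2147483647 (2^31 - 1): a hash in the low quarter of the space selects entry 0 and everything else selects entry 1, matching the 1:3 weighting.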
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8d7aaf1..5354272 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -5,11 +6,6 @@
  *
  *		"Ping" sockets
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Based on ipv4/udp.c code.
  *
  * Authors:	Vasiliy Kulikov / Openwall (for Linux 2.6),
@@ -17,7 +13,6 @@
  *
  * Pavel gave all rights to bugs to Vasiliy,
  * none of the bugs are Pavel's now.
- *
  */
 
 #include <linux/uaccess.h>
@@ -779,14 +774,14 @@
 	}
 
 	if (ipv4_is_multicast(daddr)) {
-		if (!ipc.oif)
+		if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
 			ipc.oif = inet->mc_index;
 		if (!saddr)
 			saddr = inet->mc_addr;
 	} else if (!ipc.oif)
 		ipc.oif = inet->uc_index;
 
-	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+	flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
 			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
 			   inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
 			   sk->sk_uid);
@@ -1113,7 +1108,7 @@
 	__u16 srcp = ntohs(inet->inet_sport);
 
 	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d",
+		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u",
 		bucket, src, srcp, dest, destp, sp->sk_state,
 		sk_wmem_alloc_get(sp),
 		sk_rmem_alloc_get(sp),
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 7028968..cc90243 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -25,11 +26,6 @@
  *					split functions for more readibility.
  *	Andi Kleen		:	Add support for /proc/net/netstat
  *	Arnaldo C. Melo		:	Convert to seq_file
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 #include <linux/types.h>
 #include <net/net_namespace.h>
@@ -72,8 +68,8 @@
 	seq_printf(seq, "RAW: inuse %d\n",
 		   sock_prot_inuse_get(net, &raw_prot));
 	seq_printf(seq,  "FRAG: inuse %u memory %lu\n",
-		   atomic_read(&net->ipv4.frags.rhashtable.nelems),
-		   frag_mem_limit(&net->ipv4.frags));
+		   atomic_read(&net->ipv4.fqdir->rhashtable.nelems),
+		   frag_mem_limit(net->ipv4.fqdir));
 	return 0;
 }
 
@@ -219,6 +215,7 @@
 	SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
 	SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
 	SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED),
+	SNMP_MIB_ITEM("TCPBacklogCoalesce", LINUX_MIB_TCPBACKLOGCOALESCE),
 	SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT),
 	SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT),
 	SNMP_MIB_ITEM("TCPDSACKRecv", LINUX_MIB_TCPDSACKRECV),
@@ -290,6 +287,8 @@
 	SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
 	SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
 	SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
+	SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
+	SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 32a691b..9a8c089 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -16,11 +17,6 @@
  *		Richard Colella	: Hang on hash collision
  *		Vince Laviano	: Modified inet_del_protocol() to correctly
  *				  maintain copy bit.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 #include <linux/cache.h>
 #include <linux/module.h>
@@ -29,6 +25,7 @@
 #include <net/protocol.h>
 
 struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet_protos);
 const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
 EXPORT_SYMBOL(inet_offloads);
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 33df4d7..3183413 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -30,11 +31,6 @@
  *		Alan Cox	:	Added IP_HDRINCL option.
  *		Alan Cox	:	Skip broadcast check if BSDism set.
  *		David S. Miller	:	New socket lookup architecture.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #include <linux/types.h>
@@ -131,8 +127,7 @@
 		if (net_eq(sock_net(sk), net) && inet->inet_num == num	&&
 		    !(inet->inet_daddr && inet->inet_daddr != raddr) 	&&
 		    !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
-		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
-		      sk->sk_bound_dev_if != sdif))
+		    raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
 			goto found; /* gotcha */
 	}
 	sk = NULL;
@@ -174,6 +169,7 @@
 static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
 {
 	int sdif = inet_sdif(skb);
+	int dif = inet_iif(skb);
 	struct sock *sk;
 	struct hlist_head *head;
 	int delivered = 0;
@@ -186,8 +182,7 @@
 
 	net = dev_net(skb->dev);
 	sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
-			     iph->saddr, iph->daddr,
-			     skb->dev->ifindex, sdif);
+			     iph->saddr, iph->daddr, dif, sdif);
 
 	while (sk) {
 		delivered = 1;
@@ -202,7 +197,7 @@
 		}
 		sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
 				     iph->saddr, iph->daddr,
-				     skb->dev->ifindex, sdif);
+				     dif, sdif);
 	}
 out:
 	read_unlock(&raw_v4_hashinfo.lock);
@@ -337,7 +332,7 @@
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	skb_push(skb, skb->data - skb_network_header(skb));
 
@@ -380,7 +375,7 @@
 	skb_reserve(skb, hlen);
 
 	skb->priority = sk->sk_priority;
-	skb->mark = sk->sk_mark;
+	skb->mark = sockc->mark;
 	skb->tstamp = sockc->transmit_time;
 	skb_dst_set(skb, &rt->dst);
 	*rtp = NULL;
@@ -391,7 +386,7 @@
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
+	skb_setup_tx_timestamp(skb, sockc->tsflags);
 
 	if (flags & MSG_CONFIRM)
 		skb_set_dst_pending_confirm(skb, 1);
@@ -608,7 +603,7 @@
 		tos |= RTO_ONLINK;
 
 	if (ipv4_is_multicast(daddr)) {
-		if (!ipc.oif)
+		if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
 			ipc.oif = inet->mc_index;
 		if (!saddr)
 			saddr = inet->mc_addr;
@@ -628,7 +623,7 @@
 		}
 	}
 
-	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+	flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
 			   RT_SCOPE_UNIVERSE,
 			   hdrincl ? IPPROTO_RAW : sk->sk_protocol,
 			   inet_sk_flowi_flags(sk) |
@@ -805,7 +800,7 @@
 	return copied;
 }
 
-static int raw_init(struct sock *sk)
+static int raw_sk_init(struct sock *sk)
 {
 	struct raw_sock *rp = raw_sk(sk);
 
@@ -970,7 +965,7 @@
 	.connect	   = ip4_datagram_connect,
 	.disconnect	   = __udp_disconnect,
 	.ioctl		   = raw_ioctl,
-	.init		   = raw_init,
+	.init		   = raw_sk_init,
 	.setsockopt	   = raw_setsockopt,
 	.getsockopt	   = raw_getsockopt,
 	.sendmsg	   = raw_sendmsg,
@@ -1077,7 +1072,7 @@
 	      srcp  = inet->inet_num;
 
 	seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
+		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
 		i, src, srcp, dest, destp, sp->sk_state,
 		sk_wmem_alloc_get(sp),
 		sk_rmem_alloc_get(sp),
@@ -1134,3 +1129,27 @@
 	unregister_pernet_subsys(&raw_net_ops);
 }
 #endif /* CONFIG_PROC_FS */
+
+static void raw_sysctl_init_net(struct net *net)
+{
+#ifdef CONFIG_NET_L3_MASTER_DEV
+	net->ipv4.sysctl_raw_l3mdev_accept = 1;
+#endif
+}
+
+static int __net_init raw_sysctl_init(struct net *net)
+{
+	raw_sysctl_init_net(net);
+	return 0;
+}
+
+static struct pernet_operations __net_initdata raw_sysctl_ops = {
+	.init	= raw_sysctl_init,
+};
+
+void __init raw_init(void)
+{
+	raw_sysctl_init_net(&init_net);
+	if (register_pernet_subsys(&raw_sysctl_ops))
+		panic("RAW: failed to init sysctl parameters.\n");
+}
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index c200065..e35736b 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/module.h>
 
 #include <linux/inet_diag.h>
@@ -23,9 +24,6 @@
 		return &raw_v6_hashinfo;
 #endif
 	} else {
-		pr_warn_once("Unexpected inet family %d\n",
-			     r->sdiag_family);
-		WARN_ON_ONCE(1);
 		return ERR_PTR(-EINVAL);
 	}
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8501554..621f834 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -55,11 +56,6 @@
  *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
  * 	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
  * 	Ilia Sotnikov		:	Removed TOS from hash calculations
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) "IPv4: " fmt
@@ -99,6 +95,7 @@
 #include <net/inetpeer.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/nexthop.h>
 #include <net/arp.h>
 #include <net/tcp.h>
 #include <net/icmp.h>
@@ -434,37 +431,46 @@
 					   struct sk_buff *skb,
 					   const void *daddr)
 {
+	const struct rtable *rt = container_of(dst, struct rtable, dst);
 	struct net_device *dev = dst->dev;
-	const __be32 *pkey = daddr;
-	const struct rtable *rt;
 	struct neighbour *n;
 
-	rt = (const struct rtable *) dst;
-	if (rt->rt_gateway)
-		pkey = (const __be32 *) &rt->rt_gateway;
-	else if (skb)
-		pkey = &ip_hdr(skb)->daddr;
+	rcu_read_lock_bh();
 
-	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
-	if (n)
-		return n;
-	return neigh_create(&arp_tbl, pkey, dev);
+	if (likely(rt->rt_gw_family == AF_INET)) {
+		n = ip_neigh_gw4(dev, rt->rt_gw4);
+	} else if (rt->rt_gw_family == AF_INET6) {
+		n = ip_neigh_gw6(dev, &rt->rt_gw6);
+	} else {
+		__be32 pkey;
+
+		pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
+		n = ip_neigh_gw4(dev, pkey);
+	}
+
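+	/* The lookup ran under rcu_read_lock_bh(); treat a neighbour whose
+	 * refcount has already dropped to zero as a miss rather than
+	 * returning an entry that is being destroyed.
+	 */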
+	if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt))
+		n = NULL;
+
+	rcu_read_unlock_bh();
+
+	return n;
 }
 
 static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 {
+	const struct rtable *rt = container_of(dst, struct rtable, dst);
 	struct net_device *dev = dst->dev;
 	const __be32 *pkey = daddr;
-	const struct rtable *rt;
 
-	rt = (const struct rtable *)dst;
-	if (rt->rt_gateway)
-		pkey = (const __be32 *)&rt->rt_gateway;
-	else if (!daddr ||
+	if (rt->rt_gw_family == AF_INET) {
+		pkey = (const __be32 *)&rt->rt_gw4;
+	} else if (rt->rt_gw_family == AF_INET6) {
+		return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
+	} else if (!daddr ||
 		 (rt->rt_flags &
-		  (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
+		  (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
 		return;
-
+	}
 	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
 }
 
@@ -500,15 +506,17 @@
 
 void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
 {
-	static u32 ip_idents_hashrnd __read_mostly;
 	u32 hash, id;
 
-	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
+	/* Note: the check-then-write below is racy, but that is okay:
+	 * concurrent writers merely store independently generated random
+	 * bytes, and any resulting value works as a siphash key.
+	 */
+	if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
+		get_random_bytes(&net->ipv4.ip_id_key,
+				 sizeof(net->ipv4.ip_id_key));
 
-	hash = jhash_3words((__force u32)iph->daddr,
+	hash = siphash_3u32((__force u32)iph->daddr,
 			    (__force u32)iph->saddr,
-			    iph->protocol ^ net_hash_mix(net),
-			    ip_idents_hashrnd);
+			    iph->protocol,
+			    &net->ipv4.ip_id_key);
 	id = ip_idents_reserve(hash, segs);
 	iph->id = htons(id);
 }
@@ -627,13 +635,15 @@
 
 	if (fnhe->fnhe_gw) {
 		rt->rt_flags |= RTCF_REDIRECTED;
-		rt->rt_gateway = fnhe->fnhe_gw;
 		rt->rt_uses_gateway = 1;
+		rt->rt_gw_family = AF_INET;
+		rt->rt_gw4 = fnhe->fnhe_gw;
 	}
 }
 
-static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
-				  u32 pmtu, bool lock, unsigned long expires)
+static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
+				  __be32 gw, u32 pmtu, bool lock,
+				  unsigned long expires)
 {
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
@@ -642,17 +652,17 @@
 	unsigned int i;
 	int depth;
 
-	genid = fnhe_genid(dev_net(nh->nh_dev));
+	genid = fnhe_genid(dev_net(nhc->nhc_dev));
 	hval = fnhe_hashfun(daddr);
 
 	spin_lock_bh(&fnhe_lock);
 
-	hash = rcu_dereference(nh->nh_exceptions);
+	hash = rcu_dereference(nhc->nhc_exceptions);
 	if (!hash) {
 		hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
 		if (!hash)
 			goto out_unlock;
-		rcu_assign_pointer(nh->nh_exceptions, hash);
+		rcu_assign_pointer(nhc->nhc_exceptions, hash);
 	}
 
 	hash += hval;
@@ -704,13 +714,13 @@
 		 * stale, so anyone caching it rechecks if this exception
 		 * applies to them.
 		 */
-		rt = rcu_dereference(nh->nh_rth_input);
+		rt = rcu_dereference(nhc->nhc_rth_input);
 		if (rt)
 			rt->dst.obsolete = DST_OBSOLETE_KILL;
 
 		for_each_possible_cpu(i) {
 			struct rtable __rcu **prt;
-			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
+			prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
 			rt = rcu_dereference(*prt);
 			if (rt)
 				rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -745,7 +755,7 @@
 		return;
 	}
 
-	if (rt->rt_gateway != old_gw)
+	if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
 		return;
 
 	in_dev = __in_dev_get_rcu(dev);
@@ -776,9 +786,9 @@
 			neigh_event_send(n, NULL);
 		} else {
 			if (fib_lookup(net, fl4, &res, 0) == 0) {
-				struct fib_nh *nh = &FIB_RES_NH(res);
+				struct fib_nh_common *nhc = FIB_RES_NHC(res);
 
-				update_or_create_fnhe(nh, fl4->daddr, new_gw,
+				update_or_create_fnhe(nhc, fl4->daddr, new_gw,
 						0, false,
 						jiffies + ip_rt_gc_timeout);
 			}
@@ -887,13 +897,15 @@
 	/* No redirected packets during ip_rt_redirect_silence;
 	 * reset the algorithm.
 	 */
-	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
+	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
 		peer->rate_tokens = 0;
+		peer->n_redirects = 0;
+	}
 
 	/* Too many ignored redirects; do not send anything
 	 * set dst.rate_last to the last seen redirected packet.
 	 */
-	if (peer->rate_tokens >= ip_rt_redirect_number) {
+	if (peer->n_redirects >= ip_rt_redirect_number) {
 		peer->rate_last = jiffies;
 		goto out_put_peer;
 	}
@@ -904,15 +916,15 @@
 	if (peer->rate_tokens == 0 ||
 	    time_after(jiffies,
 		       (peer->rate_last +
-			(ip_rt_redirect_load << peer->rate_tokens)))) {
+			(ip_rt_redirect_load << peer->n_redirects)))) {
 		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
 
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
 		peer->rate_last = jiffies;
-		++peer->rate_tokens;
+		++peer->n_redirects;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 		if (log_martians &&
-		    peer->rate_tokens == ip_rt_redirect_number)
+		    peer->n_redirects == ip_rt_redirect_number)
 			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
 					     &ip_hdr(skb)->saddr, inet_iif(skb),
 					     &ip_hdr(skb)->daddr, &gw);
@@ -1022,9 +1034,9 @@
 
 	rcu_read_lock();
 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
-		struct fib_nh *nh = &FIB_RES_NH(res);
+		struct fib_nh_common *nhc = FIB_RES_NHC(res);
 
-		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
+		update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
 				      jiffies + ip_rt_mtu_expires);
 	}
 	rcu_read_unlock();
@@ -1041,17 +1053,15 @@
 }
 
 void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
-		      int oif, u32 mark, u8 protocol, int flow_flags)
+		      int oif, u8 protocol)
 {
 	const struct iphdr *iph = (const struct iphdr *) skb->data;
 	struct flowi4 fl4;
 	struct rtable *rt;
-
-	if (!mark)
-		mark = IP4_REPLY_MARK(net, skb->mark);
+	u32 mark = IP4_REPLY_MARK(net, skb->mark);
 
 	__build_flow_key(net, &fl4, NULL, iph, oif,
-			 RT_TOS(iph->tos), protocol, mark, flow_flags);
+			 RT_TOS(iph->tos), protocol, mark, 0);
 	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
 		__ip_rt_update_pmtu(rt, &fl4, mtu);
@@ -1133,14 +1143,14 @@
 EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
 
 void ipv4_redirect(struct sk_buff *skb, struct net *net,
-		   int oif, u32 mark, u8 protocol, int flow_flags)
+		   int oif, u8 protocol)
 {
 	const struct iphdr *iph = (const struct iphdr *) skb->data;
 	struct flowi4 fl4;
 	struct rtable *rt;
 
 	__build_flow_key(net, &fl4, NULL, iph, oif,
-			 RT_TOS(iph->tos), protocol, mark, flow_flags);
+			 RT_TOS(iph->tos), protocol, 0, 0);
 	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
 		__ip_do_redirect(rt, skb, &fl4, false);
@@ -1175,18 +1185,46 @@
 	 *
 	 * When a PMTU/redirect information update invalidates a route,
 	 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
-	 * DST_OBSOLETE_DEAD by dst_free().
+	 * DST_OBSOLETE_DEAD.
 	 */
 	if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
 		return NULL;
 	return dst;
 }
 
+static void ipv4_send_dest_unreach(struct sk_buff *skb)
+{
+	struct ip_options opt;
+	int res;
+
+	/* Recompile ip options since IPCB may not be valid anymore.
+	 * Also check we have a reasonable ipv4 header.
+	 */
+	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
+	    ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
+		return;
+
+	memset(&opt, 0, sizeof(opt));
+	if (ip_hdr(skb)->ihl > 5) {
+		if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
+			return;
+		opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
+
+		rcu_read_lock();
+		res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+		rcu_read_unlock();
+
+		if (res)
+			return;
+	}
+	__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
+}
+
 static void ipv4_link_failure(struct sk_buff *skb)
 {
 	struct rtable *rt;
 
-	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+	ipv4_send_dest_unreach(skb);
 
 	rt = skb_rtable(skb);
 	if (rt)
@@ -1220,22 +1258,19 @@
 		src = ip_hdr(skb)->saddr;
 	else {
 		struct fib_result res;
-		struct flowi4 fl4;
-		struct iphdr *iph;
-
-		iph = ip_hdr(skb);
-
-		memset(&fl4, 0, sizeof(fl4));
-		fl4.daddr = iph->daddr;
-		fl4.saddr = iph->saddr;
-		fl4.flowi4_tos = RT_TOS(iph->tos);
-		fl4.flowi4_oif = rt->dst.dev->ifindex;
-		fl4.flowi4_iif = skb->dev->ifindex;
-		fl4.flowi4_mark = skb->mark;
+		struct iphdr *iph = ip_hdr(skb);
+		struct flowi4 fl4 = {
+			.daddr = iph->daddr,
+			.saddr = iph->saddr,
+			.flowi4_tos = RT_TOS(iph->tos),
+			.flowi4_oif = rt->dst.dev->ifindex,
+			.flowi4_iif = skb->dev->ifindex,
+			.flowi4_mark = skb->mark,
+		};
 
 		rcu_read_lock();
 		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
-			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
+			src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
 		else
 			src = inet_select_addr(rt->dst.dev,
 					       rt_nexthop(rt, iph->daddr),
@@ -1287,7 +1322,7 @@
 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
-static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
 {
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
@@ -1295,7 +1330,7 @@
 
 	spin_lock_bh(&fnhe_lock);
 
-	hash = rcu_dereference_protected(nh->nh_exceptions,
+	hash = rcu_dereference_protected(nhc->nhc_exceptions,
 					 lockdep_is_held(&fnhe_lock));
 	hash += hval;
 
@@ -1305,6 +1340,10 @@
 		if (fnhe->fnhe_daddr == daddr) {
 			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
 				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+			/* set fnhe_daddr to 0 to ensure it won't bind with
+			 * new dsts in rt_bind_exception().
+			 */
+			fnhe->fnhe_daddr = 0;
 			fnhe_flush_routes(fnhe);
 			kfree_rcu(fnhe, rcu);
 			break;
@@ -1317,9 +1356,10 @@
 	spin_unlock_bh(&fnhe_lock);
 }
 
-static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
+static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
+					       __be32 daddr)
 {
-	struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
+	struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
 	struct fib_nh_exception *fnhe;
 	u32 hval;
 
@@ -1333,7 +1373,7 @@
 		if (fnhe->fnhe_daddr == daddr) {
 			if (fnhe->fnhe_expires &&
 			    time_after(jiffies, fnhe->fnhe_expires)) {
-				ip_del_fnhe(nh, daddr);
+				ip_del_fnhe(nhc, daddr);
 				break;
 			}
 			return fnhe;
@@ -1350,9 +1390,9 @@
 
 u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
 {
+	struct fib_nh_common *nhc = res->nhc;
+	struct net_device *dev = nhc->nhc_dev;
 	struct fib_info *fi = res->fi;
-	struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
-	struct net_device *dev = nh->nh_dev;
 	u32 mtu = 0;
 
 	if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
@@ -1362,7 +1402,7 @@
 	if (likely(!mtu)) {
 		struct fib_nh_exception *fnhe;
 
-		fnhe = find_exception(nh, daddr);
+		fnhe = find_exception(nhc, daddr);
 		if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
 			mtu = fnhe->fnhe_pmtu;
 	}
@@ -1370,7 +1410,7 @@
 	if (likely(!mtu))
 		mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
 
-	return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
+	return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
 }
 
 static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
@@ -1401,8 +1441,10 @@
 			orig = NULL;
 		}
 		fill_route_from_fnhe(rt, fnhe);
-		if (!rt->rt_gateway)
-			rt->rt_gateway = daddr;
+		if (!rt->rt_gw4) {
+			rt->rt_gw4 = daddr;
+			rt->rt_gw_family = AF_INET;
+		}
 
 		if (do_cache) {
 			dst_hold(&rt->dst);
@@ -1421,15 +1463,15 @@
 	return ret;
 }
 
-static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
 {
 	struct rtable *orig, *prev, **p;
 	bool ret = true;
 
 	if (rt_is_input_route(rt)) {
-		p = (struct rtable **)&nh->nh_rth_input;
+		p = (struct rtable **)&nhc->nhc_rth_input;
 	} else {
-		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
+		p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
 	}
 	orig = *p;
 
@@ -1440,7 +1482,7 @@
 	prev = cmpxchg(p, orig, rt);
 	if (prev == orig) {
 		if (orig) {
-			dst_dev_put(&orig->dst);
+			rt_add_uncached_list(orig);
 			dst_release(&orig->dst);
 		}
 	} else {
@@ -1482,18 +1524,14 @@
 
 static void ipv4_dst_destroy(struct dst_entry *dst)
 {
-	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
 	struct rtable *rt = (struct rtable *)dst;
 
-	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
-		kfree(p);
-
+	ip_dst_metrics_put(dst);
 	rt_del_uncached_list(rt);
 }
 
 void rt_flush_dev(struct net_device *dev)
 {
-	struct net *net = dev_net(dev);
 	struct rtable *rt;
 	int cpu;
 
@@ -1504,7 +1542,7 @@
 		list_for_each_entry(rt, &ul->head, rt_uncached) {
 			if (rt->dst.dev != dev)
 				continue;
-			rt->dst.dev = net->loopback_dev;
+			rt->dst.dev = blackhole_netdev;
 			dev_hold(rt->dst.dev);
 			dev_put(dev);
 		}
@@ -1528,33 +1566,43 @@
 	bool cached = false;
 
 	if (fi) {
-		struct fib_nh *nh = &FIB_RES_NH(*res);
+		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
 
-		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
-			rt->rt_gateway = nh->nh_gw;
+		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
 			rt->rt_uses_gateway = 1;
+			rt->rt_gw_family = nhc->nhc_gw_family;
+			/* only INET and INET6 are supported */
+			if (likely(nhc->nhc_gw_family == AF_INET))
+				rt->rt_gw4 = nhc->nhc_gw.ipv4;
+			else
+				rt->rt_gw6 = nhc->nhc_gw.ipv6;
 		}
-		dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
-		if (fi->fib_metrics != &dst_default_metrics) {
-			rt->dst._metrics |= DST_METRICS_REFCOUNTED;
-			refcount_inc(&fi->fib_metrics->refcnt);
-		}
+
+		ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
-		rt->dst.tclassid = nh->nh_tclassid;
+		if (nhc->nhc_family == AF_INET) {
+			struct fib_nh *nh;
+
+			nh = container_of(nhc, struct fib_nh, nh_common);
+			rt->dst.tclassid = nh->nh_tclassid;
+		}
 #endif
-		rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
+		rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
 		if (unlikely(fnhe))
 			cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
 		else if (do_cache)
-			cached = rt_cache_route(nh, rt);
+			cached = rt_cache_route(nhc, rt);
 		if (unlikely(!cached)) {
 			/* Routes we intend to cache in nexthop exception or
 			 * FIB nexthop have the DST_NOCACHE bit clear.
 			 * However, if we are unsuccessful at storing this
 			 * route into the cache we really need to set it.
 			 */
-			if (!rt->rt_gateway)
-				rt->rt_gateway = daddr;
+			if (!rt->rt_gw4) {
+				rt->rt_gw_family = AF_INET;
+				rt->rt_gw4 = daddr;
+			}
 			rt_add_uncached_list(rt);
 		}
 	} else
@@ -1587,8 +1635,9 @@
 		rt->rt_iif = 0;
 		rt->rt_pmtu = 0;
 		rt->rt_mtu_locked = 0;
-		rt->rt_gateway = 0;
 		rt->rt_uses_gateway = 0;
+		rt->rt_gw_family = 0;
+		rt->rt_gw4 = 0;
 		INIT_LIST_HEAD(&rt->rt_uncached);
 
 		rt->dst.output = ip_output;
@@ -1600,6 +1649,39 @@
 }
 EXPORT_SYMBOL(rt_dst_alloc);
 
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
+{
+	struct rtable *new_rt;
+
+	new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+			   rt->dst.flags);
+
+	if (new_rt) {
+		new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
+		new_rt->rt_flags = rt->rt_flags;
+		new_rt->rt_type = rt->rt_type;
+		new_rt->rt_is_input = rt->rt_is_input;
+		new_rt->rt_iif = rt->rt_iif;
+		new_rt->rt_pmtu = rt->rt_pmtu;
+		new_rt->rt_mtu_locked = rt->rt_mtu_locked;
+		new_rt->rt_gw_family = rt->rt_gw_family;
+		if (rt->rt_gw_family == AF_INET)
+			new_rt->rt_gw4 = rt->rt_gw4;
+		else if (rt->rt_gw_family == AF_INET6)
+			new_rt->rt_gw6 = rt->rt_gw6;
+		INIT_LIST_HEAD(&new_rt->rt_uncached);
+
+		new_rt->dst.flags |= DST_HOST;
+		new_rt->dst.input = rt->dst.input;
+		new_rt->dst.output = rt->dst.output;
+		new_rt->dst.error = rt->dst.error;
+		new_rt->dst.lastuse = jiffies;
+		new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
+	}
+	return new_rt;
+}
+EXPORT_SYMBOL(rt_dst_clone);
+
 /* called in rcu_read_lock() section */
 int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			  u8 tos, struct net_device *dev,
@@ -1619,7 +1701,8 @@
 		return -EINVAL;
 
 	if (ipv4_is_zeronet(saddr)) {
-		if (!ipv4_is_local_multicast(daddr))
+		if (!ipv4_is_local_multicast(daddr) &&
+		    ip_hdr(skb)->protocol != IPPROTO_IGMP)
 			return -EINVAL;
 	} else {
 		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
@@ -1688,7 +1771,7 @@
 			print_hex_dump(KERN_WARNING, "ll header: ",
 				       DUMP_PREFIX_OFFSET, 16, 1,
 				       skb_mac_header(skb),
-				       dev->hard_header_len, true);
+				       dev->hard_header_len, false);
 		}
 	}
 #endif
@@ -1700,6 +1783,8 @@
 			   struct in_device *in_dev,
 			   __be32 daddr, __be32 saddr, u32 tos)
 {
+	struct fib_nh_common *nhc = FIB_RES_NHC(*res);
+	struct net_device *dev = nhc->nhc_dev;
 	struct fib_nh_exception *fnhe;
 	struct rtable *rth;
 	int err;
@@ -1708,7 +1793,7 @@
 	u32 itag = 0;
 
 	/* get a working reference to the output device */
-	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
+	out_dev = __in_dev_get_rcu(dev);
 	if (!out_dev) {
 		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
 		return -EINVAL;
@@ -1725,10 +1810,14 @@
 
 	do_cache = res->fi && !itag;
 	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
-	    skb->protocol == htons(ETH_P_IP) &&
-	    (IN_DEV_SHARED_MEDIA(out_dev) ||
-	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
-		IPCB(skb)->flags |= IPSKB_DOREDIRECT;
+	    skb->protocol == htons(ETH_P_IP)) {
+		__be32 gw;
+
+		gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
+		if (IN_DEV_SHARED_MEDIA(out_dev) ||
+		    inet_addr_onlink(out_dev, saddr, gw))
+			IPCB(skb)->flags |= IPSKB_DOREDIRECT;
+	}
 
 	if (skb->protocol != htons(ETH_P_IP)) {
 		/* Not IP (i.e. ARP). Do not create route, if it is
@@ -1745,12 +1834,12 @@
 		}
 	}
 
-	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
+	fnhe = find_exception(nhc, daddr);
 	if (do_cache) {
 		if (fnhe)
 			rth = rcu_dereference(fnhe->fnhe_rth_input);
 		else
-			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+			rth = rcu_dereference(nhc->nhc_rth_input);
 		if (rt_cache_valid(rth)) {
 			skb_dst_set_noref(skb, &rth->dst);
 			goto out;
@@ -1827,6 +1916,7 @@
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys)
 {
+	u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
 	struct flow_keys hash_keys;
 	u32 mhash;
 
@@ -1874,9 +1964,42 @@
 			hash_keys.basic.ip_proto = fl4->flowi4_proto;
 		}
 		break;
+	case 2:
+		memset(&hash_keys, 0, sizeof(hash_keys));
+		/* skb is currently provided only when forwarding */
+		if (skb) {
+			struct flow_keys keys;
+
+			skb_flow_dissect_flow_keys(skb, &keys, 0);
+			/* Inner can be v4 or v6 */
+			if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+				hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+				hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+			} else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+				hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+				hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
+				hash_keys.tags.flow_label = keys.tags.flow_label;
+				hash_keys.basic.ip_proto = keys.basic.ip_proto;
+			} else {
+				/* Same as case 0 */
+				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+				ip_multipath_l3_keys(skb, &hash_keys);
+			}
+		} else {
+			/* Same as case 0 */
+			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+			hash_keys.addrs.v4addrs.src = fl4->saddr;
+			hash_keys.addrs.v4addrs.dst = fl4->daddr;
+		}
+		break;
 	}
 	mhash = flow_hash_from_keys(&hash_keys);
 
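+	/* Fold in a hash supplied by the caller via flowi4_multipath_hash
+	 * (e.g. one computed from a packet embedded in an ICMP error) so
+	 * that path selection follows the original flow.
+	 */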
+	if (multipath_hash)
+		mhash = jhash_2words(mhash, multipath_hash, 0);
+
 	return mhash >> 1;
 }
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -1888,7 +2011,7 @@
 			    struct flow_keys *hkeys)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res->fi && res->fi->fib_nhs > 1) {
+	if (res->fi && fib_info_num_path(res->fi) > 1) {
 		int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
 
 		fib_select_multipath(res, h);
@@ -1923,7 +2046,7 @@
 	u32		itag = 0;
 	struct rtable	*rth;
 	struct flowi4	fl4;
-	bool do_cache;
+	bool do_cache = true;
 
 	/* IP on this device is disabled. */
 
@@ -2000,6 +2123,9 @@
 	if (res->type == RTN_BROADCAST) {
 		if (IN_DEV_BFORWARD(in_dev))
 			goto make_route;
+		/* do not cache if bc_forwarding is enabled */
+		if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
+			do_cache = false;
 		goto brd_input;
 	}
 
@@ -2037,16 +2163,15 @@
 	RT_CACHE_STAT_INC(in_brd);
 
 local_input:
-	do_cache = false;
-	if (res->fi) {
-		if (!itag) {
-			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
-			if (rt_cache_valid(rth)) {
-				skb_dst_set_noref(skb, &rth->dst);
-				err = 0;
-				goto out;
-			}
-			do_cache = true;
+	do_cache &= res->fi && !itag;
+	if (do_cache) {
+		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
+
+		rth = rcu_dereference(nhc->nhc_rth_input);
+		if (rt_cache_valid(rth)) {
+			skb_dst_set_noref(skb, &rth->dst);
+			err = 0;
+			goto out;
 		}
 	}
 
@@ -2070,16 +2195,16 @@
 	}
 
 	if (do_cache) {
-		struct fib_nh *nh = &FIB_RES_NH(*res);
+		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
 
-		rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
+		rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
 		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
 			WARN_ON(rth->dst.input == lwtunnel_input);
 			rth->dst.lwtstate->orig_input = rth->dst.input;
 			rth->dst.input = lwtunnel_input;
 		}
 
-		if (unlikely(!rt_cache_route(nh, rth)))
+		if (unlikely(!rt_cache_route(nhc, rth)))
 			rt_add_uncached_list(rth);
 	}
 	skb_dst_set(skb, &rth->dst);
@@ -2152,12 +2277,13 @@
 		int our = 0;
 		int err = -EINVAL;
 
-		if (in_dev)
-			our = ip_check_mc_rcu(in_dev, daddr, saddr,
-					      ip_hdr(skb)->protocol);
+		if (!in_dev)
+			return err;
+		our = ip_check_mc_rcu(in_dev, daddr, saddr,
+				      ip_hdr(skb)->protocol);
 
 		/* check l3 master if no match yet */
-		if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
+		if (!our && netif_is_l3_slave(dev)) {
 			struct in_device *l3_in_dev;
 
 			l3_in_dev = __in_dev_get_rcu(skb->dev);
@@ -2249,10 +2375,10 @@
 	fnhe = NULL;
 	do_cache &= fi != NULL;
 	if (fi) {
+		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
 		struct rtable __rcu **prth;
-		struct fib_nh *nh = &FIB_RES_NH(*res);
 
-		fnhe = find_exception(nh, fl4->daddr);
+		fnhe = find_exception(nhc, fl4->daddr);
 		if (!do_cache)
 			goto add;
 		if (fnhe) {
@@ -2260,12 +2386,12 @@
 		} else {
 			if (unlikely(fl4->flowi4_flags &
 				     FLOWI_FLAG_KNOWN_NH &&
-				     !(nh->nh_gw &&
-				       nh->nh_scope == RT_SCOPE_LINK))) {
+				     !(nhc->nhc_gw_family &&
+				       nhc->nhc_scope == RT_SCOPE_LINK))) {
 				do_cache = false;
 				goto add;
 			}
-			prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
+			prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
 		}
 		rth = rcu_dereference(*prth);
 		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
@@ -2344,14 +2470,17 @@
 	int orig_oif = fl4->flowi4_oif;
 	unsigned int flags = 0;
 	struct rtable *rth;
-	int err = -ENETUNREACH;
+	int err;
 
 	if (fl4->saddr) {
-		rth = ERR_PTR(-EINVAL);
 		if (ipv4_is_multicast(fl4->saddr) ||
 		    ipv4_is_lbcast(fl4->saddr) ||
-		    ipv4_is_zeronet(fl4->saddr))
+		    ipv4_is_zeronet(fl4->saddr)) {
+			rth = ERR_PTR(-EINVAL);
 			goto out;
+		}
+
+		rth = ERR_PTR(-ENETUNREACH);
 
 		/* I removed check for oif == dev_out->oif here.
 		   It was wrong for two reasons:
@@ -2570,8 +2699,12 @@
 		rt->rt_genid = rt_genid_ipv4(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
-		rt->rt_gateway = ort->rt_gateway;
 		rt->rt_uses_gateway = ort->rt_uses_gateway;
+		rt->rt_gw_family = ort->rt_gw_family;
+		if (rt->rt_gw_family == AF_INET)
+			rt->rt_gw4 = ort->rt_gw4;
+		else if (rt->rt_gw_family == AF_INET6)
+			rt->rt_gw6 = ort->rt_gw6;
 
 		INIT_LIST_HEAD(&rt->rt_uncached);
 	}
@@ -2601,7 +2734,8 @@
 /* called with rcu_read_lock held */
 static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 			struct rtable *rt, u32 table_id, struct flowi4 *fl4,
-			struct sk_buff *skb, u32 portid, u32 seq)
+			struct sk_buff *skb, u32 portid, u32 seq,
+			unsigned int flags)
 {
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
@@ -2609,7 +2743,7 @@
 	u32 error;
 	u32 metrics[RTAX_MAX];
 
-	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
+	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
@@ -2617,7 +2751,7 @@
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
-	r->rtm_tos	= fl4->flowi4_tos;
+	r->rtm_tos	= fl4 ? fl4->flowi4_tos : 0;
 	r->rtm_table	= table_id < 256 ? table_id : RT_TABLE_COMPAT;
 	if (nla_put_u32(skb, RTA_TABLE, table_id))
 		goto nla_put_failure;
@@ -2645,14 +2779,29 @@
 	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
 		goto nla_put_failure;
 #endif
-	if (!rt_is_input_route(rt) &&
+	if (fl4 && !rt_is_input_route(rt) &&
 	    fl4->saddr != src) {
 		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
 			goto nla_put_failure;
 	}
-	if (rt->rt_uses_gateway &&
-	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
-		goto nla_put_failure;
+	if (rt->rt_uses_gateway) {
+		if (rt->rt_gw_family == AF_INET &&
+		    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
+			goto nla_put_failure;
+		} else if (rt->rt_gw_family == AF_INET6) {
+			int alen = sizeof(struct in6_addr);
+			struct nlattr *nla;
+			struct rtvia *via;
+
+			nla = nla_reserve(skb, RTA_VIA, alen + 2);
+			if (!nla)
+				goto nla_put_failure;
+
+			via = nla_data(nla);
+			via->rtvia_family = AF_INET6;
+			memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+		}
+	}
 
 	expires = rt->dst.expires;
 	if (expires) {
@@ -2672,36 +2821,40 @@
 	if (rtnetlink_put_metrics(skb, metrics) < 0)
 		goto nla_put_failure;
 
-	if (fl4->flowi4_mark &&
-	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
-		goto nla_put_failure;
+	if (fl4) {
+		if (fl4->flowi4_mark &&
+		    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
+			goto nla_put_failure;
 
-	if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
-	    nla_put_u32(skb, RTA_UID,
-			from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
-		goto nla_put_failure;
+		if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
+		    nla_put_u32(skb, RTA_UID,
+				from_kuid_munged(current_user_ns(),
+						 fl4->flowi4_uid)))
+			goto nla_put_failure;
+
+		if (rt_is_input_route(rt)) {
+#ifdef CONFIG_IP_MROUTE
+			if (ipv4_is_multicast(dst) &&
+			    !ipv4_is_local_multicast(dst) &&
+			    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
+				int err = ipmr_get_route(net, skb,
+							 fl4->saddr, fl4->daddr,
+							 r, portid);
+
+				if (err <= 0) {
+					if (err == 0)
+						return 0;
+					goto nla_put_failure;
+				}
+			} else
+#endif
+				if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
+					goto nla_put_failure;
+		}
+	}
 
 	error = rt->dst.error;
 
-	if (rt_is_input_route(rt)) {
-#ifdef CONFIG_IP_MROUTE
-		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
-		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
-			int err = ipmr_get_route(net, skb,
-						 fl4->saddr, fl4->daddr,
-						 r, portid);
-
-			if (err <= 0) {
-				if (err == 0)
-					return 0;
-				goto nla_put_failure;
-			}
-		} else
-#endif
-			if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
-				goto nla_put_failure;
-	}
-
 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
 		goto nla_put_failure;
 
@@ -2713,6 +2866,81 @@
 	return -EMSGSIZE;
 }
 
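+/* Dump one bucket array of nexthop exceptions: emit an RTM_NEWROUTE
+ * message for every live, unexpired exception past the resume point.
+ */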
+static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
+			    struct netlink_callback *cb, u32 table_id,
+			    struct fnhe_hash_bucket *bucket, int genid,
+			    int *fa_index, int fa_start, unsigned int flags)
+{
+	int i;
+
+	for (i = 0; i < FNHE_HASH_SIZE; i++) {
+		struct fib_nh_exception *fnhe;
+
+		for (fnhe = rcu_dereference(bucket[i].chain); fnhe;
+		     fnhe = rcu_dereference(fnhe->fnhe_next)) {
+			struct rtable *rt;
+			int err;
+
+			if (*fa_index < fa_start)
+				goto next;
+
+			if (fnhe->fnhe_genid != genid)
+				goto next;
+
+			if (fnhe->fnhe_expires &&
+			    time_after(jiffies, fnhe->fnhe_expires))
+				goto next;
+
+			rt = rcu_dereference(fnhe->fnhe_rth_input);
+			if (!rt)
+				rt = rcu_dereference(fnhe->fnhe_rth_output);
+			if (!rt)
+				goto next;
+
+			err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
+					   table_id, NULL, skb,
+					   NETLINK_CB(cb->skb).portid,
+					   cb->nlh->nlmsg_seq, flags);
+			if (err)
+				return err;
+next:
+			(*fa_index)++;
+		}
+	}
+
+	return 0;
+}
+
+int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
+		       u32 table_id, struct fib_info *fi,
+		       int *fa_index, int fa_start, unsigned int flags)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	int nhsel, genid = fnhe_genid(net);
+
+	for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
+		struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
+		struct fnhe_hash_bucket *bucket;
+		int err;
+
+		if (nhc->nhc_flags & RTNH_F_DEAD)
+			continue;
+
+		rcu_read_lock();
+		bucket = rcu_dereference(nhc->nhc_exceptions);
+		err = 0;
+		if (bucket)
+			err = fnhe_dump_bucket(net, skb, cb, table_id, bucket,
+					       genid, fa_index, fa_start,
+					       flags);
+		rcu_read_unlock();
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
 						   u8 ip_proto, __be16 sport,
 						   __be16 dport)
@@ -2774,6 +3002,75 @@
 	return skb;
 }
 
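+/* Strict validation of RTM_GETROUTE requests: legacy callers keep the
+ * relaxed deprecated parsing, while strict callers may only pass the
+ * header fields and attributes a route get request actually consumes.
+ */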
+static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
+				       const struct nlmsghdr *nlh,
+				       struct nlattr **tb,
+				       struct netlink_ext_ack *extack)
+{
+	struct rtmsg *rtm;
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+		NL_SET_ERR_MSG(extack,
+			       "ipv4: Invalid header for route get request");
+		return -EINVAL;
+	}
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+					      rtm_ipv4_policy, extack);
+
+	rtm = nlmsg_data(nlh);
+	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
+	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
+	    rtm->rtm_table || rtm->rtm_protocol ||
+	    rtm->rtm_scope || rtm->rtm_type) {
+		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
+		return -EINVAL;
+	}
+
+	if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
+			       RTM_F_LOOKUP_TABLE |
+			       RTM_F_FIB_MATCH)) {
+		NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+					    rtm_ipv4_policy, extack);
+	if (err)
+		return err;
+
+	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
+	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
+		NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
+		return -EINVAL;
+	}
+
+	for (i = 0; i <= RTA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case RTA_IIF:
+		case RTA_OIF:
+		case RTA_SRC:
+		case RTA_DST:
+		case RTA_IP_PROTO:
+		case RTA_SPORT:
+		case RTA_DPORT:
+		case RTA_MARK:
+		case RTA_UID:
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 			     struct netlink_ext_ack *extack)
 {
@@ -2786,7 +3083,7 @@
 	struct rtable *rt = NULL;
 	struct sk_buff *skb;
 	struct rtmsg *rtm;
-	struct flowi4 fl4;
+	struct flowi4 fl4 = {};
 	__be32 dst = 0;
 	__be32 src = 0;
 	kuid_t uid;
@@ -2794,8 +3091,7 @@
 	int err;
 	int mark;
 
-	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
-			  extack);
+	err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
 	if (err < 0)
 		return err;
 
@@ -2811,7 +3107,7 @@
 
 	if (tb[RTA_IP_PROTO]) {
 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
-						  &ip_proto, extack);
+						  &ip_proto, AF_INET, extack);
 		if (err)
 			return err;
 	}
@@ -2826,7 +3122,6 @@
 	if (!skb)
 		return -ENOBUFS;
 
-	memset(&fl4, 0, sizeof(fl4));
 	fl4.daddr = dst;
 	fl4.saddr = src;
 	fl4.flowi4_tos = rtm->rtm_tos;
@@ -2861,6 +3156,7 @@
 			err = -rt->dst.error;
 	} else {
 		fl4.flowi4_iif = LOOPBACK_IFINDEX;
+		skb->dev = net->loopback_dev;
 		rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
 		err = 0;
 		if (IS_ERR(rt))
@@ -2897,7 +3193,8 @@
 				    fl4.flowi4_tos, res.fi, 0);
 	} else {
 		err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
-				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
+				   NETLINK_CB(in_skb).portid,
+				   nlh->nlmsg_seq, 0);
 	}
 	if (err < 0)
 		goto errout_rcu;
@@ -3052,9 +3349,11 @@
 	{ }
 };
 
+static const char ipv4_route_flush_procname[] = "flush";
+
 static struct ctl_table ipv4_route_flush_table[] = {
 	{
-		.procname	= "flush",
+		.procname	= ipv4_route_flush_procname,
 		.maxlen		= sizeof(int),
 		.mode		= 0200,
 		.proc_handler	= ipv4_sysctl_rtcache_flush,
@@ -3072,9 +3371,11 @@
 		if (!tbl)
 			goto err_dup;
 
-		/* Don't export sysctls to unprivileged users */
-		if (net->user_ns != &init_user_ns)
-			tbl[0].procname = NULL;
+		/* Don't export non-whitelisted sysctls to unprivileged users */
+		if (net->user_ns != &init_user_ns) {
+			if (tbl[0].procname != ipv4_route_flush_procname)
+				tbl[0].procname = NULL;
+		}
 	}
 	tbl[0].extra1 = net;
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index c3387df..535b693 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Syncookies implementation for the Linux kernel
  *
  *  Copyright (C) 1997 Andi Kleen
  *  Based on ideas by D.J.Bernstein and Eric Schenk.
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/tcp.h>
@@ -88,7 +84,7 @@
 		ts <<= TSBITS;
 		ts |= options;
 	}
-	return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ);
+	return (u64)ts * (NSEC_PER_SEC / TCP_TS_HZ);
 }
 
 
@@ -216,11 +212,15 @@
 		refcount_set(&req->rsk_refcnt, 1);
 		tcp_sk(child)->tsoffset = tsoff;
 		sock_rps_save_rxhash(child, skb);
-		inet_csk_reqsk_queue_add(sk, req, child);
-	} else {
-		reqsk_free(req);
+		if (inet_csk_reqsk_queue_add(sk, req, child))
+			return child;
+
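+		/* The listener is no longer accepting; drop the child. */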
+		bh_unlock_sock(child);
+		sock_put(child);
 	}
-	return child;
+	__reqsk_free(req);
+
+	return NULL;
 }
 EXPORT_SYMBOL(tcp_get_cookie_sock);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 891ed2f..0902cb3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -28,8 +28,6 @@
 #include <net/protocol.h>
 #include <net/netevent.h>
 
-static int zero;
-static int one = 1;
 static int two = 2;
 static int four = 4;
 static int thousand = 1000;
@@ -39,6 +37,8 @@
 static int ip_local_port_range_max[] = { 65535, 65535 };
 static int tcp_adv_win_scale_min = -31;
 static int tcp_adv_win_scale_max = 31;
+static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
+static int tcp_min_snd_mss_max = 65535;
 static int ip_privileged_port_min;
 static int ip_privileged_port_max = 65535;
 static int ip_ttl_min = 1;
@@ -49,6 +49,7 @@
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 static int comp_sack_nr_max = 255;
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
+static int one_day_secs = 24 * 3600;
 
 /* obsolete */
 static int sysctl_tcp_low_latency __read_mostly;
@@ -276,55 +277,96 @@
 	return ret;
 }
 
+static int sscanf_key(char *buf, __le32 *key)
+{
+	u32 user_key[4];
+	int i, ret = 0;
+
+	if (sscanf(buf, "%x-%x-%x-%x", user_key, user_key + 1,
+		   user_key + 2, user_key + 3) != 4) {
+		ret = -EINVAL;
+	} else {
+		for (i = 0; i < ARRAY_SIZE(user_key); i++)
+			key[i] = cpu_to_le32(user_key[i]);
+	}
+	pr_debug("proc TFO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
+		 user_key[0], user_key[1], user_key[2], user_key[3], buf, ret);
+
+	return ret;
+}
+
 static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos)
 {
 	struct net *net = container_of(table->data, struct net,
 	    ipv4.sysctl_tcp_fastopen);
-	struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
-	struct tcp_fastopen_context *ctxt;
-	u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
-	__le32 key[4];
-	int ret, i;
+	/* maxlen must hold every key printed in hex (two characters per
+	 * byte), with dashes separating doublewords and a comma between
+	 * keys.
+	 */
+	struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH *
+					    2 * TCP_FASTOPEN_KEY_MAX) +
+					    (TCP_FASTOPEN_KEY_MAX * 5)) };
+	struct tcp_fastopen_context *ctx;
+	u32 user_key[TCP_FASTOPEN_KEY_MAX * 4];
+	__le32 key[TCP_FASTOPEN_KEY_MAX * 4];
+	char *backup_data;
+	int ret, i = 0, off = 0, n_keys = 0;
 
 	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
 	if (!tbl.data)
 		return -ENOMEM;
 
 	rcu_read_lock();
-	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
-	if (ctxt)
-		memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
-	else
-		memset(key, 0, sizeof(key));
+	ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+	if (ctx) {
+		n_keys = tcp_fastopen_context_len(ctx);
+		memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys);
+	}
 	rcu_read_unlock();
 
-	for (i = 0; i < ARRAY_SIZE(key); i++)
+	if (!n_keys) {
+		memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH);
+		n_keys = 1;
+	}
+
+	for (i = 0; i < n_keys * 4; i++)
 		user_key[i] = le32_to_cpu(key[i]);
 
-	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
-		user_key[0], user_key[1], user_key[2], user_key[3]);
+	for (i = 0; i < n_keys; i++) {
+		off += snprintf(tbl.data + off, tbl.maxlen - off,
+				"%08x-%08x-%08x-%08x",
+				user_key[i * 4],
+				user_key[i * 4 + 1],
+				user_key[i * 4 + 2],
+				user_key[i * 4 + 3]);
+		if (i + 1 < n_keys)
+			off += snprintf(tbl.data + off, tbl.maxlen - off, ",");
+	}
+
 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 
 	if (write && ret == 0) {
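+		/* An optional second (backup) key follows a comma. */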
-		if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
-			   user_key + 2, user_key + 3) != 4) {
+		backup_data = strchr(tbl.data, ',');
+		if (backup_data) {
+			*backup_data = '\0';
+			backup_data++;
+		}
+		if (sscanf_key(tbl.data, key)) {
 			ret = -EINVAL;
 			goto bad_key;
 		}
-
-		for (i = 0; i < ARRAY_SIZE(user_key); i++)
-			key[i] = cpu_to_le32(user_key[i]);
-
+		if (backup_data) {
+			if (sscanf_key(backup_data, key + 4)) {
+				ret = -EINVAL;
+				goto bad_key;
+			}
+		}
 		tcp_fastopen_reset_cipher(net, NULL, key,
-					  TCP_FASTOPEN_KEY_LENGTH);
+					  backup_data ? key + 4 : NULL);
 	}
 
 bad_key:
-	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
-		user_key[0], user_key[1], user_key[2], user_key[3],
-	       (char *)tbl.data, ret);
 	kfree(tbl.data);
 	return ret;
 }
@@ -532,7 +574,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 	},
 	{
 		.procname	= "icmp_msgs_burst",
@@ -540,7 +582,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 	},
 	{
 		.procname	= "udp_mem",
@@ -549,6 +591,27 @@
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+	{
+		.procname	= "fib_sync_mem",
+		.data		= &sysctl_fib_sync_mem,
+		.maxlen		= sizeof(sysctl_fib_sync_mem),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec_minmax,
+		.extra1		= &sysctl_fib_sync_mem_min,
+		.extra2		= &sysctl_fib_sync_mem_max,
+	},
+	{
+		.procname	= "tcp_rx_skb_cache",
+		.data		= &tcp_rx_skb_cache_key.key,
+		.mode		= 0644,
+		.proc_handler	= proc_do_static_key,
+	},
+	{
+		.procname	= "tcp_tx_skb_cache",
+		.data		= &tcp_tx_skb_cache_key.key,
+		.mode		= 0644,
+		.proc_handler	= proc_do_static_key,
+	},
 	{ }
 };
 
@@ -602,6 +665,17 @@
 		.mode		= 0644,
 		.proc_handler	= ipv4_ping_group_range,
 	},
+#ifdef CONFIG_NET_L3_MASTER_DEV
+	{
+		.procname	= "raw_l3mdev_accept",
+		.data		= &init_net.ipv4.sysctl_raw_l3mdev_accept,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
 	{
 		.procname	= "tcp_ecn",
 		.data		= &init_net.ipv4.sysctl_tcp_ecn,
@@ -687,8 +761,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler   = ipv4_fwd_update_priority,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "ip_nonlocal_bind",
@@ -718,8 +792,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 #endif
 	{
@@ -737,6 +811,24 @@
 		.proc_handler	= proc_dointvec,
 	},
 	{
+		.procname	= "tcp_min_snd_mss",
+		.data		= &init_net.ipv4.sysctl_tcp_min_snd_mss,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &tcp_min_snd_mss_min,
+		.extra2		= &tcp_min_snd_mss_max,
+	},
+	{
+		.procname	= "tcp_mtu_probe_floor",
+		.data		= &init_net.ipv4.sysctl_tcp_mtu_probe_floor,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &tcp_min_snd_mss_min,
+		.extra2		= &tcp_min_snd_mss_max,
+	},
+	{
 		.procname	= "tcp_probe_threshold",
 		.data		= &init_net.ipv4.sysctl_tcp_probe_threshold,
 		.maxlen		= sizeof(int),
@@ -779,7 +871,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one
+		.extra1		= SYSCTL_ONE
 	},
 #endif
 	{
@@ -884,7 +976,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 	},
 	{
@@ -912,7 +1004,12 @@
 		.procname	= "tcp_fastopen_key",
 		.mode		= 0600,
 		.data		= &init_net.ipv4.sysctl_tcp_fastopen,
-		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+		/* maxlen must hold every key printed in hex (two characters
+		 * per byte), with dashes separating doublewords and a comma
+		 * between keys.
+		 */
+		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH *
+				   2 * TCP_FASTOPEN_KEY_MAX) +
+				   (TCP_FASTOPEN_KEY_MAX * 5)),
 		.proc_handler	= proc_tcp_fastopen_key,
 	},
 	{
@@ -921,7 +1018,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_tfo_blackhole_detect_timeout,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 	},
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	{
@@ -930,8 +1027,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "fib_multipath_hash_policy",
@@ -939,8 +1036,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_fib_multipath_hash_policy,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
 	},
 #endif
 	{
@@ -957,8 +1054,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 #endif
 	{
@@ -988,7 +1085,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &four,
 	},
 	{
@@ -1132,7 +1229,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= SYSCTL_ONE,
 		.extra2		= &gso_max_segs,
 	},
 	{
@@ -1140,7 +1237,9 @@
 		.data		= &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_day_secs
 	},
 	{
 		.procname	= "tcp_autocorking",
@@ -1148,8 +1247,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "tcp_invalid_ratelimit",
@@ -1164,7 +1263,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &thousand,
 	},
 	{
@@ -1173,7 +1272,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &thousand,
 	},
 	{
@@ -1182,7 +1281,7 @@
 		.maxlen		= sizeof(init_net.ipv4.sysctl_tcp_wmem),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "tcp_rmem",
@@ -1190,7 +1289,7 @@
 		.maxlen		= sizeof(init_net.ipv4.sysctl_tcp_rmem),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "tcp_comp_sack_delay_ns",
@@ -1205,7 +1304,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &comp_sack_nr_max,
 	},
 	{
@@ -1214,7 +1313,7 @@
 		.maxlen		= sizeof(init_net.ipv4.sysctl_udp_rmem_min),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one
+		.extra1		= SYSCTL_ONE
 	},
 	{
 		.procname	= "udp_wmem_min",
@@ -1222,7 +1321,7 @@
 		.maxlen		= sizeof(init_net.ipv4.sysctl_udp_wmem_min),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one
+		.extra1		= SYSCTL_ONE
 	},
 	{ }
 };
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a32a0f4..d8876f0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -205,11 +206,6 @@
  *	Hirokazu Takahashi	:	Use copy_from_user() instead of
  *					csum_and_copy_from_user() if possible.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or(at your option) any later version.
- *
  * Description of States:
  *
  *	TCP_SYN_SENT		sent a connection request, waiting for ack
@@ -262,7 +258,7 @@
 #include <linux/net.h>
 #include <linux/socket.h>
 #include <linux/random.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/cache.h>
@@ -321,11 +317,16 @@
 unsigned long tcp_memory_pressure __read_mostly;
 EXPORT_SYMBOL_GPL(tcp_memory_pressure);
 
+DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
+EXPORT_SYMBOL(tcp_rx_skb_cache_key);
+
+DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
+
 void tcp_enter_memory_pressure(struct sock *sk)
 {
 	unsigned long val;
 
-	if (tcp_memory_pressure)
+	if (READ_ONCE(tcp_memory_pressure))
 		return;
 	val = jiffies;
 
@@ -340,7 +341,7 @@
 {
 	unsigned long val;
 
-	if (!tcp_memory_pressure)
+	if (!READ_ONCE(tcp_memory_pressure))
 		return;
 	val = xchg(&tcp_memory_pressure, 0);
 	if (val)
@@ -449,26 +450,14 @@
 
 	icsk->icsk_sync_mss = tcp_sync_mss;
 
-	sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
-	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+	WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+	WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
 
 	sk_sockets_allocated_inc(sk);
 	sk->sk_route_forced_caps = NETIF_F_GSO;
 }
 EXPORT_SYMBOL(tcp_init_sock);
 
-void tcp_init_transfer(struct sock *sk, int bpf_op)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	tcp_mtup_init(sk);
-	icsk->icsk_af_ops->rebuild_header(sk);
-	tcp_init_metrics(sk);
-	tcp_call_bpf(sk, bpf_op, 0, NULL);
-	tcp_init_congestion_control(sk);
-	tcp_init_buffer_space(sk);
-}
-
 static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
 {
 	struct sk_buff *skb = tcp_write_queue_tail(sk);
@@ -488,7 +477,7 @@
 static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
 					  int target, struct sock *sk)
 {
-	return (tp->rcv_nxt - tp->copied_seq >= target) ||
+	return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) ||
 		(sk->sk_prot->stream_memory_read ?
 		sk->sk_prot->stream_memory_read(sk) : false);
 }
@@ -554,10 +543,10 @@
 
 	/* Connected or passive Fast Open socket? */
 	if (state != TCP_SYN_SENT &&
-	    (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+	    (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) {
 		int target = sock_rcvlowat(sk, 0, INT_MAX);
 
-		if (tp->urg_seq == tp->copied_seq &&
+		if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) &&
 		    !sock_flag(sk, SOCK_URGINLINE) &&
 		    tp->urg_data)
 			target++;
@@ -595,7 +584,7 @@
 	}
 	/* This barrier is coupled with smp_wmb() in tcp_reset() */
 	smp_rmb();
-	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
 		mask |= EPOLLERR;
 
 	return mask;
@@ -618,7 +607,8 @@
 		unlock_sock_fast(sk, slow);
 		break;
 	case SIOCATMARK:
-		answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
+		answ = tp->urg_data &&
+		       READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq);
 		break;
 	case SIOCOUTQ:
 		if (sk->sk_state == TCP_LISTEN)
@@ -627,7 +617,7 @@
 		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
 			answ = 0;
 		else
-			answ = tp->write_seq - tp->snd_una;
+			answ = READ_ONCE(tp->write_seq) - tp->snd_una;
 		break;
 	case SIOCOUTQNSD:
 		if (sk->sk_state == TCP_LISTEN)
@@ -636,7 +626,8 @@
 		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
 			answ = 0;
 		else
-			answ = tp->write_seq - tp->snd_nxt;
+			answ = READ_ONCE(tp->write_seq) -
+			       READ_ONCE(tp->snd_nxt);
 		break;
 	default:
 		return -ENOIOCTLCMD;
@@ -668,7 +659,7 @@
 	tcb->sacked  = 0;
 	__skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
-	sk->sk_wmem_queued += skb->truesize;
+	sk_wmem_queued_add(sk, skb->truesize);
 	sk_mem_charge(sk, skb->truesize);
 	if (tp->nonagle & TCP_NAGLE_PUSH)
 		tp->nonagle &= ~TCP_NAGLE_PUSH;
@@ -865,6 +856,18 @@
 {
 	struct sk_buff *skb;
 
+	if (likely(!size)) {
+		skb = sk->sk_tx_skb_cache;
+		if (skb) {
+			skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+			sk->sk_tx_skb_cache = NULL;
+			pskb_trim(skb, 0);
+			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
+			skb_shinfo(skb)->tx_flags = 0;
+			memset(TCP_SKB_CB(skb), 0, sizeof(struct tcp_skb_cb));
+			return skb;
+		}
+	}
 	/* The TCP header must be at least 32-bit aligned.  */
 	size = ALIGN(size, 4);
 
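For context, the cache consumed here is refilled on the transmit free path rather than in this file; a paraphrased sketch of the v5.4 helper in include/net/sock.h (not part of this hunk):

	static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
	{
		sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
		sk_wmem_queued_add(sk, -skb->truesize);
		sk_mem_uncharge(sk, skb->truesize);
		/* Park the skb for reuse instead of freeing it, when the
		 * tcp_tx_skb_cache static key is enabled.
		 */
		if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
		    !sk->sk_tx_skb_cache && !skb_cloned(skb)) {
			skb_zcopy_clear(skb, true);
			sk->sk_tx_skb_cache = skb;
			return;
		}
		__kfree_skb(skb);
	}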
@@ -934,6 +937,22 @@
 	return mss_now;
 }
 
+/* In some cases, both sendpage() and sendmsg() could have added
+ * an skb to the write queue, but failed to add any payload to it.
+ * We need to remove it to consume less memory, but more
+ * importantly so that we can generate EPOLLOUT for edge-triggered
+ * epoll() users.
+ */
+static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (skb && !skb->len) {
+		tcp_unlink_write_queue(skb, sk);
+		if (tcp_write_queue_empty(sk))
+			tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+		sk_wmem_free_skb(sk, skb);
+	}
+}
+
 ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 			 size_t size, int flags)
 {
@@ -943,6 +962,10 @@
 	ssize_t copied;
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
+	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+	    WARN_ONCE(PageSlab(page), "page must not be a Slab one"))
+		return -EINVAL;
+
 	/* Wait for a connection to finish. One exception is TCP Fast Open
 	 * (passive side) where data is allowed to be sent before a connection
 	 * is fully established.
@@ -979,6 +1002,9 @@
 			if (!skb)
 				goto wait_for_memory;
 
+#ifdef CONFIG_TLS_DEVICE
+			skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+#endif
 			skb_entail(sk, skb);
 			copy = size_goal;
 		}
@@ -1008,10 +1034,10 @@
 		skb->len += copy;
 		skb->data_len += copy;
 		skb->truesize += copy;
-		sk->sk_wmem_queued += copy;
+		sk_wmem_queued_add(sk, copy);
 		sk_mem_charge(sk, copy);
 		skb->ip_summed = CHECKSUM_PARTIAL;
-		tp->write_seq += copy;
+		WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
 		TCP_SKB_CB(skb)->end_seq += copy;
 		tcp_skb_pcount_set(skb, 0);
 
@@ -1056,6 +1082,7 @@
 	return copied;
 
 do_error:
+	tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk));
 	if (copied)
 		goto out;
 out_err:
@@ -1094,30 +1121,6 @@
 }
 EXPORT_SYMBOL(tcp_sendpage);
 
-/* Do not bother using a page frag for very small frames.
- * But use this heuristic only for the first skb in write queue.
- *
- * Having no payload in skb->head allows better SACK shifting
- * in tcp_shift_skb_data(), reducing sack/rack overhead, because
- * write queue has less skbs.
- * Each skb can hold up to MAX_SKB_FRAGS * 32Kbytes, or ~0.5 MB.
- * This also speeds up tso_fragment(), since it wont fallback
- * to tcp_fragment().
- */
-static int linear_payload_sz(bool first_skb)
-{
-	if (first_skb)
-		return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
-	return 0;
-}
-
-static int select_size(bool first_skb, bool zc)
-{
-	if (zc)
-		return 0;
-	return linear_payload_sz(first_skb);
-}
-
 void tcp_free_fastopen_req(struct tcp_sock *tp)
 {
 	if (tp->fastopen_req) {
@@ -1127,7 +1130,8 @@
 }
 
 static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
-				int *copied, size_t size)
+				int *copied, size_t size,
+				struct ubuf_info *uarg)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
@@ -1147,6 +1151,7 @@
 		return -ENOBUFS;
 	tp->fastopen_req->data = msg;
 	tp->fastopen_req->size = size;
+	tp->fastopen_req->uarg = uarg;
 
 	if (inet->defer_connect) {
 		err = tcp_connect(sk);
@@ -1179,18 +1184,13 @@
 	struct sockcm_cookie sockc;
 	int flags, err, copied = 0;
 	int mss_now = 0, size_goal, copied_syn = 0;
-	bool process_backlog = false;
+	int process_backlog = 0;
 	bool zc = false;
 	long timeo;
 
 	flags = msg->msg_flags;
 
 	if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
-		if (sk->sk_state != TCP_ESTABLISHED) {
-			err = -EINVAL;
-			goto out_err;
-		}
-
 		skb = tcp_write_queue_tail(sk);
 		uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
 		if (!uarg) {
@@ -1205,7 +1205,7 @@
 
 	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
 	    !tp->repair) {
-		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
+		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg);
 		if (err == -EINPROGRESS && copied_syn > 0)
 			goto out;
 		else if (err)
@@ -1271,31 +1271,30 @@
 
 		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
 			bool first_skb;
-			int linear;
 
 new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
-			if (process_backlog && sk_flush_backlog(sk)) {
-				process_backlog = false;
-				goto restart;
+			if (unlikely(process_backlog >= 16)) {
+				process_backlog = 0;
+				if (sk_flush_backlog(sk))
+					goto restart;
 			}
 			first_skb = tcp_rtx_and_write_queues_empty(sk);
-			linear = select_size(first_skb, zc);
-			skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
+			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
 						  first_skb);
 			if (!skb)
 				goto wait_for_memory;
 
-			process_backlog = true;
+			process_backlog++;
 			skb->ip_summed = CHECKSUM_PARTIAL;
 
 			skb_entail(sk, skb);
 			copy = size_goal;
 
 			/* All packets are restored as if they have
-			 * already been sent. skb_mstamp isn't set to
+			 * already been sent. skb_mstamp_ns isn't set to
 			 * avoid wrong rtt estimation.
 			 */
 			if (tp->repair)
@@ -1365,7 +1364,7 @@
 		if (!copied)
 			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
 
-		tp->write_seq += copy;
+		WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
 		TCP_SKB_CB(skb)->end_seq += copy;
 		tcp_skb_pcount_set(skb, 0);
 
@@ -1409,21 +1408,15 @@
 	sock_zerocopy_put(uarg);
 	return copied + copied_syn;
 
-do_fault:
-	if (!skb->len) {
-		tcp_unlink_write_queue(skb, sk);
-		/* It is the one place in all of TCP, except connection
-		 * reset, where we can be unlinking the send_head.
-		 */
-		tcp_check_send_head(sk, skb);
-		sk_wmem_free_skb(sk, skb);
-	}
-
 do_error:
+	skb = tcp_write_queue_tail(sk);
+do_fault:
+	tcp_remove_empty_skb(sk, skb);
+
 	if (copied + copied_syn)
 		goto out;
 out_err:
-	sock_zerocopy_put_abort(uarg);
+	sock_zerocopy_put_abort(uarg, true);
 	err = sk_stream_error(sk, flags, err);
 	/* make sure we wake any epoll edge trigger waiter */
 	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
@@ -1554,7 +1547,7 @@
 		    (copied > 0 &&
 		     ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
 		      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
-		       !icsk->icsk_ack.pingpong)) &&
+		       !inet_csk_in_pingpong_mode(sk))) &&
 		      !atomic_read(&sk->sk_rmem_alloc)))
 			time_to_ack = true;
 	}
@@ -1677,9 +1670,9 @@
 		sk_eat_skb(sk, skb);
 		if (!desc->count)
 			break;
-		tp->copied_seq = seq;
+		WRITE_ONCE(tp->copied_seq, seq);
 	}
-	tp->copied_seq = seq;
+	WRITE_ONCE(tp->copied_seq, seq);
 
 	tcp_rcv_space_adjust(sk);
 
@@ -1708,7 +1701,7 @@
 	else
 		cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
 	val = min(val, cap);
-	sk->sk_rcvlowat = val ? : 1;
+	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
 
 	/* Check if we need to signal EPOLLIN right now */
 	tcp_data_ready(sk);
@@ -1718,7 +1711,7 @@
 
 	val <<= 1;
 	if (val > sk->sk_rcvbuf) {
-		sk->sk_rcvbuf = val;
+		WRITE_ONCE(sk->sk_rcvbuf, val);
 		tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
 	}
 	return 0;
@@ -1753,6 +1746,7 @@
 	struct vm_area_struct *vma;
 	struct sk_buff *skb = NULL;
 	struct tcp_sock *tp;
+	int inq;
 	int ret;
 
 	if (address & (PAGE_SIZE - 1) || address != zc->address)
@@ -1773,12 +1767,15 @@
 
 	tp = tcp_sk(sk);
 	seq = tp->copied_seq;
-	zc->length = min_t(u32, zc->length, tcp_inq(sk));
+	inq = tcp_inq(sk);
+	zc->length = min_t(u32, zc->length, inq);
 	zc->length &= ~(PAGE_SIZE - 1);
-
-	zap_page_range(vma, address, zc->length);
-
-	zc->recv_skip_hint = 0;
+	if (zc->length) {
+		zap_page_range(vma, address, zc->length);
+		zc->recv_skip_hint = 0;
+	} else {
+		zc->recv_skip_hint = inq;
+	}
 	ret = 0;
 	while (length + PAGE_SIZE <= zc->length) {
 		if (zc->recv_skip_hint < PAGE_SIZE) {
@@ -1795,14 +1792,23 @@
 				break;
 			frags = skb_shinfo(skb)->frags;
 			while (offset) {
-				if (frags->size > offset)
+				if (skb_frag_size(frags) > offset)
 					goto out;
-				offset -= frags->size;
+				offset -= skb_frag_size(frags);
 				frags++;
 			}
 		}
-		if (frags->size != PAGE_SIZE || frags->page_offset)
+		if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
+			int remaining = zc->recv_skip_hint;
+
+			while (remaining && (skb_frag_size(frags) != PAGE_SIZE ||
+					     skb_frag_off(frags))) {
+				remaining -= skb_frag_size(frags);
+				frags++;
+			}
+			zc->recv_skip_hint -= remaining;
 			break;
+		}
 		ret = vm_insert_page(vma, address + length,
 				     skb_frag_page(frags));
 		if (ret)
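The receive path above is driven from user space through getsockopt(); a minimal sketch, with error handling omitted and addr assumed to come from a PAGE_SIZE-aligned mmap() of the socket:

	struct tcp_zerocopy_receive zc = {
		.address = (__u64)(unsigned long)addr,	/* mmap()ed window */
		.length  = chunk,			/* PAGE_SIZE multiple */
	};
	socklen_t zc_len = sizeof(zc);

	if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len) == 0) {
		/* zc.length bytes are now mapped at addr; zc.recv_skip_hint
		 * bytes, if any, must be copied out with recv() instead.
		 */
	}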
@@ -1815,7 +1821,7 @@
 out:
 	up_read(&current->mm->mmap_sem);
 	if (length) {
-		tp->copied_seq = seq;
+		WRITE_ONCE(tp->copied_seq, seq);
 		tcp_rcv_space_adjust(sk);
 
 		/* Clean up data we have read: This will do ACK frames. */
@@ -1834,57 +1840,78 @@
 #endif
 
 static void tcp_update_recv_tstamps(struct sk_buff *skb,
-				    struct scm_timestamping *tss)
+				    struct scm_timestamping_internal *tss)
 {
 	if (skb->tstamp)
-		tss->ts[0] = ktime_to_timespec(skb->tstamp);
+		tss->ts[0] = ktime_to_timespec64(skb->tstamp);
 	else
-		tss->ts[0] = (struct timespec) {0};
+		tss->ts[0] = (struct timespec64) {0};
 
 	if (skb_hwtstamps(skb)->hwtstamp)
-		tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp);
+		tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp);
 	else
-		tss->ts[2] = (struct timespec) {0};
+		tss->ts[2] = (struct timespec64) {0};
 }
 
 /* Similar to __sock_recv_timestamp, but does not require an skb */
 static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
-			       struct scm_timestamping *tss)
+			       struct scm_timestamping_internal *tss)
 {
-	struct timeval tv;
+	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
 	bool has_timestamping = false;
 
 	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
 		if (sock_flag(sk, SOCK_RCVTSTAMP)) {
 			if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
-				put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
-					 sizeof(tss->ts[0]), &tss->ts[0]);
-			} else {
-				tv.tv_sec = tss->ts[0].tv_sec;
-				tv.tv_usec = tss->ts[0].tv_nsec / 1000;
+				if (new_tstamp) {
+					struct __kernel_timespec kts = {tss->ts[0].tv_sec, tss->ts[0].tv_nsec};
 
-				put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
-					 sizeof(tv), &tv);
+					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
+						 sizeof(kts), &kts);
+				} else {
+					struct timespec ts_old = timespec64_to_timespec(tss->ts[0]);
+
+					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
+						 sizeof(ts_old), &ts_old);
+				}
+			} else {
+				if (new_tstamp) {
+					struct __kernel_sock_timeval stv;
+
+					stv.tv_sec = tss->ts[0].tv_sec;
+					stv.tv_usec = tss->ts[0].tv_nsec / 1000;
+					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
+						 sizeof(stv), &stv);
+				} else {
+					struct __kernel_old_timeval tv;
+
+					tv.tv_sec = tss->ts[0].tv_sec;
+					tv.tv_usec = tss->ts[0].tv_nsec / 1000;
+					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
+						 sizeof(tv), &tv);
+				}
 			}
 		}
 
 		if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
 			has_timestamping = true;
 		else
-			tss->ts[0] = (struct timespec) {0};
+			tss->ts[0] = (struct timespec64) {0};
 	}
 
 	if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
 		if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
 			has_timestamping = true;
 		else
-			tss->ts[2] = (struct timespec) {0};
+			tss->ts[2] = (struct timespec64) {0};
 	}
 
 	if (has_timestamping) {
-		tss->ts[1] = (struct timespec) {0};
-		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING,
-			 sizeof(*tss), tss);
+		tss->ts[1] = (struct timespec64) {0};
+		if (sock_flag(sk, SOCK_TSTAMP_NEW))
+			put_cmsg_scm_timestamping64(msg, tss);
+		else
+			put_cmsg_scm_timestamping(msg, tss);
 	}
 }
 
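A minimal consumer of this cmsg path might look like the following sketch (assuming software RX timestamps were requested and the pre-2038 "OLD" layout is in use):

	int val = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;
	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));

	/* After recvmsg(fd, &msg, 0), walk the control messages: */
	for (struct cmsghdr *cm = CMSG_FIRSTHDR(&msg); cm;
	     cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMPING) {
			struct scm_timestamping *tss = (void *)CMSG_DATA(cm);
			/* tss->ts[0]: software stamp, tss->ts[2]: hardware */
		}
	}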
@@ -1901,6 +1928,11 @@
 		inq = tp->rcv_nxt - tp->copied_seq;
 		release_sock(sk);
 	}
+	/* After receiving a FIN, tell user space to continue reading
+	 * by returning a non-zero inq.
+	 */
+	if (inq == 0 && sock_flag(sk, SOCK_DONE))
+		inq = 1;
 	return inq;
 }
 
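User space observes this hint through the TCP_INQ control message; a minimal sketch:

	int one = 1;
	setsockopt(fd, IPPROTO_TCP, TCP_INQ, &one, sizeof(one));

	/* After recvmsg(fd, &msg, 0): */
	for (struct cmsghdr *cm = CMSG_FIRSTHDR(&msg); cm;
	     cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ) {
			int inq = *(int *)CMSG_DATA(cm);
			/* Per the hunk above, inq is 1 after a FIN even
			 * when no payload remains queued.
			 */
		}
	}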
@@ -1925,14 +1957,14 @@
 	long timeo;
 	struct sk_buff *skb, *last;
 	u32 urg_hole = 0;
-	struct scm_timestamping tss;
+	struct scm_timestamping_internal tss;
 	bool has_tss = false;
 	bool has_cmsg;
 
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
 
-	if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
+	if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) &&
 	    (sk->sk_state == TCP_ESTABLISHED))
 		sk_busy_loop(sk, nonblock);
 
@@ -2075,7 +2107,7 @@
 		}
 		continue;
 
-	found_ok_skb:
+found_ok_skb:
 		/* Ok so how much can we use? */
 		used = skb->len - offset;
 		if (len < used)
@@ -2087,7 +2119,7 @@
 			if (urg_offset < used) {
 				if (!urg_offset) {
 					if (!sock_flag(sk, SOCK_URGINLINE)) {
-						++*seq;
+						WRITE_ONCE(*seq, *seq + 1);
 						urg_hole++;
 						offset++;
 						used--;
@@ -2109,7 +2141,7 @@
 			}
 		}
 
-		*seq += used;
+		WRITE_ONCE(*seq, *seq + used);
 		copied += used;
 		len -= used;
 
@@ -2134,9 +2166,9 @@
 			sk_eat_skb(sk, skb);
 		continue;
 
-	found_fin_ok:
+found_fin_ok:
 		/* Process the FIN. */
-		++*seq;
+		WRITE_ONCE(*seq, *seq + 1);
 		if (!(flags & MSG_PEEK))
 			sk_eat_skb(sk, skb);
 		break;
@@ -2228,10 +2260,6 @@
 	 * socket sitting in hash tables.
 	 */
 	inet_sk_state_store(sk, state);
-
-#ifdef STATE_TRACE
-	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
-#endif
 }
 EXPORT_SYMBOL_GPL(tcp_set_state);
 
@@ -2461,7 +2489,10 @@
 	}
 
 	if (sk->sk_state == TCP_CLOSE) {
-		struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+		struct request_sock *req;
+
+		req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+						lockdep_sock_is_held(sk));
 		/* We could get here with a non-NULL req if the socket is
 		 * aborted (e.g., closed with unread data) before 3WHS
 		 * finishes.
@@ -2515,10 +2546,16 @@
 		sk_wmem_free_skb(sk, skb);
 	}
 	tcp_rtx_queue_purge(sk);
+	skb = sk->sk_tx_skb_cache;
+	if (skb) {
+		__kfree_skb(skb);
+		sk->sk_tx_skb_cache = NULL;
+	}
 	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
 	sk_mem_reclaim(sk);
 	tcp_clear_all_retrans_hints(tcp_sk(sk));
 	tcp_sk(sk)->packets_out = 0;
+	inet_csk(sk)->icsk_backoff = 0;
 }
 
 int tcp_disconnect(struct sock *sk, int flags)
@@ -2527,6 +2564,7 @@
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int old_state = sk->sk_state;
+	u32 seq;
 
 	if (old_state != TCP_CLOSE)
 		tcp_set_state(sk, TCP_CLOSE);
@@ -2549,7 +2587,11 @@
 
 	tcp_clear_xmit_timers(sk);
 	__skb_queue_purge(&sk->sk_receive_queue);
-	tp->copied_seq = tp->rcv_nxt;
+	if (sk->sk_rx_skb_cache) {
+		__kfree_skb(sk->sk_rx_skb_cache);
+		sk->sk_rx_skb_cache = NULL;
+	}
+	WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 	tp->urg_data = 0;
 	tcp_write_queue_purge(sk);
 	tcp_fastopen_active_disable_ofo_check(sk);
@@ -2563,14 +2605,20 @@
 	sk->sk_shutdown = 0;
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->srtt_us = 0;
+	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 	tp->rcv_rtt_last_tsecr = 0;
-	tp->write_seq += tp->max_window + 2;
-	if (tp->write_seq == 0)
-		tp->write_seq = 1;
+
+	seq = tp->write_seq + tp->max_window + 2;
+	if (!seq)
+		seq = 1;
+	WRITE_ONCE(tp->write_seq, seq);
+
 	icsk->icsk_backoff = 0;
 	tp->snd_cwnd = 2;
 	icsk->icsk_probes_out = 0;
+	icsk->icsk_rto = TCP_TIMEOUT_INIT;
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+	tp->snd_cwnd = TCP_INIT_CWND;
 	tp->snd_cwnd_cnt = 0;
 	tp->window_clamp = 0;
 	tp->delivered_ce = 0;
@@ -2589,9 +2637,31 @@
 	tcp_saved_syn_free(tp);
 	tp->compressed_ack = 0;
 	tp->bytes_sent = 0;
+	tp->bytes_acked = 0;
+	tp->bytes_received = 0;
 	tp->bytes_retrans = 0;
+	tp->duplicate_sack[0].start_seq = 0;
+	tp->duplicate_sack[0].end_seq = 0;
 	tp->dsack_dups = 0;
 	tp->reord_seen = 0;
+	tp->retrans_out = 0;
+	tp->sacked_out = 0;
+	tp->tlp_high_seq = 0;
+	tp->last_oow_ack_time = 0;
+	/* There's a bubble in the pipe until at least the first ACK. */
+	tp->app_limited = ~0U;
+	tp->rack.mstamp = 0;
+	tp->rack.advanced = 0;
+	tp->rack.reo_wnd_steps = 1;
+	tp->rack.last_delivered = 0;
+	tp->rack.reo_wnd_persist = 0;
+	tp->rack.dsack_seen = 0;
+	tp->syn_data_acked = 0;
+	tp->rx_opt.saw_tstamp = 0;
+	tp->rx_opt.dsack = 0;
+	tp->rx_opt.num_sacks = 0;
+	tp->rcv_ooopack = 0;
+
 
 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);
@@ -2697,6 +2767,21 @@
 	return 0;
 }
 
+DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
+EXPORT_SYMBOL(tcp_tx_delay_enabled);
+
+static void tcp_enable_tx_delay(void)
+{
+	if (!static_branch_unlikely(&tcp_tx_delay_enabled)) {
+		static int __tcp_tx_delay_enabled = 0;
+
+		if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) {
+			static_branch_enable(&tcp_tx_delay_enabled);
+			pr_info("TCP_TX_DELAY enabled\n");
+		}
+	}
+}
+
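Once the static branch is enabled, the per-socket delay is set from user space; a minimal sketch (the value is in microseconds):

	int delay_us = 10000;	/* delay this socket's transmits by 10 ms */
	setsockopt(fd, IPPROTO_TCP, TCP_TX_DELAY, &delay_us, sizeof(delay_us));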
 /*
  *	Socket option code for TCP.
  */
@@ -2724,7 +2809,9 @@
 		name[val] = 0;
 
 		lock_sock(sk);
-		err = tcp_set_congestion_control(sk, name, true, true);
+		err = tcp_set_congestion_control(sk, name, true, true,
+						 ns_capable(sock_net(sk)->user_ns,
+							    CAP_NET_ADMIN));
 		release_sock(sk);
 		return err;
 	}
@@ -2747,15 +2834,23 @@
 		return err;
 	}
 	case TCP_FASTOPEN_KEY: {
-		__u8 key[TCP_FASTOPEN_KEY_LENGTH];
+		__u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
+		__u8 *backup_key = NULL;
 
-		if (optlen != sizeof(key))
+		/* Allow a backup key as well to facilitate key rotation.
+		 * The first key is the active one.
+		 */
+		if (optlen != TCP_FASTOPEN_KEY_LENGTH &&
+		    optlen != TCP_FASTOPEN_KEY_BUF_LENGTH)
 			return -EINVAL;
 
 		if (copy_from_user(key, optval, optlen))
 			return -EFAULT;
 
-		return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
+		if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH)
+			backup_key = key + TCP_FASTOPEN_KEY_LENGTH;
+
+		return tcp_fastopen_reset_cipher(net, sk, key, backup_key);
 	}
 	default:
 		/* fallthru */
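With the backup slot in place, user space can rotate keys without invalidating cookies minted under the previous key; a minimal sketch on a listener (new_key and old_key are hypothetical 16-byte buffers):

	unsigned char keys[2 * TCP_FASTOPEN_KEY_LENGTH];

	memcpy(keys, new_key, 16);	/* slot 0: new active key */
	memcpy(keys + 16, old_key, 16);	/* slot 1: old key, kept as backup */
	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_KEY, keys, sizeof(keys));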
@@ -2844,9 +2939,9 @@
 		if (sk->sk_state != TCP_CLOSE)
 			err = -EPERM;
 		else if (tp->repair_queue == TCP_SEND_QUEUE)
-			tp->write_seq = val;
+			WRITE_ONCE(tp->write_seq, val);
 		else if (tp->repair_queue == TCP_RECV_QUEUE)
-			tp->rcv_nxt = val;
+			WRITE_ONCE(tp->rcv_nxt, val);
 		else
 			err = -EINVAL;
 		break;
@@ -2957,16 +3052,16 @@
 
 	case TCP_QUICKACK:
 		if (!val) {
-			icsk->icsk_ack.pingpong = 1;
+			inet_csk_enter_pingpong_mode(sk);
 		} else {
-			icsk->icsk_ack.pingpong = 0;
+			inet_csk_exit_pingpong_mode(sk);
 			if ((1 << sk->sk_state) &
 			    (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
 			    inet_csk_ack_scheduled(sk)) {
 				icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
 				tcp_cleanup_rbuf(sk, 1);
 				if (!(val & 1))
-					icsk->icsk_ack.pingpong = 1;
+					inet_csk_enter_pingpong_mode(sk);
 			}
 		}
 		break;
@@ -3039,6 +3134,11 @@
 		else
 			tp->recvmsg_inq = val;
 		break;
+	case TCP_TX_DELAY:
+		if (val)
+			tcp_enable_tx_delay();
+		tp->tcp_tx_delay = val;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -3096,10 +3196,10 @@
 {
 	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+	unsigned long rate;
 	u32 now;
 	u64 rate64;
 	bool slow;
-	u32 rate;
 
 	memset(info, 0, sizeof(*info));
 	if (sk->sk_type != SOCK_STREAM)
@@ -3109,11 +3209,11 @@
 
 	/* Report meaningful fields for all TCP states, including listeners */
 	rate = READ_ONCE(sk->sk_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
+	rate64 = (rate != ~0UL) ? rate : ~0ULL;
 	info->tcpi_pacing_rate = rate64;
 
 	rate = READ_ONCE(sk->sk_max_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
+	rate64 = (rate != ~0UL) ? rate : ~0ULL;
 	info->tcpi_max_pacing_rate = rate64;
 
 	info->tcpi_reordering = tp->reordering;
@@ -3203,6 +3303,8 @@
 	info->tcpi_bytes_retrans = tp->bytes_retrans;
 	info->tcpi_dsack_dups = tp->dsack_dups;
 	info->tcpi_reord_seen = tp->reord_seen;
+	info->tcpi_rcv_ooopack = tp->rcv_ooopack;
+	info->tcpi_snd_wnd = tp->snd_wnd;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3231,6 +3333,7 @@
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */
 		0;
 }
 
@@ -3239,8 +3342,8 @@
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *stats;
 	struct tcp_info info;
+	unsigned long rate;
 	u64 rate64;
-	u32 rate;
 
 	stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
 	if (!stats)
@@ -3259,7 +3362,7 @@
 			  tp->total_retrans, TCP_NLA_PAD);
 
 	rate = READ_ONCE(sk->sk_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
+	rate64 = (rate != ~0UL) ? rate : ~0ULL;
 	nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
 
 	rate64 = tcp_compute_delivery_rate(tp);
@@ -3284,6 +3387,7 @@
 			  TCP_NLA_PAD);
 	nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
 	nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
+	nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3);
 
 	return stats;
 }
@@ -3378,7 +3482,7 @@
 		return 0;
 	}
 	case TCP_QUICKACK:
-		val = !icsk->icsk_ack.pingpong;
+		val = !inet_csk_in_pingpong_mode(sk);
 		break;
 
 	case TCP_CONGESTION:
@@ -3407,21 +3511,23 @@
 		return 0;
 
 	case TCP_FASTOPEN_KEY: {
-		__u8 key[TCP_FASTOPEN_KEY_LENGTH];
+		__u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
 		struct tcp_fastopen_context *ctx;
+		unsigned int key_len = 0;
 
 		if (get_user(len, optlen))
 			return -EFAULT;
 
 		rcu_read_lock();
 		ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
-		if (ctx)
-			memcpy(key, ctx->key, sizeof(key));
-		else
-			len = 0;
+		if (ctx) {
+			key_len = tcp_fastopen_context_len(ctx) *
+					TCP_FASTOPEN_KEY_LENGTH;
+			memcpy(&key[0], &ctx->key[0], key_len);
+		}
 		rcu_read_unlock();
 
-		len = min_t(unsigned int, len, sizeof(key));
+		len = min_t(unsigned int, len, key_len);
 		if (put_user(len, optlen))
 			return -EFAULT;
 		if (copy_to_user(optval, key, len))
@@ -3494,6 +3600,10 @@
 		val = tp->fastopen_no_cookie;
 		break;
 
+	case TCP_TX_DELAY:
+		val = tp->tcp_tx_delay;
+		break;
+
 	case TCP_TIMESTAMP:
 		val = tcp_time_stamp_raw() + tp->tsoffset;
 		break;
@@ -3643,8 +3753,11 @@
 	if (unlikely(!tcp_md5sig_pool_populated)) {
 		mutex_lock(&tcp_md5sig_mutex);
 
-		if (!tcp_md5sig_pool_populated)
+		if (!tcp_md5sig_pool_populated) {
 			__tcp_alloc_md5sig_pool();
+			if (tcp_md5sig_pool_populated)
+				static_branch_inc(&tcp_md5_needed);
+		}
 
 		mutex_unlock(&tcp_md5sig_mutex);
 	}
@@ -3694,8 +3807,8 @@
 		return 1;
 
 	for (i = 0; i < shi->nr_frags; ++i) {
-		const struct skb_frag_struct *f = &shi->frags[i];
-		unsigned int offset = f->page_offset;
+		const skb_frag_t *f = &shi->frags[i];
+		unsigned int offset = skb_frag_off(f);
 		struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
 
 		sg_set_page(&sg, page, skb_frag_size(f),
@@ -3727,7 +3840,13 @@
 
 void tcp_done(struct sock *sk)
 {
-	struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+	struct request_sock *req;
+
+	/* We might be called with a new socket, after
+	 * inet_csk_prepare_forced_close() has been called,
+	 * so we cannot use lockdep_sock_is_held(sk).
+	 */
+	req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);
 
 	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
 		TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
@@ -3824,6 +3943,7 @@
 	unsigned long limit;
 	unsigned int i;
 
+	BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
 		     FIELD_SIZEOF(struct sk_buff, cb));
 
@@ -3889,8 +4009,8 @@
 	init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
 
 	init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
-	init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
-	init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);
+	init_net.ipv4.sysctl_tcp_rmem[1] = 131072;
+	init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare);
 
 	pr_info("Hash tables configured (established %u bind %u)\n",
 		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 02ff2dd..32772d6 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -115,6 +115,14 @@
 		unused_b:5;
 	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
 	u32	full_bw;	/* recent bw, to estimate if pipe is full */
+
+	/* For tracking ACK aggregation: */
+	u64	ack_epoch_mstamp;	/* start of ACK sampling epoch */
+	u16	extra_acked[2];		/* max excess data ACKed in epoch */
+	u32	ack_epoch_acked:20,	/* packets (S)ACKed in sampling epoch */
+		extra_acked_win_rtts:5,	/* age of extra_acked, in round trips */
+		extra_acked_win_idx:1,	/* current index in extra_acked array */
+		unused_c:6;
 };
 
 #define CYCLE_LEN	8	/* number of phases in a pacing gain cycle */
@@ -128,6 +136,14 @@
 /* Skip TSO below the following bandwidth (bits/sec): */
 static const int bbr_min_tso_rate = 1200000;
 
+/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck.
+ * In order to help drive the network toward lower queues and low latency while
+ * maintaining high utilization, the average pacing rate aims to be slightly
+ * lower than the estimated bandwidth. This is an important aspect of the
+ * design.
+ */
+static const int bbr_pacing_margin_percent = 1;
+
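A rough worked example of the margin's effect (assumed numbers):

	/* bw = 10,000 pkt/s, mss = 1448 bytes, gain = BBR_UNIT:
	 *   old: rate = 10,000 * 1448        = 14.48 MB/s
	 *   new: rate = 10,000 * 1448 * 0.99 ~= 14.34 MB/s
	 */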
 /* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
  * that will allow a smoothly increasing pacing rate that will double each RTT
  * and send the same number of packets per RTT that an un-paced, slow-starting
@@ -174,6 +190,15 @@
 /* If we estimate we're policed, use lt_bw for this many round trips: */
 static const u32 bbr_lt_bw_max_rtts = 48;
 
+/* Gain factor for adding extra_acked to target cwnd: */
+static const int bbr_extra_acked_gain = BBR_UNIT;
+/* Window length of extra_acked window. */
+static const u32 bbr_extra_acked_win_rtts = 5;
+/* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */
+static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20;
+/* Time period for clamping cwnd increment due to ack aggregation */
+static const u32 bbr_extra_acked_max_us = 100 * 1000;
+
 static void bbr_check_probe_rtt_done(struct sock *sk);
 
 /* Do we estimate that STARTUP filled the pipe? */
@@ -200,6 +225,16 @@
 	return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
 }
 
+/* Return maximum extra acked in past k-2k round trips,
+ * where k = bbr_extra_acked_win_rtts.
+ */
+static u16 bbr_extra_acked(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return max(bbr->extra_acked[0], bbr->extra_acked[1]);
+}
+
 /* Return rate in bytes per second, optionally with a gain.
  * The order here is chosen carefully to avoid overflow of u64. This should
  * work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
@@ -208,17 +243,15 @@
 {
 	unsigned int mss = tcp_sk(sk)->mss_cache;
 
-	if (!tcp_needs_internal_pacing(sk))
-		mss = tcp_mss_to_mtu(sk, mss);
 	rate *= mss;
 	rate *= gain;
 	rate >>= BBR_SCALE;
-	rate *= USEC_PER_SEC;
+	rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent);
 	return rate >> BW_SCALE;
 }
 
 /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
-static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
 {
 	u64 rate = bw;
 
@@ -246,18 +279,12 @@
 	sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
 }
 
-/* Pace using current bw estimate and a gain factor. In order to help drive the
- * network toward lower queues while maintaining high utilization and low
- * latency, the average pacing rate aims to be slightly (~1%) lower than the
- * estimated bandwidth. This is an important aspect of the design. In this
- * implementation this slightly lower pacing rate is achieved implicitly by not
- * including link-layer headers in the packet size used for the pacing rate.
- */
+/* Pace using current bw estimate and a gain factor. */
 static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
-	u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
+	unsigned long rate = bbr_bw_to_pacing_rate(sk, bw, gain);
 
 	if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
 		bbr_init_pacing_rate_from_rtt(sk);
@@ -279,7 +306,7 @@
 	/* Sort of tcp_tso_autosize() but ignoring
 	 * driver provided sk_gso_max_size.
 	 */
-	bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+	bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift,
 		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
 	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
 
@@ -305,6 +332,8 @@
 
 	if (event == CA_EVENT_TX_START && tp->app_limited) {
 		bbr->idle_restart = 1;
+		bbr->ack_epoch_mstamp = tp->tcp_mstamp;
+		bbr->ack_epoch_acked = 0;
 		/* Avoid pointless buffer overflows: pace at est. bw if we don't
 		 * need more speed (we're restarting from idle and app-limited).
 		 */
@@ -315,30 +344,19 @@
 	}
 }
 
-/* Find target cwnd. Right-size the cwnd based on min RTT and the
- * estimated bottleneck bandwidth:
+/* Calculate bdp based on min RTT and the estimated bottleneck bandwidth:
  *
- * cwnd = bw * min_rtt * gain = BDP * gain
+ * bdp = ceil(bw * min_rtt * gain)
  *
  * The key factor, gain, controls the amount of queue. While a small gain
  * builds a smaller queue, it becomes more vulnerable to noise in RTT
  * measurements (e.g., delayed ACKs or other ACK compression effects). This
  * noise may cause BBR to under-estimate the rate.
- *
- * To achieve full performance in high-speed paths, we budget enough cwnd to
- * fit full-sized skbs in-flight on both end hosts to fully utilize the path:
- *   - one skb in sending host Qdisc,
- *   - one skb in sending host TSO/GSO engine
- *   - one skb being received by receiver host LRO/GRO/delayed-ACK engine
- * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
- * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets,
- * which allows 2 outstanding 2-packet sequences, to try to keep pipe
- * full even with ACK-every-other-packet delayed ACKs.
  */
-static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
+static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
 {
 	struct bbr *bbr = inet_csk_ca(sk);
-	u32 cwnd;
+	u32 bdp;
 	u64 w;
 
 	/* If we've never had a valid RTT sample, cap cwnd at the initial
@@ -352,8 +370,27 @@
 
 	w = (u64)bw * bbr->min_rtt_us;
 
-	/* Apply a gain to the given value, then remove the BW_SCALE shift. */
-	cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
+	/* Apply a gain to the given value, remove the BW_SCALE shift, and
+	 * round the value up to avoid a negative feedback loop.
+	 */
+	bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
+
+	return bdp;
+}
+
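A rough worked example (assumed numbers):

	/* bw = 100 Mbit/s, min_rtt = 40 ms, mss = 1448 bytes:
	 *   BDP = 100e6/8 * 0.040 = 500,000 bytes ~= 345 packets
	 * so bbr_bdp() returns ~345 for gain = BBR_UNIT and ~690 when
	 * probing with gain = 2 * BBR_UNIT.
	 */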
+/* To achieve full performance in high-speed paths, we budget enough cwnd to
+ * fit full-sized skbs in-flight on both end hosts to fully utilize the path:
+ *   - one skb in sending host Qdisc,
+ *   - one skb in sending host TSO/GSO engine
+ *   - one skb being received by receiver host LRO/GRO/delayed-ACK engine
+ * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
+ * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets,
+ * which allows 2 outstanding 2-packet sequences, to try to keep pipe
+ * full even with ACK-every-other-packet delayed ACKs.
+ */
+static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
 
 	/* Allow enough full-sized skbs in flight to utilize end systems. */
 	cwnd += 3 * bbr_tso_segs_goal(sk);
@@ -362,12 +399,72 @@
 	cwnd = (cwnd + 1) & ~1U;
 
 	/* Ensure gain cycling gets inflight above BDP even for small BDPs. */
-	if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT)
+	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0)
 		cwnd += 2;
 
 	return cwnd;
 }
 
+/* Find inflight based on min RTT and the estimated bottleneck bandwidth. */
+static u32 bbr_inflight(struct sock *sk, u32 bw, int gain)
+{
+	u32 inflight;
+
+	inflight = bbr_bdp(sk, bw, gain);
+	inflight = bbr_quantization_budget(sk, inflight);
+
+	return inflight;
+}
+
+/* With pacing at lower layers, there's often less data "in the network" than
+ * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq),
+ * we often have several skbs queued in the pacing layer with a pre-scheduled
+ * earliest departure time (EDT). BBR adapts its pacing rate based on the
+ * inflight level that it estimates has already been "baked in" by previous
+ * departure time decisions. We calculate a rough estimate of the number of our
+ * packets that might be in the network at the earliest departure time for the
+ * next skb scheduled:
+ *   in_network_at_edt = inflight_at_edt - (EDT - now) * bw
+ * If we're increasing inflight, then we want to know if the transmit of the
+ * EDT skb will push inflight above the target, so inflight_at_edt includes
+ * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight,
+ * then estimate if inflight will sink too low just before the EDT transmit.
+ */
+static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 now_ns, edt_ns, interval_us;
+	u32 interval_delivered, inflight_at_edt;
+
+	now_ns = tp->tcp_clock_cache;
+	edt_ns = max(tp->tcp_wstamp_ns, now_ns);
+	interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC);
+	interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE;
+	inflight_at_edt = inflight_now;
+	if (bbr->pacing_gain > BBR_UNIT)              /* increasing inflight */
+		inflight_at_edt += bbr_tso_segs_goal(sk);  /* include EDT skb */
+	if (interval_delivered >= inflight_at_edt)
+		return 0;
+	return inflight_at_edt - interval_delivered;
+}
+
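A rough worked example (assumed numbers):

	/* If the next skb's earliest departure time is 2 ms in the future
	 * and bbr_bw() is 100,000 pkt/s, interval_delivered = 200 packets
	 * should leave the network before that transmit, so the in-network
	 * estimate is inflight_now (+ tso_segs_goal when increasing) - 200.
	 */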
+/* Find the cwnd increment based on estimate of ack aggregation */
+static u32 bbr_ack_aggregation_cwnd(struct sock *sk)
+{
+	u32 max_aggr_cwnd, aggr_cwnd = 0;
+
+	if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) {
+		max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us)
+				/ BW_UNIT;
+		aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk))
+			     >> BBR_SCALE;
+		aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd);
+	}
+
+	return aggr_cwnd;
+}
+
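A rough worked example of the clamp (assumed numbers):

	/* With bbr_bw() = 10,000 pkt/s the cap is 10,000 * 0.1 s = 1,000
	 * packets, so even a large measured extra_acked adds at most
	 * 1,000 packets of headroom to the target cwnd.
	 */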
 /* An optimization in BBR to reduce losses: On the first round of recovery, we
  * follow the packet conservation principle: send P packets per P packets acked.
  * After that, we slow-start and send at most 2*P packets per P packets acked.
@@ -428,8 +525,15 @@
 	if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
 		goto done;
 
+	target_cwnd = bbr_bdp(sk, bw, gain);
+
+	/* Increment the cwnd to account for excess ACKed data that seems
+	 * due to aggregation (of data and/or ACKs) visible in the ACK stream.
+	 */
+	target_cwnd += bbr_ack_aggregation_cwnd(sk);
+	target_cwnd = bbr_quantization_budget(sk, target_cwnd);
+
 	/* If we're below target cwnd, slow start cwnd toward target cwnd. */
-	target_cwnd = bbr_target_cwnd(sk, bw, gain);
 	if (bbr_full_bw_reached(sk))  /* only cut cwnd if we filled the pipe */
 		cwnd = min(cwnd + acked, target_cwnd);
 	else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
@@ -459,7 +563,7 @@
 	if (bbr->pacing_gain == BBR_UNIT)
 		return is_full_length;		/* just use wall clock time */
 
-	inflight = rs->prior_in_flight;  /* what was in-flight before ACK? */
+	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
 	bw = bbr_max_bw(sk);
 
 	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
@@ -470,14 +574,14 @@
 	if (bbr->pacing_gain > BBR_UNIT)
 		return is_full_length &&
 			(rs->losses ||  /* perhaps pacing_gain*BDP won't fit */
-			 inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain));
+			 inflight >= bbr_inflight(sk, bw, bbr->pacing_gain));
 
 	/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
 	 * probing didn't find more bw. If inflight falls to match BDP then we
 	 * estimate queue is drained; persisting would underutilize the pipe.
 	 */
 	return is_full_length ||
-		inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT);
+		inflight <= bbr_inflight(sk, bw, BBR_UNIT);
 }
 
 static void bbr_advance_cycle_phase(struct sock *sk)
@@ -487,8 +591,6 @@
 
 	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
 	bbr->cycle_mstamp = tp->delivered_mstamp;
-	bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
-					    bbr_pacing_gain[bbr->cycle_idx];
 }
 
 /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
@@ -506,8 +608,6 @@
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->mode = BBR_STARTUP;
-	bbr->pacing_gain = bbr_high_gain;
-	bbr->cwnd_gain	 = bbr_high_gain;
 }
 
 static void bbr_reset_probe_bw_mode(struct sock *sk)
@@ -515,8 +615,6 @@
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->mode = BBR_PROBE_BW;
-	bbr->pacing_gain = BBR_UNIT;
-	bbr->cwnd_gain = bbr_cwnd_gain;
 	bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
 	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
 }
@@ -700,6 +798,67 @@
 	}
 }
 
+/* Estimates the windowed max degree of ack aggregation.
+ * This is used to provision extra in-flight data to keep sending during
+ * inter-ACK silences.
+ *
+ * Degree of ack aggregation is estimated as extra data acked beyond expected.
+ *
+ * max_extra_acked = "maximum recent excess data ACKed beyond max_bw * interval"
+ * cwnd += max_extra_acked
+ *
+ * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms).
+ * Max filter is an approximate sliding window of 5-10 (packet timed) round
+ * trips.
+ */
+static void bbr_update_ack_aggregation(struct sock *sk,
+				       const struct rate_sample *rs)
+{
+	u32 epoch_us, expected_acked, extra_acked;
+	struct bbr *bbr = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 ||
+	    rs->delivered < 0 || rs->interval_us <= 0)
+		return;
+
+	if (bbr->round_start) {
+		bbr->extra_acked_win_rtts = min(0x1F,
+						bbr->extra_acked_win_rtts + 1);
+		if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) {
+			bbr->extra_acked_win_rtts = 0;
+			bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ?
+						   0 : 1;
+			bbr->extra_acked[bbr->extra_acked_win_idx] = 0;
+		}
+	}
+
+	/* Compute how many packets we expected to be delivered over epoch. */
+	epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp,
+				      bbr->ack_epoch_mstamp);
+	expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT;
+
+	/* Reset the aggregation epoch if the ACK rate is below the expected
+	 * rate, or if a very large number of ACKs has arrived since the
+	 * epoch started (i.e. the epoch is potentially quite old).
+	 */
+	if (bbr->ack_epoch_acked <= expected_acked ||
+	    (bbr->ack_epoch_acked + rs->acked_sacked >=
+	     bbr_ack_epoch_acked_reset_thresh)) {
+		bbr->ack_epoch_acked = 0;
+		bbr->ack_epoch_mstamp = tp->delivered_mstamp;
+		expected_acked = 0;
+	}
+
+	/* Compute excess data delivered, beyond what was expected. */
+	bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
+				     bbr->ack_epoch_acked + rs->acked_sacked);
+	extra_acked = bbr->ack_epoch_acked - expected_acked;
+	extra_acked = min(extra_acked, tp->snd_cwnd);
+	if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
+		bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
+}
+
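A rough worked example of the epoch accounting (assumed numbers):

	/* bbr_bw() = 5,000 pkt/s and epoch_us = 20,000 give
	 * expected_acked = 100 packets. If 140 packets were (S)ACKed in
	 * the epoch, extra_acked = 40 (clamped to cwnd) feeds the windowed
	 * max filter. Note the 20-bit ack_epoch_acked field (max 0xFFFFF)
	 * pairs with the 1U << 20 reset threshold above.
	 */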
 /* Estimate when the pipe is full, using the change in delivery rate: BBR
  * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
  * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
@@ -734,14 +893,12 @@
 
 	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
 		bbr->mode = BBR_DRAIN;	/* drain queue we created */
-		bbr->pacing_gain = bbr_drain_gain;	/* pace slow to drain */
-		bbr->cwnd_gain = bbr_high_gain;	/* maintain cwnd */
 		tcp_sk(sk)->snd_ssthresh =
-				bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
+				bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
 	}	/* fall through to check if in-flight is already small: */
 	if (bbr->mode == BBR_DRAIN &&
-	    tcp_packets_in_flight(tcp_sk(sk)) <=
-	    bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
+	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
+	    bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT))
 		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
 }
 
@@ -797,8 +954,6 @@
 	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
 	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
 		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
-		bbr->pacing_gain = BBR_UNIT;
-		bbr->cwnd_gain = BBR_UNIT;
 		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
 		bbr->probe_rtt_done_stamp = 0;
 	}
@@ -826,13 +981,44 @@
 		bbr->idle_restart = 0;
 }
 
+static void bbr_update_gains(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	switch (bbr->mode) {
+	case BBR_STARTUP:
+		bbr->pacing_gain = bbr_high_gain;
+		bbr->cwnd_gain	 = bbr_high_gain;
+		break;
+	case BBR_DRAIN:
+		bbr->pacing_gain = bbr_drain_gain;	/* slow, to drain */
+		bbr->cwnd_gain	 = bbr_high_gain;	/* keep cwnd */
+		break;
+	case BBR_PROBE_BW:
+		bbr->pacing_gain = (bbr->lt_use_bw ?
+				    BBR_UNIT :
+				    bbr_pacing_gain[bbr->cycle_idx]);
+		bbr->cwnd_gain	 = bbr_cwnd_gain;
+		break;
+	case BBR_PROBE_RTT:
+		bbr->pacing_gain = BBR_UNIT;
+		bbr->cwnd_gain	 = BBR_UNIT;
+		break;
+	default:
+		WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode);
+		break;
+	}
+}
+
 static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
 {
 	bbr_update_bw(sk, rs);
+	bbr_update_ack_aggregation(sk, rs);
 	bbr_update_cycle_phase(sk, rs);
 	bbr_check_full_bw_reached(sk, rs);
 	bbr_check_drain(sk, rs);
 	bbr_update_min_rtt(sk, rs);
+	bbr_update_gains(sk);
 }
 
 static void bbr_main(struct sock *sk, const struct rate_sample *rs)
@@ -879,6 +1065,13 @@
 	bbr_reset_lt_bw_sampling(sk);
 	bbr_reset_startup_mode(sk);
 
+	bbr->ack_epoch_mstamp = tp->tcp_mstamp;
+	bbr->ack_epoch_acked = 0;
+	bbr->extra_acked_win_rtts = 0;
+	bbr->extra_acked_win_idx = 0;
+	bbr->extra_acked[0] = 0;
+	bbr->extra_acked[1] = 0;
+
 	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
 }
 
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index fc36143..645cc30 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Binary Increase Congestion control for TCP
  * Home page:
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
new file mode 100644
index 0000000..e387051
--- /dev/null
+++ b/net/ipv4/tcp_bpf.c
@@ -0,0 +1,696 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#include <linux/skmsg.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+
+#include <net/inet_common.h>
+#include <net/tls.h>
+
+static bool tcp_bpf_stream_read(const struct sock *sk)
+{
+	struct sk_psock *psock;
+	bool empty = true;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock))
+		empty = list_empty(&psock->ingress_msg);
+	rcu_read_unlock();
+	return !empty;
+}
+
+static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
+			     int flags, long timeo, int *err)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int ret = 0;
+
+	if (!timeo)
+		return ret;
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	ret = sk_wait_event(sk, &timeo,
+			    !list_empty(&psock->ingress_msg) ||
+			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return ret;
+}
+
+int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
+		      struct msghdr *msg, int len, int flags)
+{
+	struct iov_iter *iter = &msg->msg_iter;
+	int peek = flags & MSG_PEEK;
+	int i, ret, copied = 0;
+	struct sk_msg *msg_rx;
+
+	msg_rx = list_first_entry_or_null(&psock->ingress_msg,
+					  struct sk_msg, list);
+
+	while (copied != len) {
+		struct scatterlist *sge;
+
+		if (unlikely(!msg_rx))
+			break;
+
+		i = msg_rx->sg.start;
+		do {
+			struct page *page;
+			int copy;
+
+			sge = sk_msg_elem(msg_rx, i);
+			copy = sge->length;
+			page = sg_page(sge);
+			if (copied + copy > len)
+				copy = len - copied;
+			ret = copy_page_to_iter(page, sge->offset, copy, iter);
+			if (ret != copy) {
+				msg_rx->sg.start = i;
+				return -EFAULT;
+			}
+
+			copied += copy;
+			if (likely(!peek)) {
+				sge->offset += copy;
+				sge->length -= copy;
+				sk_mem_uncharge(sk, copy);
+				msg_rx->sg.size -= copy;
+
+				if (!sge->length) {
+					sk_msg_iter_var_next(i);
+					if (!msg_rx->skb)
+						put_page(page);
+				}
+			} else {
+				sk_msg_iter_var_next(i);
+			}
+
+			if (copied == len)
+				break;
+		} while (i != msg_rx->sg.end);
+
+		if (unlikely(peek)) {
+			msg_rx = list_next_entry(msg_rx, list);
+			continue;
+		}
+
+		msg_rx->sg.start = i;
+		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
+			list_del(&msg_rx->list);
+			if (msg_rx->skb)
+				consume_skb(msg_rx->skb);
+			kfree(msg_rx);
+		}
+		msg_rx = list_first_entry_or_null(&psock->ingress_msg,
+						  struct sk_msg, list);
+	}
+
+	return copied;
+}
+EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg);
+
+int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		    int nonblock, int flags, int *addr_len)
+{
+	struct sk_psock *psock;
+	int copied, ret;
+
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return inet_recv_error(sk, msg, len, addr_len);
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+	lock_sock(sk);
+msg_bytes_ready:
+	copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
+	if (!copied) {
+		int data, err = 0;
+		long timeo;
+
+		timeo = sock_rcvtimeo(sk, nonblock);
+		data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
+		if (data) {
+			if (skb_queue_empty(&sk->sk_receive_queue))
+				goto msg_bytes_ready;
+			release_sock(sk);
+			sk_psock_put(sk, psock);
+			return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+		}
+		if (err) {
+			ret = err;
+			goto out;
+		}
+		copied = -EAGAIN;
+	}
+	ret = copied;
+out:
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return ret;
+}
+
+static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+			   struct sk_msg *msg, u32 apply_bytes, int flags)
+{
+	bool apply = apply_bytes;
+	struct scatterlist *sge;
+	u32 size, copied = 0;
+	struct sk_msg *tmp;
+	int i, ret = 0;
+
+	tmp = kzalloc(sizeof(*tmp), __GFP_NOWARN | GFP_KERNEL);
+	if (unlikely(!tmp))
+		return -ENOMEM;
+
+	lock_sock(sk);
+	tmp->sg.start = msg->sg.start;
+	i = msg->sg.start;
+	do {
+		sge = sk_msg_elem(msg, i);
+		size = (apply && apply_bytes < sge->length) ?
+			apply_bytes : sge->length;
+		if (!sk_wmem_schedule(sk, size)) {
+			if (!copied)
+				ret = -ENOMEM;
+			break;
+		}
+
+		sk_mem_charge(sk, size);
+		sk_msg_xfer(tmp, msg, i, size);
+		copied += size;
+		if (sge->length)
+			get_page(sk_msg_page(tmp, i));
+		sk_msg_iter_var_next(i);
+		tmp->sg.end = i;
+		if (apply) {
+			apply_bytes -= size;
+			if (!apply_bytes)
+				break;
+		}
+	} while (i != msg->sg.end);
+
+	if (!ret) {
+		msg->sg.start = i;
+		msg->sg.size -= apply_bytes;
+		sk_psock_queue_msg(psock, tmp);
+		sk_psock_data_ready(sk, psock);
+	} else {
+		sk_msg_free(sk, tmp);
+		kfree(tmp);
+	}
+
+	release_sock(sk);
+	return ret;
+}
+
+static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
+			int flags, bool uncharge)
+{
+	bool apply = apply_bytes;
+	struct scatterlist *sge;
+	struct page *page;
+	int size, ret = 0;
+	u32 off;
+
+	while (1) {
+		bool has_tx_ulp;
+
+		sge = sk_msg_elem(msg, msg->sg.start);
+		size = (apply && apply_bytes < sge->length) ?
+			apply_bytes : sge->length;
+		off  = sge->offset;
+		page = sg_page(sge);
+
+		tcp_rate_check_app_limited(sk);
+retry:
+		has_tx_ulp = tls_sw_has_ctx_tx(sk);
+		if (has_tx_ulp) {
+			flags |= MSG_SENDPAGE_NOPOLICY;
+			ret = kernel_sendpage_locked(sk,
+						     page, off, size, flags);
+		} else {
+			ret = do_tcp_sendpages(sk, page, off, size, flags);
+		}
+
+		if (ret <= 0)
+			return ret;
+		if (apply)
+			apply_bytes -= ret;
+		msg->sg.size -= ret;
+		sge->offset += ret;
+		sge->length -= ret;
+		if (uncharge)
+			sk_mem_uncharge(sk, ret);
+		if (ret != size) {
+			size -= ret;
+			off  += ret;
+			goto retry;
+		}
+		if (!sge->length) {
+			put_page(page);
+			sk_msg_iter_next(msg, start);
+			sg_init_table(sge, 1);
+			if (msg->sg.start == msg->sg.end)
+				break;
+		}
+		if (apply && !apply_bytes)
+			break;
+	}
+
+	return 0;
+}
+
+static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg,
+			       u32 apply_bytes, int flags, bool uncharge)
+{
+	int ret;
+
+	lock_sock(sk);
+	ret = tcp_bpf_push(sk, msg, apply_bytes, flags, uncharge);
+	release_sock(sk);
+	return ret;
+}
+
+int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
+			  u32 bytes, int flags)
+{
+	bool ingress = sk_msg_to_ingress(msg);
+	struct sk_psock *psock = sk_psock_get(sk);
+	int ret;
+
+	if (unlikely(!psock)) {
+		sk_msg_free(sk, msg);
+		return 0;
+	}
+	ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
+			tcp_bpf_push_locked(sk, msg, bytes, flags, false);
+	sk_psock_put(sk, psock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
+
+static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
+				struct sk_msg *msg, int *copied, int flags)
+{
+	bool cork = false, enospc = sk_msg_full(msg);
+	struct sock *sk_redir;
+	u32 tosend, delta = 0;
+	int ret;
+
+more_data:
+	if (psock->eval == __SK_NONE) {
+		/* Track delta in msg size to add/subtract it on SK_DROP from
+		 * returned to user copied size. This ensures user doesn't
+		 * get a positive return code with msg_cut_data and SK_DROP
+		 * verdict.
+		 */
+		delta = msg->sg.size;
+		psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+		if (msg->sg.size < delta)
+			delta -= msg->sg.size;
+		else
+			delta = 0;
+	}
+
+	if (msg->cork_bytes &&
+	    msg->cork_bytes > msg->sg.size && !enospc) {
+		psock->cork_bytes = msg->cork_bytes - msg->sg.size;
+		if (!psock->cork) {
+			psock->cork = kzalloc(sizeof(*psock->cork),
+					      GFP_ATOMIC | __GFP_NOWARN);
+			if (!psock->cork)
+				return -ENOMEM;
+		}
+		memcpy(psock->cork, msg, sizeof(*msg));
+		return 0;
+	}
+
+	tosend = msg->sg.size;
+	if (psock->apply_bytes && psock->apply_bytes < tosend)
+		tosend = psock->apply_bytes;
+
+	switch (psock->eval) {
+	case __SK_PASS:
+		ret = tcp_bpf_push(sk, msg, tosend, flags, true);
+		if (unlikely(ret)) {
+			*copied -= sk_msg_free(sk, msg);
+			break;
+		}
+		sk_msg_apply_bytes(psock, tosend);
+		break;
+	case __SK_REDIRECT:
+		sk_redir = psock->sk_redir;
+		sk_msg_apply_bytes(psock, tosend);
+		if (psock->cork) {
+			cork = true;
+			psock->cork = NULL;
+		}
+		sk_msg_return(sk, msg, tosend);
+		release_sock(sk);
+		ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+		lock_sock(sk);
+		if (unlikely(ret < 0)) {
+			int free = sk_msg_free_nocharge(sk, msg);
+
+			if (!cork)
+				*copied -= free;
+		}
+		if (cork) {
+			sk_msg_free(sk, msg);
+			kfree(msg);
+			msg = NULL;
+			ret = 0;
+		}
+		break;
+	case __SK_DROP:
+	default:
+		sk_msg_free_partial(sk, msg, tosend);
+		sk_msg_apply_bytes(psock, tosend);
+		*copied -= (tosend + delta);
+		return -EACCES;
+	}
+
+	if (likely(!ret)) {
+		if (!psock->apply_bytes) {
+			psock->eval =  __SK_NONE;
+			if (psock->sk_redir) {
+				sock_put(psock->sk_redir);
+				psock->sk_redir = NULL;
+			}
+		}
+		if (msg &&
+		    msg->sg.data[msg->sg.start].page_link &&
+		    msg->sg.data[msg->sg.start].length)
+			goto more_data;
+	}
+	return ret;
+}
+
+static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	struct sk_msg tmp, *msg_tx = NULL;
+	int copied = 0, err = 0;
+	struct sk_psock *psock;
+	long timeo;
+	int flags;
+
+	/* Don't let internal do_tcp_sendpages() flags through */
+	flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED);
+	flags |= MSG_NO_SHARED_FRAGS;
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return tcp_sendmsg(sk, msg, size);
+
+	lock_sock(sk);
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+	while (msg_data_left(msg)) {
+		bool enospc = false;
+		u32 copy, osize;
+
+		if (sk->sk_err) {
+			err = -sk->sk_err;
+			goto out_err;
+		}
+
+		copy = msg_data_left(msg);
+		if (!sk_stream_memory_free(sk))
+			goto wait_for_sndbuf;
+		if (psock->cork) {
+			msg_tx = psock->cork;
+		} else {
+			msg_tx = &tmp;
+			sk_msg_init(msg_tx);
+		}
+
+		osize = msg_tx->sg.size;
+		err = sk_msg_alloc(sk, msg_tx, msg_tx->sg.size + copy, msg_tx->sg.end - 1);
+		if (err) {
+			if (err != -ENOSPC)
+				goto wait_for_memory;
+			enospc = true;
+			copy = msg_tx->sg.size - osize;
+		}
+
+		err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx,
+					       copy);
+		if (err < 0) {
+			sk_msg_trim(sk, msg_tx, osize);
+			goto out_err;
+		}
+
+		copied += copy;
+		if (psock->cork_bytes) {
+			if (size > psock->cork_bytes)
+				psock->cork_bytes = 0;
+			else
+				psock->cork_bytes -= size;
+			if (psock->cork_bytes && !enospc)
+				goto out_err;
+			/* All cork bytes are accounted for; rerun the prog. */
+			psock->eval = __SK_NONE;
+			psock->cork_bytes = 0;
+		}
+
+		err = tcp_bpf_send_verdict(sk, psock, msg_tx, &copied, flags);
+		if (unlikely(err < 0))
+			goto out_err;
+		continue;
+wait_for_sndbuf:
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+		err = sk_stream_wait_memory(sk, &timeo);
+		if (err) {
+			if (msg_tx && msg_tx != psock->cork)
+				sk_msg_free(sk, msg_tx);
+			goto out_err;
+		}
+	}
+out_err:
+	if (err < 0)
+		err = sk_stream_error(sk, msg->msg_flags, err);
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return copied ? copied : err;
+}
+
+static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset,
+			    size_t size, int flags)
+{
+	struct sk_msg tmp, *msg = NULL;
+	int err = 0, copied = 0;
+	struct sk_psock *psock;
+	bool enospc = false;
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return tcp_sendpage(sk, page, offset, size, flags);
+
+	lock_sock(sk);
+	if (psock->cork) {
+		msg = psock->cork;
+	} else {
+		msg = &tmp;
+		sk_msg_init(msg);
+	}
+
+	/* Catch case where ring is full and sendpage is stalled. */
+	if (unlikely(sk_msg_full(msg)))
+		goto out_err;
+
+	sk_msg_page_add(msg, page, size, offset);
+	sk_mem_charge(sk, size);
+	copied = size;
+	if (sk_msg_full(msg))
+		enospc = true;
+	if (psock->cork_bytes) {
+		if (size > psock->cork_bytes)
+			psock->cork_bytes = 0;
+		else
+			psock->cork_bytes -= size;
+		if (psock->cork_bytes && !enospc)
+			goto out_err;
+		/* All cork bytes are accounted for; rerun the prog. */
+		psock->eval = __SK_NONE;
+		psock->cork_bytes = 0;
+	}
+
+	err = tcp_bpf_send_verdict(sk, psock, msg, &copied, flags);
+out_err:
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return copied ? copied : err;
+}
+
+static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_link *link;
+
+	while ((link = sk_psock_link_pop(psock))) {
+		sk_psock_unlink(sk, link);
+		sk_psock_free_link(link);
+	}
+}
+
+static void tcp_bpf_unhash(struct sock *sk)
+{
+	void (*saved_unhash)(struct sock *sk);
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		if (sk->sk_prot->unhash)
+			sk->sk_prot->unhash(sk);
+		return;
+	}
+
+	saved_unhash = psock->saved_unhash;
+	tcp_bpf_remove(sk, psock);
+	rcu_read_unlock();
+	saved_unhash(sk);
+}
+
+static void tcp_bpf_close(struct sock *sk, long timeout)
+{
+	void (*saved_close)(struct sock *sk, long timeout);
+	struct sk_psock *psock;
+
+	lock_sock(sk);
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		release_sock(sk);
+		return sk->sk_prot->close(sk, timeout);
+	}
+
+	saved_close = psock->saved_close;
+	tcp_bpf_remove(sk, psock);
+	rcu_read_unlock();
+	release_sock(sk);
+	saved_close(sk, timeout);
+}
+
+enum {
+	TCP_BPF_IPV4,
+	TCP_BPF_IPV6,
+	TCP_BPF_NUM_PROTS,
+};
+
+enum {
+	TCP_BPF_BASE,
+	TCP_BPF_TX,
+	TCP_BPF_NUM_CFGS,
+};
+
+static struct proto *tcpv6_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(tcpv6_prot_lock);
+static struct proto tcp_bpf_prots[TCP_BPF_NUM_PROTS][TCP_BPF_NUM_CFGS];
+
+static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
+				   struct proto *base)
+{
+	prot[TCP_BPF_BASE]			= *base;
+	prot[TCP_BPF_BASE].unhash		= tcp_bpf_unhash;
+	prot[TCP_BPF_BASE].close		= tcp_bpf_close;
+	prot[TCP_BPF_BASE].recvmsg		= tcp_bpf_recvmsg;
+	prot[TCP_BPF_BASE].stream_memory_read	= tcp_bpf_stream_read;
+
+	prot[TCP_BPF_TX]			= prot[TCP_BPF_BASE];
+	prot[TCP_BPF_TX].sendmsg		= tcp_bpf_sendmsg;
+	prot[TCP_BPF_TX].sendpage		= tcp_bpf_sendpage;
+}
+
+static void tcp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
+{
+	if (sk->sk_family == AF_INET6 &&
+	    unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
+		spin_lock_bh(&tcpv6_prot_lock);
+		if (likely(ops != tcpv6_prot_saved)) {
+			tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV6], ops);
+			smp_store_release(&tcpv6_prot_saved, ops);
+		}
+		spin_unlock_bh(&tcpv6_prot_lock);
+	}
+}
+
+static int __init tcp_bpf_v4_build_proto(void)
+{
+	tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot);
+	return 0;
+}
+core_initcall(tcp_bpf_v4_build_proto);
+
+static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock)
+{
+	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
+
+	sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]);
+}
+
+static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
+{
+	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
+
+	/* Reinit occurs when program types change, e.g. TCP_BPF_TX is
+	 * removed or added, requiring sk_prot hook updates. We keep the
+	 * original saved hooks in this case.
+	 */
+	sk->sk_prot = &tcp_bpf_prots[family][config];
+}
+
+static int tcp_bpf_assert_proto_ops(struct proto *ops)
+{
+	/* In order to avoid retpoline, we make assumptions when we call
+	 * into ops if e.g. a psock is not present. Make sure they are
+	 * indeed valid assumptions.
+	 */
+	return ops->recvmsg  == tcp_recvmsg &&
+	       ops->sendmsg  == tcp_sendmsg &&
+	       ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
+}
+
+void tcp_bpf_reinit(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	sock_owned_by_me(sk);
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	tcp_bpf_reinit_sk_prot(sk, psock);
+	rcu_read_unlock();
+}
+
+int tcp_bpf_init(struct sock *sk)
+{
+	struct proto *ops = READ_ONCE(sk->sk_prot);
+	struct sk_psock *psock;
+
+	sock_owned_by_me(sk);
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock || psock->sk_proto ||
+		     tcp_bpf_assert_proto_ops(ops))) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	tcp_bpf_check_v6_needs_rebuild(sk, ops);
+	tcp_bpf_update_sk_prot(sk, psock);
+	rcu_read_unlock();
+	return 0;
+}
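
The verdict path in tcp_bpf_send_verdict() above is easiest to see in isolation. What follows is a minimal user-space sketch of its bookkeeping -- how apply_bytes caps the number of bytes a cached verdict covers, and how the verdict is invalidated once those bytes are consumed. The names psock_model and run_verdict are illustrative stand-ins, not kernel symbols.

#include <stdio.h>

enum verdict { SK_NONE, SK_PASS, SK_REDIRECT, SK_DROP };

struct psock_model {
	enum verdict eval;	  /* cached verdict, SK_NONE when stale */
	unsigned int apply_bytes; /* bytes the verdict still applies to */
};

static unsigned int run_verdict(struct psock_model *p, unsigned int msg_size,
				enum verdict prog_result)
{
	unsigned int tosend = msg_size;

	if (p->eval == SK_NONE)		/* run the BPF prog only when stale */
		p->eval = prog_result;
	if (p->apply_bytes && p->apply_bytes < tosend)
		tosend = p->apply_bytes; /* verdict covers only this many bytes */
	if (p->apply_bytes) {
		p->apply_bytes -= tosend;
		if (!p->apply_bytes)
			p->eval = SK_NONE; /* next send re-runs the prog */
	}
	return tosend;
}

int main(void)
{
	struct psock_model p = { SK_NONE, 3000 };

	/* 4000-byte msg: the first 3000 bytes keep the PASS verdict, then
	 * the cached verdict is invalidated and the prog runs again.
	 */
	printf("sent %u\n", run_verdict(&p, 4000, SK_PASS)); /* 3000 */
	printf("eval stale: %d\n", p.eval == SK_NONE);       /* 1 */
	return 0;
}
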
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 06fbe10..709d238 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * CAIA Delay-Gradient (CDG) congestion control
  *
@@ -146,7 +147,7 @@
 		return;
 
 	if (hystart_detect & HYSTART_ACK_TRAIN) {
-		u32 now_us = div_u64(local_clock(), NSEC_PER_USEC);
+		u32 now_us = tp->tcp_mstamp;
 
 		if (ca->last_ack == 0 || !tcp_is_cwnd_limited(sk)) {
 			ca->last_ack = now_us;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index bc6c02f..c445a81 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Pluggable TCP congestion control support and newReno
  * congestion control.
@@ -332,7 +333,8 @@
  * tcp_reinit_congestion_control (if the current congestion control was
  * already initialized.
  */
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
+			       bool reinit, bool cap_net_admin)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_congestion_ops *ca;
@@ -368,8 +370,7 @@
 		} else {
 			err = -EBUSY;
 		}
-	} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
-		     ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
+	} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) {
 		err = -EPERM;
 	} else if (!try_module_get(ca->owner)) {
 		err = -EBUSY;
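
The new cap_net_admin parameter moves the privilege decision out of the setter: the caller evaluates the capability once and hands the result down instead of tcp_set_congestion_control() calling ns_capable() itself. A minimal sketch of the resulting check; may_set_cc is a hypothetical name used only for illustration:

#include <stdbool.h>
#include <stdio.h>

#define TCP_CONG_NON_RESTRICTED 0x1

/* Unrestricted algorithms are always allowed; restricted ones require
 * that the caller held CAP_NET_ADMIN when it computed cap_net_admin.
 */
static bool may_set_cc(unsigned int ca_flags, bool cap_net_admin)
{
	return (ca_flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin;
}

int main(void)
{
	printf("%d\n", may_set_cc(TCP_CONG_NON_RESTRICTED, false)); /* 1 */
	printf("%d\n", may_set_cc(0, false));	/* 0: would be -EPERM */
	printf("%d\n", may_set_cc(0, true));	/* 1 */
	return 0;
}
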
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 78bfadf..1b3d032 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * TCP CUBIC: Binary Increase Congestion control for TCP v2.3
  * Home page:
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index ca61e2a..79f7054 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* DataCenter TCP (DCTCP) congestion control.
  *
  * http://simula.stanford.edu/~alizade/Site/DCTCP.html
@@ -33,24 +34,19 @@
  *	Daniel Borkmann <dborkman@redhat.com>
  *	Florian Westphal <fw@strlen.de>
  *	Glenn Judd <glenn.judd@morganstanley.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
  */
 
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <net/tcp.h>
 #include <linux/inet_diag.h>
+#include "tcp_dctcp.h"
 
 #define DCTCP_MAX_ALPHA	1024U
 
 struct dctcp {
-	u32 acked_bytes_ecn;
-	u32 acked_bytes_total;
-	u32 prior_snd_una;
+	u32 old_delivered;
+	u32 old_delivered_ce;
 	u32 prior_rcv_nxt;
 	u32 dctcp_alpha;
 	u32 next_seq;
@@ -66,19 +62,14 @@
 module_param(dctcp_alpha_on_init, uint, 0644);
 MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value");
 
-static unsigned int dctcp_clamp_alpha_on_loss __read_mostly;
-module_param(dctcp_clamp_alpha_on_loss, uint, 0644);
-MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss,
-		 "parameter for clamping alpha on loss");
-
 static struct tcp_congestion_ops dctcp_reno;
 
 static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
 {
 	ca->next_seq = tp->snd_nxt;
 
-	ca->acked_bytes_ecn = 0;
-	ca->acked_bytes_total = 0;
+	ca->old_delivered = tp->delivered;
+	ca->old_delivered_ce = tp->delivered_ce;
 }
 
 static void dctcp_init(struct sock *sk)
@@ -90,7 +81,6 @@
 	     sk->sk_state == TCP_CLOSE)) {
 		struct dctcp *ca = inet_csk_ca(sk);
 
-		ca->prior_snd_una = tp->snd_una;
 		ca->prior_rcv_nxt = tp->rcv_nxt;
 
 		ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
@@ -118,89 +108,29 @@
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
 }
 
-/* Minimal DCTP CE state machine:
- *
- * S:	0 <- last pkt was non-CE
- *	1 <- last pkt was CE
- */
-
-static void dctcp_ce_state_0_to_1(struct sock *sk)
-{
-	struct dctcp *ca = inet_csk_ca(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (!ca->ce_state) {
-		/* State has changed from CE=0 to CE=1, force an immediate
-		 * ACK to reflect the new CE state. If an ACK was delayed,
-		 * send that first to reflect the prior CE state.
-		 */
-		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
-			__tcp_send_ack(sk, ca->prior_rcv_nxt);
-		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
-	}
-
-	ca->prior_rcv_nxt = tp->rcv_nxt;
-	ca->ce_state = 1;
-
-	tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
-}
-
-static void dctcp_ce_state_1_to_0(struct sock *sk)
-{
-	struct dctcp *ca = inet_csk_ca(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (ca->ce_state) {
-		/* State has changed from CE=1 to CE=0, force an immediate
-		 * ACK to reflect the new CE state. If an ACK was delayed,
-		 * send that first to reflect the prior CE state.
-		 */
-		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
-			__tcp_send_ack(sk, ca->prior_rcv_nxt);
-		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
-	}
-
-	ca->prior_rcv_nxt = tp->rcv_nxt;
-	ca->ce_state = 0;
-
-	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
-}
-
 static void dctcp_update_alpha(struct sock *sk, u32 flags)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct dctcp *ca = inet_csk_ca(sk);
-	u32 acked_bytes = tp->snd_una - ca->prior_snd_una;
-
-	/* If ack did not advance snd_una, count dupack as MSS size.
-	 * If ack did update window, do not count it at all.
-	 */
-	if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE))
-		acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
-	if (acked_bytes) {
-		ca->acked_bytes_total += acked_bytes;
-		ca->prior_snd_una = tp->snd_una;
-
-		if (flags & CA_ACK_ECE)
-			ca->acked_bytes_ecn += acked_bytes;
-	}
 
 	/* Expired RTT */
 	if (!before(tp->snd_una, ca->next_seq)) {
-		u64 bytes_ecn = ca->acked_bytes_ecn;
+		u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce;
 		u32 alpha = ca->dctcp_alpha;
 
 		/* alpha = (1 - g) * alpha + g * F */
 
 		alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
-		if (bytes_ecn) {
-			/* If dctcp_shift_g == 1, a 32bit value would overflow
-			 * after 8 Mbytes.
-			 */
-			bytes_ecn <<= (10 - dctcp_shift_g);
-			do_div(bytes_ecn, max(1U, ca->acked_bytes_total));
+		if (delivered_ce) {
+			u32 delivered = tp->delivered - ca->old_delivered;
 
-			alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA);
+			/* If dctcp_shift_g == 1, a 32bit value would overflow
+			 * after 8 M packets.
+			 */
+			delivered_ce <<= (10 - dctcp_shift_g);
+			delivered_ce /= max(1U, delivered);
+
+			alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA);
 		}
 		/* dctcp_alpha can be read from dctcp_get_info() without
 		 * synchro, so we ask compiler to not use dctcp_alpha
@@ -211,31 +141,36 @@
 	}
 }
 
+static void dctcp_react_to_loss(struct sock *sk)
+{
+	struct dctcp *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	ca->loss_cwnd = tp->snd_cwnd;
+	tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+}
+
 static void dctcp_state(struct sock *sk, u8 new_state)
 {
-	if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) {
-		struct dctcp *ca = inet_csk_ca(sk);
-
-		/* If this extension is enabled, we clamp dctcp_alpha to
-		 * max on packet loss; the motivation is that dctcp_alpha
-		 * is an indicator to the extend of congestion and packet
-		 * loss is an indicator of extreme congestion; setting
-		 * this in practice turned out to be beneficial, and
-		 * effectively assumes total congestion which reduces the
-		 * window by half.
-		 */
-		ca->dctcp_alpha = DCTCP_MAX_ALPHA;
-	}
+	if (new_state == TCP_CA_Recovery &&
+	    new_state != inet_csk(sk)->icsk_ca_state)
+		dctcp_react_to_loss(sk);
+	/* We handle RTO in dctcp_cwnd_event to ensure that we perform only
+	 * one loss-adjustment per RTT.
+	 */
 }
 
 static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 {
+	struct dctcp *ca = inet_csk_ca(sk);
+
 	switch (ev) {
 	case CA_EVENT_ECN_IS_CE:
-		dctcp_ce_state_0_to_1(sk);
-		break;
 	case CA_EVENT_ECN_NO_CE:
-		dctcp_ce_state_1_to_0(sk);
+		dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
+		break;
+	case CA_EVENT_LOSS:
+		dctcp_react_to_loss(sk);
 		break;
 	default:
 		/* Don't care for the rest. */
@@ -247,6 +182,7 @@
 			     union tcp_cc_info *info)
 {
 	const struct dctcp *ca = inet_csk_ca(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Fill it also in case of VEGASINFO due to req struct limits.
 	 * We can still correctly retrieve it later.
@@ -258,8 +194,10 @@
 			info->dctcp.dctcp_enabled = 1;
 			info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
 			info->dctcp.dctcp_alpha = ca->dctcp_alpha;
-			info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn;
-			info->dctcp.dctcp_ab_tot = ca->acked_bytes_total;
+			info->dctcp.dctcp_ab_ecn = tp->mss_cache *
+						   (tp->delivered_ce - ca->old_delivered_ce);
+			info->dctcp.dctcp_ab_tot = tp->mss_cache *
+						   (tp->delivered - ca->old_delivered);
 		}
 
 		*attr = INET_DIAG_DCTCPINFO;
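
The alpha update in dctcp_update_alpha() above is a fixed-point EWMA, alpha = (1 - g) * alpha + g * F, where F is now the fraction of CE-marked packets (delivered_ce / delivered) over the last RTT rather than a byte ratio. A self-contained sketch of the arithmetic, assuming the default dctcp_shift_g of 4 (g = 1/16) and the 10-bit alpha scale where 1024 means F = 1.0:

#include <stdio.h>

#define DCTCP_MAX_ALPHA 1024U

static unsigned int update_alpha(unsigned int alpha, unsigned int shift_g,
				 unsigned int delivered,
				 unsigned int delivered_ce)
{
	unsigned int decay = alpha >> shift_g;

	/* min_not_zero(): never let a non-zero alpha decay by zero */
	if (decay == 0 && alpha != 0)
		decay = alpha;
	alpha -= decay;

	if (delivered_ce) {
		/* F in 10-bit fixed point, pre-scaled by 1/g */
		delivered_ce <<= (10 - shift_g);
		delivered_ce /= (delivered ? delivered : 1);
		alpha += delivered_ce;
		if (alpha > DCTCP_MAX_ALPHA)
			alpha = DCTCP_MAX_ALPHA;
	}
	return alpha;
}

int main(void)
{
	/* 10% CE marks in the last RTT with shift_g = 4:
	 * 1024 - 64 + (10 << 6) / 100 = 966
	 */
	printf("%u\n", update_alpha(1024, 4, 100, 10));
	return 0;
}
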
diff --git a/net/ipv4/tcp_dctcp.h b/net/ipv4/tcp_dctcp.h
new file mode 100644
index 0000000..d69a77c
--- /dev/null
+++ b/net/ipv4/tcp_dctcp.h
@@ -0,0 +1,40 @@
+#ifndef _TCP_DCTCP_H
+#define _TCP_DCTCP_H
+
+static inline void dctcp_ece_ack_cwr(struct sock *sk, u32 ce_state)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (ce_state == 1)
+		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+	else
+		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+}
+
+/* Minimal DCTCP CE state machine:
+ *
+ * S:	0 <- last pkt was non-CE
+ *	1 <- last pkt was CE
+ */
+static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
+					u32 *prior_rcv_nxt, u32 *ce_state)
+{
+	u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
+
+	if (*ce_state != new_ce_state) {
+		/* CE state has changed, force an immediate ACK to
+		 * reflect the new CE state. If an ACK was delayed,
+		 * send that first to reflect the prior CE state.
+		 */
+		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
+			dctcp_ece_ack_cwr(sk, *ce_state);
+			__tcp_send_ack(sk, *prior_rcv_nxt);
+		}
+		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+	}
+	*prior_rcv_nxt = tcp_sk(sk)->rcv_nxt;
+	*ce_state = new_ce_state;
+	dctcp_ece_ack_cwr(sk, new_ce_state);
+}
+
+#endif
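
The helper above forces an immediate ACK on every CE edge so the sender observes marking transitions promptly. A minimal user-space model of just the transition logic; ce_sm, ce_update, and ack_now are illustrative names, and the real helper additionally flushes a pending delayed ACK carrying the pre-transition rcv_nxt so the peer sees the old CE state first:

#include <stdio.h>

struct ce_sm {
	unsigned int ce_state; /* 1 = last packet was CE-marked */
	unsigned int ack_now;  /* immediate ACK forced by a transition */
};

static void ce_update(struct ce_sm *sm, int pkt_is_ce)
{
	unsigned int new_state = pkt_is_ce ? 1 : 0;

	if (sm->ce_state != new_state)
		sm->ack_now = 1;	/* CE edge: ACK immediately */
	sm->ce_state = new_state;
}

int main(void)
{
	struct ce_sm sm = { 0, 0 };
	int pkts[] = { 0, 0, 1, 1, 0 };
	unsigned long i;

	for (i = 0; i < sizeof(pkts) / sizeof(pkts[0]); i++) {
		sm.ack_now = 0;
		ce_update(&sm, pkts[i]);
		printf("pkt %lu: ce=%u ack_now=%u\n",
		       i, sm.ce_state, sm.ack_now);
	}
	return 0;
}
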
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 81148f7..5495061 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * tcp_diag.c	Module for monitoring TCP transport protocols sockets.
  *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -30,8 +26,9 @@
 	} else if (sk->sk_type == SOCK_STREAM) {
 		const struct tcp_sock *tp = tcp_sk(sk);
 
-		r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
-		r->idiag_wqueue = tp->write_seq - tp->snd_una;
+		r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+					     READ_ONCE(tp->copied_seq), 0);
+		r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
 	}
 	if (info)
 		tcp_get_info(sk, info);
@@ -85,13 +82,42 @@
 }
 #endif
 
+static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk,
+			    const struct tcp_ulp_ops *ulp_ops)
+{
+	struct nlattr *nest;
+	int err;
+
+	nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO);
+	if (!nest)
+		return -EMSGSIZE;
+
+	err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name);
+	if (err)
+		goto nla_failure;
+
+	if (ulp_ops->get_info)
+		err = ulp_ops->get_info(sk, skb);
+	if (err)
+		goto nla_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_failure:
+	nla_nest_cancel(skb, nest);
+	return err;
+}
+
 static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
 			    struct sk_buff *skb)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int err = 0;
+
 #ifdef CONFIG_TCP_MD5SIG
 	if (net_admin) {
 		struct tcp_md5sig_info *md5sig;
-		int err = 0;
 
 		rcu_read_lock();
 		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
@@ -103,11 +129,21 @@
 	}
 #endif
 
+	if (net_admin) {
+		const struct tcp_ulp_ops *ulp_ops;
+
+		ulp_ops = icsk->icsk_ulp_ops;
+		if (ulp_ops)
+			err = tcp_diag_put_ulp(skb, sk, ulp_ops);
+		if (err)
+			return err;
+	}
 	return 0;
 }
 
 static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	size_t size = 0;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -128,6 +164,17 @@
 	}
 #endif
 
+	if (net_admin && sk_fullsock(sk)) {
+		const struct tcp_ulp_ops *ulp_ops;
+
+		ulp_ops = icsk->icsk_ulp_ops;
+		if (ulp_ops) {
+			size += nla_total_size(0) +
+				nla_total_size(TCP_ULP_NAME_MAX);
+			if (ulp_ops->get_info_size)
+				size += ulp_ops->get_info_size(sk);
+		}
+	}
 	return size;
 }
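
The rqueue computation above now reads both sequence counters with READ_ONCE() and clamps the signed difference at zero: a lockless diag reader can observe copied_seq ahead of rcv_nxt, and without the clamp the unsigned subtraction would report an enormous bogus queue. A small sketch of the arithmetic, including sequence-number wraparound:

#include <stdio.h>
#include <stdint.h>

static int rqueue(uint32_t rcv_nxt, uint32_t copied_seq)
{
	int d = (int)(rcv_nxt - copied_seq); /* u32 wraparound-safe delta */

	return d > 0 ? d : 0;
}

int main(void)
{
	printf("%d\n", rqueue(1000, 900));	/* 100 bytes unread */
	printf("%d\n", rqueue(1000, 1004));	/* racy snapshot -> 0 */
	printf("%d\n", rqueue(5, 0xfffffffb));	/* across seq wrap -> 10 */
	return 0;
}
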
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 018a484..a915ade 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -30,15 +30,15 @@
 	 * for a valid cookie, so this is an acceptable risk.
 	 */
 	get_random_bytes(key, sizeof(key));
-	tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
+	tcp_fastopen_reset_cipher(net, NULL, key, NULL);
 }
 
 static void tcp_fastopen_ctx_free(struct rcu_head *head)
 {
 	struct tcp_fastopen_context *ctx =
 	    container_of(head, struct tcp_fastopen_context, rcu);
-	crypto_free_cipher(ctx->tfm);
-	kfree(ctx);
+
+	kzfree(ctx);
 }
 
 void tcp_fastopen_destroy_cipher(struct sock *sk)
@@ -67,31 +67,27 @@
 }
 
 int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
-			      void *key, unsigned int len)
+			      void *primary_key, void *backup_key)
 {
 	struct tcp_fastopen_context *ctx, *octx;
 	struct fastopen_queue *q;
-	int err;
+	int err = 0;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
-	if (!ctx)
-		return -ENOMEM;
-	ctx->tfm = crypto_alloc_cipher("aes", 0, 0);
-
-	if (IS_ERR(ctx->tfm)) {
-		err = PTR_ERR(ctx->tfm);
-error:		kfree(ctx);
-		pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
-		return err;
+	if (!ctx) {
+		err = -ENOMEM;
+		goto out;
 	}
-	err = crypto_cipher_setkey(ctx->tfm, key, len);
-	if (err) {
-		pr_err("TCP: TFO cipher key error: %d\n", err);
-		crypto_free_cipher(ctx->tfm);
-		goto error;
-	}
-	memcpy(ctx->key, key, len);
 
+	ctx->key[0].key[0] = get_unaligned_le64(primary_key);
+	ctx->key[0].key[1] = get_unaligned_le64(primary_key + 8);
+	if (backup_key) {
+		ctx->key[1].key[0] = get_unaligned_le64(backup_key);
+		ctx->key[1].key[1] = get_unaligned_le64(backup_key + 8);
+		ctx->num = 2;
+	} else {
+		ctx->num = 1;
+	}
 
 	spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
 	if (sk) {
@@ -108,66 +104,58 @@
 
 	if (octx)
 		call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
+out:
 	return err;
 }
 
-static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
-				      struct tcp_fastopen_cookie *foc)
+static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
+					     struct sk_buff *syn,
+					     const siphash_key_t *key,
+					     struct tcp_fastopen_cookie *foc)
 {
-	struct tcp_fastopen_context *ctx;
-	bool ok = false;
+	BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64));
 
-	rcu_read_lock();
-
-	ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
-	if (!ctx)
-		ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
-
-	if (ctx) {
-		crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
-		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
-		ok = true;
-	}
-	rcu_read_unlock();
-	return ok;
-}
-
-/* Generate the fastopen cookie by doing aes128 encryption on both
- * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6
- * addresses. For the longer IPv6 addresses use CBC-MAC.
- *
- * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
- */
-static bool tcp_fastopen_cookie_gen(struct sock *sk,
-				    struct request_sock *req,
-				    struct sk_buff *syn,
-				    struct tcp_fastopen_cookie *foc)
-{
 	if (req->rsk_ops->family == AF_INET) {
 		const struct iphdr *iph = ip_hdr(syn);
 
-		__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
-		return __tcp_fastopen_cookie_gen(sk, path, foc);
+		foc->val[0] = cpu_to_le64(siphash(&iph->saddr,
+					  sizeof(iph->saddr) +
+					  sizeof(iph->daddr),
+					  key));
+		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+		return true;
 	}
-
 #if IS_ENABLED(CONFIG_IPV6)
 	if (req->rsk_ops->family == AF_INET6) {
 		const struct ipv6hdr *ip6h = ipv6_hdr(syn);
-		struct tcp_fastopen_cookie tmp;
 
-		if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
-			struct in6_addr *buf = &tmp.addr;
-			int i;
-
-			for (i = 0; i < 4; i++)
-				buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
-			return __tcp_fastopen_cookie_gen(sk, buf, foc);
-		}
+		foc->val[0] = cpu_to_le64(siphash(&ip6h->saddr,
+					  sizeof(ip6h->saddr) +
+					  sizeof(ip6h->daddr),
+					  key));
+		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+		return true;
 	}
 #endif
 	return false;
 }
 
+/* Generate the fastopen cookie by applying SipHash to both the source and
+ * destination addresses.
+ */
+static void tcp_fastopen_cookie_gen(struct sock *sk,
+				    struct request_sock *req,
+				    struct sk_buff *syn,
+				    struct tcp_fastopen_cookie *foc)
+{
+	struct tcp_fastopen_context *ctx;
+
+	rcu_read_lock();
+	ctx = tcp_fastopen_get_ctx(sk);
+	if (ctx)
+		__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc);
+	rcu_read_unlock();
+}
 
 /* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
  * queue this additional data / FIN.
@@ -212,6 +200,35 @@
 		tcp_fin(sk);
 }
 
+/* returns 0 - no key match, 1 for primary, 2 for backup */
+static int tcp_fastopen_cookie_gen_check(struct sock *sk,
+					 struct request_sock *req,
+					 struct sk_buff *syn,
+					 struct tcp_fastopen_cookie *orig,
+					 struct tcp_fastopen_cookie *valid_foc)
+{
+	struct tcp_fastopen_cookie search_foc = { .len = -1 };
+	struct tcp_fastopen_cookie *foc = valid_foc;
+	struct tcp_fastopen_context *ctx;
+	int i, ret = 0;
+
+	rcu_read_lock();
+	ctx = tcp_fastopen_get_ctx(sk);
+	if (!ctx)
+		goto out;
+	for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
+		__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[i], foc);
+		if (tcp_fastopen_cookie_match(foc, orig)) {
+			ret = i + 1;
+			goto out;
+		}
+		foc = &search_foc;
+	}
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
 static struct sock *tcp_fastopen_create_child(struct sock *sk,
 					      struct sk_buff *skb,
 					      struct request_sock *req)
@@ -221,10 +238,6 @@
 	struct sock *child;
 	bool own_req;
 
-	req->num_retrans = 0;
-	req->num_timeout = 0;
-	req->sk = NULL;
-
 	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
 							 NULL, &own_req);
 	if (!child)
@@ -240,7 +253,7 @@
 	 */
 	tp = tcp_sk(child);
 
-	tp->fastopen_rsk = req;
+	rcu_assign_pointer(tp->fastopen_rsk, req);
 	tcp_rsk(req)->tfo_listener = true;
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never
@@ -331,6 +344,7 @@
 	int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	struct sock *child;
+	int ret = 0;
 
 	if (foc->len == 0) /* Client requests a cookie */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
@@ -346,31 +360,44 @@
 	    tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
 		goto fastopen;
 
-	if (foc->len >= 0 &&  /* Client presents or requests a cookie */
-	    tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
-	    foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
-	    foc->len == valid_foc.len &&
-	    !memcmp(foc->val, valid_foc.val, foc->len)) {
-		/* Cookie is valid. Create a (full) child socket to accept
-		 * the data in SYN before returning a SYN-ACK to ack the
-		 * data. If we fail to create the socket, fall back and
-		 * ack the ISN only but includes the same cookie.
-		 *
-		 * Note: Data-less SYN with valid cookie is allowed to send
-		 * data in SYN_RECV state.
-		 */
-fastopen:
-		child = tcp_fastopen_create_child(sk, skb, req);
-		if (child) {
-			foc->len = -1;
+	if (foc->len == 0) {
+		/* Client requests a cookie. */
+		tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+	} else if (foc->len > 0) {
+		ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
+						    &valid_foc);
+		if (!ret) {
 			NET_INC_STATS(sock_net(sk),
-				      LINUX_MIB_TCPFASTOPENPASSIVE);
-			return child;
+				      LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+		} else {
+			/* Cookie is valid. Create a (full) child socket to
+			 * accept the data in SYN before returning a SYN-ACK to
+			 * ack the data. If we fail to create the socket, fall
+			 * back and ack the ISN only but include the same
+			 * cookie.
+			 *
+			 * Note: Data-less SYN with valid cookie is allowed to
+			 * send data in SYN_RECV state.
+			 */
+fastopen:
+			child = tcp_fastopen_create_child(sk, skb, req);
+			if (child) {
+				if (ret == 2) {
+					valid_foc.exp = foc->exp;
+					*foc = valid_foc;
+					NET_INC_STATS(sock_net(sk),
+						      LINUX_MIB_TCPFASTOPENPASSIVEALTKEY);
+				} else {
+					foc->len = -1;
+				}
+				NET_INC_STATS(sock_net(sk),
+					      LINUX_MIB_TCPFASTOPENPASSIVE);
+				return child;
+			}
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
 		}
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-	} else if (foc->len > 0) /* Client presents an invalid cookie */
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-
+	}
 	valid_foc.exp = foc->exp;
 	*foc = valid_foc;
 	return NULL;
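
tcp_fastopen_cookie_gen_check() above walks the configured keys so that a cookie minted under a rotated-out primary key remains valid while that key serves as backup. A sketch of the walk, assuming cookie_of() as a placeholder for the SipHash over the (saddr, daddr) pair; the return convention (0 = no match, 1 = primary, 2 = backup) matches the kernel helper:

#include <stdint.h>
#include <stdio.h>

static uint64_t cookie_of(uint64_t key, uint64_t addrs)
{
	return key ^ addrs;	/* placeholder for siphash(&saddr, ..., key) */
}

static int cookie_check(const uint64_t *keys, int nkeys,
			uint64_t addrs, uint64_t presented)
{
	int i;

	for (i = 0; i < nkeys; i++)
		if (cookie_of(keys[i], addrs) == presented)
			return i + 1;	/* 1 = primary, 2 = backup */
	return 0;			/* no key match */
}

int main(void)
{
	uint64_t keys[2] = { 0x1111, 0x2222 };	/* after a key rotation */
	uint64_t addrs = 0xabcd;

	/* A client holding a cookie minted under the old (now backup)
	 * key is still accepted -- the point of the two-key scheme.
	 */
	printf("%d\n", cookie_check(keys, 2, addrs, 0x2222 ^ 0xabcd));
	return 0;
}
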
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index d1c33c9..bfdfbb9 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Sally Floyd's High Speed TCP (RFC 3649) congestion control
  *
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 082d479..88e1f01 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * H-TCP congestion control. The algorithm is detailed in:
  * R.N.Shorten, D.J.Leith:
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 0f7175c..be39327 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * TCP HYBLA
  *
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index faddf4f..00e5487 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * TCP Illinois congestion control.
  * Home page:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 664fa7d..a2e52ad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -77,7 +77,7 @@
 #include <asm/unaligned.h>
 #include <linux/errqueue.h>
 #include <trace/events/tcp.h>
-#include <linux/static_key.h>
+#include <linux/jump_label_ratelimit.h>
 #include <net/busy_poll.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
@@ -113,22 +113,28 @@
 #define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */
 
 #if IS_ENABLED(CONFIG_TLS_DEVICE)
-static DEFINE_STATIC_KEY_FALSE(clean_acked_data_enabled);
+static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);
 
 void clean_acked_data_enable(struct inet_connection_sock *icsk,
 			     void (*cad)(struct sock *sk, u32 ack_seq))
 {
 	icsk->icsk_clean_acked = cad;
-	static_branch_inc(&clean_acked_data_enabled);
+	static_branch_deferred_inc(&clean_acked_data_enabled);
 }
 EXPORT_SYMBOL_GPL(clean_acked_data_enable);
 
 void clean_acked_data_disable(struct inet_connection_sock *icsk)
 {
-	static_branch_dec(&clean_acked_data_enabled);
+	static_branch_slow_dec_deferred(&clean_acked_data_enabled);
 	icsk->icsk_clean_acked = NULL;
 }
 EXPORT_SYMBOL_GPL(clean_acked_data_disable);
+
+void clean_acked_data_flush(void)
+{
+	static_key_deferred_flush(&clean_acked_data_enabled);
+}
+EXPORT_SYMBOL_GPL(clean_acked_data_flush);
 #endif
 
 static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
@@ -221,7 +227,7 @@
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tcp_incr_quickack(sk, max_quickacks);
-	icsk->icsk_ack.pingpong = 0;
+	inet_csk_exit_pingpong_mode(sk);
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
 EXPORT_SYMBOL(tcp_enter_quickack_mode);
@@ -236,7 +242,7 @@
 	const struct dst_entry *dst = __sk_dst_get(sk);
 
 	return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
-		(icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
+		(icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
 }
 
 static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
@@ -260,7 +266,7 @@
 
 static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
 {
-	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
 }
 
 static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
@@ -353,7 +359,8 @@
 	sndmem *= nr_segs * per_mss;
 
 	if (sk->sk_sndbuf < sndmem)
-		sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
+		WRITE_ONCE(sk->sk_sndbuf,
+			   min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -402,11 +409,12 @@
 static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	int room;
+
+	room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
 
 	/* Check #1 */
-	if (tp->rcv_ssthresh < tp->window_clamp &&
-	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !tcp_under_memory_pressure(sk)) {
+	if (room > 0 && !tcp_under_memory_pressure(sk)) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
@@ -419,33 +427,13 @@
 
 		if (incr) {
 			incr = max_t(int, incr, 2 * skb->len);
-			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
-					       tp->window_clamp);
+			tp->rcv_ssthresh += min(room, incr);
 			inet_csk(sk)->icsk_ack.quick |= 1;
 		}
 	}
 }
 
-/* 3. Tuning rcvbuf, when connection enters established state. */
-static void tcp_fixup_rcvbuf(struct sock *sk)
-{
-	u32 mss = tcp_sk(sk)->advmss;
-	int rcvmem;
-
-	rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
-		 tcp_default_init_rwnd(mss);
-
-	/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
-	 * Allow enough cushion so that sender is not limited by our window
-	 */
-	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
-		rcvmem <<= 2;
-
-	if (sk->sk_rcvbuf < rcvmem)
-		sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
-}
-
-/* 4. Try to fixup all. It is made immediately after connection enters
+/* 3. Try to fixup all. It is made immediately after connection enters
  *    established state.
  */
 void tcp_init_buffer_space(struct sock *sk)
@@ -454,12 +442,10 @@
 	struct tcp_sock *tp = tcp_sk(sk);
 	int maxwin;
 
-	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
-		tcp_fixup_rcvbuf(sk);
 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
 		tcp_sndbuf_expand(sk);
 
-	tp->rcvq_space.space = tp->rcv_wnd;
+	tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss);
 	tcp_mstamp_refresh(tp);
 	tp->rcvq_space.time = tp->tcp_mstamp;
 	tp->rcvq_space.seq = tp->copied_seq;
@@ -485,7 +471,7 @@
 	tp->snd_cwnd_stamp = tcp_jiffies32;
 }
 
-/* 5. Recalculate window clamp after socket hit its memory bounds. */
+/* 4. Recalculate window clamp after socket hit its memory bounds. */
 static void tcp_clamp_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -498,8 +484,9 @@
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
 	    !tcp_under_memory_pressure(sk) &&
 	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
-		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-				    net->ipv4.sysctl_tcp_rmem[2]);
+		WRITE_ONCE(sk->sk_rcvbuf,
+			   min(atomic_read(&sk->sk_rmem_alloc),
+			       net->ipv4.sysctl_tcp_rmem[2]));
 	}
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -600,10 +587,12 @@
 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
 		u32 delta_us;
 
-		if (!delta)
-			delta = 1;
-		delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
-		tcp_rcv_rtt_update(tp, delta_us, 0);
+		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+			if (!delta)
+				delta = 1;
+			delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+			tcp_rcv_rtt_update(tp, delta_us, 0);
+		}
 	}
 }
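
The guard added above rejects timestamp deltas that would overflow once scaled to microseconds: one TCP timestamp tick is USEC_PER_SEC / TCP_TS_HZ = 1000 us, so a bogus or huge tsecr delta multiplied by 1000 would wrap and feed a garbage RTT sample into the estimator. A small demonstration of the bound, assuming those constants:

#include <limits.h>
#include <stdio.h>

#define USEC_PER_SEC 1000000L
#define TCP_TS_HZ    1000L

static long ts_delta_to_us(unsigned int delta)
{
	long per_tick = USEC_PER_SEC / TCP_TS_HZ;	/* 1000 us */

	if (delta >= INT_MAX / per_tick)
		return -1;	/* sample discarded, not clamped */
	if (!delta)
		delta = 1;	/* sub-tick RTT still counts as one tick */
	return delta * per_tick;
}

int main(void)
{
	printf("%ld\n", ts_delta_to_us(25));		/* 25000 us */
	printf("%ld\n", ts_delta_to_us(0));		/* 1000 us */
	printf("%ld\n", ts_delta_to_us(0x80000000u));	/* -1: rejected */
	return 0;
}
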
 
@@ -661,7 +650,7 @@
 		rcvbuf = min_t(u64, rcvwin * rcvmem,
 			       sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
 		if (rcvbuf > sk->sk_rcvbuf) {
-			sk->sk_rcvbuf = rcvbuf;
+			WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
 
 			/* Make the window clamp follow along.  */
 			tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
@@ -791,6 +780,8 @@
 				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
 			tp->rtt_seq = tp->snd_nxt;
 			tp->mdev_max_us = tcp_rto_min_us(sk);
+
+			tcp_bpf_rtt(sk);
 		}
 	} else {
 		/* no previous measure. */
@@ -799,6 +790,8 @@
 		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
 		tp->mdev_max_us = tp->rttvar_us;
 		tp->rtt_seq = tp->snd_nxt;
+
+		tcp_bpf_rtt(sk);
 	}
 	tp->srtt_us = max(1U, srtt);
 }
@@ -1305,7 +1298,7 @@
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
 			start_seq, end_seq, dup_sack, pcount,
-			skb->skb_mstamp);
+			tcp_skb_timestamp_us(skb));
 	tcp_rate_skb_delivered(sk, skb, state->rate);
 
 	if (skb == tp->lost_skb_hint)
@@ -1315,7 +1308,7 @@
 	TCP_SKB_CB(skb)->seq += shifted;
 
 	tcp_skb_pcount_add(prev, pcount);
-	BUG_ON(tcp_skb_pcount(skb) < pcount);
+	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
 	tcp_skb_pcount_add(skb, -pcount);
 
 	/* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1381,6 +1374,21 @@
 	return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+		  int pcount, int shiftlen)
+{
+	/* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE).
+	 * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+	 * to make sure we do not store more than 65535 * 8 bytes per skb,
+	 * even if the current MSS is bigger.
+	 */
+	if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+		return 0;
+	if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+		return 0;
+	return skb_shift(to, from, shiftlen);
+}
+
 /* Try collapsing SACK blocks spanning across multiple skbs to a single
  * skb.
  */
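
tcp_skb_shift() above caps both the merged byte total and the merged segment count, because tcp_gso_segs is a 16-bit field: at most 65535 segments, and at most 65535 * 8 bytes at the minimum GSO size. A compilable sketch of the two limits (shift_allowed is an illustrative name):

#include <stdbool.h>
#include <stdio.h>

#define TCP_MIN_GSO_SIZE 8

static bool shift_allowed(unsigned int to_len, unsigned int to_pcount,
			  unsigned int pcount, unsigned int shiftlen)
{
	if (to_len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE)
		return false;	/* byte total would overflow gso accounting */
	if (to_pcount + pcount > 65535)
		return false;	/* segment count would overflow 16 bits */
	return true;
}

int main(void)
{
	printf("%d\n", shift_allowed(1000, 10, 5, 1448));	/* 1: fine */
	printf("%d\n", shift_allowed(524000, 100, 1, 1448));	/* 0: bytes */
	printf("%d\n", shift_allowed(1000, 65000, 600, 8));	/* 0: segs */
	return 0;
}
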
@@ -1486,7 +1494,7 @@
 	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
 		goto fallback;
 
-	if (!skb_shift(prev, skb, len))
+	if (!tcp_skb_shift(prev, skb, pcount, len))
 		goto fallback;
 	if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
 		goto out;
@@ -1504,11 +1512,10 @@
 		goto out;
 
 	len = skb->len;
-	if (skb_shift(prev, skb, len)) {
-		pcount += tcp_skb_pcount(skb);
-		tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+	pcount = tcp_skb_pcount(skb);
+	if (tcp_skb_shift(prev, skb, pcount, len))
+		tcp_shifted_skb(sk, prev, skb, state, pcount,
 				len, mss, 0);
-	}
 
 out:
 	return prev;
@@ -1580,7 +1587,7 @@
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
 						tcp_skb_pcount(skb),
-						skb->skb_mstamp);
+						tcp_skb_timestamp_us(skb));
 			tcp_rate_skb_delivered(sk, skb, state->rate);
 			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
 				list_del_init(&skb->tcp_tsorted_anchor);
@@ -1593,9 +1600,7 @@
 	return skb;
 }
 
-static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
-					   struct tcp_sacktag_state *state,
-					   u32 seq)
+static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
 {
 	struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
 	struct sk_buff *skb;
@@ -1617,13 +1622,12 @@
 }
 
 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
-					struct tcp_sacktag_state *state,
 					u32 skip_to_seq)
 {
 	if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
 		return skb;
 
-	return tcp_sacktag_bsearch(sk, state, skip_to_seq);
+	return tcp_sacktag_bsearch(sk, skip_to_seq);
 }
 
 static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@ -1636,7 +1640,7 @@
 		return skb;
 
 	if (before(next_dup->start_seq, skip_to_seq)) {
-		skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
+		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
 		skb = tcp_sacktag_walk(skb, sk, NULL, state,
 				       next_dup->start_seq, next_dup->end_seq,
 				       1);
@@ -1777,8 +1781,7 @@
 
 			/* Head todo? */
 			if (before(start_seq, cache->start_seq)) {
-				skb = tcp_sacktag_skip(skb, sk, state,
-						       start_seq);
+				skb = tcp_sacktag_skip(skb, sk, start_seq);
 				skb = tcp_sacktag_walk(skb, sk, next_dup,
 						       state,
 						       start_seq,
@@ -1804,7 +1807,7 @@
 				goto walk;
 			}
 
-			skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
+			skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
 			/* Check overlap against next cached too (past this one already) */
 			cache++;
 			continue;
@@ -1815,7 +1818,7 @@
 			if (!skb)
 				break;
 		}
-		skb = tcp_sacktag_skip(skb, sk, state, start_seq);
+		skb = tcp_sacktag_skip(skb, sk, start_seq);
 
 walk:
 		skb = tcp_sacktag_walk(skb, sk, next_dup, state,
@@ -1884,16 +1887,20 @@
 
 /* Emulate SACKs for SACKless connection: account for a new dupack. */
 
-static void tcp_add_reno_sack(struct sock *sk)
+static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	u32 prior_sacked = tp->sacked_out;
+	if (num_dupack) {
+		struct tcp_sock *tp = tcp_sk(sk);
+		u32 prior_sacked = tp->sacked_out;
+		s32 delivered;
 
-	tp->sacked_out++;
-	tcp_check_reno_reordering(sk, 0);
-	if (tp->sacked_out > prior_sacked)
-		tp->delivered++; /* Some out-of-order packet is delivered */
-	tcp_verify_left_out(tp);
+		tp->sacked_out += num_dupack;
+		tcp_check_reno_reordering(sk, 0);
+		delivered = tp->sacked_out - prior_sacked;
+		if (delivered > 0)
+			tp->delivered += delivered;
+		tcp_verify_left_out(tp);
+	}
 }
 
 /* Account for ACK, ACKing some data in Reno Recovery phase. */
@@ -2271,7 +2278,7 @@
  */
 static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
 {
-	return !tp->retrans_stamp ||
+	return tp->retrans_stamp &&
 	       tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
 }
 
@@ -2478,8 +2485,8 @@
 		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
 			       tp->prior_cwnd - 1;
 		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
-	} else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
-		   !(flag & FLAG_LOST_RETRANS)) {
+	} else if ((flag & (FLAG_RETRANS_DATA_ACKED | FLAG_LOST_RETRANS)) ==
+		   FLAG_RETRANS_DATA_ACKED) {
 		sndcnt = min_t(int, delta,
 			       max_t(int, tp->prr_delivered - tp->prr_out,
 				     newly_acked_sacked) + 1);
@@ -2655,13 +2662,13 @@
 /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
  * recovered or spurious. Otherwise retransmits more on partial ACKs.
  */
-static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
+static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
 			     int *rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool recovered = !before(tp->snd_una, tp->high_seq);
 
-	if ((flag & FLAG_SND_UNA_ADVANCED) &&
+	if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
 	    tcp_try_undo_loss(sk, false))
 		return;
 
@@ -2674,7 +2681,7 @@
 			return;
 
 		if (after(tp->snd_nxt, tp->high_seq)) {
-			if (flag & FLAG_DATA_SACKED || is_dupack)
+			if (flag & FLAG_DATA_SACKED || num_dupack)
 				tp->frto = 0; /* Step 3.a. loss was real */
 		} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
 			tp->high_seq = tp->snd_nxt;
@@ -2700,8 +2707,8 @@
 		/* A Reno DUPACK means new data in F-RTO step 2.b above are
 		 * delivered. Lower inflight to clock out (re)tranmissions.
 		 */
-		if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
-			tcp_add_reno_sack(sk);
+		if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
+			tcp_add_reno_sack(sk, num_dupack);
 		else if (flag & FLAG_SND_UNA_ADVANCED)
 			tcp_reset_reno_sack(tp);
 	}
@@ -2778,13 +2785,13 @@
  * tcp_xmit_retransmit_queue().
  */
 static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
-				  bool is_dupack, int *ack_flag, int *rexmit)
+				  int num_dupack, int *ack_flag, int *rexmit)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fast_rexmit = 0, flag = *ack_flag;
-	bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
-				     tcp_force_fast_retransmit(sk));
+	bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
+				      tcp_force_fast_retransmit(sk));
 
 	if (!tp->packets_out && tp->sacked_out)
 		tp->sacked_out = 0;
@@ -2831,8 +2838,8 @@
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
-			if (tcp_is_reno(tp) && is_dupack)
-				tcp_add_reno_sack(sk);
+			if (tcp_is_reno(tp))
+				tcp_add_reno_sack(sk, num_dupack);
 		} else {
 			if (tcp_try_undo_partial(sk, prior_snd_una))
 				return;
@@ -2847,7 +2854,7 @@
 		tcp_identify_packet_loss(sk, ack_flag);
 		break;
 	case TCP_CA_Loss:
-		tcp_process_loss(sk, flag, is_dupack, rexmit);
+		tcp_process_loss(sk, flag, num_dupack, rexmit);
 		tcp_identify_packet_loss(sk, ack_flag);
 		if (!(icsk->icsk_ca_state == TCP_CA_Open ||
 		      (*ack_flag & FLAG_LOST_RETRANS)))
@@ -2858,8 +2865,7 @@
 		if (tcp_is_reno(tp)) {
 			if (flag & FLAG_SND_UNA_ADVANCED)
 				tcp_reset_reno_sack(tp);
-			if (is_dupack)
-				tcp_add_reno_sack(sk);
+			tcp_add_reno_sack(sk, num_dupack);
 		}
 
 		if (icsk->icsk_ca_state <= TCP_CA_Disorder)
@@ -2931,9 +2937,11 @@
 	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 	    flag & FLAG_ACKED) {
 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
-		u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
 
-		seq_rtt_us = ca_rtt_us = delta_us;
+		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+			seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+			ca_rtt_us = seq_rtt_us;
+		}
 	}
 	rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
 	if (seq_rtt_us < 0)
@@ -2984,7 +2992,7 @@
 	/* If the retrans timer is currently being used by Fast Open
 	 * for SYN-ACK retrans purpose, stay put.
 	 */
-	if (tp->fastopen_rsk)
+	if (rcu_access_pointer(tp->fastopen_rsk))
 		return;
 
 	if (!tp->packets_out) {
@@ -3000,8 +3008,8 @@
 			 */
 			rto = usecs_to_jiffies(max_t(int, delta_us, 1));
 		}
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
-					  TCP_RTO_MAX);
+		tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
+				     TCP_RTO_MAX, tcp_rtx_queue_head(sk));
 	}
 }
 
@@ -3103,7 +3111,7 @@
 				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
 		} else if (!(sacked & TCPCB_SACKED_ACKED)) {
-			last_ackt = skb->skb_mstamp;
+			last_ackt = tcp_skb_timestamp_us(skb);
 			WARN_ON_ONCE(last_ackt == 0);
 			if (!first_ackt)
 				first_ackt = last_ackt;
@@ -3121,7 +3129,7 @@
 			tp->delivered += acked_pcount;
 			if (!tcp_skb_spurious_retrans(tp, skb))
 				tcp_rack_advance(tp, sacked, scb->end_seq,
-						 skb->skb_mstamp);
+						 tcp_skb_timestamp_us(skb));
 		}
 		if (sacked & TCPCB_LOST)
 			tp->lost_out -= acked_pcount;
@@ -3215,7 +3223,8 @@
 			tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
 		}
 	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
-		   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
+		   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
+						    tcp_skb_timestamp_us(skb))) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
 		 * after when the head was last (re)transmitted. Otherwise the
 		 * timeout may continue to extend in loss recovery.
@@ -3275,8 +3284,8 @@
 	} else {
 		unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
 
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-					  when, TCP_RTO_MAX);
+		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+				     when, TCP_RTO_MAX, NULL);
 	}
 }
 
@@ -3355,7 +3364,7 @@
 
 	sock_owned_by_me((struct sock *)tp);
 	tp->bytes_received += delta;
-	tp->rcv_nxt = seq;
+	WRITE_ONCE(tp->rcv_nxt, seq);
 }
 
 /* Update our send window.
@@ -3538,7 +3547,7 @@
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (rexmit == REXMIT_NONE)
+	if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
 		return;
 
 	if (unlikely(rexmit == 2)) {
@@ -3578,7 +3587,7 @@
 	bool is_sack_reneg = tp->is_sack_reneg;
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
-	bool is_dupack = false;
+	int num_dupack = 0;
 	int prior_packets = tp->packets_out;
 	u32 delivered = tp->delivered;
 	u32 lost = tp->lost;
@@ -3608,14 +3617,14 @@
 	 * this segment (RFC793 Section 3.9).
 	 */
 	if (after(ack, tp->snd_nxt))
-		goto invalid_ack;
+		return -1;
 
 	if (after(ack, prior_snd_una)) {
 		flag |= FLAG_SND_UNA_ADVANCED;
 		icsk->icsk_retransmits = 0;
 
 #if IS_ENABLED(CONFIG_TLS_DEVICE)
-		if (static_branch_unlikely(&clean_acked_data_enabled))
+		if (static_branch_unlikely(&clean_acked_data_enabled.key))
 			if (icsk->icsk_clean_acked)
 				icsk->icsk_clean_acked(sk, ack);
 #endif
@@ -3630,7 +3639,8 @@
 	if (flag & FLAG_UPDATE_TS_RECENT)
 		tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
 
-	if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
+	if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
+	    FLAG_SND_UNA_ADVANCED) {
 		/* Window is constant, pure forward advance.
 		 * No more checks are required.
 		 * Note, we use the fact that SND.UNA>=SND.WL2.
@@ -3688,8 +3698,13 @@
 		tcp_set_xmit_timer(sk);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
-		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+		if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+			num_dupack = 1;
+			/* Consider if pure acks were aggregated in tcp_add_backlog() */
+			if (!(flag & FLAG_DATA))
+				num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+		}
+		tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
 				      &rexmit);
 	}
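
Deriving num_dupack from gso_segs, as above, credits every pure ACK that GRO or tcp_add_backlog() coalesced into a single skb, so three coalesced DUPACKs still trigger RFC 5681 fast retransmit without waiting for further segments. A minimal model of the counting rule (dupacks_in_skb is an illustrative name):

#include <stdio.h>

static unsigned int dupacks_in_skb(int carries_data, unsigned int gso_segs)
{
	if (carries_data)
		return 1;	/* data segments are not aggregated ACK runs */
	return gso_segs > 1 ? gso_segs : 1;
}

int main(void)
{
	/* One coalesced skb standing for three pure ACKs is enough to
	 * trigger fast retransmit on a SACKless (Reno) connection.
	 */
	printf("%u\n", dupacks_in_skb(0, 3)); /* 3 */
	printf("%u\n", dupacks_in_skb(1, 3)); /* 1 */
	return 0;
}
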
 
@@ -3707,7 +3722,7 @@
 no_queue:
 	/* If data was DSACKed, see if we can undo a cwnd reduction. */
 	if (flag & FLAG_DSACKING_ACK) {
-		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+		tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
 				      &rexmit);
 		tcp_newly_delivered(sk, delivered, flag);
 	}
@@ -3721,10 +3736,6 @@
 		tcp_process_tlp_ack(sk, ack, flag);
 	return 1;
 
-invalid_ack:
-	SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
-	return -1;
-
 old_ack:
 	/* If data was SACKed, tag it and see if we should send more data.
 	 * If data was DSACKed, see if we can undo a cwnd reduction.
@@ -3732,13 +3743,12 @@
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
 						&sack_state);
-		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+		tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
 				      &rexmit);
 		tcp_newly_delivered(sk, delivered, flag);
 		tcp_xmit_recovery(sk, rexmit);
 	}
 
-	SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
 	return 0;
 }
 
@@ -3774,6 +3784,49 @@
 #endif
 }
 
+/* Try to parse the MSS option from the TCP header. Return 0 on failure or if
+ * the option is absent; on success return the MSS value, clamped to user_mss
+ * when one is set.
+ */
+static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
+{
+	const unsigned char *ptr = (const unsigned char *)(th + 1);
+	int length = (th->doff * 4) - sizeof(struct tcphdr);
+	u16 mss = 0;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return mss;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			if (length < 2)
+				return mss;
+			opsize = *ptr++;
+			if (opsize < 2) /* "silly options" */
+				return mss;
+			if (opsize > length)
+				return mss;	/* fail on partial options */
+			if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
+				u16 in_mss = get_unaligned_be16(ptr);
+
+				if (in_mss) {
+					if (user_mss && user_mss < in_mss)
+						in_mss = user_mss;
+					mss = in_mss;
+				}
+			}
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+	return mss;
+}
+
 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
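
tcp_parse_mss_option() above walks the option block byte by byte: EOL stops parsing, NOP advances one byte, and every other option must carry a sane length byte; partial or "silly" (length < 2) options abort the walk. A self-contained user-space rendition of the same loop, exercised on a hand-built option block:

#include <stdint.h>
#include <stdio.h>

#define TCPOPT_EOL  0
#define TCPOPT_NOP  1
#define TCPOPT_MSS  2
#define TCPOLEN_MSS 4

static uint16_t parse_mss(const uint8_t *ptr, int length, uint16_t user_mss)
{
	uint16_t mss = 0;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return mss;
		case TCPOPT_NOP:
			length--;
			continue;
		default:
			if (length < 2)
				return mss;
			opsize = *ptr++;
			if (opsize < 2 || opsize > length)
				return mss;	/* silly or partial option */
			if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
				uint16_t in_mss = (ptr[0] << 8) | ptr[1];

				if (in_mss) {
					if (user_mss && user_mss < in_mss)
						in_mss = user_mss;
					mss = in_mss;
				}
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
	return mss;
}

int main(void)
{
	/* NOP, NOP, MSS=1460, EOL */
	uint8_t opts[] = { 1, 1, 2, 4, 0x05, 0xb4, 0 };

	printf("%u\n", parse_mss(opts, sizeof(opts), 0));	/* 1460 */
	printf("%u\n", parse_mss(opts, sizeof(opts), 1200));	/* 1200 */
	return 0;
}
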
@@ -3801,6 +3854,8 @@
 			length--;
 			continue;
 		default:
+			if (length < 2)
+				return;
 			opsize = *ptr++;
 			if (opsize < 2) /* "silly options" */
 				return;
@@ -4101,7 +4156,7 @@
 	case TCP_ESTABLISHED:
 		/* Move to CLOSE_WAIT */
 		tcp_set_state(sk, TCP_CLOSE_WAIT);
-		inet_csk(sk)->icsk_ack.pingpong = 1;
+		inet_csk_enter_pingpong_mode(sk);
 		break;
 
 	case TCP_CLOSE_WAIT:
@@ -4199,6 +4254,17 @@
 		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
 }
 
+static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
+{
+	/* When the ACK path fails or drops most ACKs, the sender would
+	 * timeout and spuriously retransmit the same segment repeatedly.
+	 * The receiver remembers and reflects via DSACKs. Leverage the
+	 * DSACK state and change the txhash to re-route speculatively.
+	 */
+	if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
+		sk_rethink_txhash(sk);
+}
+
 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4211,6 +4277,7 @@
 		if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
+			tcp_rcv_spurious_retrans(sk, skb);
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
 				end_seq = tp->rcv_nxt;
 			tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
@@ -4427,13 +4494,9 @@
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
-			SOCK_DEBUG(sk, "ofo packet was already received\n");
 			tcp_drop(sk, skb);
 			continue;
 		}
-		SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
-			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
-			   TCP_SKB_CB(skb)->end_seq);
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
@@ -4494,11 +4557,10 @@
 	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
 
+	tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
 	seq = TCP_SKB_CB(skb)->seq;
 	end_seq = TCP_SKB_CB(skb)->end_seq;
-	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-		   tp->rcv_nxt, seq, end_seq);
 
 	p = &tp->out_of_order_queue.rb_node;
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
@@ -4610,13 +4672,12 @@
 	}
 }
 
-static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
-		  bool *fragstolen)
+static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
+				      bool *fragstolen)
 {
 	int eaten;
 	struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
 
-	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
 		 tcp_try_coalesce(sk, tail,
 				  skb, fragstolen)) ? 1 : 0;
@@ -4667,7 +4728,7 @@
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
 	TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
 
-	if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
+	if (tcp_queue_rcv(sk, skb, &fragstolen)) {
 		WARN_ON_ONCE(fragstolen); /* should not happen */
 		__kfree_skb(skb);
 	}
@@ -4727,7 +4788,7 @@
 			goto drop;
 		}
 
-		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
+		eaten = tcp_queue_rcv(sk, skb, &fragstolen);
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4756,6 +4817,7 @@
 	}
 
 	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+		tcp_rcv_spurious_retrans(sk, skb);
 		/* A retransmit, 2nd most common case.  Force an immediate ack. */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
@@ -4774,10 +4836,6 @@
 
 	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		/* Partial packet, seq < rcv_next < end_seq */
-		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
-			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
-			   TCP_SKB_CB(skb)->end_seq);
-
 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
 
 		/* If window is closed, drop tail of packet. But after
@@ -5056,8 +5114,6 @@
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
-
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
@@ -5302,7 +5358,7 @@
 	}
 
 	tp->urg_data = TCP_URG_NOTYET;
-	tp->urg_seq = ptr;
+	WRITE_ONCE(tp->urg_seq, ptr);
 
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
@@ -5602,8 +5658,8 @@
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
 
 			/* Bulk data transfer: receiver */
-			eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
-					      &fragstolen);
+			__skb_pull(skb, tcp_header_len);
+			eaten = tcp_queue_rcv(sk, skb, &fragstolen);
 
 			tcp_event_data_recv(sk, skb);
 
@@ -5663,6 +5719,32 @@
 }
 EXPORT_SYMBOL(tcp_rcv_established);
 
+void tcp_init_transfer(struct sock *sk, int bpf_op)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tcp_mtup_init(sk);
+	icsk->icsk_af_ops->rebuild_header(sk);
+	tcp_init_metrics(sk);
+
+	/* Initialize the congestion window to start the transfer.
+	 * Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
+	 * retransmitted. In light of RFC6298's more aggressive 1-sec
+	 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
+	 * retransmission has occurred.
+	 */
+	if (tp->total_retrans > 1 && tp->undo_marker)
+		tp->snd_cwnd = 1;
+	else
+		tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
+	tp->snd_cwnd_stamp = tcp_jiffies32;
+
+	tcp_call_bpf(sk, bpf_op, 0, NULL);
+	tcp_init_congestion_control(sk);
+	tcp_init_buffer_space(sk);
+}
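
The comment above reduces to a two-way decision. A minimal standalone restatement (editorial sketch, not part of the patch; dst_cwnd stands in for tcp_init_cwnd(), which this hunk does not show):

#include <stdint.h>

/* Sketch: initial cwnd after the handshake completes. */
static uint32_t pick_initial_cwnd(uint32_t total_retrans, uint32_t undo_marker,
				  uint32_t dst_cwnd)
{
	/* more than one SYN/SYN-ACK retransmit, not proven spurious */
	if (total_retrans > 1 && undo_marker)
		return 1;
	return dst_cwnd;	/* tcp_init_cwnd(tp, __sk_dst_get(sk)) */
}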
+
 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -5764,6 +5846,21 @@
 #endif
 }
 
+static void tcp_try_undo_spurious_syn(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 syn_stamp;
+
+	/* undo_marker is set when SYN or SYNACK times out. The timeout is
+	 * spurious if the ACK's timestamp option echo value matches the
+	 * original SYN timestamp.
+	 */
+	syn_stamp = tp->retrans_stamp;
+	if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
+	    syn_stamp == tp->rx_opt.rcv_tsecr)
+		tp->undo_marker = 0;
+}
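
A self-contained illustration of that check, with hypothetical values (the demo struct and its fields are illustrative stand-ins for the tcp_sock fields named above):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo {
	uint32_t undo_marker;	/* nonzero: SYN or SYNACK timed out */
	uint32_t retrans_stamp;	/* TSval carried in the original SYN */
	bool saw_tstamp;	/* peer's ACK carried a timestamp option */
	uint32_t rcv_tsecr;	/* TSecr echoed by that ACK */
};

static bool syn_timeout_was_spurious(const struct demo *d)
{
	return d->undo_marker && d->retrans_stamp &&
	       d->saw_tstamp && d->retrans_stamp == d->rcv_tsecr;
}

int main(void)
{
	/* TSecr equals the first SYN's TSval: the peer saw the original
	 * SYN, so the timeout was spurious and undo_marker is cleared. */
	struct demo d = { 1, 1000, true, 1000 };

	printf("spurious: %d\n", syn_timeout_was_spurious(&d));
	return 0;
}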
+
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 const struct tcphdr *th)
 {
@@ -5831,12 +5928,13 @@
 		tcp_ecn_rcv_synack(tp, th);
 
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+		tcp_try_undo_spurious_syn(sk);
 		tcp_ack(sk, skb, FLAG_SLOWPATH);
 
 		/* Ok.. it's good. Set up sequence numbers and
 		 * move to established.
 		 */
-		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+		WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
 		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
 
 		/* RFC1323: The window in SYN & SYN/ACK segments is
@@ -5865,7 +5963,7 @@
 		/* Remember, tcp_poll() does not lock socket!
 		 * Change state from SYN-SENT only after copied_seq
 		 * is initialized. */
-		tp->copied_seq = tp->rcv_nxt;
+		WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 
 		smc_check_reset_syn(tp);
 
@@ -5884,7 +5982,7 @@
 			return -1;
 		if (sk->sk_write_pending ||
 		    icsk->icsk_accept_queue.rskq_defer_accept ||
-		    icsk->icsk_ack.pingpong) {
+		    inet_csk_in_pingpong_mode(sk)) {
 			/* Save one ACK. Data will be ready after
 			 * several ticks, if write_pending is set.
 			 *
@@ -5939,8 +6037,8 @@
 			tp->tcp_header_len = sizeof(struct tcphdr);
 		}
 
-		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
-		tp->copied_seq = tp->rcv_nxt;
+		WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
+		WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
 
 		/* RFC1323: The window in SYN & SYN/ACK segments is
@@ -5989,6 +6087,34 @@
 	return 1;
 }
 
+static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
+{
+	struct request_sock *req;
+
+	tcp_try_undo_loss(sk, false);
+
+	/* Reset rtx states to prevent spurious retransmits_timed_out() */
+	tcp_sk(sk)->retrans_stamp = 0;
+	inet_csk(sk)->icsk_retransmits = 0;
+
+	/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
+	 * we no longer need req so release it.
+	 */
+	req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+					lockdep_sock_is_held(sk));
+	reqsk_fastopen_remove(sk, req, false);
+
+	/* Re-arm the timer because data may have been sent out.
+	 * This is similar to the regular data transmission case
+	 * when new data has just been ack'ed.
+	 *
+	 * (TFO) - we could try to be more aggressive and
+	 * retransmit any data sooner based on when it
+	 * was sent out.
+	 */
+	tcp_rearm_rto(sk);
+}
+
 /*
  *	This function implements the receiving procedure of RFC 793 for
  *	all states except ESTABLISHED and TIME_WAIT.
@@ -6051,7 +6177,8 @@
 
 	tcp_mstamp_refresh(tp);
 	tp->rx_opt.saw_tstamp = 0;
-	req = tp->fastopen_rsk;
+	req = rcu_dereference_protected(tp->fastopen_rsk,
+					lockdep_sock_is_held(sk));
 	if (req) {
 		bool req_stolen;
 
@@ -6085,24 +6212,13 @@
 		if (!tp->srtt_us)
 			tcp_synack_rtt_meas(sk, req);
 
-		/* Once we leave TCP_SYN_RECV, we no longer need req
-		 * so release it.
-		 */
 		if (req) {
-			inet_csk(sk)->icsk_retransmits = 0;
-			reqsk_fastopen_remove(sk, req, false);
-			/* Re-arm the timer because data may have been sent out.
-			 * This is similar to the regular data transmission case
-			 * when new data has just been ack'ed.
-			 *
-			 * (TFO) - we could try to be more aggressive and
-			 * retransmitting any data sooner based on when they
-			 * are sent out.
-			 */
-			tcp_rearm_rto(sk);
+			tcp_rcv_synrecv_state_fastopen(sk);
 		} else {
+			tcp_try_undo_spurious_syn(sk);
+			tp->retrans_stamp = 0;
 			tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
-			tp->copied_seq = tp->rcv_nxt;
+			WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 		}
 		smp_mb();
 		tcp_set_state(sk, TCP_ESTABLISHED);
@@ -6135,16 +6251,9 @@
 	case TCP_FIN_WAIT1: {
 		int tmo;
 
-		/* If we enter the TCP_FIN_WAIT1 state and we are a
-		 * Fast Open socket and this is the first acceptable
-		 * ACK we have received, this would have acknowledged
-		 * our SYNACK so stop the SYNACK timer.
-		 */
-		if (req) {
-			/* We no longer need the request sock. */
-			reqsk_fastopen_remove(sk, req, false);
-			tcp_rearm_rto(sk);
-		}
+		if (req)
+			tcp_rcv_synrecv_state_fastopen(sk);
+
 		if (tp->snd_una != tp->write_seq)
 			break;
 
@@ -6279,6 +6388,11 @@
  * congestion control: Linux DCTCP asserts ECT on all packets,
  * including SYN, which is most optimal solution; however,
  * others, such as FreeBSD do not.
+ *
+ * Exception: At least one of the reserved bits of the TCP header (th->res1) is
+ * set, indicating the use of a future TCP extension (such as AccECN). See
+ * RFC8311 §4.3 which updates RFC3168 to allow the development of such
+ * extensions.
  */
 static void tcp_ecn_create_request(struct request_sock *req,
 				   const struct sk_buff *skb,
@@ -6298,7 +6412,7 @@
 	ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
 	ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
 
-	if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
+	if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
 	    (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
 	    tcp_bpf_ca_needs_ecn((struct sock *)req))
 		inet_rsk(req)->ecn_ok = 1;
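
Condensing the updated condition (editorial sketch; the DST_FEATURE_ECN_CA and BPF clauses are folded into a single ca_needs_ecn flag for brevity):

#include <stdbool.h>

/* Sketch: grant ECN on a passive open? A non-ECT SYN may negotiate
 * classic ECN when allowed, and a SYN with reserved bits set (e.g.
 * an AccECN-style SYN) is now treated the same way. */
static bool req_ecn_ok(bool ect, bool res1, bool ecn_allowed, bool ca_needs_ecn)
{
	return ((!ect || res1) && ecn_allowed) || ca_needs_ecn;
}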
@@ -6314,7 +6428,7 @@
 	req->cookie_ts = 0;
 	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
 	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
-	tcp_rsk(req)->snt_synack = tcp_clock_us();
+	tcp_rsk(req)->snt_synack = 0;
 	tcp_rsk(req)->last_oow_ack_time = 0;
 	req->mss = rx_opt->mss_clamp;
 	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
@@ -6359,9 +6473,7 @@
 /*
  * Return true if a syncookie should be sent
  */
-static bool tcp_syn_flood_action(const struct sock *sk,
-				 const struct sk_buff *skb,
-				 const char *proto)
+static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
 {
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
 	const char *msg = "Dropping request";
@@ -6381,7 +6493,7 @@
 	    net->ipv4.sysctl_tcp_syncookies != 2 &&
 	    xchg(&queue->synflood_warned, 1) == 0)
 		net_info_ratelimited("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
-				     proto, ntohs(tcp_hdr(skb)->dest), msg);
+				     proto, sk->sk_num, msg);
 
 	return want_cookie;
 }
@@ -6403,6 +6515,36 @@
 	}
 }
 
+/* If a SYN cookie is required and supported, returns a clamped MSS value to be
+ * used for SYN cookie generation.
+ */
+u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+			  const struct tcp_request_sock_ops *af_ops,
+			  struct sock *sk, struct tcphdr *th)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u16 mss;
+
+	if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+	    !inet_csk_reqsk_queue_is_full(sk))
+		return 0;
+
+	if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
+		return 0;
+
+	if (sk_acceptq_is_full(sk)) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+		return 0;
+	}
+
+	mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
+	if (!mss)
+		mss = af_ops->mss_clamp;
+
+	return mss;
+}
+EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
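
For reference, the IPv4 consumer added later in this patch (tcp_v4_get_syncookie() in tcp_ipv4.c) feeds the MSS returned here into __cookie_v4_init_sequence() and records the event with tcp_synq_overflow(); a return of 0 means no cookie should be generated.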
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
@@ -6424,7 +6566,7 @@
 	 */
 	if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
 	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
-		want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
+		want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
 		if (!want_cookie)
 			goto drop;
 	}
@@ -6514,7 +6656,12 @@
 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
 				    &foc, TCP_SYNACK_FASTOPEN);
 		/* Add the child socket directly into the accept queue */
-		inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
+		if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
+			reqsk_fastopen_remove(fastopen_sk, req, false);
+			bh_unlock_sock(fastopen_sk);
+			sock_put(fastopen_sk);
+			goto drop_and_free;
+		}
 		sk->sk_data_ready(sk);
 		bh_unlock_sock(fastopen_sk);
 		sock_put(fastopen_sk);
@@ -6537,7 +6684,7 @@
 drop_and_release:
 	dst_release(dst);
 drop_and_free:
-	reqsk_free(req);
+	__reqsk_free(req);
 drop:
 	tcp_listendrop(sk);
 	return 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd42631..67b2dc7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -7,18 +8,12 @@
  *
  *		IPv4 specific functions
  *
- *
  *		code split from:
  *		linux/ipv4/tcp.c
  *		linux/ipv4/tcp_input.c
  *		linux/ipv4/tcp_output.c
  *
  *		See tcp.c for author information
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 /*
@@ -169,9 +164,11 @@
 		 * without appearing to create any others.
 		 */
 		if (likely(!tp->repair)) {
-			tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
-			if (tp->write_seq == 0)
-				tp->write_seq = 1;
+			u32 seq = tcptw->tw_snd_nxt + 65535 + 2;
+
+			if (!seq)
+				seq = 1;
+			WRITE_ONCE(tp->write_seq, seq);
 			tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
 			tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 		}
@@ -258,7 +255,7 @@
 		tp->rx_opt.ts_recent	   = 0;
 		tp->rx_opt.ts_recent_stamp = 0;
 		if (likely(!tp->repair))
-			tp->write_seq	   = 0;
+			WRITE_ONCE(tp->write_seq, 0);
 	}
 
 	inet->inet_dport = usin->sin_port;
@@ -296,16 +293,17 @@
 
 	if (likely(!tp->repair)) {
 		if (!tp->write_seq)
-			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
-						       inet->inet_daddr,
-						       inet->inet_sport,
-						       usin->sin_port);
+			WRITE_ONCE(tp->write_seq,
+				   secure_tcp_seq(inet->inet_saddr,
+						  inet->inet_daddr,
+						  inet->inet_sport,
+						  usin->sin_port));
 		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
 						 inet->inet_saddr,
 						 inet->inet_daddr);
 	}
 
-	inet->inet_id = tp->write_seq ^ jiffies;
+	inet->inet_id = prandom_u32();
 
 	if (tcp_fastopen_defer_connect(sk, &err))
 		return err;
@@ -423,7 +421,7 @@
  *
  */
 
-void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
+int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 {
 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
@@ -446,20 +444,21 @@
 				       inet_iif(icmp_skb), 0);
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
-		return;
+		return -ENOENT;
 	}
 	if (sk->sk_state == TCP_TIME_WAIT) {
 		inet_twsk_put(inet_twsk(sk));
-		return;
+		return 0;
 	}
 	seq = ntohl(th->seq);
-	if (sk->sk_state == TCP_NEW_SYN_RECV)
-		return tcp_req_err(sk, seq,
-				  type == ICMP_PARAMETERPROB ||
-				  type == ICMP_TIME_EXCEEDED ||
-				  (type == ICMP_DEST_UNREACH &&
-				   (code == ICMP_NET_UNREACH ||
-				    code == ICMP_HOST_UNREACH)));
+	if (sk->sk_state == TCP_NEW_SYN_RECV) {
+		tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
+				     type == ICMP_TIME_EXCEEDED ||
+				     (type == ICMP_DEST_UNREACH &&
+				      (code == ICMP_NET_UNREACH ||
+				       code == ICMP_HOST_UNREACH)));
+		return 0;
+	}
 
 	bh_lock_sock(sk);
 	/* If too many ICMPs get dropped on busy
@@ -482,7 +481,7 @@
 	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
-	fastopen = tp->fastopen_rsk;
+	fastopen = rcu_dereference(tp->fastopen_rsk);
 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
 	    !between(seq, snd_una, tp->snd_nxt)) {
@@ -535,16 +534,18 @@
 		if (sock_owned_by_user(sk))
 			break;
 
+		skb = tcp_rtx_queue_head(sk);
+		if (WARN_ON_ONCE(!skb))
+			break;
+
 		icsk->icsk_backoff--;
 		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
 					       TCP_TIMEOUT_INIT;
 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 
-		skb = tcp_rtx_queue_head(sk);
-		BUG_ON(!skb);
 
 		tcp_mstamp_refresh(tp);
-		delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
+		delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
 		remaining = icsk->icsk_rto -
 			    usecs_to_jiffies(delta_us);
 
@@ -613,6 +614,7 @@
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	return 0;
 }
 
 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
@@ -663,8 +665,9 @@
 	int genhash;
 	struct sock *sk1 = NULL;
 #endif
-	struct net *net;
+	u64 transmit_time = 0;
 	struct sock *ctl_sk;
+	struct net *net;
 
 	/* Never send a reset in response to a reset. */
 	if (th->rst)
@@ -767,14 +770,19 @@
 	arg.tos = ip_hdr(skb)->tos;
 	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
-	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
-	if (sk)
+	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+	if (sk) {
 		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 				   inet_twsk(sk)->tw_mark : sk->sk_mark;
+		ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+				   inet_twsk(sk)->tw_priority : sk->sk_priority;
+		transmit_time = tcp_transmit_time(sk);
+	}
 	ip_send_unicast_reply(ctl_sk,
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
-			      &arg, arg.iov[0].iov_len);
+			      &arg, arg.iov[0].iov_len,
+			      transmit_time);
 
 	ctl_sk->sk_mark = 0;
 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
@@ -809,6 +817,7 @@
 	struct net *net = sock_net(sk);
 	struct ip_reply_arg arg;
 	struct sock *ctl_sk;
+	u64 transmit_time;
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
@@ -859,14 +868,17 @@
 	arg.tos = tos;
 	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
-	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
-	if (sk)
-		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
-				   inet_twsk(sk)->tw_mark : sk->sk_mark;
+	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+	ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+			   inet_twsk(sk)->tw_mark : sk->sk_mark;
+	ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+			   inet_twsk(sk)->tw_priority : sk->sk_priority;
+	transmit_time = tcp_transmit_time(sk);
 	ip_send_unicast_reply(ctl_sk,
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
-			      &arg, arg.iov[0].iov_len);
+			      &arg, arg.iov[0].iov_len,
+			      transmit_time);
 
 	ctl_sk->sk_mark = 0;
 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
@@ -969,10 +981,13 @@
  * We need to maintain these in the sk structure.
  */
 
+DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
+EXPORT_SYMBOL(tcp_md5_needed);
+
 /* Find the Key structure for an address.  */
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
-					 const union tcp_md5_addr *addr,
-					 int family)
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
+					   const union tcp_md5_addr *addr,
+					   int family)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_md5sig_key *key;
@@ -1010,7 +1025,7 @@
 	}
 	return best_match;
 }
-EXPORT_SYMBOL(tcp_md5_do_lookup);
+EXPORT_SYMBOL(__tcp_md5_do_lookup);
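
The double-underscore rename pays off together with the new tcp_md5_needed static key; the header-side wrapper is not part of this hunk, but presumably takes roughly this shape:

/* Presumed header-side wrapper (sketch, not shown in this hunk):
 * skip the lookup entirely until an MD5 key has ever been set. */
static inline struct tcp_md5sig_key *
tcp_md5_do_lookup(const struct sock *sk, const union tcp_md5_addr *addr,
		  int family)
{
	if (!static_branch_unlikely(&tcp_md5_needed))
		return NULL;
	return __tcp_md5_do_lookup(sk, addr, family);
}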
 
 static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
 						      const union tcp_md5_addr *addr,
@@ -1435,7 +1450,7 @@
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (inet_opt)
 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
-	newinet->inet_id = newtp->write_seq ^ jiffies;
+	newinet->inet_id = prandom_u32();
 
 	if (!dst) {
 		dst = inet_csk_route_child_sock(sk, newsk, req);
@@ -1507,6 +1522,21 @@
 	return sk;
 }
 
+u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
+			 struct tcphdr *th, u32 *cookie)
+{
+	u16 mss = 0;
+#ifdef CONFIG_SYN_COOKIES
+	mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
+				    &tcp_request_sock_ipv4_ops, sk, th);
+	if (mss) {
+		*cookie = __cookie_v4_init_sequence(iph, th, &mss);
+		tcp_synq_overflow(sk);
+	}
+#endif
+	return mss;
+}
+
 /* The socket must have it's spinlock held when we get
  * here, unless it is a TCP_LISTEN socket.
  *
@@ -1617,13 +1647,15 @@
 
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
-
-	/* Only socket owner can try to collapse/prune rx queues
-	 * to reduce memory overhead, so add a little headroom here.
-	 * Few sockets backlog are possibly concurrently non empty.
-	 */
-	limit += 64*1024;
+	u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
+	struct skb_shared_info *shinfo;
+	const struct tcphdr *th;
+	struct tcphdr *thtail;
+	struct sk_buff *tail;
+	unsigned int hdrlen;
+	bool fragstolen;
+	u32 gso_segs;
+	int delta;
 
 	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
 	 * we can fix skb->truesize to its real value to avoid future drops.
@@ -1633,6 +1665,97 @@
 	 */
 	skb_condense(skb);
 
+	skb_dst_drop(skb);
+
+	if (unlikely(tcp_checksum_complete(skb))) {
+		bh_unlock_sock(sk);
+		__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+		__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
+		return true;
+	}
+
+	/* Attempt coalescing to last skb in backlog, even if we are
+	 * above the limits.
+	 * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
+	 */
+	th = (const struct tcphdr *)skb->data;
+	hdrlen = th->doff * 4;
+	shinfo = skb_shinfo(skb);
+
+	if (!shinfo->gso_size)
+		shinfo->gso_size = skb->len - hdrlen;
+
+	if (!shinfo->gso_segs)
+		shinfo->gso_segs = 1;
+
+	tail = sk->sk_backlog.tail;
+	if (!tail)
+		goto no_coalesce;
+	thtail = (struct tcphdr *)tail->data;
+
+	if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
+	    TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
+	    ((TCP_SKB_CB(tail)->tcp_flags |
+	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
+	    !((TCP_SKB_CB(tail)->tcp_flags &
+	      TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
+	    ((TCP_SKB_CB(tail)->tcp_flags ^
+	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
+#ifdef CONFIG_TLS_DEVICE
+	    tail->decrypted != skb->decrypted ||
+#endif
+	    thtail->doff != th->doff ||
+	    memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
+		goto no_coalesce;
+
+	__skb_pull(skb, hdrlen);
+	if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
+		thtail->window = th->window;
+
+		TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
+
+		if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
+			TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+
+		/* We have to update both TCP_SKB_CB(tail)->tcp_flags and
+		 * thtail->fin, so that the fast path in tcp_rcv_established()
+		 * is not entered if we append a packet with a FIN.
+		 * SYN, RST, URG are not present.
+		 * ACK is set on both packets.
+		 * PSH : we do not really care in TCP stack,
+		 *       at least for 'GRO' packets.
+		 */
+		thtail->fin |= th->fin;
+		TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+
+		if (TCP_SKB_CB(skb)->has_rxtstamp) {
+			TCP_SKB_CB(tail)->has_rxtstamp = true;
+			tail->tstamp = skb->tstamp;
+			skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
+		}
+
+		/* Not as strict as GRO. We only need to carry mss max value */
+		skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
+						 skb_shinfo(tail)->gso_size);
+
+		gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
+		skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+
+		sk->sk_backlog.len += delta;
+		__NET_INC_STATS(sock_net(sk),
+				LINUX_MIB_TCPBACKLOGCOALESCE);
+		kfree_skb_partial(skb, fragstolen);
+		return false;
+	}
+	__skb_push(skb, hdrlen);
+
+no_coalesce:
+	/* Only the socket owner can try to collapse/prune rx queues
+	 * to reduce memory overhead, so add a little headroom here.
+	 * Only a few socket backlogs are likely to be concurrently non-empty.
+	 */
+	limit += 64*1024;
+
 	if (unlikely(sk_add_backlog(sk, skb, limit))) {
 		bh_unlock_sock(sk);
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
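
Note on the coalescing path above: shinfo->gso_segs is a 16-bit field, which is why the summed segment count is clamped with min_t(u32, gso_segs, 0xFFFF) before being stored back.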
@@ -1645,15 +1768,8 @@
 int tcp_filter(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = (struct tcphdr *)skb->data;
-	unsigned int eaten = skb->len;
-	int err;
 
-	err = sk_filter_trim_cap(sk, skb, th->doff * 4);
-	if (!err) {
-		eaten -= skb->len;
-		TCP_SKB_CB(skb)->end_seq -= eaten;
-	}
-	return err;
+	return sk_filter_trim_cap(sk, skb, th->doff * 4);
 }
 EXPORT_SYMBOL(tcp_filter);
 
@@ -1692,6 +1808,7 @@
 int tcp_v4_rcv(struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
+	struct sk_buff *skb_to_free;
 	int sdif = inet_sdif(skb);
 	const struct iphdr *iph;
 	const struct tcphdr *th;
@@ -1802,7 +1919,7 @@
 	if (tcp_v4_inbound_md5_hash(sk, skb))
 		goto discard_and_relse;
 
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	if (tcp_filter(sk, skb))
 		goto discard_and_relse;
@@ -1823,11 +1940,17 @@
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
+		skb_to_free = sk->sk_rx_skb_cache;
+		sk->sk_rx_skb_cache = NULL;
 		ret = tcp_v4_do_rcv(sk, skb);
-	} else if (tcp_add_backlog(sk, skb)) {
-		goto discard_and_relse;
+	} else {
+		if (tcp_add_backlog(sk, skb))
+			goto discard_and_relse;
+		skb_to_free = NULL;
 	}
 	bh_unlock_sock(sk);
+	if (skb_to_free)
+		__kfree_skb(skb_to_free);
 
 put_and_return:
 	if (refcounted)
@@ -2001,7 +2124,7 @@
 	if (inet_csk(sk)->icsk_bind_hash)
 		inet_put_port(sk);
 
-	BUG_ON(tp->fastopen_rsk);
+	BUG_ON(rcu_access_pointer(tp->fastopen_rsk));
 
 	/* If socket is aborted during connect operation */
 	tcp_free_fastopen_req(tp);
@@ -2335,12 +2458,13 @@
 		/* Because we don't lock the socket,
 		 * we might find a transient negative value.
 		 */
-		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+				      READ_ONCE(tp->copied_seq), 0);
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
 		i, src, srcp, dest, destp, state,
-		tp->write_seq - tp->snd_una,
+		READ_ONCE(tp->write_seq) - tp->snd_una,
 		rx_queue,
 		timer_active,
 		jiffies_delta_to_clock_t(timer_expires - jiffies),
@@ -2351,7 +2475,7 @@
 		refcount_read(&sk->sk_refcnt), sk,
 		jiffies_to_clock_t(icsk->icsk_rto),
 		jiffies_to_clock_t(icsk->icsk_ack.ato),
-		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
+		(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
 		tp->snd_cwnd,
 		state == TCP_LISTEN ?
 		    fastopenq->max_qlen :
@@ -2496,7 +2620,8 @@
 {
 	int cpu;
 
-	module_put(net->ipv4.tcp_congestion_control->owner);
+	if (net->ipv4.tcp_congestion_control)
+		module_put(net->ipv4.tcp_congestion_control->owner);
 
 	for_each_possible_cpu(cpu)
 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
@@ -2532,8 +2657,10 @@
 	net->ipv4.sysctl_tcp_ecn_fallback = 1;
 
 	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
 	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
 	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
+	net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
 
 	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
 	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
@@ -2551,10 +2678,10 @@
 	net->ipv4.sysctl_tcp_tw_reuse = 2;
 
 	cnt = tcp_hashinfo.ehash_mask + 1;
-	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
+	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
 	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
 
-	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
+	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
 	net->ipv4.sysctl_tcp_sack = 1;
 	net->ipv4.sysctl_tcp_window_scaling = 1;
 	net->ipv4.sysctl_tcp_timestamps = 1;
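
Worked numbers for the two changed defaults, using a hypothetical ehash table of cnt = 524288 entries:

	max_syn_backlog: old max(128, cnt / 256) = 2048  ->  new max(128, cnt / 128) = 4096
	max_tw_buckets:  old (cnt + 1) / 2 = 262144      ->  new cnt / 2 = 262144

so the SYN-backlog default doubles, while the time-wait cap merely loses its rounding term.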
@@ -2573,8 +2700,8 @@
 	 * which are too large can cause TCP streams to be bursty.
 	 */
 	net->ipv4.sysctl_tcp_tso_win_divisor = 3;
-	/* Default TSQ limit of four TSO segments */
-	net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
+	/* Default TSQ limit of 16 TSO segments */
+	net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
 	/* rfc5961 challenge ack rate limiting */
 	net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
 	net->ipv4.sysctl_tcp_min_tso_segs = 2;
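
The old default of 262144 bytes was 4 * 65536, i.e. four 64 KB TSO segments; the new default is 16 * 65536 = 1048576 bytes, matching the updated comment.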
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index ae10ed6..8c643a4 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * TCP Low Priority (TCP-LP)
  *
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 03b51cd..c4848e7 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -512,16 +512,6 @@
 
 		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
 	}
-	/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
-	 * retransmitted. In light of RFC6298 more aggressive 1sec
-	 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
-	 * retransmission has occurred.
-	 */
-	if (tp->total_retrans > 1)
-		tp->snd_cwnd = 1;
-	else
-		tp->snd_cwnd = tcp_init_cwnd(tp, dst);
-	tp->snd_cwnd_stamp = tcp_jiffies32;
 }
 
 bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
@@ -658,7 +648,7 @@
 	{
 		int n = 0;
 
-		nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
+		nest = nla_nest_start_noflag(msg, TCP_METRICS_ATTR_VALS);
 		if (!nest)
 			goto nla_put_failure;
 		for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
@@ -951,14 +941,14 @@
 static const struct genl_ops tcp_metrics_nl_ops[] = {
 	{
 		.cmd = TCP_METRICS_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = tcp_metrics_nl_cmd_get,
 		.dumpit = tcp_metrics_nl_dump,
-		.policy = tcp_metrics_nl_policy,
 	},
 	{
 		.cmd = TCP_METRICS_CMD_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = tcp_metrics_nl_cmd_del,
-		.policy = tcp_metrics_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 };
@@ -968,6 +958,7 @@
 	.name		= TCP_METRICS_GENL_NAME,
 	.version	= TCP_METRICS_GENL_VERSION,
 	.maxattr	= TCP_METRICS_ATTR_MAX,
+	.policy		= tcp_metrics_nl_policy,
 	.netnsok	= true,
 	.module		= THIS_MODULE,
 	.ops		= tcp_metrics_nl_ops,
@@ -1000,7 +991,7 @@
 
 	slots = tcpmhash_entries;
 	if (!slots) {
-		if (totalram_pages >= 128 * 1024)
+		if (totalram_pages() >= 128 * 1024)
 			slots = 16 * 1024;
 		else
 			slots = 8 * 1024;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 12affb7..c802bc8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -265,6 +266,7 @@
 
 		tw->tw_transparent	= inet->transparent;
 		tw->tw_mark		= sk->sk_mark;
+		tw->tw_priority		= sk->sk_priority;
 		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
 		tcptw->tw_rcv_nxt	= tp->rcv_nxt;
 		tcptw->tw_snd_nxt	= tp->snd_nxt;
@@ -273,7 +275,7 @@
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
 		tcptw->tw_ts_offset	= tp->tsoffset;
 		tcptw->tw_last_oow_ack_time = 0;
-
+		tcptw->tw_tx_delay	= tp->tcp_tx_delay;
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
@@ -282,6 +284,7 @@
 			tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 			tw->tw_tclass = np->tclass;
 			tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
+			tw->tw_txhash = sk->sk_txhash;
 			tw->tw_ipv6only = sk->sk_ipv6only;
 		}
 #endif
@@ -294,12 +297,15 @@
 		 * so the timewait ack generating code has the key.
 		 */
 		do {
-			struct tcp_md5sig_key *key;
 			tcptw->tw_md5_key = NULL;
-			key = tp->af_specific->md5_lookup(sk, sk);
-			if (key) {
-				tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
-				BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
+			if (static_branch_unlikely(&tcp_md5_needed)) {
+				struct tcp_md5sig_key *key;
+
+				key = tp->af_specific->md5_lookup(sk, sk);
+				if (key) {
+					tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
+					BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
+				}
 			}
 		} while (0);
 #endif
@@ -338,10 +344,12 @@
 void tcp_twsk_destructor(struct sock *sk)
 {
 #ifdef CONFIG_TCP_MD5SIG
-	struct tcp_timewait_sock *twsk = tcp_twsk(sk);
+	if (static_branch_unlikely(&tcp_md5_needed)) {
+		struct tcp_timewait_sock *twsk = tcp_twsk(sk);
 
-	if (twsk->tw_md5_key)
-		kfree_rcu(twsk->tw_md5_key, rcu);
+		if (twsk->tw_md5_key)
+			kfree_rcu(twsk->tw_md5_key, rcu);
+	}
 #endif
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
@@ -454,6 +462,7 @@
 	struct tcp_request_sock *treq = tcp_rsk(req);
 	struct inet_connection_sock *newicsk;
 	struct tcp_sock *oldtp, *newtp;
+	u32 seq;
 
 	if (!newsk)
 		return NULL;
@@ -467,54 +476,31 @@
 	/* Now setup tcp_sock */
 	newtp->pred_flags = 0;
 
-	newtp->rcv_wup = newtp->copied_seq =
-	newtp->rcv_nxt = treq->rcv_isn + 1;
+	seq = treq->rcv_isn + 1;
+	newtp->rcv_wup = seq;
+	WRITE_ONCE(newtp->copied_seq, seq);
+	WRITE_ONCE(newtp->rcv_nxt, seq);
 	newtp->segs_in = 1;
 
-	newtp->snd_sml = newtp->snd_una =
-	newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
+	seq = treq->snt_isn + 1;
+	newtp->snd_sml = newtp->snd_una = seq;
+	WRITE_ONCE(newtp->snd_nxt, seq);
+	newtp->snd_up = seq;
 
 	INIT_LIST_HEAD(&newtp->tsq_node);
 	INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
 
 	tcp_init_wl(newtp, treq->rcv_isn);
 
-	newtp->srtt_us = 0;
-	newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 	minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
-	newicsk->icsk_rto = TCP_TIMEOUT_INIT;
 	newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
 
-	newtp->packets_out = 0;
-	newtp->retrans_out = 0;
-	newtp->sacked_out = 0;
-	newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-	newtp->tlp_high_seq = 0;
 	newtp->lsndtime = tcp_jiffies32;
 	newsk->sk_txhash = treq->txhash;
-	newtp->last_oow_ack_time = 0;
 	newtp->total_retrans = req->num_retrans;
 
-	/* So many TCP implementations out there (incorrectly) count the
-	 * initial SYN frame in their delayed-ACK and congestion control
-	 * algorithms that we must have the following bandaid to talk
-	 * efficiently to them.  -DaveM
-	 */
-	newtp->snd_cwnd = TCP_INIT_CWND;
-	newtp->snd_cwnd_cnt = 0;
-
-	/* There's a bubble in the pipe until at least the first ACK. */
-	newtp->app_limited = ~0U;
-
 	tcp_init_xmit_timers(newsk);
-	newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
-
-	newtp->rx_opt.saw_tstamp = 0;
-
-	newtp->rx_opt.dsack = 0;
-	newtp->rx_opt.num_sacks = 0;
-
-	newtp->urg_data = 0;
+	WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1);
 
 	if (sock_flag(newsk, SOCK_KEEPOPEN))
 		inet_csk_reset_keepalive_timer(newsk,
@@ -544,6 +530,11 @@
 		newtp->rx_opt.ts_recent_stamp = 0;
 		newtp->tcp_header_len = sizeof(struct tcphdr);
 	}
+	if (req->num_timeout) {
+		newtp->undo_marker = treq->snt_isn;
+		newtp->retrans_stamp = div_u64(treq->snt_synack,
+					       USEC_PER_SEC / TCP_TS_HZ);
+	}
 	newtp->tsoffset = treq->ts_off;
 #ifdef CONFIG_TCP_MD5SIG
 	newtp->md5sig_info = NULL;	/*XXX*/
@@ -555,14 +546,7 @@
 	newtp->rx_opt.mss_clamp = req->mss;
 	tcp_ecn_openreq_child(newtp, req);
 	newtp->fastopen_req = NULL;
-	newtp->fastopen_rsk = NULL;
-	newtp->syn_data_acked = 0;
-	newtp->rack.mstamp = 0;
-	newtp->rack.advanced = 0;
-	newtp->rack.reo_wnd_steps = 1;
-	newtp->rack.last_delivered = 0;
-	newtp->rack.reo_wnd_persist = 0;
-	newtp->rack.dsack_seen = 0;
+	RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
 
 	__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
 
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 764298e..95db7a1 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * TCP NV: TCP with Congestion Avoidance
  *
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 870b0a3..e09147a 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -1,15 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPV4 GSO/GRO offload support
  *	Linux INET implementation
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	TCPv4 GSO/GRO support
  */
 
+#include <linux/indirect_call_wrapper.h>
 #include <linux/skbuff.h>
 #include <net/tcp.h>
 #include <net/protocol.h>
@@ -305,7 +302,8 @@
 }
 EXPORT_SYMBOL(tcp_gro_complete);
 
-static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
 	if (!NAPI_GRO_CB(skb)->flush &&
@@ -318,7 +316,7 @@
 	return tcp_gro_receive(head, skb);
 }
 
-static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
+INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bd134e3..0488607 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -45,6 +46,17 @@
 
 #include <trace/events/tcp.h>
 
+/* Refresh clocks of a TCP socket,
+ * ensuring monotonically increasing values.
+ */
+void tcp_mstamp_refresh(struct tcp_sock *tp)
+{
+	u64 val = tcp_clock_ns();
+
+	tp->tcp_clock_cache = val;
+	tp->tcp_mstamp = div_u64(val, NSEC_PER_USEC);
+}
+
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			   int push_one, gfp_t gfp);
 
@@ -55,7 +67,7 @@
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int prior_packets = tp->packets_out;
 
-	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+	WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(skb)->end_seq);
 
 	__skb_unlink(skb, &sk->sk_write_queue);
 	tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
@@ -150,13 +162,16 @@
 	if (tcp_packets_in_flight(tp) == 0)
 		tcp_ca_event(sk, CA_EVENT_TX_START);
 
-	tp->lsndtime = now;
-
-	/* If it is a reply for ato after last received
-	 * packet, enter pingpong mode.
+	/* If this is the first data packet sent in response to the
+	 * previously received data, and it is sent within the ACK
+	 * timeout (ato) of the last received packet, increase the
+	 * pingpong count.
 	 */
-	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
-		icsk->icsk_ack.pingpong = 1;
+	if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
+	    (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+		inet_csk_inc_pingpong_cnt(sk);
+
+	tp->lsndtime = now;
 }
 
 /* Account for an ACK we sent. */
@@ -179,21 +194,6 @@
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
 
-
-u32 tcp_default_init_rwnd(u32 mss)
-{
-	/* Initial receive window should be twice of TCP_INIT_CWND to
-	 * enable proper sending of new unsent data during fast recovery
-	 * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
-	 * limit when mss is larger than 1460.
-	 */
-	u32 init_rwnd = TCP_INIT_CWND * 2;
-
-	if (mss > 1460)
-		init_rwnd = max((1460 * init_rwnd) / mss, 2U);
-	return init_rwnd;
-}
-
 /* Determine a window scaling and initial window to offer.
  * Based on the assumption that the given amount of space
  * will be offered. Store the results in the tp structure.
@@ -228,24 +228,20 @@
 	if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
 		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
 	else
-		(*rcv_wnd) = space;
+		(*rcv_wnd) = min_t(u32, space, U16_MAX);
 
-	(*rcv_wscale) = 0;
+	if (init_rcv_wnd)
+		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
+
+	*rcv_wscale = 0;
 	if (wscale_ok) {
 		/* Set window scaling on max possible window */
 		space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
 		space = max_t(u32, space, sysctl_rmem_max);
 		space = min_t(u32, space, *window_clamp);
-		while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
-			space >>= 1;
-			(*rcv_wscale)++;
-		}
+		*rcv_wscale = clamp_t(int, ilog2(space) - 15,
+				      0, TCP_MAX_WSCALE);
 	}
-
-	if (!init_rcv_wnd) /* Use default unless specified otherwise */
-		init_rcv_wnd = tcp_default_init_rwnd(mss);
-	*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
-
 	/* Set the clamp no higher than max representable value */
 	(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
 }
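
The clamp_t() form is equivalent to the shift loop it replaces: for space > U16_MAX, the smallest w with (space >> w) <= U16_MAX is ilog2(space) - 15. A small userspace check of that claim (TCP_MAX_WSCALE is 14):

#include <assert.h>
#include <stdint.h>

#define U16MAX	   0xFFFFU
#define MAX_WSCALE 14	/* TCP_MAX_WSCALE */

static int ilog2_u32(uint32_t x)	/* x must be nonzero */
{
	return 31 - __builtin_clz(x);
}

static int wscale_loop(uint32_t space)	/* the removed loop */
{
	int w = 0;

	while (space > U16MAX && w < MAX_WSCALE) {
		space >>= 1;
		w++;
	}
	return w;
}

static int wscale_closed(uint32_t space)	/* the new clamp_t() form */
{
	int w = ilog2_u32(space) - 15;

	return w < 0 ? 0 : (w > MAX_WSCALE ? MAX_WSCALE : w);
}

int main(void)
{
	uint64_t s;

	for (s = 1; s <= UINT32_MAX; s += 4099)
		assert(wscale_loop((uint32_t)s) == wscale_closed((uint32_t)s));
	return 0;
}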
@@ -598,7 +594,8 @@
 
 	*md5 = NULL;
 #ifdef CONFIG_TCP_MD5SIG
-	if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+	if (static_branch_unlikely(&tcp_md5_needed) &&
+	    rcu_access_pointer(tp->md5sig_info)) {
 		*md5 = tp->af_specific->md5_lookup(sk, sk);
 		if (*md5) {
 			opts->options |= OPTION_MD5;
@@ -734,7 +731,8 @@
 
 	*md5 = NULL;
 #ifdef CONFIG_TCP_MD5SIG
-	if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+	if (static_branch_unlikely(&tcp_md5_needed) &&
+	    rcu_access_pointer(tp->md5sig_info)) {
 		*md5 = tp->af_specific->md5_lookup(sk, sk);
 		if (*md5) {
 			opts->options |= OPTION_MD5;
@@ -977,28 +975,27 @@
 	return HRTIMER_NORESTART;
 }
 
-static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
+static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
+				      u64 prior_wstamp)
 {
-	u64 len_ns;
-	u32 rate;
+	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!tcp_needs_internal_pacing(sk))
-		return;
-	rate = sk->sk_pacing_rate;
-	if (!rate || rate == ~0U)
-		return;
+	if (sk->sk_pacing_status != SK_PACING_NONE) {
+		unsigned long rate = sk->sk_pacing_rate;
 
-	len_ns = (u64)skb->len * NSEC_PER_SEC;
-	do_div(len_ns, rate);
-	hrtimer_start(&tcp_sk(sk)->pacing_timer,
-		      ktime_add_ns(ktime_get(), len_ns),
-		      HRTIMER_MODE_ABS_PINNED_SOFT);
-	sock_hold(sk);
-}
+		/* Original sch_fq does not pace the first 10 MSS.
+		 * Note that tp->data_segs_out overflows after 2^32 packets;
+		 * this is a minor annoyance.
+		 */
+		if (rate != ~0UL && rate && tp->data_segs_out >= 10) {
+			u64 len_ns = div64_ul((u64)skb->len * NSEC_PER_SEC, rate);
+			u64 credit = tp->tcp_wstamp_ns - prior_wstamp;
 
-static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
-{
-	skb->skb_mstamp = tp->tcp_mstamp;
+			/* take into account OS jitter */
+			len_ns -= min_t(u64, len_ns / 2, credit);
+			tp->tcp_wstamp_ns += len_ns;
+		}
+	}
 	list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
 }
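
Worked numbers for the earliest-departure-time arithmetic above, with hypothetical values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t rate   = 125000000;	/* bytes/sec, i.e. 1 Gbit/s */
	uint64_t len    = 65000;	/* one large GSO skb */
	uint64_t credit = 400000;	/* ns elapsed since the prior send */
	uint64_t len_ns = len * 1000000000ULL / rate;	/* 520000 ns */
	uint64_t forgiven = len_ns / 2 < credit ? len_ns / 2 : credit;

	/* wstamp advances by 260000 ns: at most half the slot is
	 * absorbed by accumulated credit (the OS-jitter allowance). */
	printf("advance = %llu ns\n",
	       (unsigned long long)(len_ns - forgiven));
	return 0;
}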
 
@@ -1025,11 +1022,14 @@
 	struct sk_buff *oskb = NULL;
 	struct tcp_md5sig_key *md5;
 	struct tcphdr *th;
+	u64 prior_wstamp;
 	int err;
 
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 	tp = tcp_sk(sk);
-
+	prior_wstamp = tp->tcp_wstamp_ns;
+	tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
+	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 	if (clone_it) {
 		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
 			- tp->snd_una;
@@ -1045,17 +1045,27 @@
 		if (unlikely(!skb))
 			return -ENOBUFS;
 	}
-	skb->skb_mstamp = tp->tcp_mstamp;
 
 	inet = inet_sk(sk);
 	tcb = TCP_SKB_CB(skb);
 	memset(&opts, 0, sizeof(opts));
 
-	if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
+	if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
 		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
-	else
+	} else {
 		tcp_options_size = tcp_established_options(sk, skb, &opts,
 							   &md5);
+		/* Force a PSH flag on all (GSO) packets to expedite GRO flush
+		 * at the receiver: this slightly improves GRO performance.
+		 * Note that we do not force the PSH flag for non GSO packets,
+		 * because they might be sent under high congestion events,
+		 * and in this case it is better to delay the delivery of 1-MSS
+		 * packets and thus the corresponding ACK packet that would
+		 * release the following packet.
+		 */
+		if (tcp_skb_pcount(skb) > 1)
+			tcb->tcp_flags |= TCPHDR_PSH;
+	}
 	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
 	/* if no packet is in qdisc/device queue, then allow XPS to select
@@ -1137,7 +1147,6 @@
 		tcp_event_data_sent(tp, sk);
 		tp->data_segs_out += tcp_skb_pcount(skb);
 		tp->bytes_sent += skb->len - tcp_header_size;
-		tcp_internal_pacing(sk, skb);
 	}
 
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
@@ -1149,13 +1158,14 @@
 	skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
 	skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
 
-	/* Our usage of tstamp should remain private */
-	skb->tstamp = 0;
+	/* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */
 
 	/* Cleanup our debris for IP stacks */
 	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
 			       sizeof(struct inet6_skb_parm)));
 
+	tcp_add_tx_delay(skb, tp);
+
 	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
 
 	if (unlikely(err > 0)) {
@@ -1163,7 +1173,7 @@
 		err = net_xmit_eval(err);
 	}
 	if (!err && oskb) {
-		tcp_update_skb_after_send(tp, oskb);
+		tcp_update_skb_after_send(sk, oskb, prior_wstamp);
 		tcp_rate_skb_sent(sk, oskb);
 	}
 	return err;
@@ -1186,10 +1196,10 @@
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Advance write_seq and place onto the write_queue. */
-	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
+	WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
 	__skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
-	sk->sk_wmem_queued += skb->truesize;
+	sk_wmem_queued_add(sk, skb->truesize);
 	sk_mem_charge(sk, skb->truesize);
 }
 
@@ -1289,6 +1299,7 @@
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
 	int nsize, old_factor;
+	long limit;
 	int nlen;
 	u8 flags;
 
@@ -1299,6 +1310,20 @@
 	if (nsize < 0)
 		nsize = 0;
 
+	/* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
+	 * We need some allowance to not penalize applications setting small
+	 * SO_SNDBUF values.
+	 * Also allow first and last skb in retransmit queue to be split.
+	 */
+	limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+	if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
+		     tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
+		     skb != tcp_rtx_queue_head(sk) &&
+		     skb != tcp_rtx_queue_tail(sk))) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+		return -ENOMEM;
+	}
+
 	if (skb_unclone(skb, gfp))
 		return -ENOMEM;
 
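On the new guard above: since the test is (sk_wmem_queued >> 1) > limit, a split is refused only once the queued bytes exceed roughly twice sk_sndbuf plus the two-skb allowance, so ordinary senders never hit it.
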
@@ -1306,8 +1331,9 @@
 	buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
 	if (!buff)
 		return -ENOMEM; /* We'll just try again later. */
+	skb_copy_decrypted(buff, skb);
 
-	sk->sk_wmem_queued += buff->truesize;
+	sk_wmem_queued_add(sk, buff->truesize);
 	sk_mem_charge(sk, buff->truesize);
 	nlen = skb->len - len - nsize;
 	buff->truesize += nlen;
@@ -1388,7 +1414,7 @@
 		} else {
 			shinfo->frags[k] = shinfo->frags[i];
 			if (eat) {
-				shinfo->frags[k].page_offset += eat;
+				skb_frag_off_add(&shinfo->frags[k], eat);
 				skb_frag_size_sub(&shinfo->frags[k], eat);
 				eat = 0;
 			}
@@ -1417,7 +1443,7 @@
 
 	if (delta_truesize) {
 		skb->truesize	   -= delta_truesize;
-		sk->sk_wmem_queued -= delta_truesize;
+		sk_wmem_queued_add(sk, -delta_truesize);
 		sk_mem_uncharge(sk, delta_truesize);
 		sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
 	}
@@ -1457,8 +1483,7 @@
 	mss_now -= icsk->icsk_ext_hdr_len;
 
 	/* Then reserve room for full set of TCP options and 8 bytes of data */
-	if (mss_now < 48)
-		mss_now = 48;
+	mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
 	return mss_now;
 }
 
@@ -1698,8 +1723,9 @@
 {
 	u32 bytes, segs;
 
-	bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
-		    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+	bytes = min_t(unsigned long,
+		      sk->sk_pacing_rate >> sk->sk_pacing_shift,
+		      sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
 
 	/* Goal is to send at least one packet per ms,
 	 * not one big TSO packet every 100 ms.
@@ -1845,23 +1871,24 @@
  * know that all the data is in scatter-gather pages, and that the
  * packet has never been sent out before (and thus is not cloned).
  */
-static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
-			struct sk_buff *skb, unsigned int len,
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 			unsigned int mss_now, gfp_t gfp)
 {
-	struct sk_buff *buff;
 	int nlen = skb->len - len;
+	struct sk_buff *buff;
 	u8 flags;
 
 	/* All of a TSO frame must be composed of paged data.  */
 	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
+		return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+				    skb, len, mss_now, gfp);
 
 	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
 		return -ENOMEM;
+	skb_copy_decrypted(buff, skb);
 
-	sk->sk_wmem_queued += buff->truesize;
+	sk_wmem_queued_add(sk, buff->truesize);
 	sk_mem_charge(sk, buff->truesize);
 	buff->truesize += nlen;
 	skb->truesize -= nlen;
@@ -1891,7 +1918,7 @@
 
 	/* Link BUFF into the send queue. */
 	__skb_header_release(buff);
-	tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
+	tcp_insert_write_queue_after(skb, buff, sk, TCP_FRAG_IN_WRITE_QUEUE);
 
 	return 0;
 }
@@ -1907,18 +1934,22 @@
 				 u32 max_segs)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 age, send_win, cong_win, limit, in_flight;
+	u32 send_win, cong_win, limit, in_flight;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *head;
 	int win_divisor;
+	s64 delta;
 
 	if (icsk->icsk_ca_state >= TCP_CA_Recovery)
 		goto send_now;
 
 	/* Avoid bursty behavior by allowing defer
-	 * only if the last write was recent.
+	 * only if the last write was recent (1 ms).
+	 * Note that tp->tcp_wstamp_ns can be in the future if we have
+	 * packets waiting in a qdisc or device for EDT delivery.
 	 */
-	if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0)
+	delta = tp->tcp_clock_cache - tp->tcp_wstamp_ns - NSEC_PER_MSEC;
+	if (delta > 0)
 		goto send_now;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1965,9 +1996,9 @@
 	head = tcp_rtx_queue_head(sk);
 	if (!head)
 		goto send_now;
-	age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
+	delta = tp->tcp_clock_cache - head->tstamp;
 	/* If next ACK is likely to come too late (half srtt), do not defer */
-	if (age < (tp->srtt_us >> 4))
+	if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0)
 		goto send_now;
 
 	/* Ok, it looks like it is advisable to defer.
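
Reading aid for this hunk: tp->srtt_us stores eight times the smoothed RTT in microseconds, so srtt_us >> 4 is half the smoothed RTT, matching the "half srtt" comment; similarly, the 1 ms freshness test now compares tcp_clock_cache against tcp_wstamp_ns directly in nanoseconds instead of relying on jiffies granularity.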
@@ -1989,7 +2020,8 @@
 	}
 
 	/* If this packet won't get more data, do not wait. */
-	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+	if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
+	    TCP_SKB_CB(skb)->eor)
 		goto send_now;
 
 	return true;
@@ -2032,7 +2064,7 @@
 		if (len <= skb->len)
 			break;
 
-		if (unlikely(TCP_SKB_CB(skb)->eor))
+		if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
 			return false;
 
 		len -= skb->len;
@@ -2120,10 +2152,11 @@
 	nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
 	if (!nskb)
 		return -1;
-	sk->sk_wmem_queued += nskb->truesize;
+	sk_wmem_queued_add(sk, nskb->truesize);
 	sk_mem_charge(sk, nskb->truesize);
 
 	skb = tcp_send_head(sk);
+	skb_copy_decrypted(nskb, skb);
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -2148,6 +2181,7 @@
 			 * we need to propagate it to the new skb.
 			 */
 			TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
+			tcp_skb_collapse_tstamp(nskb, skb);
 			tcp_unlink_write_queue(skb, sk);
 			sk_wmem_free_skb(sk, skb);
 		} else {
@@ -2188,10 +2222,23 @@
 	return -1;
 }
 
-static bool tcp_pacing_check(const struct sock *sk)
+static bool tcp_pacing_check(struct sock *sk)
 {
-	return tcp_needs_internal_pacing(sk) &&
-	       hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!tcp_needs_internal_pacing(sk))
+		return false;
+
+	if (tp->tcp_wstamp_ns <= tp->tcp_clock_cache)
+		return false;
+
+	if (!hrtimer_is_queued(&tp->pacing_timer)) {
+		hrtimer_start(&tp->pacing_timer,
+			      ns_to_ktime(tp->tcp_wstamp_ns),
+			      HRTIMER_MODE_ABS_PINNED_SOFT);
+		sock_hold(sk);
+	}
+	return true;
 }
 
 /* TCP Small Queues :
@@ -2208,13 +2255,28 @@
 static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 				  unsigned int factor)
 {
-	unsigned int limit;
+	unsigned long limit;
 
-	limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
-	limit = min_t(u32, limit,
-		      sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+	limit = max_t(unsigned long,
+		      2 * skb->truesize,
+		      sk->sk_pacing_rate >> sk->sk_pacing_shift);
+	if (sk->sk_pacing_status == SK_PACING_NONE)
+		limit = min_t(unsigned long, limit,
+			      sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
 	limit <<= factor;
 
+	if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
+	    tcp_sk(sk)->tcp_tx_delay) {
+		u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay;
+
+		/* TSQ is based on skb truesize sum (sk_wmem_alloc), so we
+		 * approximate our needs assuming an ~100% skb->truesize overhead.
+		 * USEC_PER_SEC is approximated by 2^20.
+		 * do_div(extra_bytes, USEC_PER_SEC/2) is replaced by a right shift.
+		 */
+		extra_bytes >>= (20 - 1);
+		limit += extra_bytes;
+	}
 	if (refcount_read(&sk->sk_wmem_alloc) > limit) {
 		/* Always send skb if rtx queue is empty.
 		 * No need to wait for TX completion to call us back,
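
Worked numbers for the shift approximation above, with hypothetical values (the exact figure would be rate * delay * 2 in bytes; since 2^20 stands in for USEC_PER_SEC, the result runs about 5% low):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t rate     = 12500000;	/* bytes/sec, i.e. 100 Mbit/s */
	uint64_t delay_us = 50000;	/* hypothetical 50 ms tx delay */
	uint64_t extra    = rate * delay_us;

	/* exact: 2 * rate * 0.05 s = 1250000 bytes */
	printf("approx = %llu bytes\n",
	       (unsigned long long)(extra >> (20 - 1)));	/* 1192092 */
	return 0;
}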
@@ -2320,18 +2382,20 @@
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
+		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
+			/* "skb_mstamp_ns" is used as a start point for the retransmit timer */
+			skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
+			list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+			tcp_init_tso_segs(skb, mss_now);
+			goto repair; /* Skip network transmission */
+		}
+
 		if (tcp_pacing_check(sk))
 			break;
 
 		tso_segs = tcp_init_tso_segs(skb, mss_now);
 		BUG_ON(!tso_segs);
 
-		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
-			/* "skb_mstamp" is used as a start point for the retransmit timer */
-			tcp_update_skb_after_send(tp, skb);
-			goto repair; /* Skip network transmission */
-		}
-
 		cwnd_quota = tcp_cwnd_test(tp, skb);
 		if (!cwnd_quota) {
 			if (push_one == 2)
@@ -2367,8 +2431,7 @@
 						    nonagle);
 
 		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
-					  skb, limit, mss_now, gfp)))
+		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 			break;
 
 		if (tcp_small_queue_check(sk, skb, 0))
@@ -2419,7 +2482,7 @@
 	/* Don't do any loss probe on a Fast Open connection before 3WHS
 	 * finishes.
 	 */
-	if (tp->fastopen_rsk)
+	if (rcu_access_pointer(tp->fastopen_rsk))
 		return false;
 
 	early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
@@ -2453,8 +2516,8 @@
 	if (rto_delta_us > 0)
 		timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
 
-	inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
-				  TCP_RTO_MAX);
+	tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
+			     TCP_RTO_MAX, NULL);
 	return true;
 }
 
@@ -2727,7 +2790,7 @@
 		if (next_skb_size <= skb_availroom(skb))
 			skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
 				      next_skb_size);
-		else if (!skb_shift(skb, next_skb, next_skb_size))
+		else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
 			return false;
 	}
 	tcp_highest_sack_replace(sk, next_skb, skb);
@@ -2906,22 +2969,26 @@
 		} tcp_skb_tsorted_restore(skb);
 
 		if (!err) {
-			tcp_update_skb_after_send(tp, skb);
+			tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns);
 			tcp_rate_skb_sent(sk, skb);
 		}
 	} else {
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
 
+	/* To avoid taking spuriously low RTT samples based on a timestamp
+	 * for a transmit that never happened, always mark EVER_RETRANS.
+	 */
+	TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+
 	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
 		tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
 				  TCP_SKB_CB(skb)->seq, segs, err);
 
 	if (likely(!err)) {
-		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
 		trace_tcp_retransmit_skb(sk, skb);
 	} else if (err != -EBUSY) {
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
+		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
 	}
 	return err;
 }
@@ -2939,13 +3006,12 @@
 #endif
 		TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
 		tp->retrans_out += tcp_skb_pcount(skb);
-
-		/* Save stamp of the first retransmit. */
-		if (!tp->retrans_stamp)
-			tp->retrans_stamp = tcp_skb_timestamp(skb);
-
 	}
 
+	/* Save stamp of the first (attempted) retransmit. */
+	if (!tp->retrans_stamp)
+		tp->retrans_stamp = tcp_skb_timestamp(skb);
+
 	if (tp->undo_retrans < 0)
 		tp->undo_retrans = 0;
 	tp->undo_retrans += tcp_skb_pcount(skb);
@@ -3021,9 +3087,10 @@
 
 		if (skb == rtx_head &&
 		    icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-						  inet_csk(sk)->icsk_rto,
-						  TCP_RTO_MAX);
+			tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+					     inet_csk(sk)->icsk_rto,
+					     TCP_RTO_MAX,
+					     skb);
 	}
 }
 
@@ -3065,7 +3132,6 @@
 		tskb = skb_rb_last(&sk->tcp_rtx_queue);
 
 	if (tskb) {
-coalesce:
 		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
 		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
@@ -3076,16 +3142,14 @@
 			 * if FIN had been sent. This is because retransmit path
 			 * does not change tp->snd_nxt.
 			 */
-			tp->snd_nxt++;
+			WRITE_ONCE(tp->snd_nxt, tp->snd_nxt + 1);
 			return;
 		}
 	} else {
 		skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
-		if (unlikely(!skb)) {
-			if (tskb)
-				goto coalesce;
+		if (unlikely(!skb))
 			return;
-		}
+
 		INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
 		skb_reserve(skb, MAX_TCP_HEADER);
 		sk_forced_mem_schedule(sk, skb->truesize);
@@ -3158,7 +3222,7 @@
 			tcp_rtx_queue_unlink_and_free(skb, sk);
 			__skb_header_release(nskb);
 			tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
-			sk->sk_wmem_queued += nskb->truesize;
+			sk_wmem_queued_add(sk, nskb->truesize);
 			sk_mem_charge(sk, nskb->truesize);
 			skb = nskb;
 		}
@@ -3191,6 +3255,7 @@
 	int tcp_header_size;
 	struct tcphdr *th;
 	int mss;
+	u64 now;
 
 	skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
 	if (unlikely(!skb)) {
@@ -3222,12 +3287,17 @@
 	mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
 
 	memset(&opts, 0, sizeof(opts));
+	now = tcp_clock_ns();
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
-		skb->skb_mstamp = cookie_init_timestamp(req);
+		skb->skb_mstamp_ns = cookie_init_timestamp(req);
 	else
 #endif
-		skb->skb_mstamp = tcp_clock_us();
+	{
+		skb->skb_mstamp_ns = now;
+		if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
+			tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
+	}
 
 #ifdef CONFIG_TCP_MD5SIG
 	rcu_read_lock();
@@ -3267,8 +3337,9 @@
 	rcu_read_unlock();
 #endif
 
-	/* Do not fool tcpdump (if any), clean our debris */
-	skb->tstamp = 0;
+	skb->skb_mstamp_ns = now;
+	tcp_add_tx_delay(skb, tp);
+
 	return skb;
 }
 EXPORT_SYMBOL(tcp_make_synack);
@@ -3355,14 +3426,14 @@
 	tp->snd_una = tp->write_seq;
 	tp->snd_sml = tp->write_seq;
 	tp->snd_up = tp->write_seq;
-	tp->snd_nxt = tp->write_seq;
+	WRITE_ONCE(tp->snd_nxt, tp->write_seq);
 
 	if (likely(!tp->repair))
 		tp->rcv_nxt = 0;
 	else
 		tp->rcv_tstamp = tcp_jiffies32;
 	tp->rcv_wup = tp->rcv_nxt;
-	tp->copied_seq = tp->rcv_nxt;
+	WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 
 	inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
 	inet_csk(sk)->icsk_retransmits = 0;
@@ -3376,9 +3447,9 @@
 
 	tcb->end_seq += skb->len;
 	__skb_header_release(skb);
-	sk->sk_wmem_queued += skb->truesize;
+	sk_wmem_queued_add(sk, skb->truesize);
 	sk_mem_charge(sk, skb->truesize);
-	tp->write_seq = tcb->end_seq;
+	WRITE_ONCE(tp->write_seq, tcb->end_seq);
 	tp->packets_out += tcp_skb_pcount(skb);
 }
 
@@ -3431,6 +3502,7 @@
 			skb_trim(syn_data, copied);
 			space = copied;
 		}
+		skb_zcopy_set(syn_data, fo->uarg, NULL);
 	}
 	/* No more data pending in inet_wait_for_connect() */
 	if (space == fo->size)
@@ -3443,7 +3515,7 @@
 
 	err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
 
-	syn->skb_mstamp = syn_data->skb_mstamp;
+	syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
 
 	/* Now full SYN+DATA was cloned and sent (or not),
 	 * remove the SYN from the original skb (syn_data)
@@ -3514,11 +3586,11 @@
 	/* We change tp->snd_nxt after the tcp_transmit_skb() call
 	 * in order to make this packet get counted in tcpOutSegs.
 	 */
-	tp->snd_nxt = tp->write_seq;
+	WRITE_ONCE(tp->snd_nxt, tp->write_seq);
 	tp->pushed_seq = tp->write_seq;
 	buff = tcp_send_head(sk);
 	if (unlikely(buff)) {
-		tp->snd_nxt	= TCP_SKB_CB(buff)->seq;
+		WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(buff)->seq);
 		tp->pushed_seq	= TCP_SKB_CB(buff)->seq;
 	}
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
@@ -3544,7 +3616,7 @@
 		const struct tcp_sock *tp = tcp_sk(sk);
 		int max_ato = HZ / 2;
 
-		if (icsk->icsk_ack.pingpong ||
+		if (inet_csk_in_pingpong_mode(sk) ||
 		    (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
 			max_ato = TCP_DELACK_MAX;
 
@@ -3725,7 +3797,7 @@
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
-	unsigned long probe_max;
+	unsigned long timeout;
 	int err;
 
 	err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
@@ -3737,25 +3809,18 @@
 		return;
 	}
 
+	icsk->icsk_probes_out++;
 	if (err <= 0) {
 		if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
 			icsk->icsk_backoff++;
-		icsk->icsk_probes_out++;
-		probe_max = TCP_RTO_MAX;
+		timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
 	} else {
 		/* If packet was not sent due to local congestion,
-		 * do not backoff and do not remember icsk_probes_out.
-		 * Let local senders to fight for local resources.
-		 *
-		 * Use accumulated backoff yet.
+		 * let senders fight for local resources conservatively.
 		 */
-		if (!icsk->icsk_probes_out)
-			icsk->icsk_probes_out = 1;
-		probe_max = TCP_RESOURCE_PROBE_INTERVAL;
+		timeout = TCP_RESOURCE_PROBE_INTERVAL;
 	}
-	inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-				  tcp_probe0_when(sk, probe_max),
-				  TCP_RTO_MAX);
+	tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL);
 }
 
 int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
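The probe path above now derives the next timeout directly from tcp_probe0_when(), which doubles a base timeout once per icsk_backoff step and clamps the result. A minimal standalone model of that math (hypothetical helper name, time units abstracted):

/* Model of the exponential probe0 backoff used above: the base timeout
 * is doubled once per recorded backoff step and clamped to an upper
 * bound (TCP_RTO_MAX in the kernel). */
static unsigned long probe0_when(unsigned long base, unsigned int backoff,
				 unsigned long max_when)
{
	unsigned long long when = (unsigned long long)base << backoff;

	return when < max_when ? (unsigned long)when : max_when;
}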
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 4dff40d..0de6935 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <net/tcp.h>
 
 /* The bandwidth estimator estimates the rate at which the network
@@ -55,8 +56,10 @@
 	  * bandwidth estimate.
 	  */
 	if (!tp->packets_out) {
-		tp->first_tx_mstamp  = skb->skb_mstamp;
-		tp->delivered_mstamp = skb->skb_mstamp;
+		u64 tstamp_us = tcp_skb_timestamp_us(skb);
+
+		tp->first_tx_mstamp  = tstamp_us;
+		tp->delivered_mstamp = tstamp_us;
 	}
 
 	TCP_SKB_CB(skb)->tx.first_tx_mstamp	= tp->first_tx_mstamp;
@@ -88,13 +91,12 @@
 		rs->is_app_limited   = scb->tx.is_app_limited;
 		rs->is_retrans	     = scb->sacked & TCPCB_RETRANS;
 
-		/* Find the duration of the "send phase" of this window: */
-		rs->interval_us      = tcp_stamp_us_delta(
-						skb->skb_mstamp,
-						scb->tx.first_tx_mstamp);
-
 		/* Record send time of most recently ACKed packet: */
-		tp->first_tx_mstamp  = skb->skb_mstamp;
+		tp->first_tx_mstamp  = tcp_skb_timestamp_us(skb);
+		/* Find the duration of the "send phase" of this window: */
+		rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
+						     scb->tx.first_tx_mstamp);
+
 	}
 	/* Mark off the skb delivered once it's sacked to avoid being
 	 * used again when it's cumulatively acked. For acked packets
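The reordered hunk stores the newest ACKed packet's send time into tp->first_tx_mstamp first, then measures the "send phase" from it, so both values come from the same tcp_skb_timestamp_us() read. The arithmetic, as a sketch (hypothetical helper, microsecond timestamps):

/* Sketch: interval_us spans the send phase of the sampled window, i.e.
 * the newest ACKed skb's tx time minus the window's first tx time.
 * Mirrors tcp_stamp_us_delta() on u64 microsecond stamps. */
static long send_phase_us(u64 newest_tx_us, u64 first_tx_us)
{
	return (long)(newest_tx_us - first_tx_us);
}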
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index c81aadf..fdb715b 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -50,7 +50,7 @@
 s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd)
 {
 	return tp->rack.rtt_us + reo_wnd -
-	       tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
+	       tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(skb));
 }
 
 /* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
@@ -91,7 +91,8 @@
 		    !(scb->sacked & TCPCB_SACKED_RETRANS))
 			continue;
 
-		if (!tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
+		if (!tcp_rack_sent_after(tp->rack.mstamp,
+					 tcp_skb_timestamp_us(skb),
 					 tp->rack.end_seq, scb->end_seq))
 			break;
 
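tcp_rack_skb_timeout() above returns how much longer to wait before declaring an skb lost: the estimated RTT plus the reordering window, minus the time already elapsed since the skb was sent. The same formula as a standalone sketch (hypothetical helper):

/* Sketch: remaining RACK timeout for one skb, in microseconds.
 * A non-positive result means the skb can be marked lost now. */
static long rack_skb_timeout_us(long rtt_us, long reo_wnd_us,
				u64 now_us, u64 skb_tx_us)
{
	return rtt_us + reo_wnd_us - (long)(now_us - skb_tx_us);
}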
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index addc122..471571e 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Tom Kelly's Scalable TCP
  *
  * See http://www.deneholme.net/tom/scalable/
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 57eae8d..dd5a631 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -22,33 +23,21 @@
 #include <linux/gfp.h>
 #include <net/tcp.h>
 
-static u32 tcp_retransmit_stamp(const struct sock *sk)
-{
-	u32 start_ts = tcp_sk(sk)->retrans_stamp;
-
-	if (unlikely(!start_ts)) {
-		struct sk_buff *head = tcp_rtx_queue_head(sk);
-
-		if (!head)
-			return 0;
-		start_ts = tcp_skb_timestamp(head);
-	}
-	return start_ts;
-}
-
 static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 elapsed, start_ts;
+	s32 remaining;
 
-	start_ts = tcp_retransmit_stamp(sk);
-	if (!icsk->icsk_user_timeout || !start_ts)
+	start_ts = tcp_sk(sk)->retrans_stamp;
+	if (!icsk->icsk_user_timeout)
 		return icsk->icsk_rto;
 	elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
-	if (elapsed >= icsk->icsk_user_timeout)
+	remaining = icsk->icsk_user_timeout - elapsed;
+	if (remaining <= 0)
 		return 1; /* user timeout has passed; fire ASAP */
-	else
-		return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+	return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
 }
 
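The rewritten clamp computes the remaining TCP_USER_TIMEOUT budget as a signed quantity, folding the old "already expired" special case and the min() into one flow. A compact model (hypothetical helper; milliseconds in and out, the jiffies conversion omitted):

/* Sketch: clamp the next RTO to whatever is left of TCP_USER_TIMEOUT.
 * A signed `remaining` makes "already expired" fall out naturally;
 * <= 0 means fire as soon as possible. */
static unsigned int clamp_rto_ms(unsigned int rto_ms,
				 unsigned int user_timeout_ms,
				 unsigned int elapsed_ms)
{
	int remaining = (int)(user_timeout_ms - elapsed_ms);

	if (!user_timeout_ms)
		return rto_ms;
	if (remaining <= 0)
		return 1;
	return rto_ms < (unsigned int)remaining ? rto_ms : remaining;
}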
 /**
@@ -165,13 +154,27 @@
 	} else {
 		mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
 		mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
-		mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
+		mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
+		mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
 		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
 	}
 	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 }
 
+static unsigned int tcp_model_timeout(struct sock *sk,
+				      unsigned int boundary,
+				      unsigned int rto_base)
+{
+	unsigned int linear_backoff_thresh, timeout;
+
+	linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
+	if (boundary <= linear_backoff_thresh)
+		timeout = ((2 << boundary) - 1) * rto_base;
+	else
+		timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
+			(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+	return jiffies_to_msecs(timeout);
+}
 /**
  *  retransmits_timed_out() - returns true if this connection has timed out
  *  @sk:       The current socket
@@ -189,27 +192,21 @@
 				  unsigned int boundary,
 				  unsigned int timeout)
 {
-	const unsigned int rto_base = TCP_RTO_MIN;
-	unsigned int linear_backoff_thresh, start_ts;
+	unsigned int start_ts;
 
 	if (!inet_csk(sk)->icsk_retransmits)
 		return false;
 
-	start_ts = tcp_retransmit_stamp(sk);
-	if (!start_ts)
-		return false;
-
+	start_ts = tcp_sk(sk)->retrans_stamp;
 	if (likely(timeout == 0)) {
-		linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
+		unsigned int rto_base = TCP_RTO_MIN;
 
-		if (boundary <= linear_backoff_thresh)
-			timeout = ((2 << boundary) - 1) * rto_base;
-		else
-			timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
-				(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
-		timeout = jiffies_to_msecs(timeout);
+		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+			rto_base = tcp_timeout_init(sk);
+		timeout = tcp_model_timeout(sk, boundary, rto_base);
 	}
-	return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+
+	return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
 }
 
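tcp_model_timeout(), factored out above, computes in closed form how long `boundary` backed-off retransmissions take: timeouts double from rto_base until a single RTO would exceed TCP_RTO_MAX, after which each further attempt costs TCP_RTO_MAX. For example, boundary = 6 with rto_base = 200 ms gives (2^7 - 1) * 200 ms = 25.4 s. A standalone sketch of the same math (hypothetical helper, all values in one time unit):

/* Sketch of the closed-form backoff model: total time consumed by
 * `boundary` retransmissions starting at rto_base, doubling each time,
 * with each individual timeout capped at rto_max. */
static unsigned long long model_timeout(unsigned int boundary,
					unsigned long long rto_base,
					unsigned long long rto_max)
{
	unsigned int thresh = 0;	/* ~ ilog2(rto_max / rto_base) */

	while (rto_base << (thresh + 1) <= rto_max)
		thresh++;

	if (boundary <= thresh)
		return ((2ULL << boundary) - 1) * rto_base;
	return ((2ULL << thresh) - 1) * rto_base +
	       (boundary - thresh) * rto_max;
}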
 /* A write timeout has occurred. Process the after effects. */
@@ -218,13 +215,13 @@
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
-	bool expired, do_reset;
+	bool expired = false, do_reset;
 	int retry_until;
 
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 		if (icsk->icsk_retransmits) {
 			dst_negative_advice(sk);
-		} else if (!tp->syn_data && !tp->syn_fastopen) {
+		} else {
 			sk_rethink_txhash(sk);
 		}
 		retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
@@ -250,9 +247,10 @@
 			if (tcp_out_of_resources(sk, do_reset))
 				return 1;
 		}
+	}
+	if (!expired)
 		expired = retransmits_timed_out(sk, retry_until,
 						icsk->icsk_user_timeout);
-	}
 	tcp_fastopen_active_detect_blackhole(sk, expired);
 
 	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
@@ -287,14 +285,14 @@
 	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
 
 	if (inet_csk_ack_scheduled(sk)) {
-		if (!icsk->icsk_ack.pingpong) {
+		if (!inet_csk_in_pingpong_mode(sk)) {
 			/* Delayed ACK missed: inflate ATO. */
 			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
 		} else {
 			/* Delayed ACK missed: leave pingpong mode and
 			 * deflate ATO.
 			 */
-			icsk->icsk_ack.pingpong = 0;
+			inet_csk_exit_pingpong_mode(sk);
 			icsk->icsk_ack.ato      = TCP_ATO_MIN;
 		}
 		tcp_mstamp_refresh(tcp_sk(sk));
@@ -343,7 +341,6 @@
 	struct sk_buff *skb = tcp_send_head(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int max_probes;
-	u32 start_ts;
 
 	if (tp->packets_out || !skb) {
 		icsk->icsk_probes_out = 0;
@@ -358,12 +355,13 @@
 	 * corresponding system limit. We also implement similar policy when
 	 * we use RTO to probe window in tcp_retransmit_timer().
 	 */
-	start_ts = tcp_skb_timestamp(skb);
-	if (!start_ts)
-		skb->skb_mstamp = tp->tcp_mstamp;
-	else if (icsk->icsk_user_timeout &&
-		 (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
-		goto abort;
+	if (icsk->icsk_user_timeout) {
+		u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out,
+						tcp_probe0_base(sk));
+
+		if (elapsed >= icsk->icsk_user_timeout)
+			goto abort;
+	}
 
 	max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
 	if (sock_flag(sk, SOCK_DEAD)) {
@@ -376,7 +374,7 @@
 			return;
 	}
 
-	if (icsk->icsk_probes_out > max_probes) {
+	if (icsk->icsk_probes_out >= max_probes) {
 abort:		tcp_write_err(sk);
 	} else {
 		/* Only send another probe if we didn't close things up. */
@@ -388,20 +386,22 @@
  *	Timer for Fast Open socket to retransmit SYNACK. Note that the
  *	sk here is the child socket, not the parent (listener) socket.
  */
-static void tcp_fastopen_synack_timer(struct sock *sk)
+static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int max_retries = icsk->icsk_syn_retries ? :
 	    sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
-	struct request_sock *req;
+	struct tcp_sock *tp = tcp_sk(sk);
 
-	req = tcp_sk(sk)->fastopen_rsk;
 	req->rsk_ops->syn_ack_timeout(req);
 
 	if (req->num_timeout >= max_retries) {
 		tcp_write_err(sk);
 		return;
 	}
+	/* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */
+	if (icsk->icsk_retransmits == 1)
+		tcp_enter_loss(sk);
 	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
 	 * returned from rtx_syn_ack() to make it more persistent like
 	 * regular retransmit because if the child socket has been accepted
@@ -410,6 +410,8 @@
 	inet_rtx_syn_ack(sk, req);
 	req->num_timeout++;
 	icsk->icsk_retransmits++;
+	if (!tp->retrans_stamp)
+		tp->retrans_stamp = tcp_time_stamp(tp);
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 			  TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
 }
@@ -431,20 +433,21 @@
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock *req;
 
-	if (tp->fastopen_rsk) {
+	req = rcu_dereference_protected(tp->fastopen_rsk,
+					lockdep_sock_is_held(sk));
+	if (req) {
 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 			     sk->sk_state != TCP_FIN_WAIT1);
-		tcp_fastopen_synack_timer(sk);
+		tcp_fastopen_synack_timer(sk, req);
 		/* Before we receive ACK to our SYN-ACK don't retransmit
 		 * anything else (e.g., data or FIN segments).
 		 */
 		return;
 	}
-	if (!tp->packets_out)
-		goto out;
-
-	WARN_ON(tcp_rtx_queue_empty(sk));
+	if (!tp->packets_out || WARN_ON_ONCE(tcp_rtx_queue_empty(sk)))
+		return;
 
 	tp->tlp_high_seq = 0;
 
@@ -482,11 +485,12 @@
 		goto out_reset_timer;
 	}
 
+	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
 	if (tcp_write_timeout(sk))
 		goto out;
 
 	if (icsk->icsk_retransmits == 0) {
-		int mib_idx;
+		int mib_idx = 0;
 
 		if (icsk->icsk_ca_state == TCP_CA_Recovery) {
 			if (tcp_is_sack(tp))
@@ -501,22 +505,20 @@
 				mib_idx = LINUX_MIB_TCPSACKFAILURES;
 			else
 				mib_idx = LINUX_MIB_TCPRENOFAILURES;
-		} else {
-			mib_idx = LINUX_MIB_TCPTIMEOUTS;
 		}
-		__NET_INC_STATS(sock_net(sk), mib_idx);
+		if (mib_idx)
+			__NET_INC_STATS(sock_net(sk), mib_idx);
 	}
 
 	tcp_enter_loss(sk);
 
+	icsk->icsk_retransmits++;
 	if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
 		/* Retransmission failed because of local congestion,
-		 * do not backoff.
+		 * let senders fight for local resources conservatively.
 		 */
-		if (!icsk->icsk_retransmits)
-			icsk->icsk_retransmits = 1;
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-					  min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
+					  TCP_RESOURCE_PROBE_INTERVAL,
 					  TCP_RTO_MAX);
 		goto out;
 	}
@@ -537,7 +539,6 @@
 	 * the 120 second clamps though!
 	 */
 	icsk->icsk_backoff++;
-	icsk->icsk_retransmits++;
 
 out_reset_timer:
 	/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index a5995bb..4849edb 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Pluggable TCP upper layer protocol support.
  *
@@ -6,7 +7,7 @@
  *
  */
 
-#include<linux/module.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/list.h>
@@ -29,18 +30,6 @@
 	return NULL;
 }
 
-static struct tcp_ulp_ops *tcp_ulp_find_id(const int ulp)
-{
-	struct tcp_ulp_ops *e;
-
-	list_for_each_entry_rcu(e, &tcp_ulp_list, list) {
-		if (e->uid == ulp)
-			return e;
-	}
-
-	return NULL;
-}
-
 static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
 {
 	const struct tcp_ulp_ops *ulp = NULL;
@@ -63,18 +52,6 @@
 	return ulp;
 }
 
-static const struct tcp_ulp_ops *__tcp_ulp_lookup(const int uid)
-{
-	const struct tcp_ulp_ops *ulp;
-
-	rcu_read_lock();
-	ulp = tcp_ulp_find_id(uid);
-	if (!ulp || !try_module_get(ulp->owner))
-		ulp = NULL;
-	rcu_read_unlock();
-	return ulp;
-}
-
 /* Attach new upper layer protocol to the list
  * of available protocols.
  */
@@ -119,10 +96,27 @@
 	rcu_read_unlock();
 }
 
+void tcp_update_ulp(struct sock *sk, struct proto *proto)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (!icsk->icsk_ulp_ops) {
+		sk->sk_prot = proto;
+		return;
+	}
+
+	if (icsk->icsk_ulp_ops->update)
+		icsk->icsk_ulp_ops->update(sk, proto);
+}
+
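tcp_update_ulp() lets a layered protocol react when the stack wants to swap sk->sk_prot (e.g. sockmap stacking on top of a ULP): with no ULP installed the proto is replaced directly, otherwise the ULP's new update hook decides. A hypothetical minimal ULP showing the hook's contract (names and bodies made up for illustration):

static int my_ulp_init(struct sock *sk)
{
	return 0;	/* a real ULP would install its wrapped proto here */
}

/* update(): re-apply this ULP's wrapping on top of the proto the core
 * wants to install, instead of being silently bypassed. */
static void my_ulp_update(struct sock *sk, struct proto *p)
{
	sk->sk_prot = p;	/* plus re-grafting ULP callbacks, omitted */
}

static struct tcp_ulp_ops my_ulp_ops __read_mostly = {
	.name	= "my_ulp",	/* hypothetical */
	.owner	= THIS_MODULE,
	.init	= my_ulp_init,
	.update	= my_ulp_update,
};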
 void tcp_cleanup_ulp(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	/* No sock_owned_by_me() check here as at the time the
+	 * stack calls this function, the socket is dead and
+	 * about to be destroyed.
+	 */
 	if (!icsk->icsk_ulp_ops)
 		return;
 
@@ -133,54 +127,35 @@
 	icsk->icsk_ulp_ops = NULL;
 }
 
-/* Change upper layer protocol for socket */
-int tcp_set_ulp(struct sock *sk, const char *name)
+static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	const struct tcp_ulp_ops *ulp_ops;
-	int err = 0;
+	int err;
 
+	err = -EEXIST;
 	if (icsk->icsk_ulp_ops)
-		return -EEXIST;
+		goto out_err;
+
+	err = ulp_ops->init(sk);
+	if (err)
+		goto out_err;
+
+	icsk->icsk_ulp_ops = ulp_ops;
+	return 0;
+out_err:
+	module_put(ulp_ops->owner);
+	return err;
+}
+
+int tcp_set_ulp(struct sock *sk, const char *name)
+{
+	const struct tcp_ulp_ops *ulp_ops;
+
+	sock_owned_by_me(sk);
 
 	ulp_ops = __tcp_ulp_find_autoload(name);
 	if (!ulp_ops)
 		return -ENOENT;
 
-	if (!ulp_ops->user_visible) {
-		module_put(ulp_ops->owner);
-		return -ENOENT;
-	}
-
-	err = ulp_ops->init(sk);
-	if (err) {
-		module_put(ulp_ops->owner);
-		return err;
-	}
-
-	icsk->icsk_ulp_ops = ulp_ops;
-	return 0;
-}
-
-int tcp_set_ulp_id(struct sock *sk, int ulp)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	const struct tcp_ulp_ops *ulp_ops;
-	int err;
-
-	if (icsk->icsk_ulp_ops)
-		return -EEXIST;
-
-	ulp_ops = __tcp_ulp_lookup(ulp);
-	if (!ulp_ops)
-		return -ENOENT;
-
-	err = ulp_ops->init(sk);
-	if (err) {
-		module_put(ulp_ops->owner);
-		return err;
-	}
-
-	icsk->icsk_ulp_ops = ulp_ops;
-	return 0;
+	return __tcp_set_ulp(sk, ulp_ops);
 }
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index ee113ff..3f51e78 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * TCP Vegas congestion control
  *
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 6fcf482..3b36bb1 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * TCP Veno congestion control
  *
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index bec9caf..b2e05c4 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * TCP Westwood+: end-to-end bandwidth estimation for TCP
  *
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 96e829b..e00570d 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *
  *   YeAH TCP
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index c063001..c4b2ccb 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* tunnel4.c: Generic IP tunnel transformer.
  *
  * Copyright (C) 2003 David S. Miller (davem@redhat.com)
@@ -149,34 +150,40 @@
 }
 #endif
 
-static void tunnel4_err(struct sk_buff *skb, u32 info)
+static int tunnel4_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm_tunnel *handler;
 
 	for_each_tunnel_rcu(tunnel4_handlers, handler)
 		if (!handler->err_handler(skb, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static void tunnel64_err(struct sk_buff *skb, u32 info)
+static int tunnel64_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm_tunnel *handler;
 
 	for_each_tunnel_rcu(tunnel64_handlers, handler)
 		if (!handler->err_handler(skb, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 #endif
 
 #if IS_ENABLED(CONFIG_MPLS)
-static void tunnelmpls4_err(struct sk_buff *skb, u32 info)
+static int tunnelmpls4_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm_tunnel *handler;
 
 	for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
 		if (!handler->err_handler(skb, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 #endif
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f8183fd..447defb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -69,19 +70,13 @@
  *					a single port at the same time.
 *	Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
  *	James Chapman		:	Add L2TP encapsulation type.
- *
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) "UDP: " fmt
 
 #include <linux/uaccess.h>
 #include <asm/ioctls.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/types.h>
@@ -105,6 +100,7 @@
 #include <net/net_namespace.h>
 #include <net/icmp.h>
 #include <net/inet_hashtables.h>
+#include <net/ip_tunnels.h>
 #include <net/route.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
@@ -115,6 +111,7 @@
 #include "udp_impl.h"
 #include <net/sock_reuseport.h>
 #include <net/addrconf.h>
+#include <net/udp_tunnel.h>
 
 struct udp_table udp_table __read_mostly;
 EXPORT_SYMBOL(udp_table);
@@ -128,17 +125,6 @@
 #define MAX_UDP_PORTS 65536
 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
 
-/* IPCB reference means this can not be used from early demux */
-static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-	if (!net->ipv4.sysctl_udp_l3mdev_accept &&
-	    skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
-		return true;
-#endif
-	return false;
-}
-
 static int udp_lib_lport_inuse(struct net *net, __u16 num,
 			       const struct udp_hslot *hslot,
 			       unsigned long *bitmap,
@@ -367,25 +353,23 @@
 static int compute_score(struct sock *sk, struct net *net,
 			 __be32 saddr, __be16 sport,
 			 __be32 daddr, unsigned short hnum,
-			 int dif, int sdif, bool exact_dif)
+			 int dif, int sdif)
 {
 	int score;
 	struct inet_sock *inet;
+	bool dev_match;
 
 	if (!net_eq(sock_net(sk), net) ||
 	    udp_sk(sk)->udp_port_hash != hnum ||
 	    ipv6_only_sock(sk))
 		return -1;
 
+	if (sk->sk_rcv_saddr != daddr)
+		return -1;
+
 	score = (sk->sk_family == PF_INET) ? 2 : 1;
+
 	inet = inet_sk(sk);
-
-	if (inet->inet_rcv_saddr) {
-		if (inet->inet_rcv_saddr != daddr)
-			return -1;
-		score += 4;
-	}
-
 	if (inet->inet_daddr) {
 		if (inet->inet_daddr != saddr)
 			return -1;
@@ -398,17 +382,13 @@
 		score += 4;
 	}
 
-	if (sk->sk_bound_dev_if || exact_dif) {
-		bool dev_match = (sk->sk_bound_dev_if == dif ||
-				  sk->sk_bound_dev_if == sdif);
+	dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
+					dif, sdif);
+	if (!dev_match)
+		return -1;
+	score += 4;
 
-		if (!dev_match)
-			return -1;
-		if (sk->sk_bound_dev_if)
-			score += 4;
-	}
-
-	if (sk->sk_incoming_cpu == raw_smp_processor_id())
+	if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
 		score++;
 	return score;
 }
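After this rewrite, compute_score() rejects on the first hard mismatch and otherwise adds fixed weights, so the most specific socket (connected, device-bound) wins the bucket scan; the local address and device match are now mandatory rather than scored. A toy model of the pattern (hypothetical helper; the +4 weight matches the code above):

/* Toy model of the scoring pattern: an attribute the socket has set
 * must match (else the socket is disqualified with -1) and, when
 * matched, adds a fixed weight; an unset attribute is a wildcard
 * worth nothing. */
static int score_attr(int score, int set, int match)
{
	if (score < 0 || (set && !match))
		return -1;
	return set ? score + 4 : score;
}

compute_score() applies this shape to the connected peer address and port; the CPU-locality check is a tie-breaking +1 on top.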
@@ -429,7 +409,7 @@
 static struct sock *udp4_lib_lookup2(struct net *net,
 				     __be32 saddr, __be16 sport,
 				     __be32 daddr, unsigned int hnum,
-				     int dif, int sdif, bool exact_dif,
+				     int dif, int sdif,
 				     struct udp_hslot *hslot2,
 				     struct sk_buff *skb)
 {
@@ -441,14 +421,15 @@
 	badness = 0;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
 		score = compute_score(sk, net, saddr, sport,
-				      daddr, hnum, dif, sdif, exact_dif);
+				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
+			if (sk->sk_reuseport &&
+			    sk->sk_state != TCP_ESTABLISHED) {
 				hash = udp_ehashfn(net, daddr, hnum,
 						   saddr, sport);
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
-				if (result)
+				if (result && !reuseport_has_conns(sk, false))
 					return result;
 			}
 			badness = score;
@@ -465,65 +446,29 @@
 		__be16 sport, __be32 daddr, __be16 dport, int dif,
 		int sdif, struct udp_table *udptable, struct sk_buff *skb)
 {
-	struct sock *sk, *result;
+	struct sock *result;
 	unsigned short hnum = ntohs(dport);
-	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
-	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
-	bool exact_dif = udp_lib_exact_dif_match(net, skb);
-	int score, badness;
-	u32 hash = 0;
+	unsigned int hash2, slot2;
+	struct udp_hslot *hslot2;
 
-	if (hslot->count > 10) {
-		hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+	slot2 = hash2 & udptable->mask;
+	hslot2 = &udptable->hash2[slot2];
+
+	result = udp4_lib_lookup2(net, saddr, sport,
+				  daddr, hnum, dif, sdif,
+				  hslot2, skb);
+	if (!result) {
+		hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
 		slot2 = hash2 & udptable->mask;
 		hslot2 = &udptable->hash2[slot2];
-		if (hslot->count < hslot2->count)
-			goto begin;
 
 		result = udp4_lib_lookup2(net, saddr, sport,
-					  daddr, hnum, dif, sdif,
-					  exact_dif, hslot2, skb);
-		if (!result) {
-			unsigned int old_slot2 = slot2;
-			hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
-			slot2 = hash2 & udptable->mask;
-			/* avoid searching the same slot again. */
-			if (unlikely(slot2 == old_slot2))
-				return result;
-
-			hslot2 = &udptable->hash2[slot2];
-			if (hslot->count < hslot2->count)
-				goto begin;
-
-			result = udp4_lib_lookup2(net, saddr, sport,
-						  daddr, hnum, dif, sdif,
-						  exact_dif, hslot2, skb);
-		}
-		if (unlikely(IS_ERR(result)))
-			return NULL;
-		return result;
+					  htonl(INADDR_ANY), hnum, dif, sdif,
+					  hslot2, skb);
 	}
-begin:
-	result = NULL;
-	badness = 0;
-	sk_for_each_rcu(sk, &hslot->head) {
-		score = compute_score(sk, net, saddr, sport,
-				      daddr, hnum, dif, sdif, exact_dif);
-		if (score > badness) {
-			if (sk->sk_reuseport) {
-				hash = udp_ehashfn(net, daddr, hnum,
-						   saddr, sport);
-				result = reuseport_select_sock(sk, hash, skb,
-							sizeof(struct udphdr));
-				if (unlikely(IS_ERR(result)))
-					return NULL;
-				if (result)
-					return result;
-			}
-			result = sk;
-			badness = score;
-		}
-	}
+	if (IS_ERR(result))
+		return NULL;
 	return result;
 }
 EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
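The slimmed-down __udp4_lib_lookup() always goes through the port+address table (hash2): one pass keyed on the exact destination address, then a fallback pass keyed on INADDR_ANY for wildcard binds; the old port-only (hash1) slow path is gone. A sketch of the control flow, with hypothetical stand-ins for the kernel helpers:

/* hash2() models ipv4_portaddr_hash(); best_in_bucket() models
 * udp4_lib_lookup2()'s scored bucket scan. Both are hypothetical. */
struct model_sock;
unsigned int hash2(unsigned int daddr, unsigned short port);
struct model_sock *best_in_bucket(unsigned int slot);

/* Sketch: exact-destination bucket first, INADDR_ANY bucket second. */
static struct model_sock *lookup2_model(unsigned int daddr,
					unsigned short port)
{
	struct model_sock *sk = best_in_bucket(hash2(daddr, port));

	if (!sk)
		sk = best_in_bucket(hash2(0 /* INADDR_ANY */, port));
	return sk;
}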
@@ -542,7 +487,11 @@
 struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
 				 __be16 sport, __be16 dport)
 {
-	return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table);
+	const struct iphdr *iph = ip_hdr(skb);
+
+	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+				 iph->daddr, dport, inet_iif(skb),
+				 inet_sdif(skb), &udp_table, NULL);
 }
 EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
 
@@ -577,14 +526,98 @@
 	    (inet->inet_dport != rmt_port && inet->inet_dport) ||
 	    (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
 	    ipv6_only_sock(sk) ||
-	    (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
-	     sk->sk_bound_dev_if != sdif))
+	    !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
 		return false;
 	if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
 		return false;
 	return true;
 }
 
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+void udp_encap_enable(void)
+{
+	static_branch_inc(&udp_encap_needed_key);
+}
+EXPORT_SYMBOL(udp_encap_enable);
+
+/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
+ * through error handlers in encapsulations looking for a match.
+ */
+static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info)
+{
+	int i;
+
+	for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
+		int (*handler)(struct sk_buff *skb, u32 info);
+		const struct ip_tunnel_encap_ops *encap;
+
+		encap = rcu_dereference(iptun_encaps[i]);
+		if (!encap)
+			continue;
+		handler = encap->err_handler;
+		if (handler && !handler(skb, info))
+			return 0;
+	}
+
+	return -ENOENT;
+}
+
+/* Try to match ICMP errors to UDP tunnels by looking up a socket without
+ * reversing source and destination port: this will match tunnels that force the
+ * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
+ * lwtunnels might actually break this assumption by being configured with
+ * different destination ports on endpoints; in this case we won't be able to
+ * trace ICMP messages back to them.
+ *
+ * If this doesn't match any socket, probe tunnels with arbitrary destination
+ * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
+ * we've sent packets to won't necessarily match the local destination port.
+ *
+ * Then ask the tunnel implementation to match the error against a valid
+ * association.
+ *
+ * Return an error if we can't find a match, the socket if we need further
+ * processing, zero otherwise.
+ */
+static struct sock *__udp4_lib_err_encap(struct net *net,
+					 const struct iphdr *iph,
+					 struct udphdr *uh,
+					 struct udp_table *udptable,
+					 struct sk_buff *skb, u32 info)
+{
+	int network_offset, transport_offset;
+	struct sock *sk;
+
+	network_offset = skb_network_offset(skb);
+	transport_offset = skb_transport_offset(skb);
+
+	/* Network header needs to point to the outer IPv4 header inside ICMP */
+	skb_reset_network_header(skb);
+
+	/* Transport header needs to point to the UDP header */
+	skb_set_transport_header(skb, iph->ihl << 2);
+
+	sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
+			       iph->saddr, uh->dest, skb->dev->ifindex, 0,
+			       udptable, NULL);
+	if (sk) {
+		int (*lookup)(struct sock *sk, struct sk_buff *skb);
+		struct udp_sock *up = udp_sk(sk);
+
+		lookup = READ_ONCE(up->encap_err_lookup);
+		if (!lookup || lookup(sk, skb))
+			sk = NULL;
+	}
+
+	if (!sk)
+		sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
+
+	skb_set_transport_header(skb, transport_offset);
+	skb_set_network_header(skb, network_offset);
+
+	return sk;
+}
+
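A tunnel that wants these ICMP errors delivered registers an encap_err_lookup callback; per the lookup code above, it must return 0 when the wrapped packet belongs to one of its associations, and non-zero to let the stack keep probing. A hypothetical minimal callback following that contract:

#define MY_TUNNEL_PORT 4789	/* hypothetical; VXLAN's port, for illustration */

/* Hypothetical encap_err_lookup implementation: accept the ICMP error
 * only if the inner UDP destination port matches the tunnel port this
 * socket services. */
static int my_tunnel_err_lookup(struct sock *sk, struct sk_buff *skb)
{
	const struct udphdr *uh = udp_hdr(skb);

	if (uh->dest == htons(MY_TUNNEL_PORT))
		return 0;	/* ours: error handling may proceed */
	return -ENOENT;		/* not ours: try other handlers */
}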
 /*
  * This routine is called by the ICMP module when it gets some
  * sort of error condition.  If err < 0 then the socket should
@@ -596,24 +629,38 @@
  * to find the appropriate port.
  */
 
-void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
+int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 {
 	struct inet_sock *inet;
 	const struct iphdr *iph = (const struct iphdr *)skb->data;
 	struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
+	bool tunnel = false;
 	struct sock *sk;
 	int harderr;
 	int err;
 	struct net *net = dev_net(skb->dev);
 
 	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
-			       iph->saddr, uh->source, skb->dev->ifindex, 0,
-			       udptable, NULL);
+			       iph->saddr, uh->source, skb->dev->ifindex,
+			       inet_sdif(skb), udptable, NULL);
 	if (!sk) {
-		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
-		return;	/* No socket for error */
+		/* No socket for error: try tunnels before discarding */
+		sk = ERR_PTR(-ENOENT);
+		if (static_branch_unlikely(&udp_encap_needed_key)) {
+			sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
+						  info);
+			if (!sk)
+				return 0;
+		}
+
+		if (IS_ERR(sk)) {
+			__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+			return PTR_ERR(sk);
+		}
+
+		tunnel = true;
 	}
 
 	err = 0;
@@ -656,6 +703,10 @@
 	 *      RFC1122: OK.  Passes ICMP errors back to application, as per
 	 *	4.1.3.3.
 	 */
+	if (tunnel) {
+		/* ...not for tunnels though: we don't have a sending socket */
+		goto out;
+	}
 	if (!inet->recverr) {
 		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
 			goto out;
@@ -665,12 +716,12 @@
 	sk->sk_err = err;
 	sk->sk_error_report(sk);
 out:
-	return;
+	return 0;
 }
 
-void udp_err(struct sk_buff *skb, u32 info)
+int udp_err(struct sk_buff *skb, u32 info)
 {
-	__udp4_lib_err(skb, info, &udp_table);
+	return __udp4_lib_err(skb, info, &udp_table);
 }
 
 /*
@@ -770,6 +821,7 @@
 	int is_udplite = IS_UDPLITE(sk);
 	int offset = skb_transport_offset(skb);
 	int len = skb->len - offset;
+	int datalen = len - sizeof(*uh);
 	__wsum csum = 0;
 
 	/*
@@ -785,20 +837,30 @@
 		const int hlen = skb_network_header_len(skb) +
 				 sizeof(struct udphdr);
 
-		if (hlen + cork->gso_size > cork->fragsize)
+		if (hlen + cork->gso_size > cork->fragsize) {
+			kfree_skb(skb);
 			return -EINVAL;
-		if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+		}
+		if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+			kfree_skb(skb);
 			return -EINVAL;
-		if (sk->sk_no_check_tx)
+		}
+		if (sk->sk_no_check_tx) {
+			kfree_skb(skb);
 			return -EINVAL;
+		}
 		if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
-		    dst_xfrm(skb_dst(skb)))
+		    dst_xfrm(skb_dst(skb))) {
+			kfree_skb(skb);
 			return -EIO;
+		}
 
-		skb_shinfo(skb)->gso_size = cork->gso_size;
-		skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
-		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(len - sizeof(uh),
-							 cork->gso_size);
+		if (datalen > cork->gso_size) {
+			skb_shinfo(skb)->gso_size = cork->gso_size;
+			skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+			skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
+								 cork->gso_size);
+		}
 		goto csum_partial;
 	}
 
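With the new datalen > cork->gso_size guard, a send that fits in a single segment is no longer marked GSO at all; when it is marked, gso_segs is the rounded-up segment count. For example, 2800 payload bytes with gso_size 1400 yield two segments, while 1000 bytes yield a plain skb. A sketch of the accounting (hypothetical helper):

/* Sketch: segment accounting for a UDP_SEGMENT send. Returns 0 when
 * the payload fits in one segment and GSO should not be requested. */
static unsigned int udp_gso_segs(unsigned int datalen, unsigned int gso_size)
{
	if (datalen <= gso_size)
		return 0;				/* single segment */
	return (datalen + gso_size - 1) / gso_size;	/* DIV_ROUND_UP */
}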
@@ -1042,7 +1104,7 @@
 	}
 
 	if (ipv4_is_multicast(daddr)) {
-		if (!ipc.oif)
+		if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
 			ipc.oif = inet->mc_index;
 		if (!saddr)
 			saddr = inet->mc_addr;
@@ -1072,7 +1134,7 @@
 
 		fl4 = &fl4_stack;
 
-		flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
+		flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos,
 				   RT_SCOPE_UNIVERSE, sk->sk_protocol,
 				   flow_flags,
 				   faddr, saddr, dport, inet->inet_sport,
@@ -1235,6 +1297,27 @@
 
 #define UDP_SKB_IS_STATELESS 0x80000000
 
+/* all head states (dst, sk, nf conntrack) except skb extensions are
+ * cleared by udp_rcv().
+ *
+ * We need to preserve secpath, if present, to eventually process
+ * IP_CMSG_PASSSEC at recvmsg() time.
+ *
+ * Other extensions can be cleared.
+ */
+static bool udp_try_make_stateless(struct sk_buff *skb)
+{
+	if (!skb_has_extensions(skb))
+		return true;
+
+	if (!secpath_exists(skb)) {
+		skb_ext_reset(skb);
+		return true;
+	}
+
+	return false;
+}
+
 static void udp_set_dev_scratch(struct sk_buff *skb)
 {
 	struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
@@ -1246,14 +1329,24 @@
 	scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
 	scratch->is_linear = !skb_is_nonlinear(skb);
 #endif
-	/* all head states execept sp (dst, sk, nf) are always cleared by
-	 * udp_rcv() and we need to preserve secpath, if present, to eventually
-	 * process IP_CMSG_PASSSEC at recvmsg() time
-	 */
-	if (likely(!skb_sec_path(skb)))
+	if (udp_try_make_stateless(skb))
 		scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
 }
 
+static void udp_skb_csum_unnecessary_set(struct sk_buff *skb)
+{
+	/* We come here after udp_lib_checksum_complete() returned 0.
+	 * This means that __skb_checksum_complete() might have
+	 * set skb->csum_valid to 1.
+	 * On 64bit platforms, we can set csum_unnecessary
+	 * to true, but only if the skb is not shared.
+	 */
+#if BITS_PER_LONG == 64
+	if (!skb_shared(skb))
+		udp_skb_scratch(skb)->csum_unnecessary = true;
+#endif
+}
+
 static int udp_skb_truesize(struct sk_buff *skb)
 {
 	return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
@@ -1488,10 +1581,7 @@
 			*total += skb->truesize;
 			kfree_skb(skb);
 		} else {
-			/* the csum related bits could be changed, refresh
-			 * the scratch area
-			 */
-			udp_set_dev_scratch(skb);
+			udp_skb_csum_unnecessary_set(skb);
 			break;
 		}
 	}
@@ -1515,7 +1605,7 @@
 
 	spin_lock_bh(&rcvq->lock);
 	skb = __first_packet_length(sk, rcvq, &total);
-	if (!skb && !skb_queue_empty(sk_queue)) {
+	if (!skb && !skb_queue_empty_lockless(sk_queue)) {
 		spin_lock(&sk_queue->lock);
 		skb_queue_splice_tail_init(sk_queue, rcvq);
 		spin_unlock(&sk_queue->lock);
@@ -1559,7 +1649,7 @@
 EXPORT_SYMBOL(udp_ioctl);
 
 struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
-			       int noblock, int *peeked, int *off, int *err)
+			       int noblock, int *off, int *err)
 {
 	struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
 	struct sk_buff_head *queue;
@@ -1578,19 +1668,17 @@
 			break;
 
 		error = -EAGAIN;
-		*peeked = 0;
 		do {
 			spin_lock_bh(&queue->lock);
 			skb = __skb_try_recv_from_queue(sk, queue, flags,
 							udp_skb_destructor,
-							peeked, off, err,
-							&last);
+							off, err, &last);
 			if (skb) {
 				spin_unlock_bh(&queue->lock);
 				return skb;
 			}
 
-			if (skb_queue_empty(sk_queue)) {
+			if (skb_queue_empty_lockless(sk_queue)) {
 				spin_unlock_bh(&queue->lock);
 				goto busy_check;
 			}
@@ -1605,8 +1693,7 @@
 
 			skb = __skb_try_recv_from_queue(sk, queue, flags,
 							udp_skb_dtor_locked,
-							peeked, off, err,
-							&last);
+							off, err, &last);
 			spin_unlock(&sk_queue->lock);
 			spin_unlock_bh(&queue->lock);
 			if (skb)
@@ -1617,7 +1704,7 @@
 				break;
 
 			sk_busy_loop(sk, flags & MSG_DONTWAIT);
-		} while (!skb_queue_empty(sk_queue));
+		} while (!skb_queue_empty_lockless(sk_queue));
 
 		/* sk_queue is empty, reader_queue may contain peeked packets */
 	} while (timeo &&
@@ -1641,8 +1728,7 @@
 	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
 	struct sk_buff *skb;
 	unsigned int ulen, copied;
-	int peeked, peeking, off;
-	int err;
+	int off, err, peeking = flags & MSG_PEEK;
 	int is_udplite = IS_UDPLITE(sk);
 	bool checksum_valid = false;
 
@@ -1650,9 +1736,8 @@
 		return ip_recv_error(sk, msg, len, addr_len);
 
 try_again:
-	peeking = flags & MSG_PEEK;
 	off = sk_peek_offset(sk, flags);
-	skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
+	skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
 	if (!skb)
 		return err;
 
@@ -1690,7 +1775,7 @@
 	}
 
 	if (unlikely(err)) {
-		if (!peeked) {
+		if (!peeking) {
 			atomic_inc(&sk->sk_drops);
 			UDP_INC_STATS(sock_net(sk),
 				      UDP_MIB_INERRORS, is_udplite);
@@ -1699,7 +1784,7 @@
 		return err;
 	}
 
-	if (!peeked)
+	if (!peeking)
 		UDP_INC_STATS(sock_net(sk),
 			      UDP_MIB_INDATAGRAMS, is_udplite);
 
@@ -1712,7 +1797,15 @@
 		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 		*addr_len = sizeof(*sin);
+
+		if (cgroup_bpf_enabled)
+			BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
+							(struct sockaddr *)sin);
 	}
+
+	if (udp_sk(sk)->gro_enabled)
+		udp_cmsg_recv(msg, sk, skb);
+
 	if (inet->cmsg_flags)
 		ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
 
@@ -1852,7 +1945,7 @@
 }
 EXPORT_SYMBOL(udp_lib_rehash);
 
-static void udp_v4_rehash(struct sock *sk)
+void udp_v4_rehash(struct sock *sk)
 {
 	u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
 					  inet_sk(sk)->inet_rcv_saddr,
@@ -1889,13 +1982,6 @@
 	return 0;
 }
 
-static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
-void udp_encap_enable(void)
-{
-	static_branch_enable(&udp_encap_needed_key);
-}
-EXPORT_SYMBOL(udp_encap_enable);
-
 /* returns:
  *  -1: error
  *   0: success
@@ -1904,7 +1990,7 @@
  * Note that in the success and error cases, the skb is assumed to
  * have either been requeued or freed.
  */
-static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
 	int is_udplite = IS_UDPLITE(sk);
@@ -1914,7 +2000,7 @@
 	 */
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto drop;
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
 		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
@@ -2007,6 +2093,27 @@
 	return -1;
 }
 
+static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct sk_buff *next, *segs;
+	int ret;
+
+	if (likely(!udp_unexpected_gso(sk, skb)))
+		return udp_queue_rcv_one_skb(sk, skb);
+
+	BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_SGO_CB_OFFSET);
+	__skb_push(skb, -skb_mac_offset(skb));
+	segs = udp_rcv_segment(sk, skb, true);
+	for (skb = segs; skb; skb = next) {
+		next = skb->next;
+		__skb_pull(skb, skb_transport_offset(skb));
+		ret = udp_queue_rcv_one_skb(sk, skb);
+		if (ret > 0)
+			ip_protocol_deliver_rcu(dev_net(skb->dev), skb, -ret);
+	}
+	return 0;
+}
+
 /* For TCP sockets, sk_rx_dst is protected by socket lock
  * For UDP, we use xchg() to guard against concurrent changes.
  */
@@ -2095,7 +2202,7 @@
 
 /* Initialize UDP checksum. If exited with zero value (success),
  * CHECKSUM_UNNECESSARY means, that no more checks are required.
- * Otherwise, csum completion requires chacksumming packet body,
+ * Otherwise, csum completion requires checksumming packet body,
  * including udp header and folding it to skb->csum.
  */
 static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
@@ -2149,8 +2256,7 @@
 	int ret;
 
 	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
-		skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
-					 inet_compute_pseudo);
+		skb_checksum_try_convert(skb, IPPROTO_UDP, inet_compute_pseudo);
 
 	ret = udp_queue_rcv_skb(sk, skb);
 
@@ -2223,7 +2329,7 @@
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto drop;
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	/* No socket. Drop packet silently, if checksum is wrong */
 	if (udp_lib_checksum_complete(skb))
@@ -2398,11 +2504,15 @@
 	bool slow = lock_sock_fast(sk);
 	udp_flush_pending_frames(sk);
 	unlock_sock_fast(sk, slow);
-	if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
-		void (*encap_destroy)(struct sock *sk);
-		encap_destroy = READ_ONCE(up->encap_destroy);
-		if (encap_destroy)
-			encap_destroy(sk);
+	if (static_branch_unlikely(&udp_encap_needed_key)) {
+		if (up->encap_type) {
+			void (*encap_destroy)(struct sock *sk);
+			encap_destroy = READ_ONCE(up->encap_destroy);
+			if (encap_destroy)
+				encap_destroy(sk);
+		}
+		if (up->encap_enabled)
+			static_branch_dec(&udp_encap_needed_key);
 	}
 }
 
@@ -2447,7 +2557,9 @@
 			/* FALLTHROUGH */
 		case UDP_ENCAP_L2TPINUDP:
 			up->encap_type = val;
-			udp_encap_enable();
+			lock_sock(sk);
+			udp_tunnel_encap_enable(sk->sk_socket);
+			release_sock(sk);
 			break;
 		default:
 			err = -ENOPROTOOPT;
@@ -2469,6 +2581,14 @@
 		up->gso_size = val;
 		break;
 
+	case UDP_GRO:
+		lock_sock(sk);
+		if (valbool)
+			udp_tunnel_encap_enable(sk->sk_socket);
+		up->gro_enabled = valbool;
+		release_sock(sk);
+		break;
+
 	/*
 	 * 	UDP-Lite's partial checksum coverage (RFC 3828).
 	 */
@@ -2620,7 +2740,7 @@
 	__poll_t mask = datagram_poll(file, sock, wait);
 	struct sock *sk = sock->sk;
 
-	if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
+	if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* Check for false positives due to checksum errors */
@@ -2784,7 +2904,7 @@
 	__u16 srcp	  = ntohs(inet->inet_sport);
 
 	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d",
+		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u",
 		bucket, src, srcp, dest, destp, sp->sk_state,
 		sk_wmem_alloc_get(sp),
 		udp_rqueue_get(sp),
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index d9ad986..910555a 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * udp_diag.c	Module for monitoring UDP transport protocols sockets.
  *
  * Authors:	Pavel Emelyanov, <xemul@parallels.com>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 
@@ -42,6 +38,7 @@
 
 	rcu_read_lock();
 	if (req->sdiag_family == AF_INET)
+		/* src and dst are swapped for historical reasons */
 		sk = __udp4_lib_lookup(net,
 				req->id.idiag_src[0], req->id.idiag_sport,
 				req->id.idiag_dst[0], req->id.idiag_dport,
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index e7d18b1..6b2fa77 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -7,9 +7,10 @@
 #include <net/inet_common.h>
 
 int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int);
-void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
+int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
 
 int udp_v4_get_port(struct sock *sk, unsigned short snum);
+void udp_v4_rehash(struct sock *sk);
 
 int udp_setsockopt(struct sock *sk, int level, int optname,
 		   char __user *optval, unsigned int optlen);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0c0522b..a3908e5 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -1,18 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPV4 GSO/GRO offload support
  *	Linux INET implementation
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	UDPv4 GSO support
  */
 
 #include <linux/skbuff.h>
 #include <net/udp.h>
 #include <net/protocol.h>
+#include <net/inet_common.h>
 
 static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	netdev_features_t features,
@@ -211,7 +208,7 @@
 		gso_skb->destructor = NULL;
 
 	segs = skb_segment(gso_skb, features);
-	if (unlikely(IS_ERR_OR_NULL(segs))) {
+	if (IS_ERR_OR_NULL(segs)) {
 		if (copy_dtor)
 			gso_skb->destructor = sock_wfree;
 		return segs;
@@ -227,6 +224,11 @@
 	seg = segs;
 	uh = udp_hdr(seg);
 
+	/* preserve TX timestamp flags and TS key for first segment */
+	skb_shinfo(seg)->tskey = skb_shinfo(gso_skb)->tskey;
+	skb_shinfo(seg)->tx_flags |=
+			(skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP);
+
 	/* compute checksum adjustment based on old length versus new */
 	newlen = htons(sizeof(*uh) + mss);
 	check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
@@ -343,6 +345,64 @@
 	return segs;
 }
 
+#define UDP_GRO_CNT_MAX 64
+static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+					       struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	struct sk_buff *pp = NULL;
+	struct udphdr *uh2;
+	struct sk_buff *p;
+	unsigned int ulen;
+
+	/* requires non zero csum, for symmetry with GSO */
+	if (!uh->check) {
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+	}
+
+	/* Do not deal with padded or malicious packets, sorry! */
+	ulen = ntohs(uh->len);
+	if (ulen <= sizeof(*uh) || ulen != skb_gro_len(skb)) {
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+	}
+	/* pull encapsulating udp header */
+	skb_gro_pull(skb, sizeof(struct udphdr));
+	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+	list_for_each_entry(p, head, list) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		uh2 = udp_hdr(p);
+
+		/* Match ports only, as csum is always non zero */
+		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
+		/* Terminate the flow on len mismatch or if it grows "too much".
+		 * Under a small packet flood, GRO count could otherwise grow a lot
+		 * leading to excessive truesize values.
+		 * On len mismatch merge the first packet shorter than gso_size,
+		 * otherwise complete the GRO packet.
+		 */
+		if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) ||
+		    ulen != ntohs(uh2->len) ||
+		    NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
+			pp = p;
+
+		return pp;
+	}
+
+	/* mismatch, but we never need to flush */
+	return NULL;
+}
+
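The merge rule above in numbers: equal-length packets keep coalescing; the first shorter packet is merged and then closes the aggregate (it becomes the final, short segment); a longer packet closes it without merging; and the aggregate always completes once it holds UDP_GRO_CNT_MAX (64) packets, bounding truesize under floods. The decision as a predicate (hypothetical helper; lengths taken after byte-order conversion):

/* Sketch: must the aggregate be completed after considering a new
 * packet of length ulen against an existing one of length plen?
 * `count_after_merge` is the aggregate's packet count if merged. */
static bool udp_gro_must_complete(unsigned int ulen, unsigned int plen,
				  unsigned int count_after_merge)
{
	if (ulen > plen)
		return true;	/* longer: complete without merging */
	if (ulen < plen)
		return true;	/* shorter: merged as the final segment */
	return count_after_merge >= 64;		/* UDP_GRO_CNT_MAX */
}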
+INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+						   __be16 sport, __be16 dport));
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 				struct udphdr *uh, udp_lookup_t lookup)
 {
@@ -353,23 +413,28 @@
 	int flush = 1;
 	struct sock *sk;
 
+	rcu_read_lock();
+	sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
+				udp4_lib_lookup_skb, skb, uh->source, uh->dest);
+	if (!sk)
+		goto out_unlock;
+
+	if (udp_sk(sk)->gro_enabled) {
+		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+		rcu_read_unlock();
+		return pp;
+	}
+
 	if (NAPI_GRO_CB(skb)->encap_mark ||
 	    (skb->ip_summed != CHECKSUM_PARTIAL &&
 	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
-	     !NAPI_GRO_CB(skb)->csum_valid))
-		goto out;
+	     !NAPI_GRO_CB(skb)->csum_valid) ||
+	    !udp_sk(sk)->gro_receive)
+		goto out_unlock;
 
 	/* mark that this skb passed once through the tunnel gro layer */
 	NAPI_GRO_CB(skb)->encap_mark = 1;
 
-	rcu_read_lock();
-	sk = (*lookup)(skb, uh->source, uh->dest);
-
-	if (sk && udp_sk(sk)->gro_receive)
-		goto unflush;
-	goto out_unlock;
-
-unflush:
 	flush = 0;
 
 	list_for_each_entry(p, head, list) {
@@ -394,18 +459,17 @@
 
 out_unlock:
 	rcu_read_unlock();
-out:
 	skb_gro_flush_final(skb, pp, flush);
 	return pp;
 }
 EXPORT_SYMBOL(udp_gro_receive);
 
-static struct sk_buff *udp4_gro_receive(struct list_head *head,
-					struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
-	if (unlikely(!uh))
+	if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
 		goto flush;
 
 	/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -427,6 +491,19 @@
 	return NULL;
 }
 
+static int udp_gro_complete_segment(struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	skb->csum_start = (unsigned char *)uh - skb->head;
+	skb->csum_offset = offsetof(struct udphdr, check);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+
+	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+	return 0;
+}
+
 int udp_gro_complete(struct sk_buff *skb, int nhoff,
 		     udp_lookup_t lookup)
 {
@@ -437,16 +514,22 @@
 
 	uh->len = newlen;
 
-	/* Set encapsulation before calling into inner gro_complete() functions
-	 * to make them set up the inner offsets.
-	 */
-	skb->encapsulation = 1;
-
 	rcu_read_lock();
-	sk = (*lookup)(skb, uh->source, uh->dest);
-	if (sk && udp_sk(sk)->gro_complete)
+	sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
+				udp4_lib_lookup_skb, skb, uh->source, uh->dest);
+	if (sk && udp_sk(sk)->gro_enabled) {
+		err = udp_gro_complete_segment(skb);
+	} else if (sk && udp_sk(sk)->gro_complete) {
+		skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
+					: SKB_GSO_UDP_TUNNEL;
+
+		/* Set encapsulation before calling into inner gro_complete()
+		 * functions to make them set up the inner offsets.
+		 */
+		skb->encapsulation = 1;
 		err = udp_sk(sk)->gro_complete(sk, skb,
 				nhoff + sizeof(struct udphdr));
+	}
 	rcu_read_unlock();
 
 	if (skb->remcsum_offload)
@@ -456,18 +539,14 @@
 }
 EXPORT_SYMBOL(udp_gro_complete);
 
-static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check) {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+	if (uh->check)
 		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
 					  iph->daddr, 0);
-	} else {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	}
 
 	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
 }
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6539ff1..150e6f0 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/socket.h>
@@ -20,6 +21,14 @@
 	if (err < 0)
 		goto error;
 
+	if (cfg->bind_ifindex) {
+		err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
+					(void *)&cfg->bind_ifindex,
+					sizeof(cfg->bind_ifindex));
+		if (err < 0)
+			goto error;
+	}
+
 	udp_addr.sin_family = AF_INET;
 	udp_addr.sin_addr = cfg->local_ip;
 	udp_addr.sin_port = cfg->local_udp_port;
@@ -68,6 +77,7 @@
 
 	udp_sk(sk)->encap_type = cfg->encap_type;
 	udp_sk(sk)->encap_rcv = cfg->encap_rcv;
+	udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup;
 	udp_sk(sk)->encap_destroy = cfg->encap_destroy;
 	udp_sk(sk)->gro_receive = cfg->gro_receive;
 	udp_sk(sk)->gro_complete = cfg->gro_complete;
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 8545457..5936d66 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  UDPLITE     An implementation of the UDP-Lite protocol (RFC 3828).
  *
@@ -5,10 +6,6 @@
  *
  *  Changes:
  *  Fixes:
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) "UDPLite: " fmt
@@ -25,9 +22,9 @@
 	return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
 }
 
-static void udplite_err(struct sk_buff *skb, u32 info)
+static int udplite_err(struct sk_buff *skb, u32 info)
 {
-	__udp4_lib_err(skb, info, &udplite_table);
+	return __udp4_lib_err(skb, info, &udplite_table);
 }
 
 static const struct net_protocol udplite_protocol = {
@@ -53,6 +50,7 @@
 	.sendpage	   = udp_sendpage,
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
+	.rehash		   = udp_v4_rehash,
 	.get_port	   = udp_v4_get_port,
 	.memory_allocated  = &udp_memory_allocated,
 	.sysctl_mem	   = sysctl_udp_mem,
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
deleted file mode 100644
index 856d2df..0000000
--- a/net/ipv4/xfrm4_mode_beet.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4.
- *
- * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
- *                    Miika Komu     <miika@iki.fi>
- *                    Herbert Xu     <herbert@gondor.apana.org.au>
- *                    Abhinav Pathak <abhinav.pathak@hiit.fi>
- *                    Jeff Ahrenholz <ahrenholz@gmail.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/ip.h>
-#include <net/xfrm.h>
-
-static void xfrm4_beet_make_header(struct sk_buff *skb)
-{
-	struct iphdr *iph = ip_hdr(skb);
-
-	iph->ihl = 5;
-	iph->version = 4;
-
-	iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
-	iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
-
-	iph->id = XFRM_MODE_SKB_CB(skb)->id;
-	iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off;
-	iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl;
-}
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
- */
-static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct ip_beet_phdr *ph;
-	struct iphdr *top_iph;
-	int hdrlen, optlen;
-
-	hdrlen = 0;
-	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
-	if (unlikely(optlen))
-		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
-
-	skb_set_network_header(skb, -x->props.header_len -
-				    hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
-	if (x->sel.family != AF_INET6)
-		skb->network_header += IPV4_BEET_PHMAXLEN;
-	skb->mac_header = skb->network_header +
-			  offsetof(struct iphdr, protocol);
-	skb->transport_header = skb->network_header + sizeof(*top_iph);
-
-	xfrm4_beet_make_header(skb);
-
-	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);
-
-	top_iph = ip_hdr(skb);
-
-	if (unlikely(optlen)) {
-		BUG_ON(optlen < 0);
-
-		ph->padlen = 4 - (optlen & 4);
-		ph->hdrlen = optlen / 8;
-		ph->nexthdr = top_iph->protocol;
-		if (ph->padlen)
-			memset(ph + 1, IPOPT_NOP, ph->padlen);
-
-		top_iph->protocol = IPPROTO_BEETPH;
-		top_iph->ihl = sizeof(struct iphdr) / 4;
-	}
-
-	top_iph->saddr = x->props.saddr.a4;
-	top_iph->daddr = x->id.daddr.a4;
-
-	return 0;
-}
-
-static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct iphdr *iph;
-	int optlen = 0;
-	int err = -EINVAL;
-
-	if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
-		struct ip_beet_phdr *ph;
-		int phlen;
-
-		if (!pskb_may_pull(skb, sizeof(*ph)))
-			goto out;
-
-		ph = (struct ip_beet_phdr *)skb->data;
-
-		phlen = sizeof(*ph) + ph->padlen;
-		optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
-		if (optlen < 0 || optlen & 3 || optlen > 250)
-			goto out;
-
-		XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr;
-
-		if (!pskb_may_pull(skb, phlen))
-			goto out;
-		__skb_pull(skb, phlen);
-	}
-
-	skb_push(skb, sizeof(*iph));
-	skb_reset_network_header(skb);
-	skb_mac_header_rebuild(skb);
-
-	xfrm4_beet_make_header(skb);
-
-	iph = ip_hdr(skb);
-
-	iph->ihl += optlen / 4;
-	iph->tot_len = htons(skb->len);
-	iph->daddr = x->sel.daddr.a4;
-	iph->saddr = x->sel.saddr.a4;
-	iph->check = 0;
-	iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
-	err = 0;
-out:
-	return err;
-}
-
-static struct xfrm_mode xfrm4_beet_mode = {
-	.input2 = xfrm4_beet_input,
-	.input = xfrm_prepare_input,
-	.output2 = xfrm4_beet_output,
-	.output = xfrm4_prepare_output,
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_BEET,
-	.flags = XFRM_MODE_FLAG_TUNNEL,
-};
-
-static int __init xfrm4_beet_init(void)
-{
-	return xfrm_register_mode(&xfrm4_beet_mode, AF_INET);
-}
-
-static void __exit xfrm4_beet_exit(void)
-{
-	int err;
-
-	err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET);
-	BUG_ON(err);
-}
-
-module_init(xfrm4_beet_init);
-module_exit(xfrm4_beet_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET);
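
With xfrm4_mode_beet.c gone, BEET encapsulation no longer lives in a per-family module that registers itself via xfrm_register_mode(). In v5.x the dispatch moves into core xfrm; a simplified sketch of the shape it takes in net/xfrm/xfrm_output.c (approximate, not the verbatim code):

	static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
	{
		switch (x->outer_mode.encap) {
		case XFRM_MODE_BEET:
			if (x->outer_mode.family == AF_INET)
				return xfrm4_beet_encap_add(x, skb);
			return xfrm6_beet_encap_add(x, skb);
		case XFRM_MODE_TUNNEL:
			if (x->outer_mode.family == AF_INET)
				return xfrm4_tunnel_encap_add(x, skb);
			return xfrm6_tunnel_encap_add(x, skb);
		default:
			WARN_ON_ONCE(1);
			return -EOPNOTSUPP;
		}
	}
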
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
deleted file mode 100644
index 1ad2c2c..0000000
--- a/net/ipv4/xfrm4_mode_transport.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * xfrm4_mode_transport.c - Transport mode encapsulation for IPv4.
- *
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/ip.h>
-#include <net/xfrm.h>
-#include <net/protocol.h>
-
-/* Add encapsulation header.
- *
- * The IP header will be moved forward to make space for the encapsulation
- * header.
- */
-static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct iphdr *iph = ip_hdr(skb);
-	int ihl = iph->ihl * 4;
-
-	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
-
-	skb_set_network_header(skb, -x->props.header_len);
-	skb->mac_header = skb->network_header +
-			  offsetof(struct iphdr, protocol);
-	skb->transport_header = skb->network_header + ihl;
-	__skb_pull(skb, ihl);
-	memmove(skb_network_header(skb), iph, ihl);
-	return 0;
-}
-
-/* Remove encapsulation header.
- *
- * The IP header will be moved over the top of the encapsulation header.
- *
- * On entry, the transport header shall point to where the IP header should
- * be and the network header shall be set to where the IP header currently
- * is.  skb->data shall point to the start of the payload.
- */
-static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int ihl = skb->data - skb_transport_header(skb);
-
-	if (skb->transport_header != skb->network_header) {
-		memmove(skb_transport_header(skb),
-			skb_network_header(skb), ihl);
-		skb->network_header = skb->transport_header;
-	}
-	ip_hdr(skb)->tot_len = htons(skb->len + ihl);
-	skb_reset_transport_header(skb);
-	return 0;
-}
-
-static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
-						   struct sk_buff *skb,
-						   netdev_features_t features)
-{
-	const struct net_offload *ops;
-	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	struct xfrm_offload *xo = xfrm_offload(skb);
-
-	skb->transport_header += x->props.header_len;
-	ops = rcu_dereference(inet_offloads[xo->proto]);
-	if (likely(ops && ops->callbacks.gso_segment))
-		segs = ops->callbacks.gso_segment(skb, features);
-
-	return segs;
-}
-
-static void xfrm4_transport_xmit(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct xfrm_offload *xo = xfrm_offload(skb);
-
-	skb_reset_mac_len(skb);
-	pskb_pull(skb, skb->mac_len + sizeof(struct iphdr) + x->props.header_len);
-
-	if (xo->flags & XFRM_GSO_SEGMENT) {
-		 skb_reset_transport_header(skb);
-		 skb->transport_header -= x->props.header_len;
-	}
-}
-
-static struct xfrm_mode xfrm4_transport_mode = {
-	.input = xfrm4_transport_input,
-	.output = xfrm4_transport_output,
-	.gso_segment = xfrm4_transport_gso_segment,
-	.xmit = xfrm4_transport_xmit,
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_TRANSPORT,
-};
-
-static int __init xfrm4_transport_init(void)
-{
-	return xfrm_register_mode(&xfrm4_transport_mode, AF_INET);
-}
-
-static void __exit xfrm4_transport_exit(void)
-{
-	int err;
-
-	err = xfrm_unregister_mode(&xfrm4_transport_mode, AF_INET);
-	BUG_ON(err);
-}
-
-module_init(xfrm4_transport_init);
-module_exit(xfrm4_transport_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TRANSPORT);
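
The deleted transport-mode output is mostly pointer bookkeeping; the buffer operation underneath is a single slide of the IP header into the headroom so the IPsec header fits between it and the payload. A standalone toy of that move (plain userspace C with made-up sizes, mirroring the __skb_pull()/memmove() pair above):

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		/* 8 bytes of headroom, a 20-byte "IP header" of 'H', payload 'p' */
		char pkt[40] = "........HHHHHHHHHHHHHHHHHHHHppppppp";
		size_t headroom = 8, ihl = 20;

		memmove(pkt, pkt + headroom, ihl);	/* slide header left */
		memset(pkt + ihl, 'E', headroom);	/* ESP header fills the gap */
		printf("%.35s\n", pkt);			/* HH..HHEE..EEpp..pp */
		return 0;
	}
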
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
deleted file mode 100644
index 2a9764b..0000000
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * xfrm4_mode_tunnel.c - Tunnel mode encapsulation for IPv4.
- *
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ip.h>
-#include <net/xfrm.h>
-
-static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
-{
-	struct iphdr *inner_iph = ipip_hdr(skb);
-
-	if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
-		IP_ECN_set_ce(inner_iph);
-}
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per RFC 2401.
- */
-static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct dst_entry *dst = skb_dst(skb);
-	struct iphdr *top_iph;
-	int flags;
-
-	skb_set_inner_network_header(skb, skb_network_offset(skb));
-	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
-
-	skb_set_network_header(skb, -x->props.header_len);
-	skb->mac_header = skb->network_header +
-			  offsetof(struct iphdr, protocol);
-	skb->transport_header = skb->network_header + sizeof(*top_iph);
-	top_iph = ip_hdr(skb);
-
-	top_iph->ihl = 5;
-	top_iph->version = 4;
-
-	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
-
-	/* Disclosing the DS field to the outer header depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
-	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
-		top_iph->tos = 0;
-	else
-		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
-	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
-					    XFRM_MODE_SKB_CB(skb)->tos);
-
-	flags = x->props.flags;
-	if (flags & XFRM_STATE_NOECN)
-		IP_ECN_clear(top_iph);
-
-	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
-		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
-
-	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));
-
-	top_iph->saddr = x->props.saddr.a4;
-	top_iph->daddr = x->id.daddr.a4;
-	ip_select_ident(dev_net(dst->dev), skb, NULL);
-
-	return 0;
-}
-
-static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err = -EINVAL;
-
-	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
-		goto out;
-
-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-		goto out;
-
-	err = skb_unclone(skb, GFP_ATOMIC);
-	if (err)
-		goto out;
-
-	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-		ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb));
-	if (!(x->props.flags & XFRM_STATE_NOECN))
-		ipip_ecn_decapsulate(skb);
-
-	skb_reset_network_header(skb);
-	skb_mac_header_rebuild(skb);
-	if (skb->mac_len)
-		eth_hdr(skb)->h_proto = skb->protocol;
-
-	err = 0;
-
-out:
-	return err;
-}
-
-static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x,
-						     struct sk_buff *skb,
-						     netdev_features_t features)
-{
-	__skb_push(skb, skb->mac_len);
-	return skb_mac_gso_segment(skb, features);
-}
-
-static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct xfrm_offload *xo = xfrm_offload(skb);
-
-	if (xo->flags & XFRM_GSO_SEGMENT)
-		skb->transport_header = skb->network_header +
-					sizeof(struct iphdr);
-
-	skb_reset_mac_len(skb);
-	pskb_pull(skb, skb->mac_len + x->props.header_len);
-}
-
-static struct xfrm_mode xfrm4_tunnel_mode = {
-	.input2 = xfrm4_mode_tunnel_input,
-	.input = xfrm_prepare_input,
-	.output2 = xfrm4_mode_tunnel_output,
-	.output = xfrm4_prepare_output,
-	.gso_segment = xfrm4_mode_tunnel_gso_segment,
-	.xmit = xfrm4_mode_tunnel_xmit,
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_TUNNEL,
-	.flags = XFRM_MODE_FLAG_TUNNEL,
-};
-
-static int __init xfrm4_mode_tunnel_init(void)
-{
-	return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET);
-}
-
-static void __exit xfrm4_mode_tunnel_exit(void)
-{
-	int err;
-
-	err = xfrm_unregister_mode(&xfrm4_tunnel_mode, AF_INET);
-	BUG_ON(err);
-}
-
-module_init(xfrm4_mode_tunnel_init);
-module_exit(xfrm4_mode_tunnel_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL);
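
All three IPv4 mode modules go away because struct xfrm_mode stops being an object with function pointers and a module owner. In v5.x it shrinks to plain data embedded in the state, approximately (from include/net/xfrm.h):

	struct xfrm_mode {
		u8 encap;
		u8 family;
		u8 flags;
	};

That flattening is why later hunks in this patch read x->outer_mode.family directly instead of chasing x->outer_mode->afinfo.
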
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index be980c1..ecff3fc 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * xfrm4_output.c - Common IPsec encapsulation code for IPv4.
  * Copyright (c) 2004 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/if_ether.h>
@@ -58,21 +54,6 @@
 	return xfrm4_extract_header(skb);
 }
 
-int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err;
-
-	err = xfrm_inner_extract_output(x, skb);
-	if (err)
-		return err;
-
-	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
-	skb->protocol = htons(ETH_P_IP);
-
-	return x->outer_mode->output2(x, skb);
-}
-EXPORT_SYMBOL(xfrm4_prepare_output);
-
 int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
 {
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
@@ -87,6 +68,8 @@
 static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
+	const struct xfrm_state_afinfo *afinfo;
+	int ret = -EAFNOSUPPORT;
 
 #ifdef CONFIG_NETFILTER
 	if (!x) {
@@ -95,7 +78,15 @@
 	}
 #endif
 
-	return x->outer_mode->afinfo->output_finish(sk, skb);
+	rcu_read_lock();
+	afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
+	if (likely(afinfo))
+		ret = afinfo->output_finish(sk, skb);
+	else
+		kfree_skb(skb);
+	rcu_read_unlock();
+
+	return ret;
 }
 
 int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d73a6d6..35b84b5 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -12,7 +12,6 @@
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/inetdevice.h>
-#include <linux/if_tunnel.h>
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
@@ -69,17 +68,6 @@
 	return 0;
 }
 
-static int xfrm4_get_tos(const struct flowi *fl)
-{
-	return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */
-}
-
-static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
-			   int nfheader_len)
-{
-	return 0;
-}
-
 static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 			  const struct flowi *fl)
 {
@@ -97,8 +85,12 @@
 	xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
 					      RTCF_LOCAL);
 	xdst->u.rt.rt_type = rt->rt_type;
-	xdst->u.rt.rt_gateway = rt->rt_gateway;
 	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
+	xdst->u.rt.rt_gw_family = rt->rt_gw_family;
+	if (rt->rt_gw_family == AF_INET)
+		xdst->u.rt.rt_gw4 = rt->rt_gw4;
+	else if (rt->rt_gw_family == AF_INET6)
+		xdst->u.rt.rt_gw6 = rt->rt_gw6;
 	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
 	xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
@@ -107,118 +99,6 @@
 	return 0;
 }
 
-static void
-_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
-{
-	const struct iphdr *iph = ip_hdr(skb);
-	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
-	struct flowi4 *fl4 = &fl->u.ip4;
-	int oif = 0;
-
-	if (skb_dst(skb))
-		oif = skb_dst(skb)->dev->ifindex;
-
-	memset(fl4, 0, sizeof(struct flowi4));
-	fl4->flowi4_mark = skb->mark;
-	fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
-
-	if (!ip_is_fragment(iph)) {
-		switch (iph->protocol) {
-		case IPPROTO_UDP:
-		case IPPROTO_UDPLITE:
-		case IPPROTO_TCP:
-		case IPPROTO_SCTP:
-		case IPPROTO_DCCP:
-			if (xprth + 4 < skb->data ||
-			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be16 *ports;
-
-				xprth = skb_network_header(skb) + iph->ihl * 4;
-				ports = (__be16 *)xprth;
-
-				fl4->fl4_sport = ports[!!reverse];
-				fl4->fl4_dport = ports[!reverse];
-			}
-			break;
-
-		case IPPROTO_ICMP:
-			if (xprth + 2 < skb->data ||
-			    pskb_may_pull(skb, xprth + 2 - skb->data)) {
-				u8 *icmp;
-
-				xprth = skb_network_header(skb) + iph->ihl * 4;
-				icmp = xprth;
-
-				fl4->fl4_icmp_type = icmp[0];
-				fl4->fl4_icmp_code = icmp[1];
-			}
-			break;
-
-		case IPPROTO_ESP:
-			if (xprth + 4 < skb->data ||
-			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be32 *ehdr;
-
-				xprth = skb_network_header(skb) + iph->ihl * 4;
-				ehdr = (__be32 *)xprth;
-
-				fl4->fl4_ipsec_spi = ehdr[0];
-			}
-			break;
-
-		case IPPROTO_AH:
-			if (xprth + 8 < skb->data ||
-			    pskb_may_pull(skb, xprth + 8 - skb->data)) {
-				__be32 *ah_hdr;
-
-				xprth = skb_network_header(skb) + iph->ihl * 4;
-				ah_hdr = (__be32 *)xprth;
-
-				fl4->fl4_ipsec_spi = ah_hdr[1];
-			}
-			break;
-
-		case IPPROTO_COMP:
-			if (xprth + 4 < skb->data ||
-			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be16 *ipcomp_hdr;
-
-				xprth = skb_network_header(skb) + iph->ihl * 4;
-				ipcomp_hdr = (__be16 *)xprth;
-
-				fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
-			}
-			break;
-
-		case IPPROTO_GRE:
-			if (xprth + 12 < skb->data ||
-			    pskb_may_pull(skb, xprth + 12 - skb->data)) {
-				__be16 *greflags;
-				__be32 *gre_hdr;
-
-				xprth = skb_network_header(skb) + iph->ihl * 4;
-				greflags = (__be16 *)xprth;
-				gre_hdr = (__be32 *)xprth;
-
-				if (greflags[0] & GRE_KEY) {
-					if (greflags[0] & GRE_CSUM)
-						gre_hdr++;
-					fl4->fl4_gre_key = gre_hdr[1];
-				}
-			}
-			break;
-
-		default:
-			fl4->fl4_ipsec_spi = 0;
-			break;
-		}
-	}
-	fl4->flowi4_proto = iph->protocol;
-	fl4->daddr = reverse ? iph->saddr : iph->daddr;
-	fl4->saddr = reverse ? iph->daddr : iph->saddr;
-	fl4->flowi4_tos = iph->tos;
-}
-
 static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			      struct sk_buff *skb, u32 mtu)
 {
@@ -271,9 +151,6 @@
 	.dst_ops =		&xfrm4_dst_ops_template,
 	.dst_lookup =		xfrm4_dst_lookup,
 	.get_saddr =		xfrm4_get_saddr,
-	.decode_session =	_decode_session4,
-	.get_tos =		xfrm4_get_tos,
-	.init_path =		xfrm4_init_path,
 	.fill_dst =		xfrm4_fill_dst,
 	.blackhole_route =	ipv4_blackhole_route,
 };
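
The decode_session, get_tos and init_path hooks drop out of the per-family afinfo because core xfrm now decodes flows itself. Roughly how the v5.x dispatcher looks (decode_session4/decode_session6 are static helpers in net/xfrm/xfrm_policy.c; sketch, not verbatim):

	int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
				  unsigned int family, int reverse)
	{
		switch (family) {
		case AF_INET:
			decode_session4(skb, fl, reverse);
			break;
	#if IS_ENABLED(CONFIG_IPV6)
		case AF_INET6:
			decode_session6(skb, fl, reverse);
			break;
	#endif
		default:
			return -EAFNOSUPPORT;
		}

		return security_xfrm_decode_session(skb, &fl->flowi_secid);
	}
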
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 8dd0e6a..8a42857 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* xfrm4_protocol.c - Generic xfrm protocol multiplexer.
  *
  * Copyright (C) 2013 secunet Security Networks AG
@@ -7,11 +8,6 @@
  *
  * Based on:
  * net/ipv4/tunnel4.c
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/init.h>
@@ -46,7 +42,7 @@
 	     handler != NULL;				\
 	     handler = rcu_dereference(handler->next))	\
 
-int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+static int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
 {
 	int ret;
 	struct xfrm4_protocol *handler;
@@ -61,7 +57,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL(xfrm4_rcv_cb);
 
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type)
@@ -106,13 +101,15 @@
 	return 0;
 }
 
-static void xfrm4_esp_err(struct sk_buff *skb, u32 info)
+static int xfrm4_esp_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm4_protocol *handler;
 
 	for_each_protocol_rcu(esp4_handlers, handler)
 		if (!handler->err_handler(skb, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 
 static int xfrm4_ah_rcv(struct sk_buff *skb)
@@ -132,13 +129,15 @@
 	return 0;
 }
 
-static void xfrm4_ah_err(struct sk_buff *skb, u32 info)
+static int xfrm4_ah_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm4_protocol *handler;
 
 	for_each_protocol_rcu(ah4_handlers, handler)
 		if (!handler->err_handler(skb, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 
 static int xfrm4_ipcomp_rcv(struct sk_buff *skb)
@@ -158,13 +157,15 @@
 	return 0;
 }
 
-static void xfrm4_ipcomp_err(struct sk_buff *skb, u32 info)
+static int xfrm4_ipcomp_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm4_protocol *handler;
 
 	for_each_protocol_rcu(ipcomp4_handlers, handler)
 		if (!handler->err_handler(skb, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 
 static const struct net_protocol esp4_protocol = {
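
All three err handlers above now report whether anyone consumed the error instead of falling off the end of the chain in silence. A userspace model of the new contract (runnable C; the kernel's for_each_protocol_rcu() walk is the same idea plus RCU):

	#include <errno.h>
	#include <stdio.h>

	struct handler {
		int (*err_handler)(int info);
		struct handler *next;
	};

	static int dispatch_err(const struct handler *chain, int info)
	{
		for (; chain; chain = chain->next)
			if (!chain->err_handler(info))
				return 0;	/* a handler consumed it */
		return -ENOENT;			/* nobody cared */
	}

	static int declines(int info) { return -EINVAL; }
	static int consumes(int info) { return 0; }

	int main(void)
	{
		struct handler second = { consumes, NULL };
		struct handler first = { declines, &second };

		printf("%d\n", dispatch_err(&first, 42));	/* 0 */
		printf("%d\n", dispatch_err(NULL, 42));		/* -ENOENT */
		return 0;
	}
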
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 80c40b4..f8ed3c3 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -15,46 +15,6 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/export.h>
 
-static int xfrm4_init_flags(struct xfrm_state *x)
-{
-	if (xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
-		x->props.flags |= XFRM_STATE_NOPMTUDISC;
-	return 0;
-}
-
-static void
-__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
-{
-	const struct flowi4 *fl4 = &fl->u.ip4;
-
-	sel->daddr.a4 = fl4->daddr;
-	sel->saddr.a4 = fl4->saddr;
-	sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
-	sel->dport_mask = htons(0xffff);
-	sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
-	sel->sport_mask = htons(0xffff);
-	sel->family = AF_INET;
-	sel->prefixlen_d = 32;
-	sel->prefixlen_s = 32;
-	sel->proto = fl4->flowi4_proto;
-	sel->ifindex = fl4->flowi4_oif;
-}
-
-static void
-xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
-		   const xfrm_address_t *daddr, const xfrm_address_t *saddr)
-{
-	x->id = tmpl->id;
-	if (x->id.daddr.a4 == 0)
-		x->id.daddr.a4 = daddr->a4;
-	x->props.saddr = tmpl->saddr;
-	if (x->props.saddr.a4 == 0)
-		x->props.saddr.a4 = saddr->a4;
-	x->props.mode = tmpl->mode;
-	x->props.reqid = tmpl->reqid;
-	x->props.family = AF_INET;
-}
-
 int xfrm4_extract_header(struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
@@ -74,11 +34,6 @@
 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.proto			= IPPROTO_IPIP,
-	.eth_proto		= htons(ETH_P_IP),
-	.owner			= THIS_MODULE,
-	.init_flags		= xfrm4_init_flags,
-	.init_tempsel		= __xfrm4_init_tempsel,
-	.init_temprop		= xfrm4_init_temprop,
 	.output			= xfrm4_output,
 	.output_finish		= xfrm4_output_finish,
 	.extract_input		= xfrm4_extract_input,
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 06347db..dc19aff 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* xfrm4_tunnel.c: Generic IP tunnel transformer.
  *
  * Copyright (C) 2003 David S. Miller (davem@redhat.com)
@@ -107,8 +108,7 @@
 	if (xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET))
 		pr_info("%s: can't remove xfrm handler for AF_INET\n",
 			__func__);
-	if (xfrm_unregister_type(&ipip_type, AF_INET) < 0)
-		pr_info("%s: can't remove xfrm type\n", __func__);
+	xfrm_unregister_type(&ipip_type, AF_INET);
 }
 
 module_init(ipip_init);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 613282c..ae1344e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # IPv6 configuration
 #
@@ -135,44 +136,11 @@
 	tristate
 	default n
 
-config INET6_XFRM_MODE_TRANSPORT
-	tristate "IPv6: IPsec transport mode"
-	default IPV6
-	select XFRM
-	---help---
-	  Support for IPsec transport mode.
-
-	  If unsure, say Y.
-
-config INET6_XFRM_MODE_TUNNEL
-	tristate "IPv6: IPsec tunnel mode"
-	default IPV6
-	select XFRM
-	---help---
-	  Support for IPsec tunnel mode.
-
-	  If unsure, say Y.
-
-config INET6_XFRM_MODE_BEET
-	tristate "IPv6: IPsec BEET mode"
-	default IPV6
-	select XFRM
-	---help---
-	  Support for IPsec BEET mode.
-
-	  If unsure, say Y.
-
-config INET6_XFRM_MODE_ROUTEOPTIMIZATION
-	tristate "IPv6: MIPv6 route optimization mode"
-	select XFRM
-	---help---
-	  Support for MIPv6 route optimization mode.
-
 config IPV6_VTI
 tristate "Virtual (secure) IPv6: tunneling"
 	select IPV6_TUNNEL
 	select NET_IP_TUNNEL
-	depends on INET6_XFRM_MODE_TUNNEL
+	select XFRM
 	---help---
 	Tunneling means encapsulating data of one protocol type within
 	another protocol and sending it over a channel that understands the
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index e0026fa..8ccf355 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -35,10 +35,6 @@
 obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
 obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
 obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
-obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
-obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
-obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
-obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
 obj-$(CONFIG_IPV6_MIP6) += mip6.o
 obj-$(CONFIG_IPV6_ILA) += ila/
 obj-$(CONFIG_NETFILTER)	+= netfilter/
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4e81ff2..34ccef1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPv6 Address [auto]configuration
  *	Linux INET6 implementation
@@ -5,11 +6,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 /*
@@ -173,13 +169,14 @@
 static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  int plen,
 						  const struct net_device *dev,
-						  u32 flags, u32 noflags);
+						  u32 flags, u32 noflags,
+						  bool no_gw);
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp);
 static void addrconf_dad_work(struct work_struct *w);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
 				   bool send_na);
-static void addrconf_dad_run(struct inet6_dev *idev);
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart);
 static void addrconf_rs_timer(struct timer_list *t);
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -481,7 +478,7 @@
 	if (!idev) {
 		idev = ipv6_add_dev(dev);
 		if (IS_ERR(idev))
-			return NULL;
+			return idev;
 	}
 
 	if (dev->flags&IFF_UP)
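
ipv6_find_idev() and ipv6_add_dev() now report failure via the ERR_PTR convention instead of a bare NULL, which is why this and later hunks switch callers from !idev to IS_ERR(idev). A self-contained userspace model of the idiom (simplified versions of the kernel's err.h macros):

	#include <errno.h>
	#include <stdio.h>

	static inline void *ERR_PTR(long err) { return (void *)err; }
	static inline long PTR_ERR(const void *p) { return (long)p; }
	static inline int IS_ERR(const void *p)
	{
		/* the top 4095 addresses are reserved for encoded errnos */
		return (unsigned long)p >= (unsigned long)-4095;
	}

	static void *find_idev(int fail)
	{
		static int real_idev;

		return fail ? ERR_PTR(-ENOMEM) : (void *)&real_idev;
	}

	int main(void)
	{
		void *idev = find_idev(1);

		if (IS_ERR(idev))
			printf("failed: %ld\n", PTR_ERR(idev));	/* -12 */
		return 0;
	}
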
@@ -597,6 +594,45 @@
 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
 };
 
+static int inet6_netconf_valid_get_req(struct sk_buff *skb,
+				       const struct nlmsghdr *nlh,
+				       struct nlattr **tb,
+				       struct netlink_ext_ack *extack)
+{
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf get request");
+		return -EINVAL;
+	}
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
+					      tb, NETCONFA_MAX,
+					      devconf_ipv6_policy, extack);
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
+					    tb, NETCONFA_MAX,
+					    devconf_ipv6_policy, extack);
+	if (err)
+		return err;
+
+	for (i = 0; i <= NETCONFA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NETCONFA_IFINDEX:
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
 				     struct nlmsghdr *nlh,
 				     struct netlink_ext_ack *extack)
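
inet6_netconf_valid_get_req() is one instance of a pattern this patch stamps across addrconf and addrlabel: verify the header length, reject header fields that old kernels silently ignored, and whitelist the attributes a GET may carry, while sockets that never opted into strict checking keep the permissive parse. The skeleton, with a hypothetical FOO message standing in (foomsg, foo_policy and FOO_MAX are placeholders, not real symbols):

	static int foo_valid_get_req(struct sk_buff *skb,
				     const struct nlmsghdr *nlh,
				     struct nlattr **tb,
				     struct netlink_ext_ack *extack)
	{
		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct foomsg)))
			return -EINVAL;

		if (!netlink_strict_get_check(skb))
			return nlmsg_parse_deprecated(nlh, sizeof(struct foomsg),
						      tb, FOO_MAX, foo_policy,
						      extack);

		return nlmsg_parse_deprecated_strict(nlh, sizeof(struct foomsg),
						     tb, FOO_MAX, foo_policy,
						     extack);
	}
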
@@ -605,14 +641,12 @@
 	struct nlattr *tb[NETCONFA_MAX+1];
 	struct inet6_dev *in6_dev = NULL;
 	struct net_device *dev = NULL;
-	struct netconfmsg *ncm;
 	struct sk_buff *skb;
 	struct ipv6_devconf *devconf;
 	int ifindex;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
-			  devconf_ipv6_policy, extack);
+	err = inet6_netconf_valid_get_req(in_skb, nlh, tb, extack);
 	if (err < 0)
 		return err;
 
@@ -666,6 +700,7 @@
 static int inet6_netconf_dump_devconf(struct sk_buff *skb,
 				      struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	int h, s_h;
 	int idx, s_idx;
@@ -673,6 +708,21 @@
 	struct inet6_dev *idev;
 	struct hlist_head *head;
 
+	if (cb->strict_check) {
+		struct netlink_ext_ack *extack = cb->extack;
+		struct netconfmsg *ncm;
+
+		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
+			NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
+			return -EINVAL;
+		}
+
+		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
+			NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request");
+			return -EINVAL;
+		}
+	}
+
 	s_h = cb->args[0];
 	s_idx = idx = cb->args[1];
 
@@ -692,7 +742,7 @@
 			if (inet6_netconf_fill_devconf(skb, dev->ifindex,
 						       &idev->cnf,
 						       NETLINK_CB(cb->skb).portid,
-						       cb->nlh->nlmsg_seq,
+						       nlh->nlmsg_seq,
 						       RTM_NEWNETCONF,
 						       NLM_F_MULTI,
 						       NETCONFA_ALL) < 0) {
@@ -709,7 +759,7 @@
 		if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
 					       net->ipv6.devconf_all,
 					       NETLINK_CB(cb->skb).portid,
-					       cb->nlh->nlmsg_seq,
+					       nlh->nlmsg_seq,
 					       RTM_NEWNETCONF, NLM_F_MULTI,
 					       NETCONFA_ALL) < 0)
 			goto done;
@@ -720,7 +770,7 @@
 		if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
 					       net->ipv6.devconf_dflt,
 					       NETLINK_CB(cb->skb).portid,
-					       cb->nlh->nlmsg_seq,
+					       nlh->nlmsg_seq,
 					       RTM_NEWNETCONF, NLM_F_MULTI,
 					       NETCONFA_ALL) < 0)
 			goto done;
@@ -995,8 +1045,10 @@
 	int err = 0;
 
 	if (addr_type == IPV6_ADDR_ANY ||
-	    addr_type & IPV6_ADDR_MULTICAST ||
+	    (addr_type & IPV6_ADDR_MULTICAST &&
+	     !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) ||
 	    (!(idev->dev->flags & IFF_LOOPBACK) &&
+	     !netif_is_l3_master(idev->dev) &&
 	     addr_type & IPV6_ADDR_LOOPBACK))
 		return ERR_PTR(-EADDRNOTAVAIL);
 
@@ -1148,7 +1200,8 @@
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
 		if (ifa == ifp)
 			continue;
-		if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+		if (ifa->prefix_len != ifp->prefix_len ||
+		    !ipv6_prefix_equal(&ifa->addr, &ifp->addr,
 				       ifp->prefix_len))
 			continue;
 		if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE))
@@ -1177,10 +1230,8 @@
 {
 	struct fib6_info *f6i;
 
-	f6i = addrconf_get_prefix_route(&ifp->addr,
-				       ifp->prefix_len,
-				       ifp->idev->dev,
-				       0, RTF_GATEWAY | RTF_DEFAULT);
+	f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+					ifp->idev->dev, 0, RTF_DEFAULT, true);
 	if (f6i) {
 		if (del_rt)
 			ip6_del_rt(dev_net(ifp->idev->dev), f6i);
@@ -2349,7 +2400,8 @@
 static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  int plen,
 						  const struct net_device *dev,
-						  u32 flags, u32 noflags)
+						  u32 flags, u32 noflags,
+						  bool no_gw)
 {
 	struct fib6_node *fn;
 	struct fib6_info *rt = NULL;
@@ -2366,7 +2418,13 @@
 		goto out;
 
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex)
+		/* prefix routes only use builtin fib6_nh */
+		if (rt->nh)
+			continue;
+
+		if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex)
+			continue;
+		if (no_gw && rt->fib6_nh->fib_nh_gw_family)
 			continue;
 		if ((rt->fib6_flags & flags) != flags)
 			continue;
@@ -2408,8 +2466,8 @@
 	ASSERT_RTNL();
 
 	idev = ipv6_find_idev(dev);
-	if (!idev)
-		return ERR_PTR(-ENOBUFS);
+	if (IS_ERR(idev))
+		return idev;
 
 	if (idev->cnf.disable_ipv6)
 		return ERR_PTR(-EACCES);
@@ -2664,7 +2722,7 @@
 					       pinfo->prefix_len,
 					       dev,
 					       RTF_ADDRCONF | RTF_PREFIX_RT,
-					       RTF_GATEWAY | RTF_DEFAULT);
+					       RTF_DEFAULT, true);
 
 		if (rt) {
 			/* Autoconf prefix route */
@@ -2803,7 +2861,7 @@
 			dev = __dev_get_by_name(net, p.name);
 			if (!dev)
 				goto err_exit;
-			err = dev_open(dev);
+			err = dev_open(dev, NULL);
 		}
 	}
 #endif
@@ -3070,11 +3128,9 @@
 		struct in_device *in_dev = __in_dev_get_rtnl(dev);
 		if (in_dev && (dev->flags & IFF_UP)) {
 			struct in_ifaddr *ifa;
-
 			int flag = scope;
 
-			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
-
+			in_dev_for_each_ifa_rtnl(ifa, in_dev) {
 				addr.s6_addr32[3] = ifa->ifa_local;
 
 				if (ifa->ifa_scope == RT_SCOPE_LINK)
@@ -3103,7 +3159,7 @@
 	ASSERT_RTNL();
 
 	idev = ipv6_find_idev(dev);
-	if (!idev) {
+	if (IS_ERR(idev)) {
 		pr_debug("%s: add_dev failed\n", __func__);
 		return;
 	}
@@ -3318,7 +3374,7 @@
 	 */
 
 	idev = ipv6_find_idev(dev);
-	if (!idev) {
+	if (IS_ERR(idev)) {
 		pr_debug("%s: add_dev failed\n", __func__);
 		return;
 	}
@@ -3343,7 +3399,7 @@
 	ASSERT_RTNL();
 
 	idev = ipv6_find_idev(dev);
-	if (!idev) {
+	if (IS_ERR(idev)) {
 		pr_debug("%s: add_dev failed\n", __func__);
 		return;
 	}
@@ -3422,6 +3478,7 @@
 			   void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_change_info *change_info;
 	struct netdev_notifier_changeupper_info *info;
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	struct net *net = dev_net(dev);
@@ -3477,8 +3534,8 @@
 
 			if (!addrconf_link_ready(dev)) {
 				/* device is not ready yet. */
-				pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
-					dev->name);
+				pr_debug("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
+					 dev->name);
 				break;
 			}
 
@@ -3496,7 +3553,7 @@
 				break;
 			}
 
-			if (idev) {
+			if (!IS_ERR_OR_NULL(idev)) {
 				if (idev->if_flags & IF_READY) {
 					/* device is already configured -
 					 * but resend MLD reports, we might
@@ -3504,6 +3561,9 @@
 					 * multicast snooping switches
 					 */
 					ipv6_mc_up(idev);
+					change_info = ptr;
+					if (change_info->flags_changed & IFF_NOARP)
+						addrconf_dad_run(idev, true);
 					rt6_sync_up(dev, RTNH_F_LINKDOWN);
 					break;
 				}
@@ -3538,7 +3598,7 @@
 
 		if (!IS_ERR_OR_NULL(idev)) {
 			if (run_pending)
-				addrconf_dad_run(idev);
+				addrconf_dad_run(idev, false);
 
 			/* Device has an address by now */
 			rt6_sync_up(dev, RTNH_F_DEAD);
@@ -4156,16 +4216,19 @@
 		addrconf_verify_rtnl();
 }
 
-static void addrconf_dad_run(struct inet6_dev *idev)
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
 {
 	struct inet6_ifaddr *ifp;
 
 	read_lock_bh(&idev->lock);
 	list_for_each_entry(ifp, &idev->addr_list, if_list) {
 		spin_lock(&ifp->lock);
-		if (ifp->flags & IFA_F_TENTATIVE &&
-		    ifp->state == INET6_IFADDR_STATE_DAD)
+		if ((ifp->flags & IFA_F_TENTATIVE &&
+		     ifp->state == INET6_IFADDR_STATE_DAD) || restart) {
+			if (restart)
+				ifp->state = INET6_IFADDR_STATE_PREDAD;
 			addrconf_dad_kick(ifp);
+		}
 		spin_unlock(&ifp->lock);
 	}
 	read_unlock_bh(&idev->lock);
@@ -4489,6 +4552,7 @@
 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
 	[IFA_FLAGS]		= { .len = sizeof(u32) },
 	[IFA_RT_PRIORITY]	= { .len = sizeof(u32) },
+	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
 };
 
 static int
@@ -4502,8 +4566,8 @@
 	u32 ifa_flags;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+				     ifa_ipv6_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -4527,10 +4591,8 @@
 	struct fib6_info *f6i;
 	u32 prio;
 
-	f6i = addrconf_get_prefix_route(&ifp->addr,
-					ifp->prefix_len,
-					ifp->idev->dev,
-					0, RTF_GATEWAY | RTF_DEFAULT);
+	f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+					ifp->idev->dev, 0, RTF_DEFAULT, true);
 	if (!f6i)
 		return -ENOENT;
 
@@ -4668,8 +4730,8 @@
 	struct ifa6_config cfg;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+				     ifa_ipv6_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -4791,19 +4853,40 @@
 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */;
 }
 
+enum addr_type_t {
+	UNICAST_ADDR,
+	MULTICAST_ADDR,
+	ANYCAST_ADDR,
+};
+
+struct inet6_fill_args {
+	u32 portid;
+	u32 seq;
+	int event;
+	unsigned int flags;
+	int netnsid;
+	int ifindex;
+	enum addr_type_t type;
+};
+
 static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
-			     u32 portid, u32 seq, int event, unsigned int flags)
+			     struct inet6_fill_args *args)
 {
 	struct nlmsghdr  *nlh;
 	u32 preferred, valid;
 
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+	nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
+			sizeof(struct ifaddrmsg), args->flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
 	put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
 		      ifa->idev->dev->ifindex);
 
+	if (args->netnsid >= 0 &&
+	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+		goto error;
+
 	if (!((ifa->flags&IFA_F_PERMANENT) &&
 	      (ifa->prefered_lft == INFINITY_LIFE_TIME))) {
 		preferred = ifa->prefered_lft;
@@ -4853,7 +4936,7 @@
 }
 
 static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
-				u32 portid, u32 seq, int event, u16 flags)
+			       struct inet6_fill_args *args)
 {
 	struct nlmsghdr  *nlh;
 	u8 scope = RT_SCOPE_UNIVERSE;
@@ -4862,10 +4945,15 @@
 	if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
 		scope = RT_SCOPE_SITE;
 
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+	nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
+			sizeof(struct ifaddrmsg), args->flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
+	if (args->netnsid >= 0 &&
+	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+		return -EMSGSIZE;
+
 	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
 	if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 ||
 	    put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
@@ -4879,7 +4967,7 @@
 }
 
 static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
-				u32 portid, u32 seq, int event, unsigned int flags)
+			       struct inet6_fill_args *args)
 {
 	struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
 	int ifindex = dev ? dev->ifindex : 1;
@@ -4889,10 +4977,15 @@
 	if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
 		scope = RT_SCOPE_SITE;
 
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+	nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
+			sizeof(struct ifaddrmsg), args->flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
+	if (args->netnsid >= 0 &&
+	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+		return -EMSGSIZE;
+
 	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
 	if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 ||
 	    put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
@@ -4905,36 +4998,27 @@
 	return 0;
 }
 
-enum addr_type_t {
-	UNICAST_ADDR,
-	MULTICAST_ADDR,
-	ANYCAST_ADDR,
-};
-
 /* called with rcu_read_lock() */
 static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
-			  struct netlink_callback *cb, enum addr_type_t type,
-			  int s_ip_idx, int *p_ip_idx)
+			  struct netlink_callback *cb, int s_ip_idx,
+			  struct inet6_fill_args *fillargs)
 {
 	struct ifmcaddr6 *ifmca;
 	struct ifacaddr6 *ifaca;
+	int ip_idx = 0;
 	int err = 1;
-	int ip_idx = *p_ip_idx;
 
 	read_lock_bh(&idev->lock);
-	switch (type) {
+	switch (fillargs->type) {
 	case UNICAST_ADDR: {
 		struct inet6_ifaddr *ifa;
+		fillargs->event = RTM_NEWADDR;
 
 		/* unicast address incl. temp addr */
 		list_for_each_entry(ifa, &idev->addr_list, if_list) {
 			if (ip_idx < s_ip_idx)
 				goto next;
-			err = inet6_fill_ifaddr(skb, ifa,
-						NETLINK_CB(cb->skb).portid,
-						cb->nlh->nlmsg_seq,
-						RTM_NEWADDR,
-						NLM_F_MULTI);
+			err = inet6_fill_ifaddr(skb, ifa, fillargs);
 			if (err < 0)
 				break;
 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -4944,31 +5028,26 @@
 		break;
 	}
 	case MULTICAST_ADDR:
+		fillargs->event = RTM_GETMULTICAST;
+
 		/* multicast address */
 		for (ifmca = idev->mc_list; ifmca;
 		     ifmca = ifmca->next, ip_idx++) {
 			if (ip_idx < s_ip_idx)
 				continue;
-			err = inet6_fill_ifmcaddr(skb, ifmca,
-						  NETLINK_CB(cb->skb).portid,
-						  cb->nlh->nlmsg_seq,
-						  RTM_GETMULTICAST,
-						  NLM_F_MULTI);
+			err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
 			if (err < 0)
 				break;
 		}
 		break;
 	case ANYCAST_ADDR:
+		fillargs->event = RTM_GETANYCAST;
 		/* anycast address */
 		for (ifaca = idev->ac_list; ifaca;
 		     ifaca = ifaca->aca_next, ip_idx++) {
 			if (ip_idx < s_ip_idx)
 				continue;
-			err = inet6_fill_ifacaddr(skb, ifaca,
-						  NETLINK_CB(cb->skb).portid,
-						  cb->nlh->nlmsg_seq,
-						  RTM_GETANYCAST,
-						  NLM_F_MULTI);
+			err = inet6_fill_ifacaddr(skb, ifaca, fillargs);
 			if (err < 0)
 				break;
 		}
@@ -4977,42 +5056,130 @@
 		break;
 	}
 	read_unlock_bh(&idev->lock);
-	*p_ip_idx = ip_idx;
+	cb->args[2] = ip_idx;
 	return err;
 }
 
+static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
+				       struct inet6_fill_args *fillargs,
+				       struct net **tgt_net, struct sock *sk,
+				       struct netlink_callback *cb)
+{
+	struct netlink_ext_ack *extack = cb->extack;
+	struct nlattr *tb[IFA_MAX+1];
+	struct ifaddrmsg *ifm;
+	int err, i;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for address dump request");
+		return -EINVAL;
+	}
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address dump request");
+		return -EINVAL;
+	}
+
+	fillargs->ifindex = ifm->ifa_index;
+	if (fillargs->ifindex) {
+		cb->answer_flags |= NLM_F_DUMP_FILTERED;
+		fillargs->flags |= NLM_F_DUMP_FILTERED;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+					    ifa_ipv6_policy, extack);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i <= IFA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		if (i == IFA_TARGET_NETNSID) {
+			struct net *net;
+
+			fillargs->netnsid = nla_get_s32(tb[i]);
+			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
+			if (IS_ERR(net)) {
+				fillargs->netnsid = -1;
+				NL_SET_ERR_MSG_MOD(extack, "Invalid target network namespace id");
+				return PTR_ERR(net);
+			}
+			*tgt_net = net;
+		} else {
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 			   enum addr_type_t type)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
+	struct inet6_fill_args fillargs = {
+		.portid = NETLINK_CB(cb->skb).portid,
+		.seq = cb->nlh->nlmsg_seq,
+		.flags = NLM_F_MULTI,
+		.netnsid = -1,
+		.type = type,
+	};
 	struct net *net = sock_net(skb->sk);
+	struct net *tgt_net = net;
+	int idx, s_idx, s_ip_idx;
 	int h, s_h;
-	int idx, ip_idx;
-	int s_idx, s_ip_idx;
 	struct net_device *dev;
 	struct inet6_dev *idev;
 	struct hlist_head *head;
+	int err = 0;
 
 	s_h = cb->args[0];
 	s_idx = idx = cb->args[1];
-	s_ip_idx = ip_idx = cb->args[2];
+	s_ip_idx = cb->args[2];
+
+	if (cb->strict_check) {
+		err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
+						  skb->sk, cb);
+		if (err < 0)
+			goto put_tgt_net;
+
+		err = 0;
+		if (fillargs.ifindex) {
+			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
+			if (!dev) {
+				err = -ENODEV;
+				goto put_tgt_net;
+			}
+			idev = __in6_dev_get(dev);
+			if (idev) {
+				err = in6_dump_addrs(idev, skb, cb, s_ip_idx,
+						     &fillargs);
+				if (err > 0)
+					err = 0;
+			}
+			goto put_tgt_net;
+		}
+	}
 
 	rcu_read_lock();
-	cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq;
+	cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq;
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
-		head = &net->dev_index_head[h];
+		head = &tgt_net->dev_index_head[h];
 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
 			if (h > s_h || idx > s_idx)
 				s_ip_idx = 0;
-			ip_idx = 0;
 			idev = __in6_dev_get(dev);
 			if (!idev)
 				goto cont;
 
-			if (in6_dump_addrs(idev, skb, cb, type,
-					   s_ip_idx, &ip_idx) < 0)
+			if (in6_dump_addrs(idev, skb, cb, s_ip_idx,
+					   &fillargs) < 0)
 				goto done;
 cont:
 			idx++;
@@ -5022,9 +5189,11 @@
 	rcu_read_unlock();
 	cb->args[0] = h;
 	cb->args[1] = idx;
-	cb->args[2] = ip_idx;
+put_tgt_net:
+	if (fillargs.netnsid >= 0)
+		put_net(tgt_net);
 
-	return skb->len;
+	return skb->len ? : err;
 }
 
 static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
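
The new return statement relies on GNU C's two-operand conditional: "a ? : b" evaluates to a when a is non-zero and to b otherwise. A partial dump therefore still returns the byte count so netlink keeps iterating, and the validation error only surfaces once nothing was written. Runnable illustration (gcc/clang extension):

	#include <stdio.h>

	int main(void)
	{
		int len = 0, err = -19;		/* pretend -ENODEV */

		printf("%d\n", len ? : err);	/* -19: empty dump reports it */
		len = 128;
		printf("%d\n", len ? : err);	/* 128: partial dump continues */
		return 0;
	}
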
@@ -5049,10 +5218,64 @@
 	return inet6_dump_addr(skb, cb, type);
 }
 
+static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
+				       const struct nlmsghdr *nlh,
+				       struct nlattr **tb,
+				       struct netlink_ext_ack *extack)
+{
+	struct ifaddrmsg *ifm;
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request");
+		return -EINVAL;
+	}
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request");
+		return -EINVAL;
+	}
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+					      ifa_ipv6_policy, extack);
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+					    ifa_ipv6_policy, extack);
+	if (err)
+		return err;
+
+	for (i = 0; i <= IFA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case IFA_TARGET_NETNSID:
+		case IFA_ADDRESS:
+		case IFA_LOCAL:
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get address request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 			     struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(in_skb->sk);
+	struct inet6_fill_args fillargs = {
+		.portid = NETLINK_CB(in_skb).portid,
+		.seq = nlh->nlmsg_seq,
+		.event = RTM_NEWADDR,
+		.flags = 0,
+		.netnsid = -1,
+	};
+	struct net *tgt_net = net;
 	struct ifaddrmsg *ifm;
 	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *addr = NULL, *peer;
@@ -5061,20 +5284,28 @@
 	struct sk_buff *skb;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
-			  extack);
+	err = inet6_rtm_valid_getaddr_req(in_skb, nlh, tb, extack);
 	if (err < 0)
 		return err;
 
+	if (tb[IFA_TARGET_NETNSID]) {
+		fillargs.netnsid = nla_get_s32(tb[IFA_TARGET_NETNSID]);
+
+		tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(in_skb).sk,
+						  fillargs.netnsid);
+		if (IS_ERR(tgt_net))
+			return PTR_ERR(tgt_net);
+	}
+
 	addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
 	if (!addr)
 		return -EINVAL;
 
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifa_index)
-		dev = dev_get_by_index(net, ifm->ifa_index);
+		dev = dev_get_by_index(tgt_net, ifm->ifa_index);
 
-	ifa = ipv6_get_ifaddr(net, addr, dev, 1);
+	ifa = ipv6_get_ifaddr(tgt_net, addr, dev, 1);
 	if (!ifa) {
 		err = -EADDRNOTAVAIL;
 		goto errout;
@@ -5086,20 +5317,22 @@
 		goto errout_ifa;
 	}
 
-	err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid,
-				nlh->nlmsg_seq, RTM_NEWADDR, 0);
+	err = inet6_fill_ifaddr(skb, ifa, &fillargs);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(skb);
 		goto errout_ifa;
 	}
-	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+	err = rtnl_unicast(skb, tgt_net, NETLINK_CB(in_skb).portid);
 errout_ifa:
 	in6_ifa_put(ifa);
 errout:
 	if (dev)
 		dev_put(dev);
+	if (fillargs.netnsid >= 0)
+		put_net(tgt_net);
+
 	return err;
 }
 
@@ -5107,13 +5340,20 @@
 {
 	struct sk_buff *skb;
 	struct net *net = dev_net(ifa->idev->dev);
+	struct inet6_fill_args fillargs = {
+		.portid = 0,
+		.seq = 0,
+		.event = event,
+		.flags = 0,
+		.netnsid = -1,
+	};
 	int err = -ENOBUFS;
 
 	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
 	if (!skb)
 		goto errout;
 
-	err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+	err = inet6_fill_ifaddr(skb, ifa, &fillargs);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
 		WARN_ON(err == -EMSGSIZE);
@@ -5420,18 +5660,6 @@
 	[IFLA_INET6_TOKEN]		= { .len = sizeof(struct in6_addr) },
 };
 
-static int inet6_validate_link_af(const struct net_device *dev,
-				  const struct nlattr *nla)
-{
-	struct nlattr *tb[IFLA_INET6_MAX + 1];
-
-	if (dev && !__in6_dev_get(dev))
-		return -EAFNOSUPPORT;
-
-	return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy,
-				NULL);
-}
-
 static int check_addr_gen_mode(int mode)
 {
 	if (mode != IN6_ADDR_GEN_MODE_EUI64 &&
@@ -5452,16 +5680,46 @@
 	return 1;
 }
 
+static int inet6_validate_link_af(const struct net_device *dev,
+				  const struct nlattr *nla)
+{
+	struct nlattr *tb[IFLA_INET6_MAX + 1];
+	struct inet6_dev *idev = NULL;
+	int err;
+
+	if (dev) {
+		idev = __in6_dev_get(dev);
+		if (!idev)
+			return -EAFNOSUPPORT;
+	}
+
+	err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla,
+					  inet6_af_policy, NULL);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_INET6_TOKEN] && !tb[IFLA_INET6_ADDR_GEN_MODE])
+		return -EINVAL;
+
+	if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
+		u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
+
+		if (check_addr_gen_mode(mode) < 0)
+			return -EINVAL;
+		if (dev && check_stable_privacy(idev, dev_net(dev), mode) < 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
 {
-	int err = -EINVAL;
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	struct nlattr *tb[IFLA_INET6_MAX + 1];
+	int err;
 
-	if (!idev)
-		return -EAFNOSUPPORT;
-
-	if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
+	if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
 		BUG();
 
 	if (tb[IFLA_INET6_TOKEN]) {
@@ -5473,15 +5731,10 @@
 	if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
 		u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
 
-		if (check_addr_gen_mode(mode) < 0 ||
-		    check_stable_privacy(idev, dev_net(dev), mode) < 0)
-			return -EINVAL;
-
 		idev->cnf.addr_gen_mode = mode;
-		err = 0;
 	}
 
-	return err;
+	return 0;
 }
 
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
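
Hoisting the mode checks into inet6_validate_link_af() restores rtnetlink's two-phase contract: validate_link_af may reject, set_link_af must succeed, so a multi-attribute IFLA_AF_SPEC change is never left half-applied. The calling pattern on the rtnetlink side is approximately (simplified sketch of validate_linkmsg/do_setlink, not verbatim):

	err = af_ops->validate_link_af(dev, af);	/* phase 1: may fail */
	if (err < 0)
		return err;
	/* ... all other attributes validated ... */
	err = af_ops->set_link_af(dev, af);		/* phase 2: commits */
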
@@ -5513,7 +5766,7 @@
 	    nla_put_u8(skb, IFLA_OPERSTATE,
 		       netif_running(dev) ? dev->operstate : IF_OPER_DOWN))
 		goto nla_put_failure;
-	protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
+	protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO);
 	if (!protoinfo)
 		goto nla_put_failure;
 
@@ -5529,6 +5782,31 @@
 	return -EMSGSIZE;
 }
 
+static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
+				   struct netlink_ext_ack *extack)
+{
+	struct ifinfomsg *ifm;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for link dump request");
+		return -EINVAL;
+	}
+
+	if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid data after header");
+		return -EINVAL;
+	}
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
+	    ifm->ifi_change || ifm->ifi_index) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for dump request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -5538,6 +5816,16 @@
 	struct inet6_dev *idev;
 	struct hlist_head *head;
 
+	/* only requests using strict checking can pass data to
+	 * influence the dump
+	 */
+	if (cb->strict_check) {
+		int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
+
+		if (err < 0)
+			return err;
+	}
+
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
 
@@ -5676,13 +5964,20 @@
 	switch (event) {
 	case RTM_NEWADDR:
 		/*
-		 * If the address was optimistic
-		 * we inserted the route at the start of
-		 * our DAD process, so we don't need
-		 * to do it again
+		 * If the address was optimistic we inserted the route at the
+		 * start of our DAD process, so we don't need to do it again.
+		 * If the device was taken down in the middle of the DAD
+		 * cycle there is a race where we could get here without a
+		 * host route, so nothing to insert. That will be fixed when
+		 * the device is brought up.
 		 */
-		if (!rcu_access_pointer(ifp->rt->fib6_node))
+		if (ifp->rt && !rcu_access_pointer(ifp->rt->fib6_node)) {
 			ip6_ins_rt(net, ifp->rt);
+		} else if (!ifp->rt && (ifp->idev->dev->flags & IFF_UP)) {
+			pr_warn("BUG: Address %pI6c on device %s is missing its host route.\n",
+				&ifp->addr, ifp->idev->dev->name);
+		}
+
 		if (ifp->idev->cnf.forwarding)
 			addrconf_join_anycast(ifp);
 		if (!ipv6_addr_any(&ifp->peer_addr))
@@ -5698,7 +5993,8 @@
 			struct fib6_info *rt;
 
 			rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
-						       ifp->idev->dev, 0, 0);
+						       ifp->idev->dev, 0, 0,
+						       false);
 			if (rt)
 				ip6_del_rt(net, rt);
 		}
@@ -6064,16 +6360,17 @@
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
 		spin_lock(&ifa->lock);
 		if (ifa->rt) {
-			struct fib6_info *rt = ifa->rt;
+			/* host routes only use builtin fib6_nh */
+			struct fib6_nh *nh = ifa->rt->fib6_nh;
 			int cpu;
 
 			rcu_read_lock();
 			ifa->rt->dst_nopolicy = val ? true : false;
-			if (rt->rt6i_pcpu) {
+			if (nh->rt6i_pcpu) {
 				for_each_possible_cpu(cpu) {
 					struct rt6_info **rtp;
 
-					rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+					rtp = per_cpu_ptr(nh->rt6i_pcpu, cpu);
 					addrconf_set_nopolicy(*rtp, val);
 				}
 			}
@@ -6143,8 +6440,6 @@
 }
 
 static int minus_one = -1;
-static const int zero = 0;
-static const int one = 1;
 static const int two_five_five = 255;
 
 static const struct ctl_table addrconf_sysctl[] = {
@@ -6161,7 +6456,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *)&one,
+		.extra1		= (void *)SYSCTL_ONE,
 		.extra2		= (void *)&two_five_five,
 	},
 	{
@@ -6520,7 +6815,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *)&zero,
+		.extra1		= (void *)SYSCTL_ZERO,
 		.extra2		= (void *)&two_five_five,
 	},
 	{
@@ -6631,6 +6926,12 @@
 	if (!dflt)
 		goto err_alloc_dflt;
 
+	if (IS_ENABLED(CONFIG_SYSCTL) &&
+	    sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) {
+		memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf));
+		memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt));
+	}
+
 	/* these will be inherited by all namespaces */
 	dflt->autoconf = ipv6_defaults.autoconf;
 	dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 5cd0029..2fc0792 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IPv6 library code, needed by static components when full IPv6 support is
  * not configured or static.
@@ -5,6 +6,7 @@
 
 #include <linux/export.h>
 #include <net/ipv6.h>
+#include <net/ipv6_stubs.h>
 #include <net/addrconf.h>
 #include <net/ip.h>
 
@@ -134,47 +136,69 @@
 	return -EAFNOSUPPORT;
 }
 
+static int eafnosupport_ipv6_route_input(struct sk_buff *skb)
+{
+	return -EAFNOSUPPORT;
+}
+
 static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
 {
 	return NULL;
 }
 
-static struct fib6_info *
+static int
 eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table,
-			       int oif, struct flowi6 *fl6, int flags)
+			       int oif, struct flowi6 *fl6,
+			       struct fib6_result *res, int flags)
 {
-	return NULL;
+	return -EAFNOSUPPORT;
 }
 
-static struct fib6_info *
+static int
 eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
-			 int flags)
+			 struct fib6_result *res, int flags)
 {
-	return NULL;
+	return -EAFNOSUPPORT;
 }
 
-static struct fib6_info *
-eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
-				   struct flowi6 *fl6, int oif,
-				   const struct sk_buff *skb, int strict)
+static void
+eafnosupport_fib6_select_path(const struct net *net, struct fib6_result *res,
+			      struct flowi6 *fl6, int oif, bool have_oif_match,
+			      const struct sk_buff *skb, int strict)
 {
-	return f6i;
 }
 
 static u32
-eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
-			       struct in6_addr *saddr)
+eafnosupport_ip6_mtu_from_fib6(const struct fib6_result *res,
+			       const struct in6_addr *daddr,
+			       const struct in6_addr *saddr)
 {
 	return 0;
 }
 
+static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+				     struct fib6_config *cfg, gfp_t gfp_flags,
+				     struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
+	return -EAFNOSUPPORT;
+}
+
+static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt)
+{
+	return -EAFNOSUPPORT;
+}
+
 const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
 	.ipv6_dst_lookup   = eafnosupport_ipv6_dst_lookup,
+	.ipv6_route_input  = eafnosupport_ipv6_route_input,
 	.fib6_get_table    = eafnosupport_fib6_get_table,
 	.fib6_table_lookup = eafnosupport_fib6_table_lookup,
 	.fib6_lookup       = eafnosupport_fib6_lookup,
-	.fib6_multipath_select = eafnosupport_fib6_multipath_select,
+	.fib6_select_path  = eafnosupport_fib6_select_path,
 	.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
+	.fib6_nh_init	   = eafnosupport_fib6_nh_init,
+	.ip6_del_rt	   = eafnosupport_ip6_del_rt,
 };
 EXPORT_SYMBOL_GPL(ipv6_stub);
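
The widened stub table is what keeps IPv4-only kernels linkable: shared code never calls IPv6 internals directly, it goes through ipv6_stub, which points at the -EAFNOSUPPORT table above until the ipv6 module installs the real implementation. Caller-side shape (sketch; fib6_lookup's signature is the one introduced above):

	struct fib6_result res = {};
	int err;

	err = ipv6_stub->fib6_lookup(net, oif, fl6, &res, flags);
	if (err)
		return err;	/* -EAFNOSUPPORT on IPv4-only kernels */
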
 
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 1d6ced3..642fc6a 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -383,8 +383,8 @@
 	u32 label;
 	int err = 0;
 
-	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb, IFAL_MAX,
+				     ifal_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -458,20 +458,52 @@
 	return 0;
 }
 
+static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh,
+				     struct netlink_ext_ack *extack)
+{
+	struct ifaddrlblmsg *ifal;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for address label dump request");
+		return -EINVAL;
+	}
+
+	ifal = nlmsg_data(nlh);
+	if (ifal->__ifal_reserved || ifal->ifal_prefixlen ||
+	    ifal->ifal_flags || ifal->ifal_index || ifal->ifal_seq) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address label dump request");
+		return -EINVAL;
+	}
+
+	if (nlmsg_attrlen(nlh, sizeof(*ifal))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	struct ip6addrlbl_entry *p;
 	int idx = 0, s_idx = cb->args[0];
 	int err;
 
+	if (cb->strict_check) {
+		err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
+		if (err < 0)
+			return err;
+	}
+
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
 		if (idx >= s_idx) {
 			err = ip6addrlbl_fill(skb, p,
 					      net->ipv6.ip6addrlbl_table.seq,
 					      NETLINK_CB(cb->skb).portid,
-					      cb->nlh->nlmsg_seq,
+					      nlh->nlmsg_seq,
 					      RTM_NEWADDRLABEL,
 					      NLM_F_MULTI);
 			if (err < 0)
@@ -491,6 +523,50 @@
 		+ nla_total_size(4);	/* IFAL_LABEL */
 }
 
+static int ip6addrlbl_valid_get_req(struct sk_buff *skb,
+				    const struct nlmsghdr *nlh,
+				    struct nlattr **tb,
+				    struct netlink_ext_ack *extack)
+{
+	struct ifaddrlblmsg *ifal;
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request");
+		return -EINVAL;
+	}
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb,
+					      IFAL_MAX, ifal_policy, extack);
+
+	ifal = nlmsg_data(nlh);
+	if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifal), tb, IFAL_MAX,
+					    ifal_policy, extack);
+	if (err)
+		return err;
+
+	for (i = 0; i <= IFAL_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case IFAL_ADDRESS:
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in addrlabel get request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 			  struct netlink_ext_ack *extack)
 {
@@ -503,8 +579,7 @@
 	struct ip6addrlbl_entry *p;
 	struct sk_buff *skb;
 
-	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy,
-			  extack);
+	err = ip6addrlbl_valid_get_req(in_skb, nlh, tb, extack);
 	if (err < 0)
 		return err;
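
The two ip6addrlbl_valid_*_req() helpers follow the strict netlink checking split: clients that opted in (cb->strict_check for dumps, netlink_strict_get_check() for gets) have reserved header fields and unknown attributes rejected, while everyone else keeps the permissive nlmsg_parse_deprecated() path. A userspace sketch of that split follows; the header layout and the validate entry point are invented for the demo.

/* Userspace sketch of the strict-vs-legacy request validation split
 * introduced above: strict clients get reserved/unused header fields
 * rejected; legacy clients keep the permissive parse. Types and the
 * validate_dump_req() entry point are illustrative.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct req_hdr {
	unsigned char family;
	unsigned char reserved;		/* must be 0 for strict clients */
	unsigned short prefixlen;	/* unused by dump requests */
};

static int validate_dump_req(const void *buf, size_t len, bool strict)
{
	struct req_hdr hdr;

	if (len < sizeof(hdr))
		return -EINVAL;		/* header must be complete */
	if (!strict)
		return 0;		/* legacy: accept as before */

	memcpy(&hdr, buf, sizeof(hdr));
	if (hdr.reserved || hdr.prefixlen)
		return -EINVAL;		/* strict: no stray values */
	if (len > sizeof(hdr))
		return -EINVAL;		/* strict: no trailing data */
	return 0;
}

int main(void)
{
	struct req_hdr ok = { .family = 10 };
	struct req_hdr bad = { .family = 10, .reserved = 1 };

	printf("%d %d %d\n",
	       validate_dump_req(&ok, sizeof(ok), true),	/* 0 */
	       validate_dump_req(&bad, sizeof(bad), true),	/* -EINVAL */
	       validate_dump_req(&bad, sizeof(bad), false));	/* 0: legacy */
	return 0;
}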
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9a4261e..ef37e05 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	PF_INET6 socket protocol family
  *	Linux INET6 implementation
@@ -11,11 +12,6 @@
  *	piggy, Karl Knutson	:	Socket protocol table
  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
  *	Arnaldo Melo		:	check proc_net_create return, cleanups
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) "IPv6: " fmt
@@ -56,6 +52,7 @@
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
+#include <net/ipv6_stubs.h>
 #include <net/ndisc.h>
 #ifdef CONFIG_IPV6_TUNNEL
 #include <net/ip6_tunnel.h>
@@ -209,8 +206,9 @@
 	np->hop_limit	= -1;
 	np->mcast_hops	= IPV6_DEFAULT_MCASTHOPS;
 	np->mc_loop	= 1;
+	np->mc_all	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->repflow	= net->ipv6.sysctl.flowlabel_reflect;
+	np->repflow	= net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
 	sk->sk_ipv6only	= net->ipv6.sysctl.bindv6only;
 
 	/* Init the ipv4 part of the socket since we can have sockets
@@ -309,6 +307,7 @@
 
 	/* Check if the address belongs to the host. */
 	if (addr_type == IPV6_ADDR_MAPPED) {
+		struct net_device *dev = NULL;
 		int chk_addr_ret;
 
 		/* Binding to v4-mapped address on a v6-only socket
@@ -319,9 +318,20 @@
 			goto out;
 		}
 
+		rcu_read_lock();
+		if (sk->sk_bound_dev_if) {
+			dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+			if (!dev) {
+				err = -ENODEV;
+				goto out_unlock;
+			}
+		}
+
 		/* Reproduce AF_INET checks to make the bindings consistent */
 		v4addr = addr->sin6_addr.s6_addr32[3];
-		chk_addr_ret = inet_addr_type(net, v4addr);
+		chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr);
+		rcu_read_unlock();
+
 		if (!inet_can_nonlocal_bind(net, inet) &&
 		    v4addr != htonl(INADDR_ANY) &&
 		    chk_addr_ret != RTN_LOCAL &&
@@ -349,6 +359,9 @@
 					err = -EINVAL;
 					goto out_unlock;
 				}
+			}
+
+			if (sk->sk_bound_dev_if) {
 				dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
 				if (!dev) {
 					err = -ENODEV;
@@ -467,12 +480,10 @@
 	/* Release rx options */
 
 	skb = xchg(&np->pktoptions, NULL);
-	if (skb)
-		kfree_skb(skb);
+	kfree_skb(skb);
 
 	skb = xchg(&np->rxpmtu, NULL);
-	if (skb)
-		kfree_skb(skb);
+	kfree_skb(skb);
 
 	/* Free flowlabels */
 	fl6_free_socklist(sk);
@@ -532,12 +543,6 @@
 	struct net *net = sock_net(sk);
 
 	switch (cmd) {
-	case SIOCGSTAMP:
-		return sock_get_timestamp(sk, (struct timeval __user *)arg);
-
-	case SIOCGSTAMPNS:
-		return sock_get_timestampns(sk, (struct timespec __user *)arg);
-
 	case SIOCADDRT:
 	case SIOCDELRT:
 
@@ -559,6 +564,39 @@
 }
 EXPORT_SYMBOL(inet6_ioctl);
 
+INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *,
+					    size_t));
+int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+{
+	struct sock *sk = sock->sk;
+
+	if (unlikely(inet_send_prepare(sk)))
+		return -EAGAIN;
+
+	return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
+			       sk, msg, size);
+}
+
+INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *,
+					    size_t, int, int, int *));
+int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		  int flags)
+{
+	struct sock *sk = sock->sk;
+	int addr_len = 0;
+	int err;
+
+	if (likely(!(flags & MSG_ERRQUEUE)))
+		sock_rps_record_flow(sk);
+
+	err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
+			      sk, msg, size, flags & MSG_DONTWAIT,
+			      flags & ~MSG_DONTWAIT, &addr_len);
+	if (err >= 0)
+		msg->msg_namelen = addr_len;
+	return err;
+}
+
 const struct proto_ops inet6_stream_ops = {
 	.family		   = PF_INET6,
 	.owner		   = THIS_MODULE,
@@ -570,12 +608,13 @@
 	.getname	   = inet6_getname,
 	.poll		   = tcp_poll,			/* ok		*/
 	.ioctl		   = inet6_ioctl,		/* must change  */
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = inet_listen,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/
 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
-	.sendmsg	   = inet_sendmsg,		/* ok		*/
-	.recvmsg	   = inet_recvmsg,		/* ok		*/
+	.sendmsg	   = inet6_sendmsg,		/* retpoline's sake */
+	.recvmsg	   = inet6_recvmsg,		/* retpoline's sake */
 #ifdef CONFIG_MMU
 	.mmap		   = tcp_mmap,
 #endif
@@ -603,12 +642,13 @@
 	.getname	   = inet6_getname,
 	.poll		   = udp_poll,			/* ok		*/
 	.ioctl		   = inet6_ioctl,		/* must change  */
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/
 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
-	.sendmsg	   = inet_sendmsg,		/* ok		*/
-	.recvmsg	   = inet_recvmsg,		/* ok		*/
+	.sendmsg	   = inet6_sendmsg,		/* retpoline's sake */
+	.recvmsg	   = inet6_recvmsg,		/* retpoline's sake */
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 	.set_peek_off	   = sk_set_peek_off,
@@ -833,6 +873,17 @@
 	net->ipv6.sysctl.bindv6only = 0;
 	net->ipv6.sysctl.icmpv6_time = 1*HZ;
 	net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
+	net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
+	net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
+
+	/* By default, rate limit error messages.
+	 * Except for pmtu discovery, which rate limiting would break.
+	 * proc_do_large_bitmap needs pointer to the bitmap.
+	 */
+	bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1);
+	bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1);
+	net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask;
+
 	net->ipv6.sysctl.flowlabel_consistency = 1;
 	net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
 	net->ipv6.sysctl.idgen_retries = 3;
@@ -886,15 +937,27 @@
 	.exit = inet6_net_exit,
 };
 
+static int ipv6_route_input(struct sk_buff *skb)
+{
+	ip6_route_input(skb);
+	return skb_dst(skb)->error;
+}
+
 static const struct ipv6_stub ipv6_stub_impl = {
 	.ipv6_sock_mc_join = ipv6_sock_mc_join,
 	.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
 	.ipv6_dst_lookup   = ip6_dst_lookup,
+	.ipv6_route_input  = ipv6_route_input,
 	.fib6_get_table	   = fib6_get_table,
 	.fib6_table_lookup = fib6_table_lookup,
 	.fib6_lookup       = fib6_lookup,
-	.fib6_multipath_select = fib6_multipath_select,
+	.fib6_select_path  = fib6_select_path,
 	.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
+	.fib6_nh_init	   = fib6_nh_init,
+	.fib6_nh_release   = fib6_nh_release,
+	.fib6_update_sernum = fib6_update_sernum_stub,
+	.fib6_rt_update	   = fib6_rt_update,
+	.ip6_del_rt	   = ip6_del_rt,
 	.udpv6_encap_enable = udpv6_encap_enable,
 	.ndisc_send_na = ndisc_send_na,
 	.nd_tbl	= &nd_tbl,
@@ -902,6 +965,7 @@
 
 static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
 	.inet6_bind = __inet6_bind,
+	.udp6_lib_lookup = __udp6_lib_lookup,
 };
 
 static int __init inet6_init(void)
@@ -1001,6 +1065,9 @@
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
+	err = ipv6_anycast_init();
+	if (err)
+		goto ipv6_anycast_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -1091,6 +1158,8 @@
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
+	ipv6_anycast_cleanup();
+ipv6_anycast_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
 	ndisc_late_cleanup();
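
inet6_sendmsg()/inet6_recvmsg() exist so the AF_INET6 proto_ops can use INDIRECT_CALL_2(), which compares sk->sk_prot->sendmsg/recvmsg against the two hot targets (TCP and UDPv6) and calls them directly, avoiding a retpoline-penalized indirect call on the common paths. A userspace model of the macro's semantics (not the kernel's exact expansion):

/* Userspace model of the INDIRECT_CALL_2() pattern used by
 * inet6_sendmsg()/inet6_recvmsg() above: compare the function pointer
 * against the hot candidates and turn the indirect call into a direct
 * one when it matches, falling back to the indirect call otherwise.
 */
#include <stdio.h>

static int tcp_send(int n)  { return n + 1; }
static int udp_send(int n)  { return n + 2; }
static int raw_send(int n)  { return n + 3; }

#define INDIRECT_CALL_2(f, f1, f2, ...)				\
	((f) == (f1) ? f1(__VA_ARGS__) :			\
	 (f) == (f2) ? f2(__VA_ARGS__) : (f)(__VA_ARGS__))

int main(void)
{
	int (*send)(int) = udp_send;

	/* matches udp_send: becomes a direct call on that path */
	printf("%d\n", INDIRECT_CALL_2(send, tcp_send, udp_send, 40));

	send = raw_send;	/* cold case: plain indirect call */
	printf("%d\n", INDIRECT_CALL_2(send, tcp_send, udp_send, 40));
	return 0;
}
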
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 78c9743..95835e8 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C)2002 USAGI/WIDE Project
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Authors
  *
  *	Mitsuru KANDA @USAGI       : IPv6 Support
@@ -476,7 +464,7 @@
 	struct ah_data *ahp = x->data;
 	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
 	int hdr_len = skb_network_header_len(skb);
-	int ah_hlen = (ah->hdrlen + 2) << 2;
+	int ah_hlen = ipv6_authlen(ah);
 
 	if (err)
 		goto out;
@@ -558,7 +546,7 @@
 	ahash = ahp->ahash;
 
 	nexthdr = ah->nexthdr;
-	ah_hlen = (ah->hdrlen + 2) << 2;
+	ah_hlen = ipv6_authlen(ah);
 
 	if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
 	    ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
@@ -805,9 +793,7 @@
 	if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)
 		pr_info("%s: can't remove protocol\n", __func__);
 
-	if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
-		pr_info("%s: can't remove xfrm type\n", __func__);
-
+	xfrm_unregister_type(&ah6_type, AF_INET6);
 }
 
 module_init(ah6_init);
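
Both ah6.c hunks replace the open-coded "(ah->hdrlen + 2) << 2" with ipv6_authlen(). The AH hdrlen field counts 32-bit words minus 2 (RFC 4302), so the byte length is (hdrlen + 2) * 4; a standalone illustration of the arithmetic, with local types rather than the kernel's:

/* The AH payload-length field counts 32-bit words minus 2 (RFC 4302),
 * so the open-coded "(hdrlen + 2) << 2" converts it to bytes. A local
 * helper in the style of the kernel's ipv6_authlen() macro:
 */
#include <stdio.h>

struct ip_auth_hdr {
	unsigned char nexthdr;
	unsigned char hdrlen;	/* length in 4-byte units, minus 2 */
};

static unsigned int auth_hdr_len(const struct ip_auth_hdr *ah)
{
	return (ah->hdrlen + 2u) << 2;	/* == (hdrlen + 2) * 4 bytes */
}

int main(void)
{
	/* e.g. HMAC-SHA1-96: 12 fixed bytes + 12 ICV bytes = 24 bytes,
	 * which AH encodes as hdrlen = 24/4 - 2 = 4.
	 */
	struct ip_auth_hdr ah = { .nexthdr = 6, .hdrlen = 4 };

	printf("AH header is %u bytes\n", auth_hdr_len(&ah));	/* 24 */
	return 0;
}
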
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff70..fed91ab 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Anycast support for IPv6
  *	Linux INET6 implementation
@@ -6,11 +7,6 @@
  *	David L Stevens (dlstevens@us.ibm.com)
  *
  *	based heavily on net/ipv6/mcast.c
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/capability.h>
@@ -44,8 +40,22 @@
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
+/*	anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *	socket join an anycast group
  */
@@ -204,16 +214,39 @@
 	rtnl_unlock();
 }
 
+static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
+{
+	unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
+	spin_unlock(&acaddr_hash_lock);
+}
+
+static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
+{
+	spin_lock(&acaddr_hash_lock);
+	hlist_del_init_rcu(&aca->aca_addr_lst);
+	spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
 	refcount_inc(&aca->aca_refcnt);
 }
 
+static void aca_free_rcu(struct rcu_head *h)
+{
+	struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
+
+	fib6_info_release(aca->aca_rt);
+	kfree(aca);
+}
+
 static void aca_put(struct ifacaddr6 *ac)
 {
 	if (refcount_dec_and_test(&ac->aca_refcnt)) {
-		fib6_info_release(ac->aca_rt);
-		kfree(ac);
+		call_rcu(&ac->rcu, aca_free_rcu);
 	}
 }
 
@@ -229,6 +262,7 @@
 	aca->aca_addr = *addr;
 	fib6_info_hold(f6i);
 	aca->aca_rt = f6i;
+	INIT_HLIST_NODE(&aca->aca_addr_lst);
 	aca->aca_users = 1;
 	/* aca_tstamp should be updated upon changes */
 	aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -285,6 +319,8 @@
 	aca_get(aca);
 	write_unlock_bh(&idev->lock);
 
+	ipv6_add_acaddr_hash(net, aca);
+
 	ip6_ins_rt(net, f6i);
 
 	addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -325,6 +361,7 @@
 	else
 		idev->ac_list = aca->aca_next;
 	write_unlock_bh(&idev->lock);
+	ipv6_del_acaddr_hash(aca);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
 	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -352,6 +389,8 @@
 		idev->ac_list = aca->aca_next;
 		write_unlock_bh(&idev->lock);
 
+		ipv6_del_acaddr_hash(aca);
+
 		addrconf_leave_solict(idev, &aca->aca_addr);
 
 		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -390,17 +429,27 @@
 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr)
 {
+	struct net_device *nh_dev;
+	struct ifacaddr6 *aca;
 	bool found = false;
 
 	rcu_read_lock();
 	if (dev)
 		found = ipv6_chk_acast_dev(dev, addr);
-	else
-		for_each_netdev_rcu(net, dev)
-			if (ipv6_chk_acast_dev(dev, addr)) {
+	else {
+		unsigned int hash = inet6_acaddr_hash(net, addr);
+
+		hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
+					 aca_addr_lst) {
+			nh_dev = fib6_info_nh_dev(aca->aca_rt);
+			if (!nh_dev || !net_eq(dev_net(nh_dev), net))
+				continue;
+			if (ipv6_addr_equal(&aca->aca_addr, addr)) {
 				found = true;
 				break;
 			}
+		}
+	}
 	rcu_read_unlock();
 	return found;
 }
@@ -540,3 +589,24 @@
 	remove_proc_entry("anycast6", net->proc_net);
 }
 #endif
+
+/*	Init / cleanup code
+ */
+int __init ipv6_anycast_init(void)
+{
+	int i;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+	return 0;
+}
+
+void ipv6_anycast_cleanup(void)
+{
+	int i;
+
+	spin_lock(&acaddr_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+	spin_unlock(&acaddr_hash_lock);
+}
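
The new anycast hash table replaces a linear for_each_netdev() scan with a 256-bucket lookup keyed by hash_32(ipv6_addr_hash(addr) ^ net_hash_mix(net), IN6_ADDR_HSIZE_SHIFT). A userspace model of the bucket computation; the per-namespace salt is an invented constant standing in for net_hash_mix():

/* Userspace model of the 256-bucket anycast address hash added above:
 * xor the address words together (as ipv6_addr_hash() does), mix in a
 * per-namespace salt, and fold to HSIZE_SHIFT bits with the same
 * golden-ratio multiply the kernel's hash_32() uses.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HSIZE_SHIFT 8
#define HSIZE (1u << HSIZE_SHIFT)

static uint32_t hash_32(uint32_t val, unsigned int bits)
{
	return (val * 0x61C88647u) >> (32 - bits);
}

static uint32_t addr_hash(const uint8_t addr[16], uint32_t net_salt)
{
	uint32_t w[4], val;

	memcpy(w, addr, sizeof(w));
	val = (w[0] ^ w[1] ^ w[2] ^ w[3]) ^ net_salt;
	return hash_32(val, HSIZE_SHIFT);
}

int main(void)
{
	uint8_t addr[16] = { 0x20, 0x01, 0x0d, 0xb8, [15] = 1 };

	printf("bucket %u of %u\n", addr_hash(addr, 0xdeadbeef), HSIZE);
	return 0;
}
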
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 1c0bb9f..221c81f 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * CALIPSO - Common Architecture Label IPv6 Security Option
  *
@@ -6,25 +7,10 @@
  *
  * Authors: Paul Moore <paul.moore@hp.com>
  *          Huw Davies <huw@codeweavers.com>
- *
  */
 
 /* (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  * (c) Copyright Huw Davies <huw@codeweavers.com>, 2015
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/init.h>
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 1ede7a1..96f9392 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	common UDP/RAW code
  *	Linux INET6 implementation
  *
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/capability.h>
@@ -31,6 +27,7 @@
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
 #include <net/dsfield.h>
+#include <net/sock_reuseport.h>
 
 #include <linux/errqueue.h>
 #include <linux/uaccess.h>
@@ -78,7 +75,7 @@
 
 	if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) {
 		flowlabel = fl6_sock_lookup(sk, np->flow_label);
-		if (!flowlabel)
+		if (IS_ERR(flowlabel))
 			return -EINVAL;
 	}
 	ip6_datagram_flow_key_init(&fl6, sk);
@@ -258,6 +255,7 @@
 		goto out;
 	}
 
+	reuseport_has_conns(sk, true);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 out:
@@ -341,6 +339,7 @@
 	skb_reset_network_header(skb);
 	iph = ipv6_hdr(skb);
 	iph->daddr = fl6->daddr;
+	ip6_flow_hdr(iph, 0, 0);
 
 	serr = SKB_EXT_ERR(skb);
 	serr->ee.ee_errno = err;
@@ -700,17 +699,15 @@
 	}
 	if (np->rxopt.bits.rxorigdstaddr) {
 		struct sockaddr_in6 sin6;
-		__be16 *ports;
-		int end;
+		__be16 _ports[2], *ports;
 
-		end = skb_transport_offset(skb) + 4;
-		if (end <= 0 || pskb_may_pull(skb, end)) {
+		ports = skb_header_pointer(skb, skb_transport_offset(skb),
+					   sizeof(_ports), &_ports);
+		if (ports) {
 			/* All current transport protocols have the port numbers in the
 			 * first four bytes of the transport header and this function is
 			 * written with this assumption in mind.
 			 */
-			ports = (__be16 *)skb_transport_header(skb);
-
 			sin6.sin6_family = AF_INET6;
 			sin6.sin6_addr = ipv6_hdr(skb)->daddr;
 			sin6.sin6_port = ports[1];
@@ -772,6 +769,7 @@
 		case IPV6_2292PKTINFO:
 		    {
 			struct net_device *dev = NULL;
+			int src_idx;
 
 			if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
 				err = -EINVAL;
@@ -779,12 +777,15 @@
 			}
 
 			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+			src_idx = src_info->ipi6_ifindex;
 
-			if (src_info->ipi6_ifindex) {
+			if (src_idx) {
 				if (fl6->flowi6_oif &&
-				    src_info->ipi6_ifindex != fl6->flowi6_oif)
+				    src_idx != fl6->flowi6_oif &&
+				    (sk->sk_bound_dev_if != fl6->flowi6_oif ||
+				     !sk_dev_equal_l3scope(sk, src_idx)))
 					return -EINVAL;
-				fl6->flowi6_oif = src_info->ipi6_ifindex;
+				fl6->flowi6_oif = src_idx;
 			}
 
 			addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
@@ -1031,7 +1032,7 @@
 	src   = &sp->sk_v6_rcv_saddr;
 	seq_printf(seq,
 		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
+		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
 		   bucket,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
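
The rxorigdstaddr fix reads the two ports via skb_header_pointer(), which returns a direct pointer when the bytes sit in the linear head and otherwise copies them into the caller's _ports buffer, so nonlinear skbs are handled without any pull. A userspace model of that idiom, with a two-fragment buffer invented for the demo:

/* Userspace model of the skb_header_pointer() idiom adopted above:
 * read a small, fixed-size field out of a buffer that may be split
 * into fragments, copying into caller storage only when the bytes are
 * not contiguous.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct frag_buf {
	const uint8_t *linear;	/* contiguous head */
	size_t linear_len;
	const uint8_t *frag;	/* one trailing fragment */
	size_t frag_len;
};

/* returns a pointer to len bytes at off, or NULL if out of range */
static const void *header_pointer(const struct frag_buf *b, size_t off,
				  size_t len, void *copy)
{
	uint8_t *dst = copy;
	size_t n;

	if (off + len <= b->linear_len)
		return b->linear + off;		/* fast path: no copy */
	if (off + len > b->linear_len + b->frag_len)
		return NULL;			/* truncated packet */

	/* slow path: stitch bytes from head + fragment into caller buf */
	n = off < b->linear_len ? b->linear_len - off : 0;
	memcpy(dst, b->linear + off, n);
	memcpy(dst + n, b->frag + (off + n - b->linear_len), len - n);
	return copy;
}

int main(void)
{
	uint8_t head[2] = { 0x12, 0x34 }, tail[2] = { 0x56, 0x78 };
	struct frag_buf b = { head, sizeof(head), tail, sizeof(tail) };
	uint8_t _ports[4];
	const uint8_t *ports = header_pointer(&b, 0, sizeof(_ports), _ports);

	printf("%s\n", ports ? "read 4 bytes across fragments" : "short");
	return 0;
}
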
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 88a7579..a3b403b 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C)2002 USAGI/WIDE Project
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Authors
  *
  *	Mitsuru KANDA @USAGI       : IPv6 Support
@@ -53,8 +41,6 @@
 
 #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
 
-static u32 esp6_get_mtu(struct xfrm_state *x, int mtu);
-
 /*
  * Allocate an AEAD request structure with extra space for SG and IV.
  *
@@ -145,10 +131,13 @@
 	void *tmp;
 	struct xfrm_state *x;
 
-	if (xo && (xo->flags & XFRM_DEV_RESUME))
-		x = skb->sp->xvec[skb->sp->len - 1];
-	else
+	if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+		struct sec_path *sp = skb_sec_path(skb);
+
+		x = sp->xvec[sp->len - 1];
+	} else {
 		x = skb_dst(skb)->xfrm;
+	}
 
 	tmp = ESP_SKB_CB(skb)->tmp;
 	esp_ssg_unref(x, tmp);
@@ -293,7 +282,7 @@
 			skb->len += tailen;
 			skb->data_len += tailen;
 			skb->truesize += tailen;
-			if (sk)
+			if (sk && sk_fullsock(sk))
 				refcount_add(tailen, &sk->sk_wmem_alloc);
 
 			goto out;
@@ -456,7 +445,7 @@
 		struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
 		u32 padto;
 
-		padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
+		padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
 		if (skb->len < padto)
 			esp.tfclen = padto - skb->len;
 	}
@@ -601,12 +590,11 @@
 
 static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead = x->data;
 	struct aead_request *req;
 	struct sk_buff *trailer;
 	int ivlen = crypto_aead_ivsize(aead);
-	int elen = skb->len - sizeof(*esph) - ivlen;
+	int elen = skb->len - sizeof(struct ip_esp_hdr) - ivlen;
 	int nfrags;
 	int assoclen;
 	int seqhilen;
@@ -616,7 +604,7 @@
 	u8 *iv;
 	struct scatterlist *sg;
 
-	if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) {
+	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + ivlen)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -626,7 +614,7 @@
 		goto out;
 	}
 
-	assoclen = sizeof(*esph);
+	assoclen = sizeof(struct ip_esp_hdr);
 	seqhilen = 0;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
@@ -697,21 +685,6 @@
 	return ret;
 }
 
-static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
-{
-	struct crypto_aead *aead = x->data;
-	u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
-	unsigned int net_adj;
-
-	if (x->props.mode != XFRM_MODE_TUNNEL)
-		net_adj = sizeof(struct ipv6hdr);
-	else
-		net_adj = 0;
-
-	return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
-		 net_adj) & ~(blksize - 1)) + net_adj - 2;
-}
-
 static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		    u8 type, u8 code, int offset, __be32 info)
 {
@@ -929,7 +902,6 @@
 	.flags		= XFRM_TYPE_REPLAY_PROT,
 	.init_state	= esp6_init_state,
 	.destructor	= esp6_destroy,
-	.get_mtu	= esp6_get_mtu,
 	.input		= esp6_input,
 	.output		= esp6_output,
 	.hdr_offset	= xfrm6_find_1stfragopt,
@@ -961,8 +933,7 @@
 {
 	if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)
 		pr_info("%s: can't remove protocol\n", __func__);
-	if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
-		pr_info("%s: can't remove xfrm type\n", __func__);
+	xfrm_unregister_type(&esp6_type, AF_INET6);
 }
 
 module_init(esp6_init);
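
The deleted esp6_get_mtu() is the ESP payload-MTU computation that xfrm_state_mtu() now provides generically: subtract the ESP header and ICV, round down to the cipher block size, then reserve the 2-byte pad-length/next-header trailer. The same arithmetic as a standalone function, with illustrative numbers:

/* Standalone version of the MTU arithmetic from the removed
 * esp6_get_mtu(): the usable payload is the link MTU minus ESP
 * overhead, aligned down to the cipher block size, minus the 2-byte
 * pad-length/next-header trailer.
 */
#include <stdio.h>

static unsigned int esp_payload_mtu(unsigned int mtu,
				    unsigned int header_len,
				    unsigned int icv_len,
				    unsigned int blksize,	/* >= 4, pow2 */
				    unsigned int net_adj)	/* inner hdr */
{
	return ((mtu - header_len - icv_len - net_adj) & ~(blksize - 1))
	       + net_adj - 2;
}

int main(void)
{
	/* transport mode over a 1500-byte link: 8B ESP hdr + 8B IV = 16,
	 * 16B ICV, AES block size 16, room kept for the 40B IPv6 header.
	 */
	printf("%u\n", esp_payload_mtu(1500, 16, 16, 16, 40));	/* 1462 */
	return 0;
}
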
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 6177e21..e31626f 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IPV6 GSO/GRO offload support
  * Linux INET implementation
@@ -5,10 +6,6 @@
  * Copyright (C) 2016 secunet Security Networks AG
  * Author: Steffen Klassert <steffen.klassert@secunet.com>
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
  * ESP GRO support
  */
 
@@ -68,26 +65,27 @@
 
 	xo = xfrm_offload(skb);
 	if (!xo || !(xo->flags & CRYPTO_DONE)) {
-		err = secpath_set(skb);
-		if (err)
+		struct sec_path *sp = secpath_set(skb);
+
+		if (!sp)
 			goto out;
 
-		if (skb->sp->len == XFRM_MAX_DEPTH)
-			goto out;
+		if (sp->len == XFRM_MAX_DEPTH)
+			goto out_reset;
 
 		x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
 				      (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
 				      spi, IPPROTO_ESP, AF_INET6);
 		if (!x)
-			goto out;
+			goto out_reset;
 
-		skb->sp->xvec[skb->sp->len++] = x;
-		skb->sp->olen++;
+		sp->xvec[sp->len++] = x;
+		sp->olen++;
 
 		xo = xfrm_offload(skb);
 		if (!xo) {
 			xfrm_state_put(x);
-			goto out;
+			goto out_reset;
 		}
 	}
 
@@ -108,6 +106,8 @@
 	xfrm_input(skb, IPPROTO_ESP, spi, -2);
 
 	return ERR_PTR(-EINPROGRESS);
+out_reset:
+	secpath_reset(skb);
 out:
 	skb_push(skb, offset);
 	NAPI_GRO_CB(skb)->same_flow = 0;
@@ -133,6 +133,44 @@
 	xo->proto = proto;
 }
 
+static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x,
+						struct sk_buff *skb,
+						netdev_features_t features)
+{
+	__skb_push(skb, skb->mac_len);
+	return skb_mac_gso_segment(skb, features);
+}
+
+static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x,
+						   struct sk_buff *skb,
+						   netdev_features_t features)
+{
+	const struct net_offload *ops;
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct xfrm_offload *xo = xfrm_offload(skb);
+
+	skb->transport_header += x->props.header_len;
+	ops = rcu_dereference(inet6_offloads[xo->proto]);
+	if (likely(ops && ops->callbacks.gso_segment))
+		segs = ops->callbacks.gso_segment(skb, features);
+
+	return segs;
+}
+
+static struct sk_buff *xfrm6_outer_mode_gso_segment(struct xfrm_state *x,
+						    struct sk_buff *skb,
+						    netdev_features_t features)
+{
+	switch (x->outer_mode.encap) {
+	case XFRM_MODE_TUNNEL:
+		return xfrm6_tunnel_gso_segment(x, skb, features);
+	case XFRM_MODE_TRANSPORT:
+		return xfrm6_transport_gso_segment(x, skb, features);
+	}
+
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
 				        netdev_features_t features)
 {
@@ -141,6 +179,7 @@
 	struct crypto_aead *aead;
 	netdev_features_t esp_features = features;
 	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct sec_path *sp;
 
 	if (!xo)
 		return ERR_PTR(-EINVAL);
@@ -148,7 +187,8 @@
 	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
 		return ERR_PTR(-EINVAL);
 
-	x = skb->sp->xvec[skb->sp->len - 1];
+	sp = skb_sec_path(skb);
+	x = sp->xvec[sp->len - 1];
 	aead = x->data;
 	esph = ip_esp_hdr(skb);
 
@@ -169,7 +209,7 @@
 
 	xo->flags |= XFRM_GSO_SEGMENT;
 
-	return x->outer_mode->gso_segment(x, skb, esp_features);
+	return xfrm6_outer_mode_gso_segment(x, skb, esp_features);
 }
 
 static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -296,9 +336,7 @@
 
 static void __exit esp6_offload_exit(void)
 {
-	if (xfrm_unregister_type_offload(&esp6_type_offload, AF_INET6) < 0)
-		pr_info("%s: can't remove xfrm type offload\n", __func__);
-
+	xfrm_unregister_type_offload(&esp6_type_offload, AF_INET6);
 	inet6_del_offload(&esp6_offload, IPPROTO_ESP);
 }
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 20291c2..ab5add0 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Extension Header handling for IPv6
  *	Linux INET6 implementation
@@ -6,11 +7,6 @@
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Andi Kleen		<ak@muc.de>
  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 /* Changes:
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index ae365df..da46c42 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IPv6 library code, needed by static components when full IPv6 support is
  * not configured or static.
@@ -196,10 +197,8 @@
 		struct ipv6hdr _ip6, *ip6;
 
 		ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
-		if (!ip6 || (ip6->version != 6)) {
-			printk(KERN_ERR "IPv6 header not found\n");
+		if (!ip6 || (ip6->version != 6))
 			return -EBADMSG;
-		}
 		start = *offset + sizeof(struct ipv6hdr);
 		nexthdr = ip6->nexthdr;
 	}
@@ -265,7 +264,7 @@
 		} else if (nexthdr == NEXTHDR_AUTH) {
 			if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0))
 				break;
-			hdrlen = (hp->hdrlen + 2) << 2;
+			hdrlen = ipv6_authlen(hp);
 		} else
 			hdrlen = ipv6_optlen(hp);
 
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index f5e2ba1..06750d6 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPV6 GSO/GRO offload support
  *	Linux INET6 implementation
  *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  *      IPV6 Extension Header GSO/GRO support
  */
 #include <net/protocol.h>
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index f590446..f9e8fe3 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/ipv6/fib6_rules.c	IPv6 Routing Policy Rules
  *
  * Copyright (C)2003-2006 Helsinki University of Technology
  * Copyright (C)2003-2006 USAGI/WIDE Project
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License as
- *	published by the Free Software Foundation, version 2.
- *
  * Authors
  *	Thomas Graf		<tgraf@suug.ch>
  *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
@@ -61,16 +58,16 @@
 }
 
 /* called with rcu lock held; no reference taken on fib6_info */
-struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
-			      int flags)
+int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+		struct fib6_result *res, int flags)
 {
-	struct fib6_info *f6i;
 	int err;
 
 	if (net->ipv6.fib6_has_custom_rules) {
 		struct fib_lookup_arg arg = {
 			.lookup_ptr = fib6_table_lookup,
 			.lookup_data = &oif,
+			.result = res,
 			.flags = FIB_LOOKUP_NOREF,
 		};
 
@@ -78,19 +75,15 @@
 
 		err = fib_rules_lookup(net->ipv6.fib6_rules_ops,
 				       flowi6_to_flowi(fl6), flags, &arg);
-		if (err)
-			return ERR_PTR(err);
-
-		f6i = arg.result ? : net->ipv6.fib6_null_entry;
 	} else {
-		f6i = fib6_table_lookup(net, net->ipv6.fib6_local_tbl,
-					oif, fl6, flags);
-		if (!f6i || f6i == net->ipv6.fib6_null_entry)
-			f6i = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
-						oif, fl6, flags);
+		err = fib6_table_lookup(net, net->ipv6.fib6_local_tbl, oif,
+					fl6, res, flags);
+		if (err || res->f6i == net->ipv6.fib6_null_entry)
+			err = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
+						oif, fl6, res, flags);
 	}
 
-	return f6i;
+	return err;
 }
 
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
@@ -98,9 +91,11 @@
 				   int flags, pol_lookup_t lookup)
 {
 	if (net->ipv6.fib6_has_custom_rules) {
+		struct fib6_result res = {};
 		struct fib_lookup_arg arg = {
 			.lookup_ptr = lookup,
 			.lookup_data = skb,
+			.result = &res,
 			.flags = FIB_LOOKUP_NOREF,
 		};
 
@@ -110,22 +105,23 @@
 		fib_rules_lookup(net->ipv6.fib6_rules_ops,
 				 flowi6_to_flowi(fl6), flags, &arg);
 
-		if (arg.result)
-			return arg.result;
+		if (res.rt6)
+			return &res.rt6->dst;
 	} else {
 		struct rt6_info *rt;
 
 		rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
 		if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
 			return &rt->dst;
-		ip6_rt_put(rt);
+		ip6_rt_put_flags(rt, flags);
 		rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
 		if (rt->dst.error != -EAGAIN)
 			return &rt->dst;
-		ip6_rt_put(rt);
+		ip6_rt_put_flags(rt, flags);
 	}
 
-	dst_hold(&net->ipv6.ip6_null_entry->dst);
+	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
+		dst_hold(&net->ipv6.ip6_null_entry->dst);
 	return &net->ipv6.ip6_null_entry->dst;
 }
 
@@ -157,11 +153,11 @@
 static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
 				int flags, struct fib_lookup_arg *arg)
 {
+	struct fib6_result *res = arg->result;
 	struct flowi6 *flp6 = &flp->u.ip6;
 	struct net *net = rule->fr_net;
 	struct fib6_table *table;
-	struct fib6_info *f6i;
-	int err = -EAGAIN, *oif;
+	int err, *oif;
 	u32 tb_id;
 
 	switch (rule->action) {
@@ -182,14 +178,12 @@
 		return -EAGAIN;
 
 	oif = (int *)arg->lookup_data;
-	f6i = fib6_table_lookup(net, table, *oif, flp6, flags);
-	if (f6i != net->ipv6.fib6_null_entry) {
+	err = fib6_table_lookup(net, table, *oif, flp6, res, flags);
+	if (!err && res->f6i != net->ipv6.fib6_null_entry)
 		err = fib6_rule_saddr(net, rule, flags, flp6,
-				      fib6_info_nh_dev(f6i));
-
-		if (likely(!err))
-			arg->result = f6i;
-	}
+				      res->nh->fib_nh_dev);
+	else
+		err = -EAGAIN;
 
 	return err;
 }
@@ -197,6 +191,7 @@
 static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			      int flags, struct fib_lookup_arg *arg)
 {
+	struct fib6_result *res = arg->result;
 	struct flowi6 *flp6 = &flp->u.ip6;
 	struct rt6_info *rt = NULL;
 	struct fib6_table *table;
@@ -243,15 +238,16 @@
 			goto out;
 	}
 again:
-	ip6_rt_put(rt);
+	ip6_rt_put_flags(rt, flags);
 	err = -EAGAIN;
 	rt = NULL;
 	goto out;
 
 discard_pkt:
-	dst_hold(&rt->dst);
+	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
+		dst_hold(&rt->dst);
 out:
-	arg->result = rt;
+	res->rt6 = rt;
 	return err;
 }
 
@@ -266,9 +262,13 @@
 
 static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
 {
-	struct rt6_info *rt = (struct rt6_info *) arg->result;
+	struct fib6_result *res = arg->result;
+	struct rt6_info *rt = res->rt6;
 	struct net_device *dev = NULL;
 
+	if (!rt)
+		return false;
+
 	if (rt->rt6i_idev)
 		dev = rt->rt6i_idev->dev;
 
@@ -287,7 +287,8 @@
 	return false;
 
 suppress_route:
-	ip6_rt_put(rt);
+	if (!(arg->flags & FIB_LOOKUP_NOREF))
+		ip6_rt_put(rt);
 	return true;
 }
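
Throughout these hunks, fib6_lookup()/fib6_table_lookup() switch from returning a struct fib6_info pointer to filling a caller-provided struct fib6_result and returning 0/-errno, so the fib entry, the chosen nexthop, and the rt6_info travel together while errors stay out of band. A userspace sketch of the calling convention, with stand-in types:

/* Userspace sketch of the calling-convention change in this hunk:
 * instead of returning a single object (and overloading the pointer
 * for failures), the lookup fills a caller-owned result struct and
 * returns 0/-errno. Types model fib6_info/fib6_nh, nothing more.
 */
#include <errno.h>
#include <stdio.h>

struct route { const char *name; };
struct nhop  { const char *dev; };

struct lookup_result {		/* models struct fib6_result */
	const struct route *rt;
	const struct nhop *nh;
};

static const struct route default_rt = { "default" };
static const struct nhop  default_nh = { "eth0" };

static int table_lookup(int oif, struct lookup_result *res)
{
	if (oif < 0)
		return -EINVAL;		/* errors stay out-of-band */
	res->rt = &default_rt;		/* entry and nexthop travel */
	res->nh = &default_nh;		/* together in one result */
	return 0;
}

int main(void)
{
	struct lookup_result res = {};

	if (!table_lookup(1, &res))
		printf("%s via %s\n", res.rt->name, res.nh->dev);
	return 0;
}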
 
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index 6de3c04..091f941 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -1,9 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <linux/udp.h>
+#include <linux/icmpv6.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <net/fou.h>
@@ -69,14 +71,100 @@
 	return 0;
 }
 
+static int gue6_err_proto_handler(int proto, struct sk_buff *skb,
+				  struct inet6_skb_parm *opt,
+				  u8 type, u8 code, int offset, __be32 info)
+{
+	const struct inet6_protocol *ipprot;
+
+	ipprot = rcu_dereference(inet6_protos[proto]);
+	if (ipprot && ipprot->err_handler) {
+		if (!ipprot->err_handler(skb, opt, type, code, offset, info))
+			return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		    u8 type, u8 code, int offset, __be32 info)
+{
+	int transport_offset = skb_transport_offset(skb);
+	struct guehdr *guehdr;
+	size_t len, optlen;
+	int ret;
+
+	len = sizeof(struct udphdr) + sizeof(struct guehdr);
+	if (!pskb_may_pull(skb, transport_offset + len))
+		return -EINVAL;
+
+	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+	switch (guehdr->version) {
+	case 0: /* Full GUE header present */
+		break;
+	case 1: {
+		/* Direct encapsulation of IPv4 or IPv6 */
+		skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr));
+
+		switch (((struct iphdr *)guehdr)->version) {
+		case 4:
+			ret = gue6_err_proto_handler(IPPROTO_IPIP, skb, opt,
+						     type, code, offset, info);
+			goto out;
+		case 6:
+			ret = gue6_err_proto_handler(IPPROTO_IPV6, skb, opt,
+						     type, code, offset, info);
+			goto out;
+		default:
+			ret = -EOPNOTSUPP;
+			goto out;
+		}
+	}
+	default: /* Undefined version */
+		return -EOPNOTSUPP;
+	}
+
+	if (guehdr->control)
+		return -ENOENT;
+
+	optlen = guehdr->hlen << 2;
+
+	if (!pskb_may_pull(skb, transport_offset + len + optlen))
+		return -EINVAL;
+
+	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+	if (validate_gue_flags(guehdr, optlen))
+		return -EINVAL;
+
+	/* Handling exceptions for direct UDP encapsulation in GUE would lead to
+	 * recursion. Besides, this kind of encapsulation can't even be
+	 * configured currently. Discard this.
+	 */
+	if (guehdr->proto_ctype == IPPROTO_UDP ||
+	    guehdr->proto_ctype == IPPROTO_UDPLITE)
+		return -EOPNOTSUPP;
+
+	skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr));
+	ret = gue6_err_proto_handler(guehdr->proto_ctype, skb,
+				     opt, type, code, offset, info);
+
+out:
+	skb_set_transport_header(skb, transport_offset);
+	return ret;
+}
+
+
 static const struct ip6_tnl_encap_ops fou_ip6tun_ops = {
 	.encap_hlen = fou_encap_hlen,
 	.build_header = fou6_build_header,
+	.err_handler = gue6_err,
 };
 
 static const struct ip6_tnl_encap_ops gue_ip6tun_ops = {
 	.encap_hlen = gue_encap_hlen,
 	.build_header = gue6_build_header,
+	.err_handler = gue6_err,
 };
 
 static int ip6_tnl_encap_add_fou_ops(void)
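
gue6_err() forwards ICMPv6 errors to whatever the GUE payload carries: version 1 means a bare IPv4/IPv6 packet, version 0 re-validates header and options before dispatching on proto_ctype, and UDP-in-GUE is refused since handling it would recurse into this same handler. A userspace sketch of the table dispatch plus the recursion guard; the handler table is a stand-in for inet6_protos[]:

/* Sketch of the error re-dispatch pattern gue6_err() uses: look up the
 * inner protocol's handler in a table and forward the ICMP error,
 * refusing self-referential protocols to avoid recursion. Protocol
 * numbers are the real IANA values; the table itself is a toy.
 */
#include <errno.h>
#include <stdio.h>

#define IPPROTO_IPIP 4
#define IPPROTO_UDP  17
#define MAX_PROTO    256

typedef int (*err_handler_t)(int type, int code);

static int ipip_err(int type, int code)
{
	printf("ipip: icmp type %d code %d\n", type, code);
	return 0;
}

static err_handler_t handlers[MAX_PROTO] = {
	[IPPROTO_IPIP] = ipip_err,
};

static int dispatch_err(int proto, int type, int code)
{
	if (proto == IPPROTO_UDP)
		return -EOPNOTSUPP;	/* would recurse into ourselves */
	if (proto < 0 || proto >= MAX_PROTO || !handlers[proto])
		return -ENOENT;		/* nobody to notify */
	return handlers[proto](type, code);
}

int main(void)
{
	printf("%d\n", dispatch_err(IPPROTO_IPIP, 2, 0));	/* 0 */
	printf("%d\n", dispatch_err(IPPROTO_UDP, 2, 0));	/* -EOPNOTSUPP */
	return 0;
}
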
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index c9c53ad..62c9972 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Internet Control Message Protocol (ICMPv6)
  *	Linux INET6 implementation
@@ -8,11 +9,6 @@
  *	Based on net/ipv4/icmp.c
  *
  *	RFC 1885
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 /*
@@ -79,12 +75,12 @@
  *
  *	On SMP we have one ICMP socket per-cpu.
  */
-static inline struct sock *icmpv6_sk(struct net *net)
+static struct sock *icmpv6_sk(struct net *net)
 {
-	return net->ipv6.icmp_sk[smp_processor_id()];
+	return this_cpu_read(*net->ipv6.icmp_sk);
 }
 
-static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		       u8 type, u8 code, int offset, __be32 info)
 {
 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
@@ -100,6 +96,8 @@
 	if (!(type & ICMPV6_INFOMSG_MASK))
 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
 			ping_err(skb, offset, ntohl(info));
+
+	return 0;
 }
 
 static int icmpv6_rcv(struct sk_buff *skb);
@@ -166,22 +164,21 @@
 	return false;
 }
 
-static bool icmpv6_mask_allow(int type)
+static bool icmpv6_mask_allow(struct net *net, int type)
 {
-	/* Informational messages are not limited. */
-	if (type & ICMPV6_INFOMSG_MASK)
+	if (type > ICMPV6_MSG_MAX)
 		return true;
 
-	/* Do not limit pmtu discovery, it would break it. */
-	if (type == ICMPV6_PKT_TOOBIG)
+	/* Limit if icmp type is set in ratemask. */
+	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
 		return true;
 
 	return false;
 }
 
-static bool icmpv6_global_allow(int type)
+static bool icmpv6_global_allow(struct net *net, int type)
 {
-	if (icmpv6_mask_allow(type))
+	if (icmpv6_mask_allow(net, type))
 		return true;
 
 	if (icmp_global_allow())
@@ -200,7 +197,7 @@
 	struct dst_entry *dst;
 	bool res = false;
 
-	if (icmpv6_mask_allow(type))
+	if (icmpv6_mask_allow(net, type))
 		return true;
 
 	/*
@@ -396,23 +393,28 @@
 	return ERR_PTR(err);
 }
 
-static int icmp6_iif(const struct sk_buff *skb)
+static struct net_device *icmp6_dev(const struct sk_buff *skb)
 {
-	int iif = skb->dev->ifindex;
+	struct net_device *dev = skb->dev;
 
 	/* for local traffic to local address, skb dev is the loopback
 	 * device. Check if there is a dst attached to the skb and if so
 	 * get the real device index. Same is needed for replies to a link
 	 * local address on a device enslaved to an L3 master device
 	 */
-	if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
+	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
 		const struct rt6_info *rt6 = skb_rt6_info(skb);
 
 		if (rt6)
-			iif = rt6->rt6i_idev->dev->ifindex;
+			dev = rt6->rt6i_idev->dev;
 	}
 
-	return iif;
+	return dev;
+}
+
+static int icmp6_iif(const struct sk_buff *skb)
+{
+	return icmp6_dev(skb)->ifindex;
 }
 
 /*
@@ -421,10 +423,10 @@
 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 		       const struct in6_addr *force_saddr)
 {
-	struct net *net = dev_net(skb->dev);
 	struct inet6_dev *idev = NULL;
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct sock *sk;
+	struct net *net;
 	struct ipv6_pinfo *np;
 	const struct in6_addr *saddr = NULL;
 	struct dst_entry *dst;
@@ -435,12 +437,16 @@
 	int iif = 0;
 	int addr_type = 0;
 	int len;
-	u32 mark = IP6_REPLY_MARK(net, skb->mark);
+	u32 mark;
 
 	if ((u8 *)hdr < skb->head ||
 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
 		return;
 
+	if (!skb->dev)
+		return;
+	net = dev_net(skb->dev);
+	mark = IP6_REPLY_MARK(net, skb->mark);
 	/*
 	 *	Make sure we respect the rules
 	 *	i.e. RFC 1885 2.4(e)
@@ -505,7 +511,7 @@
 	local_bh_disable();
 
 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
-	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
+	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
 		goto out_bh_enable;
 
 	mip6_addr_swap(skb);
@@ -677,18 +683,29 @@
 	struct dst_entry *dst;
 	struct ipcm6_cookie ipc6;
 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
+	bool acast;
+
+	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
+	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
+		return;
 
 	saddr = &ipv6_hdr(skb)->daddr;
 
+	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
+	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
+		return;
+
 	if (!ipv6_unicast_destination(skb) &&
-	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
-	      ipv6_anycast_destination(skb_dst(skb), saddr)))
+	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
 		saddr = NULL;
 
 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
 
 	memset(&fl6, 0, sizeof(fl6));
+	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
+		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
+
 	fl6.flowi6_proto = IPPROTO_ICMPV6;
 	fl6.daddr = ipv6_hdr(skb)->saddr;
 	if (saddr)
@@ -717,6 +734,11 @@
 	if (IS_ERR(dst))
 		goto out;
 
+	/* Check the ratelimit */
+	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
+	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
+		goto out_dst_release;
+
 	idev = __in6_dev_get(skb->dev);
 
 	msg.skb = skb;
@@ -737,6 +759,7 @@
 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 					   skb->len + sizeof(struct icmp6hdr));
 	}
+out_dst_release:
 	dst_release(dst);
 out:
 	icmpv6_xmit_unlock(sk);
@@ -795,7 +818,7 @@
 static int icmpv6_rcv(struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
-	struct net_device *dev = skb->dev;
+	struct net_device *dev = icmp6_dev(skb);
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	const struct in6_addr *saddr, *daddr;
 	struct icmp6hdr *hdr;
@@ -947,13 +970,21 @@
 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 }
 
+static void __net_exit icmpv6_sk_exit(struct net *net)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
+	free_percpu(net->ipv6.icmp_sk);
+}
+
 static int __net_init icmpv6_sk_init(struct net *net)
 {
 	struct sock *sk;
-	int err, i, j;
+	int err, i;
 
-	net->ipv6.icmp_sk =
-		kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
+	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
 	if (!net->ipv6.icmp_sk)
 		return -ENOMEM;
 
@@ -966,7 +997,7 @@
 			goto fail;
 		}
 
-		net->ipv6.icmp_sk[i] = sk;
+		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
 
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
@@ -976,22 +1007,10 @@
 	return 0;
 
  fail:
-	for (j = 0; j < i; j++)
-		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
-	kfree(net->ipv6.icmp_sk);
+	icmpv6_sk_exit(net);
 	return err;
 }
 
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
-	int i;
-
-	for_each_possible_cpu(i) {
-		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
-	}
-	kfree(net->ipv6.icmp_sk);
-}
-
 static struct pernet_operations icmpv6_sk_ops = {
 	.init = icmpv6_sk_init,
 	.exit = icmpv6_sk_exit,
@@ -1113,6 +1132,27 @@
 		.mode		= 0644,
 		.proc_handler = proc_dointvec,
 	},
+	{
+		.procname	= "echo_ignore_multicast",
+		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname	= "echo_ignore_anycast",
+		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname	= "ratemask",
+		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
+		.maxlen		= ICMPV6_MSG_MAX + 1,
+		.mode		= 0644,
+		.proc_handler = proc_do_large_bitmap,
+	},
 	{ },
 };
 
@@ -1127,6 +1167,9 @@
 	if (table) {
 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
+		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
+		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
+		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
 	}
 	return table;
 }
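
icmpv6_mask_allow() now consults the per-namespace ratemask bitmap that inet6_net_init() seeds with every error type except ICMPV6_PKT_TOOBIG (rate limiting that would break path-MTU discovery); userspace can adjust the set via the new "ratemask" sysctl through proc_do_large_bitmap. A userspace model of the bitmap logic using the real ICMPv6 type numbers:

/* Userspace model of the new icmpv6 ratemask: a bitmap of ICMPv6 types
 * subject to rate limiting. The default covers all error types
 * (0..ICMPV6_ERRMSG_MAX) except PKT_TOOBIG.
 */
#include <stdbool.h>
#include <stdio.h>

#define ICMPV6_ERRMSG_MAX   127
#define ICMPV6_MSG_MAX      255
#define ICMPV6_PKT_TOOBIG   2
#define ICMPV6_ECHO_REQUEST 128

static unsigned char ratemask[(ICMPV6_MSG_MAX + 1) / 8];

static void mask_set(int t)   { ratemask[t / 8] |=  (1u << (t % 8)); }
static void mask_clear(int t) { ratemask[t / 8] &= ~(1u << (t % 8)); }
static bool mask_test(int t)  { return ratemask[t / 8] & (1u << (t % 8)); }

/* mirrors icmpv6_mask_allow(): true means "do not rate limit" */
static bool mask_allow(int type)
{
	if (type > ICMPV6_MSG_MAX)
		return true;
	return !mask_test(type);
}

int main(void)
{
	int i;

	for (i = 0; i <= ICMPV6_ERRMSG_MAX; i++)	/* bitmap_set() */
		mask_set(i);
	mask_clear(ICMPV6_PKT_TOOBIG);			/* bitmap_clear() */

	printf("dest-unreach limited: %d\n", !mask_allow(1));		  /* 1 */
	printf("pkt-too-big limited:  %d\n", !mask_allow(ICMPV6_PKT_TOOBIG));	  /* 0 */
	printf("echo-request limited: %d\n", !mask_allow(ICMPV6_ECHO_REQUEST)); /* 0 */
	return 0;
}
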
diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile
index b7739ab..1bc88ed 100644
--- a/net/ipv6/ila/Makefile
+++ b/net/ipv6/ila/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for ILA module
 #
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index 1f747bc..bb6fc0d 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -1,11 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2015 Tom Herbert <tom@herbertland.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
  */
 
 #ifndef __ILA_H
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 3d56a2f..422dcc6 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -146,7 +146,8 @@
 	if (family != AF_INET6)
 		return -EINVAL;
 
-	ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
+	ret = nla_parse_nested_deprecated(tb, ILA_ATTR_MAX, nla,
+					  ila_nl_policy, extack);
 	if (ret < 0)
 		return ret;
 
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 18fac76..257d2b6 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -16,29 +16,29 @@
 static const struct genl_ops ila_nl_ops[] = {
 	{
 		.cmd = ILA_CMD_ADD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = ila_xlat_nl_cmd_add_mapping,
-		.policy = ila_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = ILA_CMD_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = ila_xlat_nl_cmd_del_mapping,
-		.policy = ila_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = ILA_CMD_FLUSH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = ila_xlat_nl_cmd_flush,
-		.policy = ila_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = ILA_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = ila_xlat_nl_cmd_get_mapping,
 		.start = ila_xlat_nl_dump_start,
 		.dumpit = ila_xlat_nl_dump,
 		.done = ila_xlat_nl_dump_done,
-		.policy = ila_nl_policy,
 	},
 };
 
@@ -49,6 +49,7 @@
 	.name		= ILA_GENL_NAME,
 	.version	= ILA_GENL_VERSION,
 	.maxattr	= ILA_ATTR_MAX,
+	.policy = ila_nl_policy,
 	.netnsok	= true,
 	.parallel_ops	= true,
 	.module		= THIS_MODULE,
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 17c455f..5fc1f4e 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -383,12 +383,9 @@
 	struct rhashtable_iter iter;
 	struct ila_map *ila;
 	spinlock_t *lock;
-	int ret;
+	int ret = 0;
 
-	ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter, GFP_KERNEL);
-	if (ret)
-		goto done;
-
+	rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter);
 	rhashtable_walk_start(&iter);
 
 	for (;;) {
@@ -420,6 +417,7 @@
 
 done:
 	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
 	return ret;
 }
 
@@ -509,23 +507,17 @@
 	struct net *net = sock_net(cb->skb->sk);
 	struct ila_net *ilan = net_generic(net, ila_net_id);
 	struct ila_dump_iter *iter;
-	int ret;
 
 	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter)
 		return -ENOMEM;
 
-	ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter->rhiter,
-				   GFP_KERNEL);
-	if (ret) {
-		kfree(iter);
-		return ret;
-	}
+	rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter->rhiter);
 
 	iter->skip = 0;
 	cb->args[0] = (long)iter;
 
-	return ret;
+	return 0;
 }
 
 int ila_xlat_nl_dump_done(struct netlink_callback *cb)
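
Both ila_xlat.c hunks exploit the fact that rhashtable_walk_enter() cannot fail (it performs no allocation), unlike the old rhashtable_walk_init(), so the GFP_KERNEL error unwinding disappears. For reference, a rough sketch of the canonical walk shape after the change (kernel C, only buildable in-tree):

/* Canonical shape of an rhashtable walk after this change:
 * rhashtable_walk_enter() has no return value to check, so the only
 * error a walker still sees is -EAGAIN from the iterator when the
 * table is resized mid-walk.
 */
#include <linux/rhashtable.h>

static int walk_all(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	void *obj;
	int ret = 0;

	rhashtable_walk_enter(ht, &iter);	/* cannot fail */
	rhashtable_walk_start(&iter);

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* table resized; retry */
			ret = PTR_ERR(obj);
			break;
		}
		/* process obj */
	}

	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
	return ret;
}
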
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 9a31d13..0a0945a 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET        An implementation of the TCP/IP protocol suite for the LINUX
  *             operating system.  INET is implemented using the  BSD Socket
@@ -6,11 +7,6 @@
  *             Support for INET6 connection oriented protocols.
  *
  * Authors:    See the TCPv6 sources
- *
- *             This program is free software; you can redistribute it and/or
- *             modify it under the terms of the GNU General Public License
- *             as published by the Free Software Foundation; either version
- *             2 of the License, or(at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -137,7 +133,7 @@
 	fl6.daddr = sk->sk_v6_daddr;
 
 	res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
-		       np->tclass);
+		       np->tclass, sk->sk_priority);
 	rcu_read_unlock();
 	return res;
 }
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 3d7c746..fbe9d42 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the BSD Socket
@@ -7,11 +8,6 @@
  *
  * Authors:	Lotsa people, from code originally in tcp, generalised here
  *		by Arnaldo Carvalho de Melo <acme@mandriva.com>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -102,23 +98,14 @@
 
 	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
 	    sk->sk_family == PF_INET6) {
+		if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+			return -1;
+
+		if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
+			return -1;
 
 		score = 1;
-		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
-			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
-				return -1;
-			score++;
-		}
-		if (sk->sk_bound_dev_if || exact_dif) {
-			bool dev_match = (sk->sk_bound_dev_if == dif ||
-					  sk->sk_bound_dev_if == sdif);
-
-			if (!dev_match)
-				return -1;
-			if (sk->sk_bound_dev_if)
-				score++;
-		}
-		if (sk->sk_incoming_cpu == raw_smp_processor_id())
+		if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
 			score++;
 	}
 	return score;
@@ -166,26 +153,12 @@
 		const __be16 sport, const struct in6_addr *daddr,
 		const unsigned short hnum, const int dif, const int sdif)
 {
-	unsigned int hash = inet_lhashfn(net, hnum);
-	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
-	bool exact_dif = inet6_exact_dif_match(net, skb);
 	struct inet_listen_hashbucket *ilb2;
-	struct sock *sk, *result = NULL;
-	int score, hiscore = 0;
+	struct sock *result = NULL;
 	unsigned int hash2;
-	u32 phash = 0;
-
-	if (ilb->count <= 10 || !hashinfo->lhash2)
-		goto port_lookup;
-
-	/* Too many sk in the ilb bucket (which is hashed by port alone).
-	 * Try lhash2 (which is hashed by port and addr) instead.
-	 */
 
 	hash2 = ipv6_portaddr_hash(net, daddr, hnum);
 	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
-	if (ilb2->count > ilb->count)
-		goto port_lookup;
 
 	result = inet6_lhash2_lookup(net, ilb2, skb, doff,
 				     saddr, sport, daddr, hnum,
@@ -194,35 +167,14 @@
 		goto done;
 
 	/* Lookup lhash2 with in6addr_any */
-
 	hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
 	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
-	if (ilb2->count > ilb->count)
-		goto port_lookup;
 
 	result = inet6_lhash2_lookup(net, ilb2, skb, doff,
-				     saddr, sport, daddr, hnum,
+				     saddr, sport, &in6addr_any, hnum,
 				     dif, sdif);
-	goto done;
-
-port_lookup:
-	sk_for_each(sk, &ilb->head) {
-		score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
-		if (score > hiscore) {
-			if (sk->sk_reuseport) {
-				phash = inet6_ehashfn(net, daddr, hnum,
-						      saddr, sport);
-				result = reuseport_select_sock(sk, phash,
-							       skb, doff);
-				if (result)
-					goto done;
-			}
-			result = sk;
-			hiscore = score;
-		}
-	}
 done:
-	if (unlikely(IS_ERR(result)))
+	if (IS_ERR(result))
 		return NULL;
 	return result;
 }
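
The listener lookup now always goes through the lhash2 tables, which hash on (port, address): one probe with the packet's destination address, then one with in6addr_any for wildcard binds. That is why the port-only bucket walk and the address/device scoring it needed could be deleted. A toy userspace model of the two-pass order:

/* Userspace model of the two-pass listener lookup this hunk leaves
 * behind: probe (port, addr) first, then (port, wildcard). The linear
 * table is a toy stand-in for the inet_lhash2 buckets.
 */
#include <stdio.h>
#include <string.h>

struct listener {
	unsigned short port;
	const char *addr;	/* "::" models a wildcard bind */
};

static const struct listener table[] = {
	{ 80, "2001:db8::1" },
	{ 80, "::" },
};

static const struct listener *lookup_pass(unsigned short port,
					  const char *addr)
{
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].port == port && !strcmp(table[i].addr, addr))
			return &table[i];
	return NULL;
}

static const struct listener *lookup(unsigned short port, const char *daddr)
{
	const struct listener *l = lookup_pass(port, daddr);	/* exact */

	return l ? l : lookup_pass(port, "::");		/* then wildcard */
}

int main(void)
{
	printf("%s\n", lookup(80, "2001:db8::1")->addr);	/* exact   */
	printf("%s\n", lookup(80, "2001:db8::2")->addr);	/* wildcard */
	return 0;
}
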
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index cbe4617..6e2af41 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Linux INET6 implementation
  *	Forwarding Information Database
@@ -5,11 +6,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  *	Changes:
  *	Yuji SEKIYA @USAGI:	Support default route on router node;
  *				remove ip6_null_entry from the top of
@@ -29,6 +25,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 
+#include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ndisc.h>
 #include <net/addrconf.h>
@@ -46,6 +43,7 @@
 	int (*func)(struct fib6_info *, void *arg);
 	int sernum;
 	void *arg;
+	bool skip_notify;
 };
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -145,24 +143,21 @@
 	       addr[fn_bit >> 5];
 }
 
-struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
 {
 	struct fib6_info *f6i;
+	size_t sz = sizeof(*f6i);
 
-	f6i = kzalloc(sizeof(*f6i), gfp_flags);
+	if (with_fib6_nh)
+		sz += sizeof(struct fib6_nh);
+
+	f6i = kzalloc(sz, gfp_flags);
 	if (!f6i)
 		return NULL;
 
-	f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
-	if (!f6i->rt6i_pcpu) {
-		kfree(f6i);
-		return NULL;
-	}
-
+	/* fib6_siblings is a union with nh_list, so this initializes both */
 	INIT_LIST_HEAD(&f6i->fib6_siblings);
-	f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
-
-	atomic_inc(&f6i->fib6_ref);
+	refcount_set(&f6i->fib6_ref, 1);
 
 	return f6i;
 }
@@ -170,45 +165,15 @@
 void fib6_info_destroy_rcu(struct rcu_head *head)
 {
 	struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
-	struct rt6_exception_bucket *bucket;
-	struct dst_metrics *m;
 
 	WARN_ON(f6i->fib6_node);
 
-	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
-	if (bucket) {
-		f6i->rt6i_exception_bucket = NULL;
-		kfree(bucket);
-	}
+	if (f6i->nh)
+		nexthop_put(f6i->nh);
+	else
+		fib6_nh_release(f6i->fib6_nh);
 
-	if (f6i->rt6i_pcpu) {
-		int cpu;
-
-		for_each_possible_cpu(cpu) {
-			struct rt6_info **ppcpu_rt;
-			struct rt6_info *pcpu_rt;
-
-			ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
-			pcpu_rt = *ppcpu_rt;
-			if (pcpu_rt) {
-				dst_dev_put(&pcpu_rt->dst);
-				dst_release(&pcpu_rt->dst);
-				*ppcpu_rt = NULL;
-			}
-		}
-
-		free_percpu(f6i->rt6i_pcpu);
-	}
-
-	lwtstate_put(f6i->fib6_nh.nh_lwtstate);
-
-	if (f6i->fib6_nh.nh_dev)
-		dev_put(f6i->fib6_nh.nh_dev);
-
-	m = f6i->fib6_metrics;
-	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
-		kfree(m);
-
+	ip_fib_metrics_put(f6i->fib6_metrics);
 	kfree(f6i);
 }
 EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
@@ -351,19 +316,21 @@
 
 	rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
 	if (rt->dst.error == -EAGAIN) {
-		ip6_rt_put(rt);
+		ip6_rt_put_flags(rt, flags);
 		rt = net->ipv6.ip6_null_entry;
-		dst_hold(&rt->dst);
+		if (!(flags & RT6_LOOKUP_F_DST_NOREF))
+			dst_hold(&rt->dst);
 	}
 
 	return &rt->dst;
 }
 
 /* called with rcu lock held; no reference taken on fib6_info */
-struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
-			      int flags)
+int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+		struct fib6_result *res, int flags)
 {
-	return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags);
+	return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6,
+				 res, flags);
 }
 
 static void __net_init fib6_tables_init(struct net *net)
@@ -401,10 +368,10 @@
 	return call_fib6_notifier(nb, net, event_type, &info.info);
 }
 
-static int call_fib6_entry_notifiers(struct net *net,
-				     enum fib_event_type event_type,
-				     struct fib6_info *rt,
-				     struct netlink_ext_ack *extack)
+int call_fib6_entry_notifiers(struct net *net,
+			      enum fib_event_type event_type,
+			      struct fib6_info *rt,
+			      struct netlink_ext_ack *extack)
 {
 	struct fib6_entry_notifier_info info = {
 		.info.extack = extack,
@@ -415,6 +382,22 @@
 	return call_fib6_notifiers(net, event_type, &info.info);
 }
 
+int call_fib6_multipath_entry_notifiers(struct net *net,
+					enum fib_event_type event_type,
+					struct fib6_info *rt,
+					unsigned int nsiblings,
+					struct netlink_ext_ack *extack)
+{
+	struct fib6_entry_notifier_info info = {
+		.info.extack = extack,
+		.rt = rt,
+		.nsiblings = nsiblings,
+	};
+
+	rt->fib6_table->fib_seq++;
+	return call_fib6_notifiers(net, event_type, &info.info);
+}
+
 struct fib6_dump_arg {
 	struct net *net;
 	struct notifier_block *nb;
@@ -481,12 +464,19 @@
 	struct fib6_info *rt;
 
 	for_each_fib6_walker_rt(w) {
-		res = rt6_dump_route(rt, w->args);
-		if (res < 0) {
+		res = rt6_dump_route(rt, w->args, w->skip_in_node);
+		if (res >= 0) {
 			/* Frame is full, suspend walking */
 			w->leaf = rt;
+
+			/* We'll restart from this node, so if some routes were
+			 * already dumped, skip them next time.
+			 */
+			w->skip_in_node += res;
+
 			return 1;
 		}
+		w->skip_in_node = 0;
 
 		/* Multipath routes are dumped in one route with the
 		 * RTA_MULTIPATH attribute. Jump 'rt' to point to the
@@ -538,6 +528,7 @@
 	if (cb->args[4] == 0) {
 		w->count = 0;
 		w->skip = 0;
+		w->skip_in_node = 0;
 
 		spin_lock_bh(&table->tb6_lock);
 		res = fib6_walk(net, w);
@@ -553,6 +544,7 @@
 			w->state = FWS_INIT;
 			w->node = w->root;
 			w->skip = w->count;
+			w->skip_in_node = 0;
 		} else
 			w->skip = 0;
 
@@ -570,17 +562,29 @@
 
 static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
+					 .filter.dump_routes = true };
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
-	struct rt6_rtnl_dump_arg arg;
 	struct fib6_walker *w;
 	struct fib6_table *tb;
 	struct hlist_head *head;
 	int res = 0;
 
-	s_h = cb->args[0];
-	s_e = cb->args[1];
+	if (cb->strict_check) {
+		int err;
+
+		err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
+		if (err < 0)
+			return err;
+	} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
+		struct rtmsg *rtm = nlmsg_data(nlh);
+
+		if (rtm->rtm_flags & RTM_F_PREFIX)
+			arg.filter.flags = RTM_F_PREFIX;
+	}
 
 	w = (void *)cb->args[2];
 	if (!w) {
@@ -606,6 +610,27 @@
 	arg.net = net;
 	w->args = &arg;
 
+	if (arg.filter.table_id) {
+		tb = fib6_get_table(net, arg.filter.table_id);
+		if (!tb) {
+			if (arg.filter.dump_all_families)
+				goto out;
+
+			NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
+			return -ENOENT;
+		}
+
+		if (!cb->args[0]) {
+			res = fib6_dump_table(tb, skb, cb);
+			if (!res)
+				cb->args[0] = 1;
+		}
+		goto out;
+	}
+
+	s_h = cb->args[0];
+	s_e = cb->args[1];
+
 	rcu_read_lock();
 	for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
 		e = 0;
@@ -615,16 +640,16 @@
 				goto next;
 			res = fib6_dump_table(tb, skb, cb);
 			if (res != 0)
-				goto out;
+				goto out_unlock;
 next:
 			e++;
 		}
 	}
-out:
+out_unlock:
 	rcu_read_unlock();
 	cb->args[1] = e;
 	cb->args[0] = h;
-
+out:
 	res = res < 0 ? res : skb->len;
 	if (res <= 0)
 		fib6_dump_end(cb);
@@ -819,8 +844,8 @@
 
 		RCU_INIT_POINTER(in->parent, pn);
 		in->leaf = fn->leaf;
-		atomic_inc(&rcu_dereference_protected(in->leaf,
-				lockdep_is_held(&table->tb6_lock))->fib6_ref);
+		fib6_info_hold(rcu_dereference_protected(in->leaf,
+				lockdep_is_held(&table->tb6_lock)));
 
 		/* update parent pointer */
 		if (dir)
@@ -872,11 +897,15 @@
 	return ln;
 }
 
-static void fib6_drop_pcpu_from(struct fib6_info *f6i,
-				const struct fib6_table *table)
+static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
+				  const struct fib6_info *match,
+				  const struct fib6_table *table)
 {
 	int cpu;
 
+	if (!fib6_nh->rt6i_pcpu)
+		return;
+
 	/* release the reference to this fib entry from
 	 * all of its cached pcpu routes
 	 */
@@ -884,25 +913,72 @@
 		struct rt6_info **ppcpu_rt;
 		struct rt6_info *pcpu_rt;
 
-		ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
 		pcpu_rt = *ppcpu_rt;
-		if (pcpu_rt) {
+
+		/* Only drop the 'from' reference if the cached route
+		 * is using 'match'. The cached pcpu_rt->from only changes
+		 * from a fib6_info to NULL (ip6_dst_destroy); it can never
+		 * change from one fib6_info reference to another
+		 */
+		if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
 			struct fib6_info *from;
 
-			from = rcu_dereference_protected(pcpu_rt->from,
-					     lockdep_is_held(&table->tb6_lock));
-			rcu_assign_pointer(pcpu_rt->from, NULL);
+			from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
 			fib6_info_release(from);
 		}
 	}
 }
 
+struct fib6_nh_pcpu_arg {
+	struct fib6_info	*from;
+	const struct fib6_table *table;
+};
+
+static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
+{
+	struct fib6_nh_pcpu_arg *arg = _arg;
+
+	__fib6_drop_pcpu_from(nh, arg->from, arg->table);
+	return 0;
+}
+
+static void fib6_drop_pcpu_from(struct fib6_info *f6i,
+				const struct fib6_table *table)
+{
+	/* Make sure rt6_make_pcpu_route() won't add other percpu routes
+	 * while we are cleaning them here.
+	 */
+	f6i->fib6_destroying = 1;
+	mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
+
+	if (f6i->nh) {
+		struct fib6_nh_pcpu_arg arg = {
+			.from = f6i,
+			.table = table
+		};
+
+		nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
+					 &arg);
+	} else {
+		struct fib6_nh *fib6_nh;
+
+		fib6_nh = f6i->fib6_nh;
+		__fib6_drop_pcpu_from(fib6_nh, f6i, table);
+	}
+}
+
 static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
 			  struct net *net)
 {
 	struct fib6_table *table = rt->fib6_table;
 
-	if (atomic_read(&rt->fib6_ref) != 1) {
+	fib6_drop_pcpu_from(rt, table);
+
+	if (rt->nh && !list_empty(&rt->nh_list))
+		list_del_init(&rt->nh_list);
+
+	if (refcount_read(&rt->fib6_ref) != 1) {
 		/* This route is used as dummy address holder in some split
 		 * nodes. It is not leaked, but it still holds other resources,
 		 * which must be released in time. So, scan ascendant nodes
@@ -915,7 +991,7 @@
 			struct fib6_info *new_leaf;
 			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
 				new_leaf = fib6_find_prefix(net, table, fn);
-				atomic_inc(&new_leaf->fib6_ref);
+				fib6_info_hold(new_leaf);
 
 				rcu_assign_pointer(fn->leaf, new_leaf);
 				fib6_info_release(rt);
@@ -923,9 +999,6 @@
 			fn = rcu_dereference_protected(fn->parent,
 				    lockdep_is_held(&table->tb6_lock));
 		}
-
-		if (rt->rt6i_pcpu)
-			fib6_drop_pcpu_from(rt, table);
 	}
 }
 
@@ -1074,14 +1147,32 @@
 add:
 		nlflags |= NLM_F_CREATE;
 
-		err = call_fib6_entry_notifiers(info->nl_net,
-						FIB_EVENT_ENTRY_ADD,
-						rt, extack);
-		if (err)
-			return err;
+		if (!info->skip_notify_kernel) {
+			err = call_fib6_entry_notifiers(info->nl_net,
+							FIB_EVENT_ENTRY_ADD,
+							rt, extack);
+			if (err) {
+				struct fib6_info *sibling, *next_sibling;
+
+				/* If the route has siblings, then it first
+				 * needs to be unlinked from them.
+				 */
+				if (!rt->fib6_nsiblings)
+					return err;
+
+				list_for_each_entry_safe(sibling, next_sibling,
+							 &rt->fib6_siblings,
+							 fib6_siblings)
+					sibling->fib6_nsiblings--;
+				rt->fib6_nsiblings = 0;
+				list_del_init(&rt->fib6_siblings);
+				rt6_multipath_rebalance(next_sibling);
+				return err;
+			}
+		}
 
 		rcu_assign_pointer(rt->fib6_next, iter);
-		atomic_inc(&rt->fib6_ref);
+		fib6_info_hold(rt);
 		rcu_assign_pointer(rt->fib6_node, fn);
 		rcu_assign_pointer(*ins, rt);
 		if (!info->skip_notify)
@@ -1103,13 +1194,15 @@
 			return -ENOENT;
 		}
 
-		err = call_fib6_entry_notifiers(info->nl_net,
-						FIB_EVENT_ENTRY_REPLACE,
-						rt, extack);
-		if (err)
-			return err;
+		if (!info->skip_notify_kernel) {
+			err = call_fib6_entry_notifiers(info->nl_net,
+							FIB_EVENT_ENTRY_REPLACE,
+							rt, extack);
+			if (err)
+				return err;
+		}
 
-		atomic_inc(&rt->fib6_ref);
+		fib6_info_hold(rt);
 		rcu_assign_pointer(rt->fib6_node, fn);
 		rt->fib6_next = iter->fib6_next;
 		rcu_assign_pointer(*ins, rt);
@@ -1191,6 +1284,14 @@
 	__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
 }
 
+/* allow ipv4 to update sernum via ipv6_stub */
+void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
+{
+	spin_lock_bh(&f6i->fib6_table->tb6_lock);
+	fib6_update_sernum_upto_root(net, f6i);
+	spin_unlock_bh(&f6i->fib6_table->tb6_lock);
+}
+
 /*
  *	Add routing information to the routing tree.
  *	<destination addr>/<source addr>
@@ -1251,7 +1352,7 @@
 			if (!sfn)
 				goto failure;
 
-			atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref);
+			fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
 			rcu_assign_pointer(sfn->leaf,
 					   info->nl_net->ipv6.fib6_null_entry);
 			sfn->fn_flags = RTN_ROOT;
@@ -1294,7 +1395,7 @@
 				rcu_assign_pointer(fn->leaf,
 					    info->nl_net->ipv6.fib6_null_entry);
 			} else {
-				atomic_inc(&rt->fib6_ref);
+				fib6_info_hold(rt);
 				rcu_assign_pointer(fn->leaf, rt);
 			}
 		}
@@ -1304,6 +1405,8 @@
 
 	err = fib6_add_rt2node(fn, rt, info, extack);
 	if (!err) {
+		if (rt->nh)
+			list_add(&rt->nh_list, &rt->nh->f6i_list);
 		__fib6_update_sernum_upto_root(rt, sernum);
 		fib6_start_gc(info->nl_net, rt);
 	}
@@ -1509,7 +1612,8 @@
 		if (plen == fn->fn_bit)
 			return fn;
 
-		prev = fn;
+		if (fn->fn_flags & RTN_RTINFO)
+			prev = fn;
 
 next:
 		/*
@@ -1780,9 +1884,11 @@
 
 	fib6_purge_rt(rt, fn, net);
 
-	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
+	if (!info->skip_notify_kernel)
+		call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
 	if (!info->skip_notify)
 		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
+
 	fib6_info_release(rt);
 }
 
@@ -1954,6 +2060,7 @@
 	struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
 	struct nl_info info = {
 		.nl_net = c->net,
+		.skip_notify = c->skip_notify,
 	};
 
 	if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
@@ -2005,7 +2112,7 @@
 
 static void fib6_clean_tree(struct net *net, struct fib6_node *root,
 			    int (*func)(struct fib6_info *, void *arg),
-			    int sernum, void *arg)
+			    int sernum, void *arg, bool skip_notify)
 {
 	struct fib6_cleaner c;
 
@@ -2013,17 +2120,19 @@
 	c.w.func = fib6_clean_node;
 	c.w.count = 0;
 	c.w.skip = 0;
+	c.w.skip_in_node = 0;
 	c.func = func;
 	c.sernum = sernum;
 	c.arg = arg;
 	c.net = net;
+	c.skip_notify = skip_notify;
 
 	fib6_walk(net, &c.w);
 }
 
 static void __fib6_clean_all(struct net *net,
 			     int (*func)(struct fib6_info *, void *),
-			     int sernum, void *arg)
+			     int sernum, void *arg, bool skip_notify)
 {
 	struct fib6_table *table;
 	struct hlist_head *head;
@@ -2035,7 +2144,7 @@
 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
 			spin_lock_bh(&table->tb6_lock);
 			fib6_clean_tree(net, &table->tb6_root,
-					func, sernum, arg);
+					func, sernum, arg, skip_notify);
 			spin_unlock_bh(&table->tb6_lock);
 		}
 	}
@@ -2045,14 +2154,21 @@
 void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
 		    void *arg)
 {
-	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
+	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
+}
+
+void fib6_clean_all_skip_notify(struct net *net,
+				int (*func)(struct fib6_info *, void *),
+				void *arg)
+{
+	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
 }
 
 static void fib6_flush_trees(struct net *net)
 {
 	int new_sernum = fib6_new_sernum(net);
 
-	__fib6_clean_all(net, NULL, new_sernum, NULL);
+	__fib6_clean_all(net, NULL, new_sernum, NULL, false);
 }
 
 /*
@@ -2256,8 +2372,13 @@
 {
 	struct fib6_info *rt = v;
 	struct ipv6_route_iter *iter = seq->private;
+	struct fib6_nh *fib6_nh = rt->fib6_nh;
+	unsigned int flags = rt->fib6_flags;
 	const struct net_device *dev;
 
+	if (rt->nh)
+		fib6_nh = nexthop_fib6_nh(rt->nh);
+
 	seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -2265,15 +2386,17 @@
 #else
 	seq_puts(seq, "00000000000000000000000000000000 00 ");
 #endif
-	if (rt->fib6_flags & RTF_GATEWAY)
-		seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw);
-	else
+	if (fib6_nh->fib_nh_gw_family) {
+		flags |= RTF_GATEWAY;
+		seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
+	} else {
 		seq_puts(seq, "00000000000000000000000000000000");
+	}
 
-	dev = rt->fib6_nh.nh_dev;
+	dev = fib6_nh->fib_nh_dev;
 	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
-		   rt->fib6_metric, atomic_read(&rt->fib6_ref), 0,
-		   rt->fib6_flags, dev ? dev->name : "");
+		   rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
+		   flags, dev ? dev->name : "");
 	iter->w.leaf = NULL;
 	return 0;
 }
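
Throughout ip6_fib.c the fib6_ref count moves from atomic_t to refcount_t:
refcount_t saturates on overflow/underflow instead of wrapping, so a
refcounting bug becomes a WARN splat rather than a use-after-free. The
open-coded atomic_inc() sites become fib6_info_hold(). As a sketch, the
hold/release pair is presumably a thin wrapper of this shape (the real
definitions live in include/net/ip6_fib.h, outside this excerpt):

	static inline void fib6_info_hold(struct fib6_info *f6i)
	{
		refcount_inc(&f6i->fib6_ref);
	}

	static inline void fib6_info_release(struct fib6_info *f6i)
	{
		if (f6i && refcount_dec_and_test(&f6i->fib6_ref))
			call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
	}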
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index cb54a8a..d64b83e 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	ip6_flowlabel.c		IPv6 flowlabel manager.
  *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
@@ -21,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <linux/pid_namespace.h>
+#include <linux/jump_label_ratelimit.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -57,6 +54,9 @@
 
 static DEFINE_SPINLOCK(ip6_sk_fl_lock);
 
+DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
+EXPORT_SYMBOL(ipv6_flowlabel_exclusive);
+
 #define for_each_fl_rcu(hash, fl)				\
 	for (fl = rcu_dereference_bh(fl_ht[(hash)]);		\
 	     fl != NULL;					\
@@ -94,15 +94,33 @@
 	return fl;
 }
 
+static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
+{
+	return fl->share == IPV6_FL_S_EXCL ||
+	       fl->share == IPV6_FL_S_PROCESS ||
+	       fl->share == IPV6_FL_S_USER;
+}
+
+static void fl_free_rcu(struct rcu_head *head)
+{
+	struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
+
+	if (fl->share == IPV6_FL_S_PROCESS)
+		put_pid(fl->owner.pid);
+	kfree(fl->opt);
+	kfree(fl);
+}
+
 
 static void fl_free(struct ip6_flowlabel *fl)
 {
-	if (fl) {
-		if (fl->share == IPV6_FL_S_PROCESS)
-			put_pid(fl->owner.pid);
-		kfree(fl->opt);
-		kfree_rcu(fl, rcu);
-	}
+	if (!fl)
+		return;
+
+	if (fl_shared_exclusive(fl) || fl->opt)
+		static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);
+
+	call_rcu(&fl->rcu, fl_free_rcu);
 }
 
 static void fl_release(struct ip6_flowlabel *fl)
@@ -238,7 +256,7 @@
 
 /* Socket flowlabel lists */
 
-struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label)
+struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
 {
 	struct ipv6_fl_socklist *sfl;
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -248,9 +266,9 @@
 	rcu_read_lock_bh();
 	for_each_sk_fl_rcu(np, sfl) {
 		struct ip6_flowlabel *fl = sfl->fl;
-		if (fl->label == label) {
+
+		if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
 			fl->lastuse = jiffies;
-			atomic_inc(&fl->users);
 			rcu_read_unlock_bh();
 			return fl;
 		}
@@ -258,7 +276,7 @@
 	rcu_read_unlock_bh();
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(fl6_sock_lookup);
+EXPORT_SYMBOL_GPL(__fl6_sock_lookup);
 
 void fl6_free_socklist(struct sock *sk)
 {
@@ -431,10 +449,15 @@
 		err = -EINVAL;
 		goto done;
 	}
+	if (fl_shared_exclusive(fl) || fl->opt)
+		static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
 	return fl;
 
 done:
-	fl_free(fl);
+	if (fl) {
+		kfree(fl->opt);
+		kfree(fl);
+	}
 	*err_p = err;
 	return NULL;
 }
@@ -616,7 +639,8 @@
 						goto done;
 					}
 					fl1 = sfl->fl;
-					atomic_inc(&fl1->users);
+					if (!atomic_inc_not_zero(&fl1->users))
+						fl1 = NULL;
 					break;
 				}
 			}
@@ -633,9 +657,9 @@
 				if (fl1->share == IPV6_FL_S_EXCL ||
 				    fl1->share != fl->share ||
 				    ((fl1->share == IPV6_FL_S_PROCESS) &&
-				     (fl1->owner.pid == fl->owner.pid)) ||
+				     (fl1->owner.pid != fl->owner.pid)) ||
 				    ((fl1->share == IPV6_FL_S_USER) &&
-				     uid_eq(fl1->owner.uid, fl->owner.uid)))
+				     !uid_eq(fl1->owner.uid, fl->owner.uid)))
 					goto release;
 
 				err = -ENOMEM;
@@ -851,6 +875,7 @@
 
 void ip6_flowlabel_cleanup(void)
 {
+	static_key_deferred_flush(&ipv6_flowlabel_exclusive);
 	del_timer(&ip6_fl_gc_timer);
 	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
 }
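
The new ipv6_flowlabel_exclusive deferred static key lets the datapath skip
flow-label ownership checks while no exclusive leases exist: fl_create()
increments it for exclusively shared labels (or labels carrying options),
and fl_free() decrements it through an HZ-rate-limited deferral so label
churn does not cause repeated live code patching. The rename of
fl6_sock_lookup() to __fl6_sock_lookup() implies an inline wrapper in the
header (not shown in this excerpt), likely along these lines:

	static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk,
							    __be32 label)
	{
		if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key))
			return __fl6_sock_lookup(sk, label);
		return NULL;
	}

Note also the switch to atomic_inc_not_zero() on fl->users: with RCU
readers racing against release, a label whose count has already dropped to
zero must not be resurrected by a concurrent lookup.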
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index e493b04..923034c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	GRE over IPv6 protocol decoder.
  *
  *	Authors: Dmitry Kozlov (xeb@mail.ru)
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -423,41 +418,23 @@
 }
 
 
-static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		       u8 type, u8 code, int offset, __be32 info)
 {
 	struct net *net = dev_net(skb->dev);
-	const struct gre_base_hdr *greh;
 	const struct ipv6hdr *ipv6h;
-	int grehlen = sizeof(*greh);
+	struct tnl_ptk_info tpi;
 	struct ip6_tnl *t;
-	int key_off = 0;
-	__be16 flags;
-	__be32 key;
 
-	if (!pskb_may_pull(skb, offset + grehlen))
-		return;
-	greh = (const struct gre_base_hdr *)(skb->data + offset);
-	flags = greh->flags;
-	if (flags & (GRE_VERSION | GRE_ROUTING))
-		return;
-	if (flags & GRE_CSUM)
-		grehlen += 4;
-	if (flags & GRE_KEY) {
-		key_off = grehlen + offset;
-		grehlen += 4;
-	}
+	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6),
+			     offset) < 0)
+		return -EINVAL;
 
-	if (!pskb_may_pull(skb, offset + grehlen))
-		return;
 	ipv6h = (const struct ipv6hdr *)skb->data;
-	greh = (const struct gre_base_hdr *)(skb->data + offset);
-	key = key_off ? *(__be32 *)(skb->data + key_off) : 0;
-
 	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
-				 key, greh->protocol);
+				 tpi.key, tpi.proto);
 	if (!t)
-		return;
+		return -ENOENT;
 
 	switch (type) {
 		struct ipv6_tlv_tnl_enc_lim *tel;
@@ -467,14 +444,14 @@
 				    t->parms.name);
 		if (code != ICMPV6_PORT_UNREACH)
 			break;
-		return;
+		return 0;
 	case ICMPV6_TIME_EXCEED:
 		if (code == ICMPV6_EXC_HOPLIMIT) {
 			net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
 					    t->parms.name);
 			break;
 		}
-		return;
+		return 0;
 	case ICMPV6_PARAMPROB:
 		teli = 0;
 		if (code == ICMPV6_HDR_FIELD)
@@ -490,14 +467,14 @@
 			net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
 					    t->parms.name);
 		}
-		return;
+		return 0;
 	case ICMPV6_PKT_TOOBIG:
 		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
-		return;
+		return 0;
 	case NDISC_REDIRECT:
 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
 			     sock_net_uid(net, NULL));
-		return;
+		return 0;
 	}
 
 	if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
@@ -505,6 +482,8 @@
 	else
 		t->err_count = 1;
 	t->err_time = jiffies;
+
+	return 0;
 }
 
 static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
@@ -540,23 +519,19 @@
 	return PACKET_REJECT;
 }
 
-static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
-			 struct tnl_ptk_info *tpi)
+static int ip6erspan_rcv(struct sk_buff *skb,
+			 struct tnl_ptk_info *tpi,
+			 int gre_hdr_len)
 {
 	struct erspan_base_hdr *ershdr;
-	struct erspan_metadata *pkt_md;
 	const struct ipv6hdr *ipv6h;
 	struct erspan_md2 *md2;
 	struct ip6_tnl *tunnel;
 	u8 ver;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
-		return PACKET_REJECT;
-
 	ipv6h = ipv6_hdr(skb);
 	ershdr = (struct erspan_base_hdr *)skb->data;
 	ver = ershdr->ver;
-	tpi->key = cpu_to_be32(get_session_id(ershdr));
 
 	tunnel = ip6gre_tunnel_lookup(skb->dev,
 				      &ipv6h->saddr, &ipv6h->daddr, tpi->key,
@@ -567,18 +542,16 @@
 		if (unlikely(!pskb_may_pull(skb, len)))
 			return PACKET_REJECT;
 
-		ershdr = (struct erspan_base_hdr *)skb->data;
-		pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
 		if (__iptunnel_pull_header(skb, len,
 					   htons(ETH_P_TEB),
 					   false, false) < 0)
 			return PACKET_REJECT;
 
 		if (tunnel->parms.collect_md) {
+			struct erspan_metadata *pkt_md, *md;
 			struct metadata_dst *tun_dst;
 			struct ip_tunnel_info *info;
-			struct erspan_metadata *md;
+			unsigned char *gh;
 			__be64 tun_id;
 			__be16 flags;
 
@@ -591,6 +564,14 @@
 			if (!tun_dst)
 				return PACKET_REJECT;
 
+			/* skb can be uncloned in __iptunnel_pull_header(), so
+			 * the old pkt_md is no longer valid and we need to
+			 * reset it
+			 */
+			gh = skb_network_header(skb) +
+			     skb_network_header_len(skb);
+			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+							    sizeof(*ershdr));
 			info = &tun_dst->u.tun_info;
 			md = ip_tunnel_info_opts(info);
 			md->version = ver;
@@ -627,7 +608,7 @@
 
 	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
 		     tpi.proto == htons(ETH_P_ERSPAN2))) {
-		if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
+		if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 			return 0;
 		goto out;
 	}
@@ -679,12 +660,13 @@
 				    struct flowi6 *fl6, __u8 *dsfield,
 				    int *encap_limit)
 {
-	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	struct ipv6hdr *ipv6h;
 	struct ip6_tnl *t = netdev_priv(dev);
 	__u16 offset;
 
 	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
 	/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+	ipv6h = ipv6_hdr(skb);
 
 	if (offset > 0) {
 		struct ipv6_tlv_tnl_enc_lim *tel;
@@ -897,6 +879,9 @@
 	struct net_device_stats *stats = &t->dev->stats;
 	int ret;
 
+	if (!pskb_inet_may_pull(skb))
+		goto tx_err;
+
 	if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
 		goto tx_err;
 
@@ -935,10 +920,14 @@
 	__u8 dsfield = false;
 	struct flowi6 fl6;
 	int err = -EINVAL;
+	__be16 proto;
 	__u32 mtu;
 	int nhoff;
 	int thoff;
 
+	if (!pskb_inet_may_pull(skb))
+		goto tx_err;
+
 	if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
 		goto tx_err;
 
@@ -979,7 +968,7 @@
 		if (unlikely(!tun_info ||
 			     !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 			     ip_tunnel_info_af(tun_info) != AF_INET6))
-			return -EINVAL;
+			goto tx_err;
 
 		key = &tun_info->key;
 		memset(&fl6, 0, sizeof(fl6));
@@ -991,9 +980,9 @@
 		dsfield = key->tos;
 		if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
 			goto tx_err;
-		md = ip_tunnel_info_opts(tun_info);
-		if (!md)
+		if (tun_info->options_len < sizeof(*md))
 			goto tx_err;
+		md = ip_tunnel_info_opts(tun_info);
 
 		tun_id = tunnel_id_to_key32(key->tun_id);
 		if (md->version == 1) {
@@ -1011,8 +1000,6 @@
 			goto tx_err;
 		}
 	} else {
-		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-
 		switch (skb->protocol) {
 		case htons(ETH_P_IP):
 			memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1020,7 +1007,7 @@
 						 &dsfield, &encap_limit);
 			break;
 		case htons(ETH_P_IPV6):
-			if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+			if (ipv6_addr_equal(&t->parms.raddr, &ipv6_hdr(skb)->saddr))
 				goto tx_err;
 			if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
 						     &dsfield, &encap_limit))
@@ -1047,8 +1034,9 @@
 	}
 
 	/* Push GRE header. */
-	gre_build_header(skb, 8, TUNNEL_SEQ,
-			 htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++));
+	proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
+					   : htons(ETH_P_ERSPAN2);
+	gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
 
 	/* TooBig packet may have updated dst->dev's mtu */
 	if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
@@ -1181,6 +1169,10 @@
 	t->parms.i_flags = p->i_flags;
 	t->parms.o_flags = p->o_flags;
 	t->parms.fwmark = p->fwmark;
+	t->parms.erspan_ver = p->erspan_ver;
+	t->parms.index = p->index;
+	t->parms.dir = p->dir;
+	t->parms.hwid = p->hwid;
 	dst_cache_reset(&t->dst_cache);
 }
 
@@ -1729,6 +1721,27 @@
 	return 0;
 }
 
+static void ip6erspan_set_version(struct nlattr *data[],
+				  struct __ip6_tnl_parm *parms)
+{
+	if (!data)
+		return;
+
+	parms->erspan_ver = 1;
+	if (data[IFLA_GRE_ERSPAN_VER])
+		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+	if (parms->erspan_ver == 1) {
+		if (data[IFLA_GRE_ERSPAN_INDEX])
+			parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+	} else if (parms->erspan_ver == 2) {
+		if (data[IFLA_GRE_ERSPAN_DIR])
+			parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+		if (data[IFLA_GRE_ERSPAN_HWID])
+			parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+	}
+}
+
 static void ip6gre_netlink_parms(struct nlattr *data[],
 				struct __ip6_tnl_parm *parms)
 {
@@ -1777,20 +1790,6 @@
 
 	if (data[IFLA_GRE_COLLECT_METADATA])
 		parms->collect_md = true;
-
-	parms->erspan_ver = 1;
-	if (data[IFLA_GRE_ERSPAN_VER])
-		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
-
-	if (parms->erspan_ver == 1) {
-		if (data[IFLA_GRE_ERSPAN_INDEX])
-			parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
-	} else if (parms->erspan_ver == 2) {
-		if (data[IFLA_GRE_ERSPAN_DIR])
-			parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
-		if (data[IFLA_GRE_ERSPAN_HWID])
-			parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
-	}
 }
 
 static int ip6gre_tap_init(struct net_device *dev)
@@ -1901,12 +1900,6 @@
 	netif_keep_dst(dev);
 }
 
-bool is_ip6gretap_dev(const struct net_device *dev)
-{
-	return dev->netdev_ops == &ip6gre_tap_netdev_ops;
-}
-EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
-
 static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
 				       struct ip_tunnel_encap *ipencap)
 {
@@ -2043,9 +2036,9 @@
 			     struct nlattr *data[],
 			     struct netlink_ext_ack *extack)
 {
-	struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
 	struct __ip6_tnl_parm p;
-	struct ip6_tnl *t;
 
 	t = ip6gre_changelink_common(dev, tb, data, &p, extack);
 	if (IS_ERR(t))
@@ -2114,12 +2107,31 @@
 {
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct __ip6_tnl_parm *p = &t->parms;
+	__be16 o_flags = p->o_flags;
+
+	if (p->erspan_ver == 1 || p->erspan_ver == 2) {
+		if (!p->collect_md)
+			o_flags |= TUNNEL_KEY;
+
+		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
+			goto nla_put_failure;
+
+		if (p->erspan_ver == 1) {
+			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+				goto nla_put_failure;
+		} else {
+			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
+				goto nla_put_failure;
+			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
+				goto nla_put_failure;
+		}
+	}
 
 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
 	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
 			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
 	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
-			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
+			 gre_tnl_flags_to_gre_flags(o_flags)) ||
 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
 	    nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
@@ -2128,8 +2140,7 @@
 	    nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
 	    nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
 	    nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
-	    nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
-	    nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+	    nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
 		goto nla_put_failure;
 
 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -2147,19 +2158,6 @@
 			goto nla_put_failure;
 	}
 
-	if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
-		goto nla_put_failure;
-
-	if (p->erspan_ver == 1) {
-		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
-			goto nla_put_failure;
-	} else if (p->erspan_ver == 2) {
-		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
-			goto nla_put_failure;
-		if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
-			goto nla_put_failure;
-	}
-
 	return 0;
 
 nla_put_failure:
@@ -2194,6 +2192,7 @@
 {
 	ether_setup(dev);
 
+	dev->max_mtu = 0;
 	dev->netdev_ops = &ip6erspan_netdev_ops;
 	dev->needs_free_netdev = true;
 	dev->priv_destructor = ip6gre_dev_free;
@@ -2214,6 +2213,7 @@
 	int err;
 
 	ip6gre_netlink_parms(data, &nt->parms);
+	ip6erspan_set_version(data, &nt->parms);
 	ign = net_generic(net, ip6gre_net_id);
 
 	if (nt->parms.collect_md) {
@@ -2259,6 +2259,7 @@
 	if (IS_ERR(t))
 		return PTR_ERR(t);
 
+	ip6erspan_set_version(data, &p);
 	ip6gre_tunnel_unlink_md(ign, t);
 	ip6gre_tunnel_unlink(ign, t);
 	ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
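
Two hunks in this file fix the same class of bug: a pointer into skb->head
cached across a call that may reallocate the head. Both
ip6_tnl_parse_tlv_enc_lim() and __iptunnel_pull_header() can end up in
pskb_expand_head(), so ipv6h and pkt_md have to be re-derived after those
calls. Distilled into a sketch (illustrative, not literal kernel code):

	struct ipv6hdr *ipv6h = ipv6_hdr(skb);	/* points into skb->head */

	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
	/* the call above may reallocate skb->head, leaving ipv6h
	 * dangling ...
	 */
	ipv6h = ipv6_hdr(skb);			/* ... so recompute it */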
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6b74523..3d71c7d 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPv6 input
  *	Linux INET6 implementation
@@ -7,11 +8,6 @@
  *	Ian P. Morris		<I.P.Morris@soton.ac.uk>
  *
  *	Based in linux/net/ipv4/ip_input.c
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 /* Changes
  *
@@ -29,6 +25,7 @@
 #include <linux/icmpv6.h>
 #include <linux/mroute6.h>
 #include <linux/slab.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
@@ -47,6 +44,8 @@
 #include <net/inet_ecn.h>
 #include <net/dst_metadata.h>
 
+INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
 static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
 				struct sk_buff *skb)
 {
@@ -57,7 +56,8 @@
 
 		ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
 		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
-			edemux(skb);
+			INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
+					udp_v6_early_demux, skb);
 	}
 	if (!skb_valid_dst(skb))
 		ip6_route_input(skb);
@@ -80,8 +80,10 @@
 {
 	struct sk_buff *skb, *next;
 
-	list_for_each_entry_safe(skb, next, head, list)
+	list_for_each_entry_safe(skb, next, head, list) {
+		skb_list_del_init(skb);
 		dst_input(skb);
+	}
 }
 
 static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
@@ -178,7 +180,8 @@
 	 */
 	if ((ipv6_addr_loopback(&hdr->saddr) ||
 	     ipv6_addr_loopback(&hdr->daddr)) &&
-	     !(dev->flags & IFF_LOOPBACK))
+	    !(dev->flags & IFF_LOOPBACK) &&
+	    !netif_is_l3_master(dev))
 		goto err;
 
 	/* RFC4291 Errata ID: 3480
@@ -220,6 +223,16 @@
 	if (ipv6_addr_is_multicast(&hdr->saddr))
 		goto err;
 
+	/* While RFC4291 is not explicit about v4mapped addresses
+	 * in IPv6 headers, it seems clear the Linux dual-stack
+	 * model cannot deal properly with these.
+	 * Security models could be fooled by ::ffff:127.0.0.1 for example.
+	 *
+	 * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02
+	 */
+	if (ipv6_addr_v4mapped(&hdr->saddr))
+		goto err;
+
 	skb->transport_header = skb->network_header + sizeof(*hdr);
 	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
@@ -315,31 +328,32 @@
 	ip6_sublist_rcv(&sublist, curr_dev, curr_net);
 }
 
+INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *));
+
 /*
  *	Deliver the packet to the host
  */
-
-
-static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr,
+			      bool have_final)
 {
 	const struct inet6_protocol *ipprot;
 	struct inet6_dev *idev;
 	unsigned int nhoff;
-	int nexthdr;
 	bool raw;
-	bool have_final = false;
 
 	/*
 	 *	Parse extension headers
 	 */
 
-	rcu_read_lock();
 resubmit:
 	idev = ip6_dst_idev(skb_dst(skb));
-	if (!pskb_pull(skb, skb_transport_offset(skb)))
-		goto discard;
 	nhoff = IP6CB(skb)->nhoff;
-	nexthdr = skb_network_header(skb)[nhoff];
+	if (!have_final) {
+		if (!pskb_pull(skb, skb_transport_offset(skb)))
+			goto discard;
+		nexthdr = skb_network_header(skb)[nhoff];
+	}
 
 resubmit_final:
 	raw = raw6_local_deliver(skb, nexthdr);
@@ -358,6 +372,8 @@
 			}
 		} else if (ipprot->flags & INET6_PROTO_FINAL) {
 			const struct ipv6hdr *hdr;
+			int sdif = inet6_sdif(skb);
+			struct net_device *dev;
 
 			/* Only do this once for first final protocol */
 			have_final = true;
@@ -365,14 +381,24 @@
 			/* Free reference early: we don't need it any more,
 			   and it may hold ip_conntrack module loaded
 			   indefinitely. */
-			nf_reset(skb);
+			nf_reset_ct(skb);
 
 			skb_postpull_rcsum(skb, skb_network_header(skb),
 					   skb_network_header_len(skb));
 			hdr = ipv6_hdr(skb);
+
+			/* skb->dev passed may be master dev for vrfs. */
+			if (sdif) {
+				dev = dev_get_by_index_rcu(net, sdif);
+				if (!dev)
+					goto discard;
+			} else {
+				dev = skb->dev;
+			}
+
 			if (ipv6_addr_is_multicast(&hdr->daddr) &&
-			    !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
-			    &hdr->saddr) &&
+			    !ipv6_chk_mcast_addr(dev, &hdr->daddr,
+						 &hdr->saddr) &&
 			    !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
 				goto discard;
 		}
@@ -380,7 +406,8 @@
 		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 			goto discard;
 
-		ret = ipprot->handler(skb);
+		ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv,
+				      skb);
 		if (ret > 0) {
 			if (ipprot->flags & INET6_PROTO_FINAL) {
 				/* Not an extension header, most likely UDP
@@ -410,13 +437,19 @@
 			consume_skb(skb);
 		}
 	}
-	rcu_read_unlock();
-	return 0;
+	return;
 
 discard:
 	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
-	rcu_read_unlock();
 	kfree_skb(skb);
+}
+
+static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	rcu_read_lock();
+	ip6_protocol_deliver_rcu(net, skb, 0, false);
+	rcu_read_unlock();
+
 	return 0;
 }
 
@@ -431,15 +464,32 @@
 
 int ip6_mc_input(struct sk_buff *skb)
 {
+	int sdif = inet6_sdif(skb);
 	const struct ipv6hdr *hdr;
+	struct net_device *dev;
 	bool deliver;
 
 	__IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
 			 __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST,
 			 skb->len);
 
+	/* skb->dev passed may be master dev for vrfs. */
+	if (sdif) {
+		rcu_read_lock();
+		dev = dev_get_by_index_rcu(dev_net(skb->dev), sdif);
+		if (!dev) {
+			rcu_read_unlock();
+			kfree_skb(skb);
+			return -ENODEV;
+		}
+	} else {
+		dev = skb->dev;
+	}
+
 	hdr = ipv6_hdr(skb);
-	deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
+	deliver = ipv6_chk_mcast_addr(dev, &hdr->daddr, NULL);
+	if (sdif)
+		rcu_read_unlock();
 
 #ifdef CONFIG_IPV6_MROUTE
 	/*
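
The INDIRECT_CALL_2() wrappers introduced in this file are a retpoline
mitigation: with CONFIG_RETPOLINE, every indirect call goes through a
costly return trampoline, so the macro first compares the function pointer
against the most likely targets (TCP, then UDP here) and makes a direct
call on a match. Roughly, per include/linux/indirect_call_wrapper.h:

	#define INDIRECT_CALL_1(f, f1, ...)				\
		(likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__))

	#define INDIRECT_CALL_2(f, f2, f1, ...)				\
		(likely(f == f2) ? f2(__VA_ARGS__) :			\
				   INDIRECT_CALL_1(f, f1, __VA_ARGS__))

Without CONFIG_RETPOLINE the macros degrade to a plain indirect call.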
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index c7e495f..7fbb447 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPV6 GSO/GRO offload support
  *	Linux INET6 implementation
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -20,6 +16,23 @@
 
 #include "ip6_offload.h"
 
+/* All GRO functions are always builtin, except UDP over ipv6, which lives in
+ * the ipv6 module, as it depends on the UDPv6 lookup function, so we need
+ * special care when ipv6 is built as a module.
+ */
+#if IS_BUILTIN(CONFIG_IPV6)
+#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
+#endif
+
+#define indirect_call_gro_receive_l4(f2, f1, cb, head, skb)	\
+({								\
+	unlikely(gro_recursion_inc_test(skb)) ?			\
+		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
+		INDIRECT_CALL_L4(cb, f2, f1, head, skb);	\
+})
+
 static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
 {
 	const struct net_offload *ops = NULL;
@@ -164,8 +177,12 @@
 	return len;
 }
 
-static struct sk_buff *ipv6_gro_receive(struct list_head *head,
-					struct sk_buff *skb)
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *,
+							   struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
+							   struct sk_buff *));
+INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
+							 struct sk_buff *skb)
 {
 	const struct net_offload *ops;
 	struct sk_buff *pp = NULL;
@@ -229,14 +246,21 @@
 		 * XXX skbs on the gro_list have all been parsed and pulled
 		 * already so we don't need to compare nlen
 		 * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops)))
-		 * memcmp() alone below is suffcient, right?
+		 * memcmp() alone below is sufficient, right?
 		 */
 		 if ((first_word & htonl(0xF00FFFFF)) ||
-		    memcmp(&iph->nexthdr, &iph2->nexthdr,
-			   nlen - offsetof(struct ipv6hdr, nexthdr))) {
+		    !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
+		    !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
+		    *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+not_same_flow:
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 		}
+		if (unlikely(nlen > sizeof(struct ipv6hdr))) {
+			if (memcmp(iph + 1, iph2 + 1,
+				   nlen - sizeof(struct ipv6hdr)))
+				goto not_same_flow;
+		}
 		/* flush if Traffic Class fields are different */
 		NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
 		NAPI_GRO_CB(p)->flush |= flush;
@@ -253,7 +277,8 @@
 
 	skb_gro_postpull_rcsum(skb, iph, nlen);
 
-	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive,
+					 ops->callbacks.gro_receive, head, skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -294,7 +319,9 @@
 	return inet_gro_receive(head, skb);
 }
 
-static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	const struct net_offload *ops;
 	struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
@@ -313,7 +340,8 @@
 	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out_unlock;
 
-	err = ops->callbacks.gro_complete(skb, nhoff);
+	err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete,
+			       udp6_gro_complete, skb, nhoff);
 
 out_unlock:
 	rcu_read_unlock();
@@ -351,9 +379,36 @@
 	},
 };
 
+static struct sk_buff *sit_gso_segment(struct sk_buff *skb,
+				       netdev_features_t features)
+{
+	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4))
+		return ERR_PTR(-EINVAL);
+
+	return ipv6_gso_segment(skb, features);
+}
+
+static struct sk_buff *ip4ip6_gso_segment(struct sk_buff *skb,
+					  netdev_features_t features)
+{
+	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6))
+		return ERR_PTR(-EINVAL);
+
+	return inet_gso_segment(skb, features);
+}
+
+static struct sk_buff *ip6ip6_gso_segment(struct sk_buff *skb,
+					  netdev_features_t features)
+{
+	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6))
+		return ERR_PTR(-EINVAL);
+
+	return ipv6_gso_segment(skb, features);
+}
+
 static const struct net_offload sit_offload = {
 	.callbacks = {
-		.gso_segment	= ipv6_gso_segment,
+		.gso_segment	= sit_gso_segment,
 		.gro_receive    = sit_ip6ip6_gro_receive,
 		.gro_complete   = sit_gro_complete,
 	},
@@ -361,7 +416,7 @@
 
 static const struct net_offload ip4ip6_offload = {
 	.callbacks = {
-		.gso_segment	= inet_gso_segment,
+		.gso_segment	= ip4ip6_gso_segment,
 		.gro_receive    = ip4ip6_gro_receive,
 		.gro_complete   = ip4ip6_gro_complete,
 	},
@@ -369,7 +424,7 @@
 
 static const struct net_offload ip6ip6_offload = {
 	.callbacks = {
-		.gso_segment	= ipv6_gso_segment,
+		.gso_segment	= ip6ip6_gso_segment,
 		.gro_receive    = sit_ip6ip6_gro_receive,
 		.gro_complete   = ip6ip6_gro_complete,
 	},
diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h
index 96b40e4..e768987 100644
--- a/net/ipv6/ip6_offload.h
+++ b/net/ipv6/ip6_offload.h
@@ -1,11 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *	IPV6 GSO/GRO offload support
  *	Linux INET6 implementation
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #ifndef __ip6_offload_h
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2694def..71827b5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPv6 output functions
  *	Linux INET6 implementation
@@ -7,11 +8,6 @@
  *
  *	Based on linux/net/ipv4/ip_output.c
  *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  *	Changes:
  *	A.N.Kuznetsov	:	airthmetics in fragmentation.
  *				extension headers are implemented.
@@ -63,8 +59,8 @@
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *dev = dst->dev;
+	const struct in6_addr *nexthop;
 	struct neighbour *neigh;
-	struct in6_addr *nexthop;
 	int ret;
 
 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
@@ -117,7 +113,7 @@
 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
 	if (!IS_ERR(neigh)) {
 		sock_confirm_neigh(skb, neigh);
-		ret = neigh_output(neigh, skb);
+		ret = neigh_output(neigh, skb, false);
 		rcu_read_unlock_bh();
 		return ret;
 	}
@@ -128,16 +124,8 @@
 	return -EINVAL;
 }
 
-static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	int ret;
-
-	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
-	if (ret) {
-		kfree_skb(skb);
-		return ret;
-	}
-
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
 	if (skb_dst(skb)->xfrm) {
@@ -154,6 +142,22 @@
 		return ip6_finish_output2(net, sk, skb);
 }
 
+static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+	switch (ret) {
+	case NET_XMIT_SUCCESS:
+		return __ip6_finish_output(net, sk, skb);
+	case NET_XMIT_CN:
+		return __ip6_finish_output(net, sk, skb) ? : ret;
+	default:
+		kfree_skb(skb);
+		return ret;
+	}
+}
+
 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct net_device *dev = skb_dst(skb)->dev;
@@ -189,7 +193,7 @@
  * which are using proper atomic operations or spinlocks.
  */
 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
-	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
+	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
 {
 	struct net *net = sock_net(sk);
 	const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -254,7 +258,7 @@
 	hdr->daddr = *first_hop;
 
 	skb->protocol = htons(ETH_P_IPV6);
-	skb->priority = sk->sk_priority;
+	skb->priority = priority;
 	skb->mark = mark;
 
 	mtu = dst_mtu(dst);
@@ -300,6 +304,12 @@
 		if (sk && ra->sel == sel &&
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
+			struct ipv6_pinfo *np = inet6_sk(sk);
+
+			if (np && np->rtalert_isolate &&
+			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
+				continue;
+			}
 			if (last) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
@@ -378,6 +388,14 @@
 	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 
+#ifdef CONFIG_NET_SWITCHDEV
+	if (skb->offload_l3_fwd_mark) {
+		consume_skb(skb);
+		return 0;
+	}
+#endif
+
+	skb->tstamp = 0;
 	return dst_output(net, sk, skb);
 }
 
@@ -574,9 +592,173 @@
 	to->tc_index = from->tc_index;
 #endif
 	nf_copy(to, from);
+	skb_ext_copy(to, from);
 	skb_copy_secmark(to, from);
 }
 
+int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
+		      u8 nexthdr, __be32 frag_id,
+		      struct ip6_fraglist_iter *iter)
+{
+	unsigned int first_len;
+	struct frag_hdr *fh;
+
+	/* BUILD HEADER */
+	*prevhdr = NEXTHDR_FRAGMENT;
+	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
+	if (!iter->tmp_hdr)
+		return -ENOMEM;
+
+	iter->frag = skb_shinfo(skb)->frag_list;
+	skb_frag_list_init(skb);
+
+	iter->offset = 0;
+	iter->hlen = hlen;
+	iter->frag_id = frag_id;
+	iter->nexthdr = nexthdr;
+
+	__skb_pull(skb, hlen);
+	fh = __skb_push(skb, sizeof(struct frag_hdr));
+	__skb_push(skb, hlen);
+	skb_reset_network_header(skb);
+	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
+
+	fh->nexthdr = nexthdr;
+	fh->reserved = 0;
+	fh->frag_off = htons(IP6_MF);
+	fh->identification = frag_id;
+
+	first_len = skb_pagelen(skb);
+	skb->data_len = first_len - skb_headlen(skb);
+	skb->len = first_len;
+	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+
+	return 0;
+}
+EXPORT_SYMBOL(ip6_fraglist_init);
+
+void ip6_fraglist_prepare(struct sk_buff *skb,
+			  struct ip6_fraglist_iter *iter)
+{
+	struct sk_buff *frag = iter->frag;
+	unsigned int hlen = iter->hlen;
+	struct frag_hdr *fh;
+
+	frag->ip_summed = CHECKSUM_NONE;
+	skb_reset_transport_header(frag);
+	fh = __skb_push(frag, sizeof(struct frag_hdr));
+	__skb_push(frag, hlen);
+	skb_reset_network_header(frag);
+	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
+	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
+	fh->nexthdr = iter->nexthdr;
+	fh->reserved = 0;
+	fh->frag_off = htons(iter->offset);
+	if (frag->next)
+		fh->frag_off |= htons(IP6_MF);
+	fh->identification = iter->frag_id;
+	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+	ip6_copy_metadata(frag, skb);
+}
+EXPORT_SYMBOL(ip6_fraglist_prepare);
+
+void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
+		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
+		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
+{
+	state->prevhdr = prevhdr;
+	state->nexthdr = nexthdr;
+	state->frag_id = frag_id;
+
+	state->hlen = hlen;
+	state->mtu = mtu;
+
+	state->left = skb->len - hlen;	/* Space per frame */
+	state->ptr = hlen;		/* Where to start from */
+
+	state->hroom = hdr_room;
+	state->troom = needed_tailroom;
+
+	state->offset = 0;
+}
+EXPORT_SYMBOL(ip6_frag_init);
+
+struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
+{
+	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
+	struct sk_buff *frag;
+	struct frag_hdr *fh;
+	unsigned int len;
+
+	len = state->left;
+	/* IF: it doesn't fit, use 'mtu' - the data space left */
+	if (len > state->mtu)
+		len = state->mtu;
+	/* IF: we are not sending up to and including the packet end
+	   then align the next start on an eight byte boundary */
+	if (len < state->left)
+		len &= ~7;
+
+	/* Allocate buffer */
+	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
+			 state->hroom + state->troom, GFP_ATOMIC);
+	if (!frag)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 *	Set up data on packet
+	 */
+
+	ip6_copy_metadata(frag, skb);
+	skb_reserve(frag, state->hroom);
+	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
+	skb_reset_network_header(frag);
+	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
+	frag->transport_header = (frag->network_header + state->hlen +
+				  sizeof(struct frag_hdr));
+
+	/*
+	 *	Charge the memory for the fragment to any owner
+	 *	it might possess
+	 */
+	if (skb->sk)
+		skb_set_owner_w(frag, skb->sk);
+
+	/*
+	 *	Copy the packet header into the new buffer.
+	 */
+	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
+
+	fragnexthdr_offset = skb_network_header(frag);
+	fragnexthdr_offset += prevhdr - skb_network_header(skb);
+	*fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
+	/*
+	 *	Build fragment header.
+	 */
+	fh->nexthdr = state->nexthdr;
+	fh->reserved = 0;
+	fh->identification = state->frag_id;
+
+	/*
+	 *	Copy a block of the IP datagram.
+	 */
+	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
+			     len));
+	state->left -= len;
+
+	fh->frag_off = htons(state->offset);
+	if (state->left > 0)
+		fh->frag_off |= htons(IP6_MF);
+	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+
+	state->ptr += len;
+	state->offset += len;
+
+	return frag;
+}
+EXPORT_SYMBOL(ip6_frag_next);
+
 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		 int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
@@ -584,12 +766,11 @@
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 				inet6_sk(skb->sk) : NULL;
-	struct ipv6hdr *tmp_hdr;
-	struct frag_hdr *fh;
-	unsigned int mtu, hlen, left, len;
-	int hroom, troom;
+	struct ip6_frag_state state;
+	unsigned int mtu, hlen, nexthdr_offset;
+	ktime_t tstamp = skb->tstamp;
+	int hroom, err = 0;
 	__be32 frag_id;
-	int ptr, offset = 0, err = 0;
 	u8 *prevhdr, nexthdr = 0;
 
 	err = ip6_find_1stfragopt(skb, &prevhdr);
@@ -597,6 +778,7 @@
 		goto fail;
 	hlen = err;
 	nexthdr = *prevhdr;
+	nexthdr_offset = prevhdr - skb_network_header(skb);
 
 	mtu = ip6_skb_dst_mtu(skb);
 
@@ -631,9 +813,11 @@
 	    (err = skb_checksum_help(skb)))
 		goto fail;
 
+	prevhdr = skb_network_header(skb) + nexthdr_offset;
 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
 	if (skb_has_frag_list(skb)) {
 		unsigned int first_len = skb_pagelen(skb);
+		struct ip6_fraglist_iter iter;
 		struct sk_buff *frag2;
 
 		if (first_len - hlen > mtu ||
@@ -661,74 +845,30 @@
 			skb->truesize -= frag->truesize;
 		}
 
-		err = 0;
-		offset = 0;
-		/* BUILD HEADER */
-
-		*prevhdr = NEXTHDR_FRAGMENT;
-		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
-		if (!tmp_hdr) {
-			err = -ENOMEM;
+		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
+					&iter);
+		if (err < 0)
 			goto fail;
-		}
-		frag = skb_shinfo(skb)->frag_list;
-		skb_frag_list_init(skb);
-
-		__skb_pull(skb, hlen);
-		fh = __skb_push(skb, sizeof(struct frag_hdr));
-		__skb_push(skb, hlen);
-		skb_reset_network_header(skb);
-		memcpy(skb_network_header(skb), tmp_hdr, hlen);
-
-		fh->nexthdr = nexthdr;
-		fh->reserved = 0;
-		fh->frag_off = htons(IP6_MF);
-		fh->identification = frag_id;
-
-		first_len = skb_pagelen(skb);
-		skb->data_len = first_len - skb_headlen(skb);
-		skb->len = first_len;
-		ipv6_hdr(skb)->payload_len = htons(first_len -
-						   sizeof(struct ipv6hdr));
 
 		for (;;) {
 			/* Prepare header of the next frame,
 			 * before previous one went down. */
-			if (frag) {
-				frag->ip_summed = CHECKSUM_NONE;
-				skb_reset_transport_header(frag);
-				fh = __skb_push(frag, sizeof(struct frag_hdr));
-				__skb_push(frag, hlen);
-				skb_reset_network_header(frag);
-				memcpy(skb_network_header(frag), tmp_hdr,
-				       hlen);
-				offset += skb->len - hlen - sizeof(struct frag_hdr);
-				fh->nexthdr = nexthdr;
-				fh->reserved = 0;
-				fh->frag_off = htons(offset);
-				if (frag->next)
-					fh->frag_off |= htons(IP6_MF);
-				fh->identification = frag_id;
-				ipv6_hdr(frag)->payload_len =
-						htons(frag->len -
-						      sizeof(struct ipv6hdr));
-				ip6_copy_metadata(frag, skb);
-			}
+			if (iter.frag)
+				ip6_fraglist_prepare(skb, &iter);
 
+			skb->tstamp = tstamp;
 			err = output(net, sk, skb);
 			if (!err)
 				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 					      IPSTATS_MIB_FRAGCREATES);
 
-			if (err || !frag)
+			if (err || !iter.frag)
 				break;
 
-			skb = frag;
-			frag = skb->next;
-			skb->next = NULL;
+			skb = ip6_fraglist_next(&iter);
 		}
 
-		kfree(tmp_hdr);
+		kfree(iter.tmp_hdr);
 
 		if (err == 0) {
 			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -736,7 +876,7 @@
 			return 0;
 		}
 
-		kfree_skb_list(frag);
+		kfree_skb_list(iter.frag);
 
 		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 			      IPSTATS_MIB_FRAGFAILS);
@@ -753,93 +893,29 @@
 	}
 
 slow_path:
-	left = skb->len - hlen;		/* Space per frame */
-	ptr = hlen;			/* Where to start from */
-
 	/*
 	 *	Fragment the datagram.
 	 */
 
-	troom = rt->dst.dev->needed_tailroom;
+	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
+		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
+		      &state);
 
 	/*
 	 *	Keep copying data until we run out.
 	 */
-	while (left > 0)	{
-		u8 *fragnexthdr_offset;
 
-		len = left;
-		/* IF: it doesn't fit, use 'mtu' - the data space left */
-		if (len > mtu)
-			len = mtu;
-		/* IF: we are not sending up to and including the packet end
-		   then align the next start on an eight byte boundary */
-		if (len < left)	{
-			len &= ~7;
-		}
-
-		/* Allocate buffer */
-		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
-				 hroom + troom, GFP_ATOMIC);
-		if (!frag) {
-			err = -ENOMEM;
+	while (state.left > 0) {
+		frag = ip6_frag_next(skb, &state);
+		if (IS_ERR(frag)) {
+			err = PTR_ERR(frag);
 			goto fail;
 		}
 
 		/*
-		 *	Set up data on packet
-		 */
-
-		ip6_copy_metadata(frag, skb);
-		skb_reserve(frag, hroom);
-		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
-		skb_reset_network_header(frag);
-		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
-		frag->transport_header = (frag->network_header + hlen +
-					  sizeof(struct frag_hdr));
-
-		/*
-		 *	Charge the memory for the fragment to any owner
-		 *	it might possess
-		 */
-		if (skb->sk)
-			skb_set_owner_w(frag, skb->sk);
-
-		/*
-		 *	Copy the packet header into the new buffer.
-		 */
-		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
-
-		fragnexthdr_offset = skb_network_header(frag);
-		fragnexthdr_offset += prevhdr - skb_network_header(skb);
-		*fragnexthdr_offset = NEXTHDR_FRAGMENT;
-
-		/*
-		 *	Build fragment header.
-		 */
-		fh->nexthdr = nexthdr;
-		fh->reserved = 0;
-		fh->identification = frag_id;
-
-		/*
-		 *	Copy a block of the IP datagram.
-		 */
-		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
-				     len));
-		left -= len;
-
-		fh->frag_off = htons(offset);
-		if (left > 0)
-			fh->frag_off |= htons(IP6_MF);
-		ipv6_hdr(frag)->payload_len = htons(frag->len -
-						    sizeof(struct ipv6hdr));
-
-		ptr += len;
-		offset += len;
-
-		/*
 		 *	Put this fragment into the sending queue.
 		 */
+		frag->tstamp = tstamp;
 		err = output(net, sk, frag);
 		if (err)
 			goto fail;
@@ -1221,6 +1297,7 @@
 	cork->base.fragsize = mtu;
 	cork->base.gso_size = ipc6->gso_size;
 	cork->base.tx_flags = 0;
+	cork->base.mark = ipc6->sockc.mark;
 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
 
 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
@@ -1245,6 +1322,7 @@
 {
 	struct sk_buff *skb, *skb_prev = NULL;
 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
+	struct ubuf_info *uarg = NULL;
 	int exthdrlen = 0;
 	int dst_exthdrlen = 0;
 	int hh_len;
@@ -1257,7 +1335,7 @@
 	int csummode = CHECKSUM_NONE;
 	unsigned int maxnonfragsize, headersize;
 	unsigned int wmem_alloc_delta = 0;
-	bool paged;
+	bool paged, extra_uref = false;
 
 	skb = skb_peek_tail(queue);
 	if (!skb) {
@@ -1322,6 +1400,20 @@
 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
 		csummode = CHECKSUM_PARTIAL;
 
+	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
+		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+		if (!uarg)
+			return -ENOBUFS;
+		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
+		if (rt->dst.dev->features & NETIF_F_SG &&
+		    csummode == CHECKSUM_PARTIAL) {
+			paged = true;
+		} else {
+			uarg->zerocopy = 0;
+			skb_zcopy_set(skb, uarg, &extra_uref);
+		}
+	}
+
 	/*
 	 * Let's try using as much space as possible.
 	 * Use MTU if total length of the message fits into the MTU.
@@ -1354,7 +1446,7 @@
 			unsigned int fraglen;
 			unsigned int fraggap;
 			unsigned int alloclen;
-			unsigned int pagedlen = 0;
+			unsigned int pagedlen;
 alloc_new_skb:
 			/* There's no room in the current skb */
 			if (skb)
@@ -1378,6 +1470,7 @@
 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
 			fraglen = datalen + fragheaderlen;
+			pagedlen = 0;
 
 			if ((flags & MSG_MORE) &&
 			    !(rt->dst.dev->features&NETIF_F_SG))
@@ -1439,12 +1532,6 @@
 			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
 				    dst_exthdrlen);
 
-			/* Only the initial fragment is time stamped */
-			skb_shinfo(skb)->tx_flags = cork->tx_flags;
-			cork->tx_flags = 0;
-			skb_shinfo(skb)->tskey = tskey;
-			tskey = 0;
-
 			/*
 			 *	Find where to start putting bytes
 			 */
@@ -1476,6 +1563,13 @@
 			exthdrlen = 0;
 			dst_exthdrlen = 0;
 
+			/* Only the initial fragment is time stamped */
+			skb_shinfo(skb)->tx_flags = cork->tx_flags;
+			cork->tx_flags = 0;
+			skb_shinfo(skb)->tskey = tskey;
+			tskey = 0;
+			skb_zcopy_set(skb, uarg, &extra_uref);
+
 			if ((flags & MSG_CONFIRM) && !skb_prev)
 				skb_set_dst_pending_confirm(skb, 1);
 
@@ -1505,7 +1599,7 @@
 				err = -EFAULT;
 				goto error;
 			}
-		} else {
+		} else if (!uarg || !uarg->zerocopy) {
 			int i = skb_shinfo(skb)->nr_frags;
 
 			err = -ENOMEM;
@@ -1535,6 +1629,10 @@
 			skb->data_len += copy;
 			skb->truesize += copy;
 			wmem_alloc_delta += copy;
+		} else {
+			err = skb_zerocopy_iter_dgram(skb, from, copy);
+			if (err < 0)
+				goto error;
 		}
 		offset += copy;
 		length -= copy;
@@ -1547,6 +1645,8 @@
 error_efault:
 	err = -EFAULT;
 error:
+	if (uarg)
+		sock_zerocopy_put_abort(uarg, extra_uref);
 	cork->length -= length;
 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
@@ -1668,7 +1768,7 @@
 	hdr->daddr = *final_dst;
 
 	skb->priority = sk->sk_priority;
-	skb->mark = sk->sk_mark;
+	skb->mark = cork->base.mark;
 
 	skb->tstamp = cork->base.transmit_time;
 
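Note on the ip6_output.c fragmentation rework above: the two open-coded loops are replaced by shared helpers (used again by br_ip6_fragment() later in this patch). ip6_fraglist_init()/ip6_fraglist_prepare()/ip6_fraglist_next() walk an skb that already carries a frag list, while ip6_frag_init()/ip6_frag_next() carve fragments out of a linear skb. Each fragment also re-inherits the tstamp saved before fragmentation, so transmit timestamps survive. A condensed sketch of the caller pattern, with locals as in the hunks and error paths trimmed:

    /* Fast path: skb already carries a well-formed frag list. */
    struct ip6_fraglist_iter iter;

    err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, &iter);
    for (;;) {
            if (iter.frag)                  /* build next frame's headers early */
                    ip6_fraglist_prepare(skb, &iter);
            skb->tstamp = tstamp;
            err = output(net, sk, skb);
            if (err || !iter.frag)
                    break;
            skb = ip6_fraglist_next(&iter); /* unlink next fragment from iter */
    }
    kfree(iter.tmp_hdr);                    /* iterator keeps a header copy */

    /* Slow path: allocate and copy one fragment at a time. */
    struct ip6_frag_state state;

    ip6_frag_init(skb, hlen, mtu, tailroom, hdr_room, prevhdr, nexthdr,
                  frag_id, &state);
    while (state.left > 0) {
            frag = ip6_frag_next(skb, &state);
            if (IS_ERR(frag))
                    return PTR_ERR(frag);
            frag->tstamp = tstamp;
            err = output(net, sk, frag);
    }

Separately in this file, ip6_append_data() gains MSG_ZEROCOPY support (sock_zerocopy_realloc() plus skb_zerocopy_iter_dgram(), mirroring the IPv4 UDP path), and the cork now records ipc6->sockc.mark so a per-call mark reaches skb->mark instead of the bare sk->sk_mark.
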
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a9d06d4..754a484 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPv6 tunneling device
  *	Linux INET6 implementation
@@ -10,12 +11,6 @@
  *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  *
  *      RFC 2473
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -421,7 +416,7 @@
 				break;
 			optlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH) {
-			optlen = (hdr->hdrlen + 2) << 2;
+			optlen = ipv6_authlen(hdr);
 		} else {
 			optlen = ipv6_optlen(hdr);
 		}
@@ -627,7 +622,7 @@
 		rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
 					   eiph->daddr, eiph->saddr, 0, 0,
 					   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
-		if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
+		if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) {
 			if (!IS_ERR(rt))
 				ip_rt_put(rt);
 			goto out;
@@ -636,7 +631,7 @@
 	} else {
 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
 				   skb2->dev) ||
-		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
+		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
 			goto out;
 	}
 
@@ -901,6 +896,7 @@
 			goto drop;
 		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 			goto drop;
+		ipv6h = ipv6_hdr(skb);
 		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
 			goto drop;
 		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
@@ -1242,10 +1238,6 @@
 	u8 tproto;
 	int err;
 
-	/* ensure we can access the full inner ip header */
-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-		return -1;
-
 	iph = ip_hdr(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 
@@ -1286,12 +1278,11 @@
 	}
 
 	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+	dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
 
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
-	dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
-
 	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1320,9 +1311,6 @@
 	u8 tproto;
 	int err;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
-		return -1;
-
 	ipv6h = ipv6_hdr(skb);
 	tproto = READ_ONCE(t->parms.proto);
 	if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
@@ -1378,12 +1366,11 @@
 	}
 
 	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+	dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
 
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
-	dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
-
 	skb_set_inner_ipproto(skb, IPPROTO_IPV6);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1404,6 +1391,9 @@
 	struct net_device_stats *stats = &t->dev->stats;
 	int ret;
 
+	if (!pskb_inet_may_pull(skb))
+		goto tx_err;
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
 		ret = ip4ip6_tnl_xmit(skb, dev);
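
Note: several independent fixes land in ip6_tunnel.c. The per-family pskb_may_pull() checks in ip4ip6_tnl_xmit()/ip6ip6_tnl_xmit() move up into ip6_tnl_start_xmit() as a single pskb_inet_may_pull(), validating the inner network header for whichever protocol the skb claims to carry. The ECN dsfield is now read before iptunnel_handle_offloads(), since that call can reallocate the skb head and leave a previously cached iph/ipv6h pointer dangling. The ARPHRD_TUNNEL6 change fixes the err-handler route check, which previously tested an ip6tnl device against the IPv4 tunnel ARP type. For reference, the new pull helper reads roughly as follows (a sketch from include/net/ip_tunnels.h as of v5.4):

    static inline bool pskb_inet_may_pull(struct sk_buff *skb)
    {
            int nhlen;

            switch (skb->protocol) {
    #if IS_ENABLED(CONFIG_IPV6)
            case htons(ETH_P_IPV6):
                    nhlen = sizeof(struct ipv6hdr);
                    break;
    #endif
            case htons(ETH_P_IP):
                    nhlen = sizeof(struct iphdr);
                    break;
            default:
                    nhlen = 0;
            }

            return pskb_network_may_pull(skb, nhlen);
    }
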
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index b283f29..58956a6 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/socket.h>
@@ -15,7 +16,7 @@
 int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 		     struct socket **sockp)
 {
-	struct sockaddr_in6 udp6_addr;
+	struct sockaddr_in6 udp6_addr = {};
 	int err;
 	struct socket *sock = NULL;
 
@@ -31,6 +32,13 @@
 		if (err < 0)
 			goto error;
 	}
+	if (cfg->bind_ifindex) {
+		err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
+					(void *)&cfg->bind_ifindex,
+					sizeof(cfg->bind_ifindex));
+		if (err < 0)
+			goto error;
+	}
 
 	udp6_addr.sin6_family = AF_INET6;
 	memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
@@ -42,6 +50,7 @@
 		goto error;
 
 	if (cfg->peer_udp_port) {
+		memset(&udp6_addr, 0, sizeof(udp6_addr));
 		udp6_addr.sin6_family = AF_INET6;
 		memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
 		       sizeof(udp6_addr.sin6_addr));
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index eeaf745..024db17 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPv6 virtual tunneling interface
  *
@@ -8,11 +9,6 @@
  *
  *	Based on:
  *	net/ipv6/ip6_tunnel.c
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -318,6 +314,7 @@
 			return 0;
 		}
 
+		ipv6h = ipv6_hdr(skb);
 		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
 			t->dev->stats.rx_dropped++;
 			rcu_read_unlock();
@@ -341,7 +338,7 @@
 	struct net_device *dev;
 	struct pcpu_sw_netstats *tstats;
 	struct xfrm_state *x;
-	struct xfrm_mode *inner_mode;
+	const struct xfrm_mode *inner_mode;
 	struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
 	u32 orig_mark = skb->mark;
 	int ret;
@@ -360,7 +357,7 @@
 
 	x = xfrm_input_state(skb);
 
-	inner_mode = x->inner_mode;
+	inner_mode = &x->inner_mode;
 
 	if (x->sel.family == AF_UNSPEC) {
 		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@ -371,7 +368,7 @@
 		}
 	}
 
-	family = inner_mode->afinfo->family;
+	family = inner_mode->family;
 
 	skb->mark = be32_to_cpu(t->parms.i_key);
 	ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
@@ -521,18 +518,18 @@
 {
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct net_device_stats *stats = &t->dev->stats;
-	struct ipv6hdr *ipv6h;
 	struct flowi fl;
 	int ret;
 
+	if (!pskb_inet_may_pull(skb))
+		goto tx_err;
+
 	memset(&fl, 0, sizeof(fl));
 
 	switch (skb->protocol) {
 	case htons(ETH_P_IPV6):
-		ipv6h = ipv6_hdr(skb);
-
 		if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
-		    vti6_addr_conflict(t, ipv6h))
+		    vti6_addr_conflict(t, ipv6_hdr(skb)))
 			goto tx_err;
 
 		xfrm_decode_session(skb, &fl, AF_INET6);
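
Note: both the ip6_tnl_rcv() hunk earlier and vti6_rcv() here reload the IPv6 header pointer after xfrm6_policy_check(). The policy check can end up pulling or reallocating skb data (for instance via xfrm_decode_session() walking extension headers), so a header pointer cached before the call may dangle. The safe pattern is now:

    if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
            goto drop;
    ipv6h = ipv6_hdr(skb);  /* reload: the check may have moved skb->data */
    if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
            goto drop;

On the transmit side, vti6_tnl_xmit() drops its cached ipv6h entirely and fetches ipv6_hdr(skb) at the point of use, behind the new pskb_inet_may_pull() gate.
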
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index d0b7e02..857a89a 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Linux IPv6 multicast routing support for BSD pim6sd
  *	Based on net/ipv4/ipmr.c.
@@ -8,12 +9,6 @@
  *		6WIND, Paris, France
  *	Copyright (C)2007,2008 USAGI/WIDE Project
  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  */
 
 #include <linux/uaccess.h>
@@ -51,6 +46,9 @@
 #include <linux/export.h>
 #include <net/ip6_checksum.h>
 #include <linux/netconf.h>
+#include <net/ip_tunnels.h>
+
+#include <linux/nospec.h>
 
 struct ip6mr_rule {
 	struct fib_rule		common;
@@ -85,7 +83,8 @@
 static void ip6mr_free_table(struct mr_table *mrt);
 
 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
-			   struct sk_buff *skb, struct mfc6_cache *cache);
+			   struct net_device *dev, struct sk_buff *skb,
+			   struct mfc6_cache *cache);
 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
 			      mifi_t mifi, int assert);
 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
@@ -93,7 +92,7 @@
 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 			       struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, int flags);
 static void ipmr_expire_process(struct timer_list *t);
 
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -138,6 +137,9 @@
 		.flags = FIB_LOOKUP_NOREF,
 	};
 
+	/* update flow if oif or iif point to device enslaved to l3mdev */
+	l3mdev_update_flow(net, flowi6_to_flowi(flp6));
+
 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 			       flowi6_to_flowi(flp6), 0, &arg);
 	if (err < 0)
@@ -164,7 +166,9 @@
 		return -EINVAL;
 	}
 
-	mrt = ip6mr_get_table(rule->fr_net, rule->table);
+	arg->table = fib_rule_get_table(rule, arg);
+
+	mrt = ip6mr_get_table(rule->fr_net, arg->table);
 	if (!mrt)
 		return -EAGAIN;
 	res->mrt = mrt;
@@ -346,7 +350,6 @@
 	.key_offset = offsetof(struct mfc6_cache, cmparg),
 	.key_len = sizeof(struct mfc6_cache_cmp_arg),
 	.nelem_hint = 3,
-	.locks_mul = 1,
 	.obj_cmpfn = ip6mr_hash_cmp,
 	.automatic_shrinking = true,
 };
@@ -384,7 +387,8 @@
 static void ip6mr_free_table(struct mr_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt, true);
+	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
+				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
 	rhltable_destroy(&mrt->mfc_hash);
 	kfree(mrt);
 }
@@ -591,13 +595,12 @@
 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
 		.flowi6_mark	= skb->mark,
 	};
-	int err;
 
-	err = ip6mr_fib_lookup(net, &fl6, &mrt);
-	if (err < 0) {
-		kfree_skb(skb);
-		return err;
-	}
+	if (!pskb_inet_may_pull(skb))
+		goto tx_err;
+
+	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
+		goto tx_err;
 
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
@@ -606,6 +609,11 @@
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
 	return NETDEV_TX_OK;
+
+tx_err:
+	dev->stats.tx_errors++;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
 }
 
 static int reg_vif_get_iflink(const struct net_device *dev)
@@ -649,7 +657,7 @@
 		return NULL;
 	}
 
-	if (dev_open(dev))
+	if (dev_open(dev, NULL))
 		goto failure;
 
 	dev_hold(dev);
@@ -1014,7 +1022,7 @@
 			}
 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 		} else
-			ip6_mr_forward(net, mrt, skb, c);
+			ip6_mr_forward(net, mrt, skb->dev, skb, c);
 	}
 }
 
@@ -1120,7 +1128,7 @@
 
 /* Queue a packet for resolution. It gets locked cache entry! */
 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
-				  struct sk_buff *skb)
+				  struct sk_buff *skb, struct net_device *dev)
 {
 	struct mfc6_cache *c;
 	bool found = false;
@@ -1140,8 +1148,8 @@
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
-		    (c = ip6mr_cache_alloc_unres()) == NULL) {
+		c = ip6mr_cache_alloc_unres();
+		if (!c) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -1180,6 +1188,10 @@
 		kfree_skb(skb);
 		err = -ENOBUFS;
 	} else {
+		if (dev) {
+			skb->dev = dev;
+			skb->skb_iif = dev->ifindex;
+		}
 		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
 		err = 0;
 	}
@@ -1479,43 +1491,51 @@
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, int flags)
 {
 	struct mr_mfc *c, *tmp;
 	LIST_HEAD(list);
 	int i;
 
 	/* Shut down all active vif entries */
-	for (i = 0; i < mrt->maxvif; i++) {
-		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
-			continue;
-		mif6_delete(mrt, i, 0, &list);
+	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
+		for (i = 0; i < mrt->maxvif; i++) {
+			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
+			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
+			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
+				continue;
+			mif6_delete(mrt, i, 0, &list);
+		}
+		unregister_netdevice_many(&list);
 	}
-	unregister_netdevice_many(&list);
 
 	/* Wipe the cache */
-	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
-		if (!all && (c->mfc_flags & MFC_STATIC))
-			continue;
-		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
-		list_del_rcu(&c->list);
-		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
-		mr_cache_put(c);
-	}
-
-	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
-		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
-			list_del(&c->list);
+	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
+		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
+			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
+				continue;
+			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+			list_del_rcu(&c->list);
 			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
 						       FIB_EVENT_ENTRY_DEL,
-						       (struct mfc6_cache *)c,
-						       mrt->id);
-			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
-					  RTM_DELROUTE);
-			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
+						       (struct mfc6_cache *)c, mrt->id);
+			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+			mr_cache_put(c);
 		}
-		spin_unlock_bh(&mfc_unres_lock);
+	}
+
+	if (flags & MRT6_FLUSH_MFC) {
+		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+			spin_lock_bh(&mfc_unres_lock);
+			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
+				list_del(&c->list);
+				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+						  RTM_DELROUTE);
+				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
+			}
+			spin_unlock_bh(&mfc_unres_lock);
+		}
 	}
 }
 
@@ -1571,7 +1591,7 @@
 						     NETCONFA_IFINDEX_ALL,
 						     net->ipv6.devconf_all);
 
-			mroute_clean_tables(mrt, false);
+			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
 			err = 0;
 			break;
 		}
@@ -1687,6 +1707,20 @@
 		rtnl_unlock();
 		return ret;
 
+	case MRT6_FLUSH:
+	{
+		int flags;
+
+		if (optlen != sizeof(flags))
+			return -EINVAL;
+		if (get_user(flags, (int __user *)optval))
+			return -EFAULT;
+		rtnl_lock();
+		mroute_clean_tables(mrt, flags);
+		rtnl_unlock();
+		return 0;
+	}
+
 	/*
 	 *	Control PIM assert (to activate pim will activate assert)
 	 */
@@ -1831,6 +1865,7 @@
 			return -EFAULT;
 		if (vr.mifi >= mrt->maxvif)
 			return -EINVAL;
+		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
 		read_lock(&mrt_lock);
 		vif = &mrt->vif_table[vr.mifi];
 		if (VIF_EXISTS(mrt, vr.mifi)) {
@@ -1905,6 +1940,7 @@
 			return -EFAULT;
 		if (vr.mifi >= mrt->maxvif)
 			return -EINVAL;
+		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
 		read_lock(&mrt_lock);
 		vif = &mrt->vif_table[vr.mifi];
 		if (VIF_EXISTS(mrt, vr.mifi)) {
@@ -1946,10 +1982,10 @@
 
 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-			IPSTATS_MIB_OUTFORWDATAGRAMS);
-	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
-			IPSTATS_MIB_OUTOCTETS, skb->len);
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+		      IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
+		      IPSTATS_MIB_OUTOCTETS, skb->len);
 	return dst_output(net, sk, skb);
 }
 
@@ -1958,7 +1994,7 @@
  */
 
 static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
-			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
+			  struct sk_buff *skb, int vifi)
 {
 	struct ipv6hdr *ipv6h;
 	struct vif_device *vif = &mrt->vif_table[vifi];
@@ -2043,11 +2079,12 @@
 }
 
 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
-			   struct sk_buff *skb, struct mfc6_cache *c)
+			   struct net_device *dev, struct sk_buff *skb,
+			   struct mfc6_cache *c)
 {
 	int psend = -1;
 	int vif, ct;
-	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
+	int true_vifi = ip6mr_find_vif(mrt, dev);
 
 	vif = c->_c.mfc_parent;
 	c->_c.mfc_un.res.pkt++;
@@ -2073,7 +2110,7 @@
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
-	if (mrt->vif_table[vif].dev != skb->dev) {
+	if (mrt->vif_table[vif].dev != dev) {
 		c->_c.mfc_un.res.wrong_if++;
 
 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
@@ -2123,15 +2160,14 @@
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
-					ip6mr_forward2(net, mrt, skb2,
-						       c, psend);
+					ip6mr_forward2(net, mrt, skb2, psend);
 			}
 			psend = ct;
 		}
 	}
 last_forward:
 	if (psend != -1) {
-		ip6mr_forward2(net, mrt, skb, c, psend);
+		ip6mr_forward2(net, mrt, skb, psend);
 		return;
 	}
 
@@ -2154,6 +2190,19 @@
 		.flowi6_mark	= skb->mark,
 	};
 	int err;
+	struct net_device *dev;
+
+	/* skb->dev passed in is the master dev for vrfs.
+	 * Get the proper interface that does have a vif associated with it.
+	 */
+	dev = skb->dev;
+	if (netif_is_l3_master(skb->dev)) {
+		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
+		if (!dev) {
+			kfree_skb(skb);
+			return -ENODEV;
+		}
+	}
 
 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
 	if (err < 0) {
@@ -2165,7 +2214,7 @@
 	cache = ip6mr_cache_find(mrt,
 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
 	if (!cache) {
-		int vif = ip6mr_find_vif(mrt, skb->dev);
+		int vif = ip6mr_find_vif(mrt, dev);
 
 		if (vif >= 0)
 			cache = ip6mr_cache_find_any(mrt,
@@ -2179,9 +2228,9 @@
 	if (!cache) {
 		int vif;
 
-		vif = ip6mr_find_vif(mrt, skb->dev);
+		vif = ip6mr_find_vif(mrt, dev);
 		if (vif >= 0) {
-			int err = ip6mr_cache_unresolved(mrt, vif, skb);
+			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
 			read_unlock(&mrt_lock);
 
 			return err;
@@ -2191,7 +2240,7 @@
 		return -ENODEV;
 	}
 
-	ip6_mr_forward(net, mrt, skb, cache);
+	ip6_mr_forward(net, mrt, dev, skb, cache);
 
 	read_unlock(&mrt_lock);
 
@@ -2257,7 +2306,7 @@
 		iph->saddr = rt->rt6i_src.addr;
 		iph->daddr = rt->rt6i_dst.addr;
 
-		err = ip6mr_cache_unresolved(mrt, vif, skb2);
+		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
 		read_unlock(&mrt_lock);
 
 		return err;
@@ -2433,6 +2482,33 @@
 
 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
+	struct fib_dump_filter filter = {};
+	int err;
+
+	if (cb->strict_check) {
+		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
+					    &filter, cb);
+		if (err < 0)
+			return err;
+	}
+
+	if (filter.table_id) {
+		struct mr_table *mrt;
+
+		mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
+		if (!mrt) {
+			if (filter.dump_all_families)
+				return skb->len;
+
+			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
+			return -ENOENT;
+		}
+		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
+				    &mfc_unres_lock, &filter);
+		return skb->len ? : err;
+	}
+
 	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
-				_ip6mr_fill_mroute, &mfc_unres_lock);
+				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
 }
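
Note: ip6mr.c gains selective flushing: mroute_clean_tables() now takes MRT6_FLUSH_* flag bits instead of a bool, exposed through the new MRT6_FLUSH setsockopt. The user-supplied mif index is also clamped with array_index_nospec() (Spectre-v1 style hardening), lookups become l3mdev-aware so multicast routing works behind VRF devices, and RTM mroute dumps honour strict checking plus a per-table filter. A hypothetical userspace sketch, issued on an initialized (MRT6_INIT) multicast-routing control socket:

    /* Constants from <linux/mroute6.h>: flush dynamic state only;
     * entries marked VIFF_STATIC / MFC_STATIC survive.
     */
    int flags = MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC;

    if (setsockopt(mrt_fd, IPPROTO_IPV6, MRT6_FLUSH,
                   &flags, sizeof(flags)) < 0)
            perror("MRT6_FLUSH");
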
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 54d165b..3752bd3 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173
  *
  * Copyright (C)2003 USAGI/WIDE Project
  *
  * Author	Mitsuru KANDA  <mk@linux-ipv6.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 /*
  * [Memo]
@@ -218,8 +206,7 @@
 {
 	if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)
 		pr_info("%s: can't remove protocol\n", __func__);
-	if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
-		pr_info("%s: can't remove xfrm type\n", __func__);
+	xfrm_unregister_type(&ipcomp6_type, AF_INET6);
 }
 
 module_init(ipcomp6_init);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c0cac9c..79fc012 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPv6 BSD socket options interface
  *	Linux INET6 implementation
@@ -7,11 +8,6 @@
  *
  *	Based on linux/net/ipv4/ip_sockglue.c
  *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  *	FIXME: Make the setsockopt code POSIX compliant: That is
  *
  *	o	Truncate getsockopt returns
@@ -68,6 +64,8 @@
 		return -ENOPROTOOPT;
 
 	new_ra = (sel >= 0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
+	if (sel >= 0 && !new_ra)
+		return -ENOMEM;
 
 	write_lock_bh(&ip6_ra_lock);
 	for (rap = &ip6_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
@@ -365,8 +363,8 @@
 		break;
 
 	case IPV6_TRANSPARENT:
-		if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) &&
-		    !ns_capable(net->user_ns, CAP_NET_RAW)) {
+		if (valbool && !ns_capable(net->user_ns, CAP_NET_RAW) &&
+		    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
 			retv = -EPERM;
 			break;
 		}
@@ -486,7 +484,7 @@
 				retv = -EFAULT;
 				break;
 		}
-		if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if)
+		if (!sk_dev_equal_l3scope(sk, pkt.ipi6_ifindex))
 			goto e_inval;
 
 		np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
@@ -674,6 +672,13 @@
 			retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
 		break;
 	}
+	case IPV6_MULTICAST_ALL:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		np->mc_all = valbool;
+		retv = 0;
+		break;
+
 	case MCAST_JOIN_GROUP:
 	case MCAST_LEAVE_GROUP:
 	{
@@ -780,6 +785,12 @@
 			goto e_inval;
 		retv = ip6_ra_control(sk, val);
 		break;
+	case IPV6_ROUTER_ALERT_ISOLATE:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		np->rtalert_isolate = valbool;
+		retv = 0;
+		break;
 	case IPV6_MTU_DISCOVER:
 		if (optlen < sizeof(int))
 			goto e_inval;
@@ -1266,6 +1277,10 @@
 		val = np->mcast_oif;
 		break;
 
+	case IPV6_MULTICAST_ALL:
+		val = np->mc_all;
+		break;
+
 	case IPV6_UNICAST_IF:
 		val = (__force int)htonl((__u32) np->ucast_oif);
 		break;
@@ -1347,6 +1362,10 @@
 		val = np->rxopt.bits.recvfragsize;
 		break;
 
+	case IPV6_ROUTER_ALERT_ISOLATE:
+		val = np->rtalert_isolate;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
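
Note: ipv6_sockglue.c adds two socket options. IPV6_MULTICAST_ALL is the IPv6 counterpart of IP_MULTICAST_ALL: when cleared, the socket receives only traffic for groups it explicitly joined (see the mcast.c hunk below returning np->mc_all). IPV6_ROUTER_ALERT_ISOLATE sets np->rtalert_isolate to keep router-alert delivery within the socket's own network namespace. The ip6_ra_control() hunk above also fixes an unchecked kmalloc(): allocation failure now returns -ENOMEM before ip6_ra_lock is taken. A hypothetical usage sketch:

    /* Deliver only groups this socket joined itself. */
    int off = 0;

    if (setsockopt(fd, IPPROTO_IPV6, IPV6_MULTICAST_ALL,
                   &off, sizeof(off)) < 0)
            perror("IPV6_MULTICAST_ALL");
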
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index dbab62e..eaa4c2c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Multicast support for IPv6
  *	Linux INET6 implementation
@@ -6,11 +7,6 @@
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 /* Changes:
@@ -636,7 +632,7 @@
 	}
 	if (!mc) {
 		rcu_read_unlock();
-		return true;
+		return np->mc_all;
 	}
 	read_lock(&mc->sflock);
 	psl = mc->sflist;
@@ -791,14 +787,15 @@
 	if (pmc) {
 		im->idev = pmc->idev;
 		if (im->mca_sfmode == MCAST_INCLUDE) {
-			im->mca_tomb = pmc->mca_tomb;
-			im->mca_sources = pmc->mca_sources;
+			swap(im->mca_tomb, pmc->mca_tomb);
+			swap(im->mca_sources, pmc->mca_sources);
 			for (psf = im->mca_sources; psf; psf = psf->sf_next)
 				psf->sf_crcount = idev->mc_qrv;
 		} else {
 			im->mca_crcount = idev->mc_qrv;
 		}
 		in6_dev_put(pmc->idev);
+		ip6_mc_clear_src(pmc);
 		kfree(pmc);
 	}
 	spin_unlock_bh(&im->mca_lock);
@@ -940,6 +937,7 @@
 {
 	return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE);
 }
+EXPORT_SYMBOL(ipv6_dev_mc_inc);
 
 /*
  *	device multicast group del
@@ -987,6 +985,7 @@
 
 	return err;
 }
+EXPORT_SYMBOL(ipv6_dev_mc_dec);
 
 /*
  *	check if the interface/address pair is valid
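
Note: the hunk above (in mld_del_delrec(), by its shape) replaces plain pointer assignment with swap(). Previously, im's own mca_tomb/mca_sources lists were overwritten by the saved ones and leaked; after the swap, pmc holds im's displaced lists, and the added ip6_mc_clear_src(pmc) frees them before kfree(pmc):

    swap(im->mca_tomb, pmc->mca_tomb);        /* im takes the saved lists */
    swap(im->mca_sources, pmc->mca_sources);  /* pmc inherits im's old ones */
    /* then, before kfree(pmc): */
    ip6_mc_clear_src(pmc);                    /* frees the displaced lists */
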
diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c
index 9405b04..d3d6b6a 100644
--- a/net/ipv6/mcast_snoop.c
+++ b/net/ipv6/mcast_snoop.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2010: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  * Copyright (C) 2015: Linus Lüssing <linus.luessing@c0d3.blue>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- *
  * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki.
  */
 
@@ -41,6 +29,8 @@
 	if (skb->len < len || len <= offset)
 		return -EINVAL;
 
+	skb_set_transport_header(skb, offset);
+
 	return 0;
 }
 
@@ -77,27 +67,27 @@
 
 	len += sizeof(struct mld2_report);
 
-	return pskb_may_pull(skb, len) ? 0 : -EINVAL;
+	return ipv6_mc_may_pull(skb, len) ? 0 : -EINVAL;
 }
 
 static int ipv6_mc_check_mld_query(struct sk_buff *skb)
 {
+	unsigned int transport_len = ipv6_transport_len(skb);
 	struct mld_msg *mld;
-	unsigned int len = skb_transport_offset(skb);
+	unsigned int len;
 
 	/* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
 		return -EINVAL;
 
-	len += sizeof(struct mld_msg);
-	if (skb->len < len)
-		return -EINVAL;
-
 	/* MLDv1? */
-	if (skb->len != len) {
+	if (transport_len != sizeof(struct mld_msg)) {
 		/* or MLDv2? */
-		len += sizeof(struct mld2_query) - sizeof(struct mld_msg);
-		if (skb->len < len || !pskb_may_pull(skb, len))
+		if (transport_len < sizeof(struct mld2_query))
+			return -EINVAL;
+
+		len = skb_transport_offset(skb) + sizeof(struct mld2_query);
+		if (!ipv6_mc_may_pull(skb, len))
 			return -EINVAL;
 	}
 
@@ -115,12 +105,17 @@
 
 static int ipv6_mc_check_mld_msg(struct sk_buff *skb)
 {
-	struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb);
+	unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg);
+	struct mld_msg *mld;
+
+	if (!ipv6_mc_may_pull(skb, len))
+		return -EINVAL;
+
+	mld = (struct mld_msg *)skb_transport_header(skb);
 
 	switch (mld->mld_type) {
 	case ICMPV6_MGM_REDUCTION:
 	case ICMPV6_MGM_REPORT:
-		/* fall through */
 		return 0;
 	case ICMPV6_MLD2_REPORT:
 		return ipv6_mc_check_mld_reportv2(skb);
@@ -136,49 +131,30 @@
 	return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo);
 }
 
-static int __ipv6_mc_check_mld(struct sk_buff *skb,
-			       struct sk_buff **skb_trimmed)
-
+int ipv6_mc_check_icmpv6(struct sk_buff *skb)
 {
-	struct sk_buff *skb_chk = NULL;
-	unsigned int transport_len;
-	unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg);
-	int ret = -EINVAL;
+	unsigned int len = skb_transport_offset(skb) + sizeof(struct icmp6hdr);
+	unsigned int transport_len = ipv6_transport_len(skb);
+	struct sk_buff *skb_chk;
 
-	transport_len = ntohs(ipv6_hdr(skb)->payload_len);
-	transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr);
+	if (!ipv6_mc_may_pull(skb, len))
+		return -EINVAL;
 
 	skb_chk = skb_checksum_trimmed(skb, transport_len,
 				       ipv6_mc_validate_checksum);
 	if (!skb_chk)
-		goto err;
+		return -EINVAL;
 
-	if (!pskb_may_pull(skb_chk, len))
-		goto err;
-
-	ret = ipv6_mc_check_mld_msg(skb_chk);
-	if (ret)
-		goto err;
-
-	if (skb_trimmed)
-		*skb_trimmed = skb_chk;
-	/* free now unneeded clone */
-	else if (skb_chk != skb)
+	if (skb_chk != skb)
 		kfree_skb(skb_chk);
 
-	ret = 0;
-
-err:
-	if (ret && skb_chk && skb_chk != skb)
-		kfree_skb(skb_chk);
-
-	return ret;
+	return 0;
 }
+EXPORT_SYMBOL(ipv6_mc_check_icmpv6);
 
 /**
  * ipv6_mc_check_mld - checks whether this is a sane MLD packet
  * @skb: the skb to validate
- * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional)
  *
  * Checks whether an IPv6 packet is a valid MLD packet. If so sets
  * skb transport header accordingly and returns zero.
@@ -188,18 +164,10 @@
  * -ENOMSG: IP header validation succeeded but it is not an MLD packet.
  * -ENOMEM: A memory allocation failure happened.
  *
- * Optionally, an skb pointer might be provided via skb_trimmed (or set it
- * to NULL): After parsing an MLD packet successfully it will point to
- * an skb which has its tail aligned to the IP packet end. This might
- * either be the originally provided skb or a trimmed, cloned version if
- * the skb frame had data beyond the IP packet. A cloned skb allows us
- * to leave the original skb and its full frame unchanged (which might be
- * desirable for layer 2 frame jugglers).
- *
  * Caller needs to set the skb network header and free any returned skb if it
  * differs from the provided skb.
  */
-int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+int ipv6_mc_check_mld(struct sk_buff *skb)
 {
 	int ret;
 
@@ -211,6 +179,10 @@
 	if (ret < 0)
 		return ret;
 
-	return __ipv6_mc_check_mld(skb, skb_trimmed);
+	ret = ipv6_mc_check_icmpv6(skb);
+	if (ret < 0)
+		return ret;
+
+	return ipv6_mc_check_mld_msg(skb);
 }
 EXPORT_SYMBOL(ipv6_mc_check_mld);
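
Note: ipv6_mc_check_mld() loses its skb_trimmed out-parameter. Checksum validation now trims in place and frees any clone immediately, and the ICMPv6 length/checksum step is split out and exported as ipv6_mc_check_icmpv6() for non-MLD users such as bridge snooping. Following the kerneldoc above, a caller now looks roughly like:

    switch (ipv6_mc_check_mld(skb)) {
    case 0:
            /* sane MLD; transport header points at the MLD message */
            break;
    case -ENOMSG:
            /* valid IPv6 header, but not an MLD packet */
            break;
    default:
            /* -EINVAL or -ENOMEM: malformed packet or allocation failure */
            break;
    }
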
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 64f0f7b..878fcec 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C)2003-2006 Helsinki University of Technology
  * Copyright (C)2003-2006 USAGI/WIDE Project
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 /*
  * Authors:
@@ -511,10 +499,8 @@
 {
 	if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
 		pr_info("%s: can't remove rawv6 mh filter\n", __func__);
-	if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
-		pr_info("%s: can't remove xfrm type(rthdr)\n", __func__);
-	if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
-		pr_info("%s: can't remove xfrm type(destopt)\n", __func__);
+	xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
+	xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
 }
 
 module_init(mip6_init);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 673a4a9..53caf59 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Neighbour Discovery for IPv6
  *	Linux INET6 implementation
@@ -5,11 +6,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Mike Shaver		<shaver@ingenia.com>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 /*
@@ -77,6 +73,8 @@
 		      const struct net_device *dev,
 		      __u32 *hash_rnd);
 static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
+static bool ndisc_allow_add(const struct net_device *dev,
+			    struct netlink_ext_ack *extack);
 static int ndisc_constructor(struct neighbour *neigh);
 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -117,6 +115,7 @@
 	.pconstructor =	pndisc_constructor,
 	.pdestructor =	pndisc_destructor,
 	.proxy_redo =	pndisc_redo,
+	.allow_add  =   ndisc_allow_add,
 	.id =		"ndisc_cache",
 	.parms = {
 		.tbl			= &nd_tbl,
@@ -197,6 +196,7 @@
 {
 	return opt->nd_opt_type == ND_OPT_RDNSS ||
 		opt->nd_opt_type == ND_OPT_DNSSL ||
+		opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
 		ndisc_ops_is_useropt(dev, opt->nd_opt_type);
 }
 
@@ -392,6 +392,20 @@
 	ipv6_dev_mc_dec(dev, &maddr);
 }
 
+/* called with rtnl held */
+static bool ndisc_allow_add(const struct net_device *dev,
+			    struct netlink_ext_ack *extack)
+{
+	struct inet6_dev *idev = __in6_dev_get(dev);
+
+	if (!idev || idev->cnf.disable_ipv6) {
+		NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device");
+		return false;
+	}
+
+	return true;
+}
+
 static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
 				       int len)
 {
@@ -1272,12 +1286,11 @@
 	    !in6_dev->cnf.accept_ra_rtr_pref)
 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
 #endif
-
+	/* routes added from RAs do not use nexthop objects */
 	rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
-
 	if (rt) {
-		neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
-					 rt->fib6_nh.nh_dev, NULL,
+		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
+					 rt->fib6_nh->fib_nh_dev, NULL,
 					  &ipv6_hdr(skb)->saddr);
 		if (!neigh) {
 			ND_PRINTK(0, err,
@@ -1306,8 +1319,8 @@
 			return;
 		}
 
-		neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
-					 rt->fib6_nh.nh_dev, NULL,
+		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
+					 rt->fib6_nh->fib_nh_dev, NULL,
 					  &ipv6_hdr(skb)->saddr);
 		if (!neigh) {
 			ND_PRINTK(0, err,
@@ -1533,7 +1546,7 @@
 
 	if (!ndopts.nd_opts_rh) {
 		ip6_redirect_no_header(skb, dev_net(skb->dev),
-					skb->dev->ifindex, 0);
+					skb->dev->ifindex);
 		return;
 	}
 
@@ -1783,6 +1796,8 @@
 		change_info = ptr;
 		if (change_info->flags_changed & IFF_NOARP)
 			neigh_changeaddr(&nd_tbl, dev);
+		if (!netif_carrier_ok(dev))
+			neigh_carrier_down(&nd_tbl, dev);
 		break;
 	case NETDEV_DOWN:
 		neigh_ifdown(&nd_tbl, dev);
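
Note: ndisc.c wires up the new neigh_table->allow_add() hook, so userspace attempts to add an ndisc entry on a device with IPv6 disabled fail early with the extack message above instead of creating an unusable neighbour. Carrier loss now also flushes the table via neigh_carrier_down(), and ND_OPT_CAPTIVE_PORTAL (RFC 7710) is forwarded to userspace as a useropt. The core-side call site is approximately the following (an assumption recalled from net/core/neighbour.c, not part of this hunk):

    if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
            err = -EINVAL;          /* hook vetoed the new entry */
            goto out;
    }
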
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 8b075f0..409e79b 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -16,6 +16,9 @@
 #include <net/ip6_route.h>
 #include <net/xfrm.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include "../bridge/br_private.h"
 
 int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 {
@@ -23,9 +26,11 @@
 	struct sock *sk = sk_to_full_sk(skb->sk);
 	unsigned int hh_len;
 	struct dst_entry *dst;
+	int strict = (ipv6_addr_type(&iph->daddr) &
+		      (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
 	struct flowi6 fl6 = {
 		.flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
-			rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
+			strict ? skb_dst(skb)->dev->ifindex : 0,
 		.flowi6_mark = skb->mark,
 		.flowi6_uid = sock_net_uid(net, sk),
 		.daddr = iph->daddr,
@@ -84,8 +89,8 @@
 	return 0;
 }
 
-static int nf_ip6_route(struct net *net, struct dst_entry **dst,
-			struct flowi *fl, bool strict)
+int __nf_ip6_route(struct net *net, struct dst_entry **dst,
+		   struct flowi *fl, bool strict)
 {
 	static const struct ipv6_pinfo fake_pinfo;
 	static const struct inet_sock fake_sk = {
@@ -105,13 +110,147 @@
 		*dst = result;
 	return err;
 }
+EXPORT_SYMBOL_GPL(__nf_ip6_route);
+
+int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+		    struct nf_bridge_frag_data *data,
+		    int (*output)(struct net *, struct sock *sk,
+				  const struct nf_bridge_frag_data *data,
+				  struct sk_buff *))
+{
+	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+	ktime_t tstamp = skb->tstamp;
+	struct ip6_frag_state state;
+	u8 *prevhdr, nexthdr = 0;
+	unsigned int mtu, hlen;
+	int hroom, err = 0;
+	__be32 frag_id;
+
+	err = ip6_find_1stfragopt(skb, &prevhdr);
+	if (err < 0)
+		goto blackhole;
+	hlen = err;
+	nexthdr = *prevhdr;
+
+	mtu = skb->dev->mtu;
+	if (frag_max_size > mtu ||
+	    frag_max_size < IPV6_MIN_MTU)
+		goto blackhole;
+
+	mtu = frag_max_size;
+	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
+		goto blackhole;
+	mtu -= hlen + sizeof(struct frag_hdr);
+
+	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
+				    &ipv6_hdr(skb)->saddr);
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    (err = skb_checksum_help(skb)))
+		goto blackhole;
+
+	hroom = LL_RESERVED_SPACE(skb->dev);
+	if (skb_has_frag_list(skb)) {
+		unsigned int first_len = skb_pagelen(skb);
+		struct ip6_fraglist_iter iter;
+		struct sk_buff *frag2;
+
+		if (first_len - hlen > mtu ||
+		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
+			goto blackhole;
+
+		if (skb_cloned(skb))
+			goto slow_path;
+
+		skb_walk_frags(skb, frag2) {
+			if (frag2->len > mtu ||
+			    skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
+				goto blackhole;
+
+			/* Partially cloned skb? */
+			if (skb_shared(frag2))
+				goto slow_path;
+		}
+
+		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
+					&iter);
+		if (err < 0)
+			goto blackhole;
+
+		for (;;) {
+			/* Prepare header of the next frame,
+			 * before previous one went down.
+			 */
+			if (iter.frag)
+				ip6_fraglist_prepare(skb, &iter);
+
+			skb->tstamp = tstamp;
+			err = output(net, sk, data, skb);
+			if (err || !iter.frag)
+				break;
+
+			skb = ip6_fraglist_next(&iter);
+		}
+
+		kfree(iter.tmp_hdr);
+		if (!err)
+			return 0;
+
+		kfree_skb_list(iter.frag);
+		return err;
+	}
+slow_path:
+	/* This is a linearized skbuff, the original geometry is lost for us.
+	 * This may also be a clone skbuff, we could preserve the geometry for
+	 * the copies but probably not worth the effort.
+	 */
+	ip6_frag_init(skb, hlen, mtu, skb->dev->needed_tailroom,
+		      LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id,
+		      &state);
+
+	while (state.left > 0) {
+		struct sk_buff *skb2;
+
+		skb2 = ip6_frag_next(skb, &state);
+		if (IS_ERR(skb2)) {
+			err = PTR_ERR(skb2);
+			goto blackhole;
+		}
+
+		skb2->tstamp = tstamp;
+		err = output(net, sk, data, skb2);
+		if (err)
+			goto blackhole;
+	}
+	consume_skb(skb);
+	return err;
+
+blackhole:
+	kfree_skb(skb);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(br_ip6_fragment);
 
 static const struct nf_ipv6_ops ipv6ops = {
+#if IS_MODULE(CONFIG_IPV6)
 	.chk_addr		= ipv6_chk_addr,
-	.route_input    	= ip6_route_input,
+	.route_me_harder	= ip6_route_me_harder,
+	.dev_get_saddr		= ipv6_dev_get_saddr,
+	.route			= __nf_ip6_route,
+#if IS_ENABLED(CONFIG_SYN_COOKIES)
+	.cookie_init_sequence	= __cookie_v6_init_sequence,
+	.cookie_v6_check	= __cookie_v6_check,
+#endif
+#endif
+	.route_input		= ip6_route_input,
 	.fragment		= ip6_fragment,
-	.route			= nf_ip6_route,
 	.reroute		= nf_ip6_reroute,
+#if IS_MODULE(CONFIG_IPV6) && IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+	.br_defrag		= nf_ct_frag6_gather,
+#endif
+#if IS_MODULE(CONFIG_IPV6)
+	.br_fragment		= br_ip6_fragment,
+#endif
 };
 
 int __init ipv6_netfilter_init(void)
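
Note: when IPv6 is modular, the new br_ip6_fragment() (a refragmentation helper for nf_conntrack_bridge, mirroring the ip6_fragment() rewrite earlier in this patch) and nf_ct_frag6_gather are published through nf_ipv6_ops, together with route_me_harder, source-address selection, routing and SYN-cookie helpers. Built-in consumers reach them through the RCU-protected ops pointer rather than linking against the ipv6 module; roughly (data and output come from the bridge conntrack caller):

    const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();

    if (!v6ops)     /* CONFIG_IPV6=m and the module is not loaded */
            return -EOPNOTSUPP;

    return v6ops->br_fragment(net, sk, skb, data, output);
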
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 339d076..69443e9 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # IP netfilter configuration
 #
@@ -23,42 +24,6 @@
 
 if NF_TABLES_IPV6
 
-config NFT_CHAIN_ROUTE_IPV6
-	tristate "IPv6 nf_tables route chain support"
-	help
-	  This option enables the "route" chain for IPv6 in nf_tables. This
-	  chain type is used to force packet re-routing after mangling header
-	  fields such as the source, destination, flowlabel, hop-limit and
-	  the packet mark.
-
-if NF_NAT_IPV6
-
-config NFT_CHAIN_NAT_IPV6
-	tristate "IPv6 nf_tables nat chain support"
-	help
-	  This option enables the "nat" chain for IPv6 in nf_tables. This
-	  chain type is used to perform Network Address Translation (NAT)
-	  packet transformations such as the source, destination address and
-	  source and destination ports.
-
-config NFT_MASQ_IPV6
-	tristate "IPv6 masquerade support for nf_tables"
-	depends on NFT_MASQ
-	select NF_NAT_MASQUERADE_IPV6
-	help
-	  This is the expression that provides IPv4 masquerading support for
-	  nf_tables.
-
-config NFT_REDIR_IPV6
-	tristate "IPv6 redirect support for nf_tables"
-	depends on NFT_REDIR
-	select NF_NAT_REDIRECT
-	help
-	  This is the expression that provides IPv4 redirect support for
-	  nf_tables.
-
-endif # NF_NAT_IPV6
-
 config NFT_REJECT_IPV6
 	select NF_REJECT_IPV6
 	default NFT_REJECT
@@ -106,23 +71,6 @@
 	default m if NETFILTER_ADVANCED=n
 	select NF_LOG_COMMON
 
-config NF_NAT_IPV6
-	tristate "IPv6 NAT"
-	depends on NF_CONNTRACK
-	depends on NETFILTER_ADVANCED
-	select NF_NAT
-	help
-	  The IPv6 NAT option allows masquerading, port forwarding and other
-	  forms of full Network Address Port Translation. This can be
-	  controlled by iptables or nft.
-
-if NF_NAT_IPV6
-
-config NF_NAT_MASQUERADE_IPV6
-	bool
-
-endif # NF_NAT_IPV6
-
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
 	depends on INET && IPV6
@@ -222,13 +170,13 @@
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_SRH
-        tristate '"srh" Segment Routing header match support'
-        depends on NETFILTER_ADVANCED
-        help
-          srh matching allows you to match packets based on the segment
+	tristate '"srh" Segment Routing header match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  srh matching allows you to match packets based on the segment
 	  routing header of the packet.
 
-          To compile it as a module, choose M here.  If unsure, say N.
+	  To compile it as a module, choose M here.  If unsure, say N.
 
 # The targets
 config IP6_NF_TARGET_HL
@@ -293,7 +241,7 @@
 	  and OUTPUT chains.
 
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 # security table for MAC policy
 config IP6_NF_SECURITY
@@ -301,17 +249,16 @@
        depends on SECURITY
        depends on NETFILTER_ADVANCED
        help
-         This option adds a `security' table to iptables, for use
-         with Mandatory Access Control (MAC) policy.
+	 This option adds a `security' table to iptables, for use
+	 with Mandatory Access Control (MAC) policy.
 
-         If unsure, say N.
+	 If unsure, say N.
 
 config IP6_NF_NAT
 	tristate "ip6tables NAT support"
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_NAT
-	select NF_NAT_IPV6
 	select NETFILTER_XT_NAT
 	help
 	  This enables the `nat' table in ip6tables. This allows masquerading,
@@ -324,15 +271,10 @@
 
 config IP6_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
-	select NF_NAT_MASQUERADE_IPV6
+	select NETFILTER_XT_TARGET_MASQUERADE
 	help
-	  Masquerading is a special case of NAT: all outgoing connections are
-	  changed to seem to come from a particular interface's address, and
-	  if the interface goes down, those connections are lost.  This is
-	  only useful for dialup accounts with dynamic IP address (ie. your IP
-	  address will be different on next dialup).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	  This is a backwards-compat option for the user's convenience
+	  (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
 
 config IP6_NF_TARGET_NPT
 	tristate "NPT (Network Prefix translation) target support"
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 200c0c2..731a74c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,10 +11,6 @@
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
 
-nf_nat_ipv6-y		:= nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
-nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
-obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
-
 # defrag
 nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
 obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
@@ -31,11 +27,7 @@
 obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
 
 # nf_tables
-obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
-obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
 obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
-obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
-obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
 obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
 obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
 
@@ -54,7 +46,6 @@
 obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o
 
 # targets
-obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
 obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
 obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
 obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index daf2e9e..c973ace 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Packet matching code.
  *
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
  * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
deleted file mode 100644
index 29c7f19..0000000
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
- * NAT funded by Astaro.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/addrconf.h>
-#include <net/ipv6.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-
-static unsigned int
-masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
-{
-	return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
-}
-
-static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
-{
-	const struct nf_nat_range2 *range = par->targinfo;
-
-	if (range->flags & NF_NAT_RANGE_MAP_IPS)
-		return -EINVAL;
-	return nf_ct_netns_get(par->net, par->family);
-}
-
-static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
-{
-	nf_ct_netns_put(par->net, par->family);
-}
-
-static struct xt_target masquerade_tg6_reg __read_mostly = {
-	.name		= "MASQUERADE",
-	.family		= NFPROTO_IPV6,
-	.checkentry	= masquerade_tg6_checkentry,
-	.destroy	= masquerade_tg6_destroy,
-	.target		= masquerade_tg6,
-	.targetsize	= sizeof(struct nf_nat_range),
-	.table		= "nat",
-	.hooks		= 1 << NF_INET_POST_ROUTING,
-	.me		= THIS_MODULE,
-};
-
-static int __init masquerade_tg6_init(void)
-{
-	int err;
-
-	err = xt_register_target(&masquerade_tg6_reg);
-	if (err)
-		return err;
-
-	err = nf_nat_masquerade_ipv6_register_notifier();
-	if (err)
-		xt_unregister_target(&masquerade_tg6_reg);
-
-	return err;
-}
-static void __exit masquerade_tg6_exit(void)
-{
-	nf_nat_masquerade_ipv6_unregister_notifier();
-	xt_unregister_target(&masquerade_tg6_reg);
-}
-
-module_init(masquerade_tg6_init);
-module_exit(masquerade_tg6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("Xtables: automatic address SNAT");
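
Note: ip6t_MASQUERADE.c is removed because the IPv4 and IPv6 MASQUERADE targets were merged into the protocol-independent xt_MASQUERADE module, which the Kconfig hunk above now selects through NETFILTER_XT_TARGET_MASQUERADE as a backwards-compat convenience. Existing rulesets such as "ip6tables -t nat -A POSTROUTING -o wan0 -j MASQUERADE" (interface name illustrative) keep working unchanged.
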
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index a379d2f..9ee077b 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 38dea8f..3ac5485 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IP6 tables REJECT target module
  * Linux INET6 implementation
@@ -10,11 +11,6 @@
  * Copyright (c) 2005-2007 Patrick McHardy <kaber@trash.net>
  *
  * Based on net/ipv4/netfilter/ipt_REJECT.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index cb6d42b..fd1f52a 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -1,277 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_route.h>
-#include <net/tcp.h>
-
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_SYNPROXY.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_synproxy.h>
-#include <net/netfilter/nf_conntrack_ecache.h>
 
-static struct ipv6hdr *
-synproxy_build_ip(struct net *net, struct sk_buff *skb,
-		  const struct in6_addr *saddr,
-		  const struct in6_addr *daddr)
-{
-	struct ipv6hdr *iph;
-
-	skb_reset_network_header(skb);
-	iph = skb_put(skb, sizeof(*iph));
-	ip6_flow_hdr(iph, 0, 0);
-	iph->hop_limit	= net->ipv6.devconf_all->hop_limit;
-	iph->nexthdr	= IPPROTO_TCP;
-	iph->saddr	= *saddr;
-	iph->daddr	= *daddr;
-
-	return iph;
-}
-
-static void
-synproxy_send_tcp(struct net *net,
-		  const struct sk_buff *skb, struct sk_buff *nskb,
-		  struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
-		  struct ipv6hdr *niph, struct tcphdr *nth,
-		  unsigned int tcp_hdr_size)
-{
-	struct dst_entry *dst;
-	struct flowi6 fl6;
-
-	nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
-	nskb->ip_summed   = CHECKSUM_PARTIAL;
-	nskb->csum_start  = (unsigned char *)nth - nskb->head;
-	nskb->csum_offset = offsetof(struct tcphdr, check);
-
-	memset(&fl6, 0, sizeof(fl6));
-	fl6.flowi6_proto = IPPROTO_TCP;
-	fl6.saddr = niph->saddr;
-	fl6.daddr = niph->daddr;
-	fl6.fl6_sport = nth->source;
-	fl6.fl6_dport = nth->dest;
-	security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
-	dst = ip6_route_output(net, NULL, &fl6);
-	if (dst->error) {
-		dst_release(dst);
-		goto free_nskb;
-	}
-	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
-	if (IS_ERR(dst))
-		goto free_nskb;
-
-	skb_dst_set(nskb, dst);
-
-	if (nfct) {
-		nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo);
-		nf_conntrack_get(nfct);
-	}
-
-	ip6_local_out(net, nskb->sk, nskb);
-	return;
-
-free_nskb:
-	kfree_skb(nskb);
-}
-
-static void
-synproxy_send_client_synack(struct net *net,
-			    const struct sk_buff *skb, const struct tcphdr *th,
-			    const struct synproxy_options *opts)
-{
-	struct sk_buff *nskb;
-	struct ipv6hdr *iph, *niph;
-	struct tcphdr *nth;
-	unsigned int tcp_hdr_size;
-	u16 mss = opts->mss;
-
-	iph = ipv6_hdr(skb);
-
-	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
-	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
-			 GFP_ATOMIC);
-	if (nskb == NULL)
-		return;
-	skb_reserve(nskb, MAX_TCP_HEADER);
-
-	niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr);
-
-	skb_reset_transport_header(nskb);
-	nth = skb_put(nskb, tcp_hdr_size);
-	nth->source	= th->dest;
-	nth->dest	= th->source;
-	nth->seq	= htonl(__cookie_v6_init_sequence(iph, th, &mss));
-	nth->ack_seq	= htonl(ntohl(th->seq) + 1);
-	tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
-	if (opts->options & XT_SYNPROXY_OPT_ECN)
-		tcp_flag_word(nth) |= TCP_FLAG_ECE;
-	nth->doff	= tcp_hdr_size / 4;
-	nth->window	= 0;
-	nth->check	= 0;
-	nth->urg_ptr	= 0;
-
-	synproxy_build_options(nth, opts);
-
-	synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
-			  IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
-}
-
-static void
-synproxy_send_server_syn(struct net *net,
-			 const struct sk_buff *skb, const struct tcphdr *th,
-			 const struct synproxy_options *opts, u32 recv_seq)
-{
-	struct synproxy_net *snet = synproxy_pernet(net);
-	struct sk_buff *nskb;
-	struct ipv6hdr *iph, *niph;
-	struct tcphdr *nth;
-	unsigned int tcp_hdr_size;
-
-	iph = ipv6_hdr(skb);
-
-	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
-	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
-			 GFP_ATOMIC);
-	if (nskb == NULL)
-		return;
-	skb_reserve(nskb, MAX_TCP_HEADER);
-
-	niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr);
-
-	skb_reset_transport_header(nskb);
-	nth = skb_put(nskb, tcp_hdr_size);
-	nth->source	= th->source;
-	nth->dest	= th->dest;
-	nth->seq	= htonl(recv_seq - 1);
-	/* ack_seq is used to relay our ISN to the synproxy hook to initialize
-	 * sequence number translation once a connection tracking entry exists.
-	 */
-	nth->ack_seq	= htonl(ntohl(th->ack_seq) - 1);
-	tcp_flag_word(nth) = TCP_FLAG_SYN;
-	if (opts->options & XT_SYNPROXY_OPT_ECN)
-		tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
-	nth->doff	= tcp_hdr_size / 4;
-	nth->window	= th->window;
-	nth->check	= 0;
-	nth->urg_ptr	= 0;
-
-	synproxy_build_options(nth, opts);
-
-	synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
-			  niph, nth, tcp_hdr_size);
-}
-
-static void
-synproxy_send_server_ack(struct net *net,
-			 const struct ip_ct_tcp *state,
-			 const struct sk_buff *skb, const struct tcphdr *th,
-			 const struct synproxy_options *opts)
-{
-	struct sk_buff *nskb;
-	struct ipv6hdr *iph, *niph;
-	struct tcphdr *nth;
-	unsigned int tcp_hdr_size;
-
-	iph = ipv6_hdr(skb);
-
-	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
-	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
-			 GFP_ATOMIC);
-	if (nskb == NULL)
-		return;
-	skb_reserve(nskb, MAX_TCP_HEADER);
-
-	niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr);
-
-	skb_reset_transport_header(nskb);
-	nth = skb_put(nskb, tcp_hdr_size);
-	nth->source	= th->dest;
-	nth->dest	= th->source;
-	nth->seq	= htonl(ntohl(th->ack_seq));
-	nth->ack_seq	= htonl(ntohl(th->seq) + 1);
-	tcp_flag_word(nth) = TCP_FLAG_ACK;
-	nth->doff	= tcp_hdr_size / 4;
-	nth->window	= htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
-	nth->check	= 0;
-	nth->urg_ptr	= 0;
-
-	synproxy_build_options(nth, opts);
-
-	synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
-}
-
-static void
-synproxy_send_client_ack(struct net *net,
-			 const struct sk_buff *skb, const struct tcphdr *th,
-			 const struct synproxy_options *opts)
-{
-	struct sk_buff *nskb;
-	struct ipv6hdr *iph, *niph;
-	struct tcphdr *nth;
-	unsigned int tcp_hdr_size;
-
-	iph = ipv6_hdr(skb);
-
-	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
-	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
-			 GFP_ATOMIC);
-	if (nskb == NULL)
-		return;
-	skb_reserve(nskb, MAX_TCP_HEADER);
-
-	niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr);
-
-	skb_reset_transport_header(nskb);
-	nth = skb_put(nskb, tcp_hdr_size);
-	nth->source	= th->source;
-	nth->dest	= th->dest;
-	nth->seq	= htonl(ntohl(th->seq) + 1);
-	nth->ack_seq	= th->ack_seq;
-	tcp_flag_word(nth) = TCP_FLAG_ACK;
-	nth->doff	= tcp_hdr_size / 4;
-	nth->window	= htons(ntohs(th->window) >> opts->wscale);
-	nth->check	= 0;
-	nth->urg_ptr	= 0;
-
-	synproxy_build_options(nth, opts);
-
-	synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
-			  IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
-}
-
-static bool
-synproxy_recv_client_ack(struct net *net,
-			 const struct sk_buff *skb, const struct tcphdr *th,
-			 struct synproxy_options *opts, u32 recv_seq)
-{
-	struct synproxy_net *snet = synproxy_pernet(net);
-	int mss;
-
-	mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
-	if (mss == 0) {
-		this_cpu_inc(snet->stats->cookie_invalid);
-		return false;
-	}
-
-	this_cpu_inc(snet->stats->cookie_valid);
-	opts->mss = mss;
-	opts->options |= XT_SYNPROXY_OPT_MSS;
-
-	if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
-		synproxy_check_timestamp_cookie(opts);
-
-	synproxy_send_server_syn(net, skb, th, opts, recv_seq);
-	return true;
-}
+#include <net/netfilter/nf_synproxy.h>
 
 static unsigned int
 synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
@@ -300,6 +36,8 @@
 			opts.options |= XT_SYNPROXY_OPT_ECN;
 
 		opts.options &= info->options;
+		opts.mss_encode = opts.mss_option;
+		opts.mss_option = info->mss;
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy_init_timestamp_cookie(info, &opts);
 		else
@@ -307,13 +45,14 @@
 					  XT_SYNPROXY_OPT_SACK_PERM |
 					  XT_SYNPROXY_OPT_ECN);
 
-		synproxy_send_client_synack(net, skb, th, &opts);
+		synproxy_send_client_synack_ipv6(net, skb, th, &opts);
 		consume_skb(skb);
 		return NF_STOLEN;
 
 	} else if (th->ack && !(th->fin || th->rst || th->syn)) {
 		/* ACK from client */
-		if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq))) {
+		if (synproxy_recv_client_ack_ipv6(net, skb, th, &opts,
+						  ntohl(th->seq))) {
 			consume_skb(skb);
 			return NF_STOLEN;
 		} else {
@@ -324,141 +63,6 @@
 	return XT_CONTINUE;
 }
 
-static unsigned int ipv6_synproxy_hook(void *priv,
-				       struct sk_buff *skb,
-				       const struct nf_hook_state *nhs)
-{
-	struct net *net = nhs->net;
-	struct synproxy_net *snet = synproxy_pernet(net);
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct;
-	struct nf_conn_synproxy *synproxy;
-	struct synproxy_options opts = {};
-	const struct ip_ct_tcp *state;
-	struct tcphdr *th, _th;
-	__be16 frag_off;
-	u8 nexthdr;
-	int thoff;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	if (ct == NULL)
-		return NF_ACCEPT;
-
-	synproxy = nfct_synproxy(ct);
-	if (synproxy == NULL)
-		return NF_ACCEPT;
-
-	if (nf_is_loopback_packet(skb))
-		return NF_ACCEPT;
-
-	nexthdr = ipv6_hdr(skb)->nexthdr;
-	thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
-				 &frag_off);
-	if (thoff < 0 || nexthdr != IPPROTO_TCP)
-		return NF_ACCEPT;
-
-	th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
-	if (th == NULL)
-		return NF_DROP;
-
-	state = &ct->proto.tcp;
-	switch (state->state) {
-	case TCP_CONNTRACK_CLOSE:
-		if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
-			nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
-						      ntohl(th->seq) + 1);
-			break;
-		}
-
-		if (!th->syn || th->ack ||
-		    CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
-			break;
-
-		/* Reopened connection - reset the sequence number and timestamp
-		 * adjustments, they will get initialized once the connection is
-		 * reestablished.
-		 */
-		nf_ct_seqadj_init(ct, ctinfo, 0);
-		synproxy->tsoff = 0;
-		this_cpu_inc(snet->stats->conn_reopened);
-
-		/* fall through */
-	case TCP_CONNTRACK_SYN_SENT:
-		if (!synproxy_parse_options(skb, thoff, th, &opts))
-			return NF_DROP;
-
-		if (!th->syn && th->ack &&
-		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
-			/* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
-			 * therefore we need to add 1 to make the SYN sequence
-			 * number match the one of first SYN.
-			 */
-			if (synproxy_recv_client_ack(net, skb, th, &opts,
-						     ntohl(th->seq) + 1)) {
-				this_cpu_inc(snet->stats->cookie_retrans);
-				consume_skb(skb);
-				return NF_STOLEN;
-			} else {
-				return NF_DROP;
-			}
-		}
-
-		synproxy->isn = ntohl(th->ack_seq);
-		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
-			synproxy->its = opts.tsecr;
-
-		nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
-		break;
-	case TCP_CONNTRACK_SYN_RECV:
-		if (!th->syn || !th->ack)
-			break;
-
-		if (!synproxy_parse_options(skb, thoff, th, &opts))
-			return NF_DROP;
-
-		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
-			synproxy->tsoff = opts.tsval - synproxy->its;
-			nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
-		}
-
-		opts.options &= ~(XT_SYNPROXY_OPT_MSS |
-				  XT_SYNPROXY_OPT_WSCALE |
-				  XT_SYNPROXY_OPT_SACK_PERM);
-
-		swap(opts.tsval, opts.tsecr);
-		synproxy_send_server_ack(net, state, skb, th, &opts);
-
-		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
-		nf_conntrack_event_cache(IPCT_SEQADJ, ct);
-
-		swap(opts.tsval, opts.tsecr);
-		synproxy_send_client_ack(net, skb, th, &opts);
-
-		consume_skb(skb);
-		return NF_STOLEN;
-	default:
-		break;
-	}
-
-	synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
-	return NF_ACCEPT;
-}
-
-static const struct nf_hook_ops ipv6_synproxy_ops[] = {
-	{
-		.hook		= ipv6_synproxy_hook,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
-	},
-	{
-		.hook		= ipv6_synproxy_hook,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
-	},
-};
-
 static int synproxy_tg6_check(const struct xt_tgchk_param *par)
 {
 	struct synproxy_net *snet = synproxy_pernet(par->net);
@@ -474,16 +78,12 @@
 	if (err)
 		return err;
 
-	if (snet->hook_ref6 == 0) {
-		err = nf_register_net_hooks(par->net, ipv6_synproxy_ops,
-					    ARRAY_SIZE(ipv6_synproxy_ops));
-		if (err) {
-			nf_ct_netns_put(par->net, par->family);
-			return err;
-		}
+	err = nf_synproxy_ipv6_init(snet, par->net);
+	if (err) {
+		nf_ct_netns_put(par->net, par->family);
+		return err;
 	}
 
-	snet->hook_ref6++;
 	return err;
 }
 
@@ -491,10 +91,7 @@
 {
 	struct synproxy_net *snet = synproxy_pernet(par->net);
 
-	snet->hook_ref6--;
-	if (snet->hook_ref6 == 0)
-		nf_unregister_net_hooks(par->net, ipv6_synproxy_ops,
-					ARRAY_SIZE(ipv6_synproxy_ops));
+	nf_synproxy_ipv6_fini(snet, par->net);
 	nf_ct_netns_put(par->net, par->family);
 }
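With v5.4 the IPv6 SYNPROXY target keeps only the xt glue: the SYN-ACK/SYN/ACK construction helpers and the conntrack hook removed above are assumed to live on in the shared nf_synproxy core (hence the new _ipv6-suffixed calls), and the added mss_encode/mss_option split appears to let the cookie encode the MSS learned from the client while the advertised MSS option comes from the rule (info->mss). The open-coded hook_ref6 counting is replaced by nf_synproxy_ipv6_init()/nf_synproxy_ipv6_fini(), which are assumed to implement the same first-user-registers / last-user-unregisters pattern. A minimal userspace sketch of that pattern (all names below are stand-ins, not kernel API):

#include <stdio.h>

static int hook_ref6;

static int fake_register_hooks(void)
{
	puts("hooks registered");
	return 0;
}

static void fake_unregister_hooks(void)
{
	puts("hooks unregistered");
}

static int synproxy_ipv6_init(void)
{
	int err = 0;

	if (hook_ref6 == 0)
		err = fake_register_hooks();	/* first user registers */
	if (err == 0)
		hook_ref6++;
	return err;
}

static void synproxy_ipv6_fini(void)
{
	if (--hook_ref6 == 0)
		fake_unregister_hooks();	/* last user unregisters */
}

int main(void)
{
	synproxy_ipv6_init();	/* registers */
	synproxy_ipv6_init();	/* refcount only */
	synproxy_ipv6_fini();	/* refcount only */
	synproxy_ipv6_fini();	/* unregisters */
	return 0;
}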
 
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 04099ab..4e15a14 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match AH parameters. */
 
 /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -58,7 +55,7 @@
 		return false;
 	}
 
-	hdrlen = (ah->hdrlen + 2) << 2;
+	hdrlen = ipv6_authlen(ah);
 
 	pr_debug("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
 	pr_debug("RES %04X ", ah->reserved);
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index aab0706..d704f7e 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match EUI64 address parameters. */
 
 /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 3b5735e..fb91eee 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match FRAG parameters. */
 
 /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 01df142..467b2a8 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match Hop-by-Hop and Destination parameters. */
 
 /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 8b14744..c52ff92 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* ipv6header match - matches IPv6 packets based
    on whether they contain certain headers */
 
@@ -5,10 +6,6 @@
  * Rewritten by: Andras Kis-Szabo <kisza@sch.bme.hu> */
 
 /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -19,7 +16,7 @@
 #include <net/ipv6.h>
 
 #include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_ipv6/ip6t_ipv6header.h>
 
 MODULE_LICENSE("GPL");
@@ -45,7 +42,7 @@
 	len = skb->len - ptr;
 	temp = 0;
 
-	while (ip6t_ext_hdr(nexthdr)) {
+	while (nf_ip6_ext_hdr(nexthdr)) {
 		const struct ipv6_opt_hdr *hp;
 		struct ipv6_opt_hdr _hdr;
 		int hdrlen;
@@ -65,13 +62,16 @@
 		}
 
 		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-		BUG_ON(hp == NULL);
+		if (!hp) {
+			par->hotdrop = true;
+			return false;
+		}
 
 		/* Calculate the header length */
 		if (nexthdr == NEXTHDR_FRAGMENT)
 			hdrlen = 8;
 		else if (nexthdr == NEXTHDR_AUTH)
-			hdrlen = (hp->hdrlen + 2) << 2;
+			hdrlen = ipv6_authlen(hp);
 		else
 			hdrlen = ipv6_optlen(hp);
 
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index 0c90c66..fd492b6 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C)2006 USAGI/WIDE Project
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Author:
  *	Masahide NAKAMURA @USAGI <masahide.nakamura.cz@hitachi.com>
  *
  * Based on net/netfilter/xt_tcpudp.c
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/types.h>
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index c3c6b09..d800801 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -58,7 +55,9 @@
 	if (rpfilter_addr_linklocal(&iph->saddr)) {
 		lookup_flags |= RT6_LOOKUP_F_IFACE;
 		fl6.flowi6_oif = dev->ifindex;
-	} else if ((flags & XT_RPFILTER_LOOSE) == 0)
+	/* Set flowi6_oif for vrf devices to lookup route in l3mdev domain. */
+	} else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) ||
+		  (flags & XT_RPFILTER_LOOSE) == 0)
 		fl6.flowi6_oif = dev->ifindex;
 
 	rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
@@ -73,7 +72,9 @@
 		goto out;
 	}
 
-	if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+	if (rt->rt6i_idev->dev == dev ||
+	    l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex ||
+	    (flags & XT_RPFILTER_LOOSE))
 		ret = true;
  out:
 	ip6_rt_put(rt);
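The rpfilter changes teach the reverse-path match about VRFs: the route lookup is forced into the l3mdev domain by setting flowi6_oif for l3mdev master/slave devices, and the result is accepted when the ingress device is the l3mdev master of the route's device. A toy model of that acceptance test (struct dev is a stand-in for net_device, and the helper mimics what l3mdev_master_ifindex_rcu() is assumed to report):

#include <stdbool.h>
#include <stdio.h>

struct dev {
	int ifindex;
	int master_ifindex;	/* 0 = no l3mdev master */
};

static int l3mdev_master_ifindex(const struct dev *d)
{
	return d->master_ifindex;
}

static bool rpfilter_dev_match(const struct dev *route_dev,
			       const struct dev *in_dev)
{
	return route_dev->ifindex == in_dev->ifindex ||
	       l3mdev_master_ifindex(route_dev) == in_dev->ifindex;
}

int main(void)
{
	struct dev vrf  = { .ifindex = 10, .master_ifindex = 0 };
	struct dev eth0 = { .ifindex = 2,  .master_ifindex = 10 }; /* slave of vrf */

	/* Packet seen on the VRF master, route resolves to the slave. */
	printf("match=%d\n", rpfilter_dev_match(&eth0, &vrf)); /* 1 */
	return 0;
}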
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 2c99b94..f633dc8 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match ROUTING parameters. */
 
 /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -137,7 +134,10 @@
 							sizeof(_addr),
 							&_addr);
 
-				BUG_ON(ap == NULL);
+				if (ap == NULL) {
+					par->hotdrop = true;
+					return false;
+				}
 
 				if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
 					pr_debug("i=%d temp=%d;\n", i, temp);
@@ -166,7 +166,10 @@
 							+ temp * sizeof(_addr),
 							sizeof(_addr),
 							&_addr);
-				BUG_ON(ap == NULL);
+				if (ap == NULL) {
+					par->hotdrop = true;
+					return false;
+				}
 
 				if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
 					break;
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
index 1059894..db0fd64 100644
--- a/net/ipv6/netfilter/ip6t_srh.c
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Kernel module to match Segment Routing Header (SRH) parameters. */
 
 /* Author:
  * Ahmed Abdelsalam <amsalam20@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version 2
- *	of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -210,6 +206,8 @@
 		psidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
 			  ((srh->segments_left + 1) * sizeof(struct in6_addr));
 		psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid);
+		if (!psid)
+			return false;
 		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID,
 				ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk,
 						     &srhinfo->psid_addr)))
@@ -223,6 +221,8 @@
 		nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
 			  ((srh->segments_left - 1) * sizeof(struct in6_addr));
 		nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid);
+		if (!nsid)
+			return false;
 		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID,
 				ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk,
 						     &srhinfo->nsid_addr)))
@@ -233,6 +233,8 @@
 	if (srhinfo->mt_flags & IP6T_SRH_LSID) {
 		lsidoff = srhoff + sizeof(struct ipv6_sr_hdr);
 		lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid);
+		if (!lsid)
+			return false;
 		if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID,
 				ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk,
 						     &srhinfo->lsid_addr)))
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 1343077..32667f5 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
  *
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index b0524b1..070afb9 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IPv6 packet mangling table, a port of the IPv4 mangle table to IPv6
  *
  * Copyright (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 67ba70a..0f48759 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT
  * funded by Astaro.
  */
@@ -17,8 +14,6 @@
 #include <net/ipv6.h>
 
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
 
 static int __net_init ip6table_nat_table_init(struct net *net);
 
@@ -72,10 +67,10 @@
 	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) {
-		ret = nf_nat_l3proto_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]);
+		ret = nf_nat_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]);
 		if (ret) {
 			while (i)
-				nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]);
+				nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]);
 
 			return ret;
 		}
@@ -89,7 +84,7 @@
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++)
-		nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]);
+		nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]);
 }
 
 static int __net_init ip6table_nat_table_init(struct net *net)
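The rename from nf_nat_l3proto_ipv6_register_fn() to nf_nat_ipv6_register_fn() tracks the removal of the l3proto layer (see the deleted nf_nat_l3proto_ipv6.c near the end of this patch). The loop itself keeps the usual register-with-rollback shape: on failure at index i, everything registered so far is torn down in reverse. A compilable sketch of just that shape (the fake_* functions are stand-ins):

#include <stdio.h>

#define NHOOKS 4

static int fake_register(int i)
{
	return (i == 2) ? -1 : 0;	/* pretend hook 2 fails */
}

static void fake_unregister(int i)
{
	printf("unregistered %d\n", i);
}

static int register_all(void)
{
	int i, ret;

	for (i = 0; i < NHOOKS; i++) {
		ret = fake_register(i);
		if (ret) {
			while (i)
				fake_unregister(--i);	/* roll back in reverse */
			return ret;
		}
	}
	return 0;
}

int main(void)
{
	printf("register_all = %d\n", register_all());
	return 0;
}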
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 710fa08..a22100b 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -1,7 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IPv6 raw table, a port of the IPv4 raw table to IPv6
  *
- * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@netfilter.org>
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index cf26ccb..a74335f 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * "security" table for IPv6
  *
@@ -10,10 +11,6 @@
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
  * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 043ed8e..fed9666 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPv6 fragment reassembly for connection tracking
  *
@@ -7,11 +8,6 @@
  *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
  *
  * Based on: net/ipv6/reassembly.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) "IPv6-nf: " fmt
@@ -58,26 +54,21 @@
 static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_frag6_timeout",
-		.data		= &init_net.nf_frag.frags.timeout,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_frag6_low_thresh",
-		.data		= &init_net.nf_frag.frags.low_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra2		= &init_net.nf_frag.frags.high_thresh
 	},
 	{
 		.procname	= "nf_conntrack_frag6_high_thresh",
-		.data		= &init_net.nf_frag.frags.high_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &init_net.nf_frag.frags.low_thresh
 	},
 	{ }
 };
@@ -93,15 +84,15 @@
 				GFP_KERNEL);
 		if (table == NULL)
 			goto err_alloc;
-
-		table[0].data = &net->nf_frag.frags.timeout;
-		table[1].data = &net->nf_frag.frags.low_thresh;
-		table[1].extra2 = &net->nf_frag.frags.high_thresh;
-		table[2].data = &net->nf_frag.frags.high_thresh;
-		table[2].extra1 = &net->nf_frag.frags.low_thresh;
-		table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
 	}
 
+	table[0].data	= &net->nf_frag.fqdir->timeout;
+	table[1].data	= &net->nf_frag.fqdir->low_thresh;
+	table[1].extra2	= &net->nf_frag.fqdir->high_thresh;
+	table[2].data	= &net->nf_frag.fqdir->high_thresh;
+	table[2].extra1	= &net->nf_frag.fqdir->low_thresh;
+	table[2].extra2	= &init_net.nf_frag.fqdir->high_thresh;
+
 	hdr = register_net_sysctl(net, "net/netfilter", table);
 	if (hdr == NULL)
 		goto err_reg;
@@ -136,6 +127,9 @@
 }
 #endif
 
+static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
+			     struct sk_buff *prev_tail, struct net_device *dev);
+
 static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
 {
 	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -145,12 +139,10 @@
 {
 	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct frag_queue *fq;
-	struct net *net;
 
 	fq = container_of(frag, struct frag_queue, q);
-	net = container_of(fq->q.net, struct net, nf_frag.frags);
 
-	ip6frag_expire_frag_queue(net, fq);
+	ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
 }
 
 /* Creation primitives. */
@@ -166,7 +158,7 @@
 	};
 	struct inet_frag_queue *q;
 
-	q = inet_frag_find(&net->nf_frag.frags, &key);
+	q = inet_frag_find(net->nf_frag.fqdir, &key);
 	if (!q)
 		return NULL;
 
@@ -177,9 +169,10 @@
 static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 			     const struct frag_hdr *fhdr, int nhoff)
 {
-	struct sk_buff *prev, *next;
 	unsigned int payload_len;
-	int offset, end;
+	struct net_device *dev;
+	struct sk_buff *prev;
+	int offset, end, err;
 	u8 ecn;
 
 	if (fq->q.flags & INET_FRAG_COMPLETE) {
@@ -254,62 +247,31 @@
 		goto err;
 	}
 
-	/* Find out which fragments are in front and at the back of us
-	 * in the chain of fragments so far.  We must know where to put
-	 * this fragment, right?
-	 */
-	prev = fq->q.fragments_tail;
-	if (!prev || prev->ip_defrag_offset < offset) {
-		next = NULL;
-		goto found;
-	}
-	prev = NULL;
-	for (next = fq->q.fragments; next != NULL; next = next->next) {
-		if (next->ip_defrag_offset >= offset)
-			break;	/* bingo! */
-		prev = next;
-	}
-
-found:
-	/* RFC5722, Section 4:
-	 *                                  When reassembling an IPv6 datagram, if
-	 *   one or more its constituent fragments is determined to be an
-	 *   overlapping fragment, the entire datagram (and any constituent
-	 *   fragments, including those not yet received) MUST be silently
-	 *   discarded.
-	 */
-
-	/* Check for overlap with preceding fragment. */
-	if (prev &&
-	    (prev->ip_defrag_offset + prev->len) > offset)
-		goto discard_fq;
-
-	/* Look for overlap with succeeding segment. */
-	if (next && next->ip_defrag_offset < end)
-		goto discard_fq;
-
-	/* Note : skb->ip_defrag_offset and skb->dev share the same location */
-	if (skb->dev)
-		fq->iif = skb->dev->ifindex;
+	/* Note : skb->rbnode and skb->dev share the same location. */
+	dev = skb->dev;
 	/* Makes sure compiler won't do silly aliasing games */
 	barrier();
-	skb->ip_defrag_offset = offset;
 
-	/* Insert this fragment in the chain of fragments. */
-	skb->next = next;
-	if (!next)
-		fq->q.fragments_tail = skb;
-	if (prev)
-		prev->next = skb;
-	else
-		fq->q.fragments = skb;
+	prev = fq->q.fragments_tail;
+	err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+	if (err) {
+		if (err == IPFRAG_DUP) {
+			/* No error for duplicates, pretend they got queued. */
+			kfree_skb(skb);
+			return -EINPROGRESS;
+		}
+		goto insert_error;
+	}
+
+	if (dev)
+		fq->iif = dev->ifindex;
 
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
 	fq->ecn |= ecn;
 	if (payload_len > fq->q.max_size)
 		fq->q.max_size = payload_len;
-	add_frag_mem_limit(fq->q.net, skb->truesize);
+	add_frag_mem_limit(fq->q.fqdir, skb->truesize);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -319,11 +281,27 @@
 		fq->q.flags |= INET_FRAG_FIRST_IN;
 	}
 
-	return 0;
+	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+	    fq->q.meat == fq->q.len) {
+		unsigned long orefdst = skb->_skb_refdst;
 
-discard_fq:
+		skb->_skb_refdst = 0UL;
+		err = nf_ct_frag6_reasm(fq, skb, prev, dev);
+		skb->_skb_refdst = orefdst;
+
+		/* After queue has assumed skb ownership, only 0 or
+		 * -EINPROGRESS must be returned.
+		 */
+		return err ? -EINPROGRESS : 0;
+	}
+
+	skb_dst_drop(skb);
+	return -EINPROGRESS;
+
+insert_error:
 	inet_frag_kill(&fq->q);
 err:
+	skb_dst_drop(skb);
 	return -EINVAL;
 }
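nf_ct_frag6_queue() no longer walks a singly linked fragment list doing its own RFC 5722 overlap checks: inet_frag_queue_insert(), the shared rbtree-based implementation, reports exact duplicates as IPFRAG_DUP (the caller frees them silently) and rejects overlaps so the whole datagram is discarded. Reassembly is also triggered from here now, and once the queue owns the skb only 0 or -EINPROGRESS may be returned. A toy model of the insert contract (a flat array stands in for the rbtree; return codes are illustrative):

#include <stdio.h>

#define IPFRAG_OK	0
#define IPFRAG_DUP	1
#define IPFRAG_OVERLAP	(-1)

struct frag { int start, end; };

static int frag_insert(struct frag *q, int *n, int start, int end)
{
	int i;

	for (i = 0; i < *n; i++) {
		if (q[i].start == start && q[i].end == end)
			return IPFRAG_DUP;	/* exact duplicate */
		if (start < q[i].end && q[i].start < end)
			return IPFRAG_OVERLAP;	/* RFC 5722: discard all */
	}
	q[*n].start = start;
	q[*n].end = end;
	(*n)++;
	return IPFRAG_OK;
}

int main(void)
{
	struct frag q[8];
	int n = 0;

	printf("%d\n", frag_insert(q, &n, 0, 8));	/*  0: queued    */
	printf("%d\n", frag_insert(q, &n, 0, 8));	/*  1: duplicate */
	printf("%d\n", frag_insert(q, &n, 4, 12));	/* -1: overlap   */
	return 0;
}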
 
@@ -333,147 +311,66 @@
  *	It is called with locked fq, and caller must check that
  *	queue is eligible for reassembly i.e. it is not COMPLETE,
  *	the last and the first frames arrived and all the bits are here.
- *
- *	returns true if *prev skb has been transformed into the reassembled
- *	skb, false otherwise.
  */
-static bool
-nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct net_device *dev)
+static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
+			     struct sk_buff *prev_tail, struct net_device *dev)
 {
-	struct sk_buff *fp, *head = fq->q.fragments;
-	int    payload_len, delta;
+	void *reasm_data;
+	int payload_len;
 	u8 ecn;
 
 	inet_frag_kill(&fq->q);
 
-	WARN_ON(head == NULL);
-	WARN_ON(head->ip_defrag_offset != 0);
-
 	ecn = ip_frag_ecn_table[fq->ecn];
 	if (unlikely(ecn == 0xff))
-		return false;
+		goto err;
 
-	/* Unfragmented part is taken from the first segment. */
-	payload_len = ((head->data - skb_network_header(head)) -
+	reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+	if (!reasm_data)
+		goto err;
+
+	payload_len = ((skb->data - skb_network_header(skb)) -
 		       sizeof(struct ipv6hdr) + fq->q.len -
 		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN) {
 		net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
 				    payload_len);
-		return false;
-	}
-
-	delta = - head->truesize;
-
-	/* Head of list must not be cloned. */
-	if (skb_unclone(head, GFP_ATOMIC))
-		return false;
-
-	delta += head->truesize;
-	if (delta)
-		add_frag_mem_limit(fq->q.net, delta);
-
-	/* If the first fragment is fragmented itself, we split
-	 * it to two chunks: the first with data and paged part
-	 * and the second, holding only fragments. */
-	if (skb_has_frag_list(head)) {
-		struct sk_buff *clone;
-		int i, plen = 0;
-
-		clone = alloc_skb(0, GFP_ATOMIC);
-		if (clone == NULL)
-			return false;
-
-		clone->next = head->next;
-		head->next = clone;
-		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
-		skb_frag_list_init(head);
-		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
-			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
-		clone->len = clone->data_len = head->data_len - plen;
-		head->data_len -= clone->len;
-		head->len -= clone->len;
-		clone->csum = 0;
-		clone->ip_summed = head->ip_summed;
-
-		add_frag_mem_limit(fq->q.net, clone->truesize);
-	}
-
-	/* morph head into last received skb: prev.
-	 *
-	 * This allows callers of ipv6 conntrack defrag to continue
-	 * to use the last skb(frag) passed into the reasm engine.
-	 * The last skb frag 'silently' turns into the full reassembled skb.
-	 *
-	 * Since prev is also part of q->fragments we have to clone it first.
-	 */
-	if (head != prev) {
-		struct sk_buff *iter;
-
-		fp = skb_clone(prev, GFP_ATOMIC);
-		if (!fp)
-			return false;
-
-		fp->next = prev->next;
-
-		iter = head;
-		while (iter) {
-			if (iter->next == prev) {
-				iter->next = fp;
-				break;
-			}
-			iter = iter->next;
-		}
-
-		skb_morph(prev, head);
-		prev->next = head->next;
-		consume_skb(head);
-		head = prev;
+		goto err;
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
-	skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
-	memmove(head->head + sizeof(struct frag_hdr), head->head,
-		(head->data - head->head) - sizeof(struct frag_hdr));
-	head->mac_header += sizeof(struct frag_hdr);
-	head->network_header += sizeof(struct frag_hdr);
+	skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0];
+	memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+		(skb->data - skb->head) - sizeof(struct frag_hdr));
+	skb->mac_header += sizeof(struct frag_hdr);
+	skb->network_header += sizeof(struct frag_hdr);
 
-	skb_shinfo(head)->frag_list = head->next;
-	skb_reset_transport_header(head);
-	skb_push(head, head->data - skb_network_header(head));
+	skb_reset_transport_header(skb);
 
-	for (fp = head->next; fp; fp = fp->next) {
-		head->data_len += fp->len;
-		head->len += fp->len;
-		if (head->ip_summed != fp->ip_summed)
-			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_COMPLETE)
-			head->csum = csum_add(head->csum, fp->csum);
-		head->truesize += fp->truesize;
-		fp->sk = NULL;
-	}
-	sub_frag_mem_limit(fq->q.net, head->truesize);
+	inet_frag_reasm_finish(&fq->q, skb, reasm_data, false);
 
-	head->ignore_df = 1;
-	head->next = NULL;
-	head->dev = dev;
-	head->tstamp = fq->q.stamp;
-	ipv6_hdr(head)->payload_len = htons(payload_len);
-	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
-	IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
+	skb->ignore_df = 1;
+	skb->dev = dev;
+	ipv6_hdr(skb)->payload_len = htons(payload_len);
+	ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+	IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
 
 	/* Yes, and fold redundant checksum back. 8) */
-	if (head->ip_summed == CHECKSUM_COMPLETE)
-		head->csum = csum_partial(skb_network_header(head),
-					  skb_network_header_len(head),
-					  head->csum);
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_partial(skb_network_header(skb),
+					 skb_network_header_len(skb),
+					 skb->csum);
 
-	fq->q.fragments = NULL;
 	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
+	fq->q.last_run_head = NULL;
 
-	return true;
+	return 0;
+
+err:
+	inet_frag_kill(&fq->q);
+	return -EINVAL;
 }
 
 /*
@@ -517,7 +414,7 @@
 		if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
 			BUG();
 		if (nexthdr == NEXTHDR_AUTH)
-			hdrlen = (hdr.hdrlen+2)<<2;
+			hdrlen = ipv6_authlen(&hdr);
 		else
 			hdrlen = ipv6_optlen(&hdr);
 
@@ -542,7 +439,6 @@
 int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 {
 	u16 savethdr = skb->transport_header;
-	struct net_device *dev = skb->dev;
 	int fhoff, nhoff, ret;
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
@@ -565,10 +461,6 @@
 	hdr = ipv6_hdr(skb);
 	fhdr = (struct frag_hdr *)skb_transport_header(skb);
 
-	if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
-	    fhdr->frag_off & htons(IP6_MF))
-		return -EINVAL;
-
 	skb_orphan(skb);
 	fq = fq_find(net, fhdr->identification, user, hdr,
 		     skb->dev ? skb->dev->ifindex : 0);
@@ -580,31 +472,11 @@
 	spin_lock_bh(&fq->q.lock);
 
 	ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
-	if (ret < 0) {
-		if (ret == -EPROTO) {
-			skb->transport_header = savethdr;
-			ret = 0;
-		}
-		goto out_unlock;
+	if (ret == -EPROTO) {
+		skb->transport_header = savethdr;
+		ret = 0;
 	}
 
-	/* after queue has assumed skb ownership, only 0 or -EINPROGRESS
-	 * must be returned.
-	 */
-	ret = -EINPROGRESS;
-	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
-	    fq->q.meat == fq->q.len) {
-		unsigned long orefdst = skb->_skb_refdst;
-
-		skb->_skb_refdst = 0UL;
-		if (nf_ct_frag6_reasm(fq, skb, dev))
-			ret = 0;
-		skb->_skb_refdst = orefdst;
-	} else {
-		skb_dst_drop(skb);
-	}
-
-out_unlock:
 	spin_unlock_bh(&fq->q.lock);
 	inet_frag_put(&fq->q);
 	return ret;
@@ -615,29 +487,35 @@
 {
 	int res;
 
-	net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
-	net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
-	net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
-	net->nf_frag.frags.f = &nf_frags;
-
-	res = inet_frags_init_net(&net->nf_frag.frags);
+	res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net);
 	if (res < 0)
 		return res;
+
+	net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+	net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+	net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
 	res = nf_ct_frag6_sysctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&net->nf_frag.frags);
+		fqdir_exit(net->nf_frag.fqdir);
 	return res;
 }
 
+static void nf_ct_net_pre_exit(struct net *net)
+{
+	fqdir_pre_exit(net->nf_frag.fqdir);
+}
+
 static void nf_ct_net_exit(struct net *net)
 {
 	nf_ct_frags6_sysctl_unregister(net);
-	inet_frags_exit_net(&net->nf_frag.frags);
+	fqdir_exit(net->nf_frag.fqdir);
 }
 
 static struct pernet_operations nf_ct_net_ops = {
-	.init = nf_ct_net_init,
-	.exit = nf_ct_net_exit,
+	.init		= nf_ct_net_init,
+	.pre_exit	= nf_ct_net_pre_exit,
+	.exit		= nf_ct_net_exit,
 };
 
 static const struct rhashtable_params nfct_rhash_params = {
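The pernet ops gain a .pre_exit stage: fqdir_pre_exit() can disarm timers and refuse new fragments while other subsystems' exit paths may still be running, and fqdir_exit() frees everything afterwards. A sketch of the two-pass teardown this assumes cleanup_net() performs (all pre_exit hooks first, then all exit hooks; the struct and functions below are simplified stand-ins):

#include <stdio.h>

struct pernet_ops {
	void (*pre_exit)(void);
	void (*exit)(void);
};

static void frag_pre_exit(void) { puts("frag: pre_exit (stop timers)"); }
static void frag_exit(void)     { puts("frag: exit (free queues)"); }
static void ct_exit(void)       { puts("conntrack: exit"); }

int main(void)
{
	struct pernet_ops ops[] = {
		{ frag_pre_exit, frag_exit },
		{ NULL,          ct_exit   },
	};
	int i, n = 2;

	for (i = 0; i < n; i++)		/* pass 1: pre_exit */
		if (ops[i].pre_exit)
			ops[i].pre_exit();
	for (i = 0; i < n; i++)		/* pass 2: exit */
		if (ops[i].exit)
			ops[i].exit();
	return 0;
}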
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 72dd3e2..6646a87 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index 4a7dded..a0a2de3 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
  * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
  *
  * Extracted from xt_TEE.c
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 or later, as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/percpu.h>
@@ -57,7 +54,7 @@
 		return;
 
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
-	nf_reset(skb);
+	nf_reset_ct(skb);
 	nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
 #endif
 	if (hooknum == NF_INET_PRE_ROUTING ||
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index c511d20..f6d9a48 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index c6bf580..22b80db 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -21,7 +18,7 @@
 #include <net/route.h>
 
 #include <linux/netfilter.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6.h>
 #include <linux/netfilter/xt_LOG.h>
 #include <net/netfilter/nf_log.h>
 
@@ -73,7 +70,7 @@
 	fragment = 0;
 	ptr = ip6hoff + sizeof(struct ipv6hdr);
 	currenthdr = ih->nexthdr;
-	while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
+	while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) {
 		struct ipv6_opt_hdr _hdr;
 		const struct ipv6_opt_hdr *hp;
 
@@ -158,7 +155,7 @@
 
 			}
 
-			hdrlen = (hp->hdrlen+2)<<2;
+			hdrlen = ipv6_authlen(hp);
 			break;
 		case IPPROTO_ESP:
 			if (logflags & NF_LOG_IPOPT) {
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
deleted file mode 100644
index ca6d386..0000000
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of IPv6 NAT funded by Astaro.
- */
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/secure_seq.h>
-#include <net/checksum.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_route.h>
-#include <net/ipv6.h>
-
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
-
-#ifdef CONFIG_XFRM
-static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
-				       const struct nf_conn *ct,
-				       enum ip_conntrack_dir dir,
-				       unsigned long statusbit,
-				       struct flowi *fl)
-{
-	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
-	struct flowi6 *fl6 = &fl->u.ip6;
-
-	if (ct->status & statusbit) {
-		fl6->daddr = t->dst.u3.in6;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP ||
-		    t->dst.protonum == IPPROTO_UDPLITE ||
-		    t->dst.protonum == IPPROTO_DCCP ||
-		    t->dst.protonum == IPPROTO_SCTP)
-			fl6->fl6_dport = t->dst.u.all;
-	}
-
-	statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		fl6->saddr = t->src.u3.in6;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP ||
-		    t->dst.protonum == IPPROTO_UDPLITE ||
-		    t->dst.protonum == IPPROTO_DCCP ||
-		    t->dst.protonum == IPPROTO_SCTP)
-			fl6->fl6_sport = t->src.u.all;
-	}
-}
-#endif
-
-static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
-				 const struct nf_nat_range2 *range)
-{
-	return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
-	       ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
-}
-
-static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
-				   __be16 dport)
-{
-	return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
-}
-
-static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
-				  unsigned int iphdroff,
-				  const struct nf_nat_l4proto *l4proto,
-				  const struct nf_conntrack_tuple *target,
-				  enum nf_nat_manip_type maniptype)
-{
-	struct ipv6hdr *ipv6h;
-	__be16 frag_off;
-	int hdroff;
-	u8 nexthdr;
-
-	if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
-		return false;
-
-	ipv6h = (void *)skb->data + iphdroff;
-	nexthdr = ipv6h->nexthdr;
-	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
-				  &nexthdr, &frag_off);
-	if (hdroff < 0)
-		goto manip_addr;
-
-	if ((frag_off & htons(~0x7)) == 0 &&
-	    !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
-				target, maniptype))
-		return false;
-
-	/* must reload, offset might have changed */
-	ipv6h = (void *)skb->data + iphdroff;
-
-manip_addr:
-	if (maniptype == NF_NAT_MANIP_SRC)
-		ipv6h->saddr = target->src.u3.in6;
-	else
-		ipv6h->daddr = target->dst.u3.in6;
-
-	return true;
-}
-
-static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
-				    unsigned int iphdroff, __sum16 *check,
-				    const struct nf_conntrack_tuple *t,
-				    enum nf_nat_manip_type maniptype)
-{
-	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
-	const struct in6_addr *oldip, *newip;
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		oldip = &ipv6h->saddr;
-		newip = &t->src.u3.in6;
-	} else {
-		oldip = &ipv6h->daddr;
-		newip = &t->dst.u3.in6;
-	}
-	inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
-				  newip->s6_addr32, true);
-}
-
-static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
-				    u8 proto, void *data, __sum16 *check,
-				    int datalen, int oldlen)
-{
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
-			(data - (void *)skb->data);
-		skb->csum_offset = (void *)check - data;
-		*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
-					  datalen, proto, 0);
-	} else
-		inet_proto_csum_replace2(check, skb,
-					 htons(oldlen), htons(datalen), true);
-}
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
-				       struct nf_nat_range2 *range)
-{
-	if (tb[CTA_NAT_V6_MINIP]) {
-		nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
-			   sizeof(struct in6_addr));
-		range->flags |= NF_NAT_RANGE_MAP_IPS;
-	}
-
-	if (tb[CTA_NAT_V6_MAXIP])
-		nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
-			   sizeof(struct in6_addr));
-	else
-		range->max_addr = range->min_addr;
-
-	return 0;
-}
-#endif
-
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
-	.l3proto		= NFPROTO_IPV6,
-	.secure_port		= nf_nat_ipv6_secure_port,
-	.in_range		= nf_nat_ipv6_in_range,
-	.manip_pkt		= nf_nat_ipv6_manip_pkt,
-	.csum_update		= nf_nat_ipv6_csum_update,
-	.csum_recalc		= nf_nat_ipv6_csum_recalc,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_ipv6_nlattr_to_range,
-#endif
-#ifdef CONFIG_XFRM
-	.decode_session	= nf_nat_ipv6_decode_session,
-#endif
-};
-
-int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
-				    struct nf_conn *ct,
-				    enum ip_conntrack_info ctinfo,
-				    unsigned int hooknum,
-				    unsigned int hdrlen)
-{
-	struct {
-		struct icmp6hdr	icmp6;
-		struct ipv6hdr	ip6;
-	} *inside;
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
-	const struct nf_nat_l4proto *l4proto;
-	struct nf_conntrack_tuple target;
-	unsigned long statusbit;
-
-	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
-
-	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
-		return 0;
-	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
-		return 0;
-
-	inside = (void *)skb->data + hdrlen;
-	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
-		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
-			return 0;
-		if (ct->status & IPS_NAT_MASK)
-			return 0;
-	}
-
-	if (manip == NF_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply direction */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	if (!(ct->status & statusbit))
-		return 1;
-
-	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
-	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
-				   l4proto, &ct->tuplehash[!dir].tuple, !manip))
-		return 0;
-
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-		inside = (void *)skb->data + hdrlen;
-		inside->icmp6.icmp6_cksum = 0;
-		inside->icmp6.icmp6_cksum =
-			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
-					skb->len - hdrlen, IPPROTO_ICMPV6,
-					skb_checksum(skb, hdrlen,
-						     skb->len - hdrlen, 0));
-	}
-
-	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
-	if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
-		return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
-
-static unsigned int
-nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
-	       const struct nf_hook_state *state)
-{
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	__be16 frag_off;
-	int hdrlen;
-	u8 nexthdr;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	/* Can't track?  It's not due to stress, or conntrack would
-	 * have dropped it.  Hence it's the user's responsibility to
-	 * packet filter it out, or implement conntrack/NAT for that
-	 * protocol. 8) --RR
-	 */
-	if (!ct)
-		return NF_ACCEPT;
-
-	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
-		nexthdr = ipv6_hdr(skb)->nexthdr;
-		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
-					  &nexthdr, &frag_off);
-
-		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
-			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
-							     state->hook,
-							     hdrlen))
-				return NF_DROP;
-			else
-				return NF_ACCEPT;
-		}
-	}
-
-	return nf_nat_inet_fn(priv, skb, state);
-}
-
-static unsigned int
-nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
-	       const struct nf_hook_state *state)
-{
-	unsigned int ret;
-	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
-
-	ret = nf_nat_ipv6_fn(priv, skb, state);
-	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
-		skb_dst_drop(skb);
-
-	return ret;
-}
-
-static unsigned int
-nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
-		const struct nf_hook_state *state)
-{
-#ifdef CONFIG_XFRM
-	const struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	int err;
-#endif
-	unsigned int ret;
-
-	ret = nf_nat_ipv6_fn(priv, skb, state);
-#ifdef CONFIG_XFRM
-	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
-	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
-				      &ct->tuplehash[!dir].tuple.dst.u3) ||
-		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
-		     ct->tuplehash[dir].tuple.src.u.all !=
-		     ct->tuplehash[!dir].tuple.dst.u.all)) {
-			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
-			if (err < 0)
-				ret = NF_DROP_ERR(err);
-		}
-	}
-#endif
-	return ret;
-}
-
-static unsigned int
-nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
-		     const struct nf_hook_state *state)
-{
-	const struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	unsigned int ret;
-	int err;
-
-	ret = nf_nat_ipv6_fn(priv, skb, state);
-	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
-				      &ct->tuplehash[!dir].tuple.src.u3)) {
-			err = ip6_route_me_harder(state->net, skb);
-			if (err < 0)
-				ret = NF_DROP_ERR(err);
-		}
-#ifdef CONFIG_XFRM
-		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
-			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
-			 ct->tuplehash[dir].tuple.dst.u.all !=
-			 ct->tuplehash[!dir].tuple.src.u.all) {
-			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
-			if (err < 0)
-				ret = NF_DROP_ERR(err);
-		}
-#endif
-	}
-	return ret;
-}
-
-static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
-	/* Before packet filtering, change destination */
-	{
-		.hook		= nf_nat_ipv6_in,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP6_PRI_NAT_DST,
-	},
-	/* After packet filtering, change source */
-	{
-		.hook		= nf_nat_ipv6_out,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP6_PRI_NAT_SRC,
-	},
-	/* Before packet filtering, change destination */
-	{
-		.hook		= nf_nat_ipv6_local_fn,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP6_PRI_NAT_DST,
-	},
-	/* After packet filtering, change source */
-	{
-		.hook		= nf_nat_ipv6_fn,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP6_PRI_NAT_SRC,
-	},
-};
-
-int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
-{
-	return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_register_fn);
-
-void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
-{
-	nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn);
-
-static int __init nf_nat_l3proto_ipv6_init(void)
-{
-	int err;
-
-	err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
-	if (err < 0)
-		goto err1;
-	err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
-	if (err < 0)
-		goto err2;
-	return err;
-
-err2:
-	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
-err1:
-	return err;
-}
-
-static void __exit nf_nat_l3proto_ipv6_exit(void)
-{
-	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
-	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
-}
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
-
-module_init(nf_nat_l3proto_ipv6_init);
-module_exit(nf_nat_l3proto_ipv6_exit);
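The whole l3proto indirection above (nf_nat_l3proto_ipv6, the per-protocol manip_pkt/csum hooks and the IPv6 hook ops) is gone; its duties are assumed to have moved into the protocol-independent nf_nat core, with ip6table_nat calling nf_nat_ipv6_register_fn() directly. One idea worth restating from the deleted code is the incremental checksum fixup used when NAT rewrites an address: rather than recomputing over the payload, the checksum is patched from old and new field values (RFC 1624, HC' = ~(~HC + ~m + m')). A standalone 16-bit version (input values are made up):

#include <stdio.h>
#include <stdint.h>

static uint16_t csum_fixup(uint16_t check, uint16_t old_val, uint16_t new_val)
{
	uint32_t sum = (uint16_t)~check;	/* ~HC */

	sum += (uint16_t)~old_val;		/* + ~m  */
	sum += new_val;				/* + m'  */
	sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint16_t check = 0x1c46;	/* made-up original checksum */
	uint16_t fixed = csum_fixup(check, 0xc0a8, 0x0a00);

	printf("old 0x%04x -> new 0x%04x\n", check, fixed);
	return 0;
}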
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
deleted file mode 100644
index 37b1d41..0000000
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
- * NAT funded by Astaro.
- */
-
-#include <linux/kernel.h>
-#include <linux/atomic.h>
-#include <linux/netdevice.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/addrconf.h>
-#include <net/ipv6.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-
-#define MAX_WORK_COUNT	16
-
-static atomic_t v6_worker_count;
-
-unsigned int
-nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
-		       const struct net_device *out)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn_nat *nat;
-	struct in6_addr src;
-	struct nf_conn *ct;
-	struct nf_nat_range2 newrange;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			 ctinfo == IP_CT_RELATED_REPLY)));
-
-	if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
-			       &ipv6_hdr(skb)->daddr, 0, &src) < 0)
-		return NF_DROP;
-
-	nat = nf_ct_nat_ext_add(ct);
-	if (nat)
-		nat->masq_index = out->ifindex;
-
-	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
-	newrange.min_addr.in6	= src;
-	newrange.max_addr.in6	= src;
-	newrange.min_proto	= range->min_proto;
-	newrange.max_proto	= range->max_proto;
-
-	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
-
-static int device_cmp(struct nf_conn *ct, void *ifindex)
-{
-	const struct nf_conn_nat *nat = nfct_nat(ct);
-
-	if (!nat)
-		return 0;
-	if (nf_ct_l3num(ct) != NFPROTO_IPV6)
-		return 0;
-	return nat->masq_index == (int)(long)ifindex;
-}
-
-static int masq_device_event(struct notifier_block *this,
-			     unsigned long event, void *ptr)
-{
-	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct net *net = dev_net(dev);
-
-	if (event == NETDEV_DOWN)
-		nf_ct_iterate_cleanup_net(net, device_cmp,
-					  (void *)(long)dev->ifindex, 0, 0);
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block masq_dev_notifier = {
-	.notifier_call	= masq_device_event,
-};
-
-struct masq_dev_work {
-	struct work_struct work;
-	struct net *net;
-	int ifindex;
-};
-
-static void iterate_cleanup_work(struct work_struct *work)
-{
-	struct masq_dev_work *w;
-	long index;
-
-	w = container_of(work, struct masq_dev_work, work);
-
-	index = w->ifindex;
-	nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0);
-
-	put_net(w->net);
-	kfree(w);
-	atomic_dec(&v6_worker_count);
-	module_put(THIS_MODULE);
-}
-
-/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
- * schedule.
- *
- * Unfortunately, nf_ct_iterate_cleanup_net can run for a long
- * time if there are lots of conntracks and the system
- * handles high softirq load, so it frequently calls cond_resched
- * while iterating the conntrack table.
- *
- * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue.
- *
- * As we can have 'a lot' of inet_events (depending on amount
- * of ipv6 addresses being deleted), we also need to add an upper
- * limit to the number of queued work items.
- */
-static int masq_inet6_event(struct notifier_block *this,
-			    unsigned long event, void *ptr)
-{
-	struct inet6_ifaddr *ifa = ptr;
-	const struct net_device *dev;
-	struct masq_dev_work *w;
-	struct net *net;
-
-	if (event != NETDEV_DOWN ||
-	    atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
-		return NOTIFY_DONE;
-
-	dev = ifa->idev->dev;
-	net = maybe_get_net(dev_net(dev));
-	if (!net)
-		return NOTIFY_DONE;
-
-	if (!try_module_get(THIS_MODULE))
-		goto err_module;
-
-	w = kmalloc(sizeof(*w), GFP_ATOMIC);
-	if (w) {
-		atomic_inc(&v6_worker_count);
-
-		INIT_WORK(&w->work, iterate_cleanup_work);
-		w->ifindex = dev->ifindex;
-		w->net = net;
-		schedule_work(&w->work);
-
-		return NOTIFY_DONE;
-	}
-
-	module_put(THIS_MODULE);
- err_module:
-	put_net(net);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block masq_inet6_notifier = {
-	.notifier_call	= masq_inet6_event,
-};
-
-static int masq_refcnt;
-static DEFINE_MUTEX(masq_mutex);
-
-int nf_nat_masquerade_ipv6_register_notifier(void)
-{
-	int ret = 0;
-
-	mutex_lock(&masq_mutex);
-	/* check if the notifier is already set */
-	if (++masq_refcnt > 1)
-		goto out_unlock;
-
-	ret = register_netdevice_notifier(&masq_dev_notifier);
-	if (ret)
-		goto err_dec;
-
-	ret = register_inet6addr_notifier(&masq_inet6_notifier);
-	if (ret)
-		goto err_unregister;
-
-	mutex_unlock(&masq_mutex);
-	return ret;
-
-err_unregister:
-	unregister_netdevice_notifier(&masq_dev_notifier);
-err_dec:
-	masq_refcnt--;
-out_unlock:
-	mutex_unlock(&masq_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
-
-void nf_nat_masquerade_ipv6_unregister_notifier(void)
-{
-	mutex_lock(&masq_mutex);
-	/* check if the notifier still has clients */
-	if (--masq_refcnt > 0)
-		goto out_unlock;
-
-	unregister_inet6addr_notifier(&masq_inet6_notifier);
-	unregister_netdevice_notifier(&masq_dev_notifier);
-out_unlock:
-	mutex_unlock(&masq_mutex);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
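
The comment block in the deleted masquerade code spells out the key constraint: an atomic notifier cannot sleep, so the conntrack walk is punted to the system workqueue, with v6_worker_count capping how many cleanups may be in flight at once. Below is a standalone C11 sketch of that bounded-deferral pattern; names are illustrative, and the CAS loop is stricter than the kernel's atomic_read()/atomic_inc() pairing, which tolerates a small overshoot.

/* Standalone sketch (not kernel code): bound the number of deferred
 * cleanup jobs the way the deleted masquerade code bounded its
 * workqueue items. Names are illustrative.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define MAX_WORK_COUNT	16

static atomic_int worker_count;

/* Returns true if the caller may queue one more job. */
static bool try_reserve_worker(void)
{
	int cur = atomic_load(&worker_count);

	while (cur < MAX_WORK_COUNT) {
		/* CAS loop: only one contender wins each slot. */
		if (atomic_compare_exchange_weak(&worker_count, &cur, cur + 1))
			return true;
	}
	return false;
}

static void release_worker(void)
{
	atomic_fetch_sub(&worker_count, 1);
}

int main(void)
{
	if (try_reserve_worker()) {
		/* ... schedule the deferred cleanup here ... */
		release_worker();
	}
	return 0;
}
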
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
deleted file mode 100644
index d9bf42b..0000000
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
- * NAT funded by Astaro.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/icmpv6.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static bool
-icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
-		enum nf_nat_manip_type maniptype,
-		const union nf_conntrack_man_proto *min,
-		const union nf_conntrack_man_proto *max)
-{
-	return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
-	       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
-}
-
-static void
-icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
-		    struct nf_conntrack_tuple *tuple,
-		    const struct nf_nat_range2 *range,
-		    enum nf_nat_manip_type maniptype,
-		    const struct nf_conn *ct)
-{
-	static u16 id;
-	unsigned int range_size;
-	unsigned int i;
-
-	range_size = ntohs(range->max_proto.icmp.id) -
-		     ntohs(range->min_proto.icmp.id) + 1;
-
-	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
-		range_size = 0xffff;
-
-	for (i = 0; ; ++id) {
-		tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
-					     (id % range_size));
-		if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
-			return;
-	}
-}
-
-static bool
-icmpv6_manip_pkt(struct sk_buff *skb,
-		 const struct nf_nat_l3proto *l3proto,
-		 unsigned int iphdroff, unsigned int hdroff,
-		 const struct nf_conntrack_tuple *tuple,
-		 enum nf_nat_manip_type maniptype)
-{
-	struct icmp6hdr *hdr;
-
-	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
-		return false;
-
-	hdr = (struct icmp6hdr *)(skb->data + hdroff);
-	l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
-			     tuple, maniptype);
-	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
-	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
-		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
-					 hdr->icmp6_identifier,
-					 tuple->src.u.icmp.id, false);
-		hdr->icmp6_identifier = tuple->src.u.icmp.id;
-	}
-	return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
-	.l4proto		= IPPROTO_ICMPV6,
-	.manip_pkt		= icmpv6_manip_pkt,
-	.in_range		= icmpv6_in_range,
-	.unique_tuple		= icmpv6_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
-#endif
-};
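
For reference, the deleted icmpv6_unique_tuple() search amounts to a rolling modular scan over the configured id range: start from a static rover, wrap with a modulo, and take the first id not already in use, keeping the last candidate after one full fruitless pass. A userspace sketch, with is_used() standing in for nf_nat_used_tuple():

/* Sketch of the deleted id search; is_used() is a made-up stand-in. */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static bool is_used(uint16_t id)
{
	return id < 3;	/* pretend ids 0..2 are taken */
}

static uint16_t pick_id(uint16_t min, uint16_t max)
{
	static uint16_t rover;
	unsigned int range = (unsigned int)(max - min) + 1;
	uint16_t cand = min;

	for (unsigned int i = 0; i < range; i++, rover++) {
		cand = min + (uint16_t)(rover % range);
		if (!is_used(cand))
			break;
	}
	/* After a full pass every id is busy; reuse the last candidate,
	 * as the kernel code ultimately did. */
	return cand;
}

int main(void)
{
	printf("picked id %u\n", (unsigned)pick_id(0, 7));
	return 0;
}
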
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 2485840..5fae66f 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -131,6 +128,7 @@
 
 void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
 {
+	struct net_device *br_indev __maybe_unused;
 	struct sk_buff *nskb;
 	struct tcphdr _otcph;
 	const struct tcphdr *otcph;
@@ -197,15 +195,18 @@
 	 * build the eth header using the original destination's MAC as the
 	 * source, and send the RST packet directly.
 	 */
-	if (oldskb->nf_bridge) {
+	br_indev = nf_bridge_get_physindev(oldskb);
+	if (br_indev) {
 		struct ethhdr *oeth = eth_hdr(oldskb);
 
-		nskb->dev = nf_bridge_get_physindev(oldskb);
+		nskb->dev = br_indev;
 		nskb->protocol = htons(ETH_P_IPV6);
 		ip6h->payload_len = htons(sizeof(struct tcphdr));
 		if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
-				    oeth->h_source, oeth->h_dest, nskb->len) < 0)
+				    oeth->h_source, oeth->h_dest, nskb->len) < 0) {
+			kfree_skb(nskb);
 			return;
+		}
 		dev_queue_xmit(nskb);
 	} else
 #endif
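
The hunk above plugs a leak in the bridged-reject path: once dev_hard_header() fails, nobody else will ever consume nskb, so nf_send_reset6() must free it before bailing out. A minimal sketch of that ownership rule, with make_reply(), build_header() and send_buf() as hypothetical stand-ins:

/* Sketch of the leak fix pattern: once the reply buffer cannot be
 * handed off, the function owns it and must free it before returning.
 */
#include <stdlib.h>

struct buf { char *data; };

static struct buf *make_reply(void) { return calloc(1, sizeof(struct buf)); }
static int build_header(struct buf *b) { (void)b; return -1; /* may fail */ }
static void send_buf(struct buf *b) { free(b); /* consumes the buffer */ }

static void send_reset(void)
{
	struct buf *nskb = make_reply();

	if (!nskb)
		return;

	if (build_header(nskb) < 0) {
		free(nskb);	/* previously leaked on this path */
		return;
	}
	send_buf(nskb);
}

int main(void)
{
	send_reset();
	return 0;
}
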
@@ -229,6 +230,9 @@
 	if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
 		return false;
 
+	if (!nf_reject_verify_csum(proto))
+		return true;
+
 	return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
 }
 
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index f14de4b..b9df879 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2007-2008 BalaBit IT Ltd.
  * Author: Krisztian Kovacs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -16,7 +12,6 @@
 #include <net/sock.h>
 #include <net/inet_sock.h>
 #include <net/inet6_hashtables.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #include <net/netfilter/nf_socket.h>
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <net/netfilter/nf_conntrack.h>
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index 5dfd33a..34d51cd 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <net/netfilter/nf_tproxy.h>
 #include <linux/module.h>
 #include <net/inet6_hashtables.h>
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
deleted file mode 100644
index 8a081ad..0000000
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/ipv6.h>
-
-static unsigned int nft_nat_do_chain(void *priv,
-				     struct sk_buff *skb,
-				     const struct nf_hook_state *state)
-{
-	struct nft_pktinfo pkt;
-
-	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv6(&pkt, skb);
-
-	return nft_do_chain(&pkt, priv);
-}
-
-static int nft_nat_ipv6_reg(struct net *net, const struct nf_hook_ops *ops)
-{
-	return nf_nat_l3proto_ipv6_register_fn(net, ops);
-}
-
-static void nft_nat_ipv6_unreg(struct net *net, const struct nf_hook_ops *ops)
-{
-	nf_nat_l3proto_ipv6_unregister_fn(net, ops);
-}
-
-static const struct nft_chain_type nft_chain_nat_ipv6 = {
-	.name		= "nat",
-	.type		= NFT_CHAIN_T_NAT,
-	.family		= NFPROTO_IPV6,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_INET_PRE_ROUTING) |
-			  (1 << NF_INET_POST_ROUTING) |
-			  (1 << NF_INET_LOCAL_OUT) |
-			  (1 << NF_INET_LOCAL_IN),
-	.hooks		= {
-		[NF_INET_PRE_ROUTING]	= nft_nat_do_chain,
-		[NF_INET_POST_ROUTING]	= nft_nat_do_chain,
-		[NF_INET_LOCAL_OUT]	= nft_nat_do_chain,
-		[NF_INET_LOCAL_IN]	= nft_nat_do_chain,
-	},
-	.ops_register		= nft_nat_ipv6_reg,
-	.ops_unregister		= nft_nat_ipv6_unreg,
-};
-
-static int __init nft_chain_nat_ipv6_init(void)
-{
-	nft_register_chain_type(&nft_chain_nat_ipv6);
-
-	return 0;
-}
-
-static void __exit nft_chain_nat_ipv6_exit(void)
-{
-	nft_unregister_chain_type(&nft_chain_nat_ipv6);
-}
-
-module_init(nft_chain_nat_ipv6_init);
-module_exit(nft_chain_nat_ipv6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
deleted file mode 100644
index da3f1f8..0000000
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-#include <net/route.h>
-
-static unsigned int nf_route_table_hook(void *priv,
-					struct sk_buff *skb,
-					const struct nf_hook_state *state)
-{
-	unsigned int ret;
-	struct nft_pktinfo pkt;
-	struct in6_addr saddr, daddr;
-	u_int8_t hop_limit;
-	u32 mark, flowlabel;
-	int err;
-
-	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv6(&pkt, skb);
-
-	/* save source/dest address, mark, hoplimit, flowlabel, priority */
-	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
-	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
-	mark = skb->mark;
-	hop_limit = ipv6_hdr(skb)->hop_limit;
-
-	/* flowlabel and prio (includes version, which shouldn't change either) */
-	flowlabel = *((u32 *)ipv6_hdr(skb));
-
-	ret = nft_do_chain(&pkt, priv);
-	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
-	     memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
-	     skb->mark != mark ||
-	     ipv6_hdr(skb)->hop_limit != hop_limit ||
-	     flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
-		err = ip6_route_me_harder(state->net, skb);
-		if (err < 0)
-			ret = NF_DROP_ERR(err);
-	}
-
-	return ret;
-}
-
-static const struct nft_chain_type nft_chain_route_ipv6 = {
-	.name		= "route",
-	.type		= NFT_CHAIN_T_ROUTE,
-	.family		= NFPROTO_IPV6,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
-	.hooks		= {
-		[NF_INET_LOCAL_OUT]	= nf_route_table_hook,
-	},
-};
-
-static int __init nft_chain_route_init(void)
-{
-	nft_register_chain_type(&nft_chain_route_ipv6);
-
-	return 0;
-}
-
-static void __exit nft_chain_route_exit(void)
-{
-	nft_unregister_chain_type(&nft_chain_route_ipv6);
-}
-
-module_init(nft_chain_route_init);
-module_exit(nft_chain_route_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route");
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index d8b5b60..2af3220 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 36be3cf..7ece86a 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -1,8 +1,4 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -59,7 +55,6 @@
 				struct ipv6hdr *iph)
 {
 	const struct net_device *dev = NULL;
-	const struct nf_ipv6_ops *v6ops;
 	int route_err, addrtype;
 	struct rt6_info *rt;
 	struct flowi6 fl6 = {
@@ -68,10 +63,6 @@
 	};
 	u32 ret = 0;
 
-	v6ops = nf_get_ipv6_ops();
-	if (!v6ops)
-		return RTN_UNREACHABLE;
-
 	if (priv->flags & NFTA_FIB_F_IIF)
 		dev = nft_in(pkt);
 	else if (priv->flags & NFTA_FIB_F_OIF)
@@ -79,10 +70,10 @@
 
 	nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
 
-	if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+	if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
 		ret = RTN_LOCAL;
 
-	route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
+	route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt,
 				 flowi6_to_flowi(&fl6), false);
 	if (route_err)
 		goto err;
@@ -174,8 +165,7 @@
 
 	if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
 	    nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
-		nft_fib_store_result(dest, priv, pkt,
-				     nft_in(pkt)->ifindex);
+		nft_fib_store_result(dest, priv, nft_in(pkt));
 		return;
 	}
 
@@ -192,18 +182,7 @@
 	if (oif && oif != rt->rt6i_idev->dev)
 		goto put_rt_err;
 
-	switch (priv->result) {
-	case NFT_FIB_RESULT_OIF:
-		*dest = rt->rt6i_idev->dev->ifindex;
-		break;
-	case NFT_FIB_RESULT_OIFNAME:
-		strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ);
-		break;
-	default:
-		WARN_ON_ONCE(1);
-		break;
-	}
-
+	nft_fib_store_result(dest, priv, rt->rt6i_idev->dev);
  put_rt_err:
 	ip6_rt_put(rt);
 }
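
Both nft_fib6_eval_type() and nft_fib6_eval() previously open-coded the same switch on priv->result; v5.4 funnels them through the shared nft_fib_store_result() helper instead. A hedged sketch of what such a helper does, using simplified stand-in types rather than the kernel's:

/* Consolidation sketch: one helper writes either the ifindex or the
 * interface name into the destination register, keyed on the requested
 * result type. Types here are illustrative. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define IFNAMSIZ 16

enum fib_result { FIB_RESULT_OIF, FIB_RESULT_OIFNAME };

struct netdev { int ifindex; char name[IFNAMSIZ]; };

static void fib_store_result(uint32_t *dest, enum fib_result type,
			     const struct netdev *dev)
{
	switch (type) {
	case FIB_RESULT_OIF:
		*dest = dev->ifindex;
		break;
	case FIB_RESULT_OIFNAME:
		strncpy((char *)dest, dev->name, IFNAMSIZ);
		break;
	}
}

int main(void)
{
	struct netdev d = { .ifindex = 2, .name = "eth0" };
	uint32_t dest[IFNAMSIZ / 4] = { 0 };

	fib_store_result(dest, FIB_RESULT_OIF, &d);
	printf("oif=%u\n", (unsigned)dest[0]);
	return 0;
}
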
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
deleted file mode 100644
index e06c82e..0000000
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nft_masq.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-
-static void nft_masq_ipv6_eval(const struct nft_expr *expr,
-			       struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
-{
-	struct nft_masq *priv = nft_expr_priv(expr);
-	struct nf_nat_range2 range;
-
-	memset(&range, 0, sizeof(range));
-	range.flags = priv->flags;
-	if (priv->sreg_proto_min) {
-		range.min_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_min]);
-		range.max_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_max]);
-	}
-	regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
-						    nft_out(pkt));
-}
-
-static void
-nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
-{
-	nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
-}
-
-static struct nft_expr_type nft_masq_ipv6_type;
-static const struct nft_expr_ops nft_masq_ipv6_ops = {
-	.type		= &nft_masq_ipv6_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
-	.eval		= nft_masq_ipv6_eval,
-	.init		= nft_masq_init,
-	.destroy	= nft_masq_ipv6_destroy,
-	.dump		= nft_masq_dump,
-	.validate	= nft_masq_validate,
-};
-
-static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
-	.family		= NFPROTO_IPV6,
-	.name		= "masq",
-	.ops		= &nft_masq_ipv6_ops,
-	.policy		= nft_masq_policy,
-	.maxattr	= NFTA_MASQ_MAX,
-	.owner		= THIS_MODULE,
-};
-
-static int __init nft_masq_ipv6_module_init(void)
-{
-	int ret;
-
-	ret = nft_register_expr(&nft_masq_ipv6_type);
-	if (ret < 0)
-		return ret;
-
-	ret = nf_nat_masquerade_ipv6_register_notifier();
-	if (ret)
-		nft_unregister_expr(&nft_masq_ipv6_type);
-
-	return ret;
-}
-
-static void __exit nft_masq_ipv6_module_exit(void)
-{
-	nft_unregister_expr(&nft_masq_ipv6_type);
-	nf_nat_masquerade_ipv6_unregister_notifier();
-}
-
-module_init(nft_masq_ipv6_module_init);
-module_exit(nft_masq_ipv6_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
deleted file mode 100644
index 7426986..0000000
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nft_redir.h>
-#include <net/netfilter/nf_nat_redirect.h>
-
-static void nft_redir_ipv6_eval(const struct nft_expr *expr,
-				struct nft_regs *regs,
-				const struct nft_pktinfo *pkt)
-{
-	struct nft_redir *priv = nft_expr_priv(expr);
-	struct nf_nat_range2 range;
-
-	memset(&range, 0, sizeof(range));
-	if (priv->sreg_proto_min) {
-		range.min_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_min]);
-		range.max_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_max]);
-		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
-	}
-
-	range.flags |= priv->flags;
-
-	regs->verdict.code =
-		nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt));
-}
-
-static void
-nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
-{
-	nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
-}
-
-static struct nft_expr_type nft_redir_ipv6_type;
-static const struct nft_expr_ops nft_redir_ipv6_ops = {
-	.type		= &nft_redir_ipv6_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_redir)),
-	.eval		= nft_redir_ipv6_eval,
-	.init		= nft_redir_init,
-	.destroy	= nft_redir_ipv6_destroy,
-	.dump		= nft_redir_dump,
-	.validate	= nft_redir_validate,
-};
-
-static struct nft_expr_type nft_redir_ipv6_type __read_mostly = {
-	.family		= NFPROTO_IPV6,
-	.name		= "redir",
-	.ops		= &nft_redir_ipv6_ops,
-	.policy		= nft_redir_policy,
-	.maxattr	= NFTA_REDIR_MAX,
-	.owner		= THIS_MODULE,
-};
-
-static int __init nft_redir_ipv6_module_init(void)
-{
-	return nft_register_expr(&nft_redir_ipv6_type);
-}
-
-static void __exit nft_redir_ipv6_module_exit(void)
-{
-	nft_unregister_expr(&nft_redir_ipv6_type);
-}
-
-module_init(nft_redir_ipv6_module_init);
-module_exit(nft_redir_ipv6_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 057deea..680a28c 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  * Copyright (c) 2013 Eric Leblond <eric@regit.org>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 4fe7c90..af36acc 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IPv6 library code, needed by static components when full IPv6 support is
  * not configured or static.  These functions are needed by GSO/GRO implementation.
@@ -10,15 +11,25 @@
 #include <net/secure_seq.h>
 #include <linux/netfilter.h>
 
-static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
+static u32 __ipv6_select_ident(struct net *net,
 			       const struct in6_addr *dst,
 			       const struct in6_addr *src)
 {
+	const struct {
+		struct in6_addr dst;
+		struct in6_addr src;
+	} __aligned(SIPHASH_ALIGNMENT) combined = {
+		.dst = *dst,
+		.src = *src,
+	};
 	u32 hash, id;
 
-	hash = __ipv6_addr_jhash(dst, hashrnd);
-	hash = __ipv6_addr_jhash(src, hash);
-	hash ^= net_hash_mix(net);
+	/* Note: the lazy keying below is racy, but that is okay; at worst
+	 * two CPUs initialize the key concurrently and one value wins.
+	 */
+	if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
+		get_random_bytes(&net->ipv4.ip_id_key,
+				 sizeof(net->ipv4.ip_id_key));
+
+	hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
 
 	/* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
 	 * set the high order bit instead, thus minimizing possible future
@@ -41,7 +52,6 @@
  */
 __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
 {
-	static u32 ip6_proxy_idents_hashrnd __read_mostly;
 	struct in6_addr buf[2];
 	struct in6_addr *addrs;
 	u32 id;
@@ -53,11 +63,7 @@
 	if (!addrs)
 		return 0;
 
-	net_get_random_once(&ip6_proxy_idents_hashrnd,
-			    sizeof(ip6_proxy_idents_hashrnd));
-
-	id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
-				 &addrs[1], &addrs[0]);
+	id = __ipv6_select_ident(net, &addrs[1], &addrs[0]);
 	return htonl(id);
 }
 EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
@@ -66,12 +72,9 @@
 			 const struct in6_addr *daddr,
 			 const struct in6_addr *saddr)
 {
-	static u32 ip6_idents_hashrnd __read_mostly;
 	u32 id;
 
-	net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
-
-	id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
+	id = __ipv6_select_ident(net, daddr, saddr);
 	return htonl(id);
 }
 EXPORT_SYMBOL(ipv6_select_ident);
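
The rework above replaces the two per-call-site jhash seeds with one siphash over a single aligned struct holding both addresses, keyed by net->ipv4.ip_id_key and seeded lazily on first use. A userspace analogue of that shape follows; mix64() is a toy stand-in for siphash and is not cryptographically sound.

/* Userspace analogue: hash both addresses as one struct under a lazily
 * initialized key. NOT siphash; for illustration only. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct in6 { uint8_t s6_addr[16]; };

struct combined {
	struct in6 dst;
	struct in6 src;
};

static uint64_t hash_key;	/* 0 means "not initialized yet" */

static uint64_t mix64(const void *p, size_t len, uint64_t key)
{
	const uint8_t *b = p;
	uint64_t h = key ^ 0x9e3779b97f4a7c15ULL;

	while (len--)
		h = (h ^ *b++) * 0x100000001b3ULL;
	return h;
}

static uint32_t select_ident(const struct in6 *dst, const struct in6 *src)
{
	struct combined c;

	if (!hash_key)			/* lazy keying, as in the patch */
		hash_key = ((uint64_t)rand() << 32) | (uint64_t)rand();

	memset(&c, 0, sizeof(c));
	c.dst = *dst;
	c.src = *src;
	return (uint32_t)mix64(&c, sizeof(c), hash_key);
}

int main(void)
{
	struct in6 a = { { 0x20, 1 } }, b = { { 0xfe, 0x80 } };

	printf("id=%u\n", (unsigned)select_ident(&a, &b));
	return 0;
}
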
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 4c04bcc..98ac32b 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -5,17 +6,11 @@
  *
  *		"Ping" sockets
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Based on ipv4/ping.c code.
  *
  * Authors:	Lorenzo Colitti (IPv6 support)
  *		Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6),
  *		Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32)
- *
  */
 
 #include <net/addrconf.h>
@@ -228,7 +223,7 @@
 	return 0;
 }
 
-static void __net_init ping_v6_proc_exit_net(struct net *net)
+static void __net_exit ping_v6_proc_exit_net(struct net *net)
 {
 	remove_proc_entry("icmp6", net->proc_net);
 }
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 2356b4a..bbff3e0 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -9,11 +10,6 @@
  *
  * Authors:	David S. Miller (davem@caip.rutgers.edu)
  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 #include <linux/socket.h>
 #include <linux/net.h>
@@ -48,8 +44,8 @@
 	seq_printf(seq, "RAW6: inuse %d\n",
 		       sock_prot_inuse_get(net, &rawv6_prot));
 	seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
-		   atomic_read(&net->ipv6.frags.rhashtable.nelems),
-		   frag_mem_limit(&net->ipv6.frags));
+		   atomic_read(&net->ipv6.fqdir->rhashtable.nelems),
+		   frag_mem_limit(net->ipv6.fqdir));
 	return 0;
 }
 
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index b5d54d4..d4b1806 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -6,11 +7,6 @@
  *		PF_INET6 protocol dispatch tables.
  *
  * Authors:	Pedro Roque	<roque@di.fc.ul.pt>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 /*
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5e0efd3..a77f6b7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	RAW sockets for IPv6
  *	Linux INET6 implementation
@@ -11,11 +12,6 @@
  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
  *	YOSHIFUJI,H.@USAGI	:	raw checksum (RFC2292(bis) compliance)
  *	Kazunori MIYAZAWA @USAGI:	change process style to use ip6_append_data
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/errno.h>
@@ -86,9 +82,8 @@
 			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
 				continue;
 
-			if (sk->sk_bound_dev_if &&
-			    sk->sk_bound_dev_if != dif &&
-			    sk->sk_bound_dev_if != sdif)
+			if (!raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
+						 dif, sdif))
 				continue;
 
 			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
@@ -220,7 +215,7 @@
 
 			/* Not releasing hash table! */
 			if (clone) {
-				nf_reset(clone);
+				nf_reset_ct(clone);
 				rawv6_rcv(sk, clone);
 			}
 		}
@@ -288,7 +283,9 @@
 			/* Binding to link-local address requires an interface */
 			if (!sk->sk_bound_dev_if)
 				goto out_unlock;
+		}
 
+		if (sk->sk_bound_dev_if) {
 			err = -ENODEV;
 			dev = dev_get_by_index_rcu(sock_net(sk),
 						   sk->sk_bound_dev_if);
@@ -649,7 +646,7 @@
 
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->priority = sk->sk_priority;
-	skb->mark = sk->sk_mark;
+	skb->mark = sockc->mark;
 	skb->tstamp = sockc->transmit_time;
 
 	skb_put(skb, length);
@@ -658,6 +655,8 @@
 
 	skb->ip_summed = CHECKSUM_NONE;
 
+	skb_setup_tx_timestamp(skb, sockc->tsflags);
+
 	if (flags & MSG_CONFIRM)
 		skb_set_dst_pending_confirm(skb, 1);
 
@@ -780,6 +779,7 @@
 	struct flowi6 fl6;
 	struct ipcm6_cookie ipc6;
 	int addr_len = msg->msg_namelen;
+	int hdrincl;
 	u16 proto;
 	int err;
 
@@ -793,6 +793,13 @@
 	if (msg->msg_flags & MSG_OOB)
 		return -EOPNOTSUPP;
 
+	/* hdrincl should be READ_ONCE(inet->hdrincl)
+	 * but READ_ONCE() doesn't work with bit fields.
+	 * Doing this indirectly yields the same result.
+	 */
+	hdrincl = inet->hdrincl;
+	hdrincl = READ_ONCE(hdrincl);
+
 	/*
 	 *	Get and verify the address.
 	 */
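
The point of the two-step dance above is that READ_ONCE() cannot take the address of a bit field, so the value is first copied into an addressable int and the once-read is done on the copy, pinning hdrincl to a single load for the rest of the function. A compilable sketch of the same idiom, with read_once_int() modelling the kernel macro's volatile access:

/* Bit-field READ_ONCE workaround, userspace sketch. */
#include <stdio.h>

struct inet_flags {
	unsigned int hdrincl : 1;
	unsigned int freebind : 1;
};

static int read_once_int(const int *p)
{
	return *(const volatile int *)p;	/* single, untorn load */
}

int main(void)
{
	struct inet_flags inet = { .hdrincl = 1 };
	int hdrincl;

	hdrincl = inet.hdrincl;		/* bit field -> addressable int */
	hdrincl = read_once_int(&hdrincl);	/* value fixed from here on */

	printf("hdrincl=%d\n", hdrincl);
	return 0;
}
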
@@ -803,6 +810,7 @@
 
 	ipcm6_init(&ipc6);
 	ipc6.sockc.tsflags = sk->sk_tsflags;
+	ipc6.sockc.mark = sk->sk_mark;
 
 	if (sin6) {
 		if (addr_len < SIN6_LEN_RFC2133)
@@ -827,7 +835,7 @@
 			fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 			if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
 				flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
-				if (!flowlabel)
+				if (IS_ERR(flowlabel))
 					return -EINVAL;
 			}
 		}
@@ -869,7 +877,7 @@
 		}
 		if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
-			if (!flowlabel)
+			if (IS_ERR(flowlabel))
 				return -EINVAL;
 		}
 		if (!(opt->opt_nflen|opt->opt_flen))
@@ -884,11 +892,15 @@
 	opt = ipv6_fixup_options(&opt_space, opt);
 
 	fl6.flowi6_proto = proto;
-	rfv.msg = msg;
-	rfv.hlen = 0;
-	err = rawv6_probe_proto_opt(&rfv, &fl6);
-	if (err)
-		goto out;
+	fl6.flowi6_mark = ipc6.sockc.mark;
+
+	if (!hdrincl) {
+		rfv.msg = msg;
+		rfv.hlen = 0;
+		err = rawv6_probe_proto_opt(&rfv, &fl6);
+		if (err)
+			goto out;
+	}
 
 	if (!ipv6_addr_any(daddr))
 		fl6.daddr = *daddr;
@@ -905,7 +917,7 @@
 		fl6.flowi6_oif = np->ucast_oif;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	if (inet->hdrincl)
+	if (hdrincl)
 		fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
 
 	if (ipc6.tclass < 0)
@@ -928,7 +940,7 @@
 		goto do_confirm;
 
 back_from_confirm:
-	if (inet->hdrincl)
+	if (hdrincl)
 		err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst,
 					msg->msg_flags, &ipc6.sockc);
 	else {
@@ -1355,6 +1367,7 @@
 	.getname	   = inet6_getname,
 	.poll		   = datagram_poll,		/* ok		*/
 	.ioctl		   = inet6_ioctl,		/* must change  */
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/
 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index d3fd2d7..1f5d4d1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPv6 fragment reassembly
  *	Linux INET6 implementation
@@ -6,11 +7,6 @@
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	Based on: net/ipv4/ip_fragment.c
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 /*
@@ -69,19 +65,17 @@
 
 static struct inet_frags ip6_frags;
 
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
-			  struct net_device *dev);
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+			  struct sk_buff *prev_tail, struct net_device *dev);
 
 static void ip6_frag_expire(struct timer_list *t)
 {
 	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct frag_queue *fq;
-	struct net *net;
 
 	fq = container_of(frag, struct frag_queue, q);
-	net = container_of(fq->q.net, struct net, ipv6.frags);
 
-	ip6frag_expire_frag_queue(net, fq);
+	ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
 }
 
 static struct frag_queue *
@@ -100,7 +94,7 @@
 					    IPV6_ADDR_LINKLOCAL)))
 		key.iif = 0;
 
-	q = inet_frag_find(&net->ipv6.frags, &key);
+	q = inet_frag_find(net->ipv6.fqdir, &key);
 	if (!q)
 		return NULL;
 
@@ -111,21 +105,26 @@
 			  struct frag_hdr *fhdr, int nhoff,
 			  u32 *prob_offset)
 {
-	struct sk_buff *prev, *next;
-	struct net_device *dev;
-	int offset, end, fragsize;
 	struct net *net = dev_net(skb_dst(skb)->dev);
+	int offset, end, fragsize;
+	struct sk_buff *prev_tail;
+	struct net_device *dev;
+	int err = -ENOENT;
 	u8 ecn;
 
 	if (fq->q.flags & INET_FRAG_COMPLETE)
 		goto err;
 
+	err = -EINVAL;
 	offset = ntohs(fhdr->frag_off) & ~0x7;
 	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
 			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
 		*prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
+		/* note that if prob_offset is set, the skb is freed elsewhere,
+		 * we do not free it here.
+		 */
 		return -1;
 	}
 
@@ -145,7 +144,7 @@
 		 */
 		if (end < fq->q.len ||
 		    ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
-			goto err;
+			goto discard_fq;
 		fq->q.flags |= INET_FRAG_LAST_IN;
 		fq->q.len = end;
 	} else {
@@ -162,75 +161,40 @@
 		if (end > fq->q.len) {
 			/* Some bits beyond end -> corruption. */
 			if (fq->q.flags & INET_FRAG_LAST_IN)
-				goto err;
+				goto discard_fq;
 			fq->q.len = end;
 		}
 	}
 
 	if (end == offset)
-		goto err;
+		goto discard_fq;
 
+	err = -ENOMEM;
 	/* Point into the IP datagram 'data' part. */
 	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
-		goto err;
-
-	if (pskb_trim_rcsum(skb, end - offset))
-		goto err;
-
-	/* Find out which fragments are in front and at the back of us
-	 * in the chain of fragments so far.  We must know where to put
-	 * this fragment, right?
-	 */
-	prev = fq->q.fragments_tail;
-	if (!prev || prev->ip_defrag_offset < offset) {
-		next = NULL;
-		goto found;
-	}
-	prev = NULL;
-	for (next = fq->q.fragments; next != NULL; next = next->next) {
-		if (next->ip_defrag_offset >= offset)
-			break;	/* bingo! */
-		prev = next;
-	}
-
-found:
-	/* RFC5722, Section 4, amended by Errata ID : 3089
-	 *                          When reassembling an IPv6 datagram, if
-	 *   one or more its constituent fragments is determined to be an
-	 *   overlapping fragment, the entire datagram (and any constituent
-	 *   fragments) MUST be silently discarded.
-	 */
-
-	/* Check for overlap with preceding fragment. */
-	if (prev &&
-	    (prev->ip_defrag_offset + prev->len) > offset)
 		goto discard_fq;
 
-	/* Look for overlap with succeeding segment. */
-	if (next && next->ip_defrag_offset < end)
+	err = pskb_trim_rcsum(skb, end - offset);
+	if (err)
 		goto discard_fq;
 
-	/* Note : skb->ip_defrag_offset and skb->dev share the same location */
+	/* Note : skb->rbnode and skb->dev share the same location. */
 	dev = skb->dev;
-	if (dev)
-		fq->iif = dev->ifindex;
 	/* Makes sure compiler won't do silly aliasing games */
 	barrier();
-	skb->ip_defrag_offset = offset;
 
-	/* Insert this fragment in the chain of fragments. */
-	skb->next = next;
-	if (!next)
-		fq->q.fragments_tail = skb;
-	if (prev)
-		prev->next = skb;
-	else
-		fq->q.fragments = skb;
+	prev_tail = fq->q.fragments_tail;
+	err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+	if (err)
+		goto insert_error;
+
+	if (dev)
+		fq->iif = dev->ifindex;
 
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
 	fq->ecn |= ecn;
-	add_frag_mem_limit(fq->q.net, skb->truesize);
+	add_frag_mem_limit(fq->q.fqdir, skb->truesize);
 
 	fragsize = -skb_network_offset(skb) + skb->len;
 	if (fragsize > fq->q.max_size)
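
The open-coded list walk removed above is replaced by inet_frag_queue_insert(), which keeps the RFC 5722 rule the old code enforced by hand: any overlapping fragment kills the whole datagram, while an exact duplicate (IPFRAG_DUP) is merely freed. A standalone sketch of that classification over byte ranges:

/* RFC 5722 classification sketch over [start, end) byte ranges. */
#include <stdio.h>

struct frag { int start, end; };

enum verdict { FRAG_OK, FRAG_DUP, FRAG_OVERLAP };

static enum verdict classify(const struct frag *have, int n,
			     const struct frag *nf)
{
	for (int i = 0; i < n; i++) {
		if (have[i].start == nf->start && have[i].end == nf->end)
			return FRAG_DUP;	/* free just this fragment */
		if (nf->start < have[i].end && have[i].start < nf->end)
			return FRAG_OVERLAP;	/* drop the whole datagram */
	}
	return FRAG_OK;
}

int main(void)
{
	struct frag have[] = { { 0, 8 }, { 16, 24 } };
	struct frag nf = { 4, 12 };

	printf("verdict=%d\n", classify(have, 2, &nf)); /* FRAG_OVERLAP */
	return 0;
}
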
@@ -246,44 +210,48 @@
 
 	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 	    fq->q.meat == fq->q.len) {
-		int res;
 		unsigned long orefdst = skb->_skb_refdst;
 
 		skb->_skb_refdst = 0UL;
-		res = ip6_frag_reasm(fq, prev, dev);
+		err = ip6_frag_reasm(fq, skb, prev_tail, dev);
 		skb->_skb_refdst = orefdst;
-		return res;
+		return err;
 	}
 
 	skb_dst_drop(skb);
-	return -1;
+	return -EINPROGRESS;
 
+insert_error:
+	if (err == IPFRAG_DUP) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+	err = -EINVAL;
+	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+			IPSTATS_MIB_REASM_OVERLAPS);
 discard_fq:
 	inet_frag_kill(&fq->q);
-err:
 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			IPSTATS_MIB_REASMFAILS);
+err:
 	kfree_skb(skb);
-	return -1;
+	return err;
 }
 
 /*
  *	Check if this packet is complete.
- *	Returns NULL on failure by any reason, and pointer
- *	to current nexthdr field in reassembled frame.
  *
  *	It is called with locked fq, and caller must check that
  *	queue is eligible for reassembly i.e. it is not COMPLETE,
  *	the last and the first frames arrived and all the bits are here.
  */
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
-			  struct net_device *dev)
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+			  struct sk_buff *prev_tail, struct net_device *dev)
 {
-	struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
-	struct sk_buff *fp, *head = fq->q.fragments;
-	int    payload_len, delta;
+	struct net *net = fq->q.fqdir->net;
 	unsigned int nhoff;
-	int sum_truesize;
+	void *reasm_data;
+	int payload_len;
 	u8 ecn;
 
 	inet_frag_kill(&fq->q);
@@ -292,127 +260,47 @@
 	if (unlikely(ecn == 0xff))
 		goto out_fail;
 
-	/* Make the one we just received the head. */
-	if (prev) {
-		head = prev->next;
-		fp = skb_clone(head, GFP_ATOMIC);
+	reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+	if (!reasm_data)
+		goto out_oom;
 
-		if (!fp)
-			goto out_oom;
-
-		fp->next = head->next;
-		if (!fp->next)
-			fq->q.fragments_tail = fp;
-		prev->next = fp;
-
-		skb_morph(head, fq->q.fragments);
-		head->next = fq->q.fragments->next;
-
-		consume_skb(fq->q.fragments);
-		fq->q.fragments = head;
-	}
-
-	WARN_ON(head == NULL);
-	WARN_ON(head->ip_defrag_offset != 0);
-
-	/* Unfragmented part is taken from the first segment. */
-	payload_len = ((head->data - skb_network_header(head)) -
+	payload_len = ((skb->data - skb_network_header(skb)) -
 		       sizeof(struct ipv6hdr) + fq->q.len -
 		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN)
 		goto out_oversize;
 
-	delta = - head->truesize;
-
-	/* Head of list must not be cloned. */
-	if (skb_unclone(head, GFP_ATOMIC))
-		goto out_oom;
-
-	delta += head->truesize;
-	if (delta)
-		add_frag_mem_limit(fq->q.net, delta);
-
-	/* If the first fragment is fragmented itself, we split
-	 * it to two chunks: the first with data and paged part
-	 * and the second, holding only fragments. */
-	if (skb_has_frag_list(head)) {
-		struct sk_buff *clone;
-		int i, plen = 0;
-
-		clone = alloc_skb(0, GFP_ATOMIC);
-		if (!clone)
-			goto out_oom;
-		clone->next = head->next;
-		head->next = clone;
-		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
-		skb_frag_list_init(head);
-		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
-			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
-		clone->len = clone->data_len = head->data_len - plen;
-		head->data_len -= clone->len;
-		head->len -= clone->len;
-		clone->csum = 0;
-		clone->ip_summed = head->ip_summed;
-		add_frag_mem_limit(fq->q.net, clone->truesize);
-	}
-
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
 	nhoff = fq->nhoffset;
-	skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
-	memmove(head->head + sizeof(struct frag_hdr), head->head,
-		(head->data - head->head) - sizeof(struct frag_hdr));
-	if (skb_mac_header_was_set(head))
-		head->mac_header += sizeof(struct frag_hdr);
-	head->network_header += sizeof(struct frag_hdr);
+	skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
+	memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+		(skb->data - skb->head) - sizeof(struct frag_hdr));
+	if (skb_mac_header_was_set(skb))
+		skb->mac_header += sizeof(struct frag_hdr);
+	skb->network_header += sizeof(struct frag_hdr);
 
-	skb_reset_transport_header(head);
-	skb_push(head, head->data - skb_network_header(head));
+	skb_reset_transport_header(skb);
 
-	sum_truesize = head->truesize;
-	for (fp = head->next; fp;) {
-		bool headstolen;
-		int delta;
-		struct sk_buff *next = fp->next;
+	inet_frag_reasm_finish(&fq->q, skb, reasm_data, true);
 
-		sum_truesize += fp->truesize;
-		if (head->ip_summed != fp->ip_summed)
-			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_COMPLETE)
-			head->csum = csum_add(head->csum, fp->csum);
-
-		if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
-			kfree_skb_partial(fp, headstolen);
-		} else {
-			if (!skb_shinfo(head)->frag_list)
-				skb_shinfo(head)->frag_list = fp;
-			head->data_len += fp->len;
-			head->len += fp->len;
-			head->truesize += fp->truesize;
-		}
-		fp = next;
-	}
-	sub_frag_mem_limit(fq->q.net, sum_truesize);
-
-	head->next = NULL;
-	head->dev = dev;
-	head->tstamp = fq->q.stamp;
-	ipv6_hdr(head)->payload_len = htons(payload_len);
-	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
-	IP6CB(head)->nhoff = nhoff;
-	IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
-	IP6CB(head)->frag_max_size = fq->q.max_size;
+	skb->dev = dev;
+	ipv6_hdr(skb)->payload_len = htons(payload_len);
+	ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+	IP6CB(skb)->nhoff = nhoff;
+	IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+	IP6CB(skb)->frag_max_size = fq->q.max_size;
 
 	/* Yes, and fold redundant checksum back. 8) */
-	skb_postpush_rcsum(head, skb_network_header(head),
-			   skb_network_header_len(head));
+	skb_postpush_rcsum(skb, skb_network_header(skb),
+			   skb_network_header_len(skb));
 
 	rcu_read_lock();
-	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+	__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
-	fq->q.fragments = NULL;
 	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
+	fq->q.last_run_head = NULL;
 	return 1;
 
 out_oversize:
@@ -422,8 +310,9 @@
 	net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
 out_fail:
 	rcu_read_lock();
-	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+	__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
 	rcu_read_unlock();
+	inet_frag_kill(&fq->q);
 	return -1;
 }
 
@@ -462,10 +351,6 @@
 		return 1;
 	}
 
-	if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
-	    fhdr->frag_off & htons(IP6_MF))
-		goto fail_hdr;
-
 	iif = skb->dev ? skb->dev->ifindex : 0;
 	fq = fq_find(net, fhdr->identification, hdr, iif);
 	if (fq) {
@@ -483,6 +368,7 @@
 		if (prob_offset) {
 			__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
 					IPSTATS_MIB_INHDRERRORS);
+			/* icmpv6_param_prob() calls kfree_skb(skb) */
 			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
 		}
 		return ret;
@@ -509,23 +395,18 @@
 static struct ctl_table ip6_frags_ns_ctl_table[] = {
 	{
 		.procname	= "ip6frag_high_thresh",
-		.data		= &init_net.ipv6.frags.high_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &init_net.ipv6.frags.low_thresh
 	},
 	{
 		.procname	= "ip6frag_low_thresh",
-		.data		= &init_net.ipv6.frags.low_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra2		= &init_net.ipv6.frags.high_thresh
 	},
 	{
 		.procname	= "ip6frag_time",
-		.data		= &init_net.ipv6.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -557,13 +438,12 @@
 		if (!table)
 			goto err_alloc;
 
-		table[0].data = &net->ipv6.frags.high_thresh;
-		table[0].extra1 = &net->ipv6.frags.low_thresh;
-		table[0].extra2 = &init_net.ipv6.frags.high_thresh;
-		table[1].data = &net->ipv6.frags.low_thresh;
-		table[1].extra2 = &net->ipv6.frags.high_thresh;
-		table[2].data = &net->ipv6.frags.timeout;
 	}
+	table[0].data	= &net->ipv6.fqdir->high_thresh;
+	table[0].extra1	= &net->ipv6.fqdir->low_thresh;
+	table[1].data	= &net->ipv6.fqdir->low_thresh;
+	table[1].extra2	= &net->ipv6.fqdir->high_thresh;
+	table[2].data	= &net->ipv6.fqdir->timeout;
 
 	hdr = register_net_sysctl(net, "net/ipv6", table);
 	if (!hdr)
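
The sysctl rework above means every namespace, including init_net, now gets a table whose .data pointers are re-bound to its own fqdir fields. A simplified sketch of that copy-and-rebind pattern, with plain stand-in types rather than struct ctl_table:

/* Per-namespace sysctl sketch: duplicate a template table, then point
 * each entry's .data at this namespace's own fields. */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct ctl { const char *name; void *data; };

struct ns { unsigned long high_thresh, low_thresh; };

static const struct ctl template[] = {
	{ "high_thresh", NULL },
	{ "low_thresh",  NULL },
	{ NULL, NULL },
};

static struct ctl *register_ns_table(struct ns *ns)
{
	struct ctl *t = malloc(sizeof(template));

	if (!t)
		return NULL;
	memcpy(t, template, sizeof(template));
	t[0].data = &ns->high_thresh;	/* rebind to this namespace */
	t[1].data = &ns->low_thresh;
	return t;
}

int main(void)
{
	struct ns ns = { .high_thresh = 4 << 20, .low_thresh = 3 << 20 };
	struct ctl *t = register_ns_table(&ns);

	if (t)
		printf("%s -> %lu\n", t[0].name, *(unsigned long *)t[0].data);
	free(t);
	return 0;
}
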
@@ -626,30 +506,35 @@
 {
 	int res;
 
-	net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
-	net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
-	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
-	net->ipv6.frags.f = &ip6_frags;
-
-	res = inet_frags_init_net(&net->ipv6.frags);
+	res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net);
 	if (res < 0)
 		return res;
 
+	net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+	net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+	net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
 	res = ip6_frags_ns_sysctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&net->ipv6.frags);
+		fqdir_exit(net->ipv6.fqdir);
 	return res;
 }
 
+static void __net_exit ipv6_frags_pre_exit_net(struct net *net)
+{
+	fqdir_pre_exit(net->ipv6.fqdir);
+}
+
 static void __net_exit ipv6_frags_exit_net(struct net *net)
 {
 	ip6_frags_ns_sysctl_unregister(net);
-	inet_frags_exit_net(&net->ipv6.frags);
+	fqdir_exit(net->ipv6.fqdir);
 }
 
 static struct pernet_operations ip6_frags_ops = {
-	.init = ipv6_frags_init_net,
-	.exit = ipv6_frags_exit_net,
+	.init		= ipv6_frags_init_net,
+	.pre_exit	= ipv6_frags_pre_exit_net,
+	.exit		= ipv6_frags_exit_net,
 };
 
 static const struct rhashtable_params ip6_rhash_params = {
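
The new .pre_exit stage gives the fragment code a two-phase teardown: first stop fqdir from accepting or re-arming work across all namespaces, then free it in .exit. A tiny sketch of the same split, in plain C rather than the pernet machinery:

/* Two-phase teardown sketch mirroring .pre_exit/.exit above. */
#include <stdbool.h>
#include <stdlib.h>

struct fq { bool dead; int *table; };

static void fq_pre_exit(struct fq *q)
{
	q->dead = true;		/* no new timers/frags accepted past here */
}

static void fq_exit(struct fq *q)
{
	free(q->table);		/* safe: nothing can re-arm against it */
	q->table = NULL;
}

int main(void)
{
	struct fq q = { .dead = false, .table = malloc(16) };

	fq_pre_exit(&q);
	fq_exit(&q);
	return 0;
}
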
@@ -700,8 +585,8 @@
 
 void ipv6_frag_exit(void)
 {
-	inet_frags_fini(&ip6_frags);
 	ip6_frags_sysctl_unregister();
 	unregister_pernet_subsys(&ip6_frags_ops);
 	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+	inet_frags_fini(&ip6_frags);
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a33681d..3f83ea8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Linux INET6 implementation
  *	FIB front-end.
  *
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 /*	Changes:
@@ -59,7 +55,7 @@
 #include <net/xfrm.h>
 #include <net/netevent.h>
 #include <net/netlink.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 #include <net/lwtunnel.h>
 #include <net/ip_tunnels.h>
 #include <net/l3mdev.h>
@@ -102,16 +98,17 @@
 					   struct sk_buff *skb, u32 mtu);
 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 					struct sk_buff *skb);
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
-static size_t rt6_nlmsg_size(struct fib6_info *rt);
+static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+			   int strict);
+static size_t rt6_nlmsg_size(struct fib6_info *f6i);
 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 			 struct fib6_info *rt, struct dst_entry *dst,
 			 struct in6_addr *dest, struct in6_addr *src,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags);
-static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
-					   struct in6_addr *daddr,
-					   struct in6_addr *saddr);
+static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
+					   const struct in6_addr *daddr,
+					   const struct in6_addr *saddr);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct fib6_info *rt6_add_route_info(struct net *net,
@@ -179,7 +176,7 @@
 			}
 
 			if (rt_dev == dev) {
-				rt->dst.dev = loopback_dev;
+				rt->dst.dev = blackhole_netdev;
 				dev_hold(rt->dst.dev);
 				dev_put(rt_dev);
 			}
@@ -210,7 +207,9 @@
 	n = __ipv6_neigh_lookup(dev, daddr);
 	if (n)
 		return n;
-	return neigh_create(&nd_tbl, daddr, dev);
+
+	n = neigh_create(&nd_tbl, daddr, dev);
+	return IS_ERR(n) ? NULL : n;
 }
 
 static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
@@ -219,7 +218,8 @@
 {
 	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
 
-	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
+	return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
+				dst->dev, skb, daddr);
 }
 
 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
@@ -227,7 +227,7 @@
 	struct net_device *dev = dst->dev;
 	struct rt6_info *rt = (struct rt6_info *)dst;
 
-	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
+	daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
 	if (!daddr)
 		return;
 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
@@ -293,7 +293,7 @@
 	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.fib6_protocol  = RTPROT_KERNEL,
 	.fib6_metric	= ~(u32)0,
-	.fib6_ref	= ATOMIC_INIT(1),
+	.fib6_ref	= REFCOUNT_INIT(1),
 	.fib6_type	= RTN_UNREACHABLE,
 	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
 };
@@ -364,14 +364,11 @@
 
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
-	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct fib6_info *from;
 	struct inet6_dev *idev;
 
-	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
-		kfree(p);
-
+	ip_dst_metrics_put(dst);
 	rt6_uncached_list_del(rt);
 
 	idev = rt->rt6i_idev;
@@ -380,11 +377,8 @@
 		in6_dev_put(idev);
 	}
 
-	rcu_read_lock();
-	from = rcu_dereference(rt->from);
-	rcu_assign_pointer(rt->from, NULL);
+	from = xchg((__force struct fib6_info **)&rt->from, NULL);
 	fib6_info_release(from);
-	rcu_read_unlock();
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -428,75 +422,180 @@
 	return false;
 }
 
-struct fib6_info *fib6_multipath_select(const struct net *net,
-					struct fib6_info *match,
-					struct flowi6 *fl6, int oif,
-					const struct sk_buff *skb,
-					int strict)
+void fib6_select_path(const struct net *net, struct fib6_result *res,
+		      struct flowi6 *fl6, int oif, bool have_oif_match,
+		      const struct sk_buff *skb, int strict)
 {
 	struct fib6_info *sibling, *next_sibling;
+	struct fib6_info *match = res->f6i;
+
+	if ((!match->fib6_nsiblings && !match->nh) || have_oif_match)
+		goto out;
 
 	/* We might have already computed the hash for ICMPv6 errors. In such
 	 * case it will always be non-zero. Otherwise now is the time to do it.
 	 */
-	if (!fl6->mp_hash)
+	if (!fl6->mp_hash &&
+	    (!match->nh || nexthop_is_multipath(match->nh)))
 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 
-	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
-		return match;
+	if (unlikely(match->nh)) {
+		nexthop_path_fib6_result(res, fl6->mp_hash);
+		return;
+	}
+
+	if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
+		goto out;
 
 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
 				 fib6_siblings) {
+		const struct fib6_nh *nh = sibling->fib6_nh;
 		int nh_upper_bound;
 
-		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
+		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
 		if (fl6->mp_hash > nh_upper_bound)
 			continue;
-		if (rt6_score_route(sibling, oif, strict) < 0)
+		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
 			break;
 		match = sibling;
 		break;
 	}
 
-	return match;
+out:
+	res->f6i = match;
+	res->nh = match->fib6_nh;
 }
 
 /*
  *	Route lookup. rcu_read_lock() should be held.
  */
 
-static inline struct fib6_info *rt6_device_match(struct net *net,
-						 struct fib6_info *rt,
-						    const struct in6_addr *saddr,
-						    int oif,
-						    int flags)
+static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
+			       const struct in6_addr *saddr, int oif, int flags)
 {
-	struct fib6_info *sprt;
+	const struct net_device *dev;
 
-	if (!oif && ipv6_addr_any(saddr) &&
-	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
-		return rt;
+	if (nh->fib_nh_flags & RTNH_F_DEAD)
+		return false;
 
-	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
-		const struct net_device *dev = sprt->fib6_nh.nh_dev;
+	dev = nh->fib_nh_dev;
+	if (oif) {
+		if (dev->ifindex == oif)
+			return true;
+	} else {
+		if (ipv6_chk_addr(net, saddr, dev,
+				  flags & RT6_LOOKUP_F_IFACE))
+			return true;
+	}
 
-		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
-			continue;
+	return false;
+}
 
-		if (oif) {
-			if (dev->ifindex == oif)
-				return sprt;
+struct fib6_nh_dm_arg {
+	struct net		*net;
+	const struct in6_addr	*saddr;
+	int			oif;
+	int			flags;
+	struct fib6_nh		*nh;
+};
+
+static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
+{
+	struct fib6_nh_dm_arg *arg = _arg;
+
+	arg->nh = nh;
+	return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
+				  arg->flags);
+}
+
+/* returns fib6_nh from nexthop or NULL */
+static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
+					struct fib6_result *res,
+					const struct in6_addr *saddr,
+					int oif, int flags)
+{
+	struct fib6_nh_dm_arg arg = {
+		.net   = net,
+		.saddr = saddr,
+		.oif   = oif,
+		.flags = flags,
+	};
+
+	if (nexthop_is_blackhole(nh))
+		return NULL;
+
+	if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
+		return arg.nh;
+
+	return NULL;
+}
+
+static void rt6_device_match(struct net *net, struct fib6_result *res,
+			     const struct in6_addr *saddr, int oif, int flags)
+{
+	struct fib6_info *f6i = res->f6i;
+	struct fib6_info *spf6i;
+	struct fib6_nh *nh;
+
+	if (!oif && ipv6_addr_any(saddr)) {
+		if (unlikely(f6i->nh)) {
+			nh = nexthop_fib6_nh(f6i->nh);
+			if (nexthop_is_blackhole(f6i->nh))
+				goto out_blackhole;
 		} else {
-			if (ipv6_chk_addr(net, saddr, dev,
-					  flags & RT6_LOOKUP_F_IFACE))
-				return sprt;
+			nh = f6i->fib6_nh;
+		}
+		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
+			goto out;
+	}
+
+	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
+		bool matched = false;
+
+		if (unlikely(spf6i->nh)) {
+			nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
+					      oif, flags);
+			if (nh)
+				matched = true;
+		} else {
+			nh = spf6i->fib6_nh;
+			if (__rt6_device_match(net, nh, saddr, oif, flags))
+				matched = true;
+		}
+		if (matched) {
+			res->f6i = spf6i;
+			goto out;
 		}
 	}
 
-	if (oif && flags & RT6_LOOKUP_F_IFACE)
-		return net->ipv6.fib6_null_entry;
+	if (oif && flags & RT6_LOOKUP_F_IFACE) {
+		res->f6i = net->ipv6.fib6_null_entry;
+		nh = res->f6i->fib6_nh;
+		goto out;
+	}
 
-	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
+	if (unlikely(f6i->nh)) {
+		nh = nexthop_fib6_nh(f6i->nh);
+		if (nexthop_is_blackhole(f6i->nh))
+			goto out_blackhole;
+	} else {
+		nh = f6i->fib6_nh;
+	}
+
+	if (nh->fib_nh_flags & RTNH_F_DEAD) {
+		res->f6i = net->ipv6.fib6_null_entry;
+		nh = res->f6i->fib6_nh;
+	}
+out:
+	res->nh = nh;
+	res->fib6_type = res->f6i->fib6_type;
+	res->fib6_flags = res->f6i->fib6_flags;
+	return;
+
+out_blackhole:
+	res->fib6_flags |= RTF_REJECT;
+	res->fib6_type = RTN_BLACKHOLE;
+	res->nh = nh;
 }
 
 #ifdef CONFIG_IPV6_ROUTER_PREF
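
fib6_select_path() above is hash-threshold multipath selection: each sibling nexthop owns a slice of the 32-bit hash space via fib_nh_upper_bound, and the flow hash picks the first nexthop whose bound it does not exceed (skipping any that fail rt6_score_route()). A worked sketch with made-up bounds:

/* Hash-threshold nexthop selection sketch; bounds are illustrative. */
#include <stdint.h>
#include <stdio.h>

struct nh { const char *name; uint32_t upper_bound; };

static const struct nh nhs[] = {
	{ "nh0", 0x55555555u },		/* ~1/3 of the hash space */
	{ "nh1", 0xaaaaaaaau },		/* next ~1/3 */
	{ "nh2", 0xffffffffu },		/* remainder */
};

static const struct nh *select_nh(uint32_t mp_hash)
{
	for (unsigned int i = 0; i < sizeof(nhs) / sizeof(nhs[0]); i++)
		if (mp_hash <= nhs[i].upper_bound)
			return &nhs[i];
	return &nhs[0];			/* unreachable with a full table */
}

int main(void)
{
	printf("hash 0x60000000 -> %s\n", select_nh(0x60000000u)->name);
	return 0;
}
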
@@ -518,10 +617,11 @@
 	kfree(work);
 }
 
-static void rt6_probe(struct fib6_info *rt)
+static void rt6_probe(struct fib6_nh *fib6_nh)
 {
 	struct __rt6_probe_work *work = NULL;
 	const struct in6_addr *nh_gw;
+	unsigned long last_probe;
 	struct neighbour *neigh;
 	struct net_device *dev;
 	struct inet6_dev *idev;
@@ -534,12 +634,13 @@
 	 * Router Reachability Probe MUST be rate-limited
 	 * to no more than one per minute.
 	 */
-	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
+	if (!fib6_nh->fib_nh_gw_family)
 		return;
 
-	nh_gw = &rt->fib6_nh.nh_gw;
-	dev = rt->fib6_nh.nh_dev;
+	nh_gw = &fib6_nh->fib_nh_gw6;
+	dev = fib6_nh->fib_nh_dev;
 	rcu_read_lock_bh();
+	last_probe = READ_ONCE(fib6_nh->last_probe);
 	idev = __in6_dev_get(dev);
 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 	if (neigh) {
@@ -555,13 +656,15 @@
 				__neigh_set_probe_once(neigh);
 		}
 		write_unlock(&neigh->lock);
-	} else if (time_after(jiffies, rt->last_probe +
+	} else if (time_after(jiffies, last_probe +
 				       idev->cnf.rtr_probe_interval)) {
 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 	}
 
-	if (work) {
-		rt->last_probe = jiffies;
+	if (!work || cmpxchg(&fib6_nh->last_probe,
+			     last_probe, jiffies) != last_probe) {
+		kfree(work);
+	} else {
 		INIT_WORK(&work->work, rt6_probe_deferred);
 		work->target = *nh_gw;
 		dev_hold(dev);
@@ -573,7 +676,7 @@
 	rcu_read_unlock_bh();
 }
 #else
-static inline void rt6_probe(struct fib6_info *rt)
+static inline void rt6_probe(struct fib6_nh *fib6_nh)
 {
 }
 #endif
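
The rewrite above makes the probe rate limit lock-free: last_probe is read once with READ_ONCE(), and only the thread whose cmpxchg() succeeds gets to schedule the deferred work, so concurrent probers no longer race on the timestamp. A standalone sketch of the same pattern, not part of the patch, using C11 atomics in place of the kernel's cmpxchg(); all names are illustrative:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define PROBE_INTERVAL 60	/* seconds, standing in for rtr_probe_interval */

static _Atomic unsigned long last_probe;

static bool try_schedule_probe(void)
{
	unsigned long now = (unsigned long)time(NULL);
	unsigned long seen = atomic_load(&last_probe);

	if (now < seen + PROBE_INTERVAL)
		return false;		/* rate-limited, drop the probe */
	/* only the CAS winner schedules the work; losers back off */
	return atomic_compare_exchange_strong(&last_probe, &seen, now);
}

int main(void)
{
	int first = try_schedule_probe();
	int second = try_schedule_probe();

	printf("%d %d\n", first, second);	/* "1 0": second is suppressed */
	return 0;
}
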
@@ -581,27 +684,14 @@
 /*
  * Default Router Selection (RFC 2461 6.3.6)
  */
-static inline int rt6_check_dev(struct fib6_info *rt, int oif)
-{
-	const struct net_device *dev = rt->fib6_nh.nh_dev;
-
-	if (!oif || dev->ifindex == oif)
-		return 2;
-	return 0;
-}
-
-static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
+static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
 {
 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 	struct neighbour *neigh;
 
-	if (rt->fib6_flags & RTF_NONEXTHOP ||
-	    !(rt->fib6_flags & RTF_GATEWAY))
-		return RT6_NUD_SUCCEED;
-
 	rcu_read_lock_bh();
-	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
-					  &rt->fib6_nh.nh_gw);
+	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
+					  &fib6_nh->fib_nh_gw6);
 	if (neigh) {
 		read_lock(&neigh->lock);
 		if (neigh->nud_state & NUD_VALID)
@@ -622,58 +712,44 @@
 	return ret;
 }
 
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
+static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+			   int strict)
 {
-	int m;
+	int m = 0;
 
-	m = rt6_check_dev(rt, oif);
+	if (!oif || nh->fib_nh_dev->ifindex == oif)
+		m = 2;
+
 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 		return RT6_NUD_FAIL_HARD;
 #ifdef CONFIG_IPV6_ROUTER_PREF
-	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
+	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
 #endif
-	if (strict & RT6_LOOKUP_F_REACHABLE) {
-		int n = rt6_check_neigh(rt);
+	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
+	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
+		int n = rt6_check_neigh(nh);
 		if (n < 0)
 			return n;
 	}
 	return m;
 }
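
For reference, the score computed above packs the device-match result into the low two bits and the decoded router preference above them, so a preferred router on the requested interface always outranks either property alone. A tiny illustrative sketch with toy values, not the kernel's IPV6_DECODE_PREF machinery:

#include <stdio.h>

static int score(int dev_match, int pref /* decoded preference, 0..3 */)
{
	int m = dev_match ? 2 : 0;	/* bits 0-1: device match */

	m |= pref << 2;			/* bits 2+: router preference */
	return m;
}

int main(void)
{
	/* device match alone: 2; high preference alone: 12; both: 14 */
	printf("%d %d %d\n", score(1, 0), score(0, 3), score(1, 3));
	return 0;
}
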
 
-/* called with rc_read_lock held */
-static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
+static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
+		       int oif, int strict, int *mpri, bool *do_rr)
 {
-	const struct net_device *dev = fib6_info_nh_dev(f6i);
-	bool rc = false;
-
-	if (dev) {
-		const struct inet6_dev *idev = __in6_dev_get(dev);
-
-		rc = !!idev->cnf.ignore_routes_with_linkdown;
-	}
-
-	return rc;
-}
-
-static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
-				   int *mpri, struct fib6_info *match,
-				   bool *do_rr)
-{
-	int m;
 	bool match_do_rr = false;
+	bool rc = false;
+	int m;
 
-	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
+	if (nh->fib_nh_flags & RTNH_F_DEAD)
 		goto out;
 
-	if (fib6_ignore_linkdown(rt) &&
-	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
+	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
+	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 		goto out;
 
-	if (fib6_check_expired(rt))
-		goto out;
-
-	m = rt6_score_route(rt, oif, strict);
+	m = rt6_score_route(nh, fib6_flags, oif, strict);
 	if (m == RT6_NUD_FAIL_DO_RR) {
 		match_do_rr = true;
 		m = 0; /* lowest valid score */
@@ -682,67 +758,127 @@
 	}
 
 	if (strict & RT6_LOOKUP_F_REACHABLE)
-		rt6_probe(rt);
+		rt6_probe(nh);
 
 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 	if (m > *mpri) {
 		*do_rr = match_do_rr;
 		*mpri = m;
-		match = rt;
+		rc = true;
 	}
 out:
-	return match;
+	return rc;
 }
 
-static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
-				     struct fib6_info *leaf,
-				     struct fib6_info *rr_head,
-				     u32 metric, int oif, int strict,
-				     bool *do_rr)
+struct fib6_nh_frl_arg {
+	u32		flags;
+	int		oif;
+	int		strict;
+	int		*mpri;
+	bool		*do_rr;
+	struct fib6_nh	*nh;
+};
+
+static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
 {
-	struct fib6_info *rt, *match, *cont;
+	struct fib6_nh_frl_arg *arg = _arg;
+
+	arg->nh = nh;
+	return find_match(nh, arg->flags, arg->oif, arg->strict,
+			  arg->mpri, arg->do_rr);
+}
+
+static void __find_rr_leaf(struct fib6_info *f6i_start,
+			   struct fib6_info *nomatch, u32 metric,
+			   struct fib6_result *res, struct fib6_info **cont,
+			   int oif, int strict, bool *do_rr, int *mpri)
+{
+	struct fib6_info *f6i;
+
+	for (f6i = f6i_start;
+	     f6i && f6i != nomatch;
+	     f6i = rcu_dereference(f6i->fib6_next)) {
+		bool matched = false;
+		struct fib6_nh *nh;
+
+		if (cont && f6i->fib6_metric != metric) {
+			*cont = f6i;
+			return;
+		}
+
+		if (fib6_check_expired(f6i))
+			continue;
+
+		if (unlikely(f6i->nh)) {
+			struct fib6_nh_frl_arg arg = {
+				.flags  = f6i->fib6_flags,
+				.oif    = oif,
+				.strict = strict,
+				.mpri   = mpri,
+				.do_rr  = do_rr
+			};
+
+			if (nexthop_is_blackhole(f6i->nh)) {
+				res->fib6_flags = RTF_REJECT;
+				res->fib6_type = RTN_BLACKHOLE;
+				res->f6i = f6i;
+				res->nh = nexthop_fib6_nh(f6i->nh);
+				return;
+			}
+			if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
+						     &arg)) {
+				matched = true;
+				nh = arg.nh;
+			}
+		} else {
+			nh = f6i->fib6_nh;
+			if (find_match(nh, f6i->fib6_flags, oif, strict,
+				       mpri, do_rr))
+				matched = true;
+		}
+		if (matched) {
+			res->f6i = f6i;
+			res->nh = nh;
+			res->fib6_flags = f6i->fib6_flags;
+			res->fib6_type = f6i->fib6_type;
+		}
+	}
+}
+
+static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
+			 struct fib6_info *rr_head, int oif, int strict,
+			 bool *do_rr, struct fib6_result *res)
+{
+	u32 metric = rr_head->fib6_metric;
+	struct fib6_info *cont = NULL;
 	int mpri = -1;
 
-	match = NULL;
-	cont = NULL;
-	for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
-		if (rt->fib6_metric != metric) {
-			cont = rt;
-			break;
-		}
+	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
+		       oif, strict, do_rr, &mpri);
 
-		match = find_match(rt, oif, strict, &mpri, match, do_rr);
-	}
+	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
+		       oif, strict, do_rr, &mpri);
 
-	for (rt = leaf; rt && rt != rr_head;
-	     rt = rcu_dereference(rt->fib6_next)) {
-		if (rt->fib6_metric != metric) {
-			cont = rt;
-			break;
-		}
+	if (res->f6i || !cont)
+		return;
 
-		match = find_match(rt, oif, strict, &mpri, match, do_rr);
-	}
-
-	if (match || !cont)
-		return match;
-
-	for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
-		match = find_match(rt, oif, strict, &mpri, match, do_rr);
-
-	return match;
+	__find_rr_leaf(cont, NULL, metric, res, NULL,
+		       oif, strict, do_rr, &mpri);
 }
 
-static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
-				   int oif, int strict)
+static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
+		       struct fib6_result *res, int strict)
 {
 	struct fib6_info *leaf = rcu_dereference(fn->leaf);
-	struct fib6_info *match, *rt0;
+	struct fib6_info *rt0;
 	bool do_rr = false;
 	int key_plen;
 
+	/* make sure this function or its helpers set f6i */
+	res->f6i = NULL;
+
 	if (!leaf || leaf == net->ipv6.fib6_null_entry)
-		return net->ipv6.fib6_null_entry;
+		goto out;
 
 	rt0 = rcu_dereference(fn->rr_ptr);
 	if (!rt0)
@@ -759,11 +895,9 @@
 		key_plen = rt0->fib6_src.plen;
 #endif
 	if (fn->fn_bit != key_plen)
-		return net->ipv6.fib6_null_entry;
+		goto out;
 
-	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
-			     &do_rr);
-
+	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
 	if (do_rr) {
 		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
 
@@ -780,12 +914,19 @@
 		}
 	}
 
-	return match ? match : net->ipv6.fib6_null_entry;
+out:
+	if (!res->f6i) {
+		res->f6i = net->ipv6.fib6_null_entry;
+		res->nh = res->f6i->fib6_nh;
+		res->fib6_flags = res->f6i->fib6_flags;
+		res->fib6_type = res->f6i->fib6_type;
+	}
 }
 
-static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
+static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
 {
-	return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
+	return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
+	       res->nh->fib_nh_gw_family;
 }
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
@@ -869,17 +1010,17 @@
  */
 
 /* called with rcu_lock held */
-static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
+static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
 {
-	struct net_device *dev = rt->fib6_nh.nh_dev;
+	struct net_device *dev = res->nh->fib_nh_dev;
 
-	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
+	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
 		/* for copies of local routes, dst->dev needs to be the
 		 * device if it is a master device, the master device if
 		 * device is enslaved, and the loopback as the default
 		 */
 		if (netif_is_l3_slave(dev) &&
-		    !rt6_need_strict(&rt->fib6_dst.addr))
+		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
 			dev = l3mdev_master_dev_rcu(dev);
 		else if (!netif_is_l3_master(dev))
 			dev = dev_net(dev)->loopback_dev;
@@ -925,11 +1066,11 @@
 	return flags;
 }
 
-static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
+static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
 {
-	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
+	rt->dst.error = ip6_rt_type_to_error(fib6_type);
 
-	switch (ort->fib6_type) {
+	switch (fib6_type) {
 	case RTN_BLACKHOLE:
 		rt->dst.output = dst_discard_out;
 		rt->dst.input = dst_discard;
@@ -947,26 +1088,28 @@
 	}
 }
 
-static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
+static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
 {
-	if (ort->fib6_flags & RTF_REJECT) {
-		ip6_rt_init_dst_reject(rt, ort);
+	struct fib6_info *f6i = res->f6i;
+
+	if (res->fib6_flags & RTF_REJECT) {
+		ip6_rt_init_dst_reject(rt, res->fib6_type);
 		return;
 	}
 
 	rt->dst.error = 0;
 	rt->dst.output = ip6_output;
 
-	if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
+	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
 		rt->dst.input = ip6_input;
-	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
+	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
 		rt->dst.input = ip6_mc_input;
 	} else {
 		rt->dst.input = ip6_forward;
 	}
 
-	if (ort->fib6_nh.nh_lwtstate) {
-		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
+	if (res->nh->fib_nh_lws) {
+		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
 		lwtunnel_set_redirect(&rt->dst);
 	}
 
@@ -978,29 +1121,29 @@
 {
 	rt->rt6i_flags &= ~RTF_EXPIRES;
 	rcu_assign_pointer(rt->from, from);
-	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
-	if (from->fib6_metrics != &dst_default_metrics) {
-		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
-		refcount_inc(&from->fib6_metrics->refcnt);
-	}
+	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
 }
 
-/* Caller must already hold reference to @ort */
-static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
+/* Caller must already hold reference to f6i in result */
+static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
 {
-	struct net_device *dev = fib6_info_nh_dev(ort);
+	const struct fib6_nh *nh = res->nh;
+	const struct net_device *dev = nh->fib_nh_dev;
+	struct fib6_info *f6i = res->f6i;
 
-	ip6_rt_init_dst(rt, ort);
+	ip6_rt_init_dst(rt, res);
 
-	rt->rt6i_dst = ort->fib6_dst;
+	rt->rt6i_dst = f6i->fib6_dst;
 	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
-	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
-	rt->rt6i_flags = ort->fib6_flags;
-	rt6_set_from(rt, ort);
+	rt->rt6i_flags = res->fib6_flags;
+	if (nh->fib_nh_gw_family) {
+		rt->rt6i_gateway = nh->fib_nh_gw6;
+		rt->rt6i_flags |= RTF_GATEWAY;
+	}
+	rt6_set_from(rt, f6i);
 #ifdef CONFIG_IPV6_SUBTREES
-	rt->rt6i_src = ort->fib6_src;
+	rt->rt6i_src = f6i->fib6_src;
 #endif
-	rt->rt6i_prefsrc = ort->fib6_prefsrc;
 }
 
 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
@@ -1021,14 +1164,13 @@
 	}
 }
 
-static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
-			  bool null_fallback)
+static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
 {
 	struct rt6_info *rt = *prt;
 
 	if (dst_hold_safe(&rt->dst))
 		return true;
-	if (null_fallback) {
+	if (net) {
 		rt = net->ipv6.ip6_null_entry;
 		dst_hold(&rt->dst);
 	} else {
@@ -1039,21 +1181,29 @@
 }
 
 /* called with rcu_lock held */
-static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
+static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
 {
-	unsigned short flags = fib6_info_dst_flags(rt);
-	struct net_device *dev = rt->fib6_nh.nh_dev;
+	struct net_device *dev = res->nh->fib_nh_dev;
+	struct fib6_info *f6i = res->f6i;
+	unsigned short flags;
 	struct rt6_info *nrt;
 
-	if (!fib6_info_hold_safe(rt))
-		return NULL;
+	if (!fib6_info_hold_safe(f6i))
+		goto fallback;
 
+	flags = fib6_info_dst_flags(f6i);
 	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
-	if (nrt)
-		ip6_rt_copy_init(nrt, rt);
-	else
-		fib6_info_release(rt);
+	if (!nrt) {
+		fib6_info_release(f6i);
+		goto fallback;
+	}
 
+	ip6_rt_copy_init(nrt, res);
+	return nrt;
+
+fallback:
+	nrt = dev_net(dev)->ipv6.ip6_null_entry;
+	dst_hold(&nrt->dst);
 	return nrt;
 }
 
@@ -1063,7 +1213,7 @@
 					     const struct sk_buff *skb,
 					     int flags)
 {
-	struct fib6_info *f6i;
+	struct fib6_result res = {};
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 
@@ -1073,41 +1223,41 @@
 	rcu_read_lock();
 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
-	f6i = rcu_dereference(fn->leaf);
-	if (!f6i) {
-		f6i = net->ipv6.fib6_null_entry;
-	} else {
-		f6i = rt6_device_match(net, f6i, &fl6->saddr,
-				      fl6->flowi6_oif, flags);
-		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
-			f6i = fib6_multipath_select(net, f6i, fl6,
-						    fl6->flowi6_oif, skb,
-						    flags);
-	}
-	if (f6i == net->ipv6.fib6_null_entry) {
+	res.f6i = rcu_dereference(fn->leaf);
+	if (!res.f6i)
+		res.f6i = net->ipv6.fib6_null_entry;
+	else
+		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
+				 flags);
+
+	if (res.f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
-	}
 
-	trace_fib6_table_lookup(net, f6i, table, fl6);
-
-	/* Search through exception table */
-	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
-	if (rt) {
-		if (ip6_hold_safe(net, &rt, true))
-			dst_use_noref(&rt->dst, jiffies);
-	} else if (f6i == net->ipv6.fib6_null_entry) {
 		rt = net->ipv6.ip6_null_entry;
 		dst_hold(&rt->dst);
-	} else {
-		rt = ip6_create_rt_rcu(f6i);
-		if (!rt) {
-			rt = net->ipv6.ip6_null_entry;
-			dst_hold(&rt->dst);
-		}
+		goto out;
+	} else if (res.fib6_flags & RTF_REJECT) {
+		goto do_create;
 	}
 
+	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
+			 fl6->flowi6_oif != 0, skb, flags);
+
+	/* Search through exception table */
+	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
+	if (rt) {
+		if (ip6_hold_safe(net, &rt))
+			dst_use_noref(&rt->dst, jiffies);
+	} else {
+do_create:
+		rt = ip6_create_rt_rcu(&res);
+	}
+
+out:
+	trace_fib6_table_lookup(net, &res, table, fl6);
+
 	rcu_read_unlock();
 
 	return rt;
@@ -1173,10 +1323,11 @@
 	return __ip6_ins_rt(rt, &info, NULL);
 }
 
-static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
+static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
 					   const struct in6_addr *daddr,
 					   const struct in6_addr *saddr)
 {
+	struct fib6_info *f6i = res->f6i;
 	struct net_device *dev;
 	struct rt6_info *rt;
 
@@ -1184,25 +1335,25 @@
 	 *	Clone the route.
 	 */
 
-	if (!fib6_info_hold_safe(ort))
+	if (!fib6_info_hold_safe(f6i))
 		return NULL;
 
-	dev = ip6_rt_get_dev_rcu(ort);
+	dev = ip6_rt_get_dev_rcu(res);
 	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
 	if (!rt) {
-		fib6_info_release(ort);
+		fib6_info_release(f6i);
 		return NULL;
 	}
 
-	ip6_rt_copy_init(rt, ort);
+	ip6_rt_copy_init(rt, res);
 	rt->rt6i_flags |= RTF_CACHE;
 	rt->dst.flags |= DST_HOST;
 	rt->rt6i_dst.addr = *daddr;
 	rt->rt6i_dst.plen = 128;
 
-	if (!rt6_is_gw_or_nonexthop(ort)) {
-		if (ort->fib6_dst.plen != 128 &&
-		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
+	if (!rt6_is_gw_or_nonexthop(res)) {
+		if (f6i->fib6_dst.plen != 128 &&
+		    ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
 			rt->rt6i_flags |= RTF_ANYCAST;
 #ifdef CONFIG_IPV6_SUBTREES
 		if (rt->rt6i_src.plen && saddr) {
@@ -1215,58 +1366,59 @@
 	return rt;
 }
 
-static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
+static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
 {
-	unsigned short flags = fib6_info_dst_flags(rt);
+	struct fib6_info *f6i = res->f6i;
+	unsigned short flags = fib6_info_dst_flags(f6i);
 	struct net_device *dev;
 	struct rt6_info *pcpu_rt;
 
-	if (!fib6_info_hold_safe(rt))
+	if (!fib6_info_hold_safe(f6i))
 		return NULL;
 
 	rcu_read_lock();
-	dev = ip6_rt_get_dev_rcu(rt);
+	dev = ip6_rt_get_dev_rcu(res);
 	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
 	rcu_read_unlock();
 	if (!pcpu_rt) {
-		fib6_info_release(rt);
+		fib6_info_release(f6i);
 		return NULL;
 	}
-	ip6_rt_copy_init(pcpu_rt, rt);
+	ip6_rt_copy_init(pcpu_rt, res);
 	pcpu_rt->rt6i_flags |= RTF_PCPU;
 	return pcpu_rt;
 }
 
 /* It should be called with rcu_read_lock() acquired */
-static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
+static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
 {
-	struct rt6_info *pcpu_rt, **p;
+	struct rt6_info *pcpu_rt;
 
-	p = this_cpu_ptr(rt->rt6i_pcpu);
-	pcpu_rt = *p;
-
-	if (pcpu_rt)
-		ip6_hold_safe(NULL, &pcpu_rt, false);
+	pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
 
 	return pcpu_rt;
 }
 
 static struct rt6_info *rt6_make_pcpu_route(struct net *net,
-					    struct fib6_info *rt)
+					    const struct fib6_result *res)
 {
 	struct rt6_info *pcpu_rt, *prev, **p;
 
-	pcpu_rt = ip6_rt_pcpu_alloc(rt);
-	if (!pcpu_rt) {
-		dst_hold(&net->ipv6.ip6_null_entry->dst);
-		return net->ipv6.ip6_null_entry;
-	}
+	pcpu_rt = ip6_rt_pcpu_alloc(res);
+	if (!pcpu_rt)
+		return NULL;
 
-	dst_hold(&pcpu_rt->dst);
-	p = this_cpu_ptr(rt->rt6i_pcpu);
+	p = this_cpu_ptr(res->nh->rt6i_pcpu);
 	prev = cmpxchg(p, NULL, pcpu_rt);
 	BUG_ON(prev);
 
+	if (res->f6i->fib6_destroying) {
+		struct fib6_info *from;
+
+		from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+		fib6_info_release(from);
+	}
+
 	return pcpu_rt;
 }
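
rt6_make_pcpu_route() publishes the per-cpu copy with cmpxchg(p, NULL, pcpu_rt); the caller runs with BHs disabled, so a non-NULL prev would mean a double install on the same CPU, hence the BUG_ON(). A standalone sketch of this install-once shape, assuming only C11 atomics (the single slot stands in for this_cpu_ptr()):

#include <assert.h>
#include <stdatomic.h>
#include <stdlib.h>

static _Atomic(int *) slot;	/* one slot, standing in for this_cpu_ptr() */

static void install_once(int *newp)
{
	int *prev = NULL;

	/* publish only if the slot is still empty */
	atomic_compare_exchange_strong(&slot, &prev, newp);
	assert(prev == NULL);	/* mirrors BUG_ON(prev): we must be first */
}

int main(void)
{
	int *p = malloc(sizeof(*p));

	install_once(p);
	assert(atomic_load(&slot) == p);
	free(p);
	return 0;
}
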
 
@@ -1280,18 +1432,27 @@
 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
 				 struct rt6_exception *rt6_ex)
 {
+	struct fib6_info *from;
 	struct net *net;
 
 	if (!bucket || !rt6_ex)
 		return;
 
 	net = dev_net(rt6_ex->rt6i->dst.dev);
+	net->ipv6.rt6_stats->fib_rt_cache--;
+
+	/* completely purge the exception to allow releasing the held resources:
+	 * some [sk] caches may keep the dst around for an unlimited time
+	 */
+	from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
+	fib6_info_release(from);
+	dst_dev_put(&rt6_ex->rt6i->dst);
+
 	hlist_del_rcu(&rt6_ex->hlist);
 	dst_release(&rt6_ex->rt6i->dst);
 	kfree_rcu(rt6_ex, rcu);
 	WARN_ON_ONCE(!bucket->depth);
 	bucket->depth--;
-	net->ipv6.rt6_stats->fib_rt_cache--;
 }
 
 /* Remove oldest rt6_ex in bucket and free the memory
@@ -1395,14 +1556,15 @@
 	return NULL;
 }
 
-static unsigned int fib6_mtu(const struct fib6_info *rt)
+static unsigned int fib6_mtu(const struct fib6_result *res)
 {
+	const struct fib6_nh *nh = res->nh;
 	unsigned int mtu;
 
-	if (rt->fib6_pmtu) {
-		mtu = rt->fib6_pmtu;
+	if (res->f6i->fib6_pmtu) {
+		mtu = res->f6i->fib6_pmtu;
 	} else {
-		struct net_device *dev = fib6_info_nh_dev(rt);
+		struct net_device *dev = nh->fib_nh_dev;
 		struct inet6_dev *idev;
 
 		rcu_read_lock();
@@ -1413,27 +1575,77 @@
 
 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
 
-	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
+	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
+}
+
+#define FIB6_EXCEPTION_BUCKET_FLUSHED  0x1UL
+
+/* used when the flushed bit is not relevant and only access to the
+ * bucket matters (i.e., all bucket users except rt6_insert_exception);
+ *
+ * called under rcu lock; sometimes called with rt6_exception_lock held
+ */
+static
+struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
+						       spinlock_t *lock)
+{
+	struct rt6_exception_bucket *bucket;
+
+	if (lock)
+		bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+						   lockdep_is_held(lock));
+	else
+		bucket = rcu_dereference(nh->rt6i_exception_bucket);
+
+	/* remove bucket flushed bit if set */
+	if (bucket) {
+		unsigned long p = (unsigned long)bucket;
+
+		p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
+		bucket = (struct rt6_exception_bucket *)p;
+	}
+
+	return bucket;
+}
+
+static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
+{
+	unsigned long p = (unsigned long)bucket;
+
+	return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
+}
+
+/* called with rt6_exception_lock held */
+static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
+					      spinlock_t *lock)
+{
+	struct rt6_exception_bucket *bucket;
+	unsigned long p;
+
+	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+					   lockdep_is_held(lock));
+
+	p = (unsigned long)bucket;
+	p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
+	bucket = (struct rt6_exception_bucket *)p;
+	rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
 }
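
The three helpers above keep the "flushed" flag in bit 0 of the bucket pointer itself, which is safe because kcalloc()ed buckets are at least word-aligned, leaving the low bit free. A userspace sketch of the same low-bit tagging, with illustrative names only:

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>

#define FLUSHED_BIT 0x1UL	/* like FIB6_EXCEPTION_BUCKET_FLUSHED */

struct bucket { int depth; };

static struct bucket *untag(struct bucket *b)
{
	return (struct bucket *)((unsigned long)b & ~FLUSHED_BIT);
}

static bool is_flushed(const struct bucket *b)
{
	return (unsigned long)b & FLUSHED_BIT;
}

static struct bucket *tag_flushed(struct bucket *b)
{
	return (struct bucket *)((unsigned long)b | FLUSHED_BIT);
}

int main(void)
{
	struct bucket *b = calloc(1, sizeof(*b)); /* aligned: bit 0 is free */
	struct bucket *p = tag_flushed(b);

	assert(is_flushed(p) && untag(p) == b && !is_flushed(b));
	free(b);
	return 0;
}
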
 
 static int rt6_insert_exception(struct rt6_info *nrt,
-				struct fib6_info *ort)
+				const struct fib6_result *res)
 {
 	struct net *net = dev_net(nrt->dst.dev);
 	struct rt6_exception_bucket *bucket;
+	struct fib6_info *f6i = res->f6i;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
+	struct fib6_nh *nh = res->nh;
 	int err = 0;
 
 	spin_lock_bh(&rt6_exception_lock);
 
-	if (ort->exception_bucket_flushed) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
-					lockdep_is_held(&rt6_exception_lock));
+	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+					  lockdep_is_held(&rt6_exception_lock));
 	if (!bucket) {
 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
 				 GFP_ATOMIC);
@@ -1441,29 +1653,27 @@
 			err = -ENOMEM;
 			goto out;
 		}
-		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+		rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+	} else if (fib6_nh_excptn_bucket_flushed(bucket)) {
+		err = -EINVAL;
+		goto out;
 	}
 
 #ifdef CONFIG_IPV6_SUBTREES
-	/* rt6i_src.plen != 0 indicates ort is in subtree
+	/* fib6_src.plen != 0 indicates f6i is in subtree
 	 * and exception table is indexed by a hash of
-	 * both rt6i_dst and rt6i_src.
+	 * both fib6_dst and fib6_src.
 	 * Otherwise, the exception table is indexed by
-	 * a hash of only rt6i_dst.
+	 * a hash of only fib6_dst.
 	 */
-	if (ort->fib6_src.plen)
+	if (f6i->fib6_src.plen)
 		src_key = &nrt->rt6i_src.addr;
 #endif
-
-	/* Update rt6i_prefsrc as it could be changed
-	 * in rt6_remove_prefsrc()
-	 */
-	nrt->rt6i_prefsrc = ort->fib6_prefsrc;
-	/* rt6_mtu_change() might lower mtu on ort.
+	/* rt6_mtu_change() might lower mtu on f6i.
 	 * Only insert this exception route if its mtu
-	 * is less than ort's mtu value.
+	 * is less than f6i's mtu value.
 	 */
-	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
+	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -1492,16 +1702,16 @@
 
 	/* Update fn->fn_sernum to invalidate all cached dst */
 	if (!err) {
-		spin_lock_bh(&ort->fib6_table->tb6_lock);
-		fib6_update_sernum(net, ort);
-		spin_unlock_bh(&ort->fib6_table->tb6_lock);
+		spin_lock_bh(&f6i->fib6_table->tb6_lock);
+		fib6_update_sernum(net, f6i);
+		spin_unlock_bh(&f6i->fib6_table->tb6_lock);
 		fib6_force_start_gc(net);
 	}
 
 	return err;
 }
 
-void rt6_flush_exceptions(struct fib6_info *rt)
+static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1509,77 +1719,106 @@
 	int i;
 
 	spin_lock_bh(&rt6_exception_lock);
-	/* Prevent rt6_insert_exception() to recreate the bucket list */
-	rt->exception_bucket_flushed = 1;
 
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-				    lockdep_is_held(&rt6_exception_lock));
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (!bucket)
 		goto out;
 
+	/* Prevent rt6_insert_exception() from recreating the bucket list */
+	if (!from)
+		fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
+
 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
-		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
-			rt6_remove_exception(bucket, rt6_ex);
-		WARN_ON_ONCE(bucket->depth);
+		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
+			if (!from ||
+			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
+				rt6_remove_exception(bucket, rt6_ex);
+		}
+		WARN_ON_ONCE(!from && bucket->depth);
 		bucket++;
 	}
-
 out:
 	spin_unlock_bh(&rt6_exception_lock);
 }
 
+static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
+{
+	struct fib6_info *f6i = arg;
+
+	fib6_nh_flush_exceptions(nh, f6i);
+
+	return 0;
+}
+
+void rt6_flush_exceptions(struct fib6_info *f6i)
+{
+	if (f6i->nh)
+		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
+					 f6i);
+	else
+		fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
+}
+
 /* Find cached rt in the hash table inside passed in rt
  * Caller has to hold rcu_read_lock()
  */
-static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
-					   struct in6_addr *daddr,
-					   struct in6_addr *saddr)
+static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
+					   const struct in6_addr *daddr,
+					   const struct in6_addr *saddr)
 {
+	const struct in6_addr *src_key = NULL;
 	struct rt6_exception_bucket *bucket;
-	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
-	struct rt6_info *res = NULL;
-
-	bucket = rcu_dereference(rt->rt6i_exception_bucket);
+	struct rt6_info *ret = NULL;
 
 #ifdef CONFIG_IPV6_SUBTREES
-	/* rt6i_src.plen != 0 indicates rt is in subtree
+	/* fib6_src.plen != 0 indicates f6i is in subtree
 	 * and exception table is indexed by a hash of
-	 * both rt6i_dst and rt6i_src.
-	 * Otherwise, the exception table is indexed by
-	 * a hash of only rt6i_dst.
+	 * both fib6_dst and fib6_src.
+	 * However, the src addr used to create the hash
+	 * might not be exactly the passed-in saddr, which
+	 * is a /128 addr from the flow.
+	 * So we need to use f6i->fib6_src to redo the lookup
+	 * if the passed-in saddr does not find anything.
+	 * (See the logic in ip6_rt_cache_alloc() on how
+	 * rt->rt6i_src is updated.)
 	 */
-	if (rt->fib6_src.plen)
+	if (res->f6i->fib6_src.plen)
 		src_key = saddr;
+find_ex:
 #endif
+	bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
 
 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
-		res = rt6_ex->rt6i;
+		ret = rt6_ex->rt6i;
 
-	return res;
+#ifdef CONFIG_IPV6_SUBTREES
+	/* Use fib6_src as src_key and redo lookup */
+	if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
+		src_key = &res->f6i->fib6_src.addr;
+		goto find_ex;
+	}
+#endif
+
+	return ret;
 }
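
The subtree case above now retries: first with the flow's /128 source address as the hash key, then with the route's own fib6_src if that misses. A toy sketch of the two-key retry shape, not part of the patch; the table is a stand-in and only the fallback logic matters:

#include <stdio.h>
#include <string.h>

struct entry { const char *src_key; int mtu; };

static struct entry table[] = { { "2001:db8::/64", 1280 } };

static struct entry *find(const char *key)
{
	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (!strcmp(table[i].src_key, key))
			return &table[i];
	return NULL;
}

int main(void)
{
	const char *flow_src = "2001:db8::1";	 /* /128 addr from the flow */
	const char *route_src = "2001:db8::/64"; /* the route's fib6_src */
	struct entry *e = find(flow_src);

	if (!e)				/* miss on the specific key: redo */
		e = find(route_src);
	printf("mtu=%d\n", e ? e->mtu : -1);	/* prints "mtu=1280" */
	return 0;
}
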
 
 /* Remove the passed in cached rt from the hash table that contains it */
-static int rt6_remove_exception_rt(struct rt6_info *rt)
+static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
+				    const struct rt6_info *rt)
 {
+	const struct in6_addr *src_key = NULL;
 	struct rt6_exception_bucket *bucket;
-	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
-	struct fib6_info *from;
 	int err;
 
-	from = rcu_dereference(rt->from);
-	if (!from ||
-	    !(rt->rt6i_flags & RTF_CACHE))
-		return -EINVAL;
-
-	if (!rcu_access_pointer(from->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
 		return -ENOENT;
 
 	spin_lock_bh(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
-				    lockdep_is_held(&rt6_exception_lock));
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
+
 #ifdef CONFIG_IPV6_SUBTREES
 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
 	 * and exception table is indexed by a hash of
@@ -1587,7 +1826,7 @@
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (from->fib6_src.plen)
+	if (plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
@@ -1604,23 +1843,60 @@
 	return err;
 }
 
+struct fib6_nh_excptn_arg {
+	struct rt6_info	*rt;
+	int		plen;
+};
+
+static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
+{
+	struct fib6_nh_excptn_arg *arg = _arg;
+	int err;
+
+	err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
+	if (err == 0)
+		return 1;
+
+	return 0;
+}
+
+static int rt6_remove_exception_rt(struct rt6_info *rt)
+{
+	struct fib6_info *from;
+
+	from = rcu_dereference(rt->from);
+	if (!from || !(rt->rt6i_flags & RTF_CACHE))
+		return -EINVAL;
+
+	if (from->nh) {
+		struct fib6_nh_excptn_arg arg = {
+			.rt = rt,
+			.plen = from->fib6_src.plen
+		};
+		int rc;
+
+		/* rc = 1 means an entry was found */
+		rc = nexthop_for_each_fib6_nh(from->nh,
+					      rt6_nh_remove_exception_rt,
+					      &arg);
+		return rc ? 0 : -ENOENT;
+	}
+
+	return fib6_nh_remove_exception(from->fib6_nh,
+					from->fib6_src.plen, rt);
+}
+
 /* Find rt6_ex which contains the passed in rt cache and
  * refresh its stamp
  */
-static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
+				     const struct rt6_info *rt)
 {
+	const struct in6_addr *src_key = NULL;
 	struct rt6_exception_bucket *bucket;
-	struct fib6_info *from = rt->from;
-	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 
-	if (!from ||
-	    !(rt->rt6i_flags & RTF_CACHE))
-		return;
-
-	rcu_read_lock();
-	bucket = rcu_dereference(from->rt6i_exception_bucket);
-
+	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
 #ifdef CONFIG_IPV6_SUBTREES
 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
 	 * and exception table is indexed by a hash of
@@ -1628,35 +1904,65 @@
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (from->fib6_src.plen)
+	if (plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
-	rt6_ex = __rt6_find_exception_rcu(&bucket,
-					  &rt->rt6i_dst.addr,
-					  src_key);
+	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
 	if (rt6_ex)
 		rt6_ex->stamp = jiffies;
-
-	rcu_read_unlock();
 }
 
-static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
+struct fib6_nh_match_arg {
+	const struct net_device *dev;
+	const struct in6_addr	*gw;
+	struct fib6_nh		*match;
+};
+
+/* determine if the fib6_nh has the given device and gateway */
+static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
 {
-	struct rt6_exception_bucket *bucket;
-	struct rt6_exception *rt6_ex;
-	int i;
+	struct fib6_nh_match_arg *arg = _arg;
 
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-					lockdep_is_held(&rt6_exception_lock));
+	if (arg->dev != nh->fib_nh_dev ||
+	    (arg->gw && !nh->fib_nh_gw_family) ||
+	    (!arg->gw && nh->fib_nh_gw_family) ||
+	    (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
+		return 0;
 
-	if (bucket) {
-		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
-			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
-				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
-			}
-			bucket++;
-		}
+	arg->match = nh;
+
+	/* found a match, break the loop */
+	return 1;
+}
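
fib6_nh_find_match() follows the convention used by every nexthop_for_each_fib6_nh() callback in this patch: return nonzero to stop the walk, zero to continue, and the walker propagates the callback's value so callers can tell "found" from "exhausted". A minimal standalone sketch of that convention (not the kernel walker):

#include <stdio.h>

struct nh { int id; };

static int for_each_nh(struct nh *nhs, int n,
		       int (*cb)(struct nh *, void *), void *arg)
{
	for (int i = 0; i < n; i++) {
		int ret = cb(&nhs[i], arg);

		if (ret)
			return ret;	/* callback asked to stop early */
	}
	return 0;			/* walked everything, no match */
}

static int match_id(struct nh *nh, void *arg)
{
	return nh->id == *(int *)arg;	/* returning 1 breaks the walk */
}

int main(void)
{
	struct nh nhs[] = { {1}, {2}, {3} };
	int want = 2;

	printf("found: %d\n", for_each_nh(nhs, 3, match_id, &want)); /* 1 */
	return 0;
}
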
+
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+	struct fib6_info *from;
+	struct fib6_nh *fib6_nh;
+
+	rcu_read_lock();
+
+	from = rcu_dereference(rt->from);
+	if (!from || !(rt->rt6i_flags & RTF_CACHE))
+		goto unlock;
+
+	if (from->nh) {
+		struct fib6_nh_match_arg arg = {
+			.dev = rt->dst.dev,
+			.gw = &rt->rt6i_gateway,
+		};
+
+		nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
+
+		if (!arg.match)
+			goto unlock;
+		fib6_nh = arg.match;
+	} else {
+		fib6_nh = from->fib6_nh;
 	}
+	fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
+unlock:
+	rcu_read_unlock();
 }
 
 static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
@@ -1682,15 +1988,13 @@
 }
 
 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
-				       struct fib6_info *rt, int mtu)
+				       const struct fib6_nh *nh, int mtu)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	int i;
 
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-					lockdep_is_held(&rt6_exception_lock));
-
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (!bucket)
 		return;
 
@@ -1712,21 +2016,19 @@
 
 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
 
-static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
-					struct in6_addr *gateway)
+static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
+					    const struct in6_addr *gateway)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	struct hlist_node *tmp;
 	int i;
 
-	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
 		return;
 
 	spin_lock_bh(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-				     lockdep_is_held(&rt6_exception_lock));
-
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (bucket) {
 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
 			hlist_for_each_entry_safe(rt6_ex, tmp,
@@ -1791,23 +2093,21 @@
 	gc_args->more++;
 }
 
-void rt6_age_exceptions(struct fib6_info *rt,
-			struct fib6_gc_args *gc_args,
-			unsigned long now)
+static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
+				   struct fib6_gc_args *gc_args,
+				   unsigned long now)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	struct hlist_node *tmp;
 	int i;
 
-	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
 		return;
 
 	rcu_read_lock_bh();
 	spin_lock(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-				    lockdep_is_held(&rt6_exception_lock));
-
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (bucket) {
 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
 			hlist_for_each_entry_safe(rt6_ex, tmp,
@@ -1822,12 +2122,41 @@
 	rcu_read_unlock_bh();
 }
 
+struct fib6_nh_age_excptn_arg {
+	struct fib6_gc_args	*gc_args;
+	unsigned long		now;
+};
+
+static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
+{
+	struct fib6_nh_age_excptn_arg *arg = _arg;
+
+	fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
+	return 0;
+}
+
+void rt6_age_exceptions(struct fib6_info *f6i,
+			struct fib6_gc_args *gc_args,
+			unsigned long now)
+{
+	if (f6i->nh) {
+		struct fib6_nh_age_excptn_arg arg = {
+			.gc_args = gc_args,
+			.now = now
+		};
+
+		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
+					 &arg);
+	} else {
+		fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+	}
+}
+
 /* must be called with rcu lock held */
-struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
-				    int oif, struct flowi6 *fl6, int strict)
+int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
+		      struct flowi6 *fl6, struct fib6_result *res, int strict)
 {
 	struct fib6_node *fn, *saved_fn;
-	struct fib6_info *f6i;
 
 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 	saved_fn = fn;
@@ -1836,8 +2165,8 @@
 		oif = 0;
 
 redo_rt6_select:
-	f6i = rt6_select(net, fn, oif, strict);
-	if (f6i == net->ipv6.fib6_null_entry) {
+	rt6_select(net, fn, oif, res, strict);
+	if (res->f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto redo_rt6_select;
@@ -1849,19 +2178,22 @@
 		}
 	}
 
-	trace_fib6_table_lookup(net, f6i, table, fl6);
+	trace_fib6_table_lookup(net, res, table, fl6);
 
-	return f6i;
+	return 0;
 }
 
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			       int oif, struct flowi6 *fl6,
 			       const struct sk_buff *skb, int flags)
 {
-	struct fib6_info *f6i;
-	struct rt6_info *rt;
+	struct fib6_result res = {};
+	struct rt6_info *rt = NULL;
 	int strict = 0;
 
+	WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
+		     !rcu_read_lock_held());
+
 	strict |= flags & RT6_LOOKUP_F_IFACE;
 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
 	if (net->ipv6.devconf_all->forwarding == 0)
@@ -1869,66 +2201,55 @@
 
 	rcu_read_lock();
 
-	f6i = fib6_table_lookup(net, table, oif, fl6, strict);
-	if (f6i->fib6_nsiblings)
-		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
+	fib6_table_lookup(net, table, oif, fl6, &res, strict);
+	if (res.f6i == net->ipv6.fib6_null_entry)
+		goto out;
 
-	if (f6i == net->ipv6.fib6_null_entry) {
-		rt = net->ipv6.ip6_null_entry;
-		rcu_read_unlock();
-		dst_hold(&rt->dst);
-		return rt;
-	}
+	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
 
 	/*Search through exception table */
-	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
 	if (rt) {
-		if (ip6_hold_safe(net, &rt, true))
-			dst_use_noref(&rt->dst, jiffies);
-
-		rcu_read_unlock();
-		return rt;
+		goto out;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
-			    !(f6i->fib6_flags & RTF_GATEWAY))) {
+			    !res.nh->fib_nh_gw_family)) {
 		/* Create a RTF_CACHE clone which will not be
 		 * owned by the fib6 tree.  It is for the special case where
 		 * the daddr in the skb during the neighbor look-up is different
 		 * from the fl6->daddr used to look-up route here.
 		 */
-		struct rt6_info *uncached_rt;
+		rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
 
-		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
-
-		rcu_read_unlock();
-
-		if (uncached_rt) {
-			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
-			 * No need for another dst_hold()
+		if (rt) {
+			/* 1 refcnt is taken during ip6_rt_cache_alloc().
+			 * As rt6_uncached_list_add() does not consume refcnt,
+			 * this refcnt is always returned to the caller, even
+			 * if the caller sets the RT6_LOOKUP_F_DST_NOREF flag.
 			 */
-			rt6_uncached_list_add(uncached_rt);
+			rt6_uncached_list_add(rt);
 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
-		} else {
-			uncached_rt = net->ipv6.ip6_null_entry;
-			dst_hold(&uncached_rt->dst);
-		}
+			rcu_read_unlock();
 
-		return uncached_rt;
+			return rt;
+		}
 	} else {
 		/* Get a percpu copy */
-
-		struct rt6_info *pcpu_rt;
-
 		local_bh_disable();
-		pcpu_rt = rt6_get_pcpu_route(f6i);
+		rt = rt6_get_pcpu_route(&res);
 
-		if (!pcpu_rt)
-			pcpu_rt = rt6_make_pcpu_route(net, f6i);
+		if (!rt)
+			rt = rt6_make_pcpu_route(net, &res);
 
 		local_bh_enable();
-		rcu_read_unlock();
-
-		return pcpu_rt;
 	}
+out:
+	if (!rt)
+		rt = net->ipv6.ip6_null_entry;
+	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
+		ip6_hold_safe(net, &rt);
+	rcu_read_unlock();
+
+	return rt;
 }
 EXPORT_SYMBOL_GPL(ip6_pol_route);
 
@@ -2053,17 +2374,54 @@
 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
 		}
 		break;
+	case 2:
+		memset(&hash_keys, 0, sizeof(hash_keys));
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+		if (skb) {
+			struct flow_keys keys;
+
+			if (!flkeys) {
+				skb_flow_dissect_flow_keys(skb, &keys, 0);
+				flkeys = &keys;
+			}
+
+			/* Inner can be v4 or v6 */
+			if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+				hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+				hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+			} else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+				hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+				hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+				hash_keys.tags.flow_label = flkeys->tags.flow_label;
+				hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+			} else {
+				/* Same as case 0 */
+				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+				ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+			}
+		} else {
+			/* Same as case 0 */
+			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+			hash_keys.addrs.v6addrs.src = fl6->saddr;
+			hash_keys.addrs.v6addrs.dst = fl6->daddr;
+			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
+			hash_keys.basic.ip_proto = fl6->flowi6_proto;
+		}
+		break;
 	}
 	mhash = flow_hash_from_keys(&hash_keys);
 
 	return mhash >> 1;
 }
 
+/* Called with rcu held */
 void ip6_route_input(struct sk_buff *skb)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct net *net = dev_net(skb->dev);
-	int flags = RT6_LOOKUP_F_HAS_SADDR;
+	int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
 	struct ip_tunnel_info *tun_info;
 	struct flowi6 fl6 = {
 		.flowi6_iif = skb->dev->ifindex,
@@ -2085,8 +2443,8 @@
 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
 	skb_dst_drop(skb);
-	skb_dst_set(skb,
-		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
+	skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
+						      &fl6, skb, flags));
 }
 
 static struct rt6_info *ip6_pol_route_output(struct net *net,
@@ -2098,14 +2456,17 @@
 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
 }
 
-struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
-					 struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_output_flags_noref(struct net *net,
+					       const struct sock *sk,
+					       struct flowi6 *fl6, int flags)
 {
 	bool any_src;
 
-	if (rt6_need_strict(&fl6->daddr)) {
+	if (ipv6_addr_type(&fl6->daddr) &
+	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
 		struct dst_entry *dst;
 
+		/* This function does not take refcnt on the dst */
 		dst = l3mdev_link_scope_lookup(net, fl6);
 		if (dst)
 			return dst;
@@ -2113,6 +2474,7 @@
 
 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
+	flags |= RT6_LOOKUP_F_DST_NOREF;
 	any_src = ipv6_addr_any(&fl6->saddr);
 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
 	    (fl6->flowi6_oif && any_src))
@@ -2125,6 +2487,28 @@
 
 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
 }
+EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
+
+struct dst_entry *ip6_route_output_flags(struct net *net,
+					 const struct sock *sk,
+					 struct flowi6 *fl6,
+					 int flags)
+{
+	struct dst_entry *dst;
+	struct rt6_info *rt6;
+
+	rcu_read_lock();
+	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
+	rt6 = (struct rt6_info *)dst;
+	/* For dst cached in uncached_list, refcnt is already taken. */
+	if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
+		dst = &net->ipv6.ip6_null_entry->dst;
+		dst_hold(dst);
+	}
+	rcu_read_unlock();
+
+	return dst;
+}
 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
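
The wrapper above captures the new noref contract: under rcu_read_lock() the dst is merely borrowed, and a real reference is taken exactly once at the boundary, only for callers that did not pass RT6_LOOKUP_F_DST_NOREF. A standalone sketch of this borrow-then-escalate pattern, with a pthread rwlock standing in for RCU and illustrative names throughout:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct obj { _Atomic int refcnt; int val; };

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static struct obj global = { 1, 42 };

/* borrowed pointer: only valid while the read lock is held */
static struct obj *lookup_noref(void)
{
	return &global;
}

/* take a real reference once, at the boundary, for long-lived callers */
static struct obj *lookup(void)
{
	struct obj *o;

	pthread_rwlock_rdlock(&lock);
	o = lookup_noref();
	atomic_fetch_add(&o->refcnt, 1);
	pthread_rwlock_unlock(&lock);
	return o;
}

int main(void)
{
	struct obj *o = lookup();

	printf("%d (refs=%d)\n", o->val, atomic_load(&o->refcnt));
	atomic_fetch_sub(&o->refcnt, 1);	/* caller's put */
	return 0;
}
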
 
 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
@@ -2183,7 +2567,7 @@
 {
 	u32 rt_cookie = 0;
 
-	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
+	if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
 	    rt_cookie != cookie)
 		return NULL;
 
@@ -2306,14 +2690,8 @@
 
 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
 {
-	bool from_set;
-
-	rcu_read_lock();
-	from_set = !!rcu_dereference(rt->from);
-	rcu_read_unlock();
-
 	return !(rt->rt6i_flags & RTF_CACHE) &&
-		(rt->rt6i_flags & RTF_PCPU || from_set);
+		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
 }
 
 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -2346,17 +2724,44 @@
 		if (rt6->rt6i_flags & RTF_CACHE)
 			rt6_update_exception_stamp_rt(rt6);
 	} else if (daddr) {
-		struct fib6_info *from;
+		struct fib6_result res = {};
 		struct rt6_info *nrt6;
 
 		rcu_read_lock();
-		from = rcu_dereference(rt6->from);
-		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
+		res.f6i = rcu_dereference(rt6->from);
+		if (!res.f6i)
+			goto out_unlock;
+
+		res.fib6_flags = res.f6i->fib6_flags;
+		res.fib6_type = res.f6i->fib6_type;
+
+		if (res.f6i->nh) {
+			struct fib6_nh_match_arg arg = {
+				.dev = dst->dev,
+				.gw = &rt6->rt6i_gateway,
+			};
+
+			nexthop_for_each_fib6_nh(res.f6i->nh,
+						 fib6_nh_find_match, &arg);
+
+			/* The fib6_info uses a nexthop that has no fib6_nh
+			 * matching dst->dev + gw. Should be impossible.
+			 */
+			if (!arg.match)
+				goto out_unlock;
+
+			res.nh = arg.match;
+		} else {
+			res.nh = res.f6i->fib6_nh;
+		}
+
+		nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
 		if (nrt6) {
 			rt6_do_update_pmtu(nrt6, mtu);
-			if (rt6_insert_exception(nrt6, from))
+			if (rt6_insert_exception(nrt6, &res))
 				dst_release_immediate(&nrt6->dst);
 		}
+out_unlock:
 		rcu_read_unlock();
 	}
 }
@@ -2372,15 +2777,14 @@
 {
 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
 	struct dst_entry *dst;
-	struct flowi6 fl6;
-
-	memset(&fl6, 0, sizeof(fl6));
-	fl6.flowi6_oif = oif;
-	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
-	fl6.daddr = iph->daddr;
-	fl6.saddr = iph->saddr;
-	fl6.flowlabel = ip6_flowinfo(iph);
-	fl6.flowi6_uid = uid;
+	struct flowi6 fl6 = {
+		.flowi6_oif = oif,
+		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
+		.daddr = iph->daddr,
+		.saddr = iph->saddr,
+		.flowlabel = ip6_flowinfo(iph),
+		.flowi6_uid = uid,
+	};
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
@@ -2428,6 +2832,51 @@
 		      NULL);
 }
 
+static bool ip6_redirect_nh_match(const struct fib6_result *res,
+				  struct flowi6 *fl6,
+				  const struct in6_addr *gw,
+				  struct rt6_info **ret)
+{
+	const struct fib6_nh *nh = res->nh;
+
+	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
+	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
+		return false;
+
+	/* rt_cache's gateway might be different from its 'parent'
+	 * in the case of an ip redirect.
+	 * So we keep searching in the exception table if the gateway
+	 * is different.
+	 */
+	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
+		struct rt6_info *rt_cache;
+
+		rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
+		if (rt_cache &&
+		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
+			*ret = rt_cache;
+			return true;
+		}
+		return false;
+	}
+	return true;
+}
+
+struct fib6_nh_rd_arg {
+	struct fib6_result	*res;
+	struct flowi6		*fl6;
+	const struct in6_addr	*gw;
+	struct rt6_info		**ret;
+};
+
+static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
+{
+	struct fib6_nh_rd_arg *arg = _arg;
+
+	arg->res->nh = nh;
+	return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
+}
+
 /* Handle redirects */
 struct ip6rd_flowi {
 	struct flowi6 fl6;
@@ -2441,10 +2890,23 @@
 					     int flags)
 {
 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
-	struct rt6_info *ret = NULL, *rt_cache;
+	struct rt6_info *ret = NULL;
+	struct fib6_result res = {};
+	struct fib6_nh_rd_arg arg = {
+		.res = &res,
+		.fl6 = fl6,
+		.gw  = &rdfl->gateway,
+		.ret = &ret
+	};
 	struct fib6_info *rt;
 	struct fib6_node *fn;
 
+	/* l3mdev_update_flow overrides oif if the device is enslaved; in
+	 * this case we must match on the real ingress device, so reset it
+	 */
+	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+		fl6->flowi6_oif = skb->dev->ifindex;
+
 	/* Get the "current" route for this destination and
 	 * check if the redirect has come from appropriate router.
 	 *
@@ -2459,34 +2921,25 @@
 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
-			continue;
+		res.f6i = rt;
 		if (fib6_check_expired(rt))
 			continue;
 		if (rt->fib6_flags & RTF_REJECT)
 			break;
-		if (!(rt->fib6_flags & RTF_GATEWAY))
-			continue;
-		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
-			continue;
-		/* rt_cache's gateway might be different from its 'parent'
-		 * in the case of an ip redirect.
-		 * So we keep searching in the exception table if the gateway
-		 * is different.
-		 */
-		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
-			rt_cache = rt6_find_cached_rt(rt,
-						      &fl6->daddr,
-						      &fl6->saddr);
-			if (rt_cache &&
-			    ipv6_addr_equal(&rdfl->gateway,
-					    &rt_cache->rt6i_gateway)) {
-				ret = rt_cache;
-				break;
-			}
-			continue;
+		if (unlikely(rt->nh)) {
+			if (nexthop_is_blackhole(rt->nh))
+				continue;
+			/* on match, res->nh is filled in and potentially ret */
+			if (nexthop_for_each_fib6_nh(rt->nh,
+						     fib6_nh_redirect_match,
+						     &arg))
+				goto out;
+		} else {
+			res.nh = rt->fib6_nh;
+			if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
+						  &ret))
+				goto out;
 		}
-		break;
 	}
 
 	if (!rt)
@@ -2502,15 +2955,20 @@
 			goto restart;
 	}
 
+	res.f6i = rt;
+	res.nh = rt->fib6_nh;
 out:
-	if (ret)
-		ip6_hold_safe(net, &ret, true);
-	else
-		ret = ip6_create_rt_rcu(rt);
+	if (ret) {
+		ip6_hold_safe(net, &ret);
+	} else {
+		res.fib6_flags = res.f6i->fib6_flags;
+		res.fib6_type = res.f6i->fib6_type;
+		ret = ip6_create_rt_rcu(&res);
+	}
 
 	rcu_read_unlock();
 
-	trace_fib6_table_lookup(net, rt, table, fl6);
+	trace_fib6_table_lookup(net, &res, table, fl6);
 	return ret;
 };
 
@@ -2534,16 +2992,15 @@
 {
 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
 	struct dst_entry *dst;
-	struct flowi6 fl6;
-
-	memset(&fl6, 0, sizeof(fl6));
-	fl6.flowi6_iif = LOOPBACK_IFINDEX;
-	fl6.flowi6_oif = oif;
-	fl6.flowi6_mark = mark;
-	fl6.daddr = iph->daddr;
-	fl6.saddr = iph->saddr;
-	fl6.flowlabel = ip6_flowinfo(iph);
-	fl6.flowi6_uid = uid;
+	struct flowi6 fl6 = {
+		.flowi6_iif = LOOPBACK_IFINDEX,
+		.flowi6_oif = oif,
+		.flowi6_mark = mark,
+		.daddr = iph->daddr,
+		.saddr = iph->saddr,
+		.flowlabel = ip6_flowinfo(iph),
+		.flowi6_uid = uid,
+	};
 
 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
 	rt6_do_redirect(dst, NULL, skb);
@@ -2551,21 +3008,18 @@
 }
 EXPORT_SYMBOL_GPL(ip6_redirect);
 
-void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
-			    u32 mark)
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
 	struct dst_entry *dst;
-	struct flowi6 fl6;
-
-	memset(&fl6, 0, sizeof(fl6));
-	fl6.flowi6_iif = LOOPBACK_IFINDEX;
-	fl6.flowi6_oif = oif;
-	fl6.flowi6_mark = mark;
-	fl6.daddr = msg->dest;
-	fl6.saddr = iph->daddr;
-	fl6.flowi6_uid = sock_net_uid(net, NULL);
+	struct flowi6 fl6 = {
+		.flowi6_iif = LOOPBACK_IFINDEX,
+		.flowi6_oif = oif,
+		.daddr = msg->dest,
+		.saddr = iph->daddr,
+		.flowi6_uid = sock_net_uid(net, NULL),
+	};
 
 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
 	rt6_do_redirect(dst, NULL, skb);
@@ -2632,13 +3086,14 @@
  * based on ip6_dst_mtu_forward and exception logic of
  * rt6_find_cached_rt; called with rcu_read_lock
  */
-u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
-		      struct in6_addr *saddr)
+u32 ip6_mtu_from_fib6(const struct fib6_result *res,
+		      const struct in6_addr *daddr,
+		      const struct in6_addr *saddr)
 {
-	struct rt6_exception_bucket *bucket;
-	struct rt6_exception *rt6_ex;
-	struct in6_addr *src_key;
+	const struct fib6_nh *nh = res->nh;
+	struct fib6_info *f6i = res->f6i;
 	struct inet6_dev *idev;
+	struct rt6_info *rt;
 	u32 mtu = 0;
 
 	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
@@ -2647,19 +3102,11 @@
 			goto out;
 	}
 
-	src_key = NULL;
-#ifdef CONFIG_IPV6_SUBTREES
-	if (f6i->fib6_src.plen)
-		src_key = saddr;
-#endif
-
-	bucket = rcu_dereference(f6i->rt6i_exception_bucket);
-	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
-	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
-		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
-
-	if (likely(!mtu)) {
-		struct net_device *dev = fib6_info_nh_dev(f6i);
+	rt = rt6_find_cached_rt(res, daddr, saddr);
+	if (unlikely(rt)) {
+		mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
+	} else {
+		struct net_device *dev = nh->fib_nh_dev;
 
 		mtu = IPV6_MIN_MTU;
 		idev = __in6_dev_get(dev);
@@ -2669,7 +3116,7 @@
 
 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
 out:
-	return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
+	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
 }
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
@@ -2736,28 +3183,9 @@
 	return entries > rt_max_size;
 }
 
-static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
-			       struct fib6_config *cfg)
-{
-	struct dst_metrics *p;
-
-	if (!cfg->fc_mx)
-		return 0;
-
-	p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
-	if (unlikely(!p))
-		return -ENOMEM;
-
-	refcount_set(&p->refcnt, 1);
-	rt->fib6_metrics = p;
-
-	return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
-}
-
-static struct rt6_info *ip6_nh_lookup_table(struct net *net,
-					    struct fib6_config *cfg,
-					    const struct in6_addr *gw_addr,
-					    u32 tbid, int flags)
+static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
+			       const struct in6_addr *gw_addr, u32 tbid,
+			       int flags, struct fib6_result *res)
 {
 	struct flowi6 fl6 = {
 		.flowi6_oif = cfg->fc_ifindex,
@@ -2765,25 +3193,23 @@
 		.saddr = cfg->fc_prefsrc,
 	};
 	struct fib6_table *table;
-	struct rt6_info *rt;
+	int err;
 
 	table = fib6_get_table(net, tbid);
 	if (!table)
-		return NULL;
+		return -EINVAL;
 
 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
 		flags |= RT6_LOOKUP_F_HAS_SADDR;
 
 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
-	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
 
-	/* if table lookup failed, fall back to full lookup */
-	if (rt == net->ipv6.ip6_null_entry) {
-		ip6_rt_put(rt);
-		rt = NULL;
-	}
+	err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
+	if (!err && res->f6i != net->ipv6.fib6_null_entry)
+		fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
+				 cfg->fc_ifindex != 0, NULL, flags);
 
-	return rt;
+	return err;
 }
 
 static int ip6_route_check_nh_onlink(struct net *net,
@@ -2791,25 +3217,19 @@
 				     const struct net_device *dev,
 				     struct netlink_ext_ack *extack)
 {
-	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
+	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
-	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
-	struct rt6_info *grt;
+	struct fib6_result res = {};
 	int err;
 
-	err = 0;
-	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
-	if (grt) {
-		if (!grt->dst.error &&
-		    /* ignore match if it is the default route */
-		    grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) &&
-		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
-			NL_SET_ERR_MSG(extack,
-				       "Nexthop has invalid gateway or device mismatch");
-			err = -EINVAL;
-		}
-
-		ip6_rt_put(grt);
+	err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
+	if (!err && !(res.fib6_flags & RTF_REJECT) &&
+	    /* ignore match if it is the default route */
+	    !ipv6_addr_any(&res.f6i->fib6_dst.addr) &&
+	    (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) {
+		NL_SET_ERR_MSG(extack,
+			       "Nexthop has invalid gateway or device mismatch");
+		err = -EINVAL;
 	}
 
 	return err;
@@ -2822,47 +3242,50 @@
 {
 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
 	struct net_device *dev = _dev ? *_dev : NULL;
-	struct rt6_info *grt = NULL;
+	int flags = RT6_LOOKUP_F_IFACE;
+	struct fib6_result res = {};
 	int err = -EHOSTUNREACH;
 
 	if (cfg->fc_table) {
-		int flags = RT6_LOOKUP_F_IFACE;
-
-		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
-					  cfg->fc_table, flags);
-		if (grt) {
-			if (grt->rt6i_flags & RTF_GATEWAY ||
-			    (dev && dev != grt->dst.dev)) {
-				ip6_rt_put(grt);
-				grt = NULL;
-			}
-		}
+		err = ip6_nh_lookup_table(net, cfg, gw_addr,
+					  cfg->fc_table, flags, &res);
+		/* The gateway address cannot itself require another
+		 * gateway or resolve to a reject route. If a device is
+		 * given, it must match the result.
+		 */
+		if (err || res.fib6_flags & RTF_REJECT ||
+		    res.nh->fib_nh_gw_family ||
+		    (dev && dev != res.nh->fib_nh_dev))
+			err = -EHOSTUNREACH;
 	}
 
-	if (!grt)
-		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
+	if (err < 0) {
+		struct flowi6 fl6 = {
+			.flowi6_oif = cfg->fc_ifindex,
+			.daddr = *gw_addr,
+		};
 
-	if (!grt)
-		goto out;
+		err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
+		if (err || res.fib6_flags & RTF_REJECT ||
+		    res.nh->fib_nh_gw_family)
+			err = -EHOSTUNREACH;
 
+		if (err)
+			return err;
+
+		fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
+				 cfg->fc_ifindex != 0, NULL, flags);
+	}
+
+	err = 0;
 	if (dev) {
-		if (dev != grt->dst.dev) {
-			ip6_rt_put(grt);
-			goto out;
-		}
+		if (dev != res.nh->fib_nh_dev)
+			err = -EHOSTUNREACH;
 	} else {
-		*_dev = dev = grt->dst.dev;
-		*idev = grt->rt6i_idev;
+		*_dev = dev = res.nh->fib_nh_dev;
 		dev_hold(dev);
-		in6_dev_hold(grt->rt6i_idev);
+		*idev = in6_dev_get(dev);
 	}
 
-	if (!(grt->rt6i_flags & RTF_GATEWAY))
-		err = 0;
-
-	ip6_rt_put(grt);
-
-out:
 	return err;
 }
 
@@ -2903,11 +3326,15 @@
 			goto out;
 		}
 
+		rcu_read_lock();
+
 		if (cfg->fc_flags & RTNH_F_ONLINK)
 			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
 		else
 			err = ip6_route_check_nh(net, cfg, _dev, idev);
 
+		rcu_read_unlock();
+
 		if (err)
 			goto out;
 	}
@@ -2939,17 +3366,187 @@
 	return err;
 }
 
+static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
+{
+	if ((flags & RTF_REJECT) ||
+	    (dev && (dev->flags & IFF_LOOPBACK) &&
+	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
+	     !(flags & RTF_LOCAL)))
+		return true;
+
+	return false;
+}
+
+int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+		 struct fib6_config *cfg, gfp_t gfp_flags,
+		 struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = NULL;
+	struct inet6_dev *idev = NULL;
+	int addr_type;
+	int err;
+
+	fib6_nh->fib_nh_family = AF_INET6;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+	fib6_nh->last_probe = jiffies;
+#endif
+
+	err = -ENODEV;
+	if (cfg->fc_ifindex) {
+		dev = dev_get_by_index(net, cfg->fc_ifindex);
+		if (!dev)
+			goto out;
+		idev = in6_dev_get(dev);
+		if (!idev)
+			goto out;
+	}
+
+	if (cfg->fc_flags & RTNH_F_ONLINK) {
+		if (!dev) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop device required for onlink");
+			goto out;
+		}
+
+		if (!(dev->flags & IFF_UP)) {
+			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+			err = -ENETDOWN;
+			goto out;
+		}
+
+		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
+	}
+
+	fib6_nh->fib_nh_weight = 1;
+
+	/* We cannot add true routes via loopback here; they would result
+	 * in kernel looping.  Promote them to reject routes instead.
+	 */
+	addr_type = ipv6_addr_type(&cfg->fc_dst);
+	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
+		/* hold loopback dev/idev if we haven't done so. */
+		if (dev != net->loopback_dev) {
+			if (dev) {
+				dev_put(dev);
+				in6_dev_put(idev);
+			}
+			dev = net->loopback_dev;
+			dev_hold(dev);
+			idev = in6_dev_get(dev);
+			if (!idev) {
+				err = -ENODEV;
+				goto out;
+			}
+		}
+		goto pcpu_alloc;
+	}
+
+	if (cfg->fc_flags & RTF_GATEWAY) {
+		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+		if (err)
+			goto out;
+
+		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
+		fib6_nh->fib_nh_gw_family = AF_INET6;
+	}
+
+	err = -ENODEV;
+	if (!dev)
+		goto out;
+
+	if (idev->cnf.disable_ipv6) {
+		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
+		err = -EACCES;
+		goto out;
+	}
+
+	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
+		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+		err = -ENETDOWN;
+		goto out;
+	}
+
+	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
+	    !netif_carrier_ok(dev))
+		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+
+	err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
+				 cfg->fc_encap_type, cfg, gfp_flags, extack);
+	if (err)
+		goto out;
+
+pcpu_alloc:
+	fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+	if (!fib6_nh->rt6i_pcpu) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	fib6_nh->fib_nh_dev = dev;
+	fib6_nh->fib_nh_oif = dev->ifindex;
+	err = 0;
+out:
+	if (idev)
+		in6_dev_put(idev);
+
+	if (err) {
+		lwtstate_put(fib6_nh->fib_nh_lws);
+		fib6_nh->fib_nh_lws = NULL;
+		if (dev)
+			dev_put(dev);
+	}
+
+	return err;
+}
+
+void fib6_nh_release(struct fib6_nh *fib6_nh)
+{
+	struct rt6_exception_bucket *bucket;
+
+	rcu_read_lock();
+
+	fib6_nh_flush_exceptions(fib6_nh, NULL);
+	bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
+	if (bucket) {
+		rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
+		kfree(bucket);
+	}
+
+	rcu_read_unlock();
+
+	if (fib6_nh->rt6i_pcpu) {
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			struct rt6_info **ppcpu_rt;
+			struct rt6_info *pcpu_rt;
+
+			ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+			pcpu_rt = *ppcpu_rt;
+			if (pcpu_rt) {
+				dst_dev_put(&pcpu_rt->dst);
+				dst_release(&pcpu_rt->dst);
+				*ppcpu_rt = NULL;
+			}
+		}
+
+		free_percpu(fib6_nh->rt6i_pcpu);
+	}
+
+	fib_nh_common_release(&fib6_nh->nh_common);
+}
+
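+/* A minimal usage sketch (illustrative; fib6_nh_usage_sketch is not a
+ * kernel function): fib6_nh_init() cleans up after itself on failure,
+ * so only a successfully initialized fib6_nh needs fib6_nh_release().
+ */
+static int __maybe_unused fib6_nh_usage_sketch(struct net *net,
+					       struct fib6_config *cfg,
+					       struct netlink_ext_ack *extack)
+{
+	struct fib6_nh nh = {};
+	int err;
+
+	err = fib6_nh_init(net, &nh, cfg, GFP_KERNEL, extack);
+	if (err)
+		return err;
+
+	/* ... use nh.fib_nh_dev, nh.fib_nh_gw6, nh.rt6i_pcpu ... */
+
+	fib6_nh_release(&nh);
+	return 0;
+}
+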
 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 					      gfp_t gfp_flags,
 					      struct netlink_ext_ack *extack)
 {
 	struct net *net = cfg->fc_nlinfo.nl_net;
 	struct fib6_info *rt = NULL;
-	struct net_device *dev = NULL;
-	struct inet6_dev *idev = NULL;
+	struct nexthop *nh = NULL;
 	struct fib6_table *table;
-	int addr_type;
+	struct fib6_nh *fib6_nh;
 	int err = -EINVAL;
+	int addr_type;
 
 	/* RTF_PCPU is an internal flag; can not be set by userspace */
 	if (cfg->fc_flags & RTF_PCPU) {
@@ -2983,32 +3580,15 @@
 		goto out;
 	}
 #endif
-	if (cfg->fc_ifindex) {
-		err = -ENODEV;
-		dev = dev_get_by_index(net, cfg->fc_ifindex);
-		if (!dev)
-			goto out;
-		idev = in6_dev_get(dev);
-		if (!idev)
-			goto out;
-	}
-
-	if (cfg->fc_metric == 0)
-		cfg->fc_metric = IP6_RT_PRIO_USER;
-
-	if (cfg->fc_flags & RTNH_F_ONLINK) {
-		if (!dev) {
-			NL_SET_ERR_MSG(extack,
-				       "Nexthop device required for onlink");
-			err = -ENODEV;
+	if (cfg->fc_nh_id) {
+		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+		if (!nh) {
+			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
 			goto out;
 		}
-
-		if (!(dev->flags & IFF_UP)) {
-			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
-			err = -ENETDOWN;
+		err = fib6_check_nexthop(nh, cfg, extack);
+		if (err)
 			goto out;
-		}
 	}
 
 	err = -ENOBUFS;
@@ -3027,17 +3607,22 @@
 		goto out;
 
 	err = -ENOMEM;
-	rt = fib6_info_alloc(gfp_flags);
+	rt = fib6_info_alloc(gfp_flags, !nh);
 	if (!rt)
 		goto out;
 
+	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
+					       extack);
+	if (IS_ERR(rt->fib6_metrics)) {
+		err = PTR_ERR(rt->fib6_metrics);
+		/* Do not leave garbage there. */
+		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
+		goto out;
+	}
+
 	if (cfg->fc_flags & RTF_ADDRCONF)
 		rt->dst_nocount = true;
 
-	err = ip6_convert_metrics(net, rt, cfg);
-	if (err < 0)
-		goto out;
-
 	if (cfg->fc_flags & RTF_EXPIRES)
 		fib6_set_expires(rt, jiffies +
 				clock_t_to_jiffies(cfg->fc_expires));
@@ -3048,18 +3633,10 @@
 		cfg->fc_protocol = RTPROT_BOOT;
 	rt->fib6_protocol = cfg->fc_protocol;
 
-	addr_type = ipv6_addr_type(&cfg->fc_dst);
-
-	if (cfg->fc_encap) {
-		struct lwtunnel_state *lwtstate;
-
-		err = lwtunnel_build_state(cfg->fc_encap_type,
-					   cfg->fc_encap, AF_INET6, cfg,
-					   &lwtstate, extack);
-		if (err)
-			goto out;
-		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
-	}
+	rt->fib6_table = table;
+	rt->fib6_metric = cfg->fc_metric;
+	rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
+	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
 
 	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
 	rt->fib6_dst.plen = cfg->fc_dst_len;
@@ -3070,62 +3647,36 @@
 	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
 	rt->fib6_src.plen = cfg->fc_src_len;
 #endif
-
-	rt->fib6_metric = cfg->fc_metric;
-	rt->fib6_nh.nh_weight = 1;
-
-	rt->fib6_type = cfg->fc_type;
-
-	/* We cannot add true routes via loopback here,
-	   they would result in kernel looping; promote them to reject routes
-	 */
-	if ((cfg->fc_flags & RTF_REJECT) ||
-	    (dev && (dev->flags & IFF_LOOPBACK) &&
-	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
-	     !(cfg->fc_flags & RTF_LOCAL))) {
-		/* hold loopback dev/idev if we haven't done so. */
-		if (dev != net->loopback_dev) {
-			if (dev) {
-				dev_put(dev);
-				in6_dev_put(idev);
-			}
-			dev = net->loopback_dev;
-			dev_hold(dev);
-			idev = in6_dev_get(dev);
-			if (!idev) {
-				err = -ENODEV;
-				goto out;
-			}
+	if (nh) {
+		if (!nexthop_get(nh)) {
+			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+			goto out;
 		}
-		rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
-		goto install_route;
-	}
-
-	if (cfg->fc_flags & RTF_GATEWAY) {
-		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+		if (rt->fib6_src.plen) {
+			NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+			goto out;
+		}
+		rt->nh = nh;
+		fib6_nh = nexthop_fib6_nh(rt->nh);
+	} else {
+		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
 		if (err)
 			goto out;
 
-		rt->fib6_nh.nh_gw = cfg->fc_gateway;
-	}
+		fib6_nh = rt->fib6_nh;
 
-	err = -ENODEV;
-	if (!dev)
-		goto out;
-
-	if (idev->cnf.disable_ipv6) {
-		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
-		err = -EACCES;
-		goto out;
-	}
-
-	if (!(dev->flags & IFF_UP)) {
-		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
-		err = -ENETDOWN;
-		goto out;
+		/* We cannot add true routes via loopback here; they would
+		 * result in kernel looping.  Promote them to reject routes
+		 * instead.
+		 */
+		addr_type = ipv6_addr_type(&cfg->fc_dst);
+		if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
+				   addr_type))
+			rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
 	}
 
 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
+		struct net_device *dev = fib6_nh->fib_nh_dev;
+
 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
 			NL_SET_ERR_MSG(extack, "Invalid source address");
 			err = -EINVAL;
@@ -3136,28 +3687,8 @@
 	} else
 		rt->fib6_prefsrc.plen = 0;
 
-	rt->fib6_flags = cfg->fc_flags;
-
-install_route:
-	if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
-	    !netif_carrier_ok(dev))
-		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
-	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
-	rt->fib6_nh.nh_dev = dev;
-	rt->fib6_table = table;
-
-	cfg->fc_nlinfo.nl_net = dev_net(dev);
-
-	if (idev)
-		in6_dev_put(idev);
-
 	return rt;
 out:
-	if (dev)
-		dev_put(dev);
-	if (idev)
-		in6_dev_put(idev);
-
 	fib6_info_release(rt);
 	return ERR_PTR(err);
 }
@@ -3236,6 +3767,12 @@
 				info->skip_notify = 1;
 		}
 
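+		/* the in-kernel notifier is called once below for the
+		 * whole multipath route, so skip per-sibling notifications
+		 */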
+		info->skip_notify_kernel = 1;
+		call_fib6_multipath_entry_notifiers(net,
+						    FIB_EVENT_ENTRY_DEL,
+						    rt,
+						    rt->fib6_nsiblings,
+						    NULL);
 		list_for_each_entry_safe(sibling, next_sibling,
 					 &rt->fib6_siblings,
 					 fib6_siblings) {
@@ -3258,7 +3795,7 @@
 	return err;
 }
 
-static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
+static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
 {
 	int rc = -ESRCH;
 
@@ -3274,10 +3811,49 @@
 	return rc;
 }
 
+static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
+			     struct fib6_nh *nh)
+{
+	struct fib6_result res = {
+		.f6i = rt,
+		.nh = nh,
+	};
+	struct rt6_info *rt_cache;
+
+	rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
+	if (rt_cache)
+		return __ip6_del_cached_rt(rt_cache, cfg);
+
+	return 0;
+}
+
+struct fib6_nh_del_cached_rt_arg {
+	struct fib6_config *cfg;
+	struct fib6_info *f6i;
+};
+
+static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
+{
+	struct fib6_nh_del_cached_rt_arg *arg = _arg;
+	int rc;
+
+	rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
+	return rc != -ESRCH ? rc : 0;
+}
+
+static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
+{
+	struct fib6_nh_del_cached_rt_arg arg = {
+		.cfg = cfg,
+		.f6i = f6i
+	};
+
+	return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
+}
+
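+/* The helpers above follow a pattern that recurs throughout this patch:
+ * nexthop_for_each_fib6_nh() walks every fib6_nh of a nexthop object,
+ * calling back with an arg struct (or scalar) that carries state, and a
+ * nonzero return stops the walk.  A minimal sketch with illustrative
+ * names:
+ */
+static int fib6_nh_count_devs(struct fib6_nh *nh, void *_arg)
+{
+	int *count = _arg;
+
+	if (nh->fib_nh_dev)
+		(*count)++;
+	return 0;	/* zero means keep walking */
+}
+
+static int __maybe_unused fib6_count_nh_devs(struct fib6_info *f6i)
+{
+	int count = 0;
+
+	if (f6i->nh)
+		nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_count_devs, &count);
+	else if (f6i->fib6_nh->fib_nh_dev)
+		count = 1;
+	return count;
+}
+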
 static int ip6_route_del(struct fib6_config *cfg,
 			 struct netlink_ext_ack *extack)
 {
-	struct rt6_info *rt_cache;
 	struct fib6_table *table;
 	struct fib6_info *rt;
 	struct fib6_node *fn;
@@ -3298,30 +3874,53 @@
 
 	if (fn) {
 		for_each_fib6_node_rt_rcu(fn) {
-			if (cfg->fc_flags & RTF_CACHE) {
-				int rc;
+			struct fib6_nh *nh;
 
-				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
-							      &cfg->fc_src);
-				if (rt_cache) {
-					rc = ip6_del_cached_rt(rt_cache, cfg);
-					if (rc != -ESRCH) {
-						rcu_read_unlock();
-						return rc;
-					}
+			if (rt->nh && cfg->fc_nh_id &&
+			    rt->nh->id != cfg->fc_nh_id)
+				continue;
+
+			if (cfg->fc_flags & RTF_CACHE) {
+				int rc = 0;
+
+				if (rt->nh) {
+					rc = ip6_del_cached_rt_nh(cfg, rt);
+				} else if (cfg->fc_nh_id) {
+					continue;
+				} else {
+					nh = rt->fib6_nh;
+					rc = ip6_del_cached_rt(cfg, rt, nh);
+				}
+				if (rc != -ESRCH) {
+					rcu_read_unlock();
+					return rc;
 				}
 				continue;
 			}
-			if (cfg->fc_ifindex &&
-			    (!rt->fib6_nh.nh_dev ||
-			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
-				continue;
-			if (cfg->fc_flags & RTF_GATEWAY &&
-			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
-				continue;
+
 			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
 				continue;
-			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
+			if (cfg->fc_protocol &&
+			    cfg->fc_protocol != rt->fib6_protocol)
+				continue;
+
+			if (rt->nh) {
+				if (!fib6_info_hold_safe(rt))
+					continue;
+				rcu_read_unlock();
+
+				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
+			}
+			if (cfg->fc_nh_id)
+				continue;
+
+			nh = rt->fib6_nh;
+			if (cfg->fc_ifindex &&
+			    (!nh->fib_nh_dev ||
+			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
+				continue;
+			if (cfg->fc_flags & RTF_GATEWAY &&
+			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
 				continue;
 			if (!fib6_info_hold_safe(rt))
 				continue;
@@ -3343,10 +3942,10 @@
 {
 	struct netevent_redirect netevent;
 	struct rt6_info *rt, *nrt = NULL;
+	struct fib6_result res = {};
 	struct ndisc_options ndopts;
 	struct inet6_dev *in6_dev;
 	struct neighbour *neigh;
-	struct fib6_info *from;
 	struct rd_msg *msg;
 	int optlen, on_link;
 	u8 *lladdr;
@@ -3429,14 +4028,32 @@
 		     NDISC_REDIRECT, &ndopts);
 
 	rcu_read_lock();
-	from = rcu_dereference(rt->from);
-	/* This fib6_info_hold() is safe here because we hold reference to rt
-	 * and rt already holds reference to fib6_info.
-	 */
-	fib6_info_hold(from);
-	rcu_read_unlock();
+	res.f6i = rcu_dereference(rt->from);
+	if (!res.f6i)
+		goto out;
 
-	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
+	if (res.f6i->nh) {
+		struct fib6_nh_match_arg arg = {
+			.dev = dst->dev,
+			.gw = &rt->rt6i_gateway,
+		};
+
+		nexthop_for_each_fib6_nh(res.f6i->nh,
+					 fib6_nh_find_match, &arg);
+
+		/* The fib6_info uses a nexthop object none of whose
+		 * fib6_nh entries uses dst->dev. This should be impossible.
+		 */
+		if (!arg.match)
+			goto out;
+		res.nh = arg.match;
+	} else {
+		res.nh = res.f6i->fib6_nh;
+	}
+
+	res.fib6_flags = res.f6i->fib6_flags;
+	res.fib6_type = res.f6i->fib6_type;
+	nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
 	if (!nrt)
 		goto out;
 
@@ -3446,11 +4063,8 @@
 
 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
-	/* No need to remove rt from the exception table if rt is
-	 * a cached route because rt6_insert_exception() will
-	 * takes care of it
-	 */
-	if (rt6_insert_exception(nrt, from)) {
+	/* rt6_insert_exception() will take care of duplicated exceptions */
+	if (rt6_insert_exception(nrt, &res)) {
 		dst_release_immediate(&nrt->dst);
 		goto out;
 	}
@@ -3462,7 +4076,7 @@
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 out:
-	fib6_info_release(from);
+	rcu_read_unlock();
 	neigh_release(neigh);
 }
 
@@ -3488,11 +4102,15 @@
 		goto out;
 
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
+		/* these routes do not use nexthops */
+		if (rt->nh)
 			continue;
-		if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
+		if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
 			continue;
-		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
+		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
+		    !rt->fib6_nh->fib_nh_gw_family)
+			continue;
+		if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
 			continue;
 		if (!fib6_info_hold_safe(rt))
 			continue;
@@ -3550,9 +4168,16 @@
 
 	rcu_read_lock();
 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
-		if (dev == rt->fib6_nh.nh_dev &&
+		struct fib6_nh *nh;
+
+		/* RA routes do not use nexthops */
+		if (rt->nh)
+			continue;
+
+		nh = rt->fib6_nh;
+		if (dev == nh->fib_nh_dev &&
 		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
-		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
+		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
 			break;
 	}
 	if (rt && !fib6_info_hold_safe(rt))
@@ -3639,23 +4264,23 @@
 				 struct in6_rtmsg *rtmsg,
 				 struct fib6_config *cfg)
 {
-	memset(cfg, 0, sizeof(*cfg));
+	*cfg = (struct fib6_config){
+		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
+			 : RT6_TABLE_MAIN,
+		.fc_ifindex = rtmsg->rtmsg_ifindex,
+		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
+		.fc_expires = rtmsg->rtmsg_info,
+		.fc_dst_len = rtmsg->rtmsg_dst_len,
+		.fc_src_len = rtmsg->rtmsg_src_len,
+		.fc_flags = rtmsg->rtmsg_flags,
+		.fc_type = rtmsg->rtmsg_type,
 
-	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
-			 : RT6_TABLE_MAIN;
-	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
-	cfg->fc_metric = rtmsg->rtmsg_metric;
-	cfg->fc_expires = rtmsg->rtmsg_info;
-	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
-	cfg->fc_src_len = rtmsg->rtmsg_src_len;
-	cfg->fc_flags = rtmsg->rtmsg_flags;
-	cfg->fc_type = rtmsg->rtmsg_type;
+		.fc_nlinfo.nl_net = net,
 
-	cfg->fc_nlinfo.nl_net = net;
-
-	cfg->fc_dst = rtmsg->rtmsg_dst;
-	cfg->fc_src = rtmsg->rtmsg_src;
-	cfg->fc_gateway = rtmsg->rtmsg_gateway;
+		.fc_dst = rtmsg->rtmsg_dst,
+		.fc_src = rtmsg->rtmsg_src,
+		.fc_gateway = rtmsg->rtmsg_gateway,
+	};
 }
 
 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
@@ -3701,23 +4326,34 @@
 
 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
 {
-	int type;
 	struct dst_entry *dst = skb_dst(skb);
+	struct net *net = dev_net(dst->dev);
+	struct inet6_dev *idev;
+	int type;
+
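+	/* packets dropped in an l3mdev (VRF) domain carry a dst that
+	 * points at the loopback device; attribute the drop to the
+	 * original ingress device instead
+	 */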
+	if (netif_is_l3_master(skb->dev) &&
+	    dst->dev == net->loopback_dev)
+		idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
+	else
+		idev = ip6_dst_idev(dst);
+
 	switch (ipstats_mib_noroutes) {
 	case IPSTATS_MIB_INNOROUTES:
 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
 		if (type == IPV6_ADDR_ANY) {
-			IP6_INC_STATS(dev_net(dst->dev),
-				      __in6_dev_get_safely(skb->dev),
-				      IPSTATS_MIB_INADDRERRORS);
+			IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 			break;
 		}
 		/* FALLTHROUGH */
 	case IPSTATS_MIB_OUTNOROUTES:
-		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
-			      ipstats_mib_noroutes);
+		IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
 		break;
 	}
+
+	/* Start over by dropping the dst for l3mdev case */
+	if (netif_is_l3_master(skb->dev))
+		skb_dst_drop(skb);
+
 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
 	kfree_skb(skb);
 	return 0;
@@ -3754,34 +4390,29 @@
 				     const struct in6_addr *addr,
 				     bool anycast, gfp_t gfp_flags)
 {
-	u32 tb_id;
-	struct net_device *dev = idev->dev;
+	struct fib6_config cfg = {
+		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
+		.fc_ifindex = idev->dev->ifindex,
+		.fc_flags = RTF_UP | RTF_NONEXTHOP,
+		.fc_dst = *addr,
+		.fc_dst_len = 128,
+		.fc_protocol = RTPROT_KERNEL,
+		.fc_nlinfo.nl_net = net,
+		.fc_ignore_dev_down = true,
+	};
 	struct fib6_info *f6i;
 
-	f6i = fib6_info_alloc(gfp_flags);
-	if (!f6i)
-		return ERR_PTR(-ENOMEM);
-
-	f6i->dst_nocount = true;
-	f6i->dst_host = true;
-	f6i->fib6_protocol = RTPROT_KERNEL;
-	f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
 	if (anycast) {
-		f6i->fib6_type = RTN_ANYCAST;
-		f6i->fib6_flags |= RTF_ANYCAST;
+		cfg.fc_type = RTN_ANYCAST;
+		cfg.fc_flags |= RTF_ANYCAST;
 	} else {
-		f6i->fib6_type = RTN_LOCAL;
-		f6i->fib6_flags |= RTF_LOCAL;
+		cfg.fc_type = RTN_LOCAL;
+		cfg.fc_flags |= RTF_LOCAL;
 	}
 
-	f6i->fib6_nh.nh_gw = *addr;
-	dev_hold(dev);
-	f6i->fib6_nh.nh_dev = dev;
-	f6i->fib6_dst.addr = *addr;
-	f6i->fib6_dst.plen = 128;
-	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
-	f6i->fib6_table = fib6_get_table(net, tb_id);
-
+	f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
+	if (!IS_ERR(f6i))
+		f6i->dst_nocount = true;
 	return f6i;
 }
 
@@ -3798,14 +4429,13 @@
 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
 
-	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
+	if (!rt->nh &&
+	    ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
 	    rt != net->ipv6.fib6_null_entry &&
 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
 		spin_lock_bh(&rt6_exception_lock);
 		/* remove prefsrc entry */
 		rt->fib6_prefsrc.plen = 0;
-		/* need to update cache as well */
-		rt6_exceptions_remove_prefsrc(rt);
 		spin_unlock_bh(&rt6_exception_lock);
 	}
 	return 0;
@@ -3822,23 +4452,28 @@
 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
 }
 
-#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
+#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
 
 /* Remove routers and update dst entries when a gateway turns into a host. */
 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
 {
 	struct in6_addr *gateway = (struct in6_addr *)arg;
+	struct fib6_nh *nh;
 
+	/* RA routes do not use nexthops */
+	if (rt->nh)
+		return 0;
+
+	nh = rt->fib6_nh;
 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
-	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
+	    nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
 		return -1;
-	}
 
 	/* Further clean up cached routes in exception table.
 	 * This is needed because cached route may have a different
 	 * gateway than its 'parent' in the case of an ip redirect.
 	 */
-	rt6_exceptions_clean_tohost(rt, gateway);
+	fib6_nh_exceptions_clean_tohost(nh, gateway);
 
 	return 0;
 }
@@ -3851,7 +4486,7 @@
 struct arg_netdev_event {
 	const struct net_device *dev;
 	union {
-		unsigned int nh_flags;
+		unsigned char nh_flags;
 		unsigned long event;
 	};
 };
@@ -3876,11 +4511,12 @@
 	return NULL;
 }
 
+/* only called for fib entries with an inline fib6_nh */
 static bool rt6_is_dead(const struct fib6_info *rt)
 {
-	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
-	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
-	     fib6_ignore_linkdown(rt)))
+	if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
+	    (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
+	     ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
 		return true;
 
 	return false;
@@ -3892,11 +4528,11 @@
 	int total = 0;
 
 	if (!rt6_is_dead(rt))
-		total += rt->fib6_nh.nh_weight;
+		total += rt->fib6_nh->fib_nh_weight;
 
 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
 		if (!rt6_is_dead(iter))
-			total += iter->fib6_nh.nh_weight;
+			total += iter->fib6_nh->fib_nh_weight;
 	}
 
 	return total;
@@ -3907,11 +4543,11 @@
 	int upper_bound = -1;
 
 	if (!rt6_is_dead(rt)) {
-		*weight += rt->fib6_nh.nh_weight;
+		*weight += rt->fib6_nh->fib_nh_weight;
 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
 						    total) - 1;
 	}
-	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
+	atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
 }
 
 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
@@ -3954,8 +4590,9 @@
 	const struct arg_netdev_event *arg = p_arg;
 	struct net *net = dev_net(arg->dev);
 
-	if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
-		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
+	if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
+	    rt->fib6_nh->fib_nh_dev == arg->dev) {
+		rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
 		fib6_update_sernum_upto_root(net, rt);
 		rt6_multipath_rebalance(rt);
 	}
@@ -3963,7 +4600,7 @@
 	return 0;
 }
 
-void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
+void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
 {
 	struct arg_netdev_event arg = {
 		.dev = dev,
@@ -3978,15 +4615,16 @@
 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
 }
 
+/* only called for fib entries with an inline fib6_nh */
 static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
 				   const struct net_device *dev)
 {
 	struct fib6_info *iter;
 
-	if (rt->fib6_nh.nh_dev == dev)
+	if (rt->fib6_nh->fib_nh_dev == dev)
 		return true;
 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
-		if (iter->fib6_nh.nh_dev == dev)
+		if (iter->fib6_nh->fib_nh_dev == dev)
 			return true;
 
 	return false;
@@ -4007,12 +4645,12 @@
 	struct fib6_info *iter;
 	unsigned int dead = 0;
 
-	if (rt->fib6_nh.nh_dev == down_dev ||
-	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)
+	if (rt->fib6_nh->fib_nh_dev == down_dev ||
+	    rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
 		dead++;
 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
-		if (iter->fib6_nh.nh_dev == down_dev ||
-		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)
+		if (iter->fib6_nh->fib_nh_dev == down_dev ||
+		    iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
 			dead++;
 
 	return dead;
@@ -4020,15 +4658,15 @@
 
 static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
 				       const struct net_device *dev,
-				       unsigned int nh_flags)
+				       unsigned char nh_flags)
 {
 	struct fib6_info *iter;
 
-	if (rt->fib6_nh.nh_dev == dev)
-		rt->fib6_nh.nh_flags |= nh_flags;
+	if (rt->fib6_nh->fib_nh_dev == dev)
+		rt->fib6_nh->fib_nh_flags |= nh_flags;
 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
-		if (iter->fib6_nh.nh_dev == dev)
-			iter->fib6_nh.nh_flags |= nh_flags;
+		if (iter->fib6_nh->fib_nh_dev == dev)
+			iter->fib6_nh->fib_nh_flags |= nh_flags;
 }
 
 /* called with write lock held for table with rt */
@@ -4038,17 +4676,17 @@
 	const struct net_device *dev = arg->dev;
 	struct net *net = dev_net(dev);
 
-	if (rt == net->ipv6.fib6_null_entry)
+	if (rt == net->ipv6.fib6_null_entry || rt->nh)
 		return 0;
 
 	switch (arg->event) {
 	case NETDEV_UNREGISTER:
-		return rt->fib6_nh.nh_dev == dev ? -1 : 0;
+		return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
 	case NETDEV_DOWN:
 		if (rt->should_flush)
 			return -1;
 		if (!rt->fib6_nsiblings)
-			return rt->fib6_nh.nh_dev == dev ? -1 : 0;
+			return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
 		if (rt6_multipath_uses_dev(rt, dev)) {
 			unsigned int count;
 
@@ -4064,10 +4702,10 @@
 		}
 		return -2;
 	case NETDEV_CHANGE:
-		if (rt->fib6_nh.nh_dev != dev ||
+		if (rt->fib6_nh->fib_nh_dev != dev ||
 		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
 			break;
-		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
+		rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
 		rt6_multipath_rebalance(rt);
 		break;
 	}
@@ -4083,8 +4721,12 @@
 			.event = event,
 		},
 	};
+	struct net *net = dev_net(dev);
 
-	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
+	if (net->ipv6.sysctl.skip_notify_on_dev_down)
+		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
+	else
+		fib6_clean_all(net, fib6_ifdown, &arg);
 }
 
 void rt6_disable_ip(struct net_device *dev, unsigned long event)
@@ -4097,9 +4739,36 @@
 struct rt6_mtu_change_arg {
 	struct net_device *dev;
 	unsigned int mtu;
+	struct fib6_info *f6i;
 };
 
-static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
+static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
+{
+	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
+	struct fib6_info *f6i = arg->f6i;
+
+	/* An administrative MTU increase cannot be discovered through
+	 * IPv6 PMTU discovery, so the cached PMTU has to be updated here.
+	 * RFC 1981 does not cover administrative increases (e.g. enabling
+	 * jumbo frames), so updating on increase is a MUST.
+	 */
+	if (nh->fib_nh_dev == arg->dev) {
+		struct inet6_dev *idev = __in6_dev_get(arg->dev);
+		u32 mtu = f6i->fib6_pmtu;
+
+		if (mtu >= arg->mtu ||
+		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
+			fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
+
+		spin_lock_bh(&rt6_exception_lock);
+		rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
+		spin_unlock_bh(&rt6_exception_lock);
+	}
+
+	return 0;
+}
+
+static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
 {
 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
 	struct inet6_dev *idev;
@@ -4114,24 +4783,17 @@
 	if (!idev)
 		return 0;
 
-	/* For administrative MTU increase, there is no way to discover
-	   IPv6 PMTU increase, so PMTU increase should be updated here.
-	   Since RFC 1981 doesn't include administrative MTU increase
-	   update PMTU increase is a MUST. (i.e. jumbo frame)
-	 */
-	if (rt->fib6_nh.nh_dev == arg->dev &&
-	    !fib6_metric_locked(rt, RTAX_MTU)) {
-		u32 mtu = rt->fib6_pmtu;
+	if (fib6_metric_locked(f6i, RTAX_MTU))
+		return 0;
 
-		if (mtu >= arg->mtu ||
-		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
-			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
-
-		spin_lock_bh(&rt6_exception_lock);
-		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
-		spin_unlock_bh(&rt6_exception_lock);
+	arg->f6i = f6i;
+	if (f6i->nh) {
+		/* fib6_nh_mtu_change only returns 0, so this is safe */
+		return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
+						arg);
 	}
-	return 0;
+
+	return fib6_nh_mtu_change(f6i->fib6_nh, arg);
 }
 
 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
@@ -4145,6 +4807,7 @@
 }
 
 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
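+	/* attribute types above RTA_DPORT (such as RTA_NH_ID) are
+	 * validated strictly even by the deprecated, liberal parsers
+	 */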
+	[RTA_UNSPEC]		= { .strict_start_type = RTA_DPORT + 1 },
 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
 	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
 	[RTA_OIF]               = { .type = NLA_U32 },
@@ -4162,6 +4825,7 @@
 	[RTA_IP_PROTO]		= { .type = NLA_U8 },
 	[RTA_SPORT]		= { .type = NLA_U16 },
 	[RTA_DPORT]		= { .type = NLA_U16 },
+	[RTA_NH_ID]		= { .type = NLA_U32 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -4173,21 +4837,26 @@
 	unsigned int pref;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
-			  NULL);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+				     rtm_ipv6_policy, extack);
 	if (err < 0)
 		goto errout;
 
 	err = -EINVAL;
 	rtm = nlmsg_data(nlh);
-	memset(cfg, 0, sizeof(*cfg));
 
-	cfg->fc_table = rtm->rtm_table;
-	cfg->fc_dst_len = rtm->rtm_dst_len;
-	cfg->fc_src_len = rtm->rtm_src_len;
-	cfg->fc_flags = RTF_UP;
-	cfg->fc_protocol = rtm->rtm_protocol;
-	cfg->fc_type = rtm->rtm_type;
+	*cfg = (struct fib6_config){
+		.fc_table = rtm->rtm_table,
+		.fc_dst_len = rtm->rtm_dst_len,
+		.fc_src_len = rtm->rtm_src_len,
+		.fc_flags = RTF_UP,
+		.fc_protocol = rtm->rtm_protocol,
+		.fc_type = rtm->rtm_type,
+
+		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
+		.fc_nlinfo.nlh = nlh,
+		.fc_nlinfo.nl_net = sock_net(skb->sk),
+	};
 
 	if (rtm->rtm_type == RTN_UNREACHABLE ||
 	    rtm->rtm_type == RTN_BLACKHOLE ||
@@ -4203,14 +4872,24 @@
 
 	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
 
-	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
-	cfg->fc_nlinfo.nlh = nlh;
-	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
+	if (tb[RTA_NH_ID]) {
+		if (tb[RTA_GATEWAY]   || tb[RTA_OIF] ||
+		    tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop specification and nexthop id are mutually exclusive");
+			goto errout;
+		}
+		cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
+	}
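+	/* userspace selects a nexthop object by id, e.g.
+	 * "ip -6 route add 2001:db8::/64 nhid 10" arrives here as
+	 * RTA_NH_ID == 10
+	 */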
 
 	if (tb[RTA_GATEWAY]) {
 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
 		cfg->fc_flags |= RTF_GATEWAY;
 	}
+	if (tb[RTA_VIA]) {
+		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
+		goto errout;
+	}
 
 	if (tb[RTA_DST]) {
 		int plen = (rtm->rtm_dst_len + 7) >> 3;
@@ -4296,17 +4975,6 @@
 	struct list_head next;
 };
 
-static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
-{
-	struct rt6_nh *nh;
-
-	list_for_each_entry(nh, rt6_nh_list, next) {
-		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
-		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
-		        nh->r_cfg.fc_ifindex);
-	}
-}
-
 static int ip6_route_info_append(struct net *net,
 				 struct list_head *rt6_nh_list,
 				 struct fib6_info *rt,
@@ -4357,6 +5025,7 @@
 {
 	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
 	struct nl_info *info = &cfg->fc_nlinfo;
+	enum fib_event_type event_type;
 	struct fib6_config r_cfg;
 	struct rtnexthop *rtnh;
 	struct fib6_info *rt;
@@ -4416,7 +5085,7 @@
 			goto cleanup;
 		}
 
-		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
+		rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
 
 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
 					    rt, &r_cfg);
@@ -4428,12 +5097,23 @@
 		rtnh = rtnh_next(rtnh, &remaining);
 	}
 
+	if (list_empty(&rt6_nh_list)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid nexthop configuration - no valid nexthops");
+		return -EINVAL;
+	}
+
 	/* for add and replace send one notification with all nexthops.
 	 * Skip the notification in fib6_add_rt2node and send one with
 	 * the full route when done
 	 */
 	info->skip_notify = 1;
 
+	/* For add and replace, send one notification with all nexthops. For
+	 * append, send one notification with all appended nexthops.
+	 */
+	info->skip_notify_kernel = 1;
+
 	err_nh = NULL;
 	list_for_each_entry(nh, &rt6_nh_list, next) {
 		err = __ip6_ins_rt(nh->fib6_info, info, extack);
@@ -4452,7 +5132,8 @@
 		nh->fib6_info = NULL;
 		if (err) {
 			if (replace && nhn)
-				ip6_print_replace_route_err(&rt6_nh_list);
+				NL_SET_ERR_MSG_MOD(extack,
+						   "multipath route replace failed (check consistency of installed routes)");
 			err_nh = nh;
 			goto add_errout;
 		}
@@ -4469,6 +5150,15 @@
 		nhn++;
 	}
 
+	event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
+	err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
+						  rt_notif, nhn - 1, extack);
+	if (err) {
+		/* Delete all the siblings that were just added */
+		err_nh = NULL;
+		goto add_errout;
+	}
+
 	/* success ... tell user about new route */
 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
 	goto cleanup;
@@ -4547,6 +5237,12 @@
 	if (err < 0)
 		return err;
 
+	if (cfg.fc_nh_id &&
+	    !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
+		NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+		return -EINVAL;
+	}
+
 	if (cfg.fc_mp)
 		return ip6_route_multipath_del(&cfg, extack);
 	else {
@@ -4565,23 +5261,55 @@
 	if (err < 0)
 		return err;
 
+	if (cfg.fc_metric == 0)
+		cfg.fc_metric = IP6_RT_PRIO_USER;
+
 	if (cfg.fc_mp)
 		return ip6_route_multipath_add(&cfg, extack);
 	else
 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
 }
 
-static size_t rt6_nlmsg_size(struct fib6_info *rt)
+/* add the overhead of this fib6_nh to nexthop_len */
+static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
 {
-	int nexthop_len = 0;
+	int *nexthop_len = arg;
 
-	if (rt->fib6_nsiblings) {
-		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
-			    + NLA_ALIGN(sizeof(struct rtnexthop))
-			    + nla_total_size(16) /* RTA_GATEWAY */
-			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
+	*nexthop_len += nla_total_size(0)	 /* RTA_MULTIPATH */
+		     + NLA_ALIGN(sizeof(struct rtnexthop))
+		     + nla_total_size(16); /* RTA_GATEWAY */
 
-		nexthop_len *= rt->fib6_nsiblings;
+	if (nh->fib_nh_lws) {
+		/* RTA_ENCAP */
+		*nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
+		/* RTA_ENCAP_TYPE (u16) */
+		*nexthop_len += nla_total_size(2);
+	}
+
+	return 0;
+}
+
+static size_t rt6_nlmsg_size(struct fib6_info *f6i)
+{
+	int nexthop_len;
+
+	if (f6i->nh) {
+		nexthop_len = nla_total_size(4); /* RTA_NH_ID */
+		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
+					 &nexthop_len);
+	} else {
+		struct fib6_nh *nh = f6i->fib6_nh;
+
+		nexthop_len = 0;
+		if (f6i->fib6_nsiblings) {
+			nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
+				    + NLA_ALIGN(sizeof(struct rtnexthop))
+				    + nla_total_size(16) /* RTA_GATEWAY */
+				    + lwtunnel_get_encap_size(nh->fib_nh_lws);
+
+			nexthop_len *= f6i->fib6_nsiblings;
+		}
+		nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
 	}
 
 	return NLMSG_ALIGN(sizeof(struct rtmsg))
@@ -4597,71 +5325,32 @@
 	       + nla_total_size(sizeof(struct rta_cacheinfo))
 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
 	       + nla_total_size(1) /* RTA_PREF */
-	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
 	       + nexthop_len;
 }
 
-static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
-			    unsigned int *flags, bool skip_oif)
+static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
+				 unsigned char *flags)
 {
-	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
-		*flags |= RTNH_F_DEAD;
+	if (nexthop_is_multipath(nh)) {
+		struct nlattr *mp;
 
-	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
-		*flags |= RTNH_F_LINKDOWN;
+		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
+		if (!mp)
+			goto nla_put_failure;
 
-		rcu_read_lock();
-		if (fib6_ignore_linkdown(rt))
-			*flags |= RTNH_F_DEAD;
-		rcu_read_unlock();
-	}
+		if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
+			goto nla_put_failure;
 
-	if (rt->fib6_flags & RTF_GATEWAY) {
-		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
+		nla_nest_end(skb, mp);
+	} else {
+		struct fib6_nh *fib6_nh;
+
+		fib6_nh = nexthop_fib6_nh(nh);
+		if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
+				     flags, false) < 0)
 			goto nla_put_failure;
 	}
 
-	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
-	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
-		*flags |= RTNH_F_OFFLOAD;
-
-	/* not needed for multipath encoding b/c it has a rtnexthop struct */
-	if (!skip_oif && rt->fib6_nh.nh_dev &&
-	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
-		goto nla_put_failure;
-
-	if (rt->fib6_nh.nh_lwtstate &&
-	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
-		goto nla_put_failure;
-
-	return 0;
-
-nla_put_failure:
-	return -EMSGSIZE;
-}
-
-/* add multipath next hop */
-static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
-{
-	const struct net_device *dev = rt->fib6_nh.nh_dev;
-	struct rtnexthop *rtnh;
-	unsigned int flags = 0;
-
-	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
-	if (!rtnh)
-		goto nla_put_failure;
-
-	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
-	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
-
-	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
-		goto nla_put_failure;
-
-	rtnh->rtnh_flags = flags;
-
-	/* length of rtnetlink header + attributes */
-	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
-
 	return 0;
 
 nla_put_failure:
@@ -4677,6 +5366,7 @@
 	struct rt6_info *rt6 = (struct rt6_info *)dst;
 	struct rt6key *rt6_dst, *rt6_src;
 	u32 *pmetrics, table, rt6_flags;
+	unsigned char nh_flags = 0;
 	struct nlmsghdr *nlh;
 	struct rtmsg *rtm;
 	long expires = 0;
@@ -4704,7 +5394,7 @@
 		table = rt->fib6_table->tb6_id;
 	else
 		table = RT6_TABLE_UNSPEC;
-	rtm->rtm_table = table;
+	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
 	if (nla_put_u32(skb, RTA_TABLE, table))
 		goto nla_put_failure;
 
@@ -4780,23 +5470,40 @@
 		struct fib6_info *sibling, *next_sibling;
 		struct nlattr *mp;
 
-		mp = nla_nest_start(skb, RTA_MULTIPATH);
+		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
 		if (!mp)
 			goto nla_put_failure;
 
-		if (rt6_add_nexthop(skb, rt) < 0)
+		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
+				    rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
 			goto nla_put_failure;
 
 		list_for_each_entry_safe(sibling, next_sibling,
 					 &rt->fib6_siblings, fib6_siblings) {
-			if (rt6_add_nexthop(skb, sibling) < 0)
+			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
+					    sibling->fib6_nh->fib_nh_weight,
+					    AF_INET6) < 0)
 				goto nla_put_failure;
 		}
 
 		nla_nest_end(skb, mp);
-	} else {
-		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
+	} else if (rt->nh) {
+		if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
 			goto nla_put_failure;
+
+		if (nexthop_is_blackhole(rt->nh))
+			rtm->rtm_type = RTN_BLACKHOLE;
+
+		if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
+			goto nla_put_failure;
+
+		rtm->rtm_flags |= nh_flags;
+	} else {
+		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
+				     &nh_flags, false) < 0)
+			goto nla_put_failure;
+
+		rtm->rtm_flags |= nh_flags;
 	}
 
 	if (rt6_flags & RTF_EXPIRES) {
@@ -4819,28 +5526,235 @@
 	return -EMSGSIZE;
 }
 
-int rt6_dump_route(struct fib6_info *rt, void *p_arg)
+static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
 {
-	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
-	struct net *net = arg->net;
+	const struct net_device *dev = arg;
 
-	if (rt == net->ipv6.fib6_null_entry)
-		return 0;
+	if (nh->fib_nh_dev == dev)
+		return 1;
 
-	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
-		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
+	return 0;
+}
 
-		/* user wants prefix routes only */
-		if (rtm->rtm_flags & RTM_F_PREFIX &&
-		    !(rt->fib6_flags & RTF_PREFIX_RT)) {
-			/* success since this is not a prefix route */
-			return 1;
+static bool fib6_info_uses_dev(const struct fib6_info *f6i,
+			       const struct net_device *dev)
+{
+	if (f6i->nh) {
+		struct net_device *_dev = (struct net_device *)dev;
+
+		return !!nexthop_for_each_fib6_nh(f6i->nh,
+						  fib6_info_nh_uses_dev,
+						  _dev);
+	}
+
+	if (f6i->fib6_nh->fib_nh_dev == dev)
+		return true;
+
+	if (f6i->fib6_nsiblings) {
+		struct fib6_info *sibling, *next_sibling;
+
+		list_for_each_entry_safe(sibling, next_sibling,
+					 &f6i->fib6_siblings, fib6_siblings) {
+			if (sibling->fib6_nh->fib_nh_dev == dev)
+				return true;
 		}
 	}
 
-	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
-			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
-			     arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
+	return false;
+}
+
+struct fib6_nh_exception_dump_walker {
+	struct rt6_rtnl_dump_arg *dump;
+	struct fib6_info *rt;
+	unsigned int flags;
+	unsigned int skip;
+	unsigned int count;
+};
+
+static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
+{
+	struct fib6_nh_exception_dump_walker *w = arg;
+	struct rt6_rtnl_dump_arg *dump = w->dump;
+	struct rt6_exception_bucket *bucket;
+	struct rt6_exception *rt6_ex;
+	int i, err;
+
+	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
+	if (!bucket)
+		return 0;
+
+	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+			if (w->skip) {
+				w->skip--;
+				continue;
+			}
+
+			/* Expiration of entries doesn't bump sernum, insertion
+			 * does. Removal is triggered by insertion, so we can
+			 * rely on the fact that if entries change between two
+			 * partial dumps, this node is scanned again completely,
+			 * see rt6_insert_exception() and fib6_dump_table().
+			 *
+			 * Count expired entries we go through as handled
+			 * entries that we'll skip next time, in case of partial
+			 * node dump. Otherwise, if entries expire meanwhile,
+			 * we'll skip the wrong amount.
+			 */
+			if (rt6_check_expired(rt6_ex->rt6i)) {
+				w->count++;
+				continue;
+			}
+
+			err = rt6_fill_node(dump->net, dump->skb, w->rt,
+					    &rt6_ex->rt6i->dst, NULL, NULL, 0,
+					    RTM_NEWROUTE,
+					    NETLINK_CB(dump->cb->skb).portid,
+					    dump->cb->nlh->nlmsg_seq, w->flags);
+			if (err)
+				return err;
+
+			w->count++;
+		}
+		bucket++;
+	}
+
+	return 0;
+}
+
+/* Return -1 if done with the node, or the number of handled routes on a
+ * partial dump
+ */
+int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
+{
+	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
+	struct fib_dump_filter *filter = &arg->filter;
+	unsigned int flags = NLM_F_MULTI;
+	struct net *net = arg->net;
+	int count = 0;
+
+	if (rt == net->ipv6.fib6_null_entry)
+		return -1;
+
+	if ((filter->flags & RTM_F_PREFIX) &&
+	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
+		/* success since this is not a prefix route */
+		return -1;
+	}
+	if (filter->filter_set &&
+	    ((filter->rt_type  && rt->fib6_type != filter->rt_type) ||
+	     (filter->dev      && !fib6_info_uses_dev(rt, filter->dev)) ||
+	     (filter->protocol && rt->fib6_protocol != filter->protocol))) {
+		return -1;
+	}
+
+	if (filter->filter_set ||
+	    !filter->dump_routes || !filter->dump_exceptions) {
+		flags |= NLM_F_DUMP_FILTERED;
+	}
+
+	if (filter->dump_routes) {
+		if (skip) {
+			skip--;
+		} else {
+			if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
+					  0, RTM_NEWROUTE,
+					  NETLINK_CB(arg->cb->skb).portid,
+					  arg->cb->nlh->nlmsg_seq, flags)) {
+				return 0;
+			}
+			count++;
+		}
+	}
+
+	if (filter->dump_exceptions) {
+		struct fib6_nh_exception_dump_walker w = { .dump = arg,
+							   .rt = rt,
+							   .flags = flags,
+							   .skip = skip,
+							   .count = 0 };
+		int err;
+
+		rcu_read_lock();
+		if (rt->nh) {
+			err = nexthop_for_each_fib6_nh(rt->nh,
+						       rt6_nh_dump_exceptions,
+						       &w);
+		} else {
+			err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
+		}
+		rcu_read_unlock();
+
+		if (err)
+			return count += w.count;
+	}
+
+	return -1;
+}
+
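+/* Sketch of the caller side, modeled on fib6_dump_node(): the walker
+ * accumulates how many routes of this node were already emitted and
+ * passes that back as 'skip' when the dump resumes:
+ *
+ *	res = rt6_dump_route(rt, w->args, w->skip_in_node);
+ *	if (res >= 0) {
+ *		// dump buffer filled up; remember progress and suspend
+ *		w->skip_in_node += res;
+ *		return 1;
+ *	}
+ *	w->skip_in_node = 0;	// node fully dumped
+ */
+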
+static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
+					const struct nlmsghdr *nlh,
+					struct nlattr **tb,
+					struct netlink_ext_ack *extack)
+{
+	struct rtmsg *rtm;
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Invalid header for get route request");
+		return -EINVAL;
+	}
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+					      rtm_ipv6_policy, extack);
+
+	rtm = nlmsg_data(nlh);
+	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
+	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
+	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
+	    rtm->rtm_type) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
+		return -EINVAL;
+	}
+	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Invalid flags for get route request");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+					    rtm_ipv6_policy, extack);
+	if (err)
+		return err;
+
+	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
+	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
+		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
+		return -EINVAL;
+	}
+
+	for (i = 0; i <= RTA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case RTA_SRC:
+		case RTA_DST:
+		case RTA_IIF:
+		case RTA_OIF:
+		case RTA_MARK:
+		case RTA_UID:
+		case RTA_SPORT:
+		case RTA_DPORT:
+		case RTA_IP_PROTO:
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
 }
 
 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
@@ -4854,16 +5768,14 @@
 	struct rt6_info *rt;
 	struct sk_buff *skb;
 	struct rtmsg *rtm;
-	struct flowi6 fl6;
+	struct flowi6 fl6 = {};
 	bool fibmatch;
 
-	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
-			  extack);
+	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
 	if (err < 0)
 		goto errout;
 
 	err = -EINVAL;
-	memset(&fl6, 0, sizeof(fl6));
 	rtm = nlmsg_data(nlh);
 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
@@ -4905,7 +5817,8 @@
 
 	if (tb[RTA_IP_PROTO]) {
 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
-						  &fl6.flowi6_proto, extack);
+						  &fl6.flowi6_proto, AF_INET6,
+						  extack);
 		if (err)
 			goto errout;
 	}
@@ -4962,16 +5875,20 @@
 
 	rcu_read_lock();
 	from = rcu_dereference(rt->from);
-
-	if (fibmatch)
-		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
-				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
-				    nlh->nlmsg_seq, 0);
-	else
-		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
-				    &fl6.saddr, iif, RTM_NEWROUTE,
-				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
-				    0);
+	if (from) {
+		if (fibmatch)
+			err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
+					    iif, RTM_NEWROUTE,
+					    NETLINK_CB(in_skb).portid,
+					    nlh->nlmsg_seq, 0);
+		else
+			err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
+					    &fl6.saddr, iif, RTM_NEWROUTE,
+					    NETLINK_CB(in_skb).portid,
+					    nlh->nlmsg_seq, 0);
+	} else {
+		err = -ENETUNREACH;
+	}
 	rcu_read_unlock();
 
 	if (err < 0) {
@@ -5015,6 +5932,38 @@
 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
 }
 
+void fib6_rt_update(struct net *net, struct fib6_info *rt,
+		    struct nl_info *info)
+{
+	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	/* call_fib6_entry_notifiers will be removed when the in-kernel
+	 * notifier is implemented and supported for nexthop objects.
+	 */
+	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
+
+	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+	if (!skb)
+		goto errout;
+
+	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
+			    RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
+		    info->nlh, gfp_any());
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+}
+
 static int ip6_route_dev_notify(struct notifier_block *this,
 				unsigned long event, void *ptr)
 {
@@ -5025,7 +5974,7 @@
 		return NOTIFY_OK;
 
 	if (event == NETDEV_REGISTER) {
-		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
+		net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
 		net->ipv6.ip6_null_entry->dst.dev = dev;
 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -5078,17 +6027,21 @@
 {
 	struct net *net;
 	int delay;
+	int ret;
 	if (!write)
 		return -EINVAL;
 
 	net = (struct net *)ctl->extra1;
 	delay = net->ipv6.sysctl.flush_delay;
-	proc_dointvec(ctl, write, buffer, lenp, ppos);
+	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+	if (ret)
+		return ret;
+
 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
 	return 0;
 }
 
-struct ctl_table ipv6_route_table_template[] = {
+static struct ctl_table ipv6_route_table_template[] = {
 	{
 		.procname	=	"flush",
 		.data		=	&init_net.ipv6.sysctl.flush_delay,
@@ -5159,6 +6112,15 @@
 		.mode		=	0644,
 		.proc_handler	=	proc_dointvec_ms_jiffies,
 	},
+	{
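+		/* 0 (default): send RTM_DELROUTE notifications for routes
+		 * removed when a device goes down; 1: suppress them
+		 */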
+		.procname	=	"skip_notify_on_dev_down",
+		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+		.proc_handler	=	proc_dointvec_minmax,
+		.extra1		=	SYSCTL_ZERO,
+		.extra2		=	SYSCTL_ONE,
+	},
 	{ }
 };
 
@@ -5182,6 +6144,7 @@
 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
 
 		/* Don't export sysctls to unprivileged users */
 		if (net->user_ns != &init_user_ns)
@@ -5202,11 +6165,11 @@
 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
 		goto out_ip6_dst_ops;
 
-	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
-					    sizeof(*net->ipv6.fib6_null_entry),
-					    GFP_KERNEL);
+	net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
 	if (!net->ipv6.fib6_null_entry)
 		goto out_ip6_dst_entries;
+	memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
+	       sizeof(*net->ipv6.fib6_null_entry));
 
 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
 					   sizeof(*net->ipv6.ip6_null_entry),
@@ -5216,6 +6179,7 @@
 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
 			 ip6_template_metrics, true);
+	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.fib6_has_custom_rules = false;
@@ -5227,6 +6191,7 @@
 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
 			 ip6_template_metrics, true);
+	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
 
 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -5236,6 +6201,7 @@
 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
 			 ip6_template_metrics, true);
+	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
 #endif
 
 	net->ipv6.sysctl.flush_delay = 0;
@@ -5246,6 +6212,7 @@
 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
 
 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
 
@@ -5342,7 +6309,7 @@
 	/* Registering of the loopback is done before this portion of code,
 	 * the loopback reference in rt6_info will not be taken, do it
 	 * manually for init_net */
-	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
+	init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 8d0ba75..75421a4 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  SR-IPv6 implementation
  *
  *  Author:
  *  David Lebrun <david.lebrun@uclouvain.be>
- *
- *
- *  This program is free software; you can redistribute it and/or
- *	  modify it under the terms of the GNU General Public License
- *	  as published by the Free Software Foundation; either version
- *	  2 of the License, or (at your option) any later version.
  */
 
 #include <linux/errno.h>
@@ -221,9 +216,7 @@
 	rcu_read_unlock();
 
 	genlmsg_end(msg, hdr);
-	genlmsg_reply(msg, info);
-
-	return 0;
+	return genlmsg_reply(msg, info);
 
 nla_put_failure:
 	rcu_read_unlock();
@@ -400,28 +393,28 @@
 static const struct genl_ops seg6_genl_ops[] = {
 	{
 		.cmd	= SEG6_CMD_SETHMAC,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= seg6_genl_sethmac,
-		.policy	= seg6_genl_policy,
 		.flags	= GENL_ADMIN_PERM,
 	},
 	{
 		.cmd	= SEG6_CMD_DUMPHMAC,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.start	= seg6_genl_dumphmac_start,
 		.dumpit	= seg6_genl_dumphmac,
 		.done	= seg6_genl_dumphmac_done,
-		.policy	= seg6_genl_policy,
 		.flags	= GENL_ADMIN_PERM,
 	},
 	{
 		.cmd	= SEG6_CMD_SET_TUNSRC,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= seg6_genl_set_tunsrc,
-		.policy	= seg6_genl_policy,
 		.flags	= GENL_ADMIN_PERM,
 	},
 	{
 		.cmd	= SEG6_CMD_GET_TUNSRC,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= seg6_genl_get_tunsrc,
-		.policy = seg6_genl_policy,
 		.flags	= GENL_ADMIN_PERM,
 	},
 };
@@ -431,6 +424,7 @@
 	.name		= SEG6_GENL_NAME,
 	.version	= SEG6_GENL_VERSION,
 	.maxattr	= SEG6_ATTR_MAX,
+	.policy = seg6_genl_policy,
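+	/* family-wide policy: the generic netlink core now validates
+	 * attributes for every op, instead of each op carrying its own
+	 * .policy pointer
+	 */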
 	.netnsok	= true,
 	.parallel_ops	= true,
 	.ops		= seg6_genl_ops,
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 8546f94..ffcfcd2 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  SR-IPv6 implementation -- HMAC functions
  *
  *  Author:
  *  David Lebrun <david.lebrun@uclouvain.be>
- *
- *
- *  This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/errno.h>
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 8181ee7..ab7f124 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  SR-IPv6 implementation
  *
  *  Author:
  *  David Lebrun <david.lebrun@uclouvain.be>
- *
- *
- *  This program is free software; you can redistribute it and/or
- *        modify it under the terms of the GNU General Public License
- *        as published by the Free Software Foundation; either version
- *        2 of the License, or (at your option) any later version.
  */
 
 #include <linux/types.h>
@@ -146,6 +141,8 @@
 	} else {
 		ip6_flow_hdr(hdr, 0, flowlabel);
 		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
+		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 	}
 
 	hdr->nexthdr = NEXTHDR_ROUTING;
@@ -394,8 +391,8 @@
 	if (family != AF_INET && family != AF_INET6)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
-			       seg6_iptunnel_policy, extack);
+	err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla,
+					  seg6_iptunnel_policy, extack);
 
 	if (err < 0)
 		return err;
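
The nla_parse_nested() -> nla_parse_nested_deprecated() conversions in this
file keep the old parsing semantics under a more explicit name: the
_deprecated variant does not require NLA_F_NESTED on the outer attribute and
skips strict validation, which is what existing userspace expects. A sketch
of the calling convention (demo_* names are hypothetical):

    struct nlattr *tb[DEMO_ATTR_MAX + 1];
    int err;

    /* parse a nested attribute with pre-strict-validation semantics */
    err = nla_parse_nested_deprecated(tb, DEMO_ATTR_MAX, nla,
                                      demo_policy, extack);
    if (err < 0)
            return err;
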
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 60325db..e705674 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  SR-IPv6 implementation
  *
  *  Authors:
  *  David Lebrun <david.lebrun@uclouvain.be>
  *  eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
- *
- *
- *  This program is free software; you can redistribute it and/or
- *        modify it under the terms of the GNU General Public License
- *        as published by the Free Software Foundation; either version
- *        2 of the License, or (at your option) any later version.
  */
 
 #include <linux/types.h>
@@ -86,6 +81,11 @@
 	if (!pskb_may_pull(skb, srhoff + len))
 		return NULL;
 
+	/* note that pskb_may_pull may change pointers in the header;
+	 * for this reason they must be reloaded when needed.
+	 */
+	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
 	if (!seg6_validate_srh(srh, len))
 		return NULL;
 
@@ -341,6 +341,8 @@
 	if (!ipv6_addr_any(&slwt->nh6))
 		nhaddr = &slwt->nh6;
 
+	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
 	seg6_lookup_nexthop(skb, nhaddr, 0);
 
 	return dst_input(skb);
@@ -370,6 +372,8 @@
 
 	skb_dst_drop(skb);
 
+	skb_set_transport_header(skb, sizeof(struct iphdr));
+
 	err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
 	if (err)
 		goto drop;
@@ -390,6 +394,8 @@
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto drop;
 
+	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
 	seg6_lookup_nexthop(skb, NULL, slwt->table);
 
 	return dst_input(skb);
@@ -823,8 +829,9 @@
 	int ret;
 	u32 fd;
 
-	ret = nla_parse_nested(tb, SEG6_LOCAL_BPF_PROG_MAX,
-			       attrs[SEG6_LOCAL_BPF], bpf_prog_policy, NULL);
+	ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
+					  attrs[SEG6_LOCAL_BPF],
+					  bpf_prog_policy, NULL);
 	if (ret < 0)
 		return ret;
 
@@ -853,7 +860,7 @@
 	if (!slwt->bpf.prog)
 		return 0;
 
-	nest = nla_nest_start(skb, SEG6_LOCAL_BPF);
+	nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
 	if (!nest)
 		return -EMSGSIZE;
 
@@ -959,8 +966,8 @@
 	if (family != AF_INET6)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
-			       extack);
+	err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla,
+					  seg6_local_policy, extack);
 
 	if (err < 0)
 		return err;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e9400ff..b2ccbc4 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
  *	Linux INET6 implementation
@@ -6,11 +7,6 @@
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
  *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  *	Changes:
  * Roger Venning <r.venning@telstra.com>:	6to4 support
  * Nate Thompson <nate@thebog.net>:		6to4 support
@@ -534,19 +530,20 @@
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->parms.link, 0, iph->protocol, 0);
+				 t->parms.link, iph->protocol);
 		err = 0;
 		goto out;
 	}
 	if (type == ICMP_REDIRECT) {
-		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
-			      iph->protocol, 0);
+		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link,
+			      iph->protocol);
 		err = 0;
 		goto out;
 	}
 
 	err = 0;
-	if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
+	if (__in6_dev_get(skb->dev) &&
+	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
 		goto out;
 
 	if (t->parms.iph.daddr == 0)
@@ -668,6 +665,10 @@
 		    !net_eq(tunnel->net, dev_net(tunnel->dev))))
 			goto out;
 
+		/* skb can be uncloned in iptunnel_pull_header, so
+		 * old iph is no longer valid
+		 */
+		iph = (const struct iphdr *)skb_mac_header(skb);
 		err = IP_ECN_decapsulate(iph, skb);
 		if (unlikely(err)) {
 			if (log_ecn_error)
@@ -777,8 +778,9 @@
 		pbw0 = tunnel->ip6rd.prefixlen >> 5;
 		pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
 
-		d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
-		    tunnel->ip6rd.relay_prefixlen;
+		d = tunnel->ip6rd.relay_prefixlen < 32 ?
+			(ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
+		    tunnel->ip6rd.relay_prefixlen : 0;
 
 		pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
 		if (pbi1 > 0)
@@ -898,12 +900,17 @@
 			   RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
 			   0, dst, tiph->saddr, 0, 0,
 			   sock_net_uid(tunnel->net, NULL));
-	rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
 
-	if (IS_ERR(rt)) {
-		dev->stats.tx_carrier_errors++;
-		goto tx_error_icmp;
+	rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
+	if (!rt) {
+		rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
+		if (IS_ERR(rt)) {
+			dev->stats.tx_carrier_errors++;
+			goto tx_error_icmp;
+		}
+		dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
 	}
+
 	if (rt->rt_type != RTN_UNICAST) {
 		ip_rt_put(rt);
 		dev->stats.tx_carrier_errors++;
@@ -1021,6 +1028,9 @@
 static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
 				   struct net_device *dev)
 {
+	if (!pskb_inet_may_pull(skb))
+		goto tx_err;
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
 		sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
@@ -1075,7 +1085,7 @@
 	if (!tdev && tunnel->parms.link)
 		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
 
-	if (tdev) {
+	if (tdev && !netif_is_l3_master(tdev)) {
 		int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 
 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
@@ -1869,6 +1879,7 @@
 
 err_reg_dev:
 	ipip6_dev_free(sitn->fb_tunnel_dev);
+	free_netdev(sitn->fb_tunnel_dev);
 err_alloc_dev:
 	return err;
 }
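
The sit transmit path above now consults the per-tunnel dst_cache before
falling back to a full route lookup. The general pattern, assuming the cache
was set up earlier with dst_cache_init() (error handling abbreviated):

    struct rtable *rt;

    rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
    if (!rt) {
            rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
            if (IS_ERR(rt))
                    return PTR_ERR(rt);     /* abbreviated error path */
            dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
    }

The cache must be invalidated with dst_cache_reset() whenever the tunnel
parameters change, otherwise stale routes would keep being returned.
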
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index e997141..16632e0 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  IPv6 Syncookies implementation for the Linux kernel
  *
@@ -6,12 +7,6 @@
  *
  *  Based on IPv4 implementation by Andi Kleen
  *  linux/net/ipv4/syncookies.c
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  */
 
 #include <linux/tcp.h>
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index e15cd37..ec8fcfc 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -21,8 +21,7 @@
 #include <net/calipso.h>
 #endif
 
-static int zero;
-static int one = 1;
+static int flowlabel_reflect_max = 0x7;
 static int auto_flowlabels_min;
 static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
 
@@ -113,7 +112,9 @@
 		.data		= &init_net.ipv6.sysctl.flowlabel_reflect,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &flowlabel_reflect_max,
 	},
 	{
 		.procname	= "max_dst_opts_number",
@@ -149,8 +150,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler   = proc_rt6_multipath_hash_policy,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "seg6_flowlabel",
@@ -176,7 +177,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one
+		.extra1		= SYSCTL_ONE
 	},
 #ifdef CONFIG_NETLABEL
 	{
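
SYSCTL_ZERO and SYSCTL_ONE are shared, kernel-wide bound constants that
replace the per-file static ints removed above; they can be used wherever
proc_dointvec_minmax() needs a clamp. A sketch of a bounded entry (demo_*
names hypothetical):

    static struct ctl_table demo_table[] = {
    	{
    		.procname     = "demo_flag",
    		.data         = &demo_flag,
    		.maxlen       = sizeof(int),
    		.mode         = 0644,
    		.proc_handler = proc_dointvec_minmax,
    		.extra1       = SYSCTL_ZERO,    /* shared lower bound */
    		.extra2       = SYSCTL_ONE,     /* shared upper bound */
    	},
    	{ }
    };
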
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 03e6b7a..4804b6d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	TCP over IPv6
  *	Linux INET6 implementation
@@ -16,11 +17,6 @@
  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
  *					a single port at the same time.
  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/bottom_half.h>
@@ -43,6 +39,7 @@
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 #include <linux/random.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include <net/tcp.h>
 #include <net/ndisc.h>
@@ -90,6 +87,18 @@
 }
 #endif
 
+/* Helper returning the inet6 state (struct ipv6_pinfo) from a given
+ * tcp socket. It can be used in the TCP stack instead of inet6_sk(sk).
+ * This avoids a dereference and allows compiler optimizations.
+ * It is a specialized version of inet6_sk_generic().
+ */
+static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
+{
+	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
+
+	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+}
+
 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -99,7 +108,7 @@
 
 		sk->sk_rx_dst = dst;
 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
-		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
 	}
 }
 
@@ -138,7 +147,7 @@
 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
 	struct inet_sock *inet = inet_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p, final;
 	struct ipv6_txoptions *opt;
@@ -162,7 +171,7 @@
 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
 			struct ip6_flowlabel *flowlabel;
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
-			if (!flowlabel)
+			if (IS_ERR(flowlabel))
 				return -EINVAL;
 			fl6_sock_release(flowlabel);
 		}
@@ -206,7 +215,7 @@
 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
 		tp->rx_opt.ts_recent = 0;
 		tp->rx_opt.ts_recent_stamp = 0;
-		tp->write_seq = 0;
+		WRITE_ONCE(tp->write_seq, 0);
 	}
 
 	sk->sk_v6_daddr = usin->sin6_addr;
@@ -220,8 +229,6 @@
 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
 		struct sockaddr_in sin;
 
-		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
-
 		if (__ipv6_only_sock(sk))
 			return -ENETUNREACH;
 
@@ -304,10 +311,11 @@
 
 	if (likely(!tp->repair)) {
 		if (!tp->write_seq)
-			tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
-							 sk->sk_v6_daddr.s6_addr32,
-							 inet->inet_sport,
-							 inet->inet_dport);
+			WRITE_ONCE(tp->write_seq,
+				   secure_tcpv6_seq(np->saddr.s6_addr32,
+						    sk->sk_v6_daddr.s6_addr32,
+						    inet->inet_sport,
+						    inet->inet_dport));
 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
 						   np->saddr.s6_addr32,
 						   sk->sk_v6_daddr.s6_addr32);
@@ -349,7 +357,7 @@
 	}
 }
 
-static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		u8 type, u8 code, int offset, __be32 info)
 {
 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
@@ -371,17 +379,19 @@
 	if (!sk) {
 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
 				  ICMP6_MIB_INERRORS);
-		return;
+		return -ENOENT;
 	}
 
 	if (sk->sk_state == TCP_TIME_WAIT) {
 		inet_twsk_put(inet_twsk(sk));
-		return;
+		return 0;
 	}
 	seq = ntohl(th->seq);
 	fatal = icmpv6_err_convert(type, code, &err);
-	if (sk->sk_state == TCP_NEW_SYN_RECV)
-		return tcp_req_err(sk, seq, fatal);
+	if (sk->sk_state == TCP_NEW_SYN_RECV) {
+		tcp_req_err(sk, seq, fatal);
+		return 0;
+	}
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
@@ -390,14 +400,14 @@
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
 
-	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
+	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
 		goto out;
 	}
 
 	tp = tcp_sk(sk);
 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
-	fastopen = tp->fastopen_rsk;
+	fastopen = rcu_dereference(tp->fastopen_rsk);
 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
 	    !between(seq, snd_una, tp->snd_nxt)) {
@@ -405,7 +415,7 @@
 		goto out;
 	}
 
-	np = inet6_sk(sk);
+	np = tcp_inet6_sk(sk);
 
 	if (type == NDISC_REDIRECT) {
 		if (!sock_owned_by_user(sk)) {
@@ -467,6 +477,7 @@
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	return 0;
 }
 
 
@@ -477,7 +488,7 @@
 			      enum tcp_synack_type synack_type)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
-	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 	struct ipv6_txoptions *opt;
 	struct flowi6 *fl6 = &fl->u.ip6;
 	struct sk_buff *skb;
@@ -502,7 +513,8 @@
 		opt = ireq->ipv6_opt;
 		if (!opt)
 			opt = rcu_dereference(np->opt);
-		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
+		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
+			       sk->sk_priority);
 		rcu_read_unlock();
 		err = net_xmit_eval(err);
 	}
@@ -734,14 +746,15 @@
 			    const struct sock *sk_listener,
 			    struct sk_buff *skb)
 {
+	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
 	struct inet_request_sock *ireq = inet_rsk(req);
-	const struct ipv6_pinfo *np = inet6_sk(sk_listener);
+	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
 
 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 
 	/* So that link locals have meaning */
-	if (!sk_listener->sk_bound_dev_if &&
+	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
 		ireq->ir_iif = tcp_v6_iif(skb);
 
@@ -792,7 +805,7 @@
 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
 				 int oif, struct tcp_md5sig_key *key, int rst,
-				 u8 tclass, __be32 label)
+				 u8 tclass, __be32 label, u32 priority)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcphdr *t1;
@@ -872,9 +885,17 @@
 		fl6.flowi6_oif = oif;
 	}
 
-	if (sk)
-		mark = (sk->sk_state == TCP_TIME_WAIT) ?
-			inet_twsk(sk)->tw_mark : sk->sk_mark;
+	if (sk) {
+		if (sk->sk_state == TCP_TIME_WAIT) {
+			mark = inet_twsk(sk)->tw_mark;
+			/* autoflowlabel relies on buff->hash */
+			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
+				     PKT_HASH_TYPE_L4);
+		} else {
+			mark = sk->sk_mark;
+		}
+		buff->tstamp = tcp_transmit_time(sk);
+	}
 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
 	fl6.fl6_dport = t1->dest;
 	fl6.fl6_sport = t1->source;
@@ -888,7 +909,8 @@
 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
 	if (!IS_ERR(dst)) {
 		skb_dst_set(buff, dst);
-		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
+		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
+			 priority);
 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 		if (rst)
 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
@@ -901,15 +923,18 @@
 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	u32 seq = 0, ack_seq = 0;
 	struct tcp_md5sig_key *key = NULL;
 #ifdef CONFIG_TCP_MD5SIG
 	const __u8 *hash_location = NULL;
-	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	unsigned char newhash[16];
 	int genhash;
 	struct sock *sk1 = NULL;
 #endif
+	__be32 label = 0;
+	u32 priority = 0;
+	struct net *net;
 	int oif = 0;
 
 	if (th->rst)
@@ -921,6 +946,7 @@
 	if (!sk && !ipv6_unicast_destination(skb))
 		return;
 
+	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 #ifdef CONFIG_TCP_MD5SIG
 	rcu_read_lock();
 	hash_location = tcp_parse_md5sig_option(th);
@@ -934,7 +960,7 @@
 		 * Incoming packet is checked with md5 hash with finding key,
 		 * no RST generated if md5 hash doesn't match.
 		 */
-		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+		sk1 = inet6_lookup_listener(net,
 					   &tcp_hashinfo, NULL, 0,
 					   &ipv6h->saddr,
 					   th->source, &ipv6h->daddr,
@@ -962,11 +988,25 @@
 
 	if (sk) {
 		oif = sk->sk_bound_dev_if;
-		if (sk_fullsock(sk))
+		if (sk_fullsock(sk)) {
+			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
+
 			trace_tcp_send_reset(sk, skb);
+			if (np->repflow)
+				label = ip6_flowlabel(ipv6h);
+			priority = sk->sk_priority;
+		}
+		if (sk->sk_state == TCP_TIME_WAIT) {
+			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
+			priority = inet_twsk(sk)->tw_priority;
+		}
+	} else {
+		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
+			label = ip6_flowlabel(ipv6h);
 	}
 
-	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
+	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
+			     label, priority);
 
 #ifdef CONFIG_TCP_MD5SIG
 out:
@@ -977,10 +1017,10 @@
 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
 			    struct tcp_md5sig_key *key, u8 tclass,
-			    __be32 label)
+			    __be32 label, u32 priority)
 {
 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
-			     tclass, label);
+			     tclass, label, priority);
 }
 
 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -992,7 +1032,7 @@
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
-			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
+			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
 
 	inet_twsk_put(tw);
 }
@@ -1015,7 +1055,7 @@
 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
 			req->ts_recent, sk->sk_bound_dev_if,
 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
-			0, 0);
+			0, 0, sk->sk_priority);
 }
 
 
@@ -1030,6 +1070,21 @@
 	return sk;
 }
 
+u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
+			 struct tcphdr *th, u32 *cookie)
+{
+	u16 mss = 0;
+#ifdef CONFIG_SYN_COOKIES
+	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
+				    &tcp_request_sock_ipv6_ops, sk, th);
+	if (mss) {
+		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
+		tcp_synq_overflow(sk);
+	}
+#endif
+	return mss;
+}
+
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	if (skb->protocol == htons(ETH_P_IP))
@@ -1064,9 +1119,8 @@
 {
 	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *newnp;
-	const struct ipv6_pinfo *np = inet6_sk(sk);
+	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 	struct ipv6_txoptions *opt;
-	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
 	struct tcp_sock *newtp;
 	struct sock *newsk;
@@ -1086,11 +1140,10 @@
 		if (!newsk)
 			return NULL;
 
-		newtcp6sk = (struct tcp6_sock *)newsk;
-		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
 
 		newinet = inet_sk(newsk);
-		newnp = inet6_sk(newsk);
+		newnp = tcp_inet6_sk(newsk);
 		newtp = tcp_sk(newsk);
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -1108,11 +1161,11 @@
 		newnp->ipv6_fl_list = NULL;
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
-		newnp->mcast_oif   = tcp_v6_iif(skb);
-		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
-		newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+		newnp->mcast_oif   = inet_iif(skb);
+		newnp->mcast_hops  = ip_hdr(skb)->ttl;
+		newnp->rcv_flowinfo = 0;
 		if (np->repflow)
-			newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
+			newnp->flow_label = 0;
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1154,12 +1207,11 @@
 	ip6_dst_store(newsk, dst, NULL, NULL);
 	inet6_sk_rx_dst_set(newsk, skb);
 
-	newtcp6sk = (struct tcp6_sock *)newsk;
-	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
 
 	newtp = tcp_sk(newsk);
 	newinet = inet_sk(newsk);
-	newnp = inet6_sk(newsk);
+	newnp = tcp_inet6_sk(newsk);
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
@@ -1274,9 +1326,9 @@
  */
 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct tcp_sock *tp;
+	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 	struct sk_buff *opt_skb = NULL;
+	struct tcp_sock *tp;
 
 	/* Imagine: socket is IPv6. IPv4 packet arrives,
 	   goes to IPv4 receive handler and backlogged.
@@ -1424,8 +1476,9 @@
 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
 }
 
-static int tcp_v6_rcv(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 {
+	struct sk_buff *skb_to_free;
 	int sdif = inet6_sdif(skb);
 	const struct tcphdr *th;
 	const struct ipv6hdr *hdr;
@@ -1522,7 +1575,7 @@
 			return 0;
 		}
 	}
-	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
+	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
 		goto discard_and_relse;
 	}
@@ -1552,12 +1605,17 @@
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
+		skb_to_free = sk->sk_rx_skb_cache;
+		sk->sk_rx_skb_cache = NULL;
 		ret = tcp_v6_do_rcv(sk, skb);
-	} else if (tcp_add_backlog(sk, skb)) {
-		goto discard_and_relse;
+	} else {
+		if (tcp_add_backlog(sk, skb))
+			goto discard_and_relse;
+		skb_to_free = NULL;
 	}
 	bh_unlock_sock(sk);
-
+	if (skb_to_free)
+		__kfree_skb(skb_to_free);
 put_and_return:
 	if (refcounted)
 		sock_put(sk);
@@ -1637,7 +1695,7 @@
 	goto discard_it;
 }
 
-static void tcp_v6_early_demux(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
 {
 	const struct ipv6hdr *hdr;
 	const struct tcphdr *th;
@@ -1667,7 +1725,7 @@
 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
 
 			if (dst)
-				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
 			if (dst &&
 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
 				skb_dst_set_noref(skb, dst);
@@ -1838,7 +1896,8 @@
 		/* Because we don't lock the socket,
 		 * we might find a transient negative value.
 		 */
-		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+				      READ_ONCE(tp->copied_seq), 0);
 
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
@@ -1849,7 +1908,7 @@
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
 		   state,
-		   tp->write_seq - tp->snd_una,
+		   READ_ONCE(tp->write_seq) - tp->snd_una,
 		   rx_queue,
 		   timer_active,
 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
@@ -1860,7 +1919,7 @@
 		   refcount_read(&sp->sk_refcnt), sp,
 		   jiffies_to_clock_t(icsk->icsk_rto),
 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
-		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
+		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
 		   tp->snd_cwnd,
 		   state == TCP_LISTEN ?
 			fastopenq->max_qlen :
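
tcp_inet6_sk() works because struct ipv6_pinfo is the last member of struct
tcp6_sock, so its offset from the socket base is a compile-time constant
instead of a pointer loaded from inet_sk(sk)->pinet6. A simplified sketch of
the layout the helper relies on (demo_inet6_sk mirrors the hunk above):

    struct tcp6_sock {
    	struct tcp_sock   tcp;   /* must be first: cast from struct sock */
    	struct ipv6_pinfo inet6; /* must be last: fixed offset from sk */
    };

    static struct ipv6_pinfo *demo_inet6_sk(const struct sock *sk)
    {
    	/* constant offset: no load of inet_sk(sk)->pinet6 needed */
    	return (struct ipv6_pinfo *)((u8 *)sk +
    		sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo));
    }
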
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index e72947c..1796856 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -1,22 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPV6 GSO/GRO offload support
  *	Linux INET6 implementation
  *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  *      TCPv6 GSO/GRO support
  */
+#include <linux/indirect_call_wrapper.h>
 #include <linux/skbuff.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static struct sk_buff *tcp6_gro_receive(struct list_head *head,
-					struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
 	if (!NAPI_GRO_CB(skb)->flush &&
@@ -29,7 +26,7 @@
 	return tcp_gro_receive(head, skb);
 }
 
-static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
+INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
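
Dropping static via INDIRECT_CALLABLE_SCOPE exposes these handlers so that
call sites can avoid a retpoline-expensive indirect call by comparing the
function pointer against its likely targets. A sketch of the call-site side,
assuming cb is such a pointer and both candidates take (skb, nhoff):

    /* direct call when cb matches one of the expected targets,
     * plain indirect call otherwise */
    ret = INDIRECT_CALL_2(cb, tcp6_gro_complete, udp6_gro_complete,
                          skb, nhoff);
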
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index dae25ca..21e7b95 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C)2003,2004 USAGI/WIDE Project
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Authors	Mitsuru KANDA  <mk@linux-ipv6.org>
  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  */
@@ -134,24 +122,28 @@
 	return 0;
 }
 
-static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			u8 type, u8 code, int offset, __be32 info)
 {
 	struct xfrm6_tunnel *handler;
 
 	for_each_tunnel_rcu(tunnel6_handlers, handler)
 		if (!handler->err_handler(skb, opt, type, code, offset, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 
-static void tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			 u8 type, u8 code, int offset, __be32 info)
 {
 	struct xfrm6_tunnel *handler;
 
 	for_each_tunnel_rcu(tunnel46_handlers, handler)
 		if (!handler->err_handler(skb, opt, type, code, offset, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 
 static const struct inet6_protocol tunnel6_protocol = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b36694b..9fec580 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	UDP over IPv6
  *	Linux INET6 implementation
@@ -14,11 +15,6 @@
  *					a single port at the same time.
  *      Kazunori MIYAZAWA @USAGI:       change process style to use ip6_append_data
  *      YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/errno.h>
@@ -36,6 +32,7 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include <net/addrconf.h>
 #include <net/ndisc.h>
@@ -45,6 +42,7 @@
 #include <net/raw.h>
 #include <net/tcp_states.h>
 #include <net/ip6_checksum.h>
+#include <net/ip6_tunnel.h>
 #include <net/xfrm.h>
 #include <net/inet_hashtables.h>
 #include <net/inet6_hashtables.h>
@@ -56,16 +54,6 @@
 #include <trace/events/skb.h>
 #include "udp_impl.h"
 
-static bool udp6_lib_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if defined(CONFIG_NET_L3_MASTER_DEV)
-	if (!net->ipv4.sysctl_udp_l3mdev_accept &&
-	    skb && ipv6_l3mdev_skb(IP6CB(skb)->flags))
-		return true;
-#endif
-	return false;
-}
-
 static u32 udp6_ehashfn(const struct net *net,
 			const struct in6_addr *laddr,
 			const u16 lport,
@@ -101,7 +89,7 @@
 	return udp_lib_get_port(sk, snum, hash2_nulladdr);
 }
 
-static void udp_v6_rehash(struct sock *sk)
+void udp_v6_rehash(struct sock *sk)
 {
 	u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
 					  &sk->sk_v6_rcv_saddr,
@@ -113,16 +101,20 @@
 static int compute_score(struct sock *sk, struct net *net,
 			 const struct in6_addr *saddr, __be16 sport,
 			 const struct in6_addr *daddr, unsigned short hnum,
-			 int dif, int sdif, bool exact_dif)
+			 int dif, int sdif)
 {
 	int score;
 	struct inet_sock *inet;
+	bool dev_match;
 
 	if (!net_eq(sock_net(sk), net) ||
 	    udp_sk(sk)->udp_port_hash != hnum ||
 	    sk->sk_family != PF_INET6)
 		return -1;
 
+	if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+		return -1;
+
 	score = 0;
 	inet = inet_sk(sk);
 
@@ -132,29 +124,18 @@
 		score++;
 	}
 
-	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
-		if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
-			return -1;
-		score++;
-	}
-
 	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
 		if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
 			return -1;
 		score++;
 	}
 
-	if (sk->sk_bound_dev_if || exact_dif) {
-		bool dev_match = (sk->sk_bound_dev_if == dif ||
-				  sk->sk_bound_dev_if == sdif);
+	dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif);
+	if (!dev_match)
+		return -1;
+	score++;
 
-		if (!dev_match)
-			return -1;
-		if (sk->sk_bound_dev_if)
-			score++;
-	}
-
-	if (sk->sk_incoming_cpu == raw_smp_processor_id())
+	if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
 		score++;
 
 	return score;
@@ -164,8 +145,8 @@
 static struct sock *udp6_lib_lookup2(struct net *net,
 		const struct in6_addr *saddr, __be16 sport,
 		const struct in6_addr *daddr, unsigned int hnum,
-		int dif, int sdif, bool exact_dif,
-		struct udp_hslot *hslot2, struct sk_buff *skb)
+		int dif, int sdif, struct udp_hslot *hslot2,
+		struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	int score, badness;
@@ -175,15 +156,16 @@
 	badness = -1;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
 		score = compute_score(sk, net, saddr, sport,
-				      daddr, hnum, dif, sdif, exact_dif);
+				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
+			if (sk->sk_reuseport &&
+			    sk->sk_state != TCP_ESTABLISHED) {
 				hash = udp6_ehashfn(net, daddr, hnum,
 						    saddr, sport);
 
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
-				if (result)
+				if (result && !reuseport_has_conns(sk, false))
 					return result;
 			}
 			result = sk;
@@ -200,66 +182,30 @@
 			       int dif, int sdif, struct udp_table *udptable,
 			       struct sk_buff *skb)
 {
-	struct sock *sk, *result;
 	unsigned short hnum = ntohs(dport);
-	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
-	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
-	bool exact_dif = udp6_lib_exact_dif_match(net, skb);
-	int score, badness;
-	u32 hash = 0;
+	unsigned int hash2, slot2;
+	struct udp_hslot *hslot2;
+	struct sock *result;
 
-	if (hslot->count > 10) {
-		hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+	hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+	slot2 = hash2 & udptable->mask;
+	hslot2 = &udptable->hash2[slot2];
+
+	result = udp6_lib_lookup2(net, saddr, sport,
+				  daddr, hnum, dif, sdif,
+				  hslot2, skb);
+	if (!result) {
+		hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
 		slot2 = hash2 & udptable->mask;
+
 		hslot2 = &udptable->hash2[slot2];
-		if (hslot->count < hslot2->count)
-			goto begin;
 
 		result = udp6_lib_lookup2(net, saddr, sport,
-					  daddr, hnum, dif, sdif, exact_dif,
+					  &in6addr_any, hnum, dif, sdif,
 					  hslot2, skb);
-		if (!result) {
-			unsigned int old_slot2 = slot2;
-			hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
-			slot2 = hash2 & udptable->mask;
-			/* avoid searching the same slot again. */
-			if (unlikely(slot2 == old_slot2))
-				return result;
-
-			hslot2 = &udptable->hash2[slot2];
-			if (hslot->count < hslot2->count)
-				goto begin;
-
-			result = udp6_lib_lookup2(net, saddr, sport,
-						  daddr, hnum, dif, sdif,
-						  exact_dif, hslot2,
-						  skb);
-		}
-		if (unlikely(IS_ERR(result)))
-			return NULL;
-		return result;
 	}
-begin:
-	result = NULL;
-	badness = -1;
-	sk_for_each_rcu(sk, &hslot->head) {
-		score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
-				      sdif, exact_dif);
-		if (score > badness) {
-			if (sk->sk_reuseport) {
-				hash = udp6_ehashfn(net, daddr, hnum,
-						    saddr, sport);
-				result = reuseport_select_sock(sk, hash, skb,
-							sizeof(struct udphdr));
-				if (unlikely(IS_ERR(result)))
-					return NULL;
-				if (result)
-					return result;
-			}
-			result = sk;
-			badness = score;
-		}
-	}
+	if (IS_ERR(result))
+		return NULL;
 	return result;
 }
 EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
@@ -282,7 +228,7 @@
 
 	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
 				 &iph->daddr, dport, inet6_iif(skb),
-				 inet6_sdif(skb), &udp_table, skb);
+				 inet6_sdif(skb), &udp_table, NULL);
 }
 EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
 
@@ -325,9 +271,9 @@
 	struct inet_sock *inet = inet_sk(sk);
 	struct sk_buff *skb;
 	unsigned int ulen, copied;
-	int peeked, peeking, off;
-	int err;
+	int off, err, peeking = flags & MSG_PEEK;
 	int is_udplite = IS_UDPLITE(sk);
+	struct udp_mib __percpu *mib;
 	bool checksum_valid = false;
 	int is_udp4;
 
@@ -338,9 +284,8 @@
 		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
 
 try_again:
-	peeking = flags & MSG_PEEK;
 	off = sk_peek_offset(sk, flags);
-	skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
+	skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
 	if (!skb)
 		return err;
 
@@ -352,6 +297,7 @@
 		msg->msg_flags |= MSG_TRUNC;
 
 	is_udp4 = (skb->protocol == htons(ETH_P_IP));
+	mib = __UDPX_MIB(sk, is_udp4);
 
 	/*
 	 * If checksum is needed at all, try to do it while copying the
@@ -378,26 +324,15 @@
 			goto csum_copy_err;
 	}
 	if (unlikely(err)) {
-		if (!peeked) {
+		if (!peeking) {
 			atomic_inc(&sk->sk_drops);
-			if (is_udp4)
-				UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
-					      is_udplite);
-			else
-				UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
-					       is_udplite);
+			SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
 		}
 		kfree_skb(skb);
 		return err;
 	}
-	if (!peeked) {
-		if (is_udp4)
-			UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
-				      is_udplite);
-		else
-			UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
-				       is_udplite);
-	}
+	if (!peeking)
+		SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
 
 	sock_recv_ts_and_drops(msg, sk, skb);
 
@@ -419,8 +354,15 @@
 						    inet6_iif(skb));
 		}
 		*addr_len = sizeof(*sin6);
+
+		if (cgroup_bpf_enabled)
+			BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
+						(struct sockaddr *)sin6);
 	}
 
+	if (udp_sk(sk)->gro_enabled)
+		udp_cmsg_recv(msg, sk, skb);
+
 	if (np->rxopt.all)
 		ip6_datagram_recv_common_ctl(sk, msg, skb);
 
@@ -443,17 +385,8 @@
 csum_copy_err:
 	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
 				 udp_skb_destructor)) {
-		if (is_udp4) {
-			UDP_INC_STATS(sock_net(sk),
-				      UDP_MIB_CSUMERRORS, is_udplite);
-			UDP_INC_STATS(sock_net(sk),
-				      UDP_MIB_INERRORS, is_udplite);
-		} else {
-			UDP6_INC_STATS(sock_net(sk),
-				       UDP_MIB_CSUMERRORS, is_udplite);
-			UDP6_INC_STATS(sock_net(sk),
-				       UDP_MIB_INERRORS, is_udplite);
-		}
+		SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS);
+		SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
 	}
 	kfree_skb(skb);
 
@@ -463,26 +396,133 @@
 	goto try_again;
 }
 
-void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-		    u8 type, u8 code, int offset, __be32 info,
-		    struct udp_table *udptable)
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+void udpv6_encap_enable(void)
+{
+	static_branch_inc(&udpv6_encap_needed_key);
+}
+EXPORT_SYMBOL(udpv6_encap_enable);
+
+/* Handler for tunnels with arbitrary destination ports: no socket lookup; go
+ * through error handlers in encapsulations looking for a match.
+ */
+static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
+				      struct inet6_skb_parm *opt,
+				      u8 type, u8 code, int offset, __be32 info)
+{
+	int i;
+
+	for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
+		int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			       u8 type, u8 code, int offset, __be32 info);
+		const struct ip6_tnl_encap_ops *encap;
+
+		encap = rcu_dereference(ip6tun_encaps[i]);
+		if (!encap)
+			continue;
+		handler = encap->err_handler;
+		if (handler && !handler(skb, opt, type, code, offset, info))
+			return 0;
+	}
+
+	return -ENOENT;
+}
+
+/* Try to match ICMP errors to UDP tunnels by looking up a socket without
+ * reversing source and destination port: this will match tunnels that force the
+ * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
+ * lwtunnels might actually break this assumption by being configured with
+ * different destination ports on endpoints; in this case we won't be able to
+ * trace ICMP messages back to them.
+ *
+ * If this doesn't match any socket, probe tunnels with arbitrary destination
+ * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
+ * we've sent packets to won't necessarily match the local destination port.
+ *
+ * Then ask the tunnel implementation to match the error against a valid
+ * association.
+ *
+ * Return an error if we can't find a match, the socket if we need further
+ * processing, zero otherwise.
+ */
+static struct sock *__udp6_lib_err_encap(struct net *net,
+					 const struct ipv6hdr *hdr, int offset,
+					 struct udphdr *uh,
+					 struct udp_table *udptable,
+					 struct sk_buff *skb,
+					 struct inet6_skb_parm *opt,
+					 u8 type, u8 code, __be32 info)
+{
+	int network_offset, transport_offset;
+	struct sock *sk;
+
+	network_offset = skb_network_offset(skb);
+	transport_offset = skb_transport_offset(skb);
+
+	/* Network header needs to point to the outer IPv6 header inside ICMP */
+	skb_reset_network_header(skb);
+
+	/* Transport header needs to point to the UDP header */
+	skb_set_transport_header(skb, offset);
+
+	sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
+			       &hdr->saddr, uh->dest,
+			       inet6_iif(skb), 0, udptable, skb);
+	if (sk) {
+		int (*lookup)(struct sock *sk, struct sk_buff *skb);
+		struct udp_sock *up = udp_sk(sk);
+
+		lookup = READ_ONCE(up->encap_err_lookup);
+		if (!lookup || lookup(sk, skb))
+			sk = NULL;
+	}
+
+	if (!sk) {
+		sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
+							offset, info));
+	}
+
+	skb_set_transport_header(skb, transport_offset);
+	skb_set_network_header(skb, network_offset);
+
+	return sk;
+}
+
+int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		   u8 type, u8 code, int offset, __be32 info,
+		   struct udp_table *udptable)
 {
 	struct ipv6_pinfo *np;
 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
 	const struct in6_addr *saddr = &hdr->saddr;
 	const struct in6_addr *daddr = &hdr->daddr;
 	struct udphdr *uh = (struct udphdr *)(skb->data+offset);
+	bool tunnel = false;
 	struct sock *sk;
 	int harderr;
 	int err;
 	struct net *net = dev_net(skb->dev);
 
 	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
-			       inet6_iif(skb), 0, udptable, skb);
+			       inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
 	if (!sk) {
-		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
-				  ICMP6_MIB_INERRORS);
-		return;
+		/* No socket for error: try tunnels before discarding */
+		sk = ERR_PTR(-ENOENT);
+		if (static_branch_unlikely(&udpv6_encap_needed_key)) {
+			sk = __udp6_lib_err_encap(net, hdr, offset, uh,
+						  udptable, skb,
+						  opt, type, code, info);
+			if (!sk)
+				return 0;
+		}
+
+		if (IS_ERR(sk)) {
+			__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+					  ICMP6_MIB_INERRORS);
+			return PTR_ERR(sk);
+		}
+
+		tunnel = true;
 	}
 
 	harderr = icmpv6_err_convert(type, code, &err);
@@ -496,10 +536,19 @@
 			harderr = 1;
 	}
 	if (type == NDISC_REDIRECT) {
-		ip6_sk_redirect(skb, sk);
+		if (tunnel) {
+			ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
+				     sk->sk_mark, sk->sk_uid);
+		} else {
+			ip6_sk_redirect(skb, sk);
+		}
 		goto out;
 	}
 
+	/* Tunnels don't have an application socket: don't pass errors back */
+	if (tunnel)
+		goto out;
+
 	if (!np->recverr) {
 		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
 			goto out;
@@ -510,7 +559,7 @@
 	sk->sk_err = err;
 	sk->sk_error_report(sk);
 out:
-	return;
+	return 0;
 }
 
 static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -541,21 +590,14 @@
 	return 0;
 }
 
-static __inline__ void udpv6_err(struct sk_buff *skb,
-				 struct inet6_skb_parm *opt, u8 type,
-				 u8 code, int offset, __be32 info)
+static __inline__ int udpv6_err(struct sk_buff *skb,
+				struct inet6_skb_parm *opt, u8 type,
+				u8 code, int offset, __be32 info)
 {
-	__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
+	return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
 }
 
-static DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
-void udpv6_encap_enable(void)
-{
-	static_branch_enable(&udpv6_encap_needed_key);
-}
-EXPORT_SYMBOL(udpv6_encap_enable);
-
-static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
 	int is_udplite = IS_UDPLITE(sk);
@@ -638,10 +680,32 @@
 	return -1;
 }
 
+static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct sk_buff *next, *segs;
+	int ret;
+
+	if (likely(!udp_unexpected_gso(sk, skb)))
+		return udpv6_queue_rcv_one_skb(sk, skb);
+
+	__skb_push(skb, -skb_mac_offset(skb));
+	segs = udp_rcv_segment(sk, skb, false);
+	for (skb = segs; skb; skb = next) {
+		next = skb->next;
+		__skb_pull(skb, skb_transport_offset(skb));
+
+		ret = udpv6_queue_rcv_one_skb(sk, skb);
+		if (ret > 0)
+			ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret,
+						 true);
+	}
+	return 0;
+}
+
 static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
 				   __be16 loc_port, const struct in6_addr *loc_addr,
 				   __be16 rmt_port, const struct in6_addr *rmt_addr,
-				   int dif, unsigned short hnum)
+				   int dif, int sdif, unsigned short hnum)
 {
 	struct inet_sock *inet = inet_sk(sk);
 
@@ -653,7 +717,7 @@
 	    (inet->inet_dport && inet->inet_dport != rmt_port) ||
 	    (!ipv6_addr_any(&sk->sk_v6_daddr) &&
 		    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
-	    (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) ||
+	    !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif) ||
 	    (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
 		    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
 		return false;
@@ -687,6 +751,7 @@
 	unsigned int offset = offsetof(typeof(*sk), sk_node);
 	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
 	int dif = inet6_iif(skb);
+	int sdif = inet6_sdif(skb);
 	struct hlist_node *node;
 	struct sk_buff *nskb;
 
@@ -701,7 +766,8 @@
 
 	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
 		if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
-					    uh->source, saddr, dif, hnum))
+					    uh->source, saddr, dif, sdif,
+					    hnum))
 			continue;
 		/* If zero checksum and no_check is not on for
 		 * the socket then skip it.
@@ -761,8 +827,7 @@
 	int ret;
 
 	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
-		skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
-					 ip6_compute_pseudo);
+		skb_checksum_try_convert(skb, IPPROTO_UDP, ip6_compute_pseudo);
 
 	ret = udpv6_queue_rcv_skb(sk, skb);
 
@@ -904,7 +969,7 @@
 	return NULL;
 }
 
-static void udp_v6_early_demux(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
 	const struct udphdr *uh;
@@ -945,7 +1010,7 @@
 	}
 }
 
-static __inline__ int udpv6_rcv(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb)
 {
 	return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
 }
@@ -969,6 +1034,8 @@
 static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
 			     int addr_len)
 {
+	if (addr_len < offsetofend(struct sockaddr, sa_family))
+		return -EINVAL;
 	/* The following checks are replicated from __ip6_datagram_connect()
 	 * and intended to prevent BPF program called below from accessing
 	 * bytes that are out of the bound specified by user in addr_len.
@@ -1042,6 +1109,7 @@
 	__wsum csum = 0;
 	int offset = skb_transport_offset(skb);
 	int len = skb->len - offset;
+	int datalen = len - sizeof(*uh);
 
 	/*
 	 * Create a UDP header
@@ -1056,18 +1124,30 @@
 		const int hlen = skb_network_header_len(skb) +
 				 sizeof(struct udphdr);
 
-		if (hlen + cork->gso_size > cork->fragsize)
+		if (hlen + cork->gso_size > cork->fragsize) {
+			kfree_skb(skb);
 			return -EINVAL;
-		if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+		}
+		if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+			kfree_skb(skb);
 			return -EINVAL;
-		if (udp_sk(sk)->no_check6_tx)
+		}
+		if (udp_sk(sk)->no_check6_tx) {
+			kfree_skb(skb);
 			return -EINVAL;
+		}
 		if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
-		    dst_xfrm(skb_dst(skb)))
+		    dst_xfrm(skb_dst(skb))) {
+			kfree_skb(skb);
 			return -EIO;
+		}
 
-		skb_shinfo(skb)->gso_size = cork->gso_size;
-		skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+		if (datalen > cork->gso_size) {
+			skb_shinfo(skb)->gso_size = cork->gso_size;
+			skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+			skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
+								 cork->gso_size);
+		}
 		goto csum_partial;
 	}
 
@@ -1156,6 +1236,7 @@
 	ipcm6_init(&ipc6);
 	ipc6.gso_size = up->gso_size;
 	ipc6.sockc.tsflags = sk->sk_tsflags;
+	ipc6.sockc.mark = sk->sk_mark;
 
 	/* destination address check */
 	if (sin6) {
@@ -1245,7 +1326,7 @@
 			fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 			if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
 				flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
-				if (!flowlabel)
+				if (IS_ERR(flowlabel))
 					return -EINVAL;
 			}
 		}
@@ -1278,7 +1359,7 @@
 	if (!fl6.flowi6_oif)
 		fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
 
-	fl6.flowi6_mark = sk->sk_mark;
+	fl6.flowi6_mark = ipc6.sockc.mark;
 	fl6.flowi6_uid = sk->sk_uid;
 
 	if (msg->msg_controllen) {
@@ -1297,7 +1378,7 @@
 		}
 		if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
-			if (!flowlabel)
+			if (IS_ERR(flowlabel))
 				return -EINVAL;
 		}
 		if (!(opt->opt_nflen|opt->opt_flen))
@@ -1314,10 +1395,7 @@
 	ipc6.opt = opt;
 
 	fl6.flowi6_proto = sk->sk_protocol;
-	if (!ipv6_addr_any(daddr))
-		fl6.daddr = *daddr;
-	else
-		fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+	fl6.daddr = *daddr;
 	if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
 		fl6.saddr = np->saddr;
 	fl6.fl6_sport = inet->inet_sport;
@@ -1345,6 +1423,9 @@
 		}
 	}
 
+	if (ipv6_addr_any(&fl6.daddr))
+		fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+
 	final_p = fl6_update_dst(&fl6, opt, &final);
 	if (final_p)
 		connected = false;
@@ -1458,11 +1539,15 @@
 	udp_v6_flush_pending_frames(sk);
 	release_sock(sk);
 
-	if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
-		void (*encap_destroy)(struct sock *sk);
-		encap_destroy = READ_ONCE(up->encap_destroy);
-		if (encap_destroy)
-			encap_destroy(sk);
+	if (static_branch_unlikely(&udpv6_encap_needed_key)) {
+		if (up->encap_type) {
+			void (*encap_destroy)(struct sock *sk);
+			encap_destroy = READ_ONCE(up->encap_destroy);
+			if (encap_destroy)
+				encap_destroy(sk);
+		}
+		if (up->encap_enabled)
+			static_branch_dec(&udpv6_encap_needed_key);
 	}
 
 	inet6_destroy_sock(sk);
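
udpv6_encap_needed_key is now reference counted: udpv6_encap_enable() uses
static_branch_inc(), and each socket that enabled encapsulation drops its
reference in the destroy path above, so the key can return to false when the
last user goes away. The pattern in isolation (demo_* names, including
demo_slow_path, are hypothetical):

    DEFINE_STATIC_KEY_FALSE(demo_needed_key);

    static void demo_enable(void)
    {
    	static_branch_inc(&demo_needed_key);    /* one ref per user */
    }

    static void demo_disable(void)
    {
    	static_branch_dec(&demo_needed_key);    /* false again at zero */
    }

    static void demo_rx(struct sk_buff *skb)
    {
    	/* compiled as a patched no-op branch until the first user */
    	if (static_branch_unlikely(&demo_needed_key))
    		demo_slow_path(skb);
    }
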
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 7903e21..20e324b 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -9,10 +9,11 @@
 #include <net/transp_v6.h>
 
 int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int);
-void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
-		    __be32, struct udp_table *);
+int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
+		   __be32, struct udp_table *);
 
 int udp_v6_get_port(struct sock *sk, unsigned short snum);
+void udp_v6_rehash(struct sock *sk);
 
 int udpv6_getsockopt(struct sock *sk, int level, int optname,
 		     char __user *optval, int __user *optlen);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 95dee9c..64b8f05 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -1,16 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	IPV6 GSO/GRO offload support
  *	Linux INET6 implementation
  *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
  *      UDPv6 GSO support
  */
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
+#include <linux/indirect_call_wrapper.h>
 #include <net/protocol.h>
 #include <net/ipv6.h>
 #include <net/udp.h>
@@ -114,12 +111,12 @@
 	return segs;
 }
 
-static struct sk_buff *udp6_gro_receive(struct list_head *head,
-					struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
-	if (unlikely(!uh))
+	if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key))
 		goto flush;
 
 	/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -142,18 +139,14 @@
 	return NULL;
 }
 
-static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check) {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+	if (uh->check)
 		uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
 					  &ipv6h->daddr, 0);
-	} else {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	}
 
 	return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
 }
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 5000ad6..bf7a7ac 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  UDPLITEv6   An implementation of the UDP-Lite protocol over IPv6.
  *              See also net/ipv4/udplite.c
@@ -6,10 +7,6 @@
  *
  *  Changes:
  *  Fixes:
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 #include <linux/export.h>
 #include <linux/proc_fs.h>
@@ -20,11 +17,12 @@
 	return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
 }
 
-static void udplitev6_err(struct sk_buff *skb,
+static int udplitev6_err(struct sk_buff *skb,
 			  struct inet6_skb_parm *opt,
 			  u8 type, u8 code, int offset, __be32 info)
 {
-	__udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table);
+	return __udp6_lib_err(skb, opt, type, code, offset, info,
+			      &udplite_table);
 }
 
 static const struct inet6_protocol udplitev6_protocol = {
@@ -48,6 +46,7 @@
 	.recvmsg	   = udpv6_recvmsg,
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
+	.rehash		   = udp_v6_rehash,
 	.get_port	   = udp_v6_get_port,
 	.memory_allocated  = &udp_memory_allocated,
 	.sysctl_mem	   = sysctl_udp_mem,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 9ef490d..a52cb3f 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -86,14 +86,16 @@
 {
 	struct net *net = dev_net(skb->dev);
 	struct xfrm_state *x = NULL;
+	struct sec_path *sp;
 	int i = 0;
 
-	if (secpath_set(skb)) {
+	sp = secpath_set(skb);
+	if (!sp) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
 		goto drop;
 	}
 
-	if (1 + skb->sp->len == XFRM_MAX_DEPTH) {
+	if (1 + sp->len == XFRM_MAX_DEPTH) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
 		goto drop;
 	}
@@ -145,7 +147,7 @@
 		goto drop;
 	}
 
-	skb->sp->xvec[skb->sp->len++] = x;
+	sp->xvec[sp->len++] = x;
 
 	spin_lock(&x->lock);
 
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
deleted file mode 100644
index 57fd314..0000000
--- a/net/ipv6/xfrm6_mode_beet.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6.
- *
- * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
- *                    Miika Komu     <miika@iki.fi>
- *                    Herbert Xu     <herbert@gondor.apana.org.au>
- *                    Abhinav Pathak <abhinav.pathak@hiit.fi>
- *                    Jeff Ahrenholz <ahrenholz@gmail.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dsfield.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-static void xfrm6_beet_make_header(struct sk_buff *skb)
-{
-	struct ipv6hdr *iph = ipv6_hdr(skb);
-
-	iph->version = 6;
-
-	memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
-	       sizeof(iph->flow_lbl));
-	iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
-
-	ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
-	iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
-}
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
- */
-static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct ipv6hdr *top_iph;
-	struct ip_beet_phdr *ph;
-	int optlen, hdr_len;
-
-	hdr_len = 0;
-	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
-	if (unlikely(optlen))
-		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);
-
-	skb_set_network_header(skb, -x->props.header_len - hdr_len);
-	if (x->sel.family != AF_INET6)
-		skb->network_header += IPV4_BEET_PHMAXLEN;
-	skb->mac_header = skb->network_header +
-			  offsetof(struct ipv6hdr, nexthdr);
-	skb->transport_header = skb->network_header + sizeof(*top_iph);
-	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);
-
-	xfrm6_beet_make_header(skb);
-
-	top_iph = ipv6_hdr(skb);
-	if (unlikely(optlen)) {
-
-		BUG_ON(optlen < 0);
-
-		ph->padlen = 4 - (optlen & 4);
-		ph->hdrlen = optlen / 8;
-		ph->nexthdr = top_iph->nexthdr;
-		if (ph->padlen)
-			memset(ph + 1, IPOPT_NOP, ph->padlen);
-
-		top_iph->nexthdr = IPPROTO_BEETPH;
-	}
-
-	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
-	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
-	return 0;
-}
-
-static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct ipv6hdr *ip6h;
-	int size = sizeof(struct ipv6hdr);
-	int err;
-
-	err = skb_cow_head(skb, size + skb->mac_len);
-	if (err)
-		goto out;
-
-	__skb_push(skb, size);
-	skb_reset_network_header(skb);
-	skb_mac_header_rebuild(skb);
-
-	xfrm6_beet_make_header(skb);
-
-	ip6h = ipv6_hdr(skb);
-	ip6h->payload_len = htons(skb->len - size);
-	ip6h->daddr = x->sel.daddr.in6;
-	ip6h->saddr = x->sel.saddr.in6;
-	err = 0;
-out:
-	return err;
-}
-
-static struct xfrm_mode xfrm6_beet_mode = {
-	.input2 = xfrm6_beet_input,
-	.input = xfrm_prepare_input,
-	.output2 = xfrm6_beet_output,
-	.output = xfrm6_prepare_output,
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_BEET,
-	.flags = XFRM_MODE_FLAG_TUNNEL,
-};
-
-static int __init xfrm6_beet_init(void)
-{
-	return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6);
-}
-
-static void __exit xfrm6_beet_exit(void)
-{
-	int err;
-
-	err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6);
-	BUG_ON(err);
-}
-
-module_init(xfrm6_beet_init);
-module_exit(xfrm6_beet_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
deleted file mode 100644
index da28e44..0000000
--- a/net/ipv6/xfrm6_mode_ro.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * xfrm6_mode_ro.c - Route optimization mode for IPv6.
- *
- * Copyright (C)2003-2006 Helsinki University of Technology
- * Copyright (C)2003-2006 USAGI/WIDE Project
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/*
- * Authors:
- *	Noriaki TAKAMIYA @USAGI
- *	Masahide NAKAMURA @USAGI
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/stringify.h>
-#include <linux/time.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-/* Add route optimization header space.
- *
- * The IP header and mutable extension headers will be moved forward to make
- * space for the route optimization header.
- */
-static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct ipv6hdr *iph;
-	u8 *prevhdr;
-	int hdr_len;
-
-	iph = ipv6_hdr(skb);
-
-	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
-	if (hdr_len < 0)
-		return hdr_len;
-	skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
-	skb_set_network_header(skb, -x->props.header_len);
-	skb->transport_header = skb->network_header + hdr_len;
-	__skb_pull(skb, hdr_len);
-	memmove(ipv6_hdr(skb), iph, hdr_len);
-
-	x->lastused = ktime_get_real_seconds();
-
-	return 0;
-}
-
-static struct xfrm_mode xfrm6_ro_mode = {
-	.output = xfrm6_ro_output,
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_ROUTEOPTIMIZATION,
-};
-
-static int __init xfrm6_ro_init(void)
-{
-	return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6);
-}
-
-static void __exit xfrm6_ro_exit(void)
-{
-	int err;
-
-	err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6);
-	BUG_ON(err);
-}
-
-module_init(xfrm6_ro_init);
-module_exit(xfrm6_ro_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
deleted file mode 100644
index 3c29da5..0000000
--- a/net/ipv6/xfrm6_mode_transport.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * xfrm6_mode_transport.c - Transport mode encapsulation for IPv6.
- *
- * Copyright (C) 2002 USAGI/WIDE Project
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-#include <net/protocol.h>
-
-/* Add encapsulation header.
- *
- * The IP header and mutable extension headers will be moved forward to make
- * space for the encapsulation header.
- */
-static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct ipv6hdr *iph;
-	u8 *prevhdr;
-	int hdr_len;
-
-	iph = ipv6_hdr(skb);
-	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
-
-	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
-	if (hdr_len < 0)
-		return hdr_len;
-	skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
-	skb_set_network_header(skb, -x->props.header_len);
-	skb->transport_header = skb->network_header + hdr_len;
-	__skb_pull(skb, hdr_len);
-	memmove(ipv6_hdr(skb), iph, hdr_len);
-	return 0;
-}
-
-/* Remove encapsulation header.
- *
- * The IP header will be moved over the top of the encapsulation header.
- *
- * On entry, skb->h shall point to where the IP header should be and skb->nh
- * shall be set to where the IP header currently is.  skb->data shall point
- * to the start of the payload.
- */
-static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int ihl = skb->data - skb_transport_header(skb);
-
-	if (skb->transport_header != skb->network_header) {
-		memmove(skb_transport_header(skb),
-			skb_network_header(skb), ihl);
-		skb->network_header = skb->transport_header;
-	}
-	ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
-					   sizeof(struct ipv6hdr));
-	skb_reset_transport_header(skb);
-	return 0;
-}
-
-static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
-						   struct sk_buff *skb,
-						   netdev_features_t features)
-{
-	const struct net_offload *ops;
-	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	struct xfrm_offload *xo = xfrm_offload(skb);
-
-	skb->transport_header += x->props.header_len;
-	ops = rcu_dereference(inet6_offloads[xo->proto]);
-	if (likely(ops && ops->callbacks.gso_segment))
-		segs = ops->callbacks.gso_segment(skb, features);
-
-	return segs;
-}
-
-static void xfrm6_transport_xmit(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct xfrm_offload *xo = xfrm_offload(skb);
-
-	skb_reset_mac_len(skb);
-	pskb_pull(skb, skb->mac_len + sizeof(struct ipv6hdr) + x->props.header_len);
-
-	if (xo->flags & XFRM_GSO_SEGMENT) {
-		 skb_reset_transport_header(skb);
-		 skb->transport_header -= x->props.header_len;
-	}
-}
-
-
-static struct xfrm_mode xfrm6_transport_mode = {
-	.input = xfrm6_transport_input,
-	.output = xfrm6_transport_output,
-	.gso_segment = xfrm4_transport_gso_segment,
-	.xmit = xfrm6_transport_xmit,
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_TRANSPORT,
-};
-
-static int __init xfrm6_transport_init(void)
-{
-	return xfrm_register_mode(&xfrm6_transport_mode, AF_INET6);
-}
-
-static void __exit xfrm6_transport_exit(void)
-{
-	int err;
-
-	err = xfrm_unregister_mode(&xfrm6_transport_mode, AF_INET6);
-	BUG_ON(err);
-}
-
-module_init(xfrm6_transport_init);
-module_exit(xfrm6_transport_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TRANSPORT);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
deleted file mode 100644
index de1b0b8..0000000
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * xfrm6_mode_tunnel.c - Tunnel mode encapsulation for IPv6.
- *
- * Copyright (C) 2002 USAGI/WIDE Project
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dsfield.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ip6_route.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
-{
-	struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
-
-	if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
-		IP6_ECN_set_ce(skb, inner_iph);
-}
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per RFC 2401.
- */
-static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct dst_entry *dst = skb_dst(skb);
-	struct ipv6hdr *top_iph;
-	int dsfield;
-
-	skb_set_inner_network_header(skb, skb_network_offset(skb));
-	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
-
-	skb_set_network_header(skb, -x->props.header_len);
-	skb->mac_header = skb->network_header +
-			  offsetof(struct ipv6hdr, nexthdr);
-	skb->transport_header = skb->network_header + sizeof(*top_iph);
-	top_iph = ipv6_hdr(skb);
-
-	top_iph->version = 6;
-
-	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
-	       sizeof(top_iph->flow_lbl));
-	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
-
-	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
-		dsfield = 0;
-	else
-		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
-	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
-	if (x->props.flags & XFRM_STATE_NOECN)
-		dsfield &= ~INET_ECN_MASK;
-	ipv6_change_dsfield(top_iph, 0, dsfield);
-	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
-	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
-	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
-	return 0;
-}
-
-#define for_each_input_rcu(head, handler)	\
-	for (handler = rcu_dereference(head);	\
-	     handler != NULL;			\
-	     handler = rcu_dereference(handler->next))
-
-
-static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err = -EINVAL;
-
-	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
-		goto out;
-	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
-		goto out;
-
-	err = skb_unclone(skb, GFP_ATOMIC);
-	if (err)
-		goto out;
-
-	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-		ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
-			       ipipv6_hdr(skb));
-	if (!(x->props.flags & XFRM_STATE_NOECN))
-		ipip6_ecn_decapsulate(skb);
-
-	skb_reset_network_header(skb);
-	skb_mac_header_rebuild(skb);
-	if (skb->mac_len)
-		eth_hdr(skb)->h_proto = skb->protocol;
-
-	err = 0;
-
-out:
-	return err;
-}
-
-static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x,
-						     struct sk_buff *skb,
-						     netdev_features_t features)
-{
-	__skb_push(skb, skb->mac_len);
-	return skb_mac_gso_segment(skb, features);
-}
-
-static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct xfrm_offload *xo = xfrm_offload(skb);
-
-	if (xo->flags & XFRM_GSO_SEGMENT)
-		skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
-
-	skb_reset_mac_len(skb);
-	pskb_pull(skb, skb->mac_len + x->props.header_len);
-}
-
-static struct xfrm_mode xfrm6_tunnel_mode = {
-	.input2 = xfrm6_mode_tunnel_input,
-	.input = xfrm_prepare_input,
-	.output2 = xfrm6_mode_tunnel_output,
-	.output = xfrm6_prepare_output,
-	.gso_segment = xfrm6_mode_tunnel_gso_segment,
-	.xmit = xfrm6_mode_tunnel_xmit,
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_TUNNEL,
-	.flags = XFRM_MODE_FLAG_TUNNEL,
-};
-
-static int __init xfrm6_mode_tunnel_init(void)
-{
-	return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6);
-}
-
-static void __exit xfrm6_mode_tunnel_exit(void)
-{
-	int err;
-
-	err = xfrm_unregister_mode(&xfrm6_tunnel_mode, AF_INET6);
-	BUG_ON(err);
-}
-
-module_init(xfrm6_mode_tunnel_init);
-module_exit(xfrm6_mode_tunnel_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6a74080..eecac1b 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * xfrm6_output.c - Common IPsec encapsulation code for IPv6.
  * Copyright (C) 2002 USAGI/WIDE Project
  * Copyright (c) 2004 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/if_ether.h>
@@ -111,21 +107,6 @@
 	return xfrm6_extract_header(skb);
 }
 
-int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err;
-
-	err = xfrm_inner_extract_output(x, skb);
-	if (err)
-		return err;
-
-	skb->ignore_df = 1;
-	skb->protocol = htons(ETH_P_IPV6);
-
-	return x->outer_mode->output2(x, skb);
-}
-EXPORT_SYMBOL(xfrm6_prepare_output);
-
 int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
 {
 	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
@@ -137,11 +118,28 @@
 	return xfrm_output(sk, skb);
 }
 
+static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk,
+				       struct sk_buff *skb)
+{
+	const struct xfrm_state_afinfo *afinfo;
+	int ret = -EAFNOSUPPORT;
+
+	rcu_read_lock();
+	afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
+	if (likely(afinfo))
+		ret = afinfo->output_finish(sk, skb);
+	else
+		kfree_skb(skb);
+	rcu_read_unlock();
+
+	return ret;
+}
+
 static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
 
-	return x->outer_mode->afinfo->output_finish(sk, skb);
+	return __xfrm6_output_state_finish(x, sk, skb);
 }
 
 static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -183,7 +181,7 @@
 				    __xfrm6_output_finish);
 
 skip_frag:
-	return x->outer_mode->afinfo->output_finish(sk, skb);
+	return __xfrm6_output_state_finish(x, sk, skb);
 }
 
 int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d35bcf9..699e073 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -22,9 +22,6 @@
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #include <net/l3mdev.h>
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-#include <net/mip6.h>
-#endif
 
 static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
 					  const xfrm_address_t *saddr,
@@ -71,24 +68,6 @@
 	return 0;
 }
 
-static int xfrm6_get_tos(const struct flowi *fl)
-{
-	return 0;
-}
-
-static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
-			   int nfheader_len)
-{
-	if (dst->ops->family == AF_INET6) {
-		struct rt6_info *rt = (struct rt6_info *)dst;
-		path->path_cookie = rt6_get_cookie(rt);
-	}
-
-	path->u.rt6.rt6i_nfheader_len = nfheader_len;
-
-	return 0;
-}
-
 static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 			  const struct flowi *fl)
 {
@@ -118,108 +97,6 @@
 	return 0;
 }
 
-static inline void
-_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
-{
-	struct flowi6 *fl6 = &fl->u.ip6;
-	int onlyproto = 0;
-	const struct ipv6hdr *hdr = ipv6_hdr(skb);
-	u32 offset = sizeof(*hdr);
-	struct ipv6_opt_hdr *exthdr;
-	const unsigned char *nh = skb_network_header(skb);
-	u16 nhoff = IP6CB(skb)->nhoff;
-	int oif = 0;
-	u8 nexthdr;
-
-	if (!nhoff)
-		nhoff = offsetof(struct ipv6hdr, nexthdr);
-
-	nexthdr = nh[nhoff];
-
-	if (skb_dst(skb))
-		oif = skb_dst(skb)->dev->ifindex;
-
-	memset(fl6, 0, sizeof(struct flowi6));
-	fl6->flowi6_mark = skb->mark;
-	fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
-
-	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
-	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
-
-	while (nh + offset + sizeof(*exthdr) < skb->data ||
-	       pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
-		nh = skb_network_header(skb);
-		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-
-		switch (nexthdr) {
-		case NEXTHDR_FRAGMENT:
-			onlyproto = 1;
-			/* fall through */
-		case NEXTHDR_ROUTING:
-		case NEXTHDR_HOP:
-		case NEXTHDR_DEST:
-			offset += ipv6_optlen(exthdr);
-			nexthdr = exthdr->nexthdr;
-			exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-			break;
-
-		case IPPROTO_UDP:
-		case IPPROTO_UDPLITE:
-		case IPPROTO_TCP:
-		case IPPROTO_SCTP:
-		case IPPROTO_DCCP:
-			if (!onlyproto && (nh + offset + 4 < skb->data ||
-			     pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
-				__be16 *ports;
-
-				nh = skb_network_header(skb);
-				ports = (__be16 *)(nh + offset);
-				fl6->fl6_sport = ports[!!reverse];
-				fl6->fl6_dport = ports[!reverse];
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
-
-		case IPPROTO_ICMPV6:
-			if (!onlyproto && (nh + offset + 2 < skb->data ||
-			    pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
-				u8 *icmp;
-
-				nh = skb_network_header(skb);
-				icmp = (u8 *)(nh + offset);
-				fl6->fl6_icmp_type = icmp[0];
-				fl6->fl6_icmp_code = icmp[1];
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
-
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-		case IPPROTO_MH:
-			offset += ipv6_optlen(exthdr);
-			if (!onlyproto && (nh + offset + 3 < skb->data ||
-			    pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
-				struct ip6_mh *mh;
-
-				nh = skb_network_header(skb);
-				mh = (struct ip6_mh *)(nh + offset);
-				fl6->fl6_mh_type = mh->ip6mh_type;
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
-#endif
-
-		/* XXX Why are there these headers? */
-		case IPPROTO_AH:
-		case IPPROTO_ESP:
-		case IPPROTO_COMP:
-		default:
-			fl6->fl6_ipsec_spi = 0;
-			fl6->flowi6_proto = nexthdr;
-			return;
-		}
-	}
-}
-
 static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			      struct sk_buff *skb, u32 mtu)
 {
@@ -262,7 +139,6 @@
 	if (xdst->u.rt6.rt6i_idev->dev == dev) {
 		struct inet6_dev *loopback_idev =
 			in6_dev_get(dev_net(dev)->loopback_dev);
-		BUG_ON(!loopback_idev);
 
 		do {
 			in6_dev_put(xdst->u.rt6.rt6i_idev);
@@ -292,9 +168,6 @@
 	.dst_ops =		&xfrm6_dst_ops_template,
 	.dst_lookup =		xfrm6_dst_lookup,
 	.get_saddr =		xfrm6_get_saddr,
-	.decode_session =	_decode_session6,
-	.get_tos =		xfrm6_get_tos,
-	.init_path =		xfrm6_init_path,
 	.fill_dst =		xfrm6_fill_dst,
 	.blackhole_route =	ip6_blackhole_route,
 };
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
index b2dc8ce..34cb65c 100644
--- a/net/ipv6/xfrm6_protocol.c
+++ b/net/ipv6/xfrm6_protocol.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6.
  *
  * Copyright (C) 2013 secunet Security Networks AG
@@ -7,11 +8,6 @@
  *
  * Based on:
  * net/ipv4/xfrm4_protocol.c
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #include <linux/init.h>
@@ -46,7 +42,7 @@
 	     handler != NULL;				\
 	     handler = rcu_dereference(handler->next))	\
 
-int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
 {
 	int ret;
 	struct xfrm6_protocol *handler;
@@ -61,7 +57,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL(xfrm6_rcv_cb);
 
 static int xfrm6_esp_rcv(struct sk_buff *skb)
 {
@@ -80,14 +75,16 @@
 	return 0;
 }
 
-static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			  u8 type, u8 code, int offset, __be32 info)
 {
 	struct xfrm6_protocol *handler;
 
 	for_each_protocol_rcu(esp6_handlers, handler)
 		if (!handler->err_handler(skb, opt, type, code, offset, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 
 static int xfrm6_ah_rcv(struct sk_buff *skb)
@@ -107,14 +104,16 @@
 	return 0;
 }
 
-static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			 u8 type, u8 code, int offset, __be32 info)
 {
 	struct xfrm6_protocol *handler;
 
 	for_each_protocol_rcu(ah6_handlers, handler)
 		if (!handler->err_handler(skb, opt, type, code, offset, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 
 static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
@@ -134,14 +133,16 @@
 	return 0;
 }
 
-static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			     u8 type, u8 code, int offset, __be32 info)
 {
 	struct xfrm6_protocol *handler;
 
 	for_each_protocol_rcu(ipcomp6_handlers, handler)
 		if (!handler->err_handler(skb, opt, type, code, offset, info))
-			break;
+			return 0;
+
+	return -ENOENT;
 }
 
 static const struct inet6_protocol esp6_protocol = {
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 5bdca3d..78daade 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -21,137 +21,6 @@
 #include <net/ipv6.h>
 #include <net/addrconf.h>
 
-static void
-__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
-{
-	const struct flowi6 *fl6 = &fl->u.ip6;
-
-	/* Initialize temporary selector matching only
-	 * to current session. */
-	*(struct in6_addr *)&sel->daddr = fl6->daddr;
-	*(struct in6_addr *)&sel->saddr = fl6->saddr;
-	sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
-	sel->dport_mask = htons(0xffff);
-	sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
-	sel->sport_mask = htons(0xffff);
-	sel->family = AF_INET6;
-	sel->prefixlen_d = 128;
-	sel->prefixlen_s = 128;
-	sel->proto = fl6->flowi6_proto;
-	sel->ifindex = fl6->flowi6_oif;
-}
-
-static void
-xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
-		   const xfrm_address_t *daddr, const xfrm_address_t *saddr)
-{
-	x->id = tmpl->id;
-	if (ipv6_addr_any((struct in6_addr *)&x->id.daddr))
-		memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
-	memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
-	if (ipv6_addr_any((struct in6_addr *)&x->props.saddr))
-		memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
-	x->props.mode = tmpl->mode;
-	x->props.reqid = tmpl->reqid;
-	x->props.family = AF_INET6;
-}
-
-/* distribution counting sort function for xfrm_state and xfrm_tmpl */
-static int
-__xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass)
-{
-	int count[XFRM_MAX_DEPTH] = { };
-	int class[XFRM_MAX_DEPTH];
-	int i;
-
-	for (i = 0; i < n; i++) {
-		int c;
-		class[i] = c = cmp(src[i]);
-		count[c]++;
-	}
-
-	for (i = 2; i < maxclass; i++)
-		count[i] += count[i - 1];
-
-	for (i = 0; i < n; i++) {
-		dst[count[class[i] - 1]++] = src[i];
-		src[i] = NULL;
-	}
-
-	return 0;
-}
-
-/*
- * Rule for xfrm_state:
- *
- * rule 1: select IPsec transport except AH
- * rule 2: select MIPv6 RO or inbound trigger
- * rule 3: select IPsec transport AH
- * rule 4: select IPsec tunnel
- * rule 5: others
- */
-static int __xfrm6_state_sort_cmp(void *p)
-{
-	struct xfrm_state *v = p;
-
-	switch (v->props.mode) {
-	case XFRM_MODE_TRANSPORT:
-		if (v->id.proto != IPPROTO_AH)
-			return 1;
-		else
-			return 3;
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-	case XFRM_MODE_ROUTEOPTIMIZATION:
-	case XFRM_MODE_IN_TRIGGER:
-		return 2;
-#endif
-	case XFRM_MODE_TUNNEL:
-	case XFRM_MODE_BEET:
-		return 4;
-	}
-	return 5;
-}
-
-static int
-__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
-{
-	return __xfrm6_sort((void **)dst, (void **)src, n,
-			    __xfrm6_state_sort_cmp, 6);
-}
-
-/*
- * Rule for xfrm_tmpl:
- *
- * rule 1: select IPsec transport
- * rule 2: select MIPv6 RO or inbound trigger
- * rule 3: select IPsec tunnel
- * rule 4: others
- */
-static int __xfrm6_tmpl_sort_cmp(void *p)
-{
-	struct xfrm_tmpl *v = p;
-	switch (v->mode) {
-	case XFRM_MODE_TRANSPORT:
-		return 1;
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-	case XFRM_MODE_ROUTEOPTIMIZATION:
-	case XFRM_MODE_IN_TRIGGER:
-		return 2;
-#endif
-	case XFRM_MODE_TUNNEL:
-	case XFRM_MODE_BEET:
-		return 3;
-	}
-	return 4;
-}
-
-static int
-__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
-{
-	return __xfrm6_sort((void **)dst, (void **)src, n,
-			    __xfrm6_tmpl_sort_cmp, 5);
-}
-
 int xfrm6_extract_header(struct sk_buff *skb)
 {
 	struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -171,12 +40,6 @@
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.proto			= IPPROTO_IPV6,
-	.eth_proto		= htons(ETH_P_IPV6),
-	.owner			= THIS_MODULE,
-	.init_tempsel		= __xfrm6_init_tempsel,
-	.init_temprop		= xfrm6_init_temprop,
-	.tmpl_sort		= __xfrm6_tmpl_sort,
-	.state_sort		= __xfrm6_state_sort,
 	.output			= xfrm6_output,
 	.output_finish		= xfrm6_output_finish,
 	.extract_input		= xfrm6_extract_input,
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 4a46df8..e11bdb0 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -1,24 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C)2003,2004 USAGI/WIDE Project
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Authors	Mitsuru KANDA  <mk@linux-ipv6.org>
  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  *
  * Based on net/ipv4/xfrm4_tunnel.c
- *
  */
 #include <linux/module.h>
 #include <linux/xfrm.h>
@@ -144,6 +131,9 @@
 		index = __xfrm6_tunnel_spi_check(net, spi);
 		if (index >= 0)
 			goto alloc_spi;
+
+		if (spi == XFRM6_TUNNEL_SPI_MAX)
+			break;
 	}
 	for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) {
 		index = __xfrm6_tunnel_spi_check(net, spi);
@@ -341,8 +331,8 @@
 	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
 	unsigned int i;
 
-	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
 	xfrm_flush_gc();
+	xfrm_state_flush(net, 0, false, true);
 
 	for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
 		WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
@@ -399,6 +389,10 @@
 	xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6);
 	xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
 	unregister_pernet_subsys(&xfrm6_tunnel_net_ops);
+	/* Someone may still hold a reference to an xfrm6_tunnel_spi,
+	 * so wait for all outstanding RCU readers before freeing it.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
 }
 
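The break added in the SPI allocation loop above matters because spi is a u32 and XFRM6_TUNNEL_SPI_MAX sits at the top of its range: once spi reaches the maximum, spi++ wraps to zero instead of terminating the loop, so a fully used range spun forever. A standalone userspace sketch of the same two-phase search (in_use() is a hypothetical stand-in for __xfrm6_tunnel_spi_check(); the bounds mirror the kernel's):

	#include <stdint.h>
	#include <stdio.h>

	#define SPI_MIN 1u
	#define SPI_MAX UINT32_MAX

	static int in_use(uint32_t spi)
	{
		return spi < 1000;	/* pretend SPIs below 1000 are taken */
	}

	/* Scan (hint, MAX], then wrap to [MIN, hint); 0 means exhausted. */
	static uint32_t alloc_spi(uint32_t hint)
	{
		uint32_t spi;

		for (spi = hint + 1; spi <= SPI_MAX; spi++) {	/* condition is vacuous for u32 */
			if (!in_use(spi))
				return spi;
			if (spi == SPI_MAX)	/* without this, spi++ wraps to 0 */
				break;
		}
		for (spi = SPI_MIN; spi < hint; spi++)
			if (!in_use(spi))
				return spi;
		return 0;
	}

	int main(void)
	{
		printf("allocated spi %u\n", alloc_spi(500));	/* prints 1000 */
		return 0;
	}
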
diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig
index 497fbe7..5cfddc9 100644
--- a/net/iucv/Kconfig
+++ b/net/iucv/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config IUCV
 	depends on S390
 	def_tristate y if S390
diff --git a/net/iucv/Makefile b/net/iucv/Makefile
index 7bfdc85..984d7ff 100644
--- a/net/iucv/Makefile
+++ b/net/iucv/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for IUCV
 #
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index f024914..ebb62a4 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  IUCV protocol stack for Linux on zSeries
  *
@@ -13,6 +14,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/module.h>
+#include <linux/netdevice.h>
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/errno.h>
@@ -48,7 +50,7 @@
 static const u8 iprm_shutdown[8] =
 	{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
 
-#define TRGCLS_SIZE	(sizeof(((struct iucv_message *)0)->class))
+#define TRGCLS_SIZE	FIELD_SIZEOF(struct iucv_message, class)
 
 #define __iucv_sock_wait(sk, condition, timeo, ret)			\
 do {									\
@@ -320,13 +322,9 @@
 	struct sk_buff *nskb;
 	int err, confirm_recv = 0;
 
-	memset(skb->head, 0, ETH_HLEN);
-	phs_hdr = skb_push(skb, sizeof(struct af_iucv_trans_hdr));
-	skb_reset_mac_header(skb);
+	phs_hdr = skb_push(skb, sizeof(*phs_hdr));
+	memset(phs_hdr, 0, sizeof(*phs_hdr));
 	skb_reset_network_header(skb);
-	skb_push(skb, ETH_HLEN);
-	skb_reset_mac_header(skb);
-	memset(phs_hdr, 0, sizeof(struct af_iucv_trans_hdr));
 
 	phs_hdr->magic = ETH_P_AF_IUCV;
 	phs_hdr->version = 1;
@@ -355,6 +353,9 @@
 		err = -ENODEV;
 		goto err_free;
 	}
+
+	dev_hard_header(skb, skb->dev, ETH_P_AF_IUCV, NULL, NULL, skb->len);
+
 	if (!(skb->dev->flags & IFF_UP) || !netif_carrier_ok(skb->dev)) {
 		err = -ENETDOWN;
 		goto err_free;
@@ -367,6 +368,8 @@
 		skb_trim(skb, skb->dev->mtu);
 	}
 	skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
+
+	__skb_header_release(skb);
 	nskb = skb_clone(skb, GFP_ATOMIC);
 	if (!nskb) {
 		err = -ENOMEM;
@@ -466,12 +469,14 @@
 /* Send controlling flags through an IUCV socket for HIPER transport */
 static int iucv_send_ctrl(struct sock *sk, u8 flags)
 {
+	struct iucv_sock *iucv = iucv_sk(sk);
 	int err = 0;
 	int blen;
 	struct sk_buff *skb;
 	u8 shutdown = 0;
 
-	blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
+	blen = sizeof(struct af_iucv_trans_hdr) +
+	       LL_RESERVED_SPACE(iucv->hs_dev);
 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
 		/* controlling flags should be sent anyway */
 		shutdown = sk->sk_shutdown;
@@ -507,7 +512,9 @@
 			sk->sk_state = IUCV_DISCONN;
 			sk->sk_state_change(sk);
 		}
-	case IUCV_DISCONN:   /* fall through */
+		/* fall through */
+
+	case IUCV_DISCONN:
 		sk->sk_state = IUCV_CLOSING;
 		sk->sk_state_change(sk);
 
@@ -520,8 +527,9 @@
 					iucv_sock_in_state(sk, IUCV_CLOSED, 0),
 					timeo);
 		}
+		/* fall through */
 
-	case IUCV_CLOSING:   /* fall through */
+	case IUCV_CLOSING:
 		sk->sk_state = IUCV_CLOSED;
 		sk->sk_state_change(sk);
 
@@ -530,8 +538,9 @@
 
 		skb_queue_purge(&iucv->send_skb_q);
 		skb_queue_purge(&iucv->backlog_skb_q);
+		/* fall through */
 
-	default:   /* fall through */
+	default:
 		iucv_sever_path(sk, 1);
 	}
 
@@ -588,7 +597,6 @@
 
 	sk->sk_destruct = iucv_sock_destruct;
 	sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
-	sk->sk_allocation = GFP_DMA;
 
 	sock_reset_flag(sk, SOCK_ZAPPED);
 
@@ -782,6 +790,7 @@
 		memcpy(iucv->src_user_id, iucv_userid, 8);
 		sk->sk_state = IUCV_BOUND;
 		iucv->transport = AF_IUCV_TRANS_IUCV;
+		sk->sk_allocation |= GFP_DMA;
 		if (!iucv->msglimit)
 			iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
 		goto done_unlock;
@@ -806,6 +815,8 @@
 		return -EPROTO;
 
 	memcpy(iucv->src_user_id, iucv_userid, 8);
+	iucv->transport = AF_IUCV_TRANS_IUCV;
+	sk->sk_allocation |= GFP_DMA;
 
 	write_lock_bh(&iucv_sk_list.lock);
 	__iucv_auto_name(iucv);
@@ -1131,7 +1142,8 @@
 	 * segmented records using the MSG_EOR flag), but
 	 * for SOCK_STREAM we might want to improve it in future */
 	if (iucv->transport == AF_IUCV_TRANS_HIPER) {
-		headroom = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
+		headroom = sizeof(struct af_iucv_trans_hdr) +
+			   LL_RESERVED_SPACE(iucv->hs_dev);
 		linear = len;
 	} else {
 		if (len < PAGE_SIZE) {
@@ -1781,6 +1793,8 @@
 
 	niucv = iucv_sk(nsk);
 	iucv_sock_init(nsk, sk);
+	niucv->transport = AF_IUCV_TRANS_IUCV;
+	nsk->sk_allocation |= GFP_DMA;
 
 	/* Set the new iucv_sock */
 	memcpy(niucv->dst_name, ipuser + 8, 8);
@@ -1874,30 +1888,26 @@
 	struct sock *sk = path->private;
 	struct sk_buff *this = NULL;
 	struct sk_buff_head *list = &iucv_sk(sk)->send_skb_q;
-	struct sk_buff *list_skb = list->next;
+	struct sk_buff *list_skb;
 	unsigned long flags;
 
 	bh_lock_sock(sk);
-	if (!skb_queue_empty(list)) {
-		spin_lock_irqsave(&list->lock, flags);
 
-		while (list_skb != (struct sk_buff *)list) {
-			if (msg->tag == IUCV_SKB_CB(list_skb)->tag) {
-				this = list_skb;
-				break;
-			}
-			list_skb = list_skb->next;
+	spin_lock_irqsave(&list->lock, flags);
+	skb_queue_walk(list, list_skb) {
+		if (msg->tag == IUCV_SKB_CB(list_skb)->tag) {
+			this = list_skb;
+			break;
 		}
-		if (this)
-			__skb_unlink(this, list);
+	}
+	if (this)
+		__skb_unlink(this, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 
-		spin_unlock_irqrestore(&list->lock, flags);
-
-		if (this) {
-			kfree_skb(this);
-			/* wake up any process waiting for sending */
-			iucv_sock_wake_msglim(sk);
-		}
+	if (this) {
+		kfree_skb(this);
+		/* wake up any process waiting for sending */
+		iucv_sock_wake_msglim(sk);
 	}
 
 	if (sk->sk_state == IUCV_CLOSING) {
@@ -1943,8 +1953,7 @@
 /***************** HiperSockets transport callbacks ********************/
 static void afiucv_swap_src_dest(struct sk_buff *skb)
 {
-	struct af_iucv_trans_hdr *trans_hdr =
-				(struct af_iucv_trans_hdr *)skb->data;
+	struct af_iucv_trans_hdr *trans_hdr = iucv_trans_hdr(skb);
 	char tmpID[8];
 	char tmpName[8];
 
@@ -1967,13 +1976,12 @@
  **/
 static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
 {
+	struct af_iucv_trans_hdr *trans_hdr = iucv_trans_hdr(skb);
 	struct sock *nsk;
 	struct iucv_sock *iucv, *niucv;
-	struct af_iucv_trans_hdr *trans_hdr;
 	int err;
 
 	iucv = iucv_sk(sk);
-	trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
 	if (!iucv) {
 		/* no sock - connection refused */
 		afiucv_swap_src_dest(skb);
@@ -2034,15 +2042,13 @@
 static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
 {
 	struct iucv_sock *iucv = iucv_sk(sk);
-	struct af_iucv_trans_hdr *trans_hdr =
-					(struct af_iucv_trans_hdr *)skb->data;
 
 	if (!iucv)
 		goto out;
 	if (sk->sk_state != IUCV_BOUND)
 		goto out;
 	bh_lock_sock(sk);
-	iucv->msglimit_peer = trans_hdr->window;
+	iucv->msglimit_peer = iucv_trans_hdr(skb)->window;
 	sk->sk_state = IUCV_CONNECTED;
 	sk->sk_state_change(sk);
 	bh_unlock_sock(sk);
@@ -2098,8 +2104,6 @@
 static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb)
 {
 	struct iucv_sock *iucv = iucv_sk(sk);
-	struct af_iucv_trans_hdr *trans_hdr =
-					(struct af_iucv_trans_hdr *)skb->data;
 
 	if (!iucv)
 		return NET_RX_SUCCESS;
@@ -2107,7 +2111,7 @@
 	if (sk->sk_state != IUCV_CONNECTED)
 		return NET_RX_SUCCESS;
 
-	atomic_sub(trans_hdr->window, &iucv->msg_sent);
+	atomic_sub(iucv_trans_hdr(skb)->window, &iucv->msg_sent);
 	iucv_sock_wake_msglim(sk);
 	return NET_RX_SUCCESS;
 }
@@ -2170,22 +2174,13 @@
 	int err = NET_RX_SUCCESS;
 	char nullstring[8];
 
-	if (skb->len < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr))) {
-		WARN_ONCE(1, "AF_IUCV too short skb, len=%d, min=%d",
-			  (int)skb->len,
-			  (int)(ETH_HLEN + sizeof(struct af_iucv_trans_hdr)));
+	if (!pskb_may_pull(skb, sizeof(*trans_hdr))) {
+		WARN_ONCE(1, "AF_IUCV failed to receive skb, len=%u", skb->len);
 		kfree_skb(skb);
 		return NET_RX_SUCCESS;
 	}
-	if (skb_headlen(skb) < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr)))
-		if (skb_linearize(skb)) {
-			WARN_ONCE(1, "AF_IUCV skb_linearize failed, len=%d",
-				  (int)skb->len);
-			kfree_skb(skb);
-			return NET_RX_SUCCESS;
-		}
-	skb_pull(skb, ETH_HLEN);
-	trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
+
+	trans_hdr = iucv_trans_hdr(skb);
 	EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName));
 	EBCASC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID));
 	EBCASC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName));
@@ -2256,10 +2251,10 @@
 			kfree_skb(skb);
 			break;
 		}
-		/* fall through and receive non-zero length data */
+		/* fall through - and receive non-zero length data */
 	case (AF_IUCV_FLAG_SHT):
 		/* shutdown request */
-		/* fall through and receive zero length data */
+		/* fall through - and receive zero length data */
 	case 0:
 		/* plain data frame */
 		IUCV_SKB_CB(skb)->class = trans_hdr->iucv_hdr.class;
@@ -2300,11 +2295,7 @@
 
 	list = &iucv->send_skb_q;
 	spin_lock_irqsave(&list->lock, flags);
-	if (skb_queue_empty(list))
-		goto out_unlock;
-	list_skb = list->next;
-	nskb = list_skb->next;
-	while (list_skb != (struct sk_buff *)list) {
+	skb_queue_walk_safe(list, list_skb, nskb) {
 		if (skb_shinfo(list_skb) == skb_shinfo(skb)) {
 			switch (n) {
 			case TX_NOTIFY_OK:
@@ -2337,10 +2328,7 @@
 			}
 			break;
 		}
-		list_skb = nskb;
-		nskb = nskb->next;
 	}
-out_unlock:
 	spin_unlock_irqrestore(&list->lock, flags);
 
 	if (sk->sk_state == IUCV_CLOSING) {
@@ -2456,6 +2444,13 @@
 	return err;
 }
 
+static void afiucv_iucv_exit(void)
+{
+	device_unregister(af_iucv_dev);
+	driver_unregister(&af_iucv_driver);
+	pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+}
+
 static int __init afiucv_init(void)
 {
 	int err;
@@ -2489,11 +2484,18 @@
 		err = afiucv_iucv_init();
 		if (err)
 			goto out_sock;
-	} else
-		register_netdevice_notifier(&afiucv_netdev_notifier);
+	}
+
+	err = register_netdevice_notifier(&afiucv_netdev_notifier);
+	if (err)
+		goto out_notifier;
+
 	dev_add_pack(&iucv_packet_type);
 	return 0;
 
+out_notifier:
+	if (pr_iucv)
+		afiucv_iucv_exit();
 out_sock:
 	sock_unregister(PF_IUCV);
 out_proto:
@@ -2507,12 +2509,11 @@
 static void __exit afiucv_exit(void)
 {
 	if (pr_iucv) {
-		device_unregister(af_iucv_dev);
-		driver_unregister(&af_iucv_driver);
-		pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+		afiucv_iucv_exit();
 		symbol_put(iucv_if);
-	} else
-		unregister_netdevice_notifier(&afiucv_netdev_notifier);
+	}
+
+	unregister_netdevice_notifier(&afiucv_netdev_notifier);
 	dev_remove_pack(&iucv_packet_type);
 	sock_unregister(PF_IUCV);
 	proto_unregister(&iucv_proto);
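The af_iucv.c send-queue walks above move from open-coded skb->next chasing to the skb_queue_walk()/skb_queue_walk_safe() helpers, which stay correct now that sk_buff list linkage is treated as opaque. The convention the new code follows, sketched under the queue lock (match() and done() are hypothetical predicates):

	/* inspect only, unlink after the walk: the plain walker suffices */
	skb_queue_walk(list, skb) {
		if (match(skb)) {
			found = skb;
			break;
		}
	}
	if (found)
		__skb_unlink(found, list);

	/* unlinking inside the loop: the _safe variant pre-loads the next entry */
	skb_queue_walk_safe(list, skb, tmp) {
		if (done(skb)) {
			__skb_unlink(skb, list);
			kfree_skb(skb);
		}
	}
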
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index eb502c6..9a2d023 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IUCV base infrastructure.
  *
@@ -17,20 +18,6 @@
  * Documentation used:
  *    The original source
  *    CP Programming Service, IBM document # SC24-5760
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define KMSG_COMPONENT "iucv"
diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig
index 9ca83f2..bf7e970 100644
--- a/net/kcm/Kconfig
+++ b/net/kcm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 
 config AF_KCM
 	tristate "KCM sockets"
diff --git a/net/kcm/Makefile b/net/kcm/Makefile
index 7125613..6c45692 100644
--- a/net/kcm/Makefile
+++ b/net/kcm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_AF_KCM) += kcm.o
 
 kcm-y := kcmsock.o kcmproc.o
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 571d824..ea9e734 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Kernel Connection Multiplexor
  *
  * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
  */
 
 #include <linux/bpf.h>
@@ -381,8 +378,12 @@
 {
 	struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
 	struct bpf_prog *prog = psock->bpf_prog;
+	int res;
 
-	return (*prog->bpf_func)(skb, prog->insnsi);
+	preempt_disable();
+	res = BPF_PROG_RUN(prog, skb);
+	preempt_enable();
+	return res;
 }
 
 static int kcm_read_sock_done(struct strparser *strp, int err)
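Replacing the raw (*prog->bpf_func)(skb, prog->insnsi) call above with BPF_PROG_RUN() routes execution through the kernel's dispatch macro (JITed or interpreted alike), and the surrounding preempt_disable()/preempt_enable() reflects the BPF runtime's expectation, at least in this era, that a program does not migrate CPUs mid-run (per-CPU maps and scratch state depend on it). The shape of the fix, annotated:

	int res;

	preempt_disable();		/* keep the program on one CPU */
	res = BPF_PROG_RUN(prog, skb);	/* dispatch to JIT or interpreter */
	preempt_enable();
	return res;
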
@@ -638,15 +639,15 @@
 			frag_offset = 0;
 do_frag:
 			frag = &skb_shinfo(skb)->frags[fragidx];
-			if (WARN_ON(!frag->size)) {
+			if (WARN_ON(!skb_frag_size(frag))) {
 				ret = -EINVAL;
 				goto out;
 			}
 
 			ret = kernel_sendpage(psock->sk->sk_socket,
-					      frag->page.p,
-					      frag->page_offset + frag_offset,
-					      frag->size - frag_offset,
+					      skb_frag_page(frag),
+					      skb_frag_off(frag) + frag_offset,
+					      skb_frag_size(frag) - frag_offset,
 					      MSG_DONTWAIT);
 			if (ret <= 0) {
 				if (ret == -EAGAIN) {
@@ -681,7 +682,7 @@
 			sent += ret;
 			frag_offset += ret;
 			KCM_STATS_ADD(psock->stats.tx_bytes, ret);
-			if (frag_offset < frag->size) {
+			if (frag_offset < skb_frag_size(frag)) {
 				/* Not finished with this frag */
 				goto do_frag;
 			}
@@ -2036,13 +2037,13 @@
 
 	kcm_muxp = kmem_cache_create("kcm_mux_cache",
 				     sizeof(struct kcm_mux), 0,
-				     SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+				     SLAB_HWCACHE_ALIGN, NULL);
 	if (!kcm_muxp)
 		goto fail;
 
 	kcm_psockp = kmem_cache_create("kcm_psock_cache",
 				       sizeof(struct kcm_psock), 0,
-					SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+					SLAB_HWCACHE_ALIGN, NULL);
 	if (!kcm_psockp)
 		goto fail;
 
@@ -2054,14 +2055,14 @@
 	if (err)
 		goto fail;
 
-	err = sock_register(&kcm_family_ops);
-	if (err)
-		goto sock_register_fail;
-
 	err = register_pernet_device(&kcm_net_ops);
 	if (err)
 		goto net_ops_fail;
 
+	err = sock_register(&kcm_family_ops);
+	if (err)
+		goto sock_register_fail;
+
 	err = kcm_proc_init();
 	if (err)
 		goto proc_init_fail;
@@ -2069,12 +2070,12 @@
 	return 0;
 
 proc_init_fail:
-	unregister_pernet_device(&kcm_net_ops);
-
-net_ops_fail:
 	sock_unregister(PF_KCM);
 
 sock_register_fail:
+	unregister_pernet_device(&kcm_net_ops);
+
+net_ops_fail:
 	proto_unregister(&kcm_proto);
 
 fail:
@@ -2090,8 +2091,8 @@
 static void __exit kcm_exit(void)
 {
 	kcm_proc_exit();
-	unregister_pernet_device(&kcm_net_ops);
 	sock_unregister(PF_KCM);
+	unregister_pernet_device(&kcm_net_ops);
 	proto_unregister(&kcm_proto);
 	destroy_workqueue(kcm_wq);
 
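The kcm_init()/kcm_exit() reordering above restores the usual unwinding invariant: teardown happens in exactly the reverse order of setup, and the exit path mirrors the init error path, so the PF_KCM socket family is never reachable while its pernet state is already gone. A self-contained userspace sketch of the pattern (init_a/init_b and their teardowns are hypothetical stand-ins):

	#include <stdio.h>

	static int init_a(void)  { puts("init A"); return 0; }
	static int init_b(void)  { puts("init B"); return 0; }
	static void exit_a(void) { puts("exit A"); }
	static void exit_b(void) { puts("exit B"); }

	static int subsystem_init(void)
	{
		int err;

		err = init_a();
		if (err)
			goto fail;

		err = init_b();		/* B depends on A being ready */
		if (err)
			goto undo_a;

		return 0;

	undo_a:
		exit_a();		/* error path unwinds in reverse order */
	fail:
		return err;
	}

	static void subsystem_exit(void)
	{
		exit_b();		/* mirror of the error path: B, then A */
		exit_a();
	}

	int main(void)
	{
		if (!subsystem_init())
			subsystem_exit();
		return 0;
	}
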
diff --git a/net/key/Makefile b/net/key/Makefile
index 8576080..ed779c2 100644
--- a/net/key/Makefile
+++ b/net/key/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the key AF.
 #
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9d61266..b67ed3a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/key/af_key.c	An implementation of PF_KEYv2 sockets.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Maxim Giryaev	<gem@asplinux.ru>
  *		David S. Miller	<davem@redhat.com>
  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
@@ -196,30 +192,22 @@
 	return 0;
 }
 
-static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
-			       gfp_t allocation, struct sock *sk)
+static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation,
+			       struct sock *sk)
 {
 	int err = -ENOBUFS;
 
-	sock_hold(sk);
-	if (*skb2 == NULL) {
-		if (refcount_read(&skb->users) != 1) {
-			*skb2 = skb_clone(skb, allocation);
-		} else {
-			*skb2 = skb;
-			refcount_inc(&skb->users);
-		}
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+		return err;
+
+	skb = skb_clone(skb, allocation);
+
+	if (skb) {
+		skb_set_owner_r(skb, sk);
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		sk->sk_data_ready(sk);
+		err = 0;
 	}
-	if (*skb2 != NULL) {
-		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
-			skb_set_owner_r(*skb2, sk);
-			skb_queue_tail(&sk->sk_receive_queue, *skb2);
-			sk->sk_data_ready(sk);
-			*skb2 = NULL;
-			err = 0;
-		}
-	}
-	sock_put(sk);
 	return err;
 }
 
@@ -234,7 +222,6 @@
 {
 	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
 	struct sock *sk;
-	struct sk_buff *skb2 = NULL;
 	int err = -ESRCH;
 
 	/* XXX Do we need something like netlink_overrun?  I think
@@ -253,7 +240,7 @@
 		 * socket.
 		 */
 		if (pfk->promisc)
-			pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
+			pfkey_broadcast_one(skb, GFP_ATOMIC, sk);
 
 		/* the exact target will be processed later */
 		if (sk == one_sk)
@@ -268,7 +255,7 @@
 				continue;
 		}
 
-		err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
+		err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk);
 
 		/* Error is cleared after successful sending to at least one
 		 * registered KM */
@@ -278,9 +265,8 @@
 	rcu_read_unlock();
 
 	if (one_sk != NULL)
-		err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
+		err = pfkey_broadcast_one(skb, allocation, one_sk);
 
-	kfree_skb(skb2);
 	kfree_skb(skb);
 	return err;
 }
@@ -942,8 +928,7 @@
 		pfkey_sockaddr_fill(&x->props.saddr, 0,
 				    (struct sockaddr *) (addr + 1),
 				    x->props.family);
-	if (!addr->sadb_address_prefixlen)
-		BUG();
+	BUG_ON(!addr->sadb_address_prefixlen);
 
 	/* dst address */
 	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
@@ -958,8 +943,7 @@
 		pfkey_sockaddr_fill(&x->id.daddr, 0,
 				    (struct sockaddr *) (addr + 1),
 				    x->props.family);
-	if (!addr->sadb_address_prefixlen)
-		BUG();
+	BUG_ON(!addr->sadb_address_prefixlen);
 
 	if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr,
 			     x->props.family)) {
@@ -1783,7 +1767,7 @@
 	if (proto == 0)
 		return -EINVAL;
 
-	err = xfrm_state_flush(net, proto, true);
+	err = xfrm_state_flush(net, proto, true, false);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - go quietly */
@@ -1961,8 +1945,10 @@
 
 	if (rq->sadb_x_ipsecrequest_mode == 0)
 		return -EINVAL;
+	if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto))
+		return -EINVAL;
 
-	t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */
+	t->id.proto = rq->sadb_x_ipsecrequest_proto;
 	if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0)
 		return -EINVAL;
 	t->mode = mode;
@@ -2020,7 +2006,7 @@
 
 static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp)
 {
-  struct xfrm_sec_ctx *xfrm_ctx = xp->security;
+	struct xfrm_sec_ctx *xfrm_ctx = xp->security;
 
 	if (xfrm_ctx) {
 		int len = sizeof(struct sadb_x_sec_ctx);
@@ -2450,8 +2436,10 @@
 		goto out;
 	}
 	err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
-	if (err < 0)
+	if (err < 0) {
+		kfree_skb(out_skb);
 		goto out;
+	}
 
 	out_hdr = (struct sadb_msg *) out_skb->data;
 	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
@@ -2702,8 +2690,10 @@
 		return PTR_ERR(out_skb);
 
 	err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
-	if (err < 0)
+	if (err < 0) {
+		kfree_skb(out_skb);
 		return err;
+	}
 
 	out_hdr = (struct sadb_msg *) out_skb->data;
 	out_hdr->sadb_msg_version = pfk->dump.msg_version;
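The pfkey_broadcast_one() rewrite above drops the shared *skb2 juggling: each receiver gets its own clone, and the receive-buffer check now happens before cloning, so an over-quota socket costs nothing. The per-socket delivery step, sketched:

	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		return -ENOBUFS;		/* receiver over quota: skip it */

	skb = skb_clone(skb, allocation);	/* private copy for this socket */
	if (!skb)
		return -ENOBUFS;

	skb_set_owner_r(skb, sk);		/* charge the clone to sk */
	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk);
	return 0;
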
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index 378c73b..655e064 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Layer Two Tunneling Protocol (L2TP)
 #
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 26f1d43..f82ea12 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * L2TP core.
  *
@@ -12,10 +13,6 @@
  *		Michal Ostrowski <mostrows@speakeasy.net>
  *		Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
  *		David S. Miller (davem@redhat.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -83,8 +80,7 @@
 #define L2TP_SLFLAG_S	   0x40000000
 #define L2TP_SL_SEQ_MASK   0x00ffffff
 
-#define L2TP_HDR_SIZE_SEQ		10
-#define L2TP_HDR_SIZE_NOSEQ		6
+#define L2TP_HDR_SIZE_MAX		14
 
 /* Default trace flags */
 #define L2TP_DEFAULT_DEBUG_FLAGS	0
@@ -170,8 +166,8 @@
 
 	rcu_read_lock_bh();
 	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
-		if (tunnel->tunnel_id == tunnel_id) {
-			l2tp_tunnel_inc_refcount(tunnel);
+		if (tunnel->tunnel_id == tunnel_id &&
+		    refcount_inc_not_zero(&tunnel->ref_count)) {
 			rcu_read_unlock_bh();
 
 			return tunnel;
@@ -191,8 +187,8 @@
 
 	rcu_read_lock_bh();
 	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
-		if (++count > nth) {
-			l2tp_tunnel_inc_refcount(tunnel);
+		if (++count > nth &&
+		    refcount_inc_not_zero(&tunnel->ref_count)) {
 			rcu_read_unlock_bh();
 			return tunnel;
 		}
@@ -808,7 +804,7 @@
 	__skb_pull(skb, sizeof(struct udphdr));
 
 	/* Short packet? */
-	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
+	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
 		l2tp_info(tunnel, L2TP_MSG_DATA,
 			  "%s: recv short packet (len=%d)\n",
 			  tunnel->name, skb->len);
@@ -884,6 +880,10 @@
 		goto error;
 	}
 
+	if (tunnel->version == L2TP_HDR_VER_3 &&
+	    l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+		goto error;
+
 	l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
 	l2tp_session_dec_refcount(session);
 
@@ -906,7 +906,7 @@
 {
 	struct l2tp_tunnel *tunnel;
 
-	tunnel = l2tp_tunnel(sk);
+	tunnel = rcu_dereference_sk_user_data(sk);
 	if (tunnel == NULL)
 		goto pass_up;
 
@@ -1078,7 +1078,7 @@
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -1732,7 +1732,8 @@
 	}
 	rcu_read_unlock_bh();
 
-	flush_workqueue(l2tp_wq);
+	if (l2tp_wq)
+		flush_workqueue(l2tp_wq);
 	rcu_barrier();
 
 	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
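The tunnel lookups above close a use-after-free window: an RCU reader can still see a tunnel whose refcount has already hit zero and whose free is merely deferred, and the old unconditional l2tp_tunnel_inc_refcount() would resurrect such an object. refcount_inc_not_zero() takes a reference only if the object is still live; a failed increment just means the entry is dying and the scan moves on. The pattern, sketched:

	rcu_read_lock_bh();
	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
		if (tunnel->tunnel_id == tunnel_id &&
		    refcount_inc_not_zero(&tunnel->ref_count)) {
			/* reference held: the tunnel cannot be freed under us */
			rcu_read_unlock_bh();
			return tunnel;
		}
	}
	rcu_read_unlock_bh();
	return NULL;	/* not found, or found but already being torn down */
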
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 9c9afe9..2db3d50 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * L2TP internal definitions.
  *
  * Copyright (c) 2008,2009 Katalix Systems Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/refcount.h>
 
@@ -301,6 +298,26 @@
 }
 #endif
 
+static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb,
+					       unsigned char **ptr, unsigned char **optr)
+{
+	int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session);
+
+	if (opt_len > 0) {
+		int off = *ptr - *optr;
+
+		if (!pskb_may_pull(skb, off + opt_len))
+			return -1;
+
+		if (skb->data != *optr) {
+			*optr = skb->data;
+			*ptr = skb->data + off;
+		}
+	}
+
+	return 0;
+}
+
 #define l2tp_printk(ptr, type, func, fmt, ...)				\
 do {									\
 	if (((ptr)->debug) & (type))					\
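The l2tp_v3_ensure_opt_in_linear() helper above guarantees the L2TPv3 cookie and L2-specific sublayer are in the skb's linear area before l2tp_recv_common() parses them. The subtle point is the pointer refresh: pskb_may_pull() may reallocate the skb head, invalidating any pointers into the old buffer, so positions must be kept as offsets and re-derived. In miniature:

	int off = *ptr - *optr;			/* keep the position as an offset */

	if (!pskb_may_pull(skb, off + opt_len))	/* may reallocate skb->head */
		return -1;

	if (skb->data != *optr) {		/* head moved: refresh pointers */
		*optr = skb->data;
		*ptr = skb->data + off;
	}
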
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 9821a14..35bb4f3 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * L2TP subsystem debugfs
  *
  * Copyright (c) 2010 Katalix Systems Ltd
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -35,7 +31,6 @@
 #include "l2tp_core.h"
 
 static struct dentry *rootdir;
-static struct dentry *tunnels;
 
 struct l2tp_dfs_seq_data {
 	struct net *net;
@@ -330,32 +325,18 @@
 
 static int __init l2tp_debugfs_init(void)
 {
-	int rc = 0;
-
 	rootdir = debugfs_create_dir("l2tp", NULL);
-	if (IS_ERR(rootdir)) {
-		rc = PTR_ERR(rootdir);
-		rootdir = NULL;
-		goto out;
-	}
 
-	tunnels = debugfs_create_file("tunnels", 0600, rootdir, NULL, &l2tp_dfs_fops);
-	if (tunnels == NULL)
-		rc = -EIO;
+	debugfs_create_file("tunnels", 0600, rootdir, NULL, &l2tp_dfs_fops);
 
 	pr_info("L2TP debugfs support\n");
 
-out:
-	if (rc)
-		pr_warn("unable to init\n");
-
-	return rc;
+	return 0;
 }
 
 static void __exit l2tp_debugfs_exit(void)
 {
-	debugfs_remove(tunnels);
-	debugfs_remove(rootdir);
+	debugfs_remove_recursive(rootdir);
 }
 
 module_init(l2tp_debugfs_init);
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8aadc4f..d3b520b 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * L2TPv3 ethernet pseudowire driver
  *
  * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -60,7 +56,6 @@
 {
 	eth_hw_addr_random(dev);
 	eth_broadcast_addr(dev->broadcast);
-	netdev_lockdep_set_classes(dev);
 
 	return 0;
 }
@@ -155,7 +150,7 @@
 	skb->ip_summed = CHECKSUM_NONE;
 
 	skb_dst_drop(skb);
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	rcu_read_lock();
 	dev = rcu_dereference(spriv->dev);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 35f6f86..0d7c887 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * L2TPv3 IP encapsulation support
  *
  * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -165,6 +161,9 @@
 		print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
 	}
 
+	if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+		goto discard_sess;
+
 	l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
 	l2tp_session_dec_refcount(session);
 
@@ -194,7 +193,7 @@
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_put;
 
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	return sk_receive_skb(sk, skb, 1);
 
@@ -615,6 +614,7 @@
 	.getname	   = l2tp_ip_getname,
 	.poll		   = datagram_poll,
 	.ioctl		   = inet_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 237f1a4..802f19a 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * L2TPv3 IP encapsulation support for IPv6
  *
  * Copyright (c) 2012 Katalix Systems Ltd
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -178,6 +174,9 @@
 		print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
 	}
 
+	if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+		goto discard_sess;
+
 	l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
 	l2tp_session_dec_refcount(session);
 
@@ -207,7 +206,7 @@
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_put;
 
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	return sk_receive_skb(sk, skb, 1);
 
@@ -537,7 +536,7 @@
 			fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK;
 			if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
 				flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
-				if (flowlabel == NULL)
+				if (IS_ERR(flowlabel))
 					return -EINVAL;
 			}
 		}
@@ -578,7 +577,7 @@
 		}
 		if ((fl6.flowlabel & IPV6_FLOWLABEL_MASK) && !flowlabel) {
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
-			if (flowlabel == NULL)
+			if (IS_ERR(flowlabel))
 				return -EINVAL;
 		}
 		if (!(opt->opt_nflen|opt->opt_flen))
@@ -671,9 +670,6 @@
 	if (flags & MSG_OOB)
 		goto out;
 
-	if (addr_len)
-		*addr_len = sizeof(*lsa);
-
 	if (flags & MSG_ERRQUEUE)
 		return ipv6_recv_error(sk, msg, len, addr_len);
 
@@ -703,6 +699,7 @@
 		lsa->l2tp_conn_id = 0;
 		if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
 			lsa->l2tp_scope_id = inet6_iif(skb);
+		*addr_len = sizeof(*lsa);
 	}
 
 	if (np->rxopt.all)
@@ -751,6 +748,7 @@
 	.getname	   = l2tp_ip6_getname,
 	.poll		   = datagram_poll,
 	.ioctl		   = inet6_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index edbd5d1..f5a9bdc 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * L2TP netlink layer, for management
  *
@@ -8,10 +9,6 @@
  * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org>
  * which is in turn partly based on the wireless netlink code:
  * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -345,7 +342,7 @@
 	    nla_put_u16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap))
 		goto nla_put_failure;
 
-	nest = nla_nest_start(skb, L2TP_ATTR_STATS);
+	nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -742,7 +739,7 @@
 			   session->reorder_timeout, L2TP_ATTR_PAD)))
 		goto nla_put_failure;
 
-	nest = nla_nest_start(skb, L2TP_ATTR_STATS);
+	nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -915,58 +912,58 @@
 static const struct genl_ops l2tp_nl_ops[] = {
 	{
 		.cmd = L2TP_CMD_NOOP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = l2tp_nl_cmd_noop,
-		.policy = l2tp_nl_policy,
 		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = L2TP_CMD_TUNNEL_CREATE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = l2tp_nl_cmd_tunnel_create,
-		.policy = l2tp_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = L2TP_CMD_TUNNEL_DELETE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = l2tp_nl_cmd_tunnel_delete,
-		.policy = l2tp_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = L2TP_CMD_TUNNEL_MODIFY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = l2tp_nl_cmd_tunnel_modify,
-		.policy = l2tp_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = L2TP_CMD_TUNNEL_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = l2tp_nl_cmd_tunnel_get,
 		.dumpit = l2tp_nl_cmd_tunnel_dump,
-		.policy = l2tp_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = L2TP_CMD_SESSION_CREATE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = l2tp_nl_cmd_session_create,
-		.policy = l2tp_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = L2TP_CMD_SESSION_DELETE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = l2tp_nl_cmd_session_delete,
-		.policy = l2tp_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = L2TP_CMD_SESSION_MODIFY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = l2tp_nl_cmd_session_modify,
-		.policy = l2tp_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = L2TP_CMD_SESSION_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = l2tp_nl_cmd_session_get,
 		.dumpit = l2tp_nl_cmd_session_dump,
-		.policy = l2tp_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 };
@@ -976,6 +973,7 @@
 	.version	= L2TP_GENL_VERSION,
 	.hdrsize	= 0,
 	.maxattr	= L2TP_ATTR_MAX,
+	.policy = l2tp_nl_policy,
 	.netnsok	= true,
 	.module		= THIS_MODULE,
 	.ops		= l2tp_nl_ops,
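
The ops lose their per-op .policy pointers because generic netlink now attaches one policy to the family and validates attributes centrally; pre-existing commands opt out of the new strict validation via .validate flags, as seen above. A condensed sketch of the resulting layout (demo_policy, DEMO_CMD_GET, DEMO_ATTR_MAX and demo_cmd_get are assumed to be defined elsewhere):

#include <net/genetlink.h>

static const struct genl_ops demo_ops[] = {
	{
		.cmd = DEMO_CMD_GET,
		/* legacy command: keep the old, permissive parsing */
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = demo_cmd_get,
	},
};

static struct genl_family demo_family __ro_after_init = {
	.name		= "demo",
	.version	= 1,
	.maxattr	= DEMO_ATTR_MAX,
	.policy		= demo_policy,	/* one policy, parsed for every op */
	.ops		= demo_ops,
	.n_ops		= ARRAY_SIZE(demo_ops),
	.module		= THIS_MODULE,
};
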
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 04d9946..c54cb59 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*****************************************************************************
  * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
  *
@@ -11,11 +12,6 @@
  * Based on original work by Martijn van Oosterhout <kleptog@svana.org>
  *
  * License:
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  */
 
 /* This driver handles only L2TP data frames; control frames are handled by a
@@ -1070,7 +1066,6 @@
 {
 	struct pppol2tp_ioc_stats stats;
 	struct l2tp_session *session;
-	int val;
 
 	switch (cmd) {
 	case PPPIOCGMRU:
@@ -1097,7 +1092,7 @@
 		if (!session->session_id && !session->peer_session_id)
 			return -ENOSYS;
 
-		if (get_user(val, (int __user *)arg))
+		if (!access_ok((int __user *)arg, sizeof(int)))
 			return -EFAULT;
 		break;
 
@@ -1686,6 +1681,9 @@
 	.recvmsg	= pppol2tp_recvmsg,
 	.mmap		= sock_no_mmap,
 	.ioctl		= pppox_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = pppox_compat_ioctl,
+#endif
 };
 
 static const struct pppox_proto pppol2tp_proto = {
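
In the PPPIOCGL2TPSTATS path above, get_user() is replaced by access_ok() because the fetched value was never used; only the user pointer needs validating before the session code fills in the stats. Note that since v5.0 access_ok() takes just the address and size. A hedged sketch of the check, with an illustrative demo_check_arg():

#include <linux/errno.h>
#include <linux/uaccess.h>

/* Sketch: validate that a user pointer covers sizeof(int) bytes without
 * copying anything. demo_check_arg() is illustrative only. */
static int demo_check_arg(unsigned long arg)
{
	if (!access_ok((int __user *)arg, sizeof(int)))
		return -EFAULT;	/* bad user range */
	return 0;		/* the value is read later, if at all */
}
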
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
index 5d47325..de186df 100644
--- a/net/l3mdev/Kconfig
+++ b/net/l3mdev/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Configuration for L3 master device support
 #
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile
index 84a53a6..59755a9 100644
--- a/net/l3mdev/Makefile
+++ b/net/l3mdev/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the L3 device API
 #
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 8da86ce..f35899d 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/l3mdev/l3mdev.c - L3 master device implementation
  * Copyright (c) 2015 Cumulus Networks
  * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/netdevice.h>
@@ -47,6 +43,24 @@
 EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
 
 /**
+ *	l3mdev_master_upper_ifindex_by_index_rcu - get index of upper l3
+ *						   master device

+ *	@net: network namespace for device index lookup
+ *	@ifindex: targeted interface
+ */
+int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
+{
+	struct net_device *dev;
+
+	dev = dev_get_by_index_rcu(net, ifindex);
+	while (dev && !netif_is_l3_master(dev))
+		dev = netdev_master_upper_dev_get(dev);
+
+	return dev ? dev->ifindex : 0;
+}
+EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu);
+
+/**
  *	l3mdev_fib_table - get FIB table id associated with an L3
  *                             master interface
  *	@dev: targeted interface
@@ -104,6 +118,8 @@
  *			     local and multicast addresses
  *	@net: network namespace for device index lookup
  *	@fl6: IPv6 flow struct for lookup
+ *	This function does not hold refcnt on the returned dst.
+ *	Caller must hold rcu_read_lock().
  */
 
 struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
@@ -112,9 +128,8 @@
 	struct dst_entry *dst = NULL;
 	struct net_device *dev;
 
+	WARN_ON_ONCE(!rcu_read_lock_held());
 	if (fl6->flowi6_oif) {
-		rcu_read_lock();
-
 		dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
 		if (dev && netif_is_l3_slave(dev))
 			dev = netdev_master_upper_dev_get_rcu(dev);
@@ -122,8 +137,6 @@
 		if (dev && netif_is_l3_master(dev) &&
 		    dev->l3mdev_ops->l3mdev_link_scope_lookup)
 			dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6);
-
-		rcu_read_unlock();
 	}
 
 	return dst;
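
l3mdev_link_scope_lookup() stops taking the RCU read lock itself: the returned dst carries no reference, so the caller must hold rcu_read_lock() across the lookup and every use of the result, which the new WARN_ON_ONCE() enforces. The expected caller pattern, sketched as a hypothetical demo_lookup():

#include <net/l3mdev.h>

/* Sketch of the new caller contract: the dst carries no reference and is
 * only guaranteed valid inside the caller's own RCU read-side section. */
static void demo_lookup(struct net *net, struct flowi6 *fl6)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = l3mdev_link_scope_lookup(net, fl6);
	if (dst) {
		/* inspect dst here; do not stash it past the unlock */
	}
	rcu_read_unlock();
}
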
diff --git a/net/lapb/Kconfig b/net/lapb/Kconfig
index 6481839..6acfc99 100644
--- a/net/lapb/Kconfig
+++ b/net/lapb/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # LAPB Data Link Driver
 #
diff --git a/net/lapb/Makefile b/net/lapb/Makefile
index fff797d..7be91b4 100644
--- a/net/lapb/Makefile
+++ b/net/lapb/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux LAPB layer.
 #
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index db6e0af..3c03f65 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	LAPB release 002
  *
  *	This code REQUIRES 2.1.15 or higher/ NET3.038
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	LAPB 001	Jonathan Naylor	Started Coding
  *	LAPB 002	Jonathan Naylor	New timer architecture.
@@ -73,7 +68,6 @@
 		lapb_put(lapb);
 	}
 }
-EXPORT_SYMBOL(lapb_register);
 
 /*
  *	Add a socket to the bound sockets list.
@@ -120,7 +114,6 @@
 {
 	struct lapb_cb *lapb = kzalloc(sizeof(*lapb), GFP_ATOMIC);
 
-
 	if (!lapb)
 		goto out;
 
@@ -172,6 +165,7 @@
 	write_unlock_bh(&lapb_list_lock);
 	return rc;
 }
+EXPORT_SYMBOL(lapb_register);
 
 int lapb_unregister(struct net_device *dev)
 {
@@ -182,6 +176,7 @@
 	lapb = __lapb_devtostruct(dev);
 	if (!lapb)
 		goto out;
+	lapb_put(lapb);
 
 	lapb_stop_t1timer(lapb);
 	lapb_stop_t2timer(lapb);
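
The lapb_put() added to lapb_unregister() pairs with the reference that __lapb_devtostruct() takes on the entry it returns; without it the lookup's extra hold leaked and the lapb_cb could never drop to zero. The pattern in miniature, with hypothetical demo_* names:

#include <linux/netdevice.h>

/* Sketch: a lookup that returns its result with a reference held must be
 * balanced by a put once the caller has resolved the object. demo_cb,
 * demo_devtostruct() and demo_put() are illustrative only. */
static void demo_unregister(struct net_device *dev)
{
	struct demo_cb *cb = demo_devtostruct(dev);	/* takes a reference */

	if (!cb)
		return;
	demo_put(cb);		/* drop the lookup reference straight away */

	/* ... tear the object down using the list's original reference ... */
}
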
diff --git a/net/lapb/lapb_in.c b/net/lapb/lapb_in.c
index d5d2110..38ae23c 100644
--- a/net/lapb/lapb_in.c
+++ b/net/lapb/lapb_in.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	LAPB release 002
  *
  *	This code REQUIRES 2.1.15 or higher/ NET3.038
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	LAPB 001	Jonathan Naylor	Started Coding
  *	LAPB 002	Jonathan Naylor	New timer architecture.
diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c
index eda726e..7a4d071 100644
--- a/net/lapb/lapb_out.c
+++ b/net/lapb/lapb_out.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	LAPB release 002
  *
  *	This code REQUIRES 2.1.15 or higher/ NET3.038
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	LAPB 001	Jonathan Naylor	Started Coding
  *	LAPB 002	Jonathan Naylor	New timer architecture.
diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c
index 75efde3..592a22d 100644
--- a/net/lapb/lapb_subr.c
+++ b/net/lapb/lapb_subr.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	LAPB release 002
  *
  *	This code REQUIRES 2.1.15 or higher/ NET3.038
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	LAPB 001	Jonathan Naylor	Started Coding
  */
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 5d4ae01..8f5b170 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	LAPB release 002
  *
  *	This code REQUIRES 2.1.15 or higher/ NET3.038
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	LAPB 001	Jonathan Naylor	Started Coding
  *	LAPB 002	Jonathan Naylor	New timer architecture.
diff --git a/net/llc/Kconfig b/net/llc/Kconfig
index 176a6c1..b0e646a 100644
--- a/net/llc/Kconfig
+++ b/net/llc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config LLC
 	tristate
 	depends on NET
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index b99e73a..c74f44d 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -113,22 +113,26 @@
  *
  *	Send data via reliable llc2 connection.
  *	Returns 0 upon success, non-zero if action did not succeed.
+ *
+ *	This function always consumes a reference to the skb.
  */
 static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock)
 {
 	struct llc_sock* llc = llc_sk(sk);
-	int rc = 0;
 
 	if (unlikely(llc_data_accept_state(llc->state) ||
 		     llc->remote_busy_flag ||
 		     llc->p_flag)) {
 		long timeout = sock_sndtimeo(sk, noblock);
+		int rc;
 
 		rc = llc_ui_wait_for_busy_core(sk, timeout);
+		if (rc) {
+			kfree_skb(skb);
+			return rc;
+		}
 	}
-	if (unlikely(!rc))
-		rc = llc_build_and_send_pkt(sk, skb);
-	return rc;
+	return llc_build_and_send_pkt(sk, skb);
 }
 
 static void llc_ui_sk_init(struct socket *sock, struct sock *sk)
@@ -320,14 +324,13 @@
 	struct llc_sap *sap;
 	int rc = -EINVAL;
 
-	dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
-
 	lock_sock(sk);
 	if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)))
 		goto out;
 	rc = -EAFNOSUPPORT;
 	if (unlikely(addr->sllc_family != AF_LLC))
 		goto out;
+	dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
 	rc = -ENODEV;
 	rcu_read_lock();
 	if (sk->sk_bound_dev_if) {
@@ -900,7 +903,7 @@
 	DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
 	int flags = msg->msg_flags;
 	int noblock = flags & MSG_DONTWAIT;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	size_t size = 0;
 	int rc = -EINVAL, copied = 0, hdrlen;
 
@@ -909,10 +912,10 @@
 	lock_sock(sk);
 	if (addr) {
 		if (msg->msg_namelen < sizeof(*addr))
-			goto release;
+			goto out;
 	} else {
 		if (llc_ui_addr_null(&llc->addr))
-			goto release;
+			goto out;
 		addr = &llc->addr;
 	}
 	/* must bind connection to sap if user hasn't done it. */
@@ -920,7 +923,7 @@
 		/* bind to sap with null dev, exclusive. */
 		rc = llc_ui_autobind(sock, addr);
 		if (rc)
-			goto release;
+			goto out;
 	}
 	hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
 	size = hdrlen + len;
@@ -929,12 +932,12 @@
 	copied = size - hdrlen;
 	rc = -EINVAL;
 	if (copied < 0)
-		goto release;
+		goto out;
 	release_sock(sk);
 	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
 	lock_sock(sk);
 	if (!skb)
-		goto release;
+		goto out;
 	skb->dev      = llc->dev;
 	skb->protocol = llc_proto_type(addr->sllc_arphrd);
 	skb_reserve(skb, hdrlen);
@@ -944,29 +947,31 @@
 	if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) {
 		llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac,
 					  addr->sllc_sap);
+		skb = NULL;
 		goto out;
 	}
 	if (addr->sllc_test) {
 		llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac,
 					    addr->sllc_sap);
+		skb = NULL;
 		goto out;
 	}
 	if (addr->sllc_xid) {
 		llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac,
 					   addr->sllc_sap);
+		skb = NULL;
 		goto out;
 	}
 	rc = -ENOPROTOOPT;
 	if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua))
 		goto out;
 	rc = llc_ui_send_data(sk, skb, noblock);
+	skb = NULL;
 out:
-	if (rc) {
-		kfree_skb(skb);
-release:
+	kfree_skb(skb);
+	if (rc)
 		dprintk("%s: failed sending from %02X to %02X: %d\n",
 			__func__, llc->laddr.lsap, llc->daddr.lsap, rc);
-	}
 	release_sock(sk);
 	return rc ? : copied;
 }
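
The reworked llc_ui_sendmsg() error handling above follows from the contract now documented on llc_ui_send_data(): once an skb is handed to a send helper it is always consumed, so the caller NULLs its local pointer after each hand-off and a single kfree_skb() at "out" covers every remaining path. A sketch of the contract, assuming hypothetical demo_* helpers:

#include <linux/skbuff.h>
#include <net/sock.h>

/* Sketch: every exit either hands the skb on (which consumes it) or
 * frees it, so the caller must not touch skb afterwards, even on error.
 * demo_wait_until_ready() and demo_xmit() are assumed helpers. */
static int demo_send(struct sock *sk, struct sk_buff *skb, long timeout)
{
	int rc = demo_wait_until_ready(sk, timeout);

	if (rc) {
		kfree_skb(skb);		/* error path: consume by freeing */
		return rc;
	}
	return demo_xmit(sk, skb);	/* success path: xmit consumes */
}
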
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index 4d78375..647c055 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -372,6 +372,7 @@
 	llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR);
 	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
 	if (likely(!rc)) {
+		skb_get(skb);
 		llc_conn_send_pdu(sk, skb);
 		llc_conn_ac_inc_vs_by_1(sk, skb);
 	}
@@ -389,7 +390,8 @@
 	llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
 	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
 	if (likely(!rc)) {
-		rc = llc_conn_send_pdu(sk, skb);
+		skb_get(skb);
+		llc_conn_send_pdu(sk, skb);
 		llc_conn_ac_inc_vs_by_1(sk, skb);
 	}
 	return rc;
@@ -406,6 +408,7 @@
 	llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
 	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
 	if (likely(!rc)) {
+		skb_get(skb);
 		llc_conn_send_pdu(sk, skb);
 		llc_conn_ac_inc_vs_by_1(sk, skb);
 	}
@@ -916,7 +919,8 @@
 	llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR);
 	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
 	if (likely(!rc)) {
-		rc = llc_conn_send_pdu(sk, skb);
+		skb_get(skb);
+		llc_conn_send_pdu(sk, skb);
 		llc_conn_ac_inc_vs_by_1(sk, skb);
 	}
 	return rc;
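
The skb_get() calls added above exist because llc_conn_send_pdu() now unconditionally consumes the skb it queues, while the action routines still read skb->cb afterwards (e.g. in llc_conn_ac_inc_vs_by_1()); taking an extra reference keeps the buffer alive across the hand-off. In miniature, assuming a consuming demo_queue() and a demo_inspect_cb() that reads skb->cb:

#include <linux/skbuff.h>
#include <net/sock.h>

/* Sketch: keep a private reference across a consuming call so the
 * control block can still be examined afterwards. */
static void demo_send_and_touch(struct sock *sk, struct sk_buff *skb)
{
	skb_get(skb);			/* +1: reference for ourselves */
	demo_queue(sk, skb);		/* queueing consumes one reference */
	demo_inspect_cb(sk, skb);	/* still valid thanks to our hold */
	kfree_skb(skb);			/* drop our reference when done */
}
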
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 4ff89cb..7b620ac 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -30,7 +30,7 @@
 #endif
 
 static int llc_find_offset(int state, int ev_type);
-static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb);
+static void llc_conn_send_pdus(struct sock *sk);
 static int llc_conn_service(struct sock *sk, struct sk_buff *skb);
 static int llc_exec_conn_trans_actions(struct sock *sk,
 				       struct llc_conn_state_trans *trans,
@@ -55,6 +55,8 @@
  *	(executing its actions and changing state), upper layer will be
  *	indicated or confirmed, if needed. Returns 0 for success, 1 for
  *	failure. The socket lock has to be held before calling this function.
+ *
+ *	This function always consumes a reference to the skb.
  */
 int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
 {
@@ -62,12 +64,6 @@
 	struct llc_sock *llc = llc_sk(skb->sk);
 	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
 
-	/*
-	 * We have to hold the skb, because llc_conn_service will kfree it in
-	 * the sending path and we need to look at the skb->cb, where we encode
-	 * llc_conn_state_ev.
-	 */
-	skb_get(skb);
 	ev->ind_prim = ev->cfm_prim = 0;
 	/*
 	 * Send event to state machine
@@ -75,21 +71,12 @@
 	rc = llc_conn_service(skb->sk, skb);
 	if (unlikely(rc != 0)) {
 		printk(KERN_ERR "%s: llc_conn_service failed\n", __func__);
-		goto out_kfree_skb;
-	}
-
-	if (unlikely(!ev->ind_prim && !ev->cfm_prim)) {
-		/* indicate or confirm not required */
-		if (!skb->next)
-			goto out_kfree_skb;
 		goto out_skb_put;
 	}
 
-	if (unlikely(ev->ind_prim && ev->cfm_prim)) /* Paranoia */
-		skb_get(skb);
-
 	switch (ev->ind_prim) {
 	case LLC_DATA_PRIM:
+		skb_get(skb);
 		llc_save_primitive(sk, skb, LLC_DATA_PRIM);
 		if (unlikely(sock_queue_rcv_skb(sk, skb))) {
 			/*
@@ -106,6 +93,7 @@
 		 * skb->sk pointing to the newly created struct sock in
 		 * llc_conn_handler. -acme
 		 */
+		skb_get(skb);
 		skb_queue_tail(&sk->sk_receive_queue, skb);
 		sk->sk_state_change(sk);
 		break;
@@ -121,7 +109,6 @@
 				sk->sk_state_change(sk);
 			}
 		}
-		kfree_skb(skb);
 		sock_put(sk);
 		break;
 	case LLC_RESET_PRIM:
@@ -130,14 +117,11 @@
 		 * RESET is not being notified to upper layers for now
 		 */
 		printk(KERN_INFO "%s: received a reset ind!\n", __func__);
-		kfree_skb(skb);
 		break;
 	default:
-		if (ev->ind_prim) {
+		if (ev->ind_prim)
 			printk(KERN_INFO "%s: received unknown %d prim!\n",
 				__func__, ev->ind_prim);
-			kfree_skb(skb);
-		}
 		/* No indication */
 		break;
 	}
@@ -179,25 +163,22 @@
 		printk(KERN_INFO "%s: received a reset conf!\n", __func__);
 		break;
 	default:
-		if (ev->cfm_prim) {
+		if (ev->cfm_prim)
 			printk(KERN_INFO "%s: received unknown %d prim!\n",
 					__func__, ev->cfm_prim);
-			break;
-		}
-		goto out_skb_put; /* No confirmation */
+		/* No confirmation */
+		break;
 	}
-out_kfree_skb:
-	kfree_skb(skb);
 out_skb_put:
 	kfree_skb(skb);
 	return rc;
 }
 
-int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
+void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
 {
 	/* queue PDU to send to MAC layer */
 	skb_queue_tail(&sk->sk_write_queue, skb);
-	return llc_conn_send_pdus(sk, skb);
+	llc_conn_send_pdus(sk);
 }
 
 /**
@@ -255,7 +236,7 @@
 	if (howmany_resend > 0)
 		llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
 	/* any PDUs to re-send are queued up; start sending to MAC */
-	llc_conn_send_pdus(sk, NULL);
+	llc_conn_send_pdus(sk);
 out:;
 }
 
@@ -296,7 +277,7 @@
 	if (howmany_resend > 0)
 		llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
 	/* any PDUs to re-send are queued up; start sending to MAC */
-	llc_conn_send_pdus(sk, NULL);
+	llc_conn_send_pdus(sk);
 out:;
 }
 
@@ -340,16 +321,12 @@
 /**
  *	llc_conn_send_pdus - Sends queued PDUs
  *	@sk: active connection
- *	@hold_skb: the skb held by caller, or NULL if does not care
  *
- *	Sends queued pdus to MAC layer for transmission. When @hold_skb is
- *	NULL, always return 0. Otherwise, return 0 if @hold_skb is sent
- *	successfully, or 1 for failure.
+ *	Sends queued pdus to MAC layer for transmission.
  */
-static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb)
+static void llc_conn_send_pdus(struct sock *sk)
 {
 	struct sk_buff *skb;
-	int ret = 0;
 
 	while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
@@ -361,20 +338,10 @@
 			skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb);
 			if (!skb2)
 				break;
-			dev_queue_xmit(skb2);
-		} else {
-			bool is_target = skb == hold_skb;
-			int rc;
-
-			if (is_target)
-				skb_get(skb);
-			rc = dev_queue_xmit(skb);
-			if (is_target)
-				ret = rc;
+			skb = skb2;
 		}
+		dev_queue_xmit(skb);
 	}
-
-	return ret;
 }
 
 /**
@@ -846,7 +813,7 @@
 	else {
 		dprintk("%s: adding to backlog...\n", __func__);
 		llc_set_backlog_type(skb, LLC_PACKET);
-		if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+		if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
 			goto drop_unlock;
 	}
 out:
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 260b3dc..64d4bef 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -127,9 +127,7 @@
 	list_del_rcu(&sap->node);
 	spin_unlock_bh(&llc_sap_list_lock);
 
-	synchronize_rcu();
-
-	kfree(sap);
+	kfree_rcu(sap, rcu);
 }
 
 static struct packet_type llc_packet_type __read_mostly = {
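
Replacing the synchronize_rcu()+kfree() pair with kfree_rcu() drops the blocking grace-period wait from the SAP teardown; the free is deferred via the rcu_head embedded in the object instead. The general shape of the conversion, as a sketch with an illustrative demo_obj:

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Sketch: an RCU-protected object needs an embedded rcu_head so
 * kfree_rcu() can defer the free past the grace period without sleeping. */
struct demo_obj {
	struct list_head node;
	struct rcu_head rcu;
};

static void demo_release(struct demo_obj *obj)
{
	list_del_rcu(&obj->node);	/* unpublish first */
	kfree_rcu(obj, rcu);		/* free later; never blocks */
}
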
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index 8db03c2..ad65477 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -38,6 +38,8 @@
  *	closed and -EBUSY when sending data is not permitted in this state or
  *	LLC has sent an I pdu with p bit set to 1 and is waiting for its
  *	response.
+ *
+ *	This function always consumes a reference to the skb.
  */
 int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb)
 {
@@ -46,20 +48,22 @@
 	struct llc_sock *llc = llc_sk(sk);
 
 	if (unlikely(llc->state == LLC_CONN_STATE_ADM))
-		goto out;
+		goto out_free;
 	rc = -EBUSY;
 	if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */
 		     llc->p_flag)) {
 		llc->failed_data_req = 1;
-		goto out;
+		goto out_free;
 	}
 	ev = llc_conn_ev(skb);
 	ev->type      = LLC_CONN_EV_TYPE_PRIM;
 	ev->prim      = LLC_DATA_PRIM;
 	ev->prim_type = LLC_PRIM_TYPE_REQ;
 	skb->dev      = llc->dev;
-	rc = llc_conn_state_process(sk, skb);
-out:
+	return llc_conn_state_process(sk, skb);
+
+out_free:
+	kfree_skb(skb);
 	return rc;
 }
 
diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
index 94425e4..b9ad087 100644
--- a/net/llc/llc_output.c
+++ b/net/llc/llc_output.c
@@ -1,16 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * llc_output.c - LLC minimal output path
  *
  * Copyright (c) 1997 by Procom Technology, Inc.
  * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- * This program can be redistributed or modified under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation.
- * This program is distributed without any warranty or implied warranty
- * of merchantability or fitness for a particular purpose.
- *
- * See the GNU General Public License version 2 for more details.
  */
 
 #include <linux/if_arp.h>
@@ -72,6 +65,8 @@
 	rc = llc_mac_hdr_init(skb, skb->dev->dev_addr, dmac);
 	if (likely(!rc))
 		rc = dev_queue_xmit(skb);
+	else
+		kfree_skb(skb);
 	return rc;
 }
 
diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c
index a94bd56..7ae4cc6 100644
--- a/net/llc/llc_s_ac.c
+++ b/net/llc/llc_s_ac.c
@@ -58,8 +58,10 @@
 			    ev->daddr.lsap, LLC_PDU_CMD);
 	llc_pdu_init_as_ui_cmd(skb);
 	rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
-	if (likely(!rc))
+	if (likely(!rc)) {
+		skb_get(skb);
 		rc = dev_queue_xmit(skb);
+	}
 	return rc;
 }
 
@@ -81,8 +83,10 @@
 			    ev->daddr.lsap, LLC_PDU_CMD);
 	llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
 	rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
-	if (likely(!rc))
+	if (likely(!rc)) {
+		skb_get(skb);
 		rc = dev_queue_xmit(skb);
+	}
 	return rc;
 }
 
@@ -135,8 +139,10 @@
 			    ev->daddr.lsap, LLC_PDU_CMD);
 	llc_pdu_init_as_test_cmd(skb);
 	rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
-	if (likely(!rc))
+	if (likely(!rc)) {
+		skb_get(skb);
 		rc = dev_queue_xmit(skb);
+	}
 	return rc;
 }
 
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index a7f7b8f..be41906 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -197,29 +197,22 @@
  *	After executing actions of the event, upper layer will be indicated
  *	if needed (on receiving a UI frame). sk can be null for the
  *	datalink_proto case.
+ *
+ *	This function always consumes a reference to the skb.
  */
 static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb)
 {
 	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
 
-	/*
-	 * We have to hold the skb, because llc_sap_next_state
-	 * will kfree it in the sending path and we need to
-	 * look at the skb->cb, where we encode llc_sap_state_ev.
-	 */
-	skb_get(skb);
 	ev->ind_cfm_flag = 0;
 	llc_sap_next_state(sap, skb);
-	if (ev->ind_cfm_flag == LLC_IND) {
-		if (skb->sk->sk_state == TCP_LISTEN)
-			kfree_skb(skb);
-		else {
-			llc_save_primitive(skb->sk, skb, ev->prim);
 
-			/* queue skb to the user. */
-			if (sock_queue_rcv_skb(skb->sk, skb))
-				kfree_skb(skb);
-		}
+	if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) {
+		llc_save_primitive(skb->sk, skb, ev->prim);
+
+		/* queue skb to the user. */
+		if (sock_queue_rcv_skb(skb->sk, skb) == 0)
+			return;
 	}
 	kfree_skb(skb);
 }
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 76e30f4..0c93b1b 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -1,8 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config MAC80211
 	tristate "Generic IEEE 802.11 Networking Stack (mac80211)"
 	depends on CFG80211
 	select CRYPTO
-	select CRYPTO_ARC4
+	select CRYPTO_LIB_ARC4
 	select CRYPTO_AES
 	select CRYPTO_CCM
 	select CRYPTO_GCM
@@ -27,20 +28,6 @@
 	---help---
 	  This option enables the 'minstrel' TX rate control algorithm
 
-config MAC80211_RC_MINSTREL_HT
-	bool "Minstrel 802.11n support" if EXPERT
-	depends on MAC80211_RC_MINSTREL
-	default y
-	---help---
-	  This option enables the 'minstrel_ht' TX rate control algorithm
-
-config MAC80211_RC_MINSTREL_VHT
-	bool "Minstrel 802.11ac support" if EXPERT
-	depends on MAC80211_RC_MINSTREL_HT
-	default n
-	---help---
-	  This option enables VHT in the 'minstrel_ht' TX rate control algorithm
-
 choice
 	prompt "Default rate control algorithm"
 	depends on MAC80211_HAS_RC
@@ -62,8 +49,7 @@
 
 config MAC80211_RC_DEFAULT
 	string
-	default "minstrel_ht" if MAC80211_RC_DEFAULT_MINSTREL && MAC80211_RC_MINSTREL_HT
-	default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL
+	default "minstrel_ht" if MAC80211_RC_DEFAULT_MINSTREL
 	default ""
 
 endif
@@ -72,14 +58,13 @@
 	depends on MAC80211 && MAC80211_HAS_RC=n
 
 config MAC80211_MESH
-	bool "Enable mac80211 mesh networking (pre-802.11s) support"
+	bool "Enable mac80211 mesh networking support"
 	depends on MAC80211
 	---help---
-	 This options enables support of Draft 802.11s mesh networking.
-	 The implementation is based on Draft 2.08 of the Mesh Networking
-	 amendment.  However, no compliance with that draft is claimed or even
-	 possible, as drafts leave a number of identifiers to be defined after
-	 ratification.  For more information visit http://o11s.org/.
+	  Select this option to enable 802.11 mesh operation in mac80211
+	  drivers that support it.  802.11 mesh connects multiple stations
+	  over (possibly multi-hop) wireless links to form a single logical
+	  LAN.
 
 config MAC80211_LEDS
 	bool "Enable LED triggers"
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index bb70778..4f03ebe 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -53,13 +53,14 @@
 
 CFLAGS_trace.o := -I$(src)
 
-rc80211_minstrel-y := rc80211_minstrel.o
-rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o
+rc80211_minstrel-y := \
+	rc80211_minstrel.o \
+	rc80211_minstrel_ht.o
 
-rc80211_minstrel_ht-y := rc80211_minstrel_ht.o
-rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o
+rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \
+	rc80211_minstrel_debugfs.o \
+	rc80211_minstrel_ht_debugfs.o
 
 mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
-mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y)
 
 ccflags-y += -DDEBUG
diff --git a/net/mac80211/aead_api.c b/net/mac80211/aead_api.c
index 160f9df..c5fe95e 100644
--- a/net/mac80211/aead_api.c
+++ b/net/mac80211/aead_api.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2003-2004, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2014-2015, Qualcomm Atheros, Inc.
  *
  * Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
diff --git a/net/mac80211/aead_api.h b/net/mac80211/aead_api.h
index 5e39ea8..7d463b8 100644
--- a/net/mac80211/aead_api.h
+++ b/net/mac80211/aead_api.h
@@ -1,8 +1,4 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+/* SPDX-License-Identifier: GPL-2.0-only */
 
 #ifndef _AEAD_API_H
 #define _AEAD_API_H
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index e9b7ca0..9625619 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2003-2004, Instant802 Networks, Inc.
  * Copyright 2006, Devicescape Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef AES_CCM_H
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index 2fb6558..57748ca 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * AES-128-CMAC with TLen 16 for IEEE 802.11w BIP
  * Copyright 2008, Jouni Malinen <j@w1.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h
index fef531f..7681744 100644
--- a/net/mac80211/aes_cmac.h
+++ b/net/mac80211/aes_cmac.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2008, Jouni Malinen <j@w1.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef AES_CMAC_H
diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h
index d2b0960..b14093b 100644
--- a/net/mac80211/aes_gcm.h
+++ b/net/mac80211/aes_gcm.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2014-2015, Qualcomm Atheros, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef AES_GCM_H
diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c
index bd72a86..363ad1c 100644
--- a/net/mac80211/aes_gmac.c
+++ b/net/mac80211/aes_gmac.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * AES-GMAC for IEEE 802.11 BIP-GMAC-128 and BIP-GMAC-256
  * Copyright 2015, Qualcomm Atheros, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
diff --git a/net/mac80211/aes_gmac.h b/net/mac80211/aes_gmac.h
index 32e6442..c739356 100644
--- a/net/mac80211/aes_gmac.h
+++ b/net/mac80211/aes_gmac.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2015, Qualcomm Atheros, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef AES_GMAC_H
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 6a4f154..4d1c335 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * HT handling
  *
@@ -9,10 +10,6 @@
  * Copyright 2007-2010, Intel Corporation
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
  * Copyright (C) 2018        Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 /**
@@ -181,17 +178,54 @@
 	rcu_read_unlock();
 }
 
-static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
-				      u8 dialog_token, u16 status, u16 policy,
-				      u16 buf_size, u16 timeout)
+static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata,
+				   struct sk_buff *skb,
+				   const struct ieee80211_addba_ext_ie *req)
 {
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_addba_ext_ie *resp;
+	const struct ieee80211_sta_he_cap *he_cap;
+	u8 frag_level, cap_frag_level;
+	u8 *pos;
+
+	sband = ieee80211_get_sband(sdata);
+	if (!sband)
+		return;
+	he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type);
+	if (!he_cap)
+		return;
+
+	pos = skb_put_zero(skb, 2 + sizeof(struct ieee80211_addba_ext_ie));
+	*pos++ = WLAN_EID_ADDBA_EXT;
+	*pos++ = sizeof(struct ieee80211_addba_ext_ie);
+	resp = (struct ieee80211_addba_ext_ie *)pos;
+	resp->data = req->data & IEEE80211_ADDBA_EXT_NO_FRAG;
+
+	frag_level = u32_get_bits(req->data,
+				  IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK);
+	cap_frag_level = u32_get_bits(he_cap->he_cap_elem.mac_cap_info[0],
+				      IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK);
+	if (frag_level > cap_frag_level)
+		frag_level = cap_frag_level;
+	resp->data |= u8_encode_bits(frag_level,
+				     IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK);
+}
+
+static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
+				      u8 dialog_token, u16 status, u16 policy,
+				      u16 buf_size, u16 timeout,
+				      const struct ieee80211_addba_ext_ie *addbaext)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU);
 	u16 capab;
 
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+	skb = dev_alloc_skb(sizeof(*mgmt) +
+		    2 + sizeof(struct ieee80211_addba_ext_ie) +
+		    local->hw.extra_tx_headroom);
 	if (!skb)
 		return;
 
@@ -225,13 +259,17 @@
 	mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
 	mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
 
+	if (sta->sta.he_cap.has_he && addbaext)
+		ieee80211_add_addbaext(sdata, skb, addbaext);
+
 	ieee80211_tx_skb(sdata, skb);
 }
 
 void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 				      u8 dialog_token, u16 timeout,
 				      u16 start_seq_num, u16 ba_policy, u16 tid,
-				      u16 buf_size, bool tx, bool auto_seq)
+				      u16 buf_size, bool tx, bool auto_seq,
+				      const struct ieee80211_addba_ext_ie *addbaext)
 {
 	struct ieee80211_local *local = sta->sdata->local;
 	struct tid_ampdu_rx *tid_agg_rx;
@@ -413,21 +451,22 @@
 	}
 
 	if (tx)
-		ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
+		ieee80211_send_addba_resp(sta, sta->sta.addr, tid,
 					  dialog_token, status, 1, buf_size,
-					  timeout);
+					  timeout, addbaext);
 }
 
 static void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 					    u8 dialog_token, u16 timeout,
 					    u16 start_seq_num, u16 ba_policy,
 					    u16 tid, u16 buf_size, bool tx,
-					    bool auto_seq)
+					    bool auto_seq,
+					    const struct ieee80211_addba_ext_ie *addbaext)
 {
 	mutex_lock(&sta->ampdu_mlme.mtx);
 	___ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
 					 start_seq_num, ba_policy, tid,
-					 buf_size, tx, auto_seq);
+					 buf_size, tx, auto_seq, addbaext);
 	mutex_unlock(&sta->ampdu_mlme.mtx);
 }
 
@@ -437,7 +476,9 @@
 				     size_t len)
 {
 	u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
+	struct ieee802_11_elems elems = { 0 };
 	u8 dialog_token;
+	int ies_len;
 
 	/* extract session parameters from addba request frame */
 	dialog_token = mgmt->u.action.u.addba_req.dialog_token;
@@ -450,9 +491,19 @@
 	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
 	buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
 
+	ies_len = len - offsetof(struct ieee80211_mgmt,
+				 u.action.u.addba_req.variable);
+	if (ies_len) {
+		ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
+				       ies_len, true, &elems, mgmt->bssid, NULL);
+		if (elems.parse_error)
+			return;
+	}
+
 	__ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
 					start_seq_num, ba_policy, tid,
-					buf_size, true, false);
+					buf_size, true, false,
+					elems.addba_ext_ie);
 }
 
 void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
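
In ieee80211_add_addbaext() the requested fragmentation level is clamped to what the local HE capability advertises, using the typed bitfield helpers from <linux/bitfield.h>. A standalone sketch of that clamp (the mask is illustrative, not the 802.11 one):

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define DEMO_FRAG_LEVEL	GENMASK(2, 1)	/* illustrative 2-bit field */

/* Sketch: extract both fields, clamp the request to the capability,
 * and re-encode the result in place of the requested level. */
static u8 demo_clamp_frag(u8 requested, u8 capability)
{
	u8 level = u32_get_bits(requested, DEMO_FRAG_LEVEL);
	u8 limit = u32_get_bits(capability, DEMO_FRAG_LEVEL);

	if (level > limit)
		level = limit;
	return u8_encode_bits(level, DEMO_FRAG_LEVEL);
}
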
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 69e831b..b11883d 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * HT handling
  *
@@ -8,11 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018 - 2019 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -229,7 +226,7 @@
 	clear_bit(IEEE80211_TXQ_STOP, &txqi->flags);
 	local_bh_disable();
 	rcu_read_lock();
-	drv_wake_tx_queue(sta->sdata->local, txqi);
+	schedule_and_wake_txq(sta->sdata->local, txqi);
 	rcu_read_unlock();
 	local_bh_enable();
 }
@@ -366,6 +363,8 @@
 
 	set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state);
 
+	ieee80211_agg_stop_txq(sta, tid);
+
 	spin_unlock_bh(&sta->lock);
 
 	ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n",
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 5d22eda..70739e7 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1,12 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * mac80211 configuration hooks for cfg80211
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2015  Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018-2019 Intel Corporation
  * Copyright (C) 2018 Intel Corporation
- *
- * This file is GPLv2 as found in COPYING.
  */
 
 #include <linux/ieee80211.h>
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <net/net_namespace.h>
 #include <linux/rcupdate.h>
+#include <linux/fips.h>
 #include <linux/if_ether.h>
 #include <net/cfg80211.h>
 #include "ieee80211_i.h"
@@ -158,12 +159,10 @@
 	if (ret)
 		return ret;
 
-	if (type == NL80211_IFTYPE_AP_VLAN &&
-	    params && params->use_4addr == 0) {
+	if (type == NL80211_IFTYPE_AP_VLAN && params->use_4addr == 0) {
 		RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
 		ieee80211_check_fast_rx_iface(sdata);
-	} else if (type == NL80211_IFTYPE_STATION &&
-		   params && params->use_4addr >= 0) {
+	} else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) {
 		sdata->u.mgd.use_4addr = params->use_4addr;
 	}
 
@@ -353,6 +352,36 @@
 	return 0;
 }
 
+static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata,
+			    const u8 *mac_addr, u8 key_idx)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_key *key;
+	struct sta_info *sta;
+	int ret = -EINVAL;
+
+	if (!wiphy_ext_feature_isset(local->hw.wiphy,
+				     NL80211_EXT_FEATURE_EXT_KEY_ID))
+		return -EINVAL;
+
+	sta = sta_info_get_bss(sdata, mac_addr);
+
+	if (!sta)
+		return -EINVAL;
+
+	if (sta->ptk_idx == key_idx)
+		return 0;
+
+	mutex_lock(&local->key_mtx);
+	key = key_mtx_dereference(local, sta->ptk[key_idx]);
+
+	if (key && key->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX)
+		ret = ieee80211_set_tx_key(key);
+
+	mutex_unlock(&local->key_mtx);
+	return ret;
+}
+
 static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 			     u8 key_idx, bool pairwise, const u8 *mac_addr,
 			     struct key_params *params)
@@ -367,14 +396,16 @@
 	if (!ieee80211_sdata_running(sdata))
 		return -ENETDOWN;
 
+	if (pairwise && params->mode == NL80211_KEY_SET_TX)
+		return ieee80211_set_tx(sdata, mac_addr, key_idx);
+
 	/* reject WEP and TKIP keys if WEP failed to initialize */
 	switch (params->cipher) {
 	case WLAN_CIPHER_SUITE_WEP40:
 	case WLAN_CIPHER_SUITE_TKIP:
 	case WLAN_CIPHER_SUITE_WEP104:
-		if (IS_ERR(local->wep_tx_tfm))
+		if (WARN_ON_ONCE(fips_enabled))
 			return -EINVAL;
-		break;
 	case WLAN_CIPHER_SUITE_CCMP:
 	case WLAN_CIPHER_SUITE_CCMP_256:
 	case WLAN_CIPHER_SUITE_AES_CMAC:
@@ -398,6 +429,9 @@
 	if (pairwise)
 		key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE;
 
+	if (params->mode == NL80211_KEY_NO_TX)
+		key->conf.flags |= IEEE80211_KEY_FLAG_NO_AUTO_TX;
+
 	mutex_lock(&local->sta_mtx);
 
 	if (mac_addr) {
@@ -792,6 +826,48 @@
 	return 0;
 }
 
+static int ieee80211_set_ftm_responder_params(
+				struct ieee80211_sub_if_data *sdata,
+				const u8 *lci, size_t lci_len,
+				const u8 *civicloc, size_t civicloc_len)
+{
+	struct ieee80211_ftm_responder_params *new, *old;
+	struct ieee80211_bss_conf *bss_conf;
+	u8 *pos;
+	int len;
+
+	if (!lci_len && !civicloc_len)
+		return 0;
+
+	bss_conf = &sdata->vif.bss_conf;
+	old = bss_conf->ftmr_params;
+	len = lci_len + civicloc_len;
+
+	new = kzalloc(sizeof(*new) + len, GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	pos = (u8 *)(new + 1);
+	if (lci_len) {
+		new->lci_len = lci_len;
+		new->lci = pos;
+		memcpy(pos, lci, lci_len);
+		pos += lci_len;
+	}
+
+	if (civicloc_len) {
+		new->civicloc_len = civicloc_len;
+		new->civicloc = pos;
+		memcpy(pos, civicloc, civicloc_len);
+		pos += civicloc_len;
+	}
+
+	bss_conf->ftmr_params = new;
+	kfree(old);
+
+	return 0;
+}
+
 static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
 				   struct cfg80211_beacon_data *params,
 				   const struct ieee80211_csa_settings *csa)
@@ -860,11 +936,29 @@
 
 	err = ieee80211_set_probe_resp(sdata, params->probe_resp,
 				       params->probe_resp_len, csa);
-	if (err < 0)
+	if (err < 0) {
+		kfree(new);
 		return err;
+	}
 	if (err == 0)
 		changed |= BSS_CHANGED_AP_PROBE_RESP;
 
+	if (params->ftm_responder != -1) {
+		sdata->vif.bss_conf.ftm_responder = params->ftm_responder;
+		err = ieee80211_set_ftm_responder_params(sdata,
+							 params->lci,
+							 params->lci_len,
+							 params->civicloc,
+							 params->civicloc_len);
+
+		if (err < 0) {
+			kfree(new);
+			return err;
+		}
+
+		changed |= BSS_CHANGED_FTM_RESPONDER;
+	}
+
 	rcu_assign_pointer(sdata->u.ap.beacon, new);
 
 	if (old)
@@ -885,8 +979,11 @@
 		      BSS_CHANGED_BEACON |
 		      BSS_CHANGED_SSID |
 		      BSS_CHANGED_P2P_PS |
-		      BSS_CHANGED_TXPOWER;
+		      BSS_CHANGED_TXPOWER |
+		      BSS_CHANGED_TWT |
+		      BSS_CHANGED_HE_OBSS_PD;
 	int err;
+	int prev_beacon_int;
 
 	old = sdata_dereference(sdata->u.ap.beacon, sdata);
 	if (old)
@@ -909,16 +1006,22 @@
 
 	sdata->needed_rx_chains = sdata->local->rx_chains;
 
+	prev_beacon_int = sdata->vif.bss_conf.beacon_int;
 	sdata->vif.bss_conf.beacon_int = params->beacon_interval;
 
+	if (params->he_cap)
+		sdata->vif.bss_conf.he_support = true;
+
 	mutex_lock(&local->mtx);
 	err = ieee80211_vif_use_channel(sdata, &params->chandef,
 					IEEE80211_CHANCTX_SHARED);
 	if (!err)
 		ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
 	mutex_unlock(&local->mtx);
-	if (err)
+	if (err) {
+		sdata->vif.bss_conf.beacon_int = prev_beacon_int;
 		return err;
+	}
 
 	/*
 	 * Apply control port protocol, this allows us to
@@ -948,6 +1051,9 @@
 	sdata->vif.bss_conf.dtim_period = params->dtim_period;
 	sdata->vif.bss_conf.enable_beacon = true;
 	sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p;
+	sdata->vif.bss_conf.twt_responder = params->twt_responder;
+	memcpy(&sdata->vif.bss_conf.he_obss_pd, &params->he_obss_pd,
+	       sizeof(struct ieee80211_he_obss_pd));
 
 	sdata->vif.bss_conf.ssid_len = params->ssid_len;
 	if (params->ssid_len)
@@ -1062,6 +1168,9 @@
 		kfree_rcu(old_probe_resp, rcu_head);
 	sdata->u.ap.driver_smps_mode = IEEE80211_SMPS_OFF;
 
+	kfree(sdata->vif.bss_conf.ftmr_params);
+	sdata->vif.bss_conf.ftmr_params = NULL;
+
 	__sta_info_flush(sdata, true);
 	ieee80211_free_keys(sdata, true);
 
@@ -1092,50 +1201,6 @@
 	return 0;
 }
 
-/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
-struct iapp_layer2_update {
-	u8 da[ETH_ALEN];	/* broadcast */
-	u8 sa[ETH_ALEN];	/* STA addr */
-	__be16 len;		/* 6 */
-	u8 dsap;		/* 0 */
-	u8 ssap;		/* 0 */
-	u8 control;
-	u8 xid_info[3];
-} __packed;
-
-static void ieee80211_send_layer2_update(struct sta_info *sta)
-{
-	struct iapp_layer2_update *msg;
-	struct sk_buff *skb;
-
-	/* Send Level 2 Update Frame to update forwarding tables in layer 2
-	 * bridge devices */
-
-	skb = dev_alloc_skb(sizeof(*msg));
-	if (!skb)
-		return;
-	msg = skb_put(skb, sizeof(*msg));
-
-	/* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
-	 * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
-
-	eth_broadcast_addr(msg->da);
-	memcpy(msg->sa, sta->sta.addr, ETH_ALEN);
-	msg->len = htons(6);
-	msg->dsap = 0;
-	msg->ssap = 0x01;	/* NULL LSAP, CR Bit: Response */
-	msg->control = 0xaf;	/* XID response lsb.1111F101.
-				 * F=0 (no poll command; unsolicited frame) */
-	msg->xid_info[0] = 0x81;	/* XID format identifier */
-	msg->xid_info[1] = 1;	/* LLC types/classes: Type 1 LLC */
-	msg->xid_info[2] = 0;	/* XID sender's receive window size (RW) */
-
-	skb->dev = sta->sdata->dev;
-	skb->protocol = eth_type_trans(skb, sta->sdata->dev);
-	memset(skb->cb, 0, sizeof(skb->cb));
-	netif_rx_ni(skb);
-}
-
 static int sta_apply_auth_flags(struct ieee80211_local *local,
 				struct sta_info *sta,
 				u32 mask, u32 set)
@@ -1215,6 +1280,11 @@
 			ieee80211_mps_sta_status_update(sta);
 			changed |= ieee80211_mps_set_sta_local_pm(sta,
 				      sdata->u.mesh.mshcfg.power_mode);
+
+			ewma_mesh_tx_rate_avg_init(&sta->mesh->tx_rate_avg);
+			/* init at low value */
+			ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg, 10);
+
 			break;
 		case NL80211_PLINK_LISTEN:
 		case NL80211_PLINK_BLOCKED:
@@ -1396,7 +1466,16 @@
 	if (params->listen_interval >= 0)
 		sta->listen_interval = params->listen_interval;
 
-	if (params->supported_rates) {
+	if (params->sta_modify_mask & STATION_PARAM_APPLY_STA_TXPOWER) {
+		sta->sta.txpwr.type = params->txpwr.type;
+		if (params->txpwr.type == NL80211_TX_POWER_LIMITED)
+			sta->sta.txpwr.power = params->txpwr.power;
+		ret = drv_sta_set_txpwr(local, sdata, sta);
+		if (ret)
+			return ret;
+	}
+
+	if (params->supported_rates && params->supported_rates_len) {
 		ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
 					 sband, params->supported_rates,
 					 params->supported_rates_len,
@@ -1431,6 +1510,9 @@
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		sta_apply_mesh_params(local, sta, params);
 
+	if (params->airtime_weight)
+		sta->airtime_weight = params->airtime_weight;
+
 	/* set the STA state after all sta info from usermode has been set */
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
 	    set & BIT(NL80211_STA_FLAG_ASSOCIATED)) {
@@ -1450,7 +1532,6 @@
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *sdata;
 	int err;
-	int layer2_update;
 
 	if (params->vlan) {
 		sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
@@ -1464,7 +1545,12 @@
 	if (ether_addr_equal(mac, sdata->vif.addr))
 		return -EINVAL;
 
-	if (is_multicast_ether_addr(mac))
+	if (!is_valid_ether_addr(mac))
+		return -EINVAL;
+
+	if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) &&
+	    sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    !sdata->u.mgd.associated)
 		return -EINVAL;
 
 	sta = sta_info_alloc(sdata, mac, GFP_KERNEL);
@@ -1489,18 +1575,12 @@
 	    test_sta_flag(sta, WLAN_STA_ASSOC))
 		rate_control_rate_init(sta);
 
-	layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
-		sdata->vif.type == NL80211_IFTYPE_AP;
-
 	err = sta_info_insert_rcu(sta);
 	if (err) {
 		rcu_read_unlock();
 		return err;
 	}
 
-	if (layer2_update)
-		ieee80211_send_layer2_update(sta);
-
 	rcu_read_unlock();
 
 	return 0;
@@ -1598,10 +1678,11 @@
 		sta->sdata = vlansdata;
 		ieee80211_check_fast_xmit(sta);
 
-		if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+		if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
 			ieee80211_vif_inc_num_mcast(sta->sdata);
-
-		ieee80211_send_layer2_update(sta);
+			cfg80211_send_layer2_update(sta->sdata->dev,
+						    sta->sta.addr);
+		}
 	}
 
 	err = sta_apply_parameters(local, sta, params);
@@ -1726,7 +1807,9 @@
 			MPATH_INFO_EXPTIME |
 			MPATH_INFO_DISCOVERY_TIMEOUT |
 			MPATH_INFO_DISCOVERY_RETRIES |
-			MPATH_INFO_FLAGS;
+			MPATH_INFO_FLAGS |
+			MPATH_INFO_HOP_COUNT |
+			MPATH_INFO_PATH_CHANGE;
 
 	pinfo->frame_qlen = mpath->frame_queue.qlen;
 	pinfo->sn = mpath->sn;
@@ -1746,6 +1829,8 @@
 		pinfo->flags |= NL80211_MPATH_FLAG_FIXED;
 	if (mpath->flags & MESH_PATH_RESOLVED)
 		pinfo->flags |= NL80211_MPATH_FLAG_RESOLVED;
+	pinfo->hop_count = mpath->hop_count;
+	pinfo->path_change_count = mpath->path_change_count;
 }
 
 static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
@@ -2012,6 +2097,9 @@
 			nconf->dot11MeshAwakeWindowDuration;
 	if (_chg_mesh_attr(NL80211_MESHCONF_PLINK_TIMEOUT, mask))
 		conf->plink_timeout = nconf->plink_timeout;
+	if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_GATE, mask))
+		conf->dot11MeshConnectedToMeshGate =
+			nconf->dot11MeshConnectedToMeshGate;
 	ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON);
 	return 0;
 }
@@ -2875,7 +2963,7 @@
 
 	len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len +
 	      beacon->proberesp_ies_len + beacon->assocresp_ies_len +
-	      beacon->probe_resp_len;
+	      beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len;
 
 	new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL);
 	if (!new_beacon)
@@ -2919,6 +3007,21 @@
 		pos += beacon->probe_resp_len;
 	}
 
+	/* might copy -1, meaning no changes requested */
+	new_beacon->ftm_responder = beacon->ftm_responder;
+	if (beacon->lci) {
+		new_beacon->lci_len = beacon->lci_len;
+		new_beacon->lci = pos;
+		memcpy(pos, beacon->lci, beacon->lci_len);
+		pos += beacon->lci_len;
+	}
+	if (beacon->civicloc) {
+		new_beacon->civicloc_len = beacon->civicloc_len;
+		new_beacon->civicloc = pos;
+		memcpy(pos, beacon->civicloc, beacon->civicloc_len);
+		pos += beacon->civicloc_len;
+	}
+
 	return new_beacon;
 }
 
@@ -3808,6 +3911,37 @@
 	return ret;
 }
 
+static int
+ieee80211_get_ftm_responder_stats(struct wiphy *wiphy,
+				  struct net_device *dev,
+				  struct cfg80211_ftm_responder_stats *ftm_stats)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	return drv_get_ftm_responder_stats(local, sdata, ftm_stats);
+}
+
+static int
+ieee80211_start_pmsr(struct wiphy *wiphy, struct wireless_dev *dev,
+		     struct cfg80211_pmsr_request *request)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev);
+
+	return drv_start_pmsr(local, sdata, request);
+}
+
+static void
+ieee80211_abort_pmsr(struct wiphy *wiphy, struct wireless_dev *dev,
+		     struct cfg80211_pmsr_request *request)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev);
+
+	return drv_abort_pmsr(local, sdata, request);
+}
+
 const struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -3902,4 +4036,8 @@
 	.set_multicast_to_unicast = ieee80211_set_multicast_to_unicast,
 	.tx_control_port = ieee80211_tx_control_port,
 	.get_txq_stats = ieee80211_get_txq_stats,
+	.get_ftm_responder_stats = ieee80211_get_ftm_responder_stats,
+	.start_pmsr = ieee80211_start_pmsr,
+	.abort_pmsr = ieee80211_abort_pmsr,
+	.probe_mesh_link = ieee80211_probe_mesh_link,
 };
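
ieee80211_set_ftm_responder_params() above stores both variable-length blobs behind the struct in one allocation, so a single kfree() (see the teardown in ieee80211_stop_ap()) releases everything and the old parameters can be swapped out in one pointer assignment. The allocation pattern, sketched with an illustrative demo_params that carries only one blob:

#include <linux/slab.h>
#include <linux/string.h>

struct demo_params {
	const u8 *lci;
	size_t lci_len;
	/* payload bytes follow the struct itself */
};

/* Sketch: one kzalloc() covers the struct plus its payload, so the
 * pointer member can aim just past the struct and one kfree() frees all. */
static struct demo_params *demo_params_alloc(const u8 *lci, size_t lci_len)
{
	struct demo_params *p = kzalloc(sizeof(*p) + lci_len, GFP_KERNEL);
	u8 *pos;

	if (!p)
		return NULL;

	pos = (u8 *)(p + 1);		/* first byte after the struct */
	memcpy(pos, lci, lci_len);
	p->lci = pos;
	p->lci_len = lci_len;
	return p;
}
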
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index d9558ff..9c94baa 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * mac80211 - channel management
  */
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index b5adf36..568b3b2 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -1,11 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * mac80211 debugfs for wireless PHYs
  *
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- *
- * GPLv2
- *
+ * Copyright (C) 2018 - 2019 Intel Corporation
  */
 
 #include <linux/debugfs.h>
@@ -149,6 +148,58 @@
 	.llseek = default_llseek,
 };
 
+static ssize_t force_tx_status_read(struct file *file,
+				    char __user *user_buf,
+				    size_t count,
+				    loff_t *ppos)
+{
+	struct ieee80211_local *local = file->private_data;
+	char buf[3];
+	int len = 0;
+
+	len = scnprintf(buf, sizeof(buf), "%d\n", (int)local->force_tx_status);
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       buf, len);
+}
+
+static ssize_t force_tx_status_write(struct file *file,
+				     const char __user *user_buf,
+				     size_t count,
+				     loff_t *ppos)
+{
+	struct ieee80211_local *local = file->private_data;
+	char buf[3];
+	size_t len;
+
+	if (count > sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(buf, user_buf, count))
+		return -EFAULT;
+
+	buf[sizeof(buf) - 1] = '\0';
+	len = strlen(buf);
+	if (len > 0 && buf[len - 1] == '\n')
+		buf[len - 1] = 0;
+
+	if (buf[0] == '0' && buf[1] == '\0')
+		local->force_tx_status = 0;
+	else if (buf[0] == '1' && buf[1] == '\0')
+		local->force_tx_status = 1;
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+static const struct file_operations force_tx_status_ops = {
+	.write = force_tx_status_write,
+	.read = force_tx_status_read,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
 #ifdef CONFIG_PM
 static ssize_t reset_write(struct file *file, const char __user *user_buf,
 			   size_t count, loff_t *ppos)
@@ -214,6 +265,13 @@
 	FLAG(SUPPORTS_TDLS_BUFFER_STA),
 	FLAG(DEAUTH_NEED_MGD_TX_PREP),
 	FLAG(DOESNT_SUPPORT_QOS_NDP),
+	FLAG(BUFF_MMPDU_TXQ),
+	FLAG(SUPPORTS_VHT_EXT_NSS_BW),
+	FLAG(STA_MMPDU_TXQ),
+	FLAG(TX_STATUS_NO_AMPDU_LEN),
+	FLAG(SUPPORTS_MULTI_BSSID),
+	FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID),
+	FLAG(AMPDU_KEYBORDER_SUPPORT),
 #undef FLAG
 };
 
@@ -375,10 +433,14 @@
 	DEBUGFS_ADD(hwflags);
 	DEBUGFS_ADD(user_power);
 	DEBUGFS_ADD(power);
+	DEBUGFS_ADD_MODE(force_tx_status, 0600);
 
 	if (local->ops->wake_tx_queue)
 		DEBUGFS_ADD_MODE(aqm, 0600);
 
+	debugfs_create_u16("airtime_flags", 0600,
+			   phyd, &local->airtime_flags);
+
 	statsd = debugfs_create_dir("statistics", phyd);
 
 	/* if the dir failed, don't put all the other things into the root! */
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index a2ef95f..7b8735c 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2003-2005	Devicescape Software, Inc.
  * Copyright (c) 2006	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright (C) 2015	Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kobject.h>
@@ -342,9 +339,6 @@
 	key->debugfs.dir = debugfs_create_dir(buf,
 					key->local->debugfs.keys);
 
-	if (!key->debugfs.dir)
-		return;
-
 	sta = key->sta;
 	if (sta) {
 		sprintf(buf, "../../netdev:%s/stations/%pM",
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index c813207..64b544a 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2006	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -490,9 +487,14 @@
 	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
 {
 	struct ieee80211_local *local = sdata->local;
-	struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+	struct txq_info *txqi;
 	int len;
 
+	if (!sdata->vif.txq)
+		return 0;
+
+	txqi = to_txq_info(sdata->vif.txq);
+
 	spin_lock_bh(&local->fq.lock);
 	rcu_read_lock();
 
@@ -641,6 +643,8 @@
 IEEE80211_IF_FILE(power_mode, u.mesh.mshcfg.power_mode, DEC);
 IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration,
 		  u.mesh.mshcfg.dot11MeshAwakeWindowDuration, DEC);
+IEEE80211_IF_FILE(dot11MeshConnectedToMeshGate,
+		  u.mesh.mshcfg.dot11MeshConnectedToMeshGate, DEC);
 #endif
 
 #define DEBUGFS_ADD_MODE(name, mode) \
@@ -659,7 +663,9 @@
 	DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz);
 	DEBUGFS_ADD(hw_queues);
 
-	if (sdata->local->ops->wake_tx_queue)
+	if (sdata->local->ops->wake_tx_queue &&
+	    sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+	    sdata->vif.type != NL80211_IFTYPE_NAN)
 		DEBUGFS_ADD(aqm);
 }
 
@@ -762,6 +768,7 @@
 	MESHPARAMS_ADD(dot11MeshHWMPconfirmationInterval);
 	MESHPARAMS_ADD(power_mode);
 	MESHPARAMS_ADD(dot11MeshAwakeWindowDuration);
+	MESHPARAMS_ADD(dot11MeshConnectedToMeshGate);
 #undef MESHPARAMS_ADD
 }
 #endif
@@ -815,9 +822,8 @@
 	sprintf(buf, "netdev:%s", sdata->name);
 	sdata->vif.debugfs_dir = debugfs_create_dir(buf,
 		sdata->local->hw.wiphy->debugfsdir);
-	if (sdata->vif.debugfs_dir)
-		sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
-			sdata->vif.debugfs_dir);
+	sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
+							sdata->vif.debugfs_dir);
 	add_files(sdata);
 }
 
@@ -838,12 +844,9 @@
 
 	dir = sdata->vif.debugfs_dir;
 
-	if (!dir)
+	if (IS_ERR_OR_NULL(dir))
 		return;
 
 	sprintf(buf, "netdev:%s", sdata->name);
-	if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf))
-		sdata_err(sdata,
-			  "debugfs: failed to rename debugfs dir to %s\n",
-			  buf);
+	debugfs_rename(dir->d_parent, dir, dir->d_parent, buf);
 }
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 4105081..c8ad20c 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -1,13 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2003-2005	Devicescape Software, Inc.
  * Copyright (c) 2006	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2016 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018 - 2019 Intel Corporation
  */
 
 #include <linux/debugfs.h>
@@ -140,7 +138,7 @@
 {
 	struct sta_info *sta = file->private_data;
 	struct ieee80211_local *local = sta->local;
-	size_t bufsz = AQM_TXQ_ENTRY_LEN*(IEEE80211_NUM_TIDS+1);
+	size_t bufsz = AQM_TXQ_ENTRY_LEN * (IEEE80211_NUM_TIDS + 2);
 	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
 	struct txq_info *txqi;
 	ssize_t rv;
@@ -162,7 +160,9 @@
 		       bufsz+buf-p,
 		       "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");
 
-	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
+	for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+		if (!sta->sta.txq[i])
+			continue;
 		txqi = to_txq_info(sta->sta.txq[i]);
 		p += scnprintf(p, bufsz+buf-p,
 			       "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
@@ -178,9 +178,9 @@
 			       txqi->tin.tx_bytes,
 			       txqi->tin.tx_packets,
 			       txqi->flags,
-			       txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN",
-			       txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "",
-			       txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : "");
+			       test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ? "STOP" : "RUN",
+			       test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags) ? " AMPDU" : "",
+			       test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "");
 	}
 
 	rcu_read_unlock();
@@ -192,6 +192,64 @@
 }
 STA_OPS(aqm);
 
+static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
+				size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	struct ieee80211_local *local = sta->sdata->local;
+	size_t bufsz = 200;
+	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
+	u64 rx_airtime = 0, tx_airtime = 0;
+	s64 deficit[IEEE80211_NUM_ACS];
+	ssize_t rv;
+	int ac;
+
+	if (!buf)
+		return -ENOMEM;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		rx_airtime += sta->airtime[ac].rx_airtime;
+		tx_airtime += sta->airtime[ac].tx_airtime;
+		deficit[ac] = sta->airtime[ac].deficit;
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+	}
+
+	p += scnprintf(p, bufsz + buf - p,
+		"RX: %llu us\nTX: %llu us\nWeight: %u\n"
+		"Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+		rx_airtime,
+		tx_airtime,
+		sta->airtime_weight,
+		deficit[0],
+		deficit[1],
+		deficit[2],
+		deficit[3]);
+
+	rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+	kfree(buf);
+	return rv;
+}
+
+static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
+				 size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	struct ieee80211_local *local = sta->sdata->local;
+	int ac;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		sta->airtime[ac].rx_airtime = 0;
+		sta->airtime[ac].tx_airtime = 0;
+		sta->airtime[ac].deficit = sta->airtime_weight;
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+	}
+
+	return count;
+}
+STA_OPS_RW(airtime);
+
 static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 					size_t count, loff_t *ppos)
 {
@@ -487,12 +545,383 @@
 			p += scnprintf(p, sizeof(buf)+buf-p,
 				       "MCS TX highest: %d Mbps\n",
 				       le16_to_cpu(vhtc->vht_mcs.tx_highest));
+#undef PFLAG
 	}
 
 	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
 }
 STA_OPS(vht_capa);
 
+static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
+				size_t count, loff_t *ppos)
+{
+	char *buf, *p;
+	size_t buf_sz = PAGE_SIZE;
+	struct sta_info *sta = file->private_data;
+	struct ieee80211_sta_he_cap *hec = &sta->sta.he_cap;
+	struct ieee80211_he_mcs_nss_supp *nss = &hec->he_mcs_nss_supp;
+	u8 ppe_size;
+	u8 *cap;
+	int i;
+	ssize_t ret;
+
+	buf = kmalloc(buf_sz, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	p = buf;
+
+	p += scnprintf(p, buf_sz + buf - p, "HE %ssupported\n",
+		       hec->has_he ? "" : "not ");
+	if (!hec->has_he)
+		goto out;
+
+	cap = hec->he_cap_elem.mac_cap_info;
+	p += scnprintf(p, buf_sz + buf - p,
+		       "MAC-CAP: %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x\n",
+		       cap[0], cap[1], cap[2], cap[3], cap[4], cap[5]);
+
+#define PRINT(fmt, ...)							\
+	p += scnprintf(p, buf_sz + buf - p, "\t\t" fmt "\n",		\
+		       ##__VA_ARGS__)
+
+#define PFLAG(t, n, a, b)						\
+	do {								\
+		if (cap[n] & IEEE80211_HE_##t##_CAP##n##_##a)		\
+			PRINT("%s", b);					\
+	} while (0)
+
+#define PFLAG_RANGE(t, i, n, s, m, off, fmt)				\
+	do {								\
+		u8 msk = IEEE80211_HE_##t##_CAP##i##_##n##_MASK;	\
+		u8 idx = ((cap[i] & msk) >> (ffs(msk) - 1)) + off;	\
+		PRINT(fmt, (s << idx) + (m * idx));			\
+	} while (0)
+
+#define PFLAG_RANGE_DEFAULT(t, i, n, s, m, off, fmt, a, b)		\
+	do {								\
+		if (cap[i] == IEEE80211_HE_##t ##_CAP##i##_##n##_##a) {	\
+			PRINT("%s", b);					\
+			break;						\
+		}							\
+		PFLAG_RANGE(t, i, n, s, m, off, fmt);			\
+	} while (0)
+
+	PFLAG(MAC, 0, HTC_HE, "HTC-HE");
+	PFLAG(MAC, 0, TWT_REQ, "TWT-REQ");
+	PFLAG(MAC, 0, TWT_RES, "TWT-RES");
+	PFLAG_RANGE_DEFAULT(MAC, 0, DYNAMIC_FRAG, 0, 1, 0,
+			    "DYNAMIC-FRAG-LEVEL-%d", NOT_SUPP, "NOT-SUPP");
+	PFLAG_RANGE_DEFAULT(MAC, 0, MAX_NUM_FRAG_MSDU, 1, 0, 0,
+			    "MAX-NUM-FRAG-MSDU-%d", UNLIMITED, "UNLIMITED");
+
+	PFLAG_RANGE_DEFAULT(MAC, 1, MIN_FRAG_SIZE, 128, 0, -1,
+			    "MIN-FRAG-SIZE-%d", UNLIMITED, "UNLIMITED");
+	PFLAG_RANGE_DEFAULT(MAC, 1, TF_MAC_PAD_DUR, 0, 8, 0,
+			    "TF-MAC-PAD-DUR-%dUS", MASK, "UNKNOWN");
+	PFLAG_RANGE(MAC, 1, MULTI_TID_AGG_RX_QOS, 0, 1, 1,
+		    "MULTI-TID-AGG-RX-QOS-%d");
+
+	if (cap[0] & IEEE80211_HE_MAC_CAP0_HTC_HE) {
+		switch (((cap[2] << 1) | (cap[1] >> 7)) & 0x3) {
+		case 0:
+			PRINT("LINK-ADAPTATION-NO-FEEDBACK");
+			break;
+		case 1:
+			PRINT("LINK-ADAPTATION-RESERVED");
+			break;
+		case 2:
+			PRINT("LINK-ADAPTATION-UNSOLICITED-FEEDBACK");
+			break;
+		case 3:
+			PRINT("LINK-ADAPTATION-BOTH");
+			break;
+		}
+	}
+
+	PFLAG(MAC, 2, ALL_ACK, "ALL-ACK");
+	PFLAG(MAC, 2, TRS, "TRS");
+	PFLAG(MAC, 2, BSR, "BSR");
+	PFLAG(MAC, 2, BCAST_TWT, "BCAST-TWT");
+	PFLAG(MAC, 2, 32BIT_BA_BITMAP, "32BIT-BA-BITMAP");
+	PFLAG(MAC, 2, MU_CASCADING, "MU-CASCADING");
+	PFLAG(MAC, 2, ACK_EN, "ACK-EN");
+
+	PFLAG(MAC, 3, OMI_CONTROL, "OMI-CONTROL");
+	PFLAG(MAC, 3, OFDMA_RA, "OFDMA-RA");
+
+	switch (cap[3] & IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK) {
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT:
+		PRINT("MAX-AMPDU-LEN-EXP-USE-VHT");
+		break;
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1:
+		PRINT("MAX-AMPDU-LEN-EXP-VHT-1");
+		break;
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2:
+		PRINT("MAX-AMPDU-LEN-EXP-VHT-2");
+		break;
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED:
+		PRINT("MAX-AMPDU-LEN-EXP-RESERVED");
+		break;
+	}
+
+	PFLAG(MAC, 3, AMSDU_FRAG, "AMSDU-FRAG");
+	PFLAG(MAC, 3, FLEX_TWT_SCHED, "FLEX-TWT-SCHED");
+	PFLAG(MAC, 3, RX_CTRL_FRAME_TO_MULTIBSS, "RX-CTRL-FRAME-TO-MULTIBSS");
+
+	PFLAG(MAC, 4, BSRP_BQRP_A_MPDU_AGG, "BSRP-BQRP-A-MPDU-AGG");
+	PFLAG(MAC, 4, QTP, "QTP");
+	PFLAG(MAC, 4, BQR, "BQR");
+	PFLAG(MAC, 4, SRP_RESP, "SRP-RESP");
+	PFLAG(MAC, 4, NDP_FB_REP, "NDP-FB-REP");
+	PFLAG(MAC, 4, OPS, "OPS");
+	PFLAG(MAC, 4, AMDSU_IN_AMPDU, "AMSDU-IN-AMPDU");
+
+	PRINT("MULTI-TID-AGG-TX-QOS-%d", ((cap[5] << 1) | (cap[4] >> 7)) & 0x7);
+
+	PFLAG(MAC, 5, SUBCHAN_SELECVITE_TRANSMISSION,
+	      "SUBCHAN-SELECVITE-TRANSMISSION");
+	PFLAG(MAC, 5, UL_2x996_TONE_RU, "UL-2x996-TONE-RU");
+	PFLAG(MAC, 5, OM_CTRL_UL_MU_DATA_DIS_RX, "OM-CTRL-UL-MU-DATA-DIS-RX");
+	PFLAG(MAC, 5, HE_DYNAMIC_SM_PS, "HE-DYNAMIC-SM-PS");
+	PFLAG(MAC, 5, PUNCTURED_SOUNDING, "PUNCTURED-SOUNDING");
+	PFLAG(MAC, 5, HT_VHT_TRIG_FRAME_RX, "HT-VHT-TRIG-FRAME-RX");
+
+	cap = hec->he_cap_elem.phy_cap_info;
+	p += scnprintf(p, buf_sz + buf - p,
+		       "PHY CAP: %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x\n",
+		       cap[0], cap[1], cap[2], cap[3], cap[4], cap[5], cap[6],
+		       cap[7], cap[8], cap[9], cap[10]);
+
+	PFLAG(PHY, 0, CHANNEL_WIDTH_SET_40MHZ_IN_2G,
+	      "CHANNEL-WIDTH-SET-40MHZ-IN-2G");
+	PFLAG(PHY, 0, CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G,
+	      "CHANNEL-WIDTH-SET-40MHZ-80MHZ-IN-5G");
+	PFLAG(PHY, 0, CHANNEL_WIDTH_SET_160MHZ_IN_5G,
+	      "CHANNEL-WIDTH-SET-160MHZ-IN-5G");
+	PFLAG(PHY, 0, CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G,
+	      "CHANNEL-WIDTH-SET-80PLUS80-MHZ-IN-5G");
+	PFLAG(PHY, 0, CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G,
+	      "CHANNEL-WIDTH-SET-RU-MAPPING-IN-2G");
+	PFLAG(PHY, 0, CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G,
+	      "CHANNEL-WIDTH-SET-RU-MAPPING-IN-5G");
+
+	switch (cap[1] & IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK) {
+	case IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_20MHZ:
+		PRINT("PREAMBLE-PUNC-RX-80MHZ-ONLY-SECOND-20MHZ");
+		break;
+	case IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_40MHZ:
+		PRINT("PREAMBLE-PUNC-RX-80MHZ-ONLY-SECOND-40MHZ");
+		break;
+	case IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_20MHZ:
+		PRINT("PREAMBLE-PUNC-RX-160MHZ-ONLY-SECOND-20MHZ");
+		break;
+	case IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_40MHZ:
+		PRINT("PREAMBLE-PUNC-RX-160MHZ-ONLY-SECOND-40MHZ");
+		break;
+	}
+
+	PFLAG(PHY, 1, DEVICE_CLASS_A,
+	      "IEEE80211-HE-PHY-CAP1-DEVICE-CLASS-A");
+	PFLAG(PHY, 1, LDPC_CODING_IN_PAYLOAD,
+	      "LDPC-CODING-IN-PAYLOAD");
+	PFLAG(PHY, 1, HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US,
+	      "HY-CAP1-HE-LTF-AND-GI-FOR-HE-PPDUS-0-8US");
+	PRINT("MIDAMBLE-RX-MAX-NSTS-%d", ((cap[2] << 1) | (cap[1] >> 7)) & 0x3);
+
+	PFLAG(PHY, 2, NDP_4x_LTF_AND_3_2US, "NDP-4X-LTF-AND-3-2US");
+	PFLAG(PHY, 2, STBC_TX_UNDER_80MHZ, "STBC-TX-UNDER-80MHZ");
+	PFLAG(PHY, 2, STBC_RX_UNDER_80MHZ, "STBC-RX-UNDER-80MHZ");
+	PFLAG(PHY, 2, DOPPLER_TX, "DOPPLER-TX");
+	PFLAG(PHY, 2, DOPPLER_RX, "DOPPLER-RX");
+	PFLAG(PHY, 2, UL_MU_FULL_MU_MIMO, "UL-MU-FULL-MU-MIMO");
+	PFLAG(PHY, 2, UL_MU_PARTIAL_MU_MIMO, "UL-MU-PARTIAL-MU-MIMO");
+
+	switch (cap[3] & IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK) {
+	case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM:
+		PRINT("DCM-MAX-CONST-TX-NO-DCM");
+		break;
+	case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK:
+		PRINT("DCM-MAX-CONST-TX-BPSK");
+		break;
+	case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_QPSK:
+		PRINT("DCM-MAX-CONST-TX-QPSK");
+		break;
+	case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_16_QAM:
+		PRINT("DCM-MAX-CONST-TX-16-QAM");
+		break;
+	}
+
+	PFLAG(PHY, 3, DCM_MAX_TX_NSS_1, "DCM-MAX-TX-NSS-1");
+	PFLAG(PHY, 3, DCM_MAX_TX_NSS_2, "DCM-MAX-TX-NSS-2");
+
+	switch (cap[3] & IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK) {
+	case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM:
+		PRINT("DCM-MAX-CONST-RX-NO-DCM");
+		break;
+	case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_BPSK:
+		PRINT("DCM-MAX-CONST-RX-BPSK");
+		break;
+	case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_QPSK:
+		PRINT("DCM-MAX-CONST-RX-QPSK");
+		break;
+	case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_16_QAM:
+		PRINT("DCM-MAX-CONST-RX-16-QAM");
+		break;
+	}
+
+	PFLAG(PHY, 3, DCM_MAX_RX_NSS_1, "DCM-MAX-RX-NSS-1");
+	PFLAG(PHY, 3, DCM_MAX_RX_NSS_2, "DCM-MAX-RX-NSS-2");
+	PFLAG(PHY, 3, RX_HE_MU_PPDU_FROM_NON_AP_STA,
+	      "RX-HE-MU-PPDU-FROM-NON-AP-STA");
+	PFLAG(PHY, 3, SU_BEAMFORMER, "SU-BEAMFORMER");
+
+	PFLAG(PHY, 4, SU_BEAMFORMEE, "SU-BEAMFORMEE");
+	PFLAG(PHY, 4, MU_BEAMFORMER, "MU-BEAMFORMER");
+
+	PFLAG_RANGE(PHY, 4, BEAMFORMEE_MAX_STS_UNDER_80MHZ, 0, 1, 4,
+		    "BEAMFORMEE-MAX-STS-UNDER-%d");
+	PFLAG_RANGE(PHY, 4, BEAMFORMEE_MAX_STS_ABOVE_80MHZ, 0, 1, 4,
+		    "BEAMFORMEE-MAX-STS-ABOVE-%d");
+
+	PFLAG_RANGE(PHY, 5, BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ, 0, 1, 1,
+		    "NUM-SND-DIM-UNDER-80MHZ-%d");
+	PFLAG_RANGE(PHY, 5, BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ, 0, 1, 1,
+		    "NUM-SND-DIM-ABOVE-80MHZ-%d");
+	PFLAG(PHY, 5, NG16_SU_FEEDBACK, "NG16-SU-FEEDBACK");
+	PFLAG(PHY, 5, NG16_MU_FEEDBACK, "NG16-MU-FEEDBACK");
+
+	PFLAG(PHY, 6, CODEBOOK_SIZE_42_SU, "CODEBOOK-SIZE-42-SU");
+	PFLAG(PHY, 6, CODEBOOK_SIZE_75_MU, "CODEBOOK-SIZE-75-MU");
+	PFLAG(PHY, 6, TRIG_SU_BEAMFORMER_FB, "TRIG-SU-BEAMFORMER-FB");
+	PFLAG(PHY, 6, TRIG_MU_BEAMFORMER_FB, "TRIG-MU-BEAMFORMER-FB");
+	PFLAG(PHY, 6, TRIG_CQI_FB, "TRIG-CQI-FB");
+	PFLAG(PHY, 6, PARTIAL_BW_EXT_RANGE, "PARTIAL-BW-EXT-RANGE");
+	PFLAG(PHY, 6, PARTIAL_BANDWIDTH_DL_MUMIMO,
+	      "PARTIAL-BANDWIDTH-DL-MUMIMO");
+	PFLAG(PHY, 6, PPE_THRESHOLD_PRESENT, "PPE-THRESHOLD-PRESENT");
+
+	PFLAG(PHY, 7, SRP_BASED_SR, "SRP-BASED-SR");
+	PFLAG(PHY, 7, POWER_BOOST_FACTOR_AR, "POWER-BOOST-FACTOR-AR");
+	PFLAG(PHY, 7, HE_SU_MU_PPDU_4XLTF_AND_08_US_GI,
+	      "HE-SU-MU-PPDU-4XLTF-AND-08-US-GI");
+	PFLAG_RANGE(PHY, 7, MAX_NC, 0, 1, 1, "MAX-NC-%d");
+	PFLAG(PHY, 7, STBC_TX_ABOVE_80MHZ, "STBC-TX-ABOVE-80MHZ");
+	PFLAG(PHY, 7, STBC_RX_ABOVE_80MHZ, "STBC-RX-ABOVE-80MHZ");
+
+	PFLAG(PHY, 8, HE_ER_SU_PPDU_4XLTF_AND_08_US_GI,
+	      "HE-ER-SU-PPDU-4XLTF-AND-08-US-GI");
+	PFLAG(PHY, 8, 20MHZ_IN_40MHZ_HE_PPDU_IN_2G,
+	      "20MHZ-IN-40MHZ-HE-PPDU-IN-2G");
+	PFLAG(PHY, 8, 20MHZ_IN_160MHZ_HE_PPDU, "20MHZ-IN-160MHZ-HE-PPDU");
+	PFLAG(PHY, 8, 80MHZ_IN_160MHZ_HE_PPDU, "80MHZ-IN-160MHZ-HE-PPDU");
+	PFLAG(PHY, 8, HE_ER_SU_1XLTF_AND_08_US_GI,
+	      "HE-ER-SU-1XLTF-AND-08-US-GI");
+	PFLAG(PHY, 8, MIDAMBLE_RX_TX_2X_AND_1XLTF,
+	      "MIDAMBLE-RX-TX-2X-AND-1XLTF");
+
+	switch (cap[8] & IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK) {
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242:
+		PRINT("DCM-MAX-RU-242");
+		break;
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_484:
+		PRINT("DCM-MAX-RU-484");
+		break;
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_996:
+		PRINT("DCM-MAX-RU-996");
+		break;
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996:
+		PRINT("DCM-MAX-RU-2x996");
+		break;
+	}
+
+	PFLAG(PHY, 9, LONGER_THAN_16_SIGB_OFDM_SYM,
+	      "LONGER-THAN-16-SIGB-OFDM-SYM");
+	PFLAG(PHY, 9, NON_TRIGGERED_CQI_FEEDBACK,
+	      "NON-TRIGGERED-CQI-FEEDBACK");
+	PFLAG(PHY, 9, TX_1024_QAM_LESS_THAN_242_TONE_RU,
+	      "TX-1024-QAM-LESS-THAN-242-TONE-RU");
+	PFLAG(PHY, 9, RX_1024_QAM_LESS_THAN_242_TONE_RU,
+	      "RX-1024-QAM-LESS-THAN-242-TONE-RU");
+	PFLAG(PHY, 9, RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB,
+	      "RX-FULL-BW-SU-USING-MU-WITH-COMP-SIGB");
+	PFLAG(PHY, 9, RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB,
+	      "RX-FULL-BW-SU-USING-MU-WITH-NON-COMP-SIGB");
+
+	switch (cap[9] & IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK) {
+	case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US:
+		PRINT("NOMINAL-PACKET-PADDING-0US");
+		break;
+	case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US:
+		PRINT("NOMINAL-PACKET-PADDING-8US");
+		break;
+	case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US:
+		PRINT("NOMINAL-PACKET-PADDING-16US");
+		break;
+	}
+
+#undef PFLAG_RANGE_DEFAULT
+#undef PFLAG_RANGE
+#undef PFLAG
+
+#define PRINT_NSS_SUPP(f, n)						\
+	do {								\
+		int _i;							\
+		u16 v = le16_to_cpu(nss->f);				\
+		p += scnprintf(p, buf_sz + buf - p, n ": %#.4x\n", v);	\
+		for (_i = 0; _i < 8; _i += 2) {				\
+			switch ((v >> _i) & 0x3) {			\
+			case 0:						\
+				PRINT(n "-%d-SUPPORT-0-7", _i / 2);	\
+				break;					\
+			case 1:						\
+				PRINT(n "-%d-SUPPORT-0-9", _i / 2);	\
+				break;					\
+			case 2:						\
+				PRINT(n "-%d-SUPPORT-0-11", _i / 2);	\
+				break;					\
+			case 3:						\
+				PRINT(n "-%d-NOT-SUPPORTED", _i / 2);	\
+				break;					\
+			}						\
+		}							\
+	} while (0)
+
+	PRINT_NSS_SUPP(rx_mcs_80, "RX-MCS-80");
+	PRINT_NSS_SUPP(tx_mcs_80, "TX-MCS-80");
+
+	if (cap[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) {
+		PRINT_NSS_SUPP(rx_mcs_160, "RX-MCS-160");
+		PRINT_NSS_SUPP(tx_mcs_160, "TX-MCS-160");
+	}
+
+	if (cap[0] &
+	    IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) {
+		PRINT_NSS_SUPP(rx_mcs_80p80, "RX-MCS-80P80");
+		PRINT_NSS_SUPP(tx_mcs_80p80, "TX-MCS-80P80");
+	}
+
+#undef PRINT_NSS_SUPP
+#undef PRINT
+
+	if (!(cap[6] & IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT))
+		goto out;
+
+	p += scnprintf(p, buf_sz + buf - p, "PPE-THRESHOLDS: %#.2x",
+		       hec->ppe_thres[0]);
+
+	ppe_size = ieee80211_he_ppe_size(hec->ppe_thres[0], cap);
+	for (i = 1; i < ppe_size; i++) {
+		p += scnprintf(p, buf_sz + buf - p, " %#.2x",
+			       hec->ppe_thres[i]);
+	}
+	p += scnprintf(p, buf_sz + buf - p, "\n");
+
+out:
+	ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+	kfree(buf);
+	return ret;
+}
+STA_OPS(he_capa);
 
 #define DEBUGFS_ADD(name) \
 	debugfs_create_file(#name, 0400, \
@@ -528,8 +957,6 @@
 	 * dir might still be around.
 	 */
 	sta->debugfs_dir = debugfs_create_dir(mac, stations_dir);
-	if (!sta->debugfs_dir)
-		return;
 
 	DEBUGFS_ADD(flags);
 	DEBUGFS_ADD(aid);
@@ -538,6 +965,7 @@
 	DEBUGFS_ADD(agg_status);
 	DEBUGFS_ADD(ht_capa);
 	DEBUGFS_ADD(vht_capa);
+	DEBUGFS_ADD(he_capa);
 
 	DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates);
 	DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments);
@@ -546,6 +974,10 @@
 	if (local->ops->wake_tx_queue)
 		DEBUGFS_ADD(aqm);
 
+	if (wiphy_ext_feature_isset(local->hw.wiphy,
+				    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+		DEBUGFS_ADD(airtime);
+
 	if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
 		debugfs_create_x32("driver_buffered_tids", 0400,
 				   sta->debugfs_dir,
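
Note: the PFLAG_RANGE decoding used throughout sta_he_capa_read() above is
compact but dense: it right-aligns the masked field with ffs(), then prints
(s << idx) + m * idx. A standalone demonstration; the 0x0c mask mirrors the
two-bit TF-MAC-PAD-DUR field but is defined locally here, so treat it as
illustrative rather than a copy of the header definition:

	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	static unsigned int decode_range(unsigned char cap, unsigned char mask,
					 unsigned int s, unsigned int m, int off)
	{
		/* same extraction as PFLAG_RANGE: shift field down, add offset */
		unsigned int idx = ((cap & mask) >> (ffs(mask) - 1)) + off;

		return (s << idx) + m * idx;
	}

	int main(void)
	{
		/* field value 2 with s=0, m=8: expect 16 (TF-MAC-PAD-DUR-16US) */
		printf("TF-MAC-PAD-DUR-%uUS\n", decode_range(0x08, 0x0c, 0, 8, 0));
		return 0;
	}
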
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index bb886e7..c9a8a24 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2015 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
@@ -138,6 +135,27 @@
 	return ret;
 }
 
+__must_check
+int drv_sta_set_txpwr(struct ieee80211_local *local,
+		      struct ieee80211_sub_if_data *sdata,
+		      struct sta_info *sta)
+{
+	int ret = -EOPNOTSUPP;
+
+	might_sleep();
+
+	sdata = get_bss_sdata(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
+
+	trace_drv_sta_set_txpwr(local, sdata, &sta->sta);
+	if (local->ops->sta_set_txpwr)
+		ret = local->ops->sta_set_txpwr(&local->hw, &sdata->vif,
+						&sta->sta);
+	trace_drv_return_int(local, ret);
+	return ret;
+}
+
 void drv_sta_rc_update(struct ieee80211_local *local,
 		       struct ieee80211_sub_if_data *sdata,
 		       struct ieee80211_sta *sta, u32 changed)
@@ -169,11 +187,16 @@
 	if (!check_sdata_in_driver(sdata))
 		return -EIO;
 
-	if (WARN_ONCE(params->cw_min == 0 ||
-		      params->cw_min > params->cw_max,
-		      "%s: invalid CW_min/CW_max: %d/%d\n",
-		      sdata->name, params->cw_min, params->cw_max))
+	if (params->cw_min == 0 || params->cw_min > params->cw_max) {
+		/*
+		 * If we can't configure hardware anyway, don't warn. We may
+		 * never have initialized the CW parameters.
+		 */
+		WARN_ONCE(local->ops->conf_tx,
+			  "%s: invalid CW_min/CW_max: %d/%d\n",
+			  sdata->name, params->cw_min, params->cw_max);
 		return -EINVAL;
+	}
 
 	trace_drv_conf_tx(local, sdata, ac, params);
 	if (local->ops->conf_tx)
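
Note: drv_sta_set_txpwr() above dispatches to a new sta_set_txpwr driver op
carrying a per-station transmit-power request in sta->txpwr. A hedged sketch
of the driver side; EXAMPLE_MAX_TXPWR and example_hw_set_sta_power() are
hypothetical, and the field names follow struct ieee80211_sta_txpwr in this
tree:

	#include <net/mac80211.h>

	#define EXAMPLE_MAX_TXPWR	20	/* dBm, hypothetical hardware limit */

	static int example_hw_set_sta_power(void *priv, struct ieee80211_sta *sta,
					    s16 dbm)
	{
		return 0;	/* placeholder for the real hardware write */
	}

	static int example_sta_set_txpwr(struct ieee80211_hw *hw,
					 struct ieee80211_vif *vif,
					 struct ieee80211_sta *sta)
	{
		s16 dbm;

		if (sta->txpwr.type == NL80211_TX_POWER_AUTOMATIC)
			dbm = EXAMPLE_MAX_TXPWR;
		else
			dbm = sta->txpwr.power;

		return example_hw_set_sta_power(hw->priv, sta, dbm);
	}
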
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 8f69980..2c9b3eb 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -2,7 +2,7 @@
 /*
 * Portions of this file
 * Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018 Intel Corporation
+* Copyright (C) 2018 - 2019 Intel Corporation
 */
 
 #ifndef __MAC80211_DRIVER_OPS
@@ -529,6 +529,11 @@
 		  enum ieee80211_sta_state old_state,
 		  enum ieee80211_sta_state new_state);
 
+__must_check
+int drv_sta_set_txpwr(struct ieee80211_local *local,
+		      struct ieee80211_sub_if_data *sdata,
+		      struct sta_info *sta);
+
 void drv_sta_rc_update(struct ieee80211_local *local,
 		       struct ieee80211_sub_if_data *sdata,
 		       struct ieee80211_sta *sta, u32 changed);
@@ -687,14 +692,16 @@
 	return ret;
 }
 
-static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local)
+static inline int
+drv_cancel_remain_on_channel(struct ieee80211_local *local,
+			     struct ieee80211_sub_if_data *sdata)
 {
 	int ret;
 
 	might_sleep();
 
-	trace_drv_cancel_remain_on_channel(local);
-	ret = local->ops->cancel_remain_on_channel(&local->hw);
+	trace_drv_cancel_remain_on_channel(local, sdata);
+	ret = local->ops->cancel_remain_on_channel(&local->hw, &sdata->vif);
 	trace_drv_return_int(local, ret);
 
 	return ret;
@@ -1052,6 +1059,35 @@
 	return ret;
 }
 
+static inline void
+drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	trace_drv_abort_channel_switch(local, sdata);
+
+	if (local->ops->abort_channel_switch)
+		local->ops->abort_channel_switch(&local->hw, &sdata->vif);
+}
+
+static inline void
+drv_channel_switch_rx_beacon(struct ieee80211_sub_if_data *sdata,
+			     struct ieee80211_channel_switch *ch_switch)
+{
+	struct ieee80211_local *local = sdata->local;
+
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	trace_drv_channel_switch_rx_beacon(local, sdata, ch_switch);
+	if (local->ops->channel_switch_rx_beacon)
+		local->ops->channel_switch_rx_beacon(&local->hw, &sdata->vif,
+						     ch_switch);
+}
+
 static inline int drv_join_ibss(struct ieee80211_local *local,
 				struct ieee80211_sub_if_data *sdata)
 {
@@ -1166,6 +1202,9 @@
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif);
 
+	if (local->in_reconfig)
+		return;
+
 	if (!check_sdata_in_driver(sdata))
 		return;
 
@@ -1173,6 +1212,73 @@
 	local->ops->wake_tx_queue(&local->hw, &txq->txq);
 }
 
+static inline void schedule_and_wake_txq(struct ieee80211_local *local,
+					 struct txq_info *txqi)
+{
+	ieee80211_schedule_txq(&local->hw, &txqi->txq);
+	drv_wake_tx_queue(local, txqi);
+}
+
+static inline bool drv_can_aggregate_in_amsdu(struct ieee80211_local *local,
+					      struct sk_buff *head,
+					      struct sk_buff *skb)
+{
+	if (!local->ops->can_aggregate_in_amsdu)
+		return true;
+
+	return local->ops->can_aggregate_in_amsdu(&local->hw, head, skb);
+}
+
+static inline int
+drv_get_ftm_responder_stats(struct ieee80211_local *local,
+			    struct ieee80211_sub_if_data *sdata,
+			    struct cfg80211_ftm_responder_stats *ftm_stats)
+{
+	int ret = -EOPNOTSUPP;
+
+	if (local->ops->get_ftm_responder_stats)
+		ret = local->ops->get_ftm_responder_stats(&local->hw,
+							 &sdata->vif,
+							 ftm_stats);
+	trace_drv_get_ftm_responder_stats(local, sdata, ftm_stats);
+
+	return ret;
+}
+
+static inline int drv_start_pmsr(struct ieee80211_local *local,
+				 struct ieee80211_sub_if_data *sdata,
+				 struct cfg80211_pmsr_request *request)
+{
+	int ret = -EOPNOTSUPP;
+
+	might_sleep();
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
+
+	trace_drv_start_pmsr(local, sdata);
+
+	if (local->ops->start_pmsr)
+		ret = local->ops->start_pmsr(&local->hw, &sdata->vif, request);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
+static inline void drv_abort_pmsr(struct ieee80211_local *local,
+				  struct ieee80211_sub_if_data *sdata,
+				  struct cfg80211_pmsr_request *request)
+{
+	trace_drv_abort_pmsr(local, sdata);
+
+	might_sleep();
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	if (local->ops->abort_pmsr)
+		local->ops->abort_pmsr(&local->hw, &sdata->vif, request);
+	trace_drv_return_void(local);
+}
+
 static inline int drv_start_nan(struct ieee80211_local *local,
 				struct ieee80211_sub_if_data *sdata,
 				struct cfg80211_nan_conf *conf)
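
Note: schedule_and_wake_txq() above pairs ieee80211_schedule_txq() with the
wake_tx_queue driver callback; on the other side, a driver drains its queues
through the TXQ scheduling API. A minimal sketch of that consumer loop,
assuming the scheduling exports as declared in this tree's
include/net/mac80211.h (example_hw_submit() is a placeholder, and the exact
ieee80211_return_txq() signature should be checked against the header):

	#include <net/mac80211.h>

	static void example_hw_submit(struct sk_buff *skb)
	{
		dev_kfree_skb(skb);	/* placeholder for the real DMA hand-off */
	}

	static void example_push_pending(struct ieee80211_hw *hw, u8 ac)
	{
		struct ieee80211_txq *txq;

		ieee80211_txq_schedule_start(hw, ac);
		while ((txq = ieee80211_next_txq(hw, ac))) {
			struct sk_buff *skb;

			while ((skb = ieee80211_tx_dequeue(hw, txq)))
				example_hw_submit(skb);

			/* requeue the txq if it still has traffic */
			ieee80211_return_txq(hw, txq, false);
		}
		ieee80211_txq_schedule_end(hw, ac);
	}
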
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index 5ac7438..99a2e30 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * mac80211 ethtool hooks for cfg80211
  *
@@ -5,8 +6,6 @@
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2014	Intel Corporation (Author: Johannes Berg)
  * Copyright (C) 2018 Intel Corporation
- *
- * This file is GPLv2 as found in COPYING.
  */
 #include <linux/types.h>
 #include <net/cfg80211.h>
diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c
index 3cfb1e2..a13ae14 100644
--- a/net/mac80211/fils_aead.c
+++ b/net/mac80211/fils_aead.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * FILS AEAD for (Re)Association Request/Response frames
  * Copyright 2016, Qualcomm Atheros, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <crypto/aes.h>
diff --git a/net/mac80211/fils_aead.h b/net/mac80211/fils_aead.h
index fbc6523..c868153 100644
--- a/net/mac80211/fils_aead.h
+++ b/net/mac80211/fils_aead.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * FILS AEAD for (Re)Association Request/Response frames
  * Copyright 2016, Qualcomm Atheros, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef FILS_AEAD_H
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 769078e..736da00 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * HE handling
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include "ieee80211_i.h"
@@ -53,3 +50,43 @@
 
 	he_cap->has_he = true;
 }
+
+void
+ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
+			const struct ieee80211_he_operation *he_op_ie_elem)
+{
+	struct ieee80211_he_operation *he_operation =
+					&vif->bss_conf.he_operation;
+
+	if (!he_op_ie_elem) {
+		memset(he_operation, 0, sizeof(*he_operation));
+		return;
+	}
+
+	vif->bss_conf.he_operation = *he_op_ie_elem;
+}
+
+void
+ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
+				const struct ieee80211_he_spr *he_spr_ie_elem)
+{
+	struct ieee80211_he_obss_pd *he_obss_pd =
+					&vif->bss_conf.he_obss_pd;
+	const u8 *data;
+
+	memset(he_obss_pd, 0, sizeof(*he_obss_pd));
+
+	if (!he_spr_ie_elem)
+		return;
+	data = he_spr_ie_elem->optional;
+
+	if (he_spr_ie_elem->he_sr_control &
+	    IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
+		data++;
+	if (he_spr_ie_elem->he_sr_control &
+	    IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) {
+		he_obss_pd->max_offset = *data++;
+		he_obss_pd->min_offset = *data++;
+		he_obss_pd->enable = true;
+	}
+}
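
Note: the optional-field walk in ieee80211_he_spr_ie_to_bss_conf() above is
easy to misread: the single non-SRG offset byte is skipped when present, and
only then are the SRG OBSS-PD offsets consumed, max before min, matching the
order in the code. A self-contained illustration; the bit values mirror the
SR Control flags but are defined locally here:

	#include <stdio.h>

	#define NON_SRG_OFFSET_PRESENT	0x04
	#define SRG_INFORMATION_PRESENT	0x08

	int main(void)
	{
		unsigned char sr_control = NON_SRG_OFFSET_PRESENT |
					   SRG_INFORMATION_PRESENT;
		unsigned char optional[] = { 12, /* non-SRG offset, skipped */
					     20, /* SRG OBSS-PD max */
					     10  /* SRG OBSS-PD min */ };
		const unsigned char *data = optional;
		unsigned char max = 0, min = 0;

		if (sr_control & NON_SRG_OFFSET_PRESENT)
			data++;
		if (sr_control & SRG_INFORMATION_PRESENT) {
			max = *data++;
			min = *data++;
		}
		printf("OBSS-PD enable, max=%u min=%u\n", max, min);
		return 0;
	}
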
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index f849ea8..a2e4d6b 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * HT handling
  *
@@ -8,10 +9,6 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright 2017	Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/ieee80211.h>
@@ -107,6 +104,15 @@
 	__check_htcap_enable(ht_capa, ht_capa_mask, ht_cap,
 			     IEEE80211_HT_CAP_40MHZ_INTOLERANT);
 
+	/* Allow user to enable TX STBC bit */
+	__check_htcap_enable(ht_capa, ht_capa_mask, ht_cap,
+			     IEEE80211_HT_CAP_TX_STBC);
+
+	/* Allow user to configure RX STBC bits */
+	if (ht_capa_mask->cap_info & cpu_to_le16(IEEE80211_HT_CAP_RX_STBC))
+		ht_cap->cap |= le16_to_cpu(ht_capa->cap_info) &
+					IEEE80211_HT_CAP_RX_STBC;
+
 	/* Allow user to decrease AMPDU factor */
 	if (ht_capa_mask->ampdu_params_info &
 	    IEEE80211_HT_AMPDU_PARM_FACTOR) {
@@ -353,7 +359,7 @@
 				       sta->ampdu_mlme.tid_rx_manage_offl))
 			___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
 							 IEEE80211_MAX_AMPDU_BUF_HT,
-							 false, true);
+							 false, true, NULL);
 
 		if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
 				       sta->ampdu_mlme.tid_rx_manage_offl))
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index f0f5fed..0a6ff01 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IBSS mode implementation
  * Copyright 2003-2008, Jouni Malinen <j@w1.fi>
@@ -8,10 +9,7 @@
  * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2016 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright(c) 2018-2019 Intel Corporation
  */
 
 #include <linux/delay.h>
@@ -1070,7 +1068,9 @@
 			struct ieee80211_vht_cap cap_ie;
 			struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap;
 
-			ieee80211_chandef_vht_oper(elems->vht_operation,
+			ieee80211_chandef_vht_oper(&local->hw,
+						   elems->vht_operation,
+						   elems->ht_operation,
 						   &chandef);
 			memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie));
 			ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
@@ -1122,8 +1122,7 @@
 
 	ieee80211_update_sta_info(sdata, mgmt, len, rx_status, elems, channel);
 
-	bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
-					channel);
+	bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, channel);
 	if (!bss)
 		return;
 
@@ -1253,6 +1252,7 @@
 
 static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
 {
+	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
 	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta, *tmp;
 	unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT;
@@ -1269,10 +1269,17 @@
 		if (time_is_before_jiffies(last_active + exp_time) ||
 		    (time_is_before_jiffies(last_active + exp_rsn) &&
 		     sta->sta_state != IEEE80211_STA_AUTHORIZED)) {
+			u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
+
 			sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n",
 				sta->sta_state != IEEE80211_STA_AUTHORIZED ?
 				"not authorized " : "", sta->sta.addr);
 
+			ieee80211_send_deauth_disassoc(sdata, sta->sta.addr,
+						       ifibss->bssid,
+						       IEEE80211_STYPE_DEAUTH,
+						       WLAN_REASON_DEAUTH_LEAVING,
+						       true, frame_buf);
 			WARN_ON(__sta_info_destroy(sta));
 		}
 	}
@@ -1602,7 +1609,7 @@
 		return;
 
 	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
-			       false, &elems);
+			       false, &elems, mgmt->bssid, NULL);
 
 	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
 }
@@ -1652,7 +1659,7 @@
 
 			ieee802_11_parse_elems(
 				mgmt->u.action.u.chan_switch.variable,
-				ies_len, true, &elems);
+				ies_len, true, &elems, mgmt->bssid, NULL);
 
 			if (elems.parse_error)
 				break;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 172aeae..05406e9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1,14 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2015  Intel Mobile Communications GmbH
- * Copyright (C) 2018 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 
 #ifndef IEEE80211_I_H
@@ -377,6 +374,7 @@
 	u8 key[WLAN_KEY_LEN_WEP104];
 	u8 key_len, key_idx;
 	bool done;
+	bool peer_confirmed;
 	bool timeout_started;
 
 	u16 sae_trans, sae_status;
@@ -499,6 +497,7 @@
 	unsigned int uapsd_max_sp_len;
 
 	int wmm_last_param_set;
+	int mu_edca_last_param_set;
 
 	u8 use_4addr;
 
@@ -554,6 +553,12 @@
 	 * get stuck in a downgraded situation and flush takes forever.
 	 */
 	struct delayed_work tx_tspec_wk;
+
+	/* Information elements from the last transmitted (Re)Association
+	 * Request frame.
+	 */
+	u8 *assoc_req_ies;
+	size_t assoc_req_ies_len;
 };
 
 struct ieee80211_if_ibss {
@@ -818,6 +823,7 @@
 	IEEE80211_TXQ_STOP,
 	IEEE80211_TXQ_AMPDU,
 	IEEE80211_TXQ_NO_AMSDU,
+	IEEE80211_TXQ_STOP_NETIF_TX,
 };
 
 /**
@@ -828,6 +834,8 @@
  *	a fq_flow which is already owned by a different tin
  * @def_cvars: codel vars for @def_flow
  * @frags: used to keep fragments created after dequeue
+ * @schedule_order: used with ieee80211_local->active_txqs
+ * @schedule_round: counter to prevent infinite loops on TXQ scheduling
  */
 struct txq_info {
 	struct fq_tin tin;
@@ -835,6 +843,8 @@
 	struct codel_vars def_cvars;
 	struct codel_stats cstats;
 	struct sk_buff_head frags;
+	struct list_head schedule_order;
+	u16 schedule_round;
 	unsigned long flags;
 
 	/* keep last! */
@@ -1126,6 +1136,13 @@
 	struct codel_vars *cvars;
 	struct codel_params cparams;
 
+	/* protects active_txqs and txqi->schedule_order */
+	spinlock_t active_txq_lock[IEEE80211_NUM_ACS];
+	struct list_head active_txqs[IEEE80211_NUM_ACS];
+	u16 schedule_round[IEEE80211_NUM_ACS];
+
+	u16 airtime_flags;
+
 	const struct ieee80211_ops *ops;
 
 	/*
@@ -1198,6 +1215,9 @@
 	/* number of RX chains the hardware has */
 	u8 rx_chains;
 
+	/* bitmap of which sbands were copied */
+	u8 sband_allocated;
+
 	int tx_headroom; /* required headroom for hardware/radiotap */
 
 	/* Tasklet and skb queue to process calls from IRQ mode. All frames
@@ -1226,6 +1246,7 @@
 
 	struct sk_buff_head pending[IEEE80211_MAX_QUEUES];
 	struct tasklet_struct tx_pending_tasklet;
+	struct tasklet_struct wake_txqs_tasklet;
 
 	atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES];
 
@@ -1234,8 +1255,8 @@
 
 	struct rate_control_ref *rate_ctrl;
 
-	struct crypto_cipher *wep_tx_tfm;
-	struct crypto_cipher *wep_rx_tfm;
+	struct arc4_ctx wep_tx_ctx;
+	struct arc4_ctx wep_rx_ctx;
 	u32 wep_iv;
 
 	/* see iface.c */
@@ -1245,7 +1266,7 @@
 
 	/*
 	 * Key mutex, protects sdata's key_list and sta_info's
-	 * key pointers (write access, they're RCU.)
+	 * key pointers and ptk_idx (write access, they're RCU.)
 	 */
 	struct mutex key_mtx;
 
@@ -1360,6 +1381,7 @@
 		struct dentry *rcdir;
 		struct dentry *keys;
 	} debugfs;
+	bool force_tx_status;
 #endif
 
 	/*
@@ -1410,7 +1432,7 @@
 	rcu_read_lock();
 	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
 
-	if (WARN_ON(!chanctx_conf)) {
+	if (WARN_ON_ONCE(!chanctx_conf)) {
 		rcu_read_unlock();
 		return NULL;
 	}
@@ -1429,6 +1451,7 @@
 	u8 ttl;
 	u16 pre_value;
 	u16 reason_code;
+	u32 max_switch_time;
 };
 
 /* Parsed Information Elements */
@@ -1457,6 +1480,7 @@
 	const struct ieee80211_meshconf_ie *mesh_config;
 	const u8 *he_cap;
 	const struct ieee80211_he_operation *he_operation;
+	const struct ieee80211_he_spr *he_spr;
 	const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
 	const u8 *uora_element;
 	const u8 *mesh_id;
@@ -1469,6 +1493,7 @@
 	const struct ieee80211_channel_sw_ie *ch_switch_ie;
 	const struct ieee80211_ext_chansw_ie *ext_chansw_ie;
 	const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
+	const u8 *max_channel_switch_time;
 	const u8 *country_elem;
 	const u8 *pwr_constr_elem;
 	const u8 *cisco_dtpc_elem;
@@ -1477,6 +1502,12 @@
 	const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
 	struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
 	const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie;
+	const struct ieee80211_multiple_bssid_configuration *mbssid_config_ie;
+	const struct ieee80211_bssid_index *bssid_index;
+	u8 max_bssid_indicator;
+	u8 dtim_count;
+	u8 dtim_period;
+	const struct ieee80211_addba_ext_ie *addba_ext_ie;
 
 	/* length of them, respectively */
 	u8 ext_capab_len;
@@ -1495,6 +1526,7 @@
 	u8 prep_len;
 	u8 perr_len;
 	u8 country_elem_len;
+	u8 bssid_index_len;
 
 	/* whether a parse error occurred while retrieving these elements */
 	bool parse_error;
@@ -1654,7 +1686,6 @@
 			  struct ieee80211_rx_status *rx_status,
 			  struct ieee80211_mgmt *mgmt,
 			  size_t len,
-			  struct ieee802_11_elems *elems,
 			  struct ieee80211_channel *channel);
 void ieee80211_rx_bss_put(struct ieee80211_local *local,
 			  struct ieee80211_bss *bss);
@@ -1729,7 +1760,8 @@
 				       struct net_device *dev);
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
-				  u32 info_flags);
+				  u32 info_flags,
+				  u32 ctrl_flags);
 void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
 			      struct sk_buff_head *skbs);
 struct sk_buff *
@@ -1737,7 +1769,8 @@
 			      struct sk_buff *skb, u32 info_flags);
 void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
 			  struct ieee80211_supported_band *sband,
-			  int retry_count, int shift, bool send_to_cooked);
+			  int retry_count, int shift, bool send_to_cooked,
+			  struct ieee80211_tx_status *status);
 
 void ieee80211_check_fast_xmit(struct sta_info *sta);
 void ieee80211_check_fast_xmit_all(struct ieee80211_local *local);
@@ -1746,6 +1779,8 @@
 int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
 			      const u8 *buf, size_t len,
 			      const u8 *dest, __be16 proto, bool unencrypted);
+int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev,
+			      const u8 *buf, size_t len);
 
 /* HT */
 void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
@@ -1772,7 +1807,8 @@
 void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 				      u8 dialog_token, u16 timeout,
 				      u16 start_seq_num, u16 ba_policy, u16 tid,
-				      u16 buf_size, bool tx, bool auto_seq);
+				      u16 buf_size, bool tx, bool auto_seq,
+				      const struct ieee80211_addba_ext_ie *addbaext);
 void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 					 enum ieee80211_agg_stop_reason reason);
 void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
@@ -1837,6 +1873,13 @@
 				  struct ieee80211_supported_band *sband,
 				  const u8 *he_cap_ie, u8 he_cap_len,
 				  struct sta_info *sta);
+void
+ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
+				const struct ieee80211_he_spr *he_spr_ie_elem);
+
+void
+ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
+			const struct ieee80211_he_operation *he_op_ie_elem);
 
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1938,12 +1981,16 @@
 
 u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			       struct ieee802_11_elems *elems,
-			       u64 filter, u32 crc);
+			       u64 filter, u32 crc, u8 *transmitter_bssid,
+			       u8 *bss_bssid);
 static inline void ieee802_11_parse_elems(const u8 *start, size_t len,
 					  bool action,
-					  struct ieee802_11_elems *elems)
+					  struct ieee802_11_elems *elems,
+					  u8 *transmitter_bssid,
+					  u8 *bss_bssid)
 {
-	ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0);
+	ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0,
+				   transmitter_bssid, bss_bssid);
 }
 
 
@@ -1999,6 +2046,13 @@
 static inline bool ieee80211_can_run_worker(struct ieee80211_local *local)
 {
 	/*
+	 * It's unsafe to try to do any work during the reconfigure flow.
+	 * When the flow ends, the work will be requeued.
+	 */
+	if (local->in_reconfig)
+		return false;
+
+	/*
 	 * If quiescing is set, we are racing with __ieee80211_suspend.
 	 * __ieee80211_suspend flushes the workers after setting quiescing,
 	 * and we check quiescing / suspended before enqueing new workers.
@@ -2038,13 +2092,15 @@
 			       struct ieee80211_sub_if_data *sdata);
 void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats,
 			      struct txq_info *txqi);
+void ieee80211_wake_txqs(unsigned long data);
 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 u16 transaction, u16 auth_alg, u16 status,
 			 const u8 *extra, size_t extra_len, const u8 *bssid,
 			 const u8 *da, const u8 *key, u8 key_len, u8 key_idx,
 			 u32 tx_flags);
 void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
-				    const u8 *bssid, u16 stype, u16 reason,
+				    const u8 *da, const u8 *bssid,
+				    u16 stype, u16 reason,
 				    bool send_frame, u8 *frame_buf);
 
 enum {
@@ -2089,9 +2145,11 @@
 			       u32 cap);
 u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 				const struct cfg80211_chan_def *chandef);
+u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
 u8 *ieee80211_ie_build_he_cap(u8 *pos,
 			      const struct ieee80211_sta_he_cap *he_cap,
 			      u8 *end);
+u8 *ieee80211_ie_build_he_oper(u8 *pos);
 int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
 			     const struct ieee80211_supported_band *sband,
 			     const u8 *srates, int srates_len, u32 *rates);
@@ -2106,7 +2164,9 @@
 /* channel management */
 bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
 			       struct cfg80211_chan_def *chandef);
-bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
+bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
+				const struct ieee80211_vht_operation *oper,
+				const struct ieee80211_ht_operation *htop,
 				struct cfg80211_chan_def *chandef);
 u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
 
@@ -2183,6 +2243,9 @@
 					  const u8 *addr);
 void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata);
 void ieee80211_tdls_chsw_work(struct work_struct *wk);
+void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
+				      const u8 *peer, u16 reason);
+const char *ieee80211_get_reason_code_string(u16 reason_code);
 
 extern const struct ethtool_ops ieee80211_ethtool_ops;
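
Note: the reworked ieee802_11_parse_elems() wrapper above threads two extra
BSSID arguments through to CRC parsing for multiple-BSSID support; existing
callers pass the transmitter address and NULL for the BSS BSSID, as in the
IBSS hunks earlier:

	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
			       false, &elems, mgmt->bssid, NULL);
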
 
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5f3c81e..af8b092 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Interface handling
  *
@@ -7,10 +8,7 @@
  * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (c) 2016        Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018 Intel Corporation
  */
 #include <linux/slab.h>
 #include <linux/kernel.h>
@@ -1132,8 +1130,7 @@
 
 static u16 ieee80211_netdev_select_queue(struct net_device *dev,
 					 struct sk_buff *skb,
-					 struct net_device *sb_dev,
-					 select_queue_fallback_t fallback)
+					 struct net_device *sb_dev)
 {
 	return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
 }
@@ -1178,8 +1175,7 @@
 
 static u16 ieee80211_monitor_select_queue(struct net_device *dev,
 					  struct sk_buff *skb,
-					  struct net_device *sb_dev,
-					  select_queue_fallback_t fallback)
+					  struct net_device *sb_dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
@@ -1763,13 +1759,13 @@
 			txq_size += sizeof(struct txq_info) +
 				    local->hw.txq_data_size;
 
-		if (local->ops->wake_tx_queue)
+		if (local->ops->wake_tx_queue) {
 			if_setup = ieee80211_if_setup_no_queue;
-		else
+		} else {
 			if_setup = ieee80211_if_setup;
-
-		if (local->hw.queues >= IEEE80211_NUM_ACS)
-			txqs = IEEE80211_NUM_ACS;
+			if (local->hw.queues >= IEEE80211_NUM_ACS)
+				txqs = IEEE80211_NUM_ACS;
+		}
 
 		ndev = alloc_netdev_mqs(size + txq_size,
 					name, name_assign_type,
@@ -1801,7 +1797,7 @@
 		}
 
 		ieee80211_assign_perm_addr(local, ndev->perm_addr, type);
-		if (params && is_valid_ether_addr(params->macaddr))
+		if (is_valid_ether_addr(params->macaddr))
 			memcpy(ndev->dev_addr, params->macaddr, ETH_ALEN);
 		else
 			memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN);
@@ -1870,11 +1866,9 @@
 	ieee80211_setup_sdata(sdata, type);
 
 	if (ndev) {
-		if (params) {
-			ndev->ieee80211_ptr->use_4addr = params->use_4addr;
-			if (type == NL80211_IFTYPE_STATION)
-				sdata->u.mgd.use_4addr = params->use_4addr;
-		}
+		ndev->ieee80211_ptr->use_4addr = params->use_4addr;
+		if (type == NL80211_IFTYPE_STATION)
+			sdata->u.mgd.use_4addr = params->use_4addr;
 
 		ndev->features |= local->hw.netdev_features;
 
@@ -1882,7 +1876,7 @@
 
 		/* MTU range: 256 - 2304 */
 		ndev->min_mtu = 256;
-		ndev->max_mtu = IEEE80211_MAX_DATA_LEN;
+		ndev->max_mtu = local->hw.max_mtu;
 
 		ret = register_netdevice(ndev);
 		if (ret) {
@@ -1909,6 +1903,9 @@
 	list_del_rcu(&sdata->list);
 	mutex_unlock(&sdata->local->iflist_mtx);
 
+	if (sdata->vif.txq)
+		ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq));
+
 	synchronize_rcu();
 
 	if (sdata->dev) {
@@ -1951,6 +1948,8 @@
 	WARN(local->open_count, "%s: open count remains %d\n",
 	     wiphy_name(local->hw.wiphy), local->open_count);
 
+	ieee80211_txq_teardown_flows(local);
+
 	mutex_lock(&local->iflist_mtx);
 	list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
 		list_del(&sdata->list);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index c054ac8..0f889b9 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
@@ -5,10 +6,7 @@
  * Copyright 2007-2008	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2015-2017	Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright 2018-2019  Intel Corporation
  */
 
 #include <linux/if_ether.h>
@@ -140,6 +138,12 @@
 		 * so clear that flag now to avoid trying to remove
 		 * it again later.
 		 */
+		if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
+		    !(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+					 IEEE80211_KEY_FLAG_PUT_MIC_SPACE |
+					 IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
+			increment_tailroom_need_count(sdata);
+
 		key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
 		return -EINVAL;
 	}
@@ -167,8 +171,10 @@
 		 * The driver doesn't know anything about VLAN interfaces.
 		 * Hence, don't send GTKs for VLAN interfaces to the driver.
 		 */
-		if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE))
+		if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+			ret = 1;
 			goto out_unsupported;
+		}
 	}
 
 	ret = drv_set_key(key->local, SET_KEY, sdata,
@@ -177,9 +183,9 @@
 	if (!ret) {
 		key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
 
-		if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
-					   IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) ||
-		      (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
+		if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+					 IEEE80211_KEY_FLAG_PUT_MIC_SPACE |
+					 IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
 			decrease_tailroom_need_count(sdata, 1);
 
 		WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) &&
@@ -213,11 +219,8 @@
 		/* all of these we can do in software - if driver can */
 		if (ret == 1)
 			return 0;
-		if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) {
-			if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
-				return 0;
+		if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL))
 			return -EINVAL;
-		}
 		return 0;
 	default:
 		return -EINVAL;
@@ -243,11 +246,12 @@
 	sta = key->sta;
 	sdata = key->sdata;
 
-	if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
-				   IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) ||
-	      (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
+	if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+				 IEEE80211_KEY_FLAG_PUT_MIC_SPACE |
+				 IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
 		increment_tailroom_need_count(sdata);
 
+	key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
 	ret = drv_set_key(key->local, DISABLE_KEY, sdata,
 			  sta ? &sta->sta : NULL, &key->conf);
 
@@ -256,8 +260,81 @@
 			  "failed to remove key (%d, %pM) from hardware (%d)\n",
 			  key->conf.keyidx,
 			  sta ? sta->sta.addr : bcast_addr, ret);
+}
 
-	key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
+int ieee80211_set_tx_key(struct ieee80211_key *key)
+{
+	struct sta_info *sta = key->sta;
+	struct ieee80211_local *local = key->local;
+
+	assert_key_lock(local);
+
+	sta->ptk_idx = key->conf.keyidx;
+
+	if (!ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT))
+		clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
+	ieee80211_check_fast_xmit(sta);
+
+	return 0;
+}
+
+static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
+				     struct ieee80211_key *new)
+{
+	struct ieee80211_local *local = new->local;
+	struct sta_info *sta = new->sta;
+	int i;
+
+	assert_key_lock(local);
+
+	if (new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) {
+		/* Extended Key ID key install, initial one or rekey */
+
+		if (sta->ptk_idx != INVALID_PTK_KEYIDX &&
+		    !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT)) {
+			/* Aggregation Sessions with Extended Key ID must not
+			 * mix MPDUs with different keyIDs within one A-MPDU.
+			 * Tear down running Tx aggregation sessions and block
+			 * new Rx/Tx aggregation requests during rekey to
+			 * ensure there are no A-MPDUs when the driver is not
+			 * supporting A-MPDU key borders. (Blocking Tx only
+			 * would be sufficient but WLAN_STA_BLOCK_BA gets the
+			 * job done for the few ms we need it.)
+			 */
+			set_sta_flag(sta, WLAN_STA_BLOCK_BA);
+			mutex_lock(&sta->ampdu_mlme.mtx);
+			for (i = 0; i < IEEE80211_NUM_TIDS; i++)
+				___ieee80211_stop_tx_ba_session(sta, i,
+								AGG_STOP_LOCAL_REQUEST);
+			mutex_unlock(&sta->ampdu_mlme.mtx);
+		}
+	} else if (old) {
+		/* Rekey without Extended Key ID.
+		 * Aggregation sessions are OK when running on SW crypto.
+		 * A broken remote STA may cause issues not observed with HW
+		 * crypto, though.
+		 */
+		if (!(old->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+			return;
+
+		/* Stop Tx till we are on the new key */
+		old->flags |= KEY_FLAG_TAINTED;
+		ieee80211_clear_fast_xmit(sta);
+		if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION)) {
+			set_sta_flag(sta, WLAN_STA_BLOCK_BA);
+			ieee80211_sta_tear_down_BA_sessions(sta,
+							    AGG_STOP_LOCAL_REQUEST);
+		}
+		if (!wiphy_ext_feature_isset(local->hw.wiphy,
+					     NL80211_EXT_FEATURE_CAN_REPLACE_PTK0)) {
+			pr_warn_ratelimited("Rekeying PTK for STA %pM but driver can't safely do that.\n",
+					    sta->sta.addr);
+			/* Flushing the driver queues *may* help prevent
+			 * the clear text leaks and freezes.
+			 */
+			ieee80211_flush_queues(local, old->sdata, false);
+		}
+	}
 }
 
 static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
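
Note: the rekey handling above keys off two capability bits:
IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT (hardware keeps A-MPDUs from spanning a
key change) and, for the Extended Key ID path, the corresponding wiphy
feature flag. A sketch of a driver advertising both; the function name is
illustrative:

	#include <net/mac80211.h>

	static void example_setup_key_features(struct ieee80211_hw *hw)
	{
		/* hardware enforces key borders on aggregation sessions */
		ieee80211_hw_set(hw, AMPDU_KEYBORDER_SUPPORT);
		/* allow IEEE 802.11 Extended Key ID (key ID 0/1 for pairwise) */
		wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID);
	}
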
@@ -315,39 +392,69 @@
 	mutex_unlock(&sdata->local->key_mtx);
 }
 
-
-static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
+static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
 				  struct sta_info *sta,
 				  bool pairwise,
 				  struct ieee80211_key *old,
 				  struct ieee80211_key *new)
 {
 	int idx;
+	int ret = 0;
 	bool defunikey, defmultikey, defmgmtkey;
 
 	/* caller must provide at least one old/new */
 	if (WARN_ON(!new && !old))
-		return;
+		return 0;
 
 	if (new)
 		list_add_tail_rcu(&new->list, &sdata->key_list);
 
 	WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx);
 
-	if (old)
+	if (new && sta && pairwise) {
+		/* Unicast rekey needs special handling. With Extended Key ID
+		 * old is still NULL for the first rekey.
+		 */
+		ieee80211_pairwise_rekey(old, new);
+	}
+
+	if (old) {
 		idx = old->conf.keyidx;
-	else
+
+		if (old->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
+			ieee80211_key_disable_hw_accel(old);
+
+			if (new)
+				ret = ieee80211_key_enable_hw_accel(new);
+		}
+	} else {
+		/* new must be provided in case old is not */
 		idx = new->conf.keyidx;
+		if (!new->local->wowlan)
+			ret = ieee80211_key_enable_hw_accel(new);
+	}
+
+	if (ret)
+		return ret;
 
 	if (sta) {
 		if (pairwise) {
 			rcu_assign_pointer(sta->ptk[idx], new);
-			sta->ptk_idx = idx;
-			ieee80211_check_fast_xmit(sta);
+			if (new &&
+			    !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX)) {
+				sta->ptk_idx = idx;
+				clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
+				ieee80211_check_fast_xmit(sta);
+			}
 		} else {
 			rcu_assign_pointer(sta->gtk[idx], new);
 		}
-		ieee80211_check_fast_rx(sta);
+		/* Only needed for transition from no key -> key.
+		 * Still triggers unnecessarily when using Extended Key ID
+		 * and installing the second key ID the first time.
+		 */
+		if (new && !old)
+			ieee80211_check_fast_rx(sta);
 	} else {
 		defunikey = old &&
 			old == key_mtx_dereference(sdata->local,
@@ -380,6 +487,8 @@
 
 	if (old)
 		list_del_rcu(&old->list);
+
+	return 0;
 }
 
 struct ieee80211_key *
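
Note: the rework changes the ordering guarantees of ieee80211_key_replace(): hardware acceleration is enabled for the new key (and disabled for the old one) before the RCU pointer is published, and a key installed with IEEE80211_KEY_FLAG_NO_AUTO_TX does not move sta->ptk_idx, so Tx stays on the old key until ieee80211_set_tx_key() is called. A sketch of the resulting Extended Key ID rekey sequence, assuming key/sdata/sta from the surrounding context and a supplicant driving both steps:

/* sketch: two-step Extended Key ID rekey (error handling omitted) */
new->conf.flags |= IEEE80211_KEY_FLAG_NO_AUTO_TX;
ret = ieee80211_key_link(new, sdata, sta);	/* Rx active, Tx on old key */
if (!ret)
	ret = ieee80211_set_tx_key(new);	/* switch Tx to the new key */
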
@@ -575,9 +684,6 @@
 static void __ieee80211_key_destroy(struct ieee80211_key *key,
 				    bool delay_tailroom)
 {
-	if (key->local)
-		ieee80211_key_disable_hw_accel(key);
-
 	if (key->local) {
 		struct ieee80211_sub_if_data *sdata = key->sdata;
 
@@ -654,7 +760,6 @@
 		       struct ieee80211_sub_if_data *sdata,
 		       struct sta_info *sta)
 {
-	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_key *old_key;
 	int idx = key->conf.keyidx;
 	bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
@@ -664,16 +769,33 @@
 	 * can cause warnings to appear.
 	 */
 	bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION;
-	int ret;
+	int ret = -EOPNOTSUPP;
 
 	mutex_lock(&sdata->local->key_mtx);
 
-	if (sta && pairwise)
+	if (sta && pairwise) {
+		struct ieee80211_key *alt_key;
+
 		old_key = key_mtx_dereference(sdata->local, sta->ptk[idx]);
-	else if (sta)
+		alt_key = key_mtx_dereference(sdata->local, sta->ptk[idx ^ 1]);
+
+		/* The rekey code assumes that the old and new key are using
+		 * the same cipher. Enforce the assumption for pairwise keys.
+		 */
+		if ((alt_key && alt_key->conf.cipher != key->conf.cipher) ||
+		    (old_key && old_key->conf.cipher != key->conf.cipher))
+			goto out;
+	} else if (sta) {
 		old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]);
-	else
+	} else {
 		old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
+	}
+
+	/* Non-pairwise keys must also not switch the cipher on rekey */
+	if (!pairwise) {
+		if (old_key && old_key->conf.cipher != key->conf.cipher)
+			goto out;
+	}
 
 	/*
 	 * Silently accept key re-installation without really installing the
@@ -691,17 +813,13 @@
 
 	increment_tailroom_need_count(sdata);
 
-	ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
-	ieee80211_key_destroy(old_key, delay_tailroom);
+	ret = ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
 
-	ieee80211_debugfs_key_add(key);
-
-	if (!local->wowlan) {
-		ret = ieee80211_key_enable_hw_accel(key);
-		if (ret)
-			ieee80211_key_free(key, delay_tailroom);
+	if (!ret) {
+		ieee80211_debugfs_key_add(key);
+		ieee80211_key_destroy(old_key, delay_tailroom);
 	} else {
-		ret = 0;
+		ieee80211_key_free(key, delay_tailroom);
 	}
 
  out:
@@ -725,46 +843,30 @@
 	ieee80211_key_destroy(key, delay_tailroom);
 }
 
-void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
+void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_key *key;
 	struct ieee80211_sub_if_data *vlan;
 
 	ASSERT_RTNL();
 
-	if (WARN_ON(!ieee80211_sdata_running(sdata)))
-		return;
-
-	mutex_lock(&sdata->local->key_mtx);
-
-	WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
-		     sdata->crypto_tx_tailroom_pending_dec);
-
-	if (sdata->vif.type == NL80211_IFTYPE_AP) {
-		list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
-			WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt ||
-				     vlan->crypto_tx_tailroom_pending_dec);
-	}
-
-	list_for_each_entry(key, &sdata->key_list, list) {
-		increment_tailroom_need_count(sdata);
-		ieee80211_key_enable_hw_accel(key);
-	}
-
-	mutex_unlock(&sdata->local->key_mtx);
-}
-
-void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata)
-{
-	struct ieee80211_sub_if_data *vlan;
-
 	mutex_lock(&sdata->local->key_mtx);
 
 	sdata->crypto_tx_tailroom_needed_cnt = 0;
+	sdata->crypto_tx_tailroom_pending_dec = 0;
 
 	if (sdata->vif.type == NL80211_IFTYPE_AP) {
-		list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+		list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
 			vlan->crypto_tx_tailroom_needed_cnt = 0;
+			vlan->crypto_tx_tailroom_pending_dec = 0;
+		}
+	}
+
+	if (ieee80211_sdata_running(sdata)) {
+		list_for_each_entry(key, &sdata->key_list, list) {
+			increment_tailroom_need_count(sdata);
+			ieee80211_key_enable_hw_accel(key);
+		}
 	}
 
 	mutex_unlock(&sdata->local->key_mtx);
@@ -1117,9 +1219,9 @@
 	if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
 		key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
 
-		if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
-					   IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) ||
-		      (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
+		if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+					 IEEE80211_KEY_FLAG_PUT_MIC_SPACE |
+					 IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
 			increment_tailroom_need_count(key->sdata);
 	}
 
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index ebdb80b..d6d6e89 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -1,10 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2002-2004, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2019 Intel Corporation
  */
 
 #ifndef IEEE80211_KEY_H
@@ -14,10 +12,12 @@
 #include <linux/list.h>
 #include <linux/crypto.h>
 #include <linux/rcupdate.h>
+#include <crypto/arc4.h>
 #include <net/mac80211.h>
 
 #define NUM_DEFAULT_KEYS 4
 #define NUM_DEFAULT_MGMT_KEYS 2
+#define INVALID_PTK_KEYIDX 2 /* Keyidx always pointing to a NULL key for PTK */
 
 struct ieee80211_local;
 struct ieee80211_sub_if_data;
@@ -146,6 +146,7 @@
 int ieee80211_key_link(struct ieee80211_key *key,
 		       struct ieee80211_sub_if_data *sdata,
 		       struct sta_info *sta);
+int ieee80211_set_tx_key(struct ieee80211_key *key);
 void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom);
 void ieee80211_key_free_unused(struct ieee80211_key *key);
 void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,
@@ -156,8 +157,7 @@
 			 bool force_synchronize);
 void ieee80211_free_sta_keys(struct ieee80211_local *local,
 			     struct sta_info *sta);
-void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
-void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata);
+void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata);
 
 #define key_mtx_dereference(local, ref) \
 	rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx)))
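
Note: INVALID_PTK_KEYIDX works because pairwise keys only ever occupy sta->ptk[0] and sta->ptk[1], even with Extended Key ID, so slot 2 is guaranteed to hold NULL; parking ptk_idx there expresses "no valid Tx key" without touching the installed keys. An assumed usage sketch:

/* assumed sketch: deliberately select a slot that is always NULL,
 * so the Tx path no longer finds a pairwise key
 */
sta->ptk_idx = INVALID_PTK_KEYIDX;
ieee80211_clear_fast_xmit(sta);
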
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index d6c66fc..b275c88 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2006, Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 /* just for IFNAMSIZ */
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index a7893a1..fb3aaa3 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2006, Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/list.h>
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 5136278..2d05c4c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1,17 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2017     Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018 - 2019 Intel Corporation
  */
 
 #include <net/mac80211.h>
 #include <linux/module.h>
+#include <linux/fips.h>
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/types.h>
@@ -353,11 +352,11 @@
 	sdata_lock(sdata);
 
 	/* Copy the addresses to the bss_conf list */
-	ifa = idev->ifa_list;
+	ifa = rtnl_dereference(idev->ifa_list);
 	while (ifa) {
 		if (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN)
 			bss_conf->arp_addr_list[c] = ifa->ifa_address;
-		ifa = ifa->ifa_next;
+		ifa = rtnl_dereference(ifa->ifa_next);
 		c++;
 	}
 
@@ -477,6 +476,8 @@
 				IEEE80211_HT_CAP_MAX_AMSDU |
 				IEEE80211_HT_CAP_SGI_20 |
 				IEEE80211_HT_CAP_SGI_40 |
+				IEEE80211_HT_CAP_TX_STBC |
+				IEEE80211_HT_CAP_RX_STBC |
 				IEEE80211_HT_CAP_LDPC_CODING |
 				IEEE80211_HT_CAP_40MHZ_INTOLERANT),
 	.mcs = {
@@ -610,6 +611,18 @@
 	local->ops = ops;
 	local->use_chanctx = use_chanctx;
 
+	/*
+	 * We need a bit of data queued to build aggregates properly, so
+	 * instruct the TCP stack to allow more than a single ms of data
+	 * to be queued in the stack. The value is a bit-shift of 1
+	 * second, so 7 is ~8ms of queued data. Only affects local TCP
+	 * sockets.
+	 * This is the default, anyhow - drivers may need to override it
+	 * for local reasons (longer buffers, longer completion time, or
+	 * similar).
+	 */
+	local->hw.tx_sk_pacing_shift = 7;
+
 	/* set up some defaults */
 	local->hw.queues = 1;
 	local->hw.max_rates = 1;
@@ -626,6 +639,7 @@
 					 IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH;
 	local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES;
 	local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;
+	local->hw.max_mtu = IEEE80211_MAX_DATA_LEN;
 	local->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
 	wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask;
 	wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask;
@@ -650,6 +664,12 @@
 	spin_lock_init(&local->rx_path_lock);
 	spin_lock_init(&local->queue_stop_reason_lock);
 
+	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+		INIT_LIST_HEAD(&local->active_txqs[i]);
+		spin_lock_init(&local->active_txq_lock[i]);
+	}
+	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
+
 	INIT_LIST_HEAD(&local->chanctx_list);
 	mutex_init(&local->chanctx_mtx);
 
@@ -684,6 +704,10 @@
 	tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending,
 		     (unsigned long)local);
 
+	if (ops->wake_tx_queue)
+		tasklet_init(&local->wake_txqs_tasklet, ieee80211_wake_txqs,
+			     (unsigned long)local);
+
 	tasklet_init(&local->tasklet,
 		     ieee80211_tasklet_handler,
 		     (unsigned long) local);
@@ -708,8 +732,7 @@
 
 static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
 {
-	bool have_wep = !(IS_ERR(local->wep_tx_tfm) ||
-			  IS_ERR(local->wep_rx_tfm));
+	bool have_wep = !fips_enabled; /* FIPS does not permit the use of RC4 */
 	bool have_mfp = ieee80211_hw_check(&local->hw, MFP_CAPABLE);
 	int n_suites = 0, r = 0, w = 0;
 	u32 *suites;
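
Note: have_wep now derives from fips_enabled because WEP and TKIP are built on RC4, which FIPS mode forbids; the removed test against local->wep_tx_tfm/wep_rx_tfm went away together with the crypto-API RC4 transforms (key.h now pulls in the <crypto/arc4.h> library implementation instead). The suite list is then assembled conditionally; a sketch of the pattern, mirroring what this function does further down:

/* sketch: RC4-based suites are only advertised when permitted */
if (have_wep) {
	suites[w++] = WLAN_CIPHER_SUITE_WEP40;
	suites[w++] = WLAN_CIPHER_SUITE_WEP104;
}
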
@@ -1026,6 +1049,16 @@
 		}
 	}
 
+	/* mac80211, and thus all drivers using SW crypto only, can
+	 * handle PTK rekeys and Extended Key ID.
+	 */
+	if (!local->ops->set_key) {
+		wiphy_ext_feature_set(local->hw.wiphy,
+				      NL80211_EXT_FEATURE_CAN_REPLACE_PTK0);
+		wiphy_ext_feature_set(local->hw.wiphy,
+				      NL80211_EXT_FEATURE_EXT_KEY_ID);
+	}
+
 	/*
 	 * Calculate scan IE length -- we need this to alloc
 	 * memory and to subtract from the driver limit. It
@@ -1087,6 +1120,17 @@
 	if (ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA))
 		local->ext_capa[0] |= WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING;
 
+	/* mac80211 supports multi BSSID, if the driver supports it */
+	if (ieee80211_hw_check(&local->hw, SUPPORTS_MULTI_BSSID)) {
+		local->hw.wiphy->support_mbssid = true;
+		if (ieee80211_hw_check(&local->hw,
+				       SUPPORTS_ONLY_HE_MULTI_BSSID))
+			local->hw.wiphy->support_only_he_mbssid = true;
+		else
+			local->ext_capa[2] |=
+				WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT;
+	}
+
 	local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
 
 	result = wiphy_register(local->hw.wiphy);
@@ -1131,6 +1175,9 @@
 	if (!local->hw.max_nan_de_entries)
 		local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID;
 
+	if (!local->hw.weight_multiplier)
+		local->hw.weight_multiplier = 1;
+
 	result = ieee80211_wep_init(local);
 	if (result < 0)
 		wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
@@ -1154,11 +1201,60 @@
 		goto fail_rate;
 	}
 
+	if (local->rate_ctrl) {
+		clear_bit(IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, hw->flags);
+		if (local->rate_ctrl->ops->capa & RATE_CTRL_CAPA_VHT_EXT_NSS_BW)
+			ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW);
+	}
+
+	/*
+	 * If the VHT capabilities don't have IEEE80211_VHT_EXT_NSS_BW_CAPABLE,
+	 * or have it when we don't, copy the sband structure and set/clear it.
+	 * This is necessary because rate scaling algorithms could be switched
+	 * and have different support values.
+	 * Print a message so that in the common case the reallocation can be
+	 * avoided.
+	 */
+	BUILD_BUG_ON(NUM_NL80211_BANDS > 8 * sizeof(local->sband_allocated));
+	for (band = 0; band < NUM_NL80211_BANDS; band++) {
+		struct ieee80211_supported_band *sband;
+		bool local_cap, ie_cap;
+
+		local_cap = ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW);
+
+		sband = local->hw.wiphy->bands[band];
+		if (!sband || !sband->vht_cap.vht_supported)
+			continue;
+
+		ie_cap = !!(sband->vht_cap.vht_mcs.tx_highest &
+			    cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE));
+
+		if (local_cap == ie_cap)
+			continue;
+
+		sband = kmemdup(sband, sizeof(*sband), GFP_KERNEL);
+		if (!sband) {
+			result = -ENOMEM;
+			goto fail_rate;
+		}
+
+		wiphy_dbg(hw->wiphy, "copying sband (band %d) due to VHT EXT NSS BW flag\n",
+			  band);
+
+		sband->vht_cap.vht_mcs.tx_highest ^=
+			cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE);
+
+		local->hw.wiphy->bands[band] = sband;
+		local->sband_allocated |= BIT(band);
+	}
+
 	/* add one default STA interface if supported */
 	if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
 	    !ieee80211_hw_check(hw, NO_AUTO_VIF)) {
+		struct vif_params params = {0};
+
 		result = ieee80211_if_add(local, "wlan%d", NET_NAME_ENUM, NULL,
-					  NL80211_IFTYPE_STATION, NULL);
+					  NL80211_IFTYPE_STATION, &params);
 		if (result)
 			wiphy_warn(local->hw.wiphy,
 				   "Failed to add default virtual iface\n");
@@ -1196,10 +1292,8 @@
 	ieee80211_remove_interfaces(local);
  fail_rate:
 	rtnl_unlock();
-	ieee80211_led_exit(local);
-	ieee80211_wep_free(local);
-	ieee80211_txq_teardown_flows(local);
  fail_flows:
+	ieee80211_led_exit(local);
 	destroy_workqueue(local->workqueue);
  fail_workqueue:
 	wiphy_unregister(local->hw.wiphy);
@@ -1224,7 +1318,6 @@
 #if IS_ENABLED(CONFIG_IPV6)
 	unregister_inet6addr_notifier(&local->ifa6_notifier);
 #endif
-	ieee80211_txq_teardown_flows(local);
 
 	rtnl_lock();
 
@@ -1256,7 +1349,6 @@
 
 	destroy_workqueue(local->workqueue);
 	wiphy_unregister(local->hw.wiphy);
-	ieee80211_wep_free(local);
 	ieee80211_led_exit(local);
 	kfree(local->int_scan_req);
 }
@@ -1272,6 +1364,7 @@
 void ieee80211_free_hw(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	enum nl80211_band band;
 
 	mutex_destroy(&local->iflist_mtx);
 	mutex_destroy(&local->mtx);
@@ -1287,6 +1380,12 @@
 
 	ieee80211_free_led_names(local);
 
+	for (band = 0; band < NUM_NL80211_BANDS; band++) {
+		if (!(local->sband_allocated & BIT(band)))
+			continue;
+		kfree(local->hw.wiphy->bands[band]);
+	}
+
 	wiphy_free(local->hw.wiphy);
 }
 EXPORT_SYMBOL(ieee80211_free_hw);
@@ -1304,18 +1403,12 @@
 	if (ret)
 		return ret;
 
-	ret = rc80211_minstrel_ht_init();
-	if (ret)
-		goto err_minstrel;
-
 	ret = ieee80211_iface_init();
 	if (ret)
 		goto err_netdev;
 
 	return 0;
  err_netdev:
-	rc80211_minstrel_ht_exit();
- err_minstrel:
 	rc80211_minstrel_exit();
 
 	return ret;
@@ -1323,7 +1416,6 @@
 
 static void __exit ieee80211_exit(void)
 {
-	rc80211_minstrel_ht_exit();
 	rc80211_minstrel_exit();
 
 	ieee80211s_stop();
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index d51da26..d09b3c7 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008, 2009 open80211s Ltd.
+ * Copyright (C) 2018 - 2019 Intel Corporation
  * Authors:    Luis Carlos Cobo <luisca@cozybit.com>
  * 	       Javier Cardona <javier@cozybit.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/slab.h>
@@ -98,7 +96,9 @@
 	cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chandef.chan,
 				NL80211_CHAN_NO_HT);
 	ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def);
-	ieee80211_chandef_vht_oper(ie->vht_operation, &sta_chan_def);
+	ieee80211_chandef_vht_oper(&sdata->local->hw,
+				   ie->vht_operation, ie->ht_operation,
+				   &sta_chan_def);
 
 	if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
 					 &sta_chan_def))
@@ -251,6 +251,9 @@
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	u8 *pos, neighbors;
 	u8 meshconf_len = sizeof(struct ieee80211_meshconf_ie);
+	bool is_connected_to_gate = ifmsh->num_gates > 0 ||
+		ifmsh->mshcfg.dot11MeshGateAnnouncementProtocol ||
+		ifmsh->mshcfg.dot11MeshConnectedToMeshGate;
 
 	if (skb_tailroom(skb) < 2 + meshconf_len)
 		return -ENOMEM;
@@ -275,7 +278,7 @@
 	/* Mesh Formation Info - number of neighbors */
 	neighbors = atomic_read(&ifmsh->estab_plinks);
 	neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS);
-	*pos++ = neighbors << 1;
+	*pos++ = (neighbors << 1) | is_connected_to_gate;
 	/* Mesh capability */
 	*pos = 0x00;
 	*pos |= ifmsh->mshcfg.dot11MeshForwarding ?
@@ -529,6 +532,61 @@
 	return 0;
 }
 
+int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
+		       struct sk_buff *skb, u8 ie_len)
+{
+	const struct ieee80211_sta_he_cap *he_cap;
+	struct ieee80211_supported_band *sband;
+	u8 *pos;
+
+	sband = ieee80211_get_sband(sdata);
+	if (!sband)
+		return -EINVAL;
+
+	he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
+
+	if (!he_cap ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+		return 0;
+
+	if (skb_tailroom(skb) < ie_len)
+		return -ENOMEM;
+
+	pos = skb_put(skb, ie_len);
+	ieee80211_ie_build_he_cap(pos, he_cap, pos + ie_len);
+
+	return 0;
+}
+
+int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
+			struct sk_buff *skb)
+{
+	const struct ieee80211_sta_he_cap *he_cap;
+	struct ieee80211_supported_band *sband;
+	u8 *pos;
+
+	sband = ieee80211_get_sband(sdata);
+	if (!sband)
+		return -EINVAL;
+
+	he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
+	if (!he_cap ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+		return 0;
+
+	if (skb_tailroom(skb) < 2 + 1 + sizeof(struct ieee80211_he_operation))
+		return -ENOMEM;
+
+	pos = skb_put(skb, 2 + 1 + sizeof(struct ieee80211_he_operation));
+	ieee80211_ie_build_he_oper(pos);
+
+	return 0;
+}
+
 static void ieee80211_mesh_path_timer(struct timer_list *t)
 {
 	struct ieee80211_sub_if_data *sdata =
@@ -674,6 +732,7 @@
 	struct ieee80211_chanctx_conf *chanctx_conf;
 	struct mesh_csa_settings *csa;
 	enum nl80211_band band;
+	u8 ie_len_he_cap;
 	u8 *pos;
 	struct ieee80211_sub_if_data *sdata;
 	int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon);
@@ -684,6 +743,8 @@
 	band = chanctx_conf->def.chan->band;
 	rcu_read_unlock();
 
+	ie_len_he_cap = ieee80211_ie_len_he_cap(sdata,
+						NL80211_IFTYPE_MESH_POINT);
 	head_len = hdr_len +
 		   2 + /* NULL SSID */
 		   /* Channel Switch Announcement */
@@ -703,6 +764,8 @@
 		   2 + sizeof(__le16) + /* awake window */
 		   2 + sizeof(struct ieee80211_vht_cap) +
 		   2 + sizeof(struct ieee80211_vht_operation) +
+		   ie_len_he_cap +
+		   2 + 1 + sizeof(struct ieee80211_he_operation) +
 		   ifmsh->ie_len;
 
 	bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL);
@@ -820,6 +883,8 @@
 	    mesh_add_awake_window_ie(sdata, skb) ||
 	    mesh_add_vht_cap_ie(sdata, skb) ||
 	    mesh_add_vht_oper_ie(sdata, skb) ||
+	    mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) ||
+	    mesh_add_he_oper_ie(sdata, skb) ||
 	    mesh_add_vendor_ies(sdata, skb))
 		goto out_free;
 
@@ -923,6 +988,7 @@
 
 	/* flush STAs and mpaths on this iface */
 	sta_info_flush(sdata);
+	ieee80211_free_keys(sdata, true);
 	mesh_path_flush_by_iface(sdata);
 
 	/* stop the beacon */
@@ -1100,7 +1166,8 @@
 	if (baselen > len)
 		return;
 
-	ieee802_11_parse_elems(pos, len - baselen, false, &elems);
+	ieee802_11_parse_elems(pos, len - baselen, false, &elems, mgmt->bssid,
+			       NULL);
 
 	if (!elems.mesh_id)
 		return;
@@ -1164,7 +1231,7 @@
 		return;
 
 	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
-			       false, &elems);
+			       false, &elems, mgmt->bssid, NULL);
 
 	/* ignore non-mesh or secure / unsecure mismatch */
 	if ((!elems.mesh_id || !elems.mesh_config) ||
@@ -1188,7 +1255,8 @@
 		if (!sdata->u.mesh.user_mpm ||
 		    sdata->u.mesh.mshcfg.rssi_threshold == 0 ||
 		    sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal)
-			mesh_neighbour_update(sdata, mgmt->sa, &elems);
+			mesh_neighbour_update(sdata, mgmt->sa, &elems,
+					      rx_status);
 	}
 
 	if (ifmsh->sync_ops)
@@ -1212,7 +1280,8 @@
 	ifmsh->chsw_ttl = 0;
 
 	/* Remove the CSA and MCSP elements from the beacon */
-	tmp_csa_settings = rcu_dereference(ifmsh->csa);
+	tmp_csa_settings = rcu_dereference_protected(ifmsh->csa,
+					    lockdep_is_held(&sdata->wdev.mtx));
 	RCU_INIT_POINTER(ifmsh->csa, NULL);
 	if (tmp_csa_settings)
 		kfree_rcu(tmp_csa_settings, rcu_head);
@@ -1234,6 +1303,8 @@
 	struct mesh_csa_settings *tmp_csa_settings;
 	int ret = 0;
 
+	lockdep_assert_held(&sdata->wdev.mtx);
+
 	tmp_csa_settings = kmalloc(sizeof(*tmp_csa_settings),
 				   GFP_ATOMIC);
 	if (!tmp_csa_settings)
@@ -1299,7 +1370,8 @@
 	pos = mgmt->u.action.u.chan_switch.variable;
 	baselen = offsetof(struct ieee80211_mgmt,
 			   u.action.u.chan_switch.variable);
-	ieee802_11_parse_elems(pos, len - baselen, true, &elems);
+	ieee802_11_parse_elems(pos, len - baselen, true, &elems,
+			       mgmt->bssid, NULL);
 
 	ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl;
 	if (!--ifmsh->chsw_ttl)
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 2152663..953f720 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2008, 2009 open80211s Ltd.
  * Authors:    Luis Carlos Cobo <luisca@cozybit.com>
  *             Javier Cardona <javier@cozybit.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef IEEE80211S_H
@@ -70,6 +67,7 @@
  * @dst: mesh path destination mac address
  * @mpp: mesh proxy mac address
  * @rhash: rhashtable list pointer
+ * @walk_list: linked list containing all mesh_path objects.
  * @gate_list: list pointer for known gates list
  * @sdata: mesh subif
  * @next_hop: mesh neighbor to which frames for this destination will be
@@ -94,6 +92,7 @@
  * @last_preq_to_root: Timestamp of last PREQ sent to root
  * @is_root: the destination station of this path is a root node
  * @is_gate: the destination station of this path is a mesh gate
+ * @path_change_count: the number of path changes to destination
  *
  *
  * The dst address is unique in the mesh path table. Since the mesh_path is
@@ -105,6 +104,7 @@
 	u8 dst[ETH_ALEN];
 	u8 mpp[ETH_ALEN];	/* used for MPP or MAP */
 	struct rhash_head rhash;
+	struct hlist_node walk_list;
 	struct hlist_node gate_list;
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info __rcu *next_hop;
@@ -124,6 +124,7 @@
 	unsigned long last_preq_to_root;
 	bool is_root;
 	bool is_gate;
+	u32 path_change_count;
 };
 
 /**
@@ -133,12 +134,16 @@
  * gate's mpath may or may not be resolved and active.
  * @gates_lock: protects updates to known_gates
  * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
+ * @walk_head: linked list containing all mesh_path objects
+ * @walk_lock: lock protecting walk_head
  * @entries: number of entries in the table
  */
 struct mesh_table {
 	struct hlist_head known_gates;
 	spinlock_t gates_lock;
 	struct rhashtable rhead;
+	struct hlist_head walk_head;
+	spinlock_t walk_lock;
 	atomic_t entries;		/* Up to MAX_MESH_NEIGHBOURS */
 };
 
@@ -213,6 +218,10 @@
 			struct sk_buff *skb);
 int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
 			 struct sk_buff *skb);
+int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
+		       struct sk_buff *skb, u8 ie_len);
+int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
+			struct sk_buff *skb);
 void mesh_rmc_free(struct ieee80211_sub_if_data *sdata);
 int mesh_rmc_init(struct ieee80211_sub_if_data *sdata);
 void ieee80211s_init(void);
@@ -270,10 +279,13 @@
 int mesh_path_add_gate(struct mesh_path *mpath);
 int mesh_path_send_to_gates(struct mesh_path *mpath);
 int mesh_gate_num(struct ieee80211_sub_if_data *sdata);
+u32 airtime_link_metric_get(struct ieee80211_local *local,
+			    struct sta_info *sta);
 
 /* Mesh plinks */
 void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
-			   u8 *hw_addr, struct ieee802_11_elems *ie);
+			   u8 *hw_addr, struct ieee802_11_elems *ie,
+			   struct ieee80211_rx_status *rx_status);
 bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
 u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
 void mesh_plink_timer(struct timer_list *t);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 6950cd0..68af623 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1,10 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008, 2009 open80211s Ltd.
+ * Copyright (C) 2019 Intel Corporation
  * Author:     Luis Carlos Cobo <luisca@cozybit.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/slab.h>
@@ -300,6 +298,7 @@
 {
 	struct ieee80211_tx_info *txinfo = st->info;
 	int failed;
+	struct rate_info rinfo;
 
 	failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK);
 
@@ -310,12 +309,15 @@
 	if (ewma_mesh_fail_avg_read(&sta->mesh->fail_avg) >
 			LINK_FAIL_THRESH)
 		mesh_plink_broken(sta);
+
+	sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
+	ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg,
+				  cfg80211_calculate_bitrate(&rinfo));
 }
 
-static u32 airtime_link_metric_get(struct ieee80211_local *local,
-				   struct sta_info *sta)
+u32 airtime_link_metric_get(struct ieee80211_local *local,
+			    struct sta_info *sta)
 {
-	struct rate_info rinfo;
 	/* This should be adjusted for each device */
 	int device_constant = 1 << ARITH_SHIFT;
 	int test_frame_len = TEST_FRAME_LEN << ARITH_SHIFT;
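
Note: sampling the rate in the tx-status hook and reading a moving average here means airtime_link_metric_get() no longer calls cfg80211_calculate_bitrate() on every PREQ/PREP, and a single slow retry no longer swings the whole metric. A userspace sketch of the smoothing idea (the weighting is assumed; the kernel's DECLARE_EWMA uses its own fixed-point precision):

#include <stdio.h>

/* crude EWMA with weight 8: avg += (sample - avg) / 8 */
static long ewma_add(long avg, long sample)
{
	return avg ? avg + (sample - avg) / 8 : sample;
}

int main(void)
{
	long avg = 0;
	long rates[] = { 6500, 5850, 7200, 540 };	/* units of 100 kbps */

	for (int i = 0; i < 4; i++) {
		avg = ewma_add(avg, rates[i]);
		printf("sample %ld -> avg %ld\n", rates[i], avg);
	}
	return 0;
}
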
@@ -339,8 +341,7 @@
 		if (fail_avg > LINK_FAIL_THRESH)
 			return MAX_METRIC;
 
-		sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
-		rate = cfg80211_calculate_bitrate(&rinfo);
+		rate = ewma_mesh_tx_rate_avg_read(&sta->mesh->tx_rate_avg);
 		if (WARN_ON(!rate))
 			return MAX_METRIC;
 
@@ -386,6 +387,7 @@
 	unsigned long orig_lifetime, exp_time;
 	u32 last_hop_metric, new_metric;
 	bool process = true;
+	u8 hopcount;
 
 	rcu_read_lock();
 	sta = sta_info_get(sdata, mgmt->sa);
@@ -404,6 +406,7 @@
 		orig_sn = PREQ_IE_ORIG_SN(hwmp_ie);
 		orig_lifetime = PREQ_IE_LIFETIME(hwmp_ie);
 		orig_metric = PREQ_IE_METRIC(hwmp_ie);
+		hopcount = PREQ_IE_HOPCOUNT(hwmp_ie) + 1;
 		break;
 	case MPATH_PREP:
 		/* Originator here refers to the MP that was the target in the
@@ -415,6 +418,7 @@
 		orig_sn = PREP_IE_TARGET_SN(hwmp_ie);
 		orig_lifetime = PREP_IE_LIFETIME(hwmp_ie);
 		orig_metric = PREP_IE_METRIC(hwmp_ie);
+		hopcount = PREP_IE_HOPCOUNT(hwmp_ie) + 1;
 		break;
 	default:
 		rcu_read_unlock();
@@ -441,7 +445,10 @@
 			    (mpath->flags & MESH_PATH_SN_VALID)) {
 				if (SN_GT(mpath->sn, orig_sn) ||
 				    (mpath->sn == orig_sn &&
-				     new_metric >= mpath->metric)) {
+				     (rcu_access_pointer(mpath->next_hop) !=
+						      sta ?
+					      mult_frac(new_metric, 10, 9) :
+					      new_metric) >= mpath->metric)) {
 					process = false;
 					fresh_info = false;
 				}
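
Note: mult_frac(new_metric, 10, 9) implements switching hysteresis; a path advertised through a different next hop is handicapped by roughly 11% (x * 10/9), so it must clearly beat the installed route before the next hop flips, damping route flapping between near-equal paths (tracked by path_change_count below). Numerically:

#include <stdio.h>

/* same shape as the kernel's mult_frac(x, 10, 9): x * 10 / 9
 * without intermediate overflow
 */
static unsigned int penalized(unsigned int x)
{
	return (x / 9) * 10 + ((x % 9) * 10) / 9;
}

int main(void)
{
	unsigned int cur = 1000;	/* metric of the installed path */
	unsigned int cand = 950;	/* 5% better, via a different hop */

	/* 950 * 10/9 = 1055 >= 1000: not enough of a win, keep the path */
	printf("%s\n", penalized(cand) >= cur ? "keep" : "switch");
	return 0;
}
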
@@ -476,12 +483,15 @@
 		}
 
 		if (fresh_info) {
+			if (rcu_access_pointer(mpath->next_hop) != sta)
+				mpath->path_change_count++;
 			mesh_path_assign_nexthop(mpath, sta);
 			mpath->flags |= MESH_PATH_SN_VALID;
 			mpath->metric = new_metric;
 			mpath->sn = orig_sn;
 			mpath->exp_time = time_after(mpath->exp_time, exp_time)
 					  ?  mpath->exp_time : exp_time;
+			mpath->hop_count = hopcount;
 			mesh_path_activate(mpath);
 			spin_unlock_bh(&mpath->state_lock);
 			ewma_mesh_fail_avg_init(&sta->mesh->fail_avg);
@@ -506,8 +516,10 @@
 		if (mpath) {
 			spin_lock_bh(&mpath->state_lock);
 			if ((mpath->flags & MESH_PATH_FIXED) ||
-				((mpath->flags & MESH_PATH_ACTIVE) &&
-					(last_hop_metric > mpath->metric)))
+			    ((mpath->flags & MESH_PATH_ACTIVE) &&
+			     ((rcu_access_pointer(mpath->next_hop) != sta ?
+				       mult_frac(last_hop_metric, 10, 9) :
+				       last_hop_metric) > mpath->metric)))
 				fresh_info = false;
 		} else {
 			mpath = mesh_path_add(sdata, ta);
@@ -519,10 +531,13 @@
 		}
 
 		if (fresh_info) {
+			if (rcu_access_pointer(mpath->next_hop) != sta)
+				mpath->path_change_count++;
 			mesh_path_assign_nexthop(mpath, sta);
 			mpath->metric = last_hop_metric;
 			mpath->exp_time = time_after(mpath->exp_time, exp_time)
 					  ?  mpath->exp_time : exp_time;
+			mpath->hop_count = 1;
 			mesh_path_activate(mpath);
 			spin_unlock_bh(&mpath->state_lock);
 			ewma_mesh_fail_avg_init(&sta->mesh->fail_avg);
@@ -909,7 +924,7 @@
 
 	baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt;
 	ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable,
-			       len - baselen, false, &elems);
+			       len - baselen, false, &elems, mgmt->bssid, NULL);
 
 	if (elems.preq) {
 		if (elems.preq_len != 37)
@@ -1112,16 +1127,17 @@
 	struct mesh_path *mpath;
 	struct sk_buff *skb_to_free = NULL;
 	u8 *target_addr = hdr->addr3;
-	int err = 0;
 
 	/* Nulls are only sent to peers for PS and should be pre-addressed */
 	if (ieee80211_is_qos_nullfunc(hdr->frame_control))
 		return 0;
 
-	rcu_read_lock();
-	err = mesh_nexthop_lookup(sdata, skb);
-	if (!err)
-		goto endlookup;
+	/* Allow injected packets to bypass mesh routing */
+	if (info->control.flags & IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP)
+		return 0;
+
+	if (!mesh_nexthop_lookup(sdata, skb))
+		return 0;
 
 	/* no nexthop found, start resolving */
 	mpath = mesh_path_lookup(sdata, target_addr);
@@ -1129,8 +1145,7 @@
 		mpath = mesh_path_add(sdata, target_addr);
 		if (IS_ERR(mpath)) {
 			mesh_path_discard_frame(sdata, skb);
-			err = PTR_ERR(mpath);
-			goto endlookup;
+			return PTR_ERR(mpath);
 		}
 	}
 
@@ -1143,13 +1158,10 @@
 	info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
 	ieee80211_set_qos_hdr(sdata, skb);
 	skb_queue_tail(&mpath->frame_queue, skb);
-	err = -ENOENT;
 	if (skb_to_free)
 		mesh_path_discard_frame(sdata, skb_to_free);
 
-endlookup:
-	rcu_read_unlock();
-	return err;
+	return -ENOENT;
 }
 
 /**
@@ -1169,13 +1181,10 @@
 	struct sta_info *next_hop;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	u8 *target_addr = hdr->addr3;
-	int err = -ENOENT;
 
-	rcu_read_lock();
 	mpath = mesh_path_lookup(sdata, target_addr);
-
 	if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE))
-		goto endlookup;
+		return -ENOENT;
 
 	if (time_after(jiffies,
 		       mpath->exp_time -
@@ -1190,12 +1199,10 @@
 		memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN);
 		memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
 		ieee80211_mps_set_frame_flags(sdata, next_hop, hdr);
-		err = 0;
+		return 0;
 	}
 
-endlookup:
-	rcu_read_unlock();
-	return err;
+	return -ENOENT;
 }
 
 void mesh_path_timer(struct timer_list *t)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index a512562..117519b 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008, 2009 open80211s Ltd.
  * Author:     Luis Carlos Cobo <luisca@cozybit.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/etherdevice.h>
@@ -23,7 +20,7 @@
 static u32 mesh_table_hash(const void *addr, u32 len, u32 seed)
 {
 	/* Use last four bytes of hw addr as hash index */
-	return jhash_1word(*(u32 *)(addr+2), seed);
+	return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed);
 }
 
 static const struct rhashtable_params mesh_rht_params = {
@@ -59,8 +56,10 @@
 		return NULL;
 
 	INIT_HLIST_HEAD(&newtbl->known_gates);
+	INIT_HLIST_HEAD(&newtbl->walk_head);
 	atomic_set(&newtbl->entries,  0);
 	spin_lock_init(&newtbl->gates_lock);
+	spin_lock_init(&newtbl->walk_lock);
 
 	return newtbl;
 }
@@ -215,7 +214,7 @@
 {
 	struct mesh_path *mpath;
 
-	mpath = rhashtable_lookup_fast(&tbl->rhead, dst, mesh_rht_params);
+	mpath = rhashtable_lookup(&tbl->rhead, dst, mesh_rht_params);
 
 	if (mpath && mpath_expired(mpath)) {
 		spin_lock_bh(&mpath->state_lock);
@@ -249,28 +248,15 @@
 static struct mesh_path *
 __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
 {
-	int i = 0, ret;
-	struct mesh_path *mpath = NULL;
-	struct rhashtable_iter iter;
+	int i = 0;
+	struct mesh_path *mpath;
 
-	ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
-	if (ret)
-		return NULL;
-
-	rhashtable_walk_start(&iter);
-
-	while ((mpath = rhashtable_walk_next(&iter))) {
-		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
-			continue;
-		if (IS_ERR(mpath))
-			break;
+	hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) {
 		if (i++ == idx)
 			break;
 	}
-	rhashtable_walk_stop(&iter);
-	rhashtable_walk_exit(&iter);
 
-	if (IS_ERR(mpath) || !mpath)
+	if (!mpath)
 		return NULL;
 
 	if (mpath_expired(mpath)) {
@@ -415,7 +401,6 @@
 {
 	struct mesh_table *tbl;
 	struct mesh_path *mpath, *new_mpath;
-	int ret;
 
 	if (ether_addr_equal(dst, sdata->vif.addr))
 		/* never add ourselves as neighbours */
@@ -432,29 +417,23 @@
 		return ERR_PTR(-ENOMEM);
 
 	tbl = sdata->u.mesh.mesh_paths;
-	do {
-		ret = rhashtable_lookup_insert_fast(&tbl->rhead,
-						    &new_mpath->rhash,
-						    mesh_rht_params);
+	spin_lock_bh(&tbl->walk_lock);
+	mpath = rhashtable_lookup_get_insert_fast(&tbl->rhead,
+						  &new_mpath->rhash,
+						  mesh_rht_params);
+	if (!mpath)
+		hlist_add_head(&new_mpath->walk_list, &tbl->walk_head);
+	spin_unlock_bh(&tbl->walk_lock);
 
-		if (ret == -EEXIST)
-			mpath = rhashtable_lookup_fast(&tbl->rhead,
-						       dst,
-						       mesh_rht_params);
-
-	} while (unlikely(ret == -EEXIST && !mpath));
-
-	if (ret && ret != -EEXIST)
-		return ERR_PTR(ret);
-
-	/* At this point either new_mpath was added, or we found a
-	 * matching entry already in the table; in the latter case
-	 * free the unnecessary new entry.
-	 */
-	if (ret == -EEXIST) {
+	if (mpath) {
 		kfree(new_mpath);
+
+		if (IS_ERR(mpath))
+			return mpath;
+
 		new_mpath = mpath;
 	}
+
 	sdata->u.mesh.mesh_paths_generation++;
 	return new_mpath;
 }
@@ -480,9 +459,17 @@
 
 	memcpy(new_mpath->mpp, mpp, ETH_ALEN);
 	tbl = sdata->u.mesh.mpp_paths;
+
+	spin_lock_bh(&tbl->walk_lock);
 	ret = rhashtable_lookup_insert_fast(&tbl->rhead,
 					    &new_mpath->rhash,
 					    mesh_rht_params);
+	if (!ret)
+		hlist_add_head_rcu(&new_mpath->walk_list, &tbl->walk_head);
+	spin_unlock_bh(&tbl->walk_lock);
+
+	if (ret)
+		kfree(new_mpath);
 
 	sdata->u.mesh.mpp_paths_generation++;
 	return ret;
@@ -503,20 +490,9 @@
 	struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
 	static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 	struct mesh_path *mpath;
-	struct rhashtable_iter iter;
-	int ret;
 
-	ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
-	if (ret)
-		return;
-
-	rhashtable_walk_start(&iter);
-
-	while ((mpath = rhashtable_walk_next(&iter))) {
-		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
-			continue;
-		if (IS_ERR(mpath))
-			break;
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) {
 		if (rcu_access_pointer(mpath->next_hop) == sta &&
 		    mpath->flags & MESH_PATH_ACTIVE &&
 		    !(mpath->flags & MESH_PATH_FIXED)) {
@@ -530,8 +506,7 @@
 				WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast);
 		}
 	}
-	rhashtable_walk_stop(&iter);
-	rhashtable_walk_exit(&iter);
+	rcu_read_unlock();
 }
 
 static void mesh_path_free_rcu(struct mesh_table *tbl,
@@ -551,6 +526,7 @@
 
 static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath)
 {
+	hlist_del_rcu(&mpath->walk_list);
 	rhashtable_remove_fast(&tbl->rhead, &mpath->rhash, mesh_rht_params);
 	mesh_path_free_rcu(tbl, mpath);
 }
@@ -571,27 +547,14 @@
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
 	struct mesh_path *mpath;
-	struct rhashtable_iter iter;
-	int ret;
+	struct hlist_node *n;
 
-	ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
-	if (ret)
-		return;
-
-	rhashtable_walk_start(&iter);
-
-	while ((mpath = rhashtable_walk_next(&iter))) {
-		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
-			continue;
-		if (IS_ERR(mpath))
-			break;
-
+	spin_lock_bh(&tbl->walk_lock);
+	hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
 		if (rcu_access_pointer(mpath->next_hop) == sta)
 			__mesh_path_del(tbl, mpath);
 	}
-
-	rhashtable_walk_stop(&iter);
-	rhashtable_walk_exit(&iter);
+	spin_unlock_bh(&tbl->walk_lock);
 }
 
 static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
@@ -599,51 +562,26 @@
 {
 	struct mesh_table *tbl = sdata->u.mesh.mpp_paths;
 	struct mesh_path *mpath;
-	struct rhashtable_iter iter;
-	int ret;
+	struct hlist_node *n;
 
-	ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
-	if (ret)
-		return;
-
-	rhashtable_walk_start(&iter);
-
-	while ((mpath = rhashtable_walk_next(&iter))) {
-		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
-			continue;
-		if (IS_ERR(mpath))
-			break;
-
+	spin_lock_bh(&tbl->walk_lock);
+	hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
 		if (ether_addr_equal(mpath->mpp, proxy))
 			__mesh_path_del(tbl, mpath);
 	}
-
-	rhashtable_walk_stop(&iter);
-	rhashtable_walk_exit(&iter);
+	spin_unlock_bh(&tbl->walk_lock);
 }
 
 static void table_flush_by_iface(struct mesh_table *tbl)
 {
 	struct mesh_path *mpath;
-	struct rhashtable_iter iter;
-	int ret;
+	struct hlist_node *n;
 
-	ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
-	if (ret)
-		return;
-
-	rhashtable_walk_start(&iter);
-
-	while ((mpath = rhashtable_walk_next(&iter))) {
-		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
-			continue;
-		if (IS_ERR(mpath))
-			break;
+	spin_lock_bh(&tbl->walk_lock);
+	hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
 		__mesh_path_del(tbl, mpath);
 	}
-
-	rhashtable_walk_stop(&iter);
-	rhashtable_walk_exit(&iter);
+	spin_unlock_bh(&tbl->walk_lock);
 }
 
 /**
@@ -675,15 +613,15 @@
 {
 	struct mesh_path *mpath;
 
-	rcu_read_lock();
+	spin_lock_bh(&tbl->walk_lock);
 	mpath = rhashtable_lookup_fast(&tbl->rhead, addr, mesh_rht_params);
 	if (!mpath) {
-		rcu_read_unlock();
+		spin_unlock_bh(&tbl->walk_lock);
 		return -ENXIO;
 	}
 
 	__mesh_path_del(tbl, mpath);
-	rcu_read_unlock();
+	spin_unlock_bh(&tbl->walk_lock);
 	return 0;
 }
 
@@ -854,28 +792,16 @@
 			  struct mesh_table *tbl)
 {
 	struct mesh_path *mpath;
-	struct rhashtable_iter iter;
-	int ret;
+	struct hlist_node *n;
 
-	ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_KERNEL);
-	if (ret)
-		return;
-
-	rhashtable_walk_start(&iter);
-
-	while ((mpath = rhashtable_walk_next(&iter))) {
-		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
-			continue;
-		if (IS_ERR(mpath))
-			break;
+	spin_lock_bh(&tbl->walk_lock);
+	hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
 		if ((!(mpath->flags & MESH_PATH_RESOLVING)) &&
 		    (!(mpath->flags & MESH_PATH_FIXED)) &&
 		     time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
 			__mesh_path_del(tbl, mpath);
 	}
-
-	rhashtable_walk_stop(&iter);
-	rhashtable_walk_exit(&iter);
+	spin_unlock_bh(&tbl->walk_lock);
 }
 
 void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
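
Note: all of the rhashtable_walk_init/start/next boilerplate in this file is replaced by a second index: every mesh_path is also linked on tbl->walk_head, which readers traverse under RCU while writers serialize on walk_lock. That removes the -EAGAIN restart handling and lets flushes run in atomic context. The reader side of the pattern, with a hypothetical visit() callback:

/* reader-side sketch: stable walk without rhashtable iterators;
 * visit() is a hypothetical per-entry callback
 */
rcu_read_lock();
hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list)
	visit(mpath);
rcu_read_unlock();
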
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 5b5b0f9..737c5f4 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -1,10 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008, 2009 open80211s Ltd.
+ * Copyright (C) 2019 Intel Corporation
  * Author:     Luis Carlos Cobo <luisca@cozybit.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/gfp.h>
 #include <linux/kernel.h>
@@ -220,9 +218,12 @@
 	bool include_plid = false;
 	u16 peering_proto = 0;
 	u8 *pos, ie_len = 4;
+	u8 ie_len_he_cap;
 	int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
 	int err = -ENOMEM;
 
+	ie_len_he_cap = ieee80211_ie_len_he_cap(sdata,
+						NL80211_IFTYPE_MESH_POINT);
 	skb = dev_alloc_skb(local->tx_headroom +
 			    hdr_len +
 			    2 + /* capability info */
@@ -235,6 +236,8 @@
 			    2 + sizeof(struct ieee80211_ht_operation) +
 			    2 + sizeof(struct ieee80211_vht_cap) +
 			    2 + sizeof(struct ieee80211_vht_operation) +
+			    ie_len_he_cap +
+			    2 + 1 + sizeof(struct ieee80211_he_operation) +
 			    2 + 8 + /* peering IE */
 			    sdata->u.mesh.ie_len);
 	if (!skb)
@@ -323,7 +326,9 @@
 		if (mesh_add_ht_cap_ie(sdata, skb) ||
 		    mesh_add_ht_oper_ie(sdata, skb) ||
 		    mesh_add_vht_cap_ie(sdata, skb) ||
-		    mesh_add_vht_oper_ie(sdata, skb))
+		    mesh_add_vht_oper_ie(sdata, skb) ||
+		    mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) ||
+		    mesh_add_he_oper_ie(sdata, skb))
 			goto free;
 	}
 
@@ -435,6 +440,9 @@
 	ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
 					    elems->vht_cap_elem, sta);
 
+	ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap,
+					  elems->he_cap_len, sta);
+
 	if (bw != sta->sta.bandwidth)
 		changed |= IEEE80211_RC_BW_CHANGED;
 
@@ -513,7 +521,8 @@
 
 static struct sta_info *
 mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
-		    struct ieee802_11_elems *elems)
+		    struct ieee802_11_elems *elems,
+		    struct ieee80211_rx_status *rx_status)
 {
 	struct sta_info *sta = NULL;
 
@@ -521,11 +530,17 @@
 	if (sdata->u.mesh.user_mpm ||
 	    sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) {
 		if (mesh_peer_accepts_plinks(elems) &&
-		    mesh_plink_availables(sdata))
+		    mesh_plink_availables(sdata)) {
+			int sig = 0;
+
+			if (ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM))
+				sig = rx_status->signal;
+
 			cfg80211_notify_new_peer_candidate(sdata->dev, addr,
 							   elems->ie_start,
 							   elems->total_len,
-							   GFP_KERNEL);
+							   sig, GFP_KERNEL);
+		}
 	} else
 		sta = __mesh_sta_info_alloc(sdata, addr);
 
@@ -538,13 +553,15 @@
  * @sdata: local meshif
  * @addr: peer's address
  * @elems: IEs from beacon or mesh peering frame.
+ * @rx_status: rx status for the frame for signal reporting
  *
  * Return existing or newly allocated sta_info under RCU read lock.
  * (re)initialize with given IEs.
  */
 static struct sta_info *
 mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
-		  u8 *addr, struct ieee802_11_elems *elems) __acquires(RCU)
+		  u8 *addr, struct ieee802_11_elems *elems,
+		  struct ieee80211_rx_status *rx_status) __acquires(RCU)
 {
 	struct sta_info *sta = NULL;
 
@@ -555,7 +572,7 @@
 	} else {
 		rcu_read_unlock();
 		/* can't run atomic */
-		sta = mesh_sta_info_alloc(sdata, addr, elems);
+		sta = mesh_sta_info_alloc(sdata, addr, elems, rx_status);
 		if (!sta) {
 			rcu_read_lock();
 			return NULL;
@@ -576,20 +593,25 @@
  * @sdata: local meshif
  * @addr: peer's address
  * @elems: IEs from beacon or mesh peering frame
+ * @rx_status: rx status for the frame for signal reporting
  *
  * Initiates peering if appropriate.
  */
 void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
 			   u8 *hw_addr,
-			   struct ieee802_11_elems *elems)
+			   struct ieee802_11_elems *elems,
+			   struct ieee80211_rx_status *rx_status)
 {
 	struct sta_info *sta;
 	u32 changed = 0;
 
-	sta = mesh_sta_info_get(sdata, hw_addr, elems);
+	sta = mesh_sta_info_get(sdata, hw_addr, elems, rx_status);
 	if (!sta)
 		goto out;
 
+	sta->mesh->connected_to_gate = elems->mesh_config->meshconf_form &
+		IEEE80211_MESHCONF_FORM_CONNECTED_TO_GATE;
+
 	if (mesh_peer_accepts_plinks(elems) &&
 	    sta->mesh->plink_state == NL80211_PLINK_LISTEN &&
 	    sdata->u.mesh.accepting_plinks &&
@@ -1069,7 +1091,8 @@
 static void
 mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
 			 struct ieee80211_mgmt *mgmt,
-			 struct ieee802_11_elems *elems)
+			 struct ieee802_11_elems *elems,
+			 struct ieee80211_rx_status *rx_status)
 {
 
 	struct sta_info *sta;
@@ -1134,7 +1157,7 @@
 	if (event == OPN_ACPT) {
 		rcu_read_unlock();
 		/* allocate sta entry if necessary and update info */
-		sta = mesh_sta_info_get(sdata, mgmt->sa, elems);
+		sta = mesh_sta_info_get(sdata, mgmt->sa, elems, rx_status);
 		if (!sta) {
 			mpl_dbg(sdata, "Mesh plink: failed to init peer!\n");
 			goto unlock_rcu;
@@ -1199,6 +1222,7 @@
 		if (baselen > len)
 			return;
 	}
-	ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems);
-	mesh_process_plink_frame(sdata, mgmt, &elems);
+	ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems,
+			       mgmt->bssid, NULL);
+	mesh_process_plink_frame(sdata, mgmt, &elems, rx_status);
 }
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index d8cd914..031e905 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
  * Copyright 2012-2013, cozybit Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include "mesh.h"
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index a435f09..fde93de 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2011-2012, Pavel Zubarev <pavel.zubarev@gmail.com>
  * Copyright 2011-2012, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
  * Copyright 2011-2012, cozybit Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include "ieee80211_i.h"
diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
index 37e1727..a57502d 100644
--- a/net/mac80211/michael.c
+++ b/net/mac80211/michael.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Michael MIC implementation - optimized for TKIP MIC operations
  * Copyright 2002-2003, Instant802 Networks, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/types.h>
 #include <linux/bitops.h>
diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h
index 0e4886f..a7fdb8e 100644
--- a/net/mac80211/michael.h
+++ b/net/mac80211/michael.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Michael MIC implementation - optimized for TKIP MIC operations
  * Copyright 2002-2003, Instant802 Networks, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef MICHAEL_H
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3dbecae..54dd884 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * BSS client mode implementation
  * Copyright 2003-2008, Jouni Malinen <j@w1.fi>
@@ -7,14 +8,11 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018        Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018 - 2019 Intel Corporation
  */
 
 #include <linux/delay.h>
+#include <linux/fips.h>
 #include <linux/if_ether.h>
 #include <linux/skbuff.h>
 #include <linux/if_arp.h>
@@ -160,10 +158,10 @@
 	memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
 	ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
 
+	memset(chandef, 0, sizeof(struct cfg80211_chan_def));
 	chandef->chan = channel;
 	chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
 	chandef->center_freq1 = channel->center_freq;
-	chandef->center_freq2 = 0;
 
 	if (!ht_oper || !sta_ht_cap.ht_supported) {
 		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
@@ -220,7 +218,8 @@
 		memcpy(&he_oper_vht_cap, he_oper->optional, 3);
 		he_oper_vht_cap.basic_mcs_set = cpu_to_le16(0);
 
-		if (!ieee80211_chandef_vht_oper(&he_oper_vht_cap,
+		if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
+						&he_oper_vht_cap, ht_oper,
 						&vht_chandef)) {
 			if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
 				sdata_info(sdata,
@@ -228,7 +227,8 @@
 			ret = IEEE80211_STA_DISABLE_HE;
 			goto out;
 		}
-	} else if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) {
+	} else if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_oper,
+					       ht_oper, &vht_chandef)) {
 		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 			sdata_info(sdata,
 				   "AP VHT information is invalid, disable VHT\n");
@@ -642,7 +642,7 @@
 	struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	u8 *pos, qos_info;
+	u8 *pos, qos_info, *ie_start;
 	size_t offset = 0, noffset;
 	int i, count, rates_len, supp_rates_len, shift;
 	u16 capab;
@@ -750,6 +750,7 @@
 
 	/* SSID */
 	pos = skb_put(skb, 2 + assoc_data->ssid_len);
+	ie_start = pos;
 	*pos++ = WLAN_EID_SSID;
 	*pos++ = assoc_data->ssid_len;
 	memcpy(pos, assoc_data->ssid, assoc_data->ssid_len);
@@ -811,6 +812,21 @@
 		}
 	}
 
+	/* Set MBSSID support for HE AP if needed */
+	if (ieee80211_hw_check(&local->hw, SUPPORTS_ONLY_HE_MULTI_BSSID) &&
+	    !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && assoc_data->ie_len) {
+		struct element *elem;
+
+		/* we know it's writable, cast away the const */
+		elem = (void *)cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
+						  assoc_data->ie,
+						  assoc_data->ie_len);
+
+		/* We can probably assume both always true */
+		if (elem && elem->datalen >= 3)
+			elem->data[2] |= WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT;
+	}
+
 	/* if present, add any custom IEs that go before HT */
 	if (assoc_data->ie_len) {
 		static const u8 before_ht[] = {
@@ -914,6 +930,15 @@
 		ieee80211_add_vht_ie(sdata, skb, sband,
 				     &assoc_data->ap_vht_cap);
 
+	/*
+	 * If AP doesn't support HT, mark HE as disabled.
+	 * If on the 5GHz band, make sure it supports VHT.
+	 */
+	if (ifmgd->flags & IEEE80211_STA_DISABLE_HT ||
+	    (sband->band == NL80211_BAND_5GHZ &&
+	     ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
+		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+
 	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
 		ieee80211_add_he_ie(sdata, skb, sband);
 
@@ -950,6 +975,11 @@
 		return;
 	}
 
+	pos = skb_tail_pointer(skb);
+	kfree(ifmgd->assoc_req_ies);
+	ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC);
+	ifmgd->assoc_req_ies_len = pos - ie_start;
+
 	drv_mgd_prepare_tx(local, sdata, 0);
 
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
@@ -1156,9 +1186,6 @@
 		goto out;
 	}
 
-	/* XXX: shouldn't really modify cfg80211-owned data! */
-	ifmgd->associated->channel = sdata->csa_chandef.chan;
-
 	ifmgd->csa_waiting_bcn = true;
 
 	ieee80211_sta_reset_beacon_monitor(sdata);
@@ -1227,6 +1254,32 @@
 }
 
 static void
+ieee80211_sta_abort_chanswitch(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+
+	if (!local->ops->abort_channel_switch)
+		return;
+
+	mutex_lock(&local->mtx);
+
+	mutex_lock(&local->chanctx_mtx);
+	ieee80211_vif_unreserve_chanctx(sdata);
+	mutex_unlock(&local->chanctx_mtx);
+
+	if (sdata->csa_block_tx)
+		ieee80211_wake_vif_queues(local, sdata,
+					  IEEE80211_QUEUE_STOP_REASON_CSA);
+
+	sdata->csa_block_tx = false;
+	sdata->vif.csa_active = false;
+
+	mutex_unlock(&local->mtx);
+
+	drv_abort_channel_switch(sdata);
+}
+
+static void
 ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 				 u64 timestamp, u32 device_timestamp,
 				 struct ieee802_11_elems *elems,
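
Note: ieee80211_sta_abort_chanswitch() unwinds a CSA that later beacons cancelled: it releases the reserved channel context, unblocks the queues, clears csa_active, and only then informs the driver through the new, optional abort_channel_switch op. A sketch of the corresponding driver-ops wrapper, assuming the usual drv_*() shape (the real wrapper would also add tracing):

/* assumed sketch of the driver-ops wrapper used above */
static inline void drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata)
{
	struct ieee80211_local *local = sdata->local;

	if (local->ops->abort_channel_switch)
		local->ops->abort_channel_switch(&local->hw, &sdata->vif);
}
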
@@ -1250,19 +1303,36 @@
 	if (local->scanning)
 		return;
 
-	/* disregard subsequent announcements if we are already processing */
-	if (sdata->vif.csa_active)
-		return;
-
 	current_band = cbss->channel->band;
 	res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
 					   ifmgd->flags,
 					   ifmgd->associated->bssid, &csa_ie);
-	if (res	< 0)
+
+	if (!res) {
+		ch_switch.timestamp = timestamp;
+		ch_switch.device_timestamp = device_timestamp;
+		ch_switch.block_tx = beacon ? csa_ie.mode : 0;
+		ch_switch.chandef = csa_ie.chandef;
+		ch_switch.count = csa_ie.count;
+		ch_switch.delay = csa_ie.max_switch_time;
+	}
+
+	if (res < 0) {
 		ieee80211_queue_work(&local->hw,
 				     &ifmgd->csa_connection_drop_work);
-	if (res)
 		return;
+	}
+
+	if (beacon && sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) {
+		if (res)
+			ieee80211_sta_abort_chanswitch(sdata);
+		else
+			drv_channel_switch_rx_beacon(sdata, &ch_switch);
+		return;
+	} else if (sdata->vif.csa_active || res) {
+		/* disregard subsequent announcements if already processing */
+		return;
+	}
 
 	if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef,
 				     IEEE80211_CHAN_DISABLED)) {
@@ -1278,7 +1348,8 @@
 	}
 
 	if (cfg80211_chandef_identical(&csa_ie.chandef,
-				       &sdata->vif.bss_conf.chandef)) {
+				       &sdata->vif.bss_conf.chandef) &&
+	    (!csa_ie.mode || !beacon)) {
 		if (ifmgd->csa_ignored_same_chan)
 			return;
 		sdata_info(sdata,
@@ -1315,12 +1386,6 @@
 		goto drop_connection;
 	}
 
-	ch_switch.timestamp = timestamp;
-	ch_switch.device_timestamp = device_timestamp;
-	ch_switch.block_tx = csa_ie.mode;
-	ch_switch.chandef = csa_ie.chandef;
-	ch_switch.count = csa_ie.count;
-
 	if (drv_pre_channel_switch(sdata, &ch_switch)) {
 		sdata_info(sdata,
 			   "preparing for channel switch failed, disconnecting\n");
@@ -1339,7 +1404,7 @@
 
 	sdata->vif.csa_active = true;
 	sdata->csa_chandef = csa_ie.chandef;
-	sdata->csa_block_tx = csa_ie.mode;
+	sdata->csa_block_tx = ch_switch.block_tx;
 	ifmgd->csa_ignored_same_chan = false;
 
 	if (sdata->csa_block_tx)
@@ -1373,7 +1438,7 @@
 	 * reset when the disconnection worker runs.
 	 */
 	sdata->vif.csa_active = true;
-	sdata->csa_block_tx = csa_ie.mode;
+	sdata->csa_block_tx = ch_switch.block_tx;
 
 	ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
 	mutex_unlock(&local->chanctx_mtx);
@@ -1867,7 +1932,7 @@
 	struct ieee80211_tx_queue_params params[IEEE80211_NUM_ACS];
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	size_t left;
-	int count, ac;
+	int count, mu_edca_count, ac;
 	const u8 *pos;
 	u8 uapsd_queues = 0;
 
@@ -1887,9 +1952,16 @@
 		uapsd_queues = ifmgd->uapsd_queues;
 
 	count = wmm_param[6] & 0x0f;
-	if (count == ifmgd->wmm_last_param_set)
+	/* -1 is the initial value of ifmgd->mu_edca_last_param_set.
+	 * If mu_edca was present before and has now disappeared, tell
+	 * the driver about it.
+	 */
+	mu_edca_count = mu_edca ? mu_edca->mu_qos_info & 0x0f : -1;
+	if (count == ifmgd->wmm_last_param_set &&
+	    mu_edca_count == ifmgd->mu_edca_last_param_set)
 		return false;
 	ifmgd->wmm_last_param_set = count;
+	ifmgd->mu_edca_last_param_set = mu_edca_count;
 
 	pos = wmm_param + 8;
 	left = wmm_param_len - 8;
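The refresh rule above can be read as a single predicate; a minimal sketch, assuming only that both counts are 4-bit values and that -1 marks "never seen" or "element absent" (the helper name is illustrative, not part of the patch):

	/* Parameters are reprogrammed unless BOTH counts are unchanged; an AP
	 * that drops its MU EDCA element yields -1, which can never match a
	 * previously stored 0..15 count, so the driver gets told about it.
	 */
	static bool wmm_params_unchanged(int count, int last_count,
					 int mu_edca_count, int mu_edca_last)
	{
		return count == last_count && mu_edca_count == mu_edca_last;
	}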
@@ -1970,6 +2042,16 @@
 		ieee80211_regulatory_limit_wmm_params(sdata, &params[ac], ac);
 	}
 
+	/* WMM specification requires all 4 ACIs. */
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		if (params[ac].cw_min == 0) {
+			sdata_info(sdata,
+				   "AP has invalid WMM params (missing AC %d), using defaults\n",
+				   ac);
+			return false;
+		}
+	}
+
 	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
 		mlme_dbg(sdata,
 			 "WMM AC=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n",
@@ -2196,8 +2278,9 @@
 		    !ifmgd->have_beacon)
 			drv_mgd_prepare_tx(sdata->local, sdata, 0);
 
-		ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype,
-					       reason, tx, frame_buf);
+		ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid,
+					       ifmgd->bssid, stype, reason,
+					       tx, frame_buf);
 	}
 
 	/* flush out frame - make sure the deauth was actually sent */
@@ -2440,7 +2523,10 @@
 
 	if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
 		ifmgd->nullfunc_failed = false;
-		ieee80211_send_nullfunc(sdata->local, sdata, false);
+		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+			ifmgd->probe_send_count--;
+		else
+			ieee80211_send_nullfunc(sdata->local, sdata, false);
 	} else {
 		int ssid_len;
 
@@ -2547,7 +2633,8 @@
 
 	rcu_read_lock();
 	ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID);
-	if (WARN_ON_ONCE(ssid == NULL))
+	if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN,
+		      "invalid SSID element (len=%d)", ssid ? ssid[1] : -1))
 		ssid_len = 0;
 	else
 		ssid_len = ssid[1];
@@ -2744,7 +2831,8 @@
 	u32 tx_flags = 0;
 
 	pos = mgmt->u.auth.variable;
-	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
+			       mgmt->bssid, auth_data->bss->bssid);
 	if (!elems.challenge)
 		return;
 	auth_data->expected_transaction = 4;
@@ -2759,13 +2847,44 @@
 			    auth_data->key_idx, tx_flags);
 }
 
+static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata,
+				    const u8 *bssid)
+{
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	struct sta_info *sta;
+	bool result = true;
+
+	sdata_info(sdata, "authenticated\n");
+	ifmgd->auth_data->done = true;
+	ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC;
+	ifmgd->auth_data->timeout_started = true;
+	run_again(sdata, ifmgd->auth_data->timeout);
+
+	/* move station state to auth */
+	mutex_lock(&sdata->local->sta_mtx);
+	sta = sta_info_get(sdata, bssid);
+	if (!sta) {
+		WARN_ONCE(1, "%s: STA %pM not found", sdata->name, bssid);
+		result = false;
+		goto out;
+	}
+	if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) {
+		sdata_info(sdata, "failed moving %pM to auth\n", bssid);
+		result = false;
+		goto out;
+	}
+
+out:
+	mutex_unlock(&sdata->local->sta_mtx);
+	return result;
+}
+
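The transaction-number rule that ieee80211_rx_mgmt_auth() applies below for SAE can be read as a standalone predicate; a sketch assuming SAE only defines transactions 1 (Commit) and 2 (Confirm), with a hypothetical helper name:

	static bool sae_transaction_acceptable(u16 expected, u16 transaction)
	{
		/* never step backwards, and nothing exists beyond Confirm */
		return transaction >= expected && transaction <= 2;
	}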
 static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_mgmt *mgmt, size_t len)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 bssid[ETH_ALEN];
 	u16 auth_alg, auth_transaction, status_code;
-	struct sta_info *sta;
 	struct ieee80211_event event = {
 		.type = MLME_EVENT,
 		.u.mlme.data = AUTH_EVENT,
@@ -2789,7 +2908,11 @@
 	status_code = le16_to_cpu(mgmt->u.auth.status_code);
 
 	if (auth_alg != ifmgd->auth_data->algorithm ||
-	    auth_transaction != ifmgd->auth_data->expected_transaction) {
+	    (auth_alg != WLAN_AUTH_SAE &&
+	     auth_transaction != ifmgd->auth_data->expected_transaction) ||
+	    (auth_alg == WLAN_AUTH_SAE &&
+	     (auth_transaction < ifmgd->auth_data->expected_transaction ||
+	      auth_transaction > 2))) {
 		sdata_info(sdata, "%pM unexpected authentication state: alg %d (expected %d) transact %d (expected %d)\n",
 			   mgmt->sa, auth_alg, ifmgd->auth_data->algorithm,
 			   auth_transaction,
@@ -2832,35 +2955,17 @@
 
 	event.u.mlme.status = MLME_SUCCESS;
 	drv_event_callback(sdata->local, sdata, &event);
-	sdata_info(sdata, "authenticated\n");
-	ifmgd->auth_data->done = true;
-	ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC;
-	ifmgd->auth_data->timeout_started = true;
-	run_again(sdata, ifmgd->auth_data->timeout);
-
-	if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
-	    ifmgd->auth_data->expected_transaction != 2) {
-		/*
-		 * Report auth frame to user space for processing since another
-		 * round of Authentication frames is still needed.
-		 */
-		cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
-		return;
+	if (ifmgd->auth_data->algorithm != WLAN_AUTH_SAE ||
+	    (auth_transaction == 2 &&
+	     ifmgd->auth_data->expected_transaction == 2)) {
+		if (!ieee80211_mark_sta_auth(sdata, bssid))
+			goto out_err;
+	} else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
+		   auth_transaction == 2) {
+		sdata_info(sdata, "SAE peer confirmed\n");
+		ifmgd->auth_data->peer_confirmed = true;
 	}
 
-	/* move station state to auth */
-	mutex_lock(&sdata->local->sta_mtx);
-	sta = sta_info_get(sdata, bssid);
-	if (!sta) {
-		WARN_ONCE(1, "%s: STA %pM not found", sdata->name, bssid);
-		goto out_err;
-	}
-	if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) {
-		sdata_info(sdata, "failed moving %pM to auth\n", bssid);
-		goto out_err;
-	}
-	mutex_unlock(&sdata->local->sta_mtx);
-
 	cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
 	return;
  out_err:
@@ -2871,7 +2976,7 @@
 #define case_WLAN(type) \
 	case WLAN_REASON_##type: return #type
 
-static const char *ieee80211_get_reason_code_string(u16 reason_code)
+const char *ieee80211_get_reason_code_string(u16 reason_code)
 {
 	switch (reason_code) {
 	case_WLAN(UNSPECIFIED);
@@ -2936,6 +3041,11 @@
 	if (len < 24 + 2)
 		return;
 
+	if (!ether_addr_equal(mgmt->bssid, mgmt->sa)) {
+		ieee80211_tdls_handle_disconnect(sdata, mgmt->sa, reason_code);
+		return;
+	}
+
 	if (ifmgd->associated &&
 	    ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) {
 		const u8 *bssid = ifmgd->associated->bssid;
@@ -2985,6 +3095,11 @@
 
 	reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
 
+	if (!ether_addr_equal(mgmt->bssid, mgmt->sa)) {
+		ieee80211_tdls_handle_disconnect(sdata, mgmt->sa, reason_code);
+		return;
+	}
+
 	sdata_info(sdata, "disassociated from %pM (Reason: %u=%s)\n",
 		   mgmt->sa, reason_code,
 		   ieee80211_get_reason_code_string(reason_code));
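The new source-address checks work because, in an infrastructure BSS, a deauth or disassoc from the AP always carries SA == BSSID; a mismatch can only come from a TDLS peer, so only the TDLS link is torn down. Addressing example (illustrative values):

	/* STA associated to AP with BSSID B, TDLS link to peer P:
	 *   frame with SA == B == BSSID  -> normal BSS disconnect path
	 *   frame with SA == P != BSSID  -> ieee80211_tdls_handle_disconnect()
	 */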
@@ -3043,6 +3158,32 @@
 	}
 }
 
+static bool ieee80211_twt_req_supported(const struct sta_info *sta,
+					const struct ieee802_11_elems *elems)
+{
+	if (elems->ext_capab_len < 10)
+		return false;
+
+	if (!(elems->ext_capab[9] & WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT))
+		return false;
+
+	return sta->sta.he_cap.he_cap_elem.mac_cap_info[0] &
+		IEEE80211_HE_MAC_CAP0_TWT_RES;
+}
+
+static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
+				    struct sta_info *sta,
+				    struct ieee802_11_elems *elems)
+{
+	bool twt = ieee80211_twt_req_supported(sta, elems);
+
+	if (sdata->vif.bss_conf.twt_requester != twt) {
+		sdata->vif.bss_conf.twt_requester = twt;
+		return BSS_CHANGED_TWT;
+	}
+	return 0;
+}
+
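Both call sites of ieee80211_recalc_twt_req() in this patch feed the returned flag into the accumulated BSS_CHANGED_* mask, following the usual mac80211 notification pattern; roughly (a sketch, not a literal excerpt):

	u32 changed = 0;

	changed |= ieee80211_recalc_twt_req(sdata, sta, &elems);
	/* ... other recalculations ... */
	if (changed)
		ieee80211_bss_info_change_notify(sdata, changed);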
 static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 				    struct cfg80211_bss *cbss,
 				    struct ieee80211_mgmt *mgmt, size_t len)
@@ -3082,7 +3223,8 @@
 	}
 
 	pos = mgmt->u.assoc_resp.variable;
-	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
+			       mgmt->bssid, assoc_data->bss->bssid);
 
 	if (!elems.supp_rates) {
 		sdata_info(sdata, "no SuppRates element in AssocResp\n");
@@ -3119,7 +3261,9 @@
 			return false;
 
 		ieee802_11_parse_elems(bss_ies->data, bss_ies->len,
-				       false, &bss_elems);
+				       false, &bss_elems,
+				       mgmt->bssid,
+				       assoc_data->bss->bssid);
 		if (assoc_data->wmm &&
 		    !elems.wmm_param && bss_elems.wmm_param) {
 			elems.wmm_param = bss_elems.wmm_param;
@@ -3196,16 +3340,6 @@
 		goto out;
 	}
 
-	/*
-	 * If AP doesn't support HT, or it doesn't have HE mandatory IEs, mark
-	 * HE as disabled. If on the 5GHz band, make sure it supports VHT.
-	 */
-	if (ifmgd->flags & IEEE80211_STA_DISABLE_HT ||
-	    (sband->band == NL80211_BAND_5GHZ &&
-	     ifmgd->flags & IEEE80211_STA_DISABLE_VHT) ||
-	    (!elems.he_cap && !elems.he_operation))
-		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
-
 	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
 	    (!elems.he_cap || !elems.he_operation)) {
 		mutex_unlock(&sdata->local->sta_mtx);
@@ -3232,24 +3366,23 @@
 						  sta);
 
 		bss_conf->he_support = sta->sta.he_cap.has_he;
+		changed |= ieee80211_recalc_twt_req(sdata, sta, &elems);
 	} else {
 		bss_conf->he_support = false;
+		bss_conf->twt_requester = false;
 	}
 
 	if (bss_conf->he_support) {
-		u32 he_oper_params =
-			le32_to_cpu(elems.he_operation->he_oper_params);
+		bss_conf->bss_color =
+			le32_get_bits(elems.he_operation->he_oper_params,
+				      IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
 
-		bss_conf->bss_color = he_oper_params &
-				      IEEE80211_HE_OPERATION_BSS_COLOR_MASK;
 		bss_conf->htc_trig_based_pkt_ext =
-			(he_oper_params &
-			 IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK) <<
-			IEEE80211_HE_OPERATION_DFLT_PE_DURATION_OFFSET;
+			le32_get_bits(elems.he_operation->he_oper_params,
+			      IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK);
 		bss_conf->frame_time_rts_th =
-			(he_oper_params &
-			 IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK) <<
-			IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET;
+			le32_get_bits(elems.he_operation->he_oper_params,
+			      IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK);
 
 		bss_conf->multi_sta_back_32bit =
 			sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
@@ -3263,9 +3396,19 @@
 		if (elems.uora_element)
 			bss_conf->uora_ocw_range = elems.uora_element[0];
 
+		ieee80211_he_op_ie_to_bss_conf(&sdata->vif, elems.he_operation);
+		ieee80211_he_spr_ie_to_bss_conf(&sdata->vif, elems.he_spr);
 		/* TODO: OPEN: what happens if BSS color disable is set? */
 	}
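The he_oper_params reads above now use le32_get_bits() from linux/bitfield.h, which folds the previous mask-and-shift pairs into one step: mask the field out of the CPU-order value, then shift it down by the position of the mask's lowest set bit. A sketch of the equivalence for the BSS color field (the function name is illustrative):

	static u8 bss_color_example(__le32 he_oper_params)
	{
		u32 v = le32_to_cpu(he_oper_params);

		/* equivalent to le32_get_bits(he_oper_params, MASK) */
		return (v & IEEE80211_HE_OPERATION_BSS_COLOR_MASK) >>
		       (__builtin_ffs(IEEE80211_HE_OPERATION_BSS_COLOR_MASK) - 1);
	}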
 
+	if (cbss->transmitted_bss) {
+		bss_conf->nontransmitted = true;
+		ether_addr_copy(bss_conf->transmitter_bssid,
+				cbss->transmitted_bss->bssid);
+		bss_conf->bssid_indicator = cbss->max_bssid_indicator;
+		bss_conf->bssid_index = cbss->bssid_index;
+	}
+
 	/*
 	 * Some APs, e.g. Netgear WNDR3700, report invalid HT operation data
 	 * in their association response, so ignore that data for our own
@@ -3321,6 +3464,7 @@
 	 * 4-bit value.
 	 */
 	ifmgd->wmm_last_param_set = -1;
+	ifmgd->mu_edca_last_param_set = -1;
 
 	if (ifmgd->flags & IEEE80211_STA_DISABLE_WMM) {
 		ieee80211_set_wmm_default(sdata, false, false);
@@ -3425,7 +3569,8 @@
 		return;
 
 	pos = mgmt->u.assoc_resp.variable;
-	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
+			       mgmt->bssid, assoc_data->bss->bssid);
 
 	if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
 	    elems.timeout_int &&
@@ -3477,13 +3622,13 @@
 				uapsd_queues |= ieee80211_ac_to_qos_mask[ac];
 	}
 
-	cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues);
+	cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues,
+			       ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len);
 }
 
 static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_mgmt *mgmt, size_t len,
-				  struct ieee80211_rx_status *rx_status,
-				  struct ieee802_11_elems *elems)
+				  struct ieee80211_rx_status *rx_status)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_bss *bss;
@@ -3495,8 +3640,7 @@
 	if (!channel)
 		return;
 
-	bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
-					channel);
+	bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, channel);
 	if (bss) {
 		sdata->vif.bss_conf.beacon_rate = bss->beacon_rate;
 		ieee80211_rx_bss_put(local, bss);
@@ -3511,7 +3655,6 @@
 	struct ieee80211_if_managed *ifmgd;
 	struct ieee80211_rx_status *rx_status = (void *) skb->cb;
 	size_t baselen, len = skb->len;
-	struct ieee802_11_elems elems;
 
 	ifmgd = &sdata->u.mgd;
 
@@ -3524,10 +3667,7 @@
 	if (baselen > len)
 		return;
 
-	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
-			       false, &elems);
-
-	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
+	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status);
 
 	if (ifmgd->associated &&
 	    ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
@@ -3654,6 +3794,16 @@
 	}
 }
 
+static bool ieee80211_rx_our_beacon(const u8 *tx_bssid,
+				    struct cfg80211_bss *bss)
+{
+	if (ether_addr_equal(tx_bssid, bss->bssid))
+		return true;
+	if (!bss->transmitted_bss)
+		return false;
+	return ether_addr_equal(tx_bssid, bss->transmitted_bss->bssid);
+}
+
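With multiple BSSID, beacons for a nontransmitted profile are sent by the transmitting BSS, so the transmitter address in the frame never matches a nontransmitted BSSID directly. Expected behaviour, for illustration:

	/* AP advertising transmitted BSSID T plus nontransmitted profile N,
	 * while we are associated to N (beacons always carry T):
	 *   ieee80211_rx_our_beacon(T, bss_of_N) -> true  (transmitted_bss)
	 *   ieee80211_rx_our_beacon(N, bss_of_N) -> true  (direct match)
	 *   any other transmitter address        -> false
	 */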
 static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_mgmt *mgmt, size_t len,
 				     struct ieee80211_rx_status *rx_status)
@@ -3695,15 +3845,16 @@
 	rcu_read_unlock();
 
 	if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
-	    ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
+	    ieee80211_rx_our_beacon(mgmt->bssid, ifmgd->assoc_data->bss)) {
 		ieee802_11_parse_elems(mgmt->u.beacon.variable,
-				       len - baselen, false, &elems);
+				       len - baselen, false, &elems,
+				       mgmt->bssid,
+				       ifmgd->assoc_data->bss->bssid);
 
-		ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
-		if (elems.tim && !elems.parse_error) {
-			const struct ieee80211_tim_ie *tim_ie = elems.tim;
-			ifmgd->dtim_period = tim_ie->dtim_period;
-		}
+		ieee80211_rx_bss_info(sdata, mgmt, len, rx_status);
+
+		if (elems.dtim_period)
+			ifmgd->dtim_period = elems.dtim_period;
 		ifmgd->have_beacon = true;
 		ifmgd->assoc_data->need_beacon = false;
 		if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
@@ -3711,12 +3862,17 @@
 				le64_to_cpu(mgmt->u.beacon.timestamp);
 			sdata->vif.bss_conf.sync_device_ts =
 				rx_status->device_timestamp;
-			if (elems.tim)
-				sdata->vif.bss_conf.sync_dtim_count =
-					elems.tim->dtim_count;
-			else
-				sdata->vif.bss_conf.sync_dtim_count = 0;
+			sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count;
 		}
+
+		if (elems.mbssid_config_ie)
+			bss_conf->profile_periodicity =
+				elems.mbssid_config_ie->profile_periodicity;
+
+		if (elems.ext_capab_len >= 11 &&
+		    (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
+			bss_conf->ema_ap = true;
+
 		/* continue assoc process */
 		ifmgd->assoc_data->timeout = jiffies;
 		ifmgd->assoc_data->timeout_started = true;
@@ -3725,7 +3881,7 @@
 	}
 
 	if (!ifmgd->associated ||
-	    !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
 	    !ieee80211_rx_our_beacon(mgmt->bssid, ifmgd->associated))
 		return;
 	bssid = ifmgd->associated->bssid;
 
@@ -3748,7 +3904,8 @@
 	ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
 	ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable,
 					  len - baselen, false, &elems,
-					  care_about_ies, ncrc);
+					  care_about_ies, ncrc,
+					  mgmt->bssid, bssid);
 
 	if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
 	    ieee80211_check_tim(elems.tim, elems.tim_len, ifmgd->aid)) {
@@ -3820,11 +3977,7 @@
 			le64_to_cpu(mgmt->u.beacon.timestamp);
 		sdata->vif.bss_conf.sync_device_ts =
 			rx_status->device_timestamp;
-		if (elems.tim)
-			sdata->vif.bss_conf.sync_dtim_count =
-				elems.tim->dtim_count;
-		else
-			sdata->vif.bss_conf.sync_dtim_count = 0;
+		sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count;
 	}
 
 	if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid)
@@ -3832,7 +3985,7 @@
 	ifmgd->beacon_crc = ncrc;
 	ifmgd->beacon_crc_valid = true;
 
-	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
+	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status);
 
 	ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
 					 rx_status->device_timestamp,
@@ -3850,10 +4003,7 @@
 	 */
 	if (!ifmgd->have_beacon) {
 		/* a few bogus APs send dtim_period = 0 or no TIM IE */
-		if (elems.tim)
-			bss_conf->dtim_period = elems.tim->dtim_period ?: 1;
-		else
-			bss_conf->dtim_period = 1;
+		bss_conf->dtim_period = elems.dtim_period ?: 1;
 
 		changed |= BSS_CHANGED_BEACON_INFO;
 		ifmgd->have_beacon = true;
@@ -3878,6 +4028,8 @@
 	mutex_lock(&local->sta_mtx);
 	sta = sta_info_get(sdata, bssid);
 
+	changed |= ieee80211_recalc_twt_req(sdata, sta, &elems);
+
 	if (ieee80211_config_bw(sdata, sta,
 				elems.ht_cap_elem, elems.ht_operation,
 				elems.vht_operation, elems.he_operation,
@@ -3953,9 +4105,10 @@
 			if (ies_len < 0)
 				break;
 
+			/* CSA IE cannot be overridden, no need for BSSID */
 			ieee802_11_parse_elems(
 				mgmt->u.action.u.chan_switch.variable,
-				ies_len, true, &elems);
+				ies_len, true, &elems, mgmt->bssid, NULL);
 
 			if (elems.parse_error)
 				break;
@@ -3972,9 +4125,13 @@
 			if (ies_len < 0)
 				break;
 
+			/*
+			 * extended CSA IE can't be overridden, no need for
+			 * BSSID
+			 */
 			ieee802_11_parse_elems(
 				mgmt->u.action.u.ext_chan_switch.variable,
-				ies_len, true, &elems);
+				ies_len, true, &elems, mgmt->bssid, NULL);
 
 			if (elems.parse_error)
 				break;
@@ -4354,7 +4511,7 @@
 		 * cfg80211 won't know and won't actually abort those attempts,
 		 * thus we need to do that ourselves.
 		 */
-		ieee80211_send_deauth_disassoc(sdata, bssid,
+		ieee80211_send_deauth_disassoc(sdata, bssid, bssid,
 					       IEEE80211_STYPE_DEAUTH,
 					       WLAN_REASON_DEAUTH_LEAVING,
 					       false, frame_buf);
@@ -4644,8 +4801,10 @@
 		}
 	}
 
-	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
-	    ieee80211_get_he_sta_cap(sband)) {
+	if (!ieee80211_get_he_sta_cap(sband))
+		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) {
 		const struct cfg80211_bss_ies *ies;
 		const u8 *he_oper_ie;
 
@@ -4713,6 +4872,40 @@
 	return ret;
 }
 
+static bool ieee80211_get_dtim(const struct cfg80211_bss_ies *ies,
+			       u8 *dtim_count, u8 *dtim_period)
+{
+	const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, ies->data, ies->len);
+	const u8 *idx_ie = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, ies->data,
+					 ies->len);
+	const struct ieee80211_tim_ie *tim = NULL;
+	const struct ieee80211_bssid_index *idx;
+	bool valid = tim_ie && tim_ie[1] >= 2;
+
+	if (valid)
+		tim = (void *)(tim_ie + 2);
+
+	if (dtim_count)
+		*dtim_count = valid ? tim->dtim_count : 0;
+
+	if (dtim_period)
+		*dtim_period = valid ? tim->dtim_period : 0;
+
+	/* Check if value is overridden by non-transmitted profile */
+	if (!idx_ie || idx_ie[1] < 3)
+		return valid;
+
+	idx = (void *)(idx_ie + 2);
+
+	if (dtim_count)
+		*dtim_count = idx->dtim_count;
+
+	if (dtim_period)
+		*dtim_period = idx->dtim_period;
+
+	return true;
+}
+
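A worked example of the two elements ieee80211_get_dtim() inspects, assuming the standard layouts (TIM is element ID 5; the Multiple BSSID-Index element, ID 85/0x55, carries index, DTIM period, DTIM count):

	/* Beacon carrying
	 *   TIM:        05 04 01 03 00 00   (dtim_count 1, dtim_period 3)
	 *   MBSSID idx: 55 03 02 04 00      (index 2, period 4, count 0)
	 * returns dtim_count = 0 and dtim_period = 4: for a nontransmitted
	 * profile, the index element overrides the transmitted TIM values.
	 */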
 static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 				     struct cfg80211_bss *cbss, bool assoc,
 				     bool override)
@@ -4787,7 +4980,12 @@
 			basic_rates = BIT(min_rate_index);
 		}
 
-		new_sta->sta.supp_rates[cbss->channel->band] = rates;
+		if (rates)
+			new_sta->sta.supp_rates[cbss->channel->band] = rates;
+		else
+			sdata_info(sdata,
+				   "No rates found, keeping mandatory only\n");
+
 		sdata->vif.bss_conf.basic_rates = basic_rates;
 
 		/* cf. IEEE 802.11 9.2.12 */
@@ -4804,17 +5002,13 @@
 		rcu_read_lock();
 		ies = rcu_dereference(cbss->beacon_ies);
 		if (ies) {
-			const u8 *tim_ie;
-
 			sdata->vif.bss_conf.sync_tsf = ies->tsf;
 			sdata->vif.bss_conf.sync_device_ts =
 				bss->device_ts_beacon;
-			tim_ie = cfg80211_find_ie(WLAN_EID_TIM,
-						  ies->data, ies->len);
-			if (tim_ie && tim_ie[1] >= 2)
-				sdata->vif.bss_conf.sync_dtim_count = tim_ie[2];
-			else
-				sdata->vif.bss_conf.sync_dtim_count = 0;
+
+			ieee80211_get_dtim(ies,
+					   &sdata->vif.bss_conf.sync_dtim_count,
+					   NULL);
 		} else if (!ieee80211_hw_check(&sdata->local->hw,
 					       TIMING_BEACON_ONLY)) {
 			ies = rcu_dereference(cbss->proberesp_ies);
@@ -4879,6 +5073,7 @@
 	struct ieee80211_mgd_auth_data *auth_data;
 	u16 auth_alg;
 	int err;
+	bool cont_auth;
 
 	/* prepare auth data structure */
 
@@ -4887,7 +5082,7 @@
 		auth_alg = WLAN_AUTH_OPEN;
 		break;
 	case NL80211_AUTHTYPE_SHARED_KEY:
-		if (IS_ERR(local->wep_tx_tfm))
+		if (fips_enabled)
 			return -EOPNOTSUPP;
 		auth_alg = WLAN_AUTH_SHARED_KEY;
 		break;
@@ -4913,6 +5108,9 @@
 		return -EOPNOTSUPP;
 	}
 
+	if (ifmgd->assoc_data)
+		return -EBUSY;
+
 	auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
 			    req->ie_len, GFP_KERNEL);
 	if (!auth_data)
@@ -4932,6 +5130,13 @@
 		auth_data->data_len += req->auth_data_len - 4;
 	}
 
+	/* Check if we are continuing an authentication exchange, or trying to
+	 * authenticate with the same BSS that we were already in the process
+	 * of authenticating with, and avoid removal and re-addition of the
+	 * STA entry in ieee80211_prep_connection().
+	 */
+	cont_auth = ifmgd->auth_data && req->bss == ifmgd->auth_data->bss;
+
 	if (req->ie && req->ie_len) {
 		memcpy(&auth_data->data[auth_data->data_len],
 		       req->ie, req->ie_len);
@@ -4948,18 +5153,26 @@
 
 	/* try to authenticate/probe */
 
-	if ((ifmgd->auth_data && !ifmgd->auth_data->done) ||
-	    ifmgd->assoc_data) {
-		err = -EBUSY;
-		goto err_free;
+	if (ifmgd->auth_data) {
+		if (cont_auth && req->auth_type == NL80211_AUTHTYPE_SAE) {
+			auth_data->peer_confirmed =
+				ifmgd->auth_data->peer_confirmed;
+		}
+		ieee80211_destroy_auth_data(sdata, cont_auth);
 	}
 
-	if (ifmgd->auth_data)
-		ieee80211_destroy_auth_data(sdata, false);
-
 	/* prep auth_data so we don't go into idle on disassoc */
 	ifmgd->auth_data = auth_data;
 
+	/* If this is a continuation of an ongoing SAE authentication exchange
+	 * (i.e., a request to send SAE Confirm) and the peer has already
+	 * confirmed, mark authentication completed since we are about to send
+	 * out SAE Confirm.
+	 */
+	if (cont_auth && req->auth_type == NL80211_AUTHTYPE_SAE &&
+	    auth_data->peer_confirmed && auth_data->sae_trans == 2)
+		ieee80211_mark_sta_auth(sdata, req->bss->bssid);
+
 	if (ifmgd->associated) {
 		u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
 
@@ -4977,7 +5190,7 @@
 
 	sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid);
 
-	err = ieee80211_prep_connection(sdata, req->bss, false, false);
+	err = ieee80211_prep_connection(sdata, req->bss, cont_auth, false);
 	if (err)
 		goto err_clear;
 
@@ -4998,7 +5211,6 @@
 	mutex_lock(&sdata->local->mtx);
 	ieee80211_vif_release_channel(sdata);
 	mutex_unlock(&sdata->local->mtx);
- err_free:
 	kfree(auth_data);
 	return err;
 }
@@ -5022,7 +5234,7 @@
 
 	rcu_read_lock();
 	ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID);
-	if (!ssidie) {
+	if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) {
 		rcu_read_unlock();
 		kfree(assoc_data);
 		return -EINVAL;
@@ -5086,7 +5298,7 @@
 			ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
 			ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
 			netdev_info(sdata->dev,
-				    "disabling HE/HT/VHT due to WEP/TKIP use\n");
+				    "disabling HT/VHT/HE due to WEP/TKIP use\n");
 		}
 	}
 
@@ -5266,17 +5478,12 @@
 		assoc_data->timeout_started = true;
 		assoc_data->need_beacon = true;
 	} else if (beacon_ies) {
-		const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM,
-						    beacon_ies->data,
-						    beacon_ies->len);
+		const u8 *ie;
 		u8 dtim_count = 0;
 
-		if (tim_ie && tim_ie[1] >= sizeof(struct ieee80211_tim_ie)) {
-			const struct ieee80211_tim_ie *tim;
-			tim = (void *)(tim_ie + 2);
-			ifmgd->dtim_period = tim->dtim_period;
-			dtim_count = tim->dtim_count;
-		}
+		ieee80211_get_dtim(beacon_ies, &dtim_count,
+				   &ifmgd->dtim_period);
+
 		ifmgd->have_beacon = true;
 		assoc_data->timeout = jiffies;
 		assoc_data->timeout_started = true;
@@ -5287,6 +5494,17 @@
 				bss->device_ts_beacon;
 			sdata->vif.bss_conf.sync_dtim_count = dtim_count;
 		}
+
+		ie = cfg80211_find_ext_ie(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
+					  beacon_ies->data, beacon_ies->len);
+		if (ie && ie[1] >= 3)
+			sdata->vif.bss_conf.profile_periodicity = ie[4];
+
+		ie = cfg80211_find_ie(WLAN_EID_EXT_CAPABILITY,
+				      beacon_ies->data, beacon_ies->len);
+		if (ie && ie[1] >= 11 &&
+		    (ie[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
+			sdata->vif.bss_conf.ema_ap = true;
 	} else {
 		assoc_data->timeout = jiffies;
 		assoc_data->timeout_started = true;
@@ -5334,7 +5552,7 @@
 			   ieee80211_get_reason_code_string(req->reason_code));
 
 		drv_mgd_prepare_tx(sdata->local, sdata, 0);
-		ieee80211_send_deauth_disassoc(sdata, req->bssid,
+		ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
 					       IEEE80211_STYPE_DEAUTH,
 					       req->reason_code, tx,
 					       frame_buf);
@@ -5354,7 +5572,7 @@
 			   ieee80211_get_reason_code_string(req->reason_code));
 
 		drv_mgd_prepare_tx(sdata->local, sdata, 0);
-		ieee80211_send_deauth_disassoc(sdata, req->bssid,
+		ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
 					       IEEE80211_STYPE_DEAUTH,
 					       req->reason_code, tx,
 					       frame_buf);
@@ -5444,6 +5662,9 @@
 		ifmgd->teardown_skb = NULL;
 		ifmgd->orig_teardown_skb = NULL;
 	}
+	kfree(ifmgd->assoc_req_ies);
+	ifmgd->assoc_req_ies = NULL;
+	ifmgd->assoc_req_ies_len = 0;
 	spin_unlock_bh(&ifmgd->teardown_lock);
 	del_timer_sync(&ifmgd->timer);
 	sdata_unlock(sdata);
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index d351dc1..7c1a735 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * OCB mode implementation
  *
@@ -5,10 +6,6 @@
  *            (c) 2014 Volkswagen Group Research
  * Author:    Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz>
  * Funded by: Volkswagen Group Research
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/delay.h>
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 8ef4153..c710504 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Off-channel operation helpers
  *
@@ -7,10 +8,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2009	Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2019 Intel Corporation
  */
 #include <linux/export.h>
 #include <net/mac80211.h>
@@ -202,6 +200,10 @@
 		cfg80211_remain_on_channel_expired(&roc->sdata->wdev,
 						   roc->cookie, roc->chan,
 						   GFP_KERNEL);
+	else
+		cfg80211_tx_mgmt_expired(&roc->sdata->wdev,
+					 roc->mgmt_tx_cookie,
+					 roc->chan, GFP_KERNEL);
 
 	list_del(&roc->list);
 	kfree(roc);
@@ -731,7 +733,7 @@
 	}
 
 	if (local->ops->remain_on_channel) {
-		ret = drv_cancel_remain_on_channel(local);
+		ret = drv_cancel_remain_on_channel(local, roc->sdata);
 		if (WARN_ON_ONCE(ret)) {
 			mutex_unlock(&local->mtx);
 			return ret;
@@ -990,7 +992,7 @@
 		if (roc->started) {
 			if (local->ops->remain_on_channel) {
 				/* can race, so ignore return value */
-				drv_cancel_remain_on_channel(local);
+				drv_cancel_remain_on_channel(local, sdata);
 				ieee80211_roc_notify_destroy(roc);
 			} else {
 				roc->abort = true;
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 76f303f..a1e9fc7 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
  * Copyright 2017	Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -357,8 +354,10 @@
 		break;
 	}
 	WARN_ONCE(i == sband->n_bitrates,
-		  "no supported rates (0x%x) in rate_mask 0x%x with flags 0x%x\n",
+		  "no supported rates for sta %pM (0x%x, band %d) in rate_mask 0x%x with flags 0x%x\n",
+		  sta ? sta->addr : NULL,
 		  sta ? sta->supp_rates[sband->band] : -1,
+		  sband->band,
 		  rate_mask, rate_flags);
 
 	info->control.rates[0].count =
@@ -369,9 +368,8 @@
 }
 
 
-bool rate_control_send_low(struct ieee80211_sta *pubsta,
-			   void *priv_sta,
-			   struct ieee80211_tx_rate_control *txrc)
+static bool rate_control_send_low(struct ieee80211_sta *pubsta,
+				  struct ieee80211_tx_rate_control *txrc)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
 	struct ieee80211_supported_band *sband = txrc->sband;
@@ -379,7 +377,7 @@
 	int mcast_rate;
 	bool use_basicrate = false;
 
-	if (!pubsta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) {
+	if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) {
 		__rate_control_send_low(txrc->hw, sband, pubsta, info,
 					txrc->rate_idx_mask);
 
@@ -405,7 +403,6 @@
 	}
 	return false;
 }
-EXPORT_SYMBOL(rate_control_send_low);
 
 static bool rate_idx_match_legacy_mask(s8 *rate_idx, int n_bitrates, u32 mask)
 {
@@ -888,26 +885,29 @@
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
 	int i;
 
-	if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) {
-		ista = &sta->sta;
-		priv_sta = sta->rate_ctrl_priv;
-	}
-
 	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
 		info->control.rates[i].idx = -1;
 		info->control.rates[i].flags = 0;
 		info->control.rates[i].count = 0;
 	}
 
+	if (rate_control_send_low(sta ? &sta->sta : NULL, txrc))
+		return;
+
 	if (ieee80211_hw_check(&sdata->local->hw, HAS_RATE_CONTROL))
 		return;
 
+	if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) {
+		ista = &sta->sta;
+		priv_sta = sta->rate_ctrl_priv;
+	}
+
 	if (ista) {
 		spin_lock_bh(&sta->rate_ctrl_lock);
 		ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
 		spin_unlock_bh(&sta->rate_ctrl_lock);
 	} else {
-		ref->ops->get_rate(ref->priv, NULL, NULL, txrc);
+		rate_control_send_low(NULL, txrc);
 	}
 
 	if (ieee80211_hw_check(&sdata->local->hw, SUPPORTS_RC_TABLE))
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 8212bfe..5397c6d 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
  * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef IEEE80211_RATE_H
@@ -63,15 +60,6 @@
 #endif
 }
 
-static inline void rate_control_remove_sta_debugfs(struct sta_info *sta)
-{
-#ifdef CONFIG_MAC80211_DEBUGFS
-	struct rate_control_ref *ref = sta->rate_ctrl;
-	if (ref && ref->ops->remove_sta_debugfs)
-		ref->ops->remove_sta_debugfs(ref->priv, sta->rate_ctrl_priv);
-#endif
-}
-
 void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata);
 
 /* Get a reference to the rate control algorithm. If `name' is NULL, get the
@@ -95,18 +83,5 @@
 }
 #endif
 
-#ifdef CONFIG_MAC80211_RC_MINSTREL_HT
-int rc80211_minstrel_ht_init(void);
-void rc80211_minstrel_ht_exit(void);
-#else
-static inline int rc80211_minstrel_ht_init(void)
-{
-	return 0;
-}
-static inline void rc80211_minstrel_ht_exit(void)
-{
-}
-#endif
-
 
 #endif /* IEEE80211_RATE_H */
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 07fb219..ee86c33 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -167,12 +167,6 @@
 		if (unlikely(!mrs->att_hist)) {
 			mrs->prob_ewma = cur_prob;
 		} else {
-			/* update exponential weighted moving variance */
-			mrs->prob_ewmv = minstrel_ewmv(mrs->prob_ewmv,
-							cur_prob,
-							mrs->prob_ewma,
-							EWMA_LEVEL);
-
 			/* update exponential weighted moving average */
 			mrs->prob_ewma = minstrel_ewma(mrs->prob_ewma,
 						       cur_prob,
@@ -346,10 +340,6 @@
 	int delta;
 	int sampling_ratio;
 
-	/* management/no-ack frames do not use rate control */
-	if (rate_control_send_low(sta, priv_sta, txrc))
-		return;
-
 	/* check multi-rate-retry capabilities & adjust lookaround_rate */
 	mrr_capable = mp->has_mrr &&
 		      !txrc->rts &&
@@ -572,141 +562,6 @@
 	minstrel_update_rates(mp, mi);
 }
 
-static void *
-minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
-{
-	struct ieee80211_supported_band *sband;
-	struct minstrel_sta_info *mi;
-	struct minstrel_priv *mp = priv;
-	struct ieee80211_hw *hw = mp->hw;
-	int max_rates = 0;
-	int i;
-
-	mi = kzalloc(sizeof(struct minstrel_sta_info), gfp);
-	if (!mi)
-		return NULL;
-
-	for (i = 0; i < NUM_NL80211_BANDS; i++) {
-		sband = hw->wiphy->bands[i];
-		if (sband && sband->n_bitrates > max_rates)
-			max_rates = sband->n_bitrates;
-	}
-
-	mi->r = kcalloc(max_rates, sizeof(struct minstrel_rate), gfp);
-	if (!mi->r)
-		goto error;
-
-	mi->sample_table = kmalloc_array(max_rates, SAMPLE_COLUMNS, gfp);
-	if (!mi->sample_table)
-		goto error1;
-
-	mi->last_stats_update = jiffies;
-	return mi;
-
-error1:
-	kfree(mi->r);
-error:
-	kfree(mi);
-	return NULL;
-}
-
-static void
-minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta)
-{
-	struct minstrel_sta_info *mi = priv_sta;
-
-	kfree(mi->sample_table);
-	kfree(mi->r);
-	kfree(mi);
-}
-
-static void
-minstrel_init_cck_rates(struct minstrel_priv *mp)
-{
-	static const int bitrates[4] = { 10, 20, 55, 110 };
-	struct ieee80211_supported_band *sband;
-	u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
-	int i, j;
-
-	sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ];
-	if (!sband)
-		return;
-
-	for (i = 0, j = 0; i < sband->n_bitrates; i++) {
-		struct ieee80211_rate *rate = &sband->bitrates[i];
-
-		if (rate->flags & IEEE80211_RATE_ERP_G)
-			continue;
-
-		if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
-			continue;
-
-		for (j = 0; j < ARRAY_SIZE(bitrates); j++) {
-			if (rate->bitrate != bitrates[j])
-				continue;
-
-			mp->cck_rates[j] = i;
-			break;
-		}
-	}
-}
-
-static void *
-minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
-{
-	struct minstrel_priv *mp;
-
-	mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC);
-	if (!mp)
-		return NULL;
-
-	/* contention window settings
-	 * Just an approximation. Using the per-queue values would complicate
-	 * the calculations and is probably unnecessary */
-	mp->cw_min = 15;
-	mp->cw_max = 1023;
-
-	/* number of packets (in %) to use for sampling other rates
-	 * sample less often for non-mrr packets, because the overhead
-	 * is much higher than with mrr */
-	mp->lookaround_rate = 5;
-	mp->lookaround_rate_mrr = 10;
-
-	/* maximum time that the hw is allowed to stay in one MRR segment */
-	mp->segment_size = 6000;
-
-	if (hw->max_rate_tries > 0)
-		mp->max_retry = hw->max_rate_tries;
-	else
-		/* safe default, does not necessarily have to match hw properties */
-		mp->max_retry = 7;
-
-	if (hw->max_rates >= 4)
-		mp->has_mrr = true;
-
-	mp->hw = hw;
-	mp->update_interval = 100;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
-	mp->fixed_rate_idx = (u32) -1;
-	mp->dbg_fixed_rate = debugfs_create_u32("fixed_rate_idx",
-			0666, debugfsdir, &mp->fixed_rate_idx);
-#endif
-
-	minstrel_init_cck_rates(mp);
-
-	return mp;
-}
-
-static void
-minstrel_free(void *priv)
-{
-#ifdef CONFIG_MAC80211_DEBUGFS
-	debugfs_remove(((struct minstrel_priv *)priv)->dbg_fixed_rate);
-#endif
-	kfree(priv);
-}
-
 static u32 minstrel_get_expected_throughput(void *priv_sta)
 {
 	struct minstrel_sta_info *mi = priv_sta;
@@ -725,29 +580,8 @@
 }
 
 const struct rate_control_ops mac80211_minstrel = {
-	.name = "minstrel",
 	.tx_status_ext = minstrel_tx_status,
 	.get_rate = minstrel_get_rate,
 	.rate_init = minstrel_rate_init,
-	.alloc = minstrel_alloc,
-	.free = minstrel_free,
-	.alloc_sta = minstrel_alloc_sta,
-	.free_sta = minstrel_free_sta,
-#ifdef CONFIG_MAC80211_DEBUGFS
-	.add_sta_debugfs = minstrel_add_sta_debugfs,
-	.remove_sta_debugfs = minstrel_remove_sta_debugfs,
-#endif
 	.get_expected_throughput = minstrel_get_expected_throughput,
 };
-
-int __init
-rc80211_minstrel_init(void)
-{
-	return ieee80211_rate_control_register(&mac80211_minstrel);
-}
-
-void
-rc80211_minstrel_exit(void)
-{
-	ieee80211_rate_control_unregister(&mac80211_minstrel);
-}
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index be6c3f3..51d8b2c 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __RC_MINSTREL_H
@@ -35,19 +32,6 @@
 	return old + incr;
 }
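For intuition: with minstrel's constants (EWMA_LEVEL 96 and EWMA_DIV 128, values assumed from the unchanged part of this header), the surviving minstrel_ewma() above moves a quarter of the way toward each new sample:

	/* incr = (128 - 96) * (cur - old) / 128 = (cur - old) / 4
	 * e.g. old = 50%, cur = 100%  ->  new average = 62.5%
	 */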
 
-/*
- * Perform EWMV (Exponentially Weighted Moving Variance) calculation
- */
-static inline int
-minstrel_ewmv(int old_ewmv, int cur_prob, int prob_ewma, int weight)
-{
-	int diff, incr;
-
-	diff = cur_prob - prob_ewma;
-	incr = (EWMA_DIV - weight) * diff / EWMA_DIV;
-	return weight * (old_ewmv + MINSTREL_TRUNC(diff * incr)) / EWMA_DIV;
-}
-
 struct minstrel_rate_stats {
 	/* current / last sampling period attempts/success counters */
 	u16 attempts, last_attempts;
@@ -56,11 +40,8 @@
 	/* total attempts/success counters */
 	u32 att_hist, succ_hist;
 
-	/* statistis of packet delivery probability
-	 *  prob_ewma - exponential weighted moving average of prob
-	 *  prob_ewmsd - exp. weighted moving standard deviation of prob */
+	/* prob_ewma - exponential weighted moving average of prob */
 	u16 prob_ewma;
-	u16 prob_ewmv;
 
 	/* maximum retry counts */
 	u8 retry_count;
@@ -109,16 +90,12 @@
 
 	/* sampling table */
 	u8 *sample_table;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
-	struct dentry *dbg_stats;
-	struct dentry *dbg_stats_csv;
-#endif
 };
 
 struct minstrel_priv {
 	struct ieee80211_hw *hw;
 	bool has_mrr;
+	u32 sample_switch;
 	unsigned int cw_min;
 	unsigned int cw_max;
 	unsigned int max_retry;
@@ -137,7 +114,6 @@
 	 *   - setting will be applied on next update
 	 */
 	u32 fixed_rate_idx;
-	struct dentry *dbg_fixed_rate;
 #endif
 };
 
@@ -146,17 +122,8 @@
 	char buf[];
 };
 
-/* Get EWMSD (Exponentially Weighted Moving Standard Deviation) * 10 */
-static inline int
-minstrel_get_ewmsd10(struct minstrel_rate_stats *mrs)
-{
-	unsigned int ewmv = mrs->prob_ewmv;
-	return int_sqrt(MINSTREL_TRUNC(ewmv * 1000 * 1000));
-}
-
 extern const struct rate_control_ops mac80211_minstrel;
 void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
-void minstrel_remove_sta_debugfs(void *priv, void *priv_sta);
 
 /* Recalculate success probabilities and counters for a given rate using EWMA */
 void minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs);
@@ -165,7 +132,5 @@
 /* debugfs */
 int minstrel_stats_open(struct inode *inode, struct file *file);
 int minstrel_stats_csv_open(struct inode *inode, struct file *file);
-ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos);
-int minstrel_stats_release(struct inode *inode, struct file *file);
 
 #endif
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 9ad7d63..c8afd85 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -54,22 +54,6 @@
 #include <net/mac80211.h>
 #include "rc80211_minstrel.h"
 
-ssize_t
-minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
-{
-	struct minstrel_debugfs_info *ms;
-
-	ms = file->private_data;
-	return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len);
-}
-
-int
-minstrel_stats_release(struct inode *inode, struct file *file)
-{
-	kfree(file->private_data);
-	return 0;
-}
-
 int
 minstrel_stats_open(struct inode *inode, struct file *file)
 {
@@ -86,14 +70,13 @@
 	p = ms->buf;
 	p += sprintf(p, "\n");
 	p += sprintf(p,
-		     "best   __________rate_________    ________statistics________    ____last_____    ______sum-of________\n");
+		     "best   __________rate_________    ____statistics___    ____last_____    ______sum-of________\n");
 	p += sprintf(p,
-		     "rate  [name idx airtime max_tp]  [avg(tp) avg(prob) sd(prob)]  [retry|suc|att]  [#success | #attempts]\n");
+		     "rate  [name idx airtime max_tp]  [avg(tp) avg(prob)]  [retry|suc|att]  [#success | #attempts]\n");
 
 	for (i = 0; i < mi->n_rates; i++) {
 		struct minstrel_rate *mr = &mi->r[i];
 		struct minstrel_rate_stats *mrs = &mi->r[i].stats;
-		unsigned int prob_ewmsd;
 
 		*(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' ';
 		*(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' ';
@@ -109,15 +92,13 @@
 		tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
 		tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
 		eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
-		prob_ewmsd = minstrel_get_ewmsd10(mrs);
 
-		p += sprintf(p, "%4u.%1u    %4u.%1u     %3u.%1u    %3u.%1u"
+		p += sprintf(p, "%4u.%1u    %4u.%1u     %3u.%1u"
 				"     %3u   %3u %-3u   "
 				"%9llu   %-9llu\n",
 				tp_max / 10, tp_max % 10,
 				tp_avg / 10, tp_avg % 10,
 				eprob / 10, eprob % 10,
-				prob_ewmsd / 10, prob_ewmsd % 10,
 				mrs->retry_count,
 				mrs->last_success,
 				mrs->last_attempts,
@@ -135,14 +116,6 @@
 	return 0;
 }
 
-static const struct file_operations minstrel_stat_fops = {
-	.owner = THIS_MODULE,
-	.open = minstrel_stats_open,
-	.read = minstrel_stats_read,
-	.release = minstrel_stats_release,
-	.llseek = default_llseek,
-};
-
 int
 minstrel_stats_csv_open(struct inode *inode, struct file *file)
 {
@@ -161,7 +134,6 @@
 	for (i = 0; i < mi->n_rates; i++) {
 		struct minstrel_rate *mr = &mi->r[i];
 		struct minstrel_rate_stats *mrs = &mi->r[i].stats;
-		unsigned int prob_ewmsd;
 
 		p += sprintf(p, "%s" ,((i == mi->max_tp_rate[0]) ? "A" : ""));
 		p += sprintf(p, "%s" ,((i == mi->max_tp_rate[1]) ? "B" : ""));
@@ -177,14 +149,12 @@
 		tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
 		tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
 		eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
-		prob_ewmsd = minstrel_get_ewmsd10(mrs);
 
-		p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u.%u,%u,%u,%u,"
+		p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,%u,"
 				"%llu,%llu,%d,%d\n",
 				tp_max / 10, tp_max % 10,
 				tp_avg / 10, tp_avg % 10,
 				eprob / 10, eprob % 10,
-				prob_ewmsd / 10, prob_ewmsd % 10,
 				mrs->retry_count,
 				mrs->last_success,
 				mrs->last_attempts,
@@ -200,33 +170,3 @@
 
 	return 0;
 }
-
-static const struct file_operations minstrel_stat_csv_fops = {
-	.owner = THIS_MODULE,
-	.open = minstrel_stats_csv_open,
-	.read = minstrel_stats_read,
-	.release = minstrel_stats_release,
-	.llseek = default_llseek,
-};
-
-void
-minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
-{
-	struct minstrel_sta_info *mi = priv_sta;
-
-	mi->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, mi,
-					    &minstrel_stat_fops);
-
-	mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, mi,
-						&minstrel_stat_csv_fops);
-}
-
-void
-minstrel_remove_sta_debugfs(void *priv, void *priv_sta)
-{
-	struct minstrel_sta_info *mi = priv_sta;
-
-	debugfs_remove(mi->dbg_stats);
-
-	debugfs_remove(mi->dbg_stats_csv);
-}
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 67ebdea..0ef2633 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/netdevice.h>
 #include <linux/types.h>
@@ -21,6 +18,8 @@
 #define AVG_AMPDU_SIZE	16
 #define AVG_PKT_SIZE	1200
 
+#define SAMPLE_SWITCH_THR	100
+
 /* Number of bits for an average sized packet */
 #define MCS_NBITS ((AVG_PKT_SIZE * AVG_AMPDU_SIZE) << 3)
 
@@ -51,26 +50,40 @@
 	MINSTREL_MAX_STREAMS * _sgi +	\
 	_streams - 1
 
+#define _MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+#define GROUP_SHIFT(duration)						\
+	_MAX(0, 16 - __builtin_clz(duration))
+
 /* MCS rate information for an MCS group */
-#define MCS_GROUP(_streams, _sgi, _ht40)				\
+#define __MCS_GROUP(_streams, _sgi, _ht40, _s)				\
 	[GROUP_IDX(_streams, _sgi, _ht40)] = {				\
 	.streams = _streams,						\
+	.shift = _s,							\
+	.bw = _ht40,							\
 	.flags =							\
 		IEEE80211_TX_RC_MCS |					\
 		(_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) |			\
 		(_ht40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0),		\
 	.duration = {							\
-		MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26),		\
-		MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52),		\
-		MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78),		\
-		MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104),	\
-		MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156),	\
-		MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208),	\
-		MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234),	\
-		MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260)		\
+		MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26) >> _s,	\
+		MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52) >> _s,	\
+		MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78) >> _s,	\
+		MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104) >> _s,	\
+		MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156) >> _s,	\
+		MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208) >> _s,	\
+		MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234) >> _s,	\
+		MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260) >> _s	\
 	}								\
 }
 
+#define MCS_GROUP_SHIFT(_streams, _sgi, _ht40)				\
+	GROUP_SHIFT(MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26))
+
+#define MCS_GROUP(_streams, _sgi, _ht40)				\
+	__MCS_GROUP(_streams, _sgi, _ht40,				\
+		    MCS_GROUP_SHIFT(_streams, _sgi, _ht40))
+
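GROUP_SHIFT() computes, per group, how many significant bits beyond 16 the group's longest duration (its slowest rate) needs, so the shifted duration[] entries can be stored compactly and unpacked again with the group's shift. The bit arithmetic, worked through:

	/* duration with 20 significant bits: __builtin_clz(d) == 32 - 20 = 12,
	 * so GROUP_SHIFT = _MAX(0, 16 - 12) = 4 and (d >> 4) fits in 16 bits;
	 * consumers scale back up with "<< group->shift".
	 */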
 #define VHT_GROUP_IDX(_streams, _sgi, _bw)				\
 	(MINSTREL_VHT_GROUP_0 +						\
 	 MINSTREL_MAX_STREAMS * 2 * (_bw) +				\
@@ -80,9 +93,11 @@
 #define BW2VBPS(_bw, r3, r2, r1)					\
 	(_bw == BW_80 ? r3 : _bw == BW_40 ? r2 : r1)
 
-#define VHT_GROUP(_streams, _sgi, _bw)					\
+#define __VHT_GROUP(_streams, _sgi, _bw, _s)				\
 	[VHT_GROUP_IDX(_streams, _sgi, _bw)] = {			\
 	.streams = _streams,						\
+	.shift = _s,							\
+	.bw = _bw,							\
 	.flags =							\
 		IEEE80211_TX_RC_VHT_MCS |				\
 		(_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) |			\
@@ -90,28 +105,36 @@
 		 _bw == BW_40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0),	\
 	.duration = {							\
 		MCS_DURATION(_streams, _sgi,				\
-			     BW2VBPS(_bw,  117,  54,  26)),		\
+			     BW2VBPS(_bw,  117,  54,  26)) >> _s,	\
 		MCS_DURATION(_streams, _sgi,				\
-			     BW2VBPS(_bw,  234, 108,  52)),		\
+			     BW2VBPS(_bw,  234, 108,  52)) >> _s,	\
 		MCS_DURATION(_streams, _sgi,				\
-			     BW2VBPS(_bw,  351, 162,  78)),		\
+			     BW2VBPS(_bw,  351, 162,  78)) >> _s,	\
 		MCS_DURATION(_streams, _sgi,				\
-			     BW2VBPS(_bw,  468, 216, 104)),		\
+			     BW2VBPS(_bw,  468, 216, 104)) >> _s,	\
 		MCS_DURATION(_streams, _sgi,				\
-			     BW2VBPS(_bw,  702, 324, 156)),		\
+			     BW2VBPS(_bw,  702, 324, 156)) >> _s,	\
 		MCS_DURATION(_streams, _sgi,				\
-			     BW2VBPS(_bw,  936, 432, 208)),		\
+			     BW2VBPS(_bw,  936, 432, 208)) >> _s,	\
 		MCS_DURATION(_streams, _sgi,				\
-			     BW2VBPS(_bw, 1053, 486, 234)),		\
+			     BW2VBPS(_bw, 1053, 486, 234)) >> _s,	\
 		MCS_DURATION(_streams, _sgi,				\
-			     BW2VBPS(_bw, 1170, 540, 260)),		\
+			     BW2VBPS(_bw, 1170, 540, 260)) >> _s,	\
 		MCS_DURATION(_streams, _sgi,				\
-			     BW2VBPS(_bw, 1404, 648, 312)),		\
+			     BW2VBPS(_bw, 1404, 648, 312)) >> _s,	\
 		MCS_DURATION(_streams, _sgi,				\
-			     BW2VBPS(_bw, 1560, 720, 346))		\
+			     BW2VBPS(_bw, 1560, 720, 346)) >> _s	\
 	}								\
 }
 
+#define VHT_GROUP_SHIFT(_streams, _sgi, _bw)				\
+	GROUP_SHIFT(MCS_DURATION(_streams, _sgi,			\
+				 BW2VBPS(_bw,  117,  54,  26)))
+
+#define VHT_GROUP(_streams, _sgi, _bw)					\
+	__VHT_GROUP(_streams, _sgi, _bw,				\
+		    VHT_GROUP_SHIFT(_streams, _sgi, _bw))
+
 #define CCK_DURATION(_bitrate, _short, _len)		\
 	(1000 * (10 /* SIFS */ +			\
 	 (_short ? 72 + 24 : 144 + 48) +		\
@@ -121,28 +144,33 @@
 	(CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) +	\
 	 CCK_DURATION(_bitrate, _short, AVG_PKT_SIZE))
 
-#define CCK_DURATION_LIST(_short)			\
-	CCK_ACK_DURATION(10, _short),			\
-	CCK_ACK_DURATION(20, _short),			\
-	CCK_ACK_DURATION(55, _short),			\
-	CCK_ACK_DURATION(110, _short)
+#define CCK_DURATION_LIST(_short, _s)			\
+	CCK_ACK_DURATION(10, _short) >> _s,		\
+	CCK_ACK_DURATION(20, _short) >> _s,		\
+	CCK_ACK_DURATION(55, _short) >> _s,		\
+	CCK_ACK_DURATION(110, _short) >> _s
 
-#define CCK_GROUP					\
+#define __CCK_GROUP(_s)					\
 	[MINSTREL_CCK_GROUP] = {			\
-		.streams = 0,				\
+		.streams = 1,				\
 		.flags = 0,				\
+		.shift = _s,				\
 		.duration = {				\
-			CCK_DURATION_LIST(false),	\
-			CCK_DURATION_LIST(true)		\
+			CCK_DURATION_LIST(false, _s),	\
+			CCK_DURATION_LIST(true, _s)	\
 		}					\
 	}
 
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
+#define CCK_GROUP_SHIFT					\
+	GROUP_SHIFT(CCK_ACK_DURATION(10, false))
+
+#define CCK_GROUP __CCK_GROUP(CCK_GROUP_SHIFT)
+
+
 static bool minstrel_vht_only = true;
 module_param(minstrel_vht_only, bool, 0644);
 MODULE_PARM_DESC(minstrel_vht_only,
 		 "Use only VHT rates when VHT is supported by sta.");
-#endif
 
 /*
  * To enable sufficiently targeted rate sampling, MCS rates are divided into
@@ -156,46 +184,54 @@
 	MCS_GROUP(1, 0, BW_20),
 	MCS_GROUP(2, 0, BW_20),
 	MCS_GROUP(3, 0, BW_20),
+	MCS_GROUP(4, 0, BW_20),
 
 	MCS_GROUP(1, 1, BW_20),
 	MCS_GROUP(2, 1, BW_20),
 	MCS_GROUP(3, 1, BW_20),
+	MCS_GROUP(4, 1, BW_20),
 
 	MCS_GROUP(1, 0, BW_40),
 	MCS_GROUP(2, 0, BW_40),
 	MCS_GROUP(3, 0, BW_40),
+	MCS_GROUP(4, 0, BW_40),
 
 	MCS_GROUP(1, 1, BW_40),
 	MCS_GROUP(2, 1, BW_40),
 	MCS_GROUP(3, 1, BW_40),
+	MCS_GROUP(4, 1, BW_40),
 
 	CCK_GROUP,
 
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
 	VHT_GROUP(1, 0, BW_20),
 	VHT_GROUP(2, 0, BW_20),
 	VHT_GROUP(3, 0, BW_20),
+	VHT_GROUP(4, 0, BW_20),
 
 	VHT_GROUP(1, 1, BW_20),
 	VHT_GROUP(2, 1, BW_20),
 	VHT_GROUP(3, 1, BW_20),
+	VHT_GROUP(4, 1, BW_20),
 
 	VHT_GROUP(1, 0, BW_40),
 	VHT_GROUP(2, 0, BW_40),
 	VHT_GROUP(3, 0, BW_40),
+	VHT_GROUP(4, 0, BW_40),
 
 	VHT_GROUP(1, 1, BW_40),
 	VHT_GROUP(2, 1, BW_40),
 	VHT_GROUP(3, 1, BW_40),
+	VHT_GROUP(4, 1, BW_40),
 
 	VHT_GROUP(1, 0, BW_80),
 	VHT_GROUP(2, 0, BW_80),
 	VHT_GROUP(3, 0, BW_80),
+	VHT_GROUP(4, 0, BW_80),
 
 	VHT_GROUP(1, 1, BW_80),
 	VHT_GROUP(2, 1, BW_80),
 	VHT_GROUP(3, 1, BW_80),
-#endif
+	VHT_GROUP(4, 1, BW_80),
 };
 
 static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES] __read_mostly;
@@ -282,7 +318,8 @@
 				break;
 
 		/* short preamble */
-		if (!(mi->supported[group] & BIT(idx)))
+		if ((mi->supported[group] & BIT(idx + 4)) &&
+		    (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE))
 			idx += 4;
 	}
 	return &mi->groups[group].rates[idx];
@@ -294,6 +331,15 @@
 	return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES];
 }
 
+static unsigned int
+minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi)
+{
+	if (!mi->avg_ampdu_len)
+		return AVG_AMPDU_SIZE;
+
+	return MINSTREL_TRUNC(mi->avg_ampdu_len);
+}
+
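minstrel_ht_avg_ampdu_len() pairs with the TX_STATUS_NO_AMPDU_LEN handling further down: drivers that cannot report A-MPDU lengths keep avg_ampdu_len at 0, and airtime estimates then fall back to the static AVG_AMPDU_SIZE (16) instead of an EWMA that never gets updated. In short:

	/* avg_ampdu_len == 0  -> assume AVG_AMPDU_SIZE (16) frames per A-MPDU
	 * avg_ampdu_len != 0  -> use the tracked EWMA, MINSTREL_TRUNC()ed
	 */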
 /*
  * Return current throughput based on the average A-MPDU length, taking into
  * account the expected number of retransmissions and their expected length
@@ -309,9 +355,10 @@
 		return 0;
 
 	if (group != MINSTREL_CCK_GROUP)
-		nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
+		nsecs = 1000 * mi->overhead / minstrel_ht_avg_ampdu_len(mi);
 
-	nsecs += minstrel_mcs_groups[group].duration[rate];
+	nsecs += minstrel_mcs_groups[group].duration[rate] <<
+		 minstrel_mcs_groups[group].shift;
 
 	/*
 	 * For the throughput calculation, limit the probability value to 90% to
@@ -443,7 +490,7 @@
 	tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
 	tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
 
-	if (tmp_cck_tp > tmp_mcs_tp) {
+	if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) {
 		for(i = 0; i < MAX_THR_RATES; i++) {
 			minstrel_ht_sort_best_tp_rates(mi, tmp_cck_tp_rate[i],
 						       tmp_mcs_tp_rate);
@@ -483,6 +530,133 @@
 	}
 }
 
+static inline int
+minstrel_get_duration(int index)
+{
+	const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
+	unsigned int duration = group->duration[index % MCS_GROUP_RATES];
+	return duration << group->shift;
+}
+
+static bool
+minstrel_ht_probe_group(struct minstrel_ht_sta *mi, const struct mcs_group *tp_group,
+						int tp_idx, const struct mcs_group *group)
+{
+	if (group->bw < tp_group->bw)
+		return false;
+
+	if (group->streams == tp_group->streams)
+		return true;
+
+	if (tp_idx < 4 && group->streams == tp_group->streams - 1)
+		return true;
+
+	return group->streams == tp_group->streams + 1;
+}
+
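minstrel_ht_probe_group() narrows active sampling to configurations that could plausibly beat the current best rate. For illustration, with the max-throughput rate in a 2-stream group:

	/* candidate groups of equal or wider bandwidth:
	 *   2 streams          -> probed (same stream count)
	 *   3 streams          -> probed (streams + 1)
	 *   1 stream           -> probed only while tp_idx < 4, i.e. the
	 *                         current best is a low MCS within its group
	 * narrower-bandwidth groups are never probed
	 */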
+static void
+minstrel_ht_find_probe_rates(struct minstrel_ht_sta *mi, u16 *rates, int *n_rates,
+			     bool faster_rate)
+{
+	const struct mcs_group *group, *tp_group;
+	int i, g, max_dur;
+	int tp_idx;
+
+	tp_group = &minstrel_mcs_groups[mi->max_tp_rate[0] / MCS_GROUP_RATES];
+	tp_idx = mi->max_tp_rate[0] % MCS_GROUP_RATES;
+
+	max_dur = minstrel_get_duration(mi->max_tp_rate[0]);
+	if (faster_rate)
+		max_dur -= max_dur / 16;
+
+	for (g = 0; g < MINSTREL_GROUPS_NB; g++) {
+		u16 supported = mi->supported[g];
+
+		if (!supported)
+			continue;
+
+		group = &minstrel_mcs_groups[g];
+		if (!minstrel_ht_probe_group(mi, tp_group, tp_idx, group))
+			continue;
+
+		for (i = 0; supported; supported >>= 1, i++) {
+			int idx;
+
+			if (!(supported & 1))
+				continue;
+
+			if ((group->duration[i] << group->shift) > max_dur)
+				continue;
+
+			idx = g * MCS_GROUP_RATES + i;
+			if (idx == mi->max_tp_rate[0])
+				continue;
+
+			rates[(*n_rates)++] = idx;
+			break;
+		}
+	}
+}
+
+static void
+minstrel_ht_rate_sample_switch(struct minstrel_priv *mp,
+			       struct minstrel_ht_sta *mi)
+{
+	struct minstrel_rate_stats *mrs;
+	u16 rates[MINSTREL_GROUPS_NB];
+	int n_rates = 0;
+	int probe_rate = 0;
+	bool faster_rate;
+	int i;
+	u8 random;
+
+	/*
+	 * Use rate switching instead of probing packets for devices with
+	 * little control over retry fallback behavior
+	 */
+	if (mp->hw->max_rates > 1)
+		return;
+
+	/*
+	 * If the current EWMA prob is >75%, look for a rate that's 6.25%
+	 * faster than the max tp rate.
+	 * If that fails, look again for a rate that is at least as fast.
+	 */
+	mrs = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
+	faster_rate = mrs->prob_ewma > MINSTREL_FRAC(75, 100);
+	minstrel_ht_find_probe_rates(mi, rates, &n_rates, faster_rate);
+	if (!n_rates && faster_rate)
+		minstrel_ht_find_probe_rates(mi, rates, &n_rates, false);
+
+	/* If no suitable rate was found, try to pick the next one in the group */
+	if (!n_rates) {
+		int g_idx = mi->max_tp_rate[0] / MCS_GROUP_RATES;
+		u16 supported = mi->supported[g_idx];
+
+		supported >>= mi->max_tp_rate[0] % MCS_GROUP_RATES;
+		for (i = 0; supported; supported >>= 1, i++) {
+			if (!(supported & 1))
+				continue;
+
+			probe_rate = mi->max_tp_rate[0] + i;
+			goto out;
+		}
+
+		return;
+	}
+
+	i = 0;
+	if (n_rates > 1) {
+		random = prandom_u32();
+		i = random % n_rates;
+	}
+	probe_rate = rates[i];
+
+out:
+	mi->sample_rate = probe_rate;
+	mi->sample_mode = MINSTREL_SAMPLE_ACTIVE;
+}
+
 /*
  * Update rate statistics and select new primary rates
  *
@@ -493,7 +667,8 @@
  *    higher throughput rates, even if the probability is a bit lower
  */
 static void
-minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
+minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
+			 bool sample)
 {
 	struct minstrel_mcs_group_data *mg;
 	struct minstrel_rate_stats *mrs;
@@ -501,9 +676,25 @@
 	u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES];
 	u16 tmp_cck_tp_rate[MAX_THR_RATES], index;
 
+	mi->sample_mode = MINSTREL_SAMPLE_IDLE;
+
+	if (sample) {
+		mi->total_packets_cur = mi->total_packets -
+					mi->total_packets_last;
+		mi->total_packets_last = mi->total_packets;
+	}
+	if (!mp->sample_switch)
+		sample = false;
+	if (mi->total_packets_cur < SAMPLE_SWITCH_THR && mp->sample_switch != 1)
+		sample = false;
+
 	if (mi->ampdu_packets > 0) {
-		mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len,
-			MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL);
+		if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN))
+			mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len,
+				MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets),
+					      EWMA_LEVEL);
+		else
+			mi->avg_ampdu_len = 0;
 		mi->ampdu_len = 0;
 		mi->ampdu_packets = 0;
 	}
@@ -511,11 +702,19 @@
 	mi->sample_slow = 0;
 	mi->sample_count = 0;
 
-	/* Initialize global rate indexes */
-	for(j = 0; j < MAX_THR_RATES; j++){
-		tmp_mcs_tp_rate[j] = 0;
-		tmp_cck_tp_rate[j] = 0;
-	}
+	memset(tmp_mcs_tp_rate, 0, sizeof(tmp_mcs_tp_rate));
+	memset(tmp_cck_tp_rate, 0, sizeof(tmp_cck_tp_rate));
+	if (mi->supported[MINSTREL_CCK_GROUP])
+		for (j = 0; j < ARRAY_SIZE(tmp_cck_tp_rate); j++)
+			tmp_cck_tp_rate[j] = MINSTREL_CCK_GROUP * MCS_GROUP_RATES;
+
+	if (mi->supported[MINSTREL_VHT_GROUP_0])
+		index = MINSTREL_VHT_GROUP_0 * MCS_GROUP_RATES;
+	else
+		index = MINSTREL_HT_GROUP_0 * MCS_GROUP_RATES;
+
+	for (j = 0; j < ARRAY_SIZE(tmp_mcs_tp_rate); j++)
+		tmp_mcs_tp_rate[j] = index;
 
 	/* Find best rate sets within all MCS groups*/
 	for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
@@ -528,7 +727,7 @@
 
 		/* (re)Initialize group rate indexes */
 		for(j = 0; j < MAX_THR_RATES; j++)
-			tmp_group_tp_rate[j] = group;
+			tmp_group_tp_rate[j] = MCS_GROUP_RATES * group;
 
 		for (i = 0; i < MCS_GROUP_RATES; i++) {
 			if (!(mi->supported[group] & BIT(i)))
@@ -575,12 +774,16 @@
 	/* try to sample all available rates during each interval */
 	mi->sample_count *= 8;
 
+	if (sample)
+		minstrel_ht_rate_sample_switch(mp, mi);
+
 #ifdef CONFIG_MAC80211_DEBUGFS
 	/* use fixed index if set */
 	if (mp->fixed_rate_idx != -1) {
 		for (i = 0; i < 4; i++)
 			mi->max_tp_rate[i] = mp->fixed_rate_idx;
 		mi->max_prob_rate = mp->fixed_rate_idx;
+		mi->sample_mode = MINSTREL_SAMPLE_IDLE;
 	}
 #endif
 
@@ -684,15 +887,17 @@
 	struct minstrel_ht_sta_priv *msp = priv_sta;
 	struct minstrel_ht_sta *mi = &msp->ht;
 	struct ieee80211_tx_rate *ar = info->status.rates;
-	struct minstrel_rate_stats *rate, *rate2;
+	struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL;
 	struct minstrel_priv *mp = priv;
 	bool last, update = false;
+	bool sample_status = false;
 	int i;
 
 	if (!msp->is_ht)
 		return mac80211_minstrel.tx_status_ext(priv, sband,
 						       &msp->legacy, st);
 
+
 	/* This packet was aggregated but doesn't carry status info */
 	if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
 	    !(info->flags & IEEE80211_TX_STAT_AMPDU))
@@ -708,7 +913,9 @@
 	mi->ampdu_len += info->status.ampdu_len;
 
 	if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) {
-		mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len);
+		int avg_ampdu_len = minstrel_ht_avg_ampdu_len(mi);
+
+		mi->sample_wait = 16 + 2 * avg_ampdu_len;
 		mi->sample_tries = 1;
 		mi->sample_count--;
 	}
@@ -716,12 +923,17 @@
 	if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
 		mi->sample_packets += info->status.ampdu_len;
 
+	if (mi->sample_mode != MINSTREL_SAMPLE_IDLE)
+		rate_sample = minstrel_get_ratestats(mi, mi->sample_rate);
+
 	last = !minstrel_ht_txstat_valid(mp, &ar[0]);
 	for (i = 0; !last; i++) {
 		last = (i == IEEE80211_TX_MAX_RATES - 1) ||
 		       !minstrel_ht_txstat_valid(mp, &ar[i + 1]);
 
 		rate = minstrel_ht_get_stats(mp, mi, &ar[i]);
+		if (rate == rate_sample)
+			sample_status = true;
 
 		if (last)
 			rate->success += info->status.ampdu_ack_len;
@@ -729,30 +941,54 @@
 		rate->attempts += ar[i].count * info->status.ampdu_len;
 	}
 
-	/*
-	 * check for sudden death of spatial multiplexing,
-	 * downgrade to a lower number of streams if necessary.
-	 */
-	rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
-	if (rate->attempts > 30 &&
-	    MINSTREL_FRAC(rate->success, rate->attempts) <
-	    MINSTREL_FRAC(20, 100)) {
-		minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true);
+	switch (mi->sample_mode) {
+	case MINSTREL_SAMPLE_IDLE:
+		break;
+
+	case MINSTREL_SAMPLE_ACTIVE:
+		if (!sample_status)
+			break;
+
+		mi->sample_mode = MINSTREL_SAMPLE_PENDING;
 		update = true;
+		break;
+
+	case MINSTREL_SAMPLE_PENDING:
+		if (sample_status)
+			break;
+
+		update = true;
+		minstrel_ht_update_stats(mp, mi, false);
+		break;
 	}
 
-	rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]);
-	if (rate2->attempts > 30 &&
-	    MINSTREL_FRAC(rate2->success, rate2->attempts) <
-	    MINSTREL_FRAC(20, 100)) {
-		minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false);
-		update = true;
+
+	if (mp->hw->max_rates > 1) {
+		/*
+		 * check for sudden death of spatial multiplexing,
+		 * downgrade to a lower number of streams if necessary.
+		 */
+		rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
+		if (rate->attempts > 30 &&
+		    MINSTREL_FRAC(rate->success, rate->attempts) <
+		    MINSTREL_FRAC(20, 100)) {
+			minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true);
+			update = true;
+		}
+
+		rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]);
+		if (rate2->attempts > 30 &&
+		    MINSTREL_FRAC(rate2->success, rate2->attempts) <
+		    MINSTREL_FRAC(20, 100)) {
+			minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false);
+			update = true;
+		}
 	}
 
 	if (time_after(jiffies, mi->last_stats_update +
 				(mp->update_interval / 2 * HZ) / 1000)) {
 		update = true;
-		minstrel_ht_update_stats(mp, mi);
+		minstrel_ht_update_stats(mp, mi, true);
 	}
 
 	if (update)
@@ -764,12 +1000,11 @@
                          int index)
 {
 	struct minstrel_rate_stats *mrs;
-	const struct mcs_group *group;
 	unsigned int tx_time, tx_time_rtscts, tx_time_data;
 	unsigned int cw = mp->cw_min;
 	unsigned int ctime = 0;
 	unsigned int t_slot = 9; /* FIXME */
-	unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len);
+	unsigned int ampdu_len = minstrel_ht_avg_ampdu_len(mi);
 	unsigned int overhead = 0, overhead_rtscts = 0;
 
 	mrs = minstrel_get_ratestats(mi, index);
@@ -783,8 +1018,7 @@
 	mrs->retry_count_rtscts = 2;
 	mrs->retry_updated = true;
 
-	group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
-	tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000;
+	tx_time_data = minstrel_get_duration(index) * ampdu_len / 1000;
 
 	/* Contention time for first 2 tries */
 	ctime = (t_slot * cw) >> 1;
@@ -878,20 +1112,24 @@
 	int group = mi->max_prob_rate / MCS_GROUP_RATES;
 	const struct mcs_group *g = &minstrel_mcs_groups[group];
 	int rate = mi->max_prob_rate % MCS_GROUP_RATES;
+	unsigned int duration;
 
 	/* Disable A-MSDU if max_prob_rate is bad */
 	if (mi->groups[group].rates[rate].prob_ewma < MINSTREL_FRAC(50, 100))
 		return 1;
 
+	duration = g->duration[rate];
+	duration <<= g->shift;
+
 	/* If the rate is slower than single-stream MCS1, make A-MSDU limit small */
-	if (g->duration[rate] > MCS_DURATION(1, 0, 52))
+	if (duration > MCS_DURATION(1, 0, 52))
 		return 500;
 
 	/*
 	 * If the rate is slower than single-stream MCS4, limit A-MSDU to usual
 	 * data packet size
 	 */
-	if (g->duration[rate] > MCS_DURATION(1, 0, 104))
+	if (duration > MCS_DURATION(1, 0, 104))
 		return 1600;
 
 	/*
@@ -899,7 +1137,7 @@
 	 * rate success probability is less than 75%, limit A-MSDU to twice the usual
 	 * data packet size
 	 */
-	if (g->duration[rate] > MCS_DURATION(1, 0, 260) ||
+	if (duration > MCS_DURATION(1, 0, 260) ||
 	    (minstrel_ht_get_prob_ewma(mi, mi->max_tp_rate[0]) <
 	     MINSTREL_FRAC(75, 100)))
 		return 3200;
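The three thresholds above are airtimes of minstrel's average-sized packet
at reference single-stream rates. A worked check of the MCS1 bound,
assuming minstrel_ht's AVG_PKT_SIZE of 1200 bytes and HT20 long-GI timing
(4 us per OFDM symbol, 52 data bits per symbol at MCS1):

	#include <stdio.h>

	int main(void)
	{
		unsigned int avg_pkt_bits = 1200 * 8; /* AVG_PKT_SIZE assumed */
		unsigned int bps = 52;                /* bits/symbol, 1xSS MCS1 */
		unsigned int nsyms = (avg_pkt_bits + bps - 1) / bps;

		/* 4 us per OFDM symbol with the long guard interval */
		printf("MCS1 airtime per average packet: %u us\n", nsyms * 4);
		return 0;
	}

This prints 740 us, so "slower than MCS_DURATION(1, 0, 52)" means the rate
needs more than roughly 740 us to move 1200 bytes.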
@@ -921,14 +1159,18 @@
 minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 {
 	struct ieee80211_sta_rates *rates;
+	u16 first_rate = mi->max_tp_rate[0];
 	int i = 0;
 
+	if (mi->sample_mode == MINSTREL_SAMPLE_ACTIVE)
+		first_rate = mi->sample_rate;
+
 	rates = kzalloc(sizeof(*rates), GFP_ATOMIC);
 	if (!rates)
 		return;
 
 	/* Start with max_tp_rate[0] */
-	minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]);
+	minstrel_ht_set_rate(mp, mi, rates, i++, first_rate);
 
 	if (mp->hw->max_rates >= 3) {
 		/* At least 3 tx rates supported, use max_tp_rate[1] next */
@@ -936,8 +1178,6 @@
 	}
 
 	if (mp->hw->max_rates >= 2) {
-		/*
-		 * At least 2 tx rates supported, use max_prob_rate next */
 		minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate);
 	}
 
@@ -946,13 +1186,6 @@
 	rate_control_set_rates(mp->hw, mi->sta, rates);
 }
 
-static inline int
-minstrel_get_duration(int index)
-{
-	const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
-	return group->duration[index % MCS_GROUP_RATES];
-}
-
 static int
 minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 {
@@ -962,6 +1195,11 @@
 	int tp_rate1, tp_rate2;
 	int sample_idx = 0;
 
+	if (mp->hw->max_rates == 1 && mp->sample_switch &&
+	    (mi->total_packets_cur >= SAMPLE_SWITCH_THR ||
+	     mp->sample_switch == 1))
+		return -1;
+
 	if (mi->sample_wait > 0) {
 		mi->sample_wait--;
 		return -1;
@@ -1000,10 +1238,28 @@
 		return -1;
 
 	/*
-	 * Do not sample if the probability is already higher than 95%
-	 * to avoid wasting airtime.
+	 * Do not sample if the probability is already higher than 95%,
+	 * or if the rate is 3 times slower than the current max probability
+	 * rate, to avoid wasting airtime.
 	 */
-	if (mrs->prob_ewma > MINSTREL_FRAC(95, 100))
+	sample_dur = minstrel_get_duration(sample_idx);
+	if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) ||
+	    minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur)
+		return -1;
+
+
+	/*
+	 * For devices with no configurable multi-rate retry, skip sampling
+	 * below the per-group max throughput rate, and only use one sampling
+	 * attempt per rate
+	 */
+	if (mp->hw->max_rates == 1 &&
+	    (minstrel_get_duration(mg->max_group_tp_rate[0]) < sample_dur ||
+	     mrs->attempts))
+		return -1;
+
+	/* Skip already sampled slow rates */
+	if (sample_dur >= minstrel_get_duration(tp_rate1) && mrs->attempts)
 		return -1;
 
 	/*
@@ -1013,7 +1269,6 @@
 
 	cur_max_tp_streams = minstrel_mcs_groups[tp_rate1 /
 		MCS_GROUP_RATES].streams;
-	sample_dur = minstrel_get_duration(sample_idx);
 	if (sample_dur >= minstrel_get_duration(tp_rate2) &&
 	    (cur_max_tp_streams - 1 <
 	     minstrel_mcs_groups[sample_group].streams ||
@@ -1041,9 +1296,6 @@
 	struct minstrel_priv *mp = priv;
 	int sample_idx;
 
-	if (rate_control_send_low(sta, priv_sta, txrc))
-		return;
-
 	if (!msp->is_ht)
 		return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc);
 
@@ -1077,18 +1329,23 @@
 		return;
 
 	sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES];
+	sample_idx %= MCS_GROUP_RATES;
+
+	if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP] &&
+	    (sample_idx >= 4) != txrc->short_preamble)
+		return;
+
 	info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
 	rate->count = 1;
 
-	if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) {
+	if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP]) {
 		int idx = sample_idx % ARRAY_SIZE(mp->cck_rates);
 		rate->idx = mp->cck_rates[idx];
 	} else if (sample_group->flags & IEEE80211_TX_RC_VHT_MCS) {
 		ieee80211_rate_set_vht(rate, sample_idx % MCS_GROUP_RATES,
 				       sample_group->streams);
 	} else {
-		rate->idx = sample_idx % MCS_GROUP_RATES +
-			    (sample_group->streams - 1) * 8;
+		rate->idx = sample_idx + (sample_group->streams - 1) * 8;
 	}
 
 	rate->flags = sample_group->flags;
@@ -1130,14 +1387,14 @@
 	struct minstrel_ht_sta_priv *msp = priv_sta;
 	struct minstrel_ht_sta *mi = &msp->ht;
 	struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
-	u16 sta_cap = sta->ht_cap.cap;
+	u16 ht_cap = sta->ht_cap.cap;
 	struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap;
-	struct sta_info *sinfo = container_of(sta, struct sta_info, sta);
 	int use_vht;
 	int n_supported = 0;
 	int ack_dur;
 	int stbc;
 	int i;
+	bool ldpc;
 
 	/* fall back to the old minstrel for legacy stations */
 	if (!sta->ht_cap.ht_supported)
@@ -1145,12 +1402,10 @@
 
 	BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != MINSTREL_GROUPS_NB);
 
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
 	if (vht_cap->vht_supported)
 		use_vht = vht_cap->vht_mcs.tx_mcs_map != cpu_to_le16(~0);
 	else
-#endif
-	use_vht = 0;
+		use_vht = 0;
 
 	msp->is_ht = true;
 	memset(mi, 0, sizeof(*mi));
@@ -1175,16 +1430,22 @@
 	}
 	mi->sample_tries = 4;
 
-	/* TODO tx_flags for vht - ATM the RC API is not fine-grained enough */
 	if (!use_vht) {
-		stbc = (sta_cap & IEEE80211_HT_CAP_RX_STBC) >>
+		stbc = (ht_cap & IEEE80211_HT_CAP_RX_STBC) >>
 			IEEE80211_HT_CAP_RX_STBC_SHIFT;
-		mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT;
 
-		if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING)
-			mi->tx_flags |= IEEE80211_TX_CTL_LDPC;
+		ldpc = ht_cap & IEEE80211_HT_CAP_LDPC_CODING;
+	} else {
+		stbc = (vht_cap->cap & IEEE80211_VHT_CAP_RXSTBC_MASK) >>
+			IEEE80211_VHT_CAP_RXSTBC_SHIFT;
+
+		ldpc = vht_cap->cap & IEEE80211_VHT_CAP_RXLDPC;
 	}
 
+	mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT;
+	if (ldpc)
+		mi->tx_flags |= IEEE80211_TX_CTL_LDPC;
+
 	for (i = 0; i < ARRAY_SIZE(mi->groups); i++) {
 		u32 gflags = minstrel_mcs_groups[i].flags;
 		int bw, nss;
@@ -1197,10 +1458,10 @@
 
 		if (gflags & IEEE80211_TX_RC_SHORT_GI) {
 			if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) {
-				if (!(sta_cap & IEEE80211_HT_CAP_SGI_40))
+				if (!(ht_cap & IEEE80211_HT_CAP_SGI_40))
 					continue;
 			} else {
-				if (!(sta_cap & IEEE80211_HT_CAP_SGI_20))
+				if (!(ht_cap & IEEE80211_HT_CAP_SGI_20))
 					continue;
 			}
 		}
@@ -1217,10 +1478,9 @@
 
 		/* HT rate */
 		if (gflags & IEEE80211_TX_RC_MCS) {
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
 			if (use_vht && minstrel_vht_only)
 				continue;
-#endif
+
 			mi->supported[i] = mcs->rx_mask[nss - 1];
 			if (mi->supported[i])
 				n_supported++;
@@ -1258,11 +1518,10 @@
 	if (!n_supported)
 		goto use_legacy;
 
-	if (test_sta_flag(sinfo, WLAN_STA_SHORT_PREAMBLE))
-		mi->cck_supported_short |= mi->cck_supported_short << 4;
+	mi->supported[MINSTREL_CCK_GROUP] |= mi->cck_supported_short << 4;
 
 	/* create an initial rate table with the lowest supported rates */
-	minstrel_ht_update_stats(mp, mi);
+	minstrel_ht_update_stats(mp, mi, true);
 	minstrel_ht_update_rates(mp, mi);
 
 	return;
@@ -1340,16 +1599,92 @@
 	kfree(msp);
 }
 
+static void
+minstrel_ht_init_cck_rates(struct minstrel_priv *mp)
+{
+	static const int bitrates[4] = { 10, 20, 55, 110 };
+	struct ieee80211_supported_band *sband;
+	u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
+	int i, j;
+
+	sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ];
+	if (!sband)
+		return;
+
+	for (i = 0; i < sband->n_bitrates; i++) {
+		struct ieee80211_rate *rate = &sband->bitrates[i];
+
+		if (rate->flags & IEEE80211_RATE_ERP_G)
+			continue;
+
+		if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+			continue;
+
+		for (j = 0; j < ARRAY_SIZE(bitrates); j++) {
+			if (rate->bitrate != bitrates[j])
+				continue;
+
+			mp->cck_rates[j] = i;
+			break;
+		}
+	}
+}
+
 static void *
 minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 {
-	return mac80211_minstrel.alloc(hw, debugfsdir);
+	struct minstrel_priv *mp;
+
+	mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC);
+	if (!mp)
+		return NULL;
+
+	mp->sample_switch = -1;
+
+	/* contention window settings
+	 * Just an approximation. Using the per-queue values would complicate
+	 * the calculations and is probably unnecessary */
+	mp->cw_min = 15;
+	mp->cw_max = 1023;
+
+	/* number of packets (in %) to use for sampling other rates
+	 * sample less often for non-mrr packets, because the overhead
+	 * is much higher than with mrr */
+	mp->lookaround_rate = 5;
+	mp->lookaround_rate_mrr = 10;
+
+	/* maximum time that the hw is allowed to stay in one MRR segment */
+	mp->segment_size = 6000;
+
+	if (hw->max_rate_tries > 0)
+		mp->max_retry = hw->max_rate_tries;
+	else
+		/* safe default, does not necessarily have to match hw properties */
+		mp->max_retry = 7;
+
+	if (hw->max_rates >= 4)
+		mp->has_mrr = true;
+
+	mp->hw = hw;
+	mp->update_interval = 100;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	mp->fixed_rate_idx = (u32) -1;
+	debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir,
+			   &mp->fixed_rate_idx);
+	debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir,
+			   &mp->sample_switch);
+#endif
+
+	minstrel_ht_init_cck_rates(mp);
+
+	return mp;
 }
 
 static void
 minstrel_ht_free(void *priv)
 {
-	mac80211_minstrel.free(priv);
+	kfree(priv);
 }
 
 static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
@@ -1384,7 +1719,6 @@
 	.free = minstrel_ht_free,
 #ifdef CONFIG_MAC80211_DEBUGFS
 	.add_sta_debugfs = minstrel_ht_add_sta_debugfs,
-	.remove_sta_debugfs = minstrel_ht_remove_sta_debugfs,
 #endif
 	.get_expected_throughput = minstrel_ht_get_expected_throughput,
 };
@@ -1409,14 +1743,14 @@
 }
 
 int __init
-rc80211_minstrel_ht_init(void)
+rc80211_minstrel_init(void)
 {
 	init_sample_table();
 	return ieee80211_rate_control_register(&mac80211_minstrel_ht);
 }
 
 void
-rc80211_minstrel_ht_exit(void)
+rc80211_minstrel_exit(void)
 {
 	ieee80211_rate_control_unregister(&mac80211_minstrel_ht);
 }
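The cw_min/cw_max defaults above feed the same contention estimate used by
minstrel_calc_retransmit earlier in this patch (ctime = (t_slot * cw) >> 1).
A runnable sketch of how the expected backoff accumulates as the window
doubles per retry; the (cw << 1) | 1 growth rule is an assumption matching
the usual 802.11 behavior, not shown in this hunk:

	#include <stdio.h>

	int main(void)
	{
		unsigned int t_slot = 9, cw = 15, cw_max = 1023;
		unsigned int ctime = 0, attempt;

		for (attempt = 1; attempt <= 7; attempt++) {
			ctime += (t_slot * cw) >> 1; /* expected backoff, us */
			printf("attempt %u: cw=%4u cumulative=%5u us\n",
			       attempt, cw, ctime);
			if (cw < cw_max)
				cw = (cw << 1) | 1;
		}
		return 0;
	}

This is the kind of cost the 6000 us segment_size budget is balanced
against when retry chains are computed.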
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index de1646c..f938701 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __RC_MINSTREL_HT_H
@@ -13,13 +10,9 @@
  * The number of streams can be changed to 2 to reduce code
  * size and memory footprint.
  */
-#define MINSTREL_MAX_STREAMS		3
+#define MINSTREL_MAX_STREAMS		4
 #define MINSTREL_HT_STREAM_GROUPS	4 /* BW(=2) * SGI(=2) */
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
 #define MINSTREL_VHT_STREAM_GROUPS	6 /* BW(=3) * SGI(=2) */
-#else
-#define MINSTREL_VHT_STREAM_GROUPS	0
-#endif
 
 #define MINSTREL_HT_GROUPS_NB	(MINSTREL_MAX_STREAMS *		\
 				 MINSTREL_HT_STREAM_GROUPS)
@@ -34,16 +27,14 @@
 #define MINSTREL_CCK_GROUP	(MINSTREL_HT_GROUP_0 + MINSTREL_HT_GROUPS_NB)
 #define MINSTREL_VHT_GROUP_0	(MINSTREL_CCK_GROUP + 1)
 
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
 #define MCS_GROUP_RATES		10
-#else
-#define MCS_GROUP_RATES		8
-#endif
 
 struct mcs_group {
-	u32 flags;
-	unsigned int streams;
-	unsigned int duration[MCS_GROUP_RATES];
+	u16 flags;
+	u8 streams;
+	u8 shift;
+	u8 bw;
+	u16 duration[MCS_GROUP_RATES];
 };
 
 extern const struct mcs_group minstrel_mcs_groups[];
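The struct shrink above stores each per-rate airtime as a u16 plus a
per-group shift; every consumer in this patch expands it back with
duration << shift. A sketch of the round trip; the round-to-nearest
packing is an assumption for illustration, since the patch only shows the
expansion side:

	#include <stdint.h>
	#include <stdio.h>

	/* caller must pick a shift large enough for the result to fit 16 bits */
	static uint16_t pack_duration(uint32_t dur, uint8_t shift)
	{
		return (dur + ((1u << shift) >> 1)) >> shift;
	}

	static uint32_t unpack_duration(uint16_t packed, uint8_t shift)
	{
		return (uint32_t)packed << shift;
	}

	int main(void)
	{
		uint32_t dur = 738123;	/* illustrative airtime value */
		uint8_t shift = 4;
		uint16_t p = pack_duration(dur, shift);

		printf("packed=%u expanded=%u error=%d\n", p,
		       unpack_duration(p, shift),
		       (int)(unpack_duration(p, shift) - dur));
		return 0;
	}

The group-wide shift trades a few low bits of precision for halving the
table size, which is why the users earlier in this patch re-expand
duration before comparing airtimes.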
@@ -60,6 +51,12 @@
 	struct minstrel_rate_stats rates[MCS_GROUP_RATES];
 };
 
+enum minstrel_sample_mode {
+	MINSTREL_SAMPLE_IDLE,
+	MINSTREL_SAMPLE_ACTIVE,
+	MINSTREL_SAMPLE_PENDING,
+};
+
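The three states above drive the tx-status switch earlier in this patch:
ACTIVE until the sampled rate shows up in a status report, PENDING until
a report without it arrives, then back to IDLE through a stats update. A
toy walk through that progression:

	#include <stdio.h>

	enum demo_mode { IDLE, ACTIVE, PENDING };

	int main(void)
	{
		enum demo_mode mode = ACTIVE;
		int seen_sample[] = { 1, 1, 0 }; /* per tx-status report */
		int i;

		for (i = 0; i < 3; i++) {
			if (mode == ACTIVE && seen_sample[i])
				mode = PENDING;	/* sample rate was tried */
			else if (mode == PENDING && !seen_sample[i])
				mode = IDLE;	/* sampling round is over */
			printf("report %d -> mode %d\n", i, mode);
		}
		return 0;
	}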
 struct minstrel_ht_sta {
 	struct ieee80211_sta *sta;
 
@@ -81,6 +78,8 @@
 	unsigned int overhead;
 	unsigned int overhead_rtscts;
 
+	unsigned int total_packets_last;
+	unsigned int total_packets_cur;
 	unsigned int total_packets;
 	unsigned int sample_packets;
 
@@ -92,6 +91,9 @@
 	u8 sample_count;
 	u8 sample_slow;
 
+	enum minstrel_sample_mode sample_mode;
+	u16 sample_rate;
+
 	/* current MCS group to be sampled */
 	u8 sample_group;
 
@@ -110,17 +112,12 @@
 		struct minstrel_ht_sta ht;
 		struct minstrel_sta_info legacy;
 	};
-#ifdef CONFIG_MAC80211_DEBUGFS
-	struct dentry *dbg_stats;
-	struct dentry *dbg_stats_csv;
-#endif
 	void *ratelist;
 	void *sample_table;
 	bool is_ht;
 };
 
 void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
-void minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta);
 int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
 			   int prob_ewma);
 
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index bfcc031..5a6e9f3 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/netdevice.h>
 #include <linux/types.h>
@@ -15,6 +12,22 @@
 #include "rc80211_minstrel.h"
 #include "rc80211_minstrel_ht.h"
 
+static ssize_t
+minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
+{
+	struct minstrel_debugfs_info *ms;
+
+	ms = file->private_data;
+	return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len);
+}
+
+static int
+minstrel_stats_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
 static char *
 minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
 {
@@ -41,7 +54,7 @@
 		struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j];
 		static const int bitrates[4] = { 10, 20, 55, 110 };
 		int idx = i * MCS_GROUP_RATES + j;
-		unsigned int prob_ewmsd;
+		unsigned int duration;
 
 		if (!(mi->supported[i] & BIT(j)))
 			continue;
@@ -79,21 +92,21 @@
 		p += sprintf(p, "  %3u  ", idx);
 
 		/* tx_time[rate(i)] in usec */
-		tx_time = DIV_ROUND_CLOSEST(mg->duration[j], 1000);
+		duration = mg->duration[j];
+		duration <<= mg->shift;
+		tx_time = DIV_ROUND_CLOSEST(duration, 1000);
 		p += sprintf(p, "%6u  ", tx_time);
 
 		tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
 		tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
 		eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
-		prob_ewmsd = minstrel_get_ewmsd10(mrs);
 
-		p += sprintf(p, "%4u.%1u    %4u.%1u     %3u.%1u    %3u.%1u"
+		p += sprintf(p, "%4u.%1u    %4u.%1u     %3u.%1u"
 				"     %3u   %3u %-3u   "
 				"%9llu   %-9llu\n",
 				tp_max / 10, tp_max % 10,
 				tp_avg / 10, tp_avg % 10,
 				eprob / 10, eprob % 10,
-				prob_ewmsd / 10, prob_ewmsd % 10,
 				mrs->retry_count,
 				mrs->last_success,
 				mrs->last_attempts,
@@ -130,9 +143,9 @@
 
 	p += sprintf(p, "\n");
 	p += sprintf(p,
-		     "              best   ____________rate__________    ________statistics________    _____last____    ______sum-of________\n");
+		     "              best   ____________rate__________    ____statistics___    _____last____    ______sum-of________\n");
 	p += sprintf(p,
-		     "mode guard #  rate  [name   idx airtime  max_tp]  [avg(tp) avg(prob) sd(prob)]  [retry|suc|att]  [#success | #attempts]\n");
+		     "mode guard #  rate  [name   idx airtime  max_tp]  [avg(tp) avg(prob)]  [retry|suc|att]  [#success | #attempts]\n");
 
 	p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p);
 	for (i = 0; i < MINSTREL_CCK_GROUP; i++)
@@ -144,9 +157,10 @@
 			"lookaround %d\n",
 			max(0, (int) mi->total_packets - (int) mi->sample_packets),
 			mi->sample_packets);
-	p += sprintf(p, "Average # of aggregated frames per A-MPDU: %d.%d\n",
-		MINSTREL_TRUNC(mi->avg_ampdu_len),
-		MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10);
+	if (mi->avg_ampdu_len)
+		p += sprintf(p, "Average # of aggregated frames per A-MPDU: %d.%d\n",
+			MINSTREL_TRUNC(mi->avg_ampdu_len),
+			MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10);
 	ms->len = p - ms->buf;
 	WARN_ON(ms->len + sizeof(*ms) > 32768);
 
@@ -187,7 +201,7 @@
 		struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j];
 		static const int bitrates[4] = { 10, 20, 55, 110 };
 		int idx = i * MCS_GROUP_RATES + j;
-		unsigned int prob_ewmsd;
+		unsigned int duration;
 
 		if (!(mi->supported[i] & BIT(j)))
 			continue;
@@ -222,20 +236,21 @@
 		}
 
 		p += sprintf(p, "%u,", idx);
-		tx_time = DIV_ROUND_CLOSEST(mg->duration[j], 1000);
+
+		duration = mg->duration[j];
+		duration <<= mg->shift;
+		tx_time = DIV_ROUND_CLOSEST(duration, 1000);
 		p += sprintf(p, "%u,", tx_time);
 
 		tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
 		tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
 		eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
-		prob_ewmsd = minstrel_get_ewmsd10(mrs);
 
-		p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u.%u,%u,%u,"
+		p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,"
 				"%u,%llu,%llu,",
 				tp_max / 10, tp_max % 10,
 				tp_avg / 10, tp_avg % 10,
 				eprob / 10, eprob % 10,
-				prob_ewmsd / 10, prob_ewmsd % 10,
 				mrs->retry_count,
 				mrs->last_success,
 				mrs->last_attempts,
@@ -303,17 +318,8 @@
 {
 	struct minstrel_ht_sta_priv *msp = priv_sta;
 
-	msp->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, msp,
-					     &minstrel_ht_stat_fops);
-	msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, msp,
-						 &minstrel_ht_stat_csv_fops);
-}
-
-void
-minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta)
-{
-	struct minstrel_ht_sta_priv *msp = priv_sta;
-
-	debugfs_remove(msp->dbg_stats);
-	debugfs_remove(msp->dbg_stats_csv);
+	debugfs_create_file("rc_stats", 0444, dir, msp,
+			    &minstrel_ht_stat_fops);
+	debugfs_create_file("rc_stats_csv", 0444, dir, msp,
+			    &minstrel_ht_stat_csv_fops);
 }
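With the dentry fields dropped from struct minstrel_ht_sta_priv, the
pattern for rate-control debugfs files becomes create-and-forget: the
whole per-station directory is torn down recursively by its owner, so
nothing needs an explicit debugfs_remove(). A minimal sketch of the
resulting shape (names here are illustrative):

	#include <linux/debugfs.h>

	static void example_add_sta_files(struct dentry *dir, void *priv,
					  const struct file_operations *fops)
	{
		/* no stored dentry and no matching debugfs_remove();
		 * debugfs_remove_recursive() on @dir cleans up */
		debugfs_create_file("rc_stats", 0444, dir, priv, fops);
	}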
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5e2b4a4..0e05ff0 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
@@ -5,11 +6,7 @@
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 
 #include <linux/jiffies.h>
@@ -115,7 +112,8 @@
 
 	if (status->flag & (RX_FLAG_FAILED_FCS_CRC |
 			    RX_FLAG_FAILED_PLCP_CRC |
-			    RX_FLAG_ONLY_MONITOR))
+			    RX_FLAG_ONLY_MONITOR |
+			    RX_FLAG_NO_PSDU))
 		return true;
 
 	if (unlikely(skb->len < 16 + present_fcs_len + rtap_space))
@@ -142,6 +140,9 @@
 	/* allocate extra bitmaps */
 	if (status->chains)
 		len += 4 * hweight8(status->chains);
+	/* vendor presence bitmap */
+	if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)
+		len += 4;
 
 	if (ieee80211_have_rx_timestamp(status)) {
 		len = ALIGN(len, 8);
@@ -189,16 +190,40 @@
 		BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he_mu) != 12);
 	}
 
+	if (status->flag & RX_FLAG_NO_PSDU)
+		len += 1;
+
+	if (status->flag & RX_FLAG_RADIOTAP_LSIG) {
+		len = ALIGN(len, 2);
+		len += 4;
+		BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_lsig) != 4);
+	}
+
 	if (status->chains) {
 		/* antenna and antenna signal fields */
 		len += 2 * hweight8(status->chains);
 	}
 
 	if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
-		struct ieee80211_vendor_radiotap *rtap = (void *)skb->data;
+		struct ieee80211_vendor_radiotap *rtap;
+		int vendor_data_offset = 0;
 
-		/* vendor presence bitmap */
-		len += 4;
+		/*
+		 * The position to look at depends on the existence (or non-
+		 * existence) of other elements, so take that into account...
+		 */
+		if (status->flag & RX_FLAG_RADIOTAP_HE)
+			vendor_data_offset +=
+				sizeof(struct ieee80211_radiotap_he);
+		if (status->flag & RX_FLAG_RADIOTAP_HE_MU)
+			vendor_data_offset +=
+				sizeof(struct ieee80211_radiotap_he_mu);
+		if (status->flag & RX_FLAG_RADIOTAP_LSIG)
+			vendor_data_offset +=
+				sizeof(struct ieee80211_radiotap_lsig);
+
+		rtap = (void *)&skb->data[vendor_data_offset];
+
 		/* alignment for fixed 6-byte vendor data header */
 		len = ALIGN(len, 2);
 		/* vendor data header */
@@ -220,7 +245,7 @@
 		struct ieee80211_hdr_3addr hdr;
 		u8 category;
 		u8 action_code;
-	} __packed action;
+	} __packed __aligned(2) action;
 
 	if (!sdata)
 		return;
@@ -279,6 +304,7 @@
 	struct ieee80211_vendor_radiotap rtap = {};
 	struct ieee80211_radiotap_he he = {};
 	struct ieee80211_radiotap_he_mu he_mu = {};
+	struct ieee80211_radiotap_lsig lsig = {};
 
 	if (status->flag & RX_FLAG_RADIOTAP_HE) {
 		he = *(struct ieee80211_radiotap_he *)skb->data;
@@ -291,6 +317,11 @@
 		skb_pull(skb, sizeof(he_mu));
 	}
 
+	if (status->flag & RX_FLAG_RADIOTAP_LSIG) {
+		lsig = *(struct ieee80211_radiotap_lsig *)skb->data;
+		skb_pull(skb, sizeof(lsig));
+	}
+
 	if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
 		rtap = *(struct ieee80211_vendor_radiotap *)skb->data;
 		/* rtap.len and rtap.pad are undone immediately */
@@ -549,7 +580,7 @@
 
 	if (status->encoding == RX_ENC_HE &&
 	    status->flag & RX_FLAG_RADIOTAP_HE) {
-#define HE_PREP(f, val)	cpu_to_le16(FIELD_PREP(IEEE80211_RADIOTAP_HE_##f, val))
+#define HE_PREP(f, val)	le16_encode_bits(val, IEEE80211_RADIOTAP_HE_##f)
 
 		if (status->enc_flags & RX_ENC_FLAG_STBC_MASK) {
 			he.data6 |= HE_PREP(DATA6_NSTS,
@@ -630,6 +661,21 @@
 		pos += sizeof(he_mu);
 	}
 
+	if (status->flag & RX_FLAG_NO_PSDU) {
+		rthdr->it_present |=
+			cpu_to_le32(1 << IEEE80211_RADIOTAP_ZERO_LEN_PSDU);
+		*pos++ = status->zero_length_psdu_type;
+	}
+
+	if (status->flag & RX_FLAG_RADIOTAP_LSIG) {
+		/* ensure 2 byte alignment */
+		while ((pos - (u8 *)rthdr) & 1)
+			pos++;
+		rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_LSIG);
+		memcpy(pos, &lsig, sizeof(lsig));
+		pos += sizeof(lsig);
+	}
+
 	for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
 		*pos++ = status->chain_signal[chain];
 		*pos++ = chain;
@@ -722,6 +768,7 @@
 	struct ieee80211_sub_if_data *monitor_sdata =
 		rcu_dereference(local->monitor_sdata);
 	bool only_monitor = false;
+	unsigned int min_head_len;
 
 	if (status->flag & RX_FLAG_RADIOTAP_HE)
 		rtap_space += sizeof(struct ieee80211_radiotap_he);
@@ -729,12 +776,18 @@
 	if (status->flag & RX_FLAG_RADIOTAP_HE_MU)
 		rtap_space += sizeof(struct ieee80211_radiotap_he_mu);
 
+	if (status->flag & RX_FLAG_RADIOTAP_LSIG)
+		rtap_space += sizeof(struct ieee80211_radiotap_lsig);
+
 	if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) {
-		struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data;
+		struct ieee80211_vendor_radiotap *rtap =
+			(void *)(origskb->data + rtap_space);
 
 		rtap_space += sizeof(*rtap) + rtap->len + rtap->pad;
 	}
 
+	min_head_len = rtap_space;
+
 	/*
 	 * First, we may need to make a copy of the skb because
 	 *  (1) we need to modify it for radiotap (if not present), and
@@ -744,18 +797,23 @@
 	 * the SKB because it has a bad FCS/PLCP checksum.
 	 */
 
-	if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) {
-		if (unlikely(origskb->len <= FCS_LEN)) {
-			/* driver bug */
-			WARN_ON(1);
-			dev_kfree_skb(origskb);
-			return NULL;
+	if (!(status->flag & RX_FLAG_NO_PSDU)) {
+		if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) {
+			if (unlikely(origskb->len <= FCS_LEN + rtap_space)) {
+				/* driver bug */
+				WARN_ON(1);
+				dev_kfree_skb(origskb);
+				return NULL;
+			}
+			present_fcs_len = FCS_LEN;
 		}
-		present_fcs_len = FCS_LEN;
+
+		/* also consider the hdr->frame_control */
+		min_head_len += 2;
 	}
 
-	/* ensure hdr->frame_control and vendor radiotap data are in skb head */
-	if (!pskb_may_pull(origskb, 2 + rtap_space)) {
+	/* ensure that the expected data elements are in skb head */
+	if (!pskb_may_pull(origskb, min_head_len)) {
 		dev_kfree_skb(origskb);
 		return NULL;
 	}
@@ -944,23 +1002,43 @@
 	return -1;
 }
 
-static int ieee80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs,
-				  struct sk_buff *skb)
+static int ieee80211_get_keyid(struct sk_buff *skb,
+			       const struct ieee80211_cipher_scheme *cs)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	__le16 fc;
 	int hdrlen;
+	int minlen;
+	u8 key_idx_off;
+	u8 key_idx_shift;
 	u8 keyid;
 
 	fc = hdr->frame_control;
 	hdrlen = ieee80211_hdrlen(fc);
 
-	if (skb->len < hdrlen + cs->hdr_len)
+	if (cs) {
+		minlen = hdrlen + cs->hdr_len;
+		key_idx_off = hdrlen + cs->key_idx_off;
+		key_idx_shift = cs->key_idx_shift;
+	} else {
+		/* WEP, TKIP, CCMP and GCMP */
+		minlen = hdrlen + IEEE80211_WEP_IV_LEN;
+		key_idx_off = hdrlen + 3;
+		key_idx_shift = 6;
+	}
+
+	if (unlikely(skb->len < minlen))
 		return -EINVAL;
 
-	skb_copy_bits(skb, hdrlen + cs->key_idx_off, &keyid, 1);
-	keyid &= cs->key_idx_mask;
-	keyid >>= cs->key_idx_shift;
+	skb_copy_bits(skb, key_idx_off, &keyid, 1);
+
+	if (cs)
+		keyid &= cs->key_idx_mask;
+	keyid >>= key_idx_shift;
+
+	/* cs could use more than the usual two bits for the keyid */
+	if (unlikely(keyid >= NUM_DEFAULT_KEYS))
+		return -EINVAL;
 
 	return keyid;
 }
@@ -1506,8 +1584,16 @@
 	if (!sta->sta.txq[0])
 		return;
 
-	for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
-		if (txq_has_queue(sta->sta.txq[tid]))
+	for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
+		struct ieee80211_txq *txq = sta->sta.txq[tid];
+		struct txq_info *txqi = to_txq_info(txq);
+
+		spin_lock(&local->active_txq_lock[txq->ac]);
+		if (!list_empty(&txqi->schedule_order))
+			list_del_init(&txqi->schedule_order);
+		spin_unlock(&local->active_txq_lock[txq->ac]);
+
+		if (txq_has_queue(txq))
 			set_bit(tid, &sta->txq_buffered_tids);
 		else
 			clear_bit(tid, &sta->txq_buffered_tids);
@@ -1791,9 +1877,9 @@
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	int keyidx;
-	int hdrlen;
 	ieee80211_rx_result result = RX_DROP_UNUSABLE;
 	struct ieee80211_key *sta_ptk = NULL;
+	struct ieee80211_key *ptk_idx = NULL;
 	int mmie_keyidx = -1;
 	__le16 fc;
 	const struct ieee80211_cipher_scheme *cs = NULL;
@@ -1831,21 +1917,24 @@
 
 	if (rx->sta) {
 		int keyid = rx->sta->ptk_idx;
+		sta_ptk = rcu_dereference(rx->sta->ptk[keyid]);
 
-		if (ieee80211_has_protected(fc) && rx->sta->cipher_scheme) {
+		if (ieee80211_has_protected(fc)) {
 			cs = rx->sta->cipher_scheme;
-			keyid = ieee80211_get_cs_keyid(cs, rx->skb);
+			keyid = ieee80211_get_keyid(rx->skb, cs);
+
 			if (unlikely(keyid < 0))
 				return RX_DROP_UNUSABLE;
+
+			ptk_idx = rcu_dereference(rx->sta->ptk[keyid]);
 		}
-		sta_ptk = rcu_dereference(rx->sta->ptk[keyid]);
 	}
 
 	if (!ieee80211_has_protected(fc))
 		mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
 
 	if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) {
-		rx->key = sta_ptk;
+		rx->key = ptk_idx ? ptk_idx : sta_ptk;
 		if ((status->flag & RX_FLAG_DECRYPTED) &&
 		    (status->flag & RX_FLAG_IV_STRIPPED))
 			return RX_CONTINUE;
@@ -1905,8 +1994,6 @@
 		}
 		return RX_CONTINUE;
 	} else {
-		u8 keyid;
-
 		/*
 		 * The device doesn't give us the IV so we won't be
 		 * able to look up the key. That's ok though, we
@@ -1920,23 +2007,10 @@
 		    (status->flag & RX_FLAG_IV_STRIPPED))
 			return RX_CONTINUE;
 
-		hdrlen = ieee80211_hdrlen(fc);
+		keyidx = ieee80211_get_keyid(rx->skb, cs);
 
-		if (cs) {
-			keyidx = ieee80211_get_cs_keyid(cs, rx->skb);
-
-			if (unlikely(keyidx < 0))
-				return RX_DROP_UNUSABLE;
-		} else {
-			if (rx->skb->len < 8 + hdrlen)
-				return RX_DROP_UNUSABLE; /* TODO: count this? */
-			/*
-			 * no need to call ieee80211_wep_get_keyidx,
-			 * it verifies a bunch of things we've done already
-			 */
-			skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
-			keyidx = keyid >> 6;
-		}
+		if (unlikely(keyidx < 0))
+			return RX_DROP_UNUSABLE;
 
 		/* check per-station GTK first, if multicast packet */
 		if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
@@ -2047,6 +2121,7 @@
 	idx = sdata->fragment_next;
 	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
 		struct ieee80211_hdr *f_hdr;
+		struct sk_buff *f_skb;
 
 		idx--;
 		if (idx < 0)
@@ -2058,7 +2133,8 @@
 		    entry->last_frag + 1 != frag)
 			continue;
 
-		f_hdr = (struct ieee80211_hdr *)entry->skb_list.next->data;
+		f_skb = __skb_peek(&entry->skb_list);
+		f_hdr = (struct ieee80211_hdr *) f_skb->data;
 
 		/*
 		 * Check ftype and addresses are equal, else check next fragment
@@ -2315,7 +2391,7 @@
 
 		if (!sdata->u.mgd.use_4addr)
 			return -1;
-		else
+		else if (!ether_addr_equal(hdr->addr1, sdata->vif.addr))
 			check_port_control = true;
 	}
 
@@ -2371,11 +2447,13 @@
 		      skb->protocol == cpu_to_be16(ETH_P_PREAUTH)) &&
 		     sdata->control_port_over_nl80211)) {
 		struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
-		bool noencrypt = status->flag & RX_FLAG_DECRYPTED;
+		bool noencrypt = !(status->flag & RX_FLAG_DECRYPTED);
 
 		cfg80211_rx_control_port(dev, skb, noencrypt);
 		dev_kfree_skb(skb);
 	} else {
+		memset(skb->cb, 0, sizeof(skb->cb));
+
 		/* deliver to local stack */
 		if (rx->napi)
 			napi_gro_receive(rx->napi, skb);
@@ -2426,8 +2504,9 @@
 			if (!xmit_skb)
 				net_info_ratelimited("%s: failed to clone multicast frame\n",
 						    dev->name);
-		} else if (!is_multicast_ether_addr(ehdr->h_dest)) {
-			dsta = sta_info_get(sdata, skb->data);
+		} else if (!is_multicast_ether_addr(ehdr->h_dest) &&
+			   !ether_addr_equal(ehdr->h_dest, ehdr->h_source)) {
+			dsta = sta_info_get(sdata, ehdr->h_dest);
 			if (dsta) {
 				/*
 				 * The destination station is associated to
@@ -2469,8 +2548,6 @@
 
 	if (skb) {
 		skb->protocol = eth_type_trans(skb, dev);
-		memset(skb->cb, 0, sizeof(skb->cb));
-
 		ieee80211_deliver_skb_to_local_stack(skb, rx);
 	}
 
@@ -2597,6 +2674,7 @@
 	struct ieee80211_sub_if_data *sdata = rx->sdata;
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	u16 ac, q, hdrlen;
+	int tailroom = 0;
 
 	hdr = (struct ieee80211_hdr *) skb->data;
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -2676,15 +2754,21 @@
 	skb_set_queue_mapping(skb, q);
 
 	if (!--mesh_hdr->ttl) {
-		IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl);
+		if (!is_multicast_ether_addr(hdr->addr1))
+			IEEE80211_IFSTA_MESH_CTR_INC(ifmsh,
+						     dropped_frames_ttl);
 		goto out;
 	}
 
 	if (!ifmsh->mshcfg.dot11MeshForwarding)
 		goto out;
 
+	if (sdata->crypto_tx_tailroom_needed_cnt)
+		tailroom = IEEE80211_ENCRYPT_TAILROOM;
+
 	fwd_skb = skb_copy_expand(skb, local->tx_headroom +
-				       sdata->encrypt_headroom, 0, GFP_ATOMIC);
+				       sdata->encrypt_headroom,
+				  tailroom, GFP_ATOMIC);
 	if (!fwd_skb)
 		goto out;
 
@@ -3383,9 +3467,18 @@
 	case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP):
 		/* process for all: mesh, mlme, ibss */
 		break;
+	case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
+		if (is_multicast_ether_addr(mgmt->da) &&
+		    !is_broadcast_ether_addr(mgmt->da))
+			return RX_DROP_MONITOR;
+
+		/* process only for station/IBSS */
+		if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+		    sdata->vif.type != NL80211_IFTYPE_ADHOC)
+			return RX_DROP_MONITOR;
+		break;
 	case cpu_to_le16(IEEE80211_STYPE_ASSOC_RESP):
 	case cpu_to_le16(IEEE80211_STYPE_REASSOC_RESP):
-	case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
 	case cpu_to_le16(IEEE80211_STYPE_DISASSOC):
 		if (is_multicast_ether_addr(mgmt->da) &&
 		    !is_broadcast_ether_addr(mgmt->da))
@@ -3744,6 +3837,8 @@
 	case NL80211_IFTYPE_STATION:
 		if (!bssid && !sdata->u.mgd.use_4addr)
 			return false;
+		if (ieee80211_is_robust_mgmt_frame(skb) && !rx->sta)
+			return false;
 		if (multicast)
 			return true;
 		return ether_addr_equal(sdata->vif.addr, hdr->addr1);
@@ -3971,12 +4066,8 @@
 		case WLAN_CIPHER_SUITE_GCMP_256:
 			break;
 		default:
-			/* we also don't want to deal with WEP or cipher scheme
-			 * since those require looking up the key idx in the
-			 * frame, rather than assuming the PTK is used
-			 * (we need to revisit this once we implement the real
-			 * PTK index, which is now valid in the spec, but we
-			 * haven't implemented that part yet)
+			/* We also don't want to deal with
+			 * WEP or cipher scheme.
 			 */
 			goto clear_rcu;
 		}
@@ -4208,11 +4299,10 @@
 
 	if (fast_rx->internal_forward) {
 		struct sk_buff *xmit_skb = NULL;
-		bool multicast = is_multicast_ether_addr(skb->data);
-
-		if (multicast) {
+		if (is_multicast_ether_addr(addrs.da)) {
 			xmit_skb = skb_copy(skb, GFP_ATOMIC);
-		} else if (sta_info_get(rx->sdata, skb->data)) {
+		} else if (!ether_addr_equal(addrs.da, addrs.sa) &&
+			   sta_info_get(rx->sdata, addrs.da)) {
 			xmit_skb = skb;
 			skb = NULL;
 		}
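The unified ieee80211_get_keyid() above replaces open-coded WEP/TKIP/CCMP
key-index extraction: without a cipher scheme, the two-bit key index sits
in bits 6-7 of the octet at hdrlen + 3. The same extraction done as a
standalone sketch:

	#include <stdint.h>
	#include <stdio.h>

	static int demo_get_keyid(const uint8_t *frame, size_t len, int hdrlen)
	{
		if (len < (size_t)hdrlen + 4)	/* IEEE80211_WEP_IV_LEN */
			return -1;
		return frame[hdrlen + 3] >> 6;	/* keyid 0..3 */
	}

	int main(void)
	{
		uint8_t frame[28] = { 0 };

		frame[24 + 3] = 0x60;	/* keyid 1, ExtIV bit set */
		printf("keyid=%d\n", demo_get_keyid(frame, sizeof(frame), 24));
		return 0;
	}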
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 5d2a117..4d31d96 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Scanning implementation
  *
@@ -8,10 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2013-2015  Intel Mobile Communications GmbH
  * Copyright 2016-2017  Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 
 #include <linux/if_arp.h>
@@ -57,62 +55,14 @@
 	return qos_info & IEEE80211_WMM_IE_AP_QOSINFO_UAPSD;
 }
 
-struct ieee80211_bss *
-ieee80211_bss_info_update(struct ieee80211_local *local,
-			  struct ieee80211_rx_status *rx_status,
-			  struct ieee80211_mgmt *mgmt, size_t len,
-			  struct ieee802_11_elems *elems,
-			  struct ieee80211_channel *channel)
+static void
+ieee80211_update_bss_from_elems(struct ieee80211_local *local,
+				struct ieee80211_bss *bss,
+				struct ieee802_11_elems *elems,
+				struct ieee80211_rx_status *rx_status,
+				bool beacon)
 {
-	bool beacon = ieee80211_is_beacon(mgmt->frame_control);
-	struct cfg80211_bss *cbss;
-	struct ieee80211_bss *bss;
 	int clen, srlen;
-	struct cfg80211_inform_bss bss_meta = {
-		.boottime_ns = rx_status->boottime_ns,
-	};
-	bool signal_valid;
-	struct ieee80211_sub_if_data *scan_sdata;
-
-	if (rx_status->flag & RX_FLAG_NO_SIGNAL_VAL)
-		bss_meta.signal = 0; /* invalid signal indication */
-	else if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
-		bss_meta.signal = rx_status->signal * 100;
-	else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC))
-		bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal;
-
-	bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_20;
-	if (rx_status->bw == RATE_INFO_BW_5)
-		bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_5;
-	else if (rx_status->bw == RATE_INFO_BW_10)
-		bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10;
-
-	bss_meta.chan = channel;
-
-	rcu_read_lock();
-	scan_sdata = rcu_dereference(local->scan_sdata);
-	if (scan_sdata && scan_sdata->vif.type == NL80211_IFTYPE_STATION &&
-	    scan_sdata->vif.bss_conf.assoc &&
-	    ieee80211_have_rx_timestamp(rx_status)) {
-		bss_meta.parent_tsf =
-			ieee80211_calculate_rx_timestamp(local, rx_status,
-							 len + FCS_LEN, 24);
-		ether_addr_copy(bss_meta.parent_bssid,
-				scan_sdata->vif.bss_conf.bssid);
-	}
-	rcu_read_unlock();
-
-	cbss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta,
-					      mgmt, len, GFP_ATOMIC);
-	if (!cbss)
-		return NULL;
-	/* In case the signal is invalid update the status */
-	signal_valid = abs(channel->center_freq - cbss->channel->center_freq)
-		<= local->hw.wiphy->max_adj_channel_rssi_comp;
-	if (!signal_valid)
-		rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL;
-
-	bss = (void *)cbss->priv;
 
 	if (beacon)
 		bss->device_ts_beacon = rx_status->device_timestamp;
@@ -182,6 +132,89 @@
 			bss->beacon_rate =
 				&sband->bitrates[rx_status->rate_idx];
 	}
+}
+
+struct ieee80211_bss *
+ieee80211_bss_info_update(struct ieee80211_local *local,
+			  struct ieee80211_rx_status *rx_status,
+			  struct ieee80211_mgmt *mgmt, size_t len,
+			  struct ieee80211_channel *channel)
+{
+	bool beacon = ieee80211_is_beacon(mgmt->frame_control);
+	struct cfg80211_bss *cbss, *non_tx_cbss;
+	struct ieee80211_bss *bss, *non_tx_bss;
+	struct cfg80211_inform_bss bss_meta = {
+		.boottime_ns = rx_status->boottime_ns,
+	};
+	bool signal_valid;
+	struct ieee80211_sub_if_data *scan_sdata;
+	struct ieee802_11_elems elems;
+	size_t baselen;
+	u8 *elements;
+
+	if (rx_status->flag & RX_FLAG_NO_SIGNAL_VAL)
+		bss_meta.signal = 0; /* invalid signal indication */
+	else if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
+		bss_meta.signal = rx_status->signal * 100;
+	else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC))
+		bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal;
+
+	bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_20;
+	if (rx_status->bw == RATE_INFO_BW_5)
+		bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_5;
+	else if (rx_status->bw == RATE_INFO_BW_10)
+		bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10;
+
+	bss_meta.chan = channel;
+
+	rcu_read_lock();
+	scan_sdata = rcu_dereference(local->scan_sdata);
+	if (scan_sdata && scan_sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    scan_sdata->vif.bss_conf.assoc &&
+	    ieee80211_have_rx_timestamp(rx_status)) {
+		bss_meta.parent_tsf =
+			ieee80211_calculate_rx_timestamp(local, rx_status,
+							 len + FCS_LEN, 24);
+		ether_addr_copy(bss_meta.parent_bssid,
+				scan_sdata->vif.bss_conf.bssid);
+	}
+	rcu_read_unlock();
+
+	cbss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta,
+					      mgmt, len, GFP_ATOMIC);
+	if (!cbss)
+		return NULL;
+
+	if (ieee80211_is_probe_resp(mgmt->frame_control)) {
+		elements = mgmt->u.probe_resp.variable;
+		baselen = offsetof(struct ieee80211_mgmt,
+				   u.probe_resp.variable);
+	} else {
+		baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable);
+		elements = mgmt->u.beacon.variable;
+	}
+
+	if (baselen > len)
+		return NULL;
+
+	ieee802_11_parse_elems(elements, len - baselen, false, &elems,
+			       mgmt->bssid, cbss->bssid);
+
+	/* In case the signal is invalid update the status */
+	signal_valid = abs(channel->center_freq - cbss->channel->center_freq)
+		<= local->hw.wiphy->max_adj_channel_rssi_comp;
+	if (!signal_valid)
+		rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL;
+
+	bss = (void *)cbss->priv;
+	ieee80211_update_bss_from_elems(local, bss, &elems, rx_status, beacon);
+
+	list_for_each_entry(non_tx_cbss, &cbss->nontrans_list, nontrans_list) {
+		non_tx_bss = (void *)non_tx_cbss->priv;
+
+		ieee80211_update_bss_from_elems(local, non_tx_bss, &elems,
+						rx_status, beacon);
+	}
 
 	return bss;
 }
@@ -206,10 +239,7 @@
 	struct ieee80211_sub_if_data *sdata1, *sdata2;
 	struct ieee80211_mgmt *mgmt = (void *)skb->data;
 	struct ieee80211_bss *bss;
-	u8 *elements;
 	struct ieee80211_channel *channel;
-	size_t baselen;
-	struct ieee802_11_elems elems;
 
 	if (skb->len < 24 ||
 	    (!ieee80211_is_probe_resp(mgmt->frame_control) &&
@@ -244,26 +274,15 @@
 		    !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
 						 mgmt->da))
 			return;
-
-		elements = mgmt->u.probe_resp.variable;
-		baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable);
-	} else {
-		baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable);
-		elements = mgmt->u.beacon.variable;
 	}
 
-	if (baselen > skb->len)
-		return;
-
-	ieee802_11_parse_elems(elements, skb->len - baselen, false, &elems);
-
 	channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq);
 
 	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
 		return;
 
 	bss = ieee80211_bss_info_update(local, rx_status,
-					mgmt, skb->len, &elems,
+					mgmt, skb->len,
 					channel);
 	if (bss)
 		ieee80211_rx_bss_put(local, bss);
@@ -356,7 +375,7 @@
 static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	bool hw_scan = local->ops->hw_scan;
+	bool hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning);
 	bool was_scanning = local->scanning;
 	struct cfg80211_scan_request *scan_req;
 	struct ieee80211_sub_if_data *scan_sdata;
@@ -501,10 +520,33 @@
 	return 0;
 }
 
+static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_sub_if_data *sdata_iter;
+
+	if (!ieee80211_is_radar_required(local))
+		return true;
+
+	if (!regulatory_pre_cac_allowed(local->hw.wiphy))
+		return false;
+
+	mutex_lock(&local->iflist_mtx);
+	list_for_each_entry(sdata_iter, &local->interfaces, list) {
+		if (sdata_iter->wdev.cac_started) {
+			mutex_unlock(&local->iflist_mtx);
+			return false;
+		}
+	}
+	mutex_unlock(&local->iflist_mtx);
+
+	return true;
+}
+
 static bool ieee80211_can_scan(struct ieee80211_local *local,
 			       struct ieee80211_sub_if_data *sdata)
 {
-	if (ieee80211_is_radar_required(local))
+	if (!__ieee80211_can_leave_ch(sdata))
 		return false;
 
 	if (!list_empty(&local->roc_list))
@@ -606,11 +648,15 @@
 				  struct cfg80211_scan_request *req)
 {
 	struct ieee80211_local *local = sdata->local;
+	bool hw_scan = local->ops->hw_scan;
 	int rc;
 
 	lockdep_assert_held(&local->mtx);
 
-	if (local->scan_req || ieee80211_is_radar_required(local))
+	if (local->scan_req)
+		return -EBUSY;
+
+	if (!__ieee80211_can_leave_ch(sdata))
 		return -EBUSY;
 
 	if (!ieee80211_can_scan(local, sdata)) {
@@ -620,7 +666,8 @@
 		return 0;
 	}
 
-	if (local->ops->hw_scan) {
+ again:
+	if (hw_scan) {
 		u8 *ies;
 
 		local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len;
@@ -679,7 +726,7 @@
 	else
 		memcpy(local->scan_addr, sdata->vif.addr, ETH_ALEN);
 
-	if (local->ops->hw_scan) {
+	if (hw_scan) {
 		__set_bit(SCAN_HW_SCANNING, &local->scanning);
 	} else if ((req->n_channels == 1) &&
 		   (req->channels[0] == local->_oper_chandef.chan)) {
@@ -722,7 +769,7 @@
 
 	ieee80211_recalc_idle(local);
 
-	if (local->ops->hw_scan) {
+	if (hw_scan) {
 		WARN_ON(!ieee80211_prep_hw_scan(local));
 		rc = drv_hw_scan(local, sdata, local->hw_scan_req);
 	} else {
@@ -740,6 +787,18 @@
 		RCU_INIT_POINTER(local->scan_sdata, NULL);
 	}
 
+	if (hw_scan && rc == 1) {
+		/*
+		 * we can't fall back to software for P2P-GO
+		 * as it must update NoA etc.
+		 */
+		if (ieee80211_vif_type_p2p(&sdata->vif) ==
+				NL80211_IFTYPE_P2P_GO)
+			return -EOPNOTSUPP;
+		hw_scan = false;
+		goto again;
+	}
+
 	return rc;
 }
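The rc == 1 handling above establishes a new contract for drivers: hw_scan
may return 1 to ask mac80211 to redo the request as a software scan
(refused with -EOPNOTSUPP for P2P-GO, which must stay on the hw scan for
NoA updates). A hypothetical driver hook showing that contract; all
example_* names are made up:

	#include <net/mac80211.h>

	static bool example_fw_can_scan(struct ieee80211_scan_request *req)
	{
		return req->req.n_channels <= 4; /* pretend firmware limit */
	}

	static int example_hw_scan(struct ieee80211_hw *hw,
				   struct ieee80211_vif *vif,
				   struct ieee80211_scan_request *req)
	{
		if (!example_fw_can_scan(req))
			return 1;	/* punt to mac80211's software scan */

		/* ... program the firmware scan here ... */
		return 0;
	}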
 
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 0293348..5fe2b64 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * spectrum management
  *
@@ -9,10 +10,6 @@
  * Copyright 2007-2008, Intel Corporation
  * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
  * Copyright (C) 2018        Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/ieee80211.h>
@@ -144,6 +141,7 @@
 				wide_bw_chansw_ie->new_center_freq_seg1,
 			/* .basic_mcs_set doesn't matter */
 		};
+		struct ieee80211_ht_operation ht_oper = {};
 
 		/* default, for the case of IEEE80211_VHT_CHANWIDTH_USE_HT,
 		 * to the previously parsed chandef
@@ -151,7 +149,9 @@
 		new_vht_chandef = csa_ie->chandef;
 
 		/* ignore if parsing fails */
-		if (!ieee80211_chandef_vht_oper(&vht_oper, &new_vht_chandef))
+		if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
+						&vht_oper, &ht_oper,
+						&new_vht_chandef))
 			new_vht_chandef.chan = NULL;
 
 		if (sta_flags & IEEE80211_STA_DISABLE_80P80MHZ &&
@@ -174,6 +174,12 @@
 		csa_ie->chandef = new_vht_chandef;
 	}
 
+	if (elems->max_channel_switch_time)
+		csa_ie->max_switch_time =
+			(elems->max_channel_switch_time[0] << 0) |
+			(elems->max_channel_switch_time[1] <<  8) |
+			(elems->max_channel_switch_time[2] << 16);
+
 	return 0;
 }
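The Max Channel Switch Time bytes above are assembled little-endian from a
three-octet element (the value is expressed in TUs). The same assembly as
a standalone helper:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t le24_to_cpu(const uint8_t b[3])
	{
		return b[0] | (b[1] << 8) | ((uint32_t)b[2] << 16);
	}

	int main(void)
	{
		const uint8_t ie[3] = { 0x10, 0x27, 0x00 };	/* 0x002710 */

		printf("max switch time = %u TU\n", le24_to_cpu(ie));
		return 0;
	}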
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f342022..8d3a238 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 
 #include <linux/module.h>
@@ -90,7 +87,6 @@
 	struct tid_ampdu_tx *tid_tx;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
-	struct fq *fq = &local->fq;
 	struct ps_data *ps;
 
 	if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
@@ -113,11 +109,14 @@
 
 	if (sta->sta.txq[0]) {
 		for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
-			struct txq_info *txqi = to_txq_info(sta->sta.txq[i]);
+			struct txq_info *txqi;
 
-			spin_lock_bh(&fq->lock);
+			if (!sta->sta.txq[i])
+				continue;
+
+			txqi = to_txq_info(sta->sta.txq[i]);
+
 			ieee80211_txq_purge(local, txqi);
-			spin_unlock_bh(&fq->lock);
 		}
 	}
 
@@ -345,6 +344,15 @@
 	sta->sta.max_rx_aggregation_subframes =
 		local->hw.max_rx_aggregation_subframes;
 
+	/* Extended Key ID needs to install keys for keyid 0 and 1 Rx-only.
+	 * The Tx path starts to use a key as soon as the key slot that
+	 * ptk_idx refers to is not NULL. To avoid using the initial Rx-only
+	 * key prematurely for Tx, initialize ptk_idx to an impossible PTK
+	 * keyid which will always refer to a NULL key.
+	 */
+	BUILD_BUG_ON(ARRAY_SIZE(sta->ptk) <= INVALID_PTK_KEYIDX);
+	sta->ptk_idx = INVALID_PTK_KEYIDX;
+
 	sta->local = local;
 	sta->sdata = sdata;
 	sta->rx_stats.last_rx = jiffies;
@@ -374,6 +382,7 @@
 		for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
 			struct txq_info *txq = txq_data + i * size;
 
+			/* might not do anything for the bufferable MMPDU TXQ */
 			ieee80211_txq_init(sdata, sta, txq, i);
 		}
 	}
@@ -381,14 +390,58 @@
 	if (sta_prepare_rate_control(local, sta, gfp))
 		goto free_txq;
 
+	sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
+
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		skb_queue_head_init(&sta->ps_tx_buf[i]);
 		skb_queue_head_init(&sta->tx_filtered[i]);
+		sta->airtime[i].deficit = sta->airtime_weight;
 	}
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
 		sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
 
+	for (i = 0; i < NUM_NL80211_BANDS; i++) {
+		u32 mandatory = 0;
+		int r;
+
+		if (!hw->wiphy->bands[i])
+			continue;
+
+		switch (i) {
+		case NL80211_BAND_2GHZ:
+			/*
+			 * We use both here even though we cannot know for
+			 * sure that the station supports both; this only
+			 * matters before we know anything about the station
+			 * and send management frames, where we pick the
+			 * lowest possible rate anyway.
+			 * If we don't include _G here, we cannot find a rate
+			 * in P2P, and thus trigger the WARN_ONCE() in rate.c
+			 */
+			mandatory = IEEE80211_RATE_MANDATORY_B |
+				    IEEE80211_RATE_MANDATORY_G;
+			break;
+		case NL80211_BAND_5GHZ:
+			mandatory = IEEE80211_RATE_MANDATORY_A;
+			break;
+		case NL80211_BAND_60GHZ:
+			WARN_ON(1);
+			mandatory = 0;
+			break;
+		}
+
+		for (r = 0; r < hw->wiphy->bands[i]->n_bitrates; r++) {
+			struct ieee80211_rate *rate;
+
+			rate = &hw->wiphy->bands[i]->bitrates[r];
+
+			if (!(rate->flags & mandatory))
+				continue;
+			sta->sta.supp_rates[i] |= BIT(r);
+		}
+	}
+
 	sta->sta.smps_mode = IEEE80211_SMPS_OFF;
 	if (sdata->vif.type == NL80211_IFTYPE_AP ||
 	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
@@ -1012,7 +1065,6 @@
 	cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
 	kfree(sinfo);
 
-	rate_control_remove_sta_debugfs(sta);
 	ieee80211_sta_debugfs_remove(sta);
 
 	cleanup_single_sta(sta);
@@ -1239,13 +1291,11 @@
 	if (!ieee80211_hw_check(&local->hw, AP_LINK_PS))
 		drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta);
 
-	if (sta->sta.txq[0]) {
-		for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
-			if (!txq_has_queue(sta->sta.txq[i]))
-				continue;
+	for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+		if (!sta->sta.txq[i] || !txq_has_queue(sta->sta.txq[i]))
+			continue;
 
-			drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i]));
-		}
+		schedule_and_wake_txq(local, to_txq_info(sta->sta.txq[i]));
 	}
 
 	skb_queue_head_init(&pending);
@@ -1683,7 +1733,8 @@
 			return;
 
 		for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
-			if (!(driver_release_tids & BIT(tid)) ||
+			if (!sta->sta.txq[tid] ||
+			    !(driver_release_tids & BIT(tid)) ||
 			    txq_has_queue(sta->sta.txq[tid]))
 				continue;
 
@@ -1821,6 +1872,27 @@
 }
 EXPORT_SYMBOL(ieee80211_sta_set_buffered);
 
+void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
+				    u32 tx_airtime, u32 rx_airtime)
+{
+	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+	struct ieee80211_local *local = sta->sdata->local;
+	u8 ac = ieee80211_ac_from_tid(tid);
+	u32 airtime = 0;
+
+	if (sta->local->airtime_flags & AIRTIME_USE_TX)
+		airtime += tx_airtime;
+	if (sta->local->airtime_flags & AIRTIME_USE_RX)
+		airtime += rx_airtime;
+
+	spin_lock_bh(&local->active_txq_lock[ac]);
+	sta->airtime[ac].tx_airtime += tx_airtime;
+	sta->airtime[ac].rx_airtime += rx_airtime;
+	sta->airtime[ac].deficit -= airtime;
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+}
+EXPORT_SYMBOL(ieee80211_sta_register_airtime);
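/*
 * Illustration only (not part of the patch): a driver that measures
 * per-frame airtime would feed this accounting from its status paths
 * roughly as below; drv_frame_airtime_usec() is a hypothetical helper.
 */
static void drv_tx_status(struct ieee80211_hw *hw,
			  struct ieee80211_sta *pubsta, u8 tid,
			  struct sk_buff *skb)
{
	u32 usec = drv_frame_airtime_usec(skb);	/* hypothetical */

	/* TX airtime here; the RX path reports via the last argument */
	ieee80211_sta_register_airtime(pubsta, tid, usec, 0);
}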
+
 int sta_info_move_state(struct sta_info *sta,
 			enum ieee80211_sta_state new_state)
 {
@@ -1889,6 +1961,7 @@
 	case IEEE80211_STA_ASSOC:
 		if (sta->sta_state == IEEE80211_STA_AUTH) {
 			set_bit(WLAN_STA_ASSOC, &sta->_flags);
+			sta->assoc_at = ktime_get_boottime_ns();
 			ieee80211_recalc_min_chandef(sta->sdata);
 			if (!sta->sta.support_p2p_ps)
 				ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
@@ -1906,6 +1979,10 @@
 			ieee80211_check_fast_xmit(sta);
 			ieee80211_check_fast_rx(sta);
 		}
+		if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+		    sta->sdata->vif.type == NL80211_IFTYPE_AP)
+			cfg80211_send_layer2_update(sta->sdata->dev,
+						    sta->sta.addr);
 		break;
 	default:
 		break;
@@ -2118,6 +2195,7 @@
 			 BIT_ULL(NL80211_STA_INFO_STA_FLAGS) |
 			 BIT_ULL(NL80211_STA_INFO_BSS_PARAM) |
 			 BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME) |
+			 BIT_ULL(NL80211_STA_INFO_ASSOC_AT_BOOTTIME) |
 			 BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC);
 
 	if (sdata->vif.type == NL80211_IFTYPE_STATION) {
@@ -2126,6 +2204,7 @@
 	}
 
 	sinfo->connected_time = ktime_get_seconds() - sta->last_connected;
+	sinfo->assoc_at = sta->assoc_at;
 	sinfo->inactive_time =
 		jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta));
 
@@ -2183,6 +2262,23 @@
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
 	}
 
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_DURATION))) {
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+			sinfo->rx_duration += sta->airtime[ac].rx_airtime;
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION);
+	}
+
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_DURATION))) {
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+			sinfo->tx_duration += sta->airtime[ac].tx_airtime;
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_DURATION);
+	}
+
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) {
+		sinfo->airtime_weight = sta->airtime_weight;
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT);
+	}
+
 	sinfo->rx_dropped_misc = sta->rx_stats.dropped;
 	if (sta->pcpu_rx_stats) {
 		for_each_possible_cpu(cpu) {
@@ -2248,11 +2344,8 @@
 	}
 
 	if (tidstats && !cfg80211_sinfo_alloc_tid_stats(sinfo, GFP_KERNEL)) {
-		for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) {
-			struct cfg80211_tid_stats *tidstats = &sinfo->pertid[i];
-
-			sta_set_tidstats(sta, tidstats, i);
-		}
+		for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
+			sta_set_tidstats(sta, &sinfo->pertid[i], i);
 	}
 
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
@@ -2262,7 +2355,8 @@
 				 BIT_ULL(NL80211_STA_INFO_PLINK_STATE) |
 				 BIT_ULL(NL80211_STA_INFO_LOCAL_PM) |
 				 BIT_ULL(NL80211_STA_INFO_PEER_PM) |
-				 BIT_ULL(NL80211_STA_INFO_NONPEER_PM);
+				 BIT_ULL(NL80211_STA_INFO_NONPEER_PM) |
+				 BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE);
 
 		sinfo->llid = sta->mesh->llid;
 		sinfo->plid = sta->mesh->plid;
@@ -2274,6 +2368,7 @@
 		sinfo->local_pm = sta->mesh->local_pm;
 		sinfo->peer_pm = sta->mesh->peer_pm;
 		sinfo->nonpeer_pm = sta->mesh->nonpeer_pm;
+		sinfo->connected_to_gate = sta->mesh->connected_to_gate;
 #endif
 	}
 
@@ -2323,13 +2418,19 @@
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
 	}
 
-	if (ieee80211_hw_check(&sta->local->hw, REPORTS_TX_ACK_STATUS) &&
-	    !(sinfo->filled & BIT_ULL(NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG))) {
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) &&
+	    sta->status_stats.ack_signal_filled) {
 		sinfo->avg_ack_signal =
 			-(s8)ewma_avg_signal_read(
 				&sta->status_stats.avg_ack_signal);
 		sinfo->filled |=
-			BIT_ULL(NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG);
+			BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG);
+	}
+
+	if (ieee80211_vif_is_mesh(&sdata->vif)) {
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_LINK_METRIC);
+		sinfo->airtime_link_metric =
+			airtime_link_metric_get(local, sta);
 	}
 }
 
@@ -2356,7 +2457,8 @@
 {
 	struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
 
-	if (time_after(stats->last_rx, sta->status_stats.last_ack))
+	if (!sta->status_stats.last_ack ||
+	    time_after(stats->last_rx, sta->status_stats.last_ack))
 		return stats->last_rx;
 	return sta->status_stats.last_ack;
 }
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9a04327..369c2dd 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2002-2005, Devicescape Software, Inc.
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef STA_INFO_H
@@ -127,6 +124,16 @@
 	AGG_STOP_DESTROY_STA,
 };
 
+/* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
+#define AIRTIME_USE_TX		BIT(0)
+#define AIRTIME_USE_RX		BIT(1)
+
+struct airtime_info {
+	u64 rx_airtime;
+	u64 tx_airtime;
+	s64 deficit;
+};
+
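/*
 * Worked example (assumed numbers) of the deficit round-robin driven by
 * these fields: with airtime_weight = 256 and deficit starting at 256,
 * a frame costing 700 units leaves deficit = 256 - 700 = -444; the
 * scheduler then tops it up once per round (-444 + 256 = -188, then
 * -188 + 256 = 68), and the station becomes eligible again once its
 * deficit is >= 0.
 */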
 struct sta_info;
 
 /**
@@ -343,6 +350,7 @@
 
 /* we use only values in the range 0-100, so pick a large precision */
 DECLARE_EWMA(mesh_fail_avg, 20, 8)
+DECLARE_EWMA(mesh_tx_rate_avg, 8, 16)
 
 /**
  * struct mesh_sta - mesh STA information
@@ -364,7 +372,9 @@
  * @nonpeer_pm: STA power save mode towards non-peer neighbors
  * @processed_beacon: set to true after peer rates and capabilities are
  *	processed
+ * @connected_to_gate: true if mesh STA has a path to a mesh gate
  * @fail_avg: moving percentage of failed MSDUs
+ * @tx_rate_avg: moving average of tx bitrate
  */
 struct mesh_sta {
 	struct timer_list plink_timer;
@@ -381,6 +391,7 @@
 	u8 plink_retries;
 
 	bool processed_beacon;
+	bool connected_to_gate;
 
 	enum nl80211_plink_state plink_state;
 	u32 plink_timeout;
@@ -392,6 +403,8 @@
 
 	/* moving percentage of failed MSDUs */
 	struct ewma_mesh_fail_avg fail_avg;
+	/* moving average of tx bitrate */
+	struct ewma_mesh_tx_rate_avg tx_rate_avg;
 };
 
 DECLARE_EWMA(signal, 10, 8)
@@ -453,10 +466,14 @@
  *	the station when it leaves powersave or polls for frames
  * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on
  * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on
+ * @assoc_at: clock boottime (in ns) of last association
  * @last_connected: time (in seconds) when a station got connected
  * @last_seq_ctrl: last received seq/frag number from this STA (per TID
  *	plus one for non-QoS frames)
  * @tid_seq: per-TID sequence numbers for sending to this STA
+ * @airtime: per-AC struct airtime_info describing airtime statistics for this
+ *	station
+ * @airtime_weight: station weight for airtime fairness calculation purposes
  * @ampdu_mlme: A-MPDU state machine state
  * @mesh: mesh STA information
  * @debugfs_dir: debug filesystem directory dentry
@@ -478,10 +495,28 @@
  * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to
  *	the BSS one.
  * @tx_stats: TX statistics
+ * @tx_stats.packets: # of packets transmitted
+ * @tx_stats.bytes: # of bytes in all packets transmitted
+ * @tx_stats.last_rate: last TX rate
+ * @tx_stats.msdu: # of transmitted MSDUs per TID
  * @rx_stats: RX statistics
+ * @rx_stats_avg: averaged RX statistics
+ * @rx_stats_avg.signal: averaged signal
+ * @rx_stats_avg.chain_signal: averaged per-chain signal
  * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs
  *	this (by advertising the USES_RSS hw flag)
  * @status_stats: TX status statistics
+ * @status_stats.filtered: # of filtered frames
+ * @status_stats.retry_failed: # of frames that failed after retry
+ * @status_stats.retry_count: # of retries attempted
+ * @status_stats.lost_packets: # of lost packets
+ * @status_stats.last_tdls_pkt_time: timestamp of last TDLS packet
+ * @status_stats.msdu_retries: # of MSDU retries
+ * @status_stats.msdu_failed: # of failed MSDUs
+ * @status_stats.last_ack: last ack timestamp (jiffies)
+ * @status_stats.last_ack_signal: last ACK signal
+ * @status_stats.ack_signal_filled: last ACK signal validity
+ * @status_stats.avg_ack_signal: average ACK signal
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -528,6 +563,7 @@
 	unsigned long driver_buffered_tids;
 	unsigned long txq_buffered_tids;
 
+	u64 assoc_at;
 	long last_connected;
 
 	/* Updated from RX path only, no locking requirements */
@@ -563,6 +599,9 @@
 	} tx_stats;
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 
+	struct airtime_info airtime[IEEE80211_NUM_ACS];
+	u16 airtime_weight;
+
 	/*
 	 * Aggregation information, locked with lock.
 	 */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 7fa10d0..ab8ba58 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2008-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/export.h>
@@ -257,14 +254,22 @@
 	tid_tx->bar_pending = true;
 }
 
-static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info)
+static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info,
+				     struct ieee80211_tx_status *status)
 {
 	int len = sizeof(struct ieee80211_radiotap_header);
 
 	/* IEEE80211_RADIOTAP_RATE rate */
-	if (info->status.rates[0].idx >= 0 &&
-	    !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
-					     IEEE80211_TX_RC_VHT_MCS)))
+	if (status && status->rate && !(status->rate->flags &
+					(RATE_INFO_FLAGS_MCS |
+					 RATE_INFO_FLAGS_DMG |
+					 RATE_INFO_FLAGS_EDMG |
+					 RATE_INFO_FLAGS_VHT_MCS |
+					 RATE_INFO_FLAGS_HE_MCS)))
+		len += 2;
+	else if (info->status.rates[0].idx >= 0 &&
+		 !(info->status.rates[0].flags &
+		   (IEEE80211_TX_RC_MCS | IEEE80211_TX_RC_VHT_MCS)))
 		len += 2;
 
 	/* IEEE80211_RADIOTAP_TX_FLAGS */
@@ -275,7 +280,14 @@
 
 	/* IEEE80211_RADIOTAP_MCS
 	 * IEEE80211_RADIOTAP_VHT */
-	if (info->status.rates[0].idx >= 0) {
+	if (status && status->rate) {
+		if (status->rate->flags & RATE_INFO_FLAGS_MCS)
+			len += 3;
+		else if (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)
+			len = ALIGN(len, 2) + 12;
+		else if (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)
+			len = ALIGN(len, 2) + 12;
+	} else if (info->status.rates[0].idx >= 0) {
 		if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS)
 			len += 3;
 		else if (info->status.rates[0].flags & IEEE80211_TX_RC_VHT_MCS)
@@ -289,12 +301,14 @@
 ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
 				 struct ieee80211_supported_band *sband,
 				 struct sk_buff *skb, int retry_count,
-				 int rtap_len, int shift)
+				 int rtap_len, int shift,
+				 struct ieee80211_tx_status *status)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_radiotap_header *rthdr;
 	unsigned char *pos;
+	u16 legacy_rate = 0;
 	u16 txflags;
 
 	rthdr = skb_push(skb, rtap_len);
@@ -313,14 +327,23 @@
 	 */
 
 	/* IEEE80211_RADIOTAP_RATE */
-	if (info->status.rates[0].idx >= 0 &&
-	    !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
-					     IEEE80211_TX_RC_VHT_MCS))) {
-		u16 rate;
 
+	if (status && status->rate) {
+		if (!(status->rate->flags & (RATE_INFO_FLAGS_MCS |
+					     RATE_INFO_FLAGS_DMG |
+					     RATE_INFO_FLAGS_EDMG |
+					     RATE_INFO_FLAGS_VHT_MCS |
+					     RATE_INFO_FLAGS_HE_MCS)))
+			legacy_rate = status->rate->legacy;
+	} else if (info->status.rates[0].idx >= 0 &&
+		 !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
+						  IEEE80211_TX_RC_VHT_MCS)))
+		legacy_rate =
+			sband->bitrates[info->status.rates[0].idx].bitrate;
+
+	if (legacy_rate) {
 		rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
-		rate = sband->bitrates[info->status.rates[0].idx].bitrate;
-		*pos = DIV_ROUND_UP(rate, 5 * (1 << shift));
+		*pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift));
 		/* padding for tx flags */
 		pos += 2;
 	}
@@ -344,7 +367,140 @@
 	*pos = retry_count;
 	pos++;
 
-	if (info->status.rates[0].idx < 0)
+	if (status && status->rate &&
+	    (status->rate->flags & RATE_INFO_FLAGS_MCS)) {
+		rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+		pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
+			 IEEE80211_RADIOTAP_MCS_HAVE_GI |
+			 IEEE80211_RADIOTAP_MCS_HAVE_BW;
+		if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+			pos[1] |= IEEE80211_RADIOTAP_MCS_SGI;
+		if (status->rate->bw == RATE_INFO_BW_40)
+			pos[1] |= IEEE80211_RADIOTAP_MCS_BW_40;
+		pos[2] = status->rate->mcs;
+		pos += 3;
+	} else if (status && status->rate &&
+		   (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)) {
+		u16 known = local->hw.radiotap_vht_details &
+			(IEEE80211_RADIOTAP_VHT_KNOWN_GI |
+			 IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
+
+		rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+
+		/* required alignment from rthdr */
+		pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
+
+		/* u16 known - IEEE80211_RADIOTAP_VHT_KNOWN_* */
+		put_unaligned_le16(known, pos);
+		pos += 2;
+
+		/* u8 flags - IEEE80211_RADIOTAP_VHT_FLAG_* */
+		if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+			*pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI;
+		pos++;
+
+		/* u8 bandwidth */
+		switch (status->rate->bw) {
+		case RATE_INFO_BW_160:
+			*pos = 11;
+			break;
+		case RATE_INFO_BW_80:
+			*pos = 4;
+			break;
+		case RATE_INFO_BW_40:
+			*pos = 1;
+			break;
+		default:
+			*pos = 0;
+			break;
+		}
+		pos++;
+
+		/* u8 mcs_nss[4] */
+		*pos = (status->rate->mcs << 4) | status->rate->nss;
+		pos += 4;
+
+		/* u8 coding */
+		pos++;
+		/* u8 group_id */
+		pos++;
+		/* u16 partial_aid */
+		pos += 2;
+	} else if (status && status->rate &&
+		   (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) {
+		struct ieee80211_radiotap_he *he;
+
+		rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+
+		/* required alignment from rthdr */
+		pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
+		he = (struct ieee80211_radiotap_he *)pos;
+
+		he->data1 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_FORMAT_SU |
+					IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN |
+					IEEE80211_RADIOTAP_HE_DATA1_DATA_DCM_KNOWN |
+					IEEE80211_RADIOTAP_HE_DATA1_BW_RU_ALLOC_KNOWN);
+
+		he->data2 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_GI_KNOWN);
+
+#define HE_PREP(f, val) le16_encode_bits(val, IEEE80211_RADIOTAP_HE_##f)
+
+		he->data6 |= HE_PREP(DATA6_NSTS, status->rate->nss);
+
+#define CHECK_GI(s) \
+	BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \
+	(int)NL80211_RATE_INFO_HE_GI_##s)
+
+		CHECK_GI(0_8);
+		CHECK_GI(1_6);
+		CHECK_GI(3_2);
+
+		he->data3 |= HE_PREP(DATA3_DATA_MCS, status->rate->mcs);
+		he->data3 |= HE_PREP(DATA3_DATA_DCM, status->rate->he_dcm);
+
+		he->data5 |= HE_PREP(DATA5_GI, status->rate->he_gi);
+
+		switch (status->rate->bw) {
+		case RATE_INFO_BW_20:
+			he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					     IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ);
+			break;
+		case RATE_INFO_BW_40:
+			he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					     IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ);
+			break;
+		case RATE_INFO_BW_80:
+			he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					     IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ);
+			break;
+		case RATE_INFO_BW_160:
+			he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					     IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ);
+			break;
+		case RATE_INFO_BW_HE_RU:
+#define CHECK_RU_ALLOC(s) \
+	BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_##s##T != \
+	NL80211_RATE_INFO_HE_RU_ALLOC_##s + 4)
+
+			CHECK_RU_ALLOC(26);
+			CHECK_RU_ALLOC(52);
+			CHECK_RU_ALLOC(106);
+			CHECK_RU_ALLOC(242);
+			CHECK_RU_ALLOC(484);
+			CHECK_RU_ALLOC(996);
+			CHECK_RU_ALLOC(2x996);
+
+			he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					     status->rate->he_ru_alloc + 4);
+			break;
+		default:
+			WARN_ONCE(1, "Invalid SU BW %d\n", status->rate->bw);
+		}
+
+		pos += sizeof(struct ieee80211_radiotap_he);
+	}
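/*
 * For reference (illustration, not part of the patch): the VHT mcs_nss
 * byte written above packs MCS in the high nibble and NSS in the low
 * nibble, so a radiotap consumer decodes it as:
 *
 *	u8 mcs = mcs_nss[0] >> 4;
 *	u8 nss = mcs_nss[0] & 0xf;
 */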
+
+	if ((status && status->rate) || info->status.rates[0].idx < 0)
 		return;
 
 	/* IEEE80211_RADIOTAP_MCS
@@ -556,6 +712,11 @@
 	}
 
 	ieee80211_led_tx(local);
+
+	if (skb_has_frag_list(skb)) {
+		kfree_skb_list(skb_shinfo(skb)->frag_list);
+		skb_shinfo(skb)->frag_list = NULL;
+	}
 }
 
 /*
@@ -643,7 +804,8 @@
 
 void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
 			  struct ieee80211_supported_band *sband,
-			  int retry_count, int shift, bool send_to_cooked)
+			  int retry_count, int shift, bool send_to_cooked,
+			  struct ieee80211_tx_status *status)
 {
 	struct sk_buff *skb2;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -652,14 +814,14 @@
 	int rtap_len;
 
 	/* send frame to monitor interfaces now */
-	rtap_len = ieee80211_tx_radiotap_len(info);
+	rtap_len = ieee80211_tx_radiotap_len(info, status);
 	if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) {
 		pr_err("ieee80211_tx_status: headroom too small\n");
 		dev_kfree_skb(skb);
 		return;
 	}
 	ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count,
-					 rtap_len, shift);
+					 rtap_len, shift, status);
 
 	/* XXX: is this sufficient for BPF? */
 	skb_reset_mac_header(skb);
@@ -818,6 +980,12 @@
 			ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data,
 						acked, info->status.tx_time);
 
+		if (info->status.tx_time &&
+		    wiphy_ext_feature_isset(local->hw.wiphy,
+					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+			ieee80211_sta_register_airtime(&sta->sta, tid,
+						       info->status.tx_time, 0);
+
 		if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
 			if (info->flags & IEEE80211_TX_STAT_ACK) {
 				if (sta->status_stats.lost_packets)
@@ -893,7 +1061,8 @@
 	}
 
 	/* send to monitor interfaces */
-	ieee80211_tx_monitor(local, skb, sband, retry_count, shift, send_to_cooked);
+	ieee80211_tx_monitor(local, skb, sband, retry_count, shift,
+			     send_to_cooked, status);
 }
 
 void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
@@ -989,6 +1158,25 @@
 }
 EXPORT_SYMBOL(ieee80211_tx_status_ext);
 
+void ieee80211_tx_rate_update(struct ieee80211_hw *hw,
+			      struct ieee80211_sta *pubsta,
+			      struct ieee80211_tx_info *info)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_supported_band *sband = hw->wiphy->bands[info->band];
+	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+	struct ieee80211_tx_status status = {
+		.info = info,
+		.sta = pubsta,
+	};
+
+	rate_control_tx_status(local, sband, &status);
+
+	if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
+		sta->tx_stats.last_rate = info->status.rates[0];
+}
+EXPORT_SYMBOL(ieee80211_tx_rate_update);
+
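/*
 * Illustration only (not part of the patch): a driver whose firmware
 * does rate control (HAS_RATE_CONTROL) can keep mac80211's last-rate
 * statistics fresh on a rate change, without a full TX status event:
 */
static void drv_rate_notify(struct ieee80211_hw *hw,
			    struct ieee80211_sta *pubsta,
			    enum nl80211_band band)
{
	struct ieee80211_tx_info info = {};

	info.band = band;
	/* fill info.status.rates[0] from the firmware event (assumed) */
	ieee80211_tx_rate_update(hw, pubsta, &info);
}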
 void ieee80211_report_low_ack(struct ieee80211_sta *pubsta, u32 num_packets)
 {
 	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 6c647f4..fca1f54 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * mac80211 TDLS handling code
  *
@@ -5,8 +6,7 @@
  * Copyright 2014, Intel Corporation
  * Copyright 2014  Intel Mobile Communications GmbH
  * Copyright 2015 - 2016 Intel Deutschland GmbH
- *
- * This file is GPLv2 as found in COPYING.
+ * Copyright (C) 2019 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -1055,7 +1055,7 @@
 
 	/* disable bottom halves when entering the Tx path */
 	local_bh_disable();
-	__ieee80211_subif_start_xmit(skb, dev, flags);
+	__ieee80211_subif_start_xmit(skb, dev, flags, 0);
 	local_bh_enable();
 
 	return ret;
@@ -1716,7 +1716,8 @@
 	}
 
 	ieee802_11_parse_elems(tf->u.chan_switch_resp.variable,
-			       skb->len - baselen, false, &elems);
+			       skb->len - baselen, false, &elems,
+			       NULL, NULL);
 	if (elems.parse_error) {
 		tdls_dbg(sdata, "Invalid IEs in TDLS channel switch resp\n");
 		ret = -EINVAL;
@@ -1828,7 +1829,7 @@
 	}
 
 	ieee802_11_parse_elems(tf->u.chan_switch_req.variable,
-			       skb->len - baselen, false, &elems);
+			       skb->len - baselen, false, &elems, NULL, NULL);
 	if (elems.parse_error) {
 		tdls_dbg(sdata, "Invalid IEs in TDLS channel switch req\n");
 		return -EINVAL;
@@ -1992,3 +1993,26 @@
 	}
 	rtnl_unlock();
 }
+
+void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
+				      const u8 *peer, u16 reason)
+{
+	struct ieee80211_sta *sta;
+
+	rcu_read_lock();
+	sta = ieee80211_find_sta(&sdata->vif, peer);
+	if (!sta || !sta->tdls) {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+
+	tdls_dbg(sdata, "disconnected from TDLS peer %pM (Reason: %u=%s)\n",
+		 peer, reason,
+		 ieee80211_get_reason_code_string(reason));
+
+	ieee80211_tdls_oper_request(&sdata->vif, peer,
+				    NL80211_TDLS_TEARDOWN,
+				    WLAN_REASON_TDLS_TEARDOWN_UNREACHABLE,
+				    GFP_ATOMIC);
+}
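/*
 * Illustration only (not part of the patch): a plausible call site is
 * the RX path handling a Deauthentication frame from a direct peer:
 *
 *	ieee80211_tdls_handle_disconnect(sdata, mgmt->sa,
 *			le16_to_cpu(mgmt->u.deauth.reason_code));
 *
 * which then requests a proper teardown via NL80211_TDLS_TEARDOWN
 * rather than silently dropping the link.
 */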
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index b362282..727dc9f 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2002-2004, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
  * Copyright (C) 2016 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/kernel.h>
 #include <linux/bitops.h>
@@ -222,7 +219,7 @@
  * @payload_len is the length of payload (_not_ including IV/ICV length).
  * @ta is the transmitter addresses.
  */
-int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm,
+int ieee80211_tkip_encrypt_data(struct arc4_ctx *ctx,
 				struct ieee80211_key *key,
 				struct sk_buff *skb,
 				u8 *payload, size_t payload_len)
@@ -231,7 +228,7 @@
 
 	ieee80211_get_tkip_p2k(&key->conf, skb, rc4key);
 
-	return ieee80211_wep_encrypt_data(tfm, rc4key, 16,
+	return ieee80211_wep_encrypt_data(ctx, rc4key, 16,
 					  payload, payload_len);
 }
 
@@ -239,7 +236,7 @@
  * beginning of the buffer containing IEEE 802.11 header payload, i.e.,
  * including IV, Ext. IV, real data, Michael MIC, ICV. @payload_len is the
  * length of payload, including IV, Ext. IV, MIC, ICV.  */
-int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
+int ieee80211_tkip_decrypt_data(struct arc4_ctx *ctx,
 				struct ieee80211_key *key,
 				u8 *payload, size_t payload_len, u8 *ta,
 				u8 *ra, int only_iv, int queue,
@@ -297,7 +294,7 @@
 
 	tkip_mixing_phase2(tk, &rx_ctx->ctx, iv16, rc4key);
 
-	res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12);
+	res = ieee80211_wep_decrypt_data(ctx, rc4key, 16, pos, payload_len - 12);
  done:
 	if (res == TKIP_DECRYPT_OK) {
 		/*
diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h
index a1bcbfb..9d2f8bd 100644
--- a/net/mac80211/tkip.h
+++ b/net/mac80211/tkip.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2002-2004, Instant802 Networks, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef TKIP_H
@@ -13,7 +10,7 @@
 #include <linux/crypto.h>
 #include "key.h"
 
-int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm,
+int ieee80211_tkip_encrypt_data(struct arc4_ctx *ctx,
 				struct ieee80211_key *key,
 				struct sk_buff *skb,
 				u8 *payload, size_t payload_len);
@@ -24,7 +21,7 @@
 	TKIP_DECRYPT_INVALID_KEYIDX = -2,
 	TKIP_DECRYPT_REPLAY = -3,
 };
-int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
+int ieee80211_tkip_decrypt_data(struct arc4_ctx *ctx,
 				struct ieee80211_key *key,
 				u8 *payload, size_t payload_len, u8 *ta,
 				u8 *ra, int only_iv, int queue,
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 0ab69a1..4768322 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
 * Portions of this file
-* Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018 Intel Corporation
+* Copyright(c) 2016-2017 Intel Deutschland GmbH
+* Copyright (C) 2018 - 2019 Intel Corporation
 */
 
 #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
@@ -828,6 +828,36 @@
 	)
 );
 
+TRACE_EVENT(drv_sta_set_txpwr,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 struct ieee80211_sta *sta),
+
+	TP_ARGS(local, sdata, sta),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+		STA_ENTRY
+		__field(s16, txpwr)
+		__field(u8, type)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+		STA_ASSIGN;
+		__entry->txpwr = sta->txpwr.power;
+		__entry->type = sta->txpwr.type;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT  VIF_PR_FMT  STA_PR_FMT " txpwr: %d type %d",
+		LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG,
+		__entry->txpwr,  __entry->type
+	)
+);
+
 TRACE_EVENT(drv_sta_rc_update,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
@@ -1052,10 +1082,10 @@
 );
 
 TRACE_EVENT(drv_get_survey,
-	TP_PROTO(struct ieee80211_local *local, int idx,
+	TP_PROTO(struct ieee80211_local *local, int _idx,
 		 struct survey_info *survey),
 
-	TP_ARGS(local, idx, survey),
+	TP_ARGS(local, _idx, survey),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
@@ -1064,7 +1094,7 @@
 
 	TP_fast_assign(
 		LOCAL_ASSIGN;
-		__entry->idx = idx;
+		__entry->idx = _idx;
 	),
 
 	TP_printk(
@@ -1212,9 +1242,10 @@
 	)
 );
 
-DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel,
-	TP_PROTO(struct ieee80211_local *local),
-	TP_ARGS(local)
+DEFINE_EVENT(local_sdata_evt, drv_cancel_remain_on_channel,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata),
+	TP_ARGS(local, sdata)
 );
 
 TRACE_EVENT(drv_set_ringparam,
@@ -1882,6 +1913,18 @@
 	)
 );
 
+DEFINE_EVENT(local_sdata_evt, drv_start_pmsr,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata),
+	TP_ARGS(local, sdata)
+);
+
+DEFINE_EVENT(local_sdata_evt, drv_abort_pmsr,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata),
+	TP_ARGS(local, sdata)
+);
+
 /*
  * Tracing for API calls that drivers call.
  */
@@ -2440,6 +2483,48 @@
 	     TP_ARGS(local, sdata)
 );
 
+DEFINE_EVENT(local_sdata_evt, drv_abort_channel_switch,
+	     TP_PROTO(struct ieee80211_local *local,
+		      struct ieee80211_sub_if_data *sdata),
+	     TP_ARGS(local, sdata)
+);
+
+TRACE_EVENT(drv_channel_switch_rx_beacon,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 struct ieee80211_channel_switch *ch_switch),
+
+	TP_ARGS(local, sdata, ch_switch),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+		CHANDEF_ENTRY
+		__field(u64, timestamp)
+		__field(u32, device_timestamp)
+		__field(bool, block_tx)
+		__field(u8, count)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+		CHANDEF_ASSIGN(&ch_switch->chandef)
+		__entry->timestamp = ch_switch->timestamp;
+		__entry->device_timestamp = ch_switch->device_timestamp;
+		__entry->block_tx = ch_switch->block_tx;
+		__entry->count = ch_switch->count;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT VIF_PR_FMT
+		" received a channel switch beacon to "
+		CHANDEF_PR_FMT  " count:%d block_tx:%d timestamp:%llu",
+		LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
+		__entry->block_tx, __entry->timestamp
+	)
+);
+
 TRACE_EVENT(drv_get_txpower,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
@@ -2600,6 +2685,29 @@
 	)
 );
 
+TRACE_EVENT(drv_get_ftm_responder_stats,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 struct cfg80211_ftm_responder_stats *ftm_stats),
+
+	TP_ARGS(local, sdata, ftm_stats),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT VIF_PR_FMT,
+		LOCAL_PR_ARG, VIF_PR_ARG
+	)
+);
+
 #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
index 366b9e6..40141df 100644
--- a/net/mac80211/trace_msg.h
+++ b/net/mac80211/trace_msg.h
@@ -1,4 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions of this file
+ * Copyright (C) 2019 Intel Corporation
+ */
+
 #ifdef CONFIG_MAC80211_MESSAGE_TRACING
 
 #if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
@@ -11,7 +16,7 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM mac80211_msg
 
-#define MAX_MSG_LEN	100
+#define MAX_MSG_LEN	120
 
 DECLARE_EVENT_CLASS(mac80211_msg_event,
 	TP_PROTO(struct va_format *vaf),
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 995a491..1fa4227 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
@@ -6,11 +7,6 @@
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2018 Intel Corporation
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *
  * Transmit and frame generation functions.
  */
 
@@ -166,6 +162,7 @@
 			break;
 		}
 		case NL80211_BAND_5GHZ:
+		case NL80211_BAND_6GHZ:
 			if (r->flags & IEEE80211_RATE_MANDATORY_A)
 				mrate = r->bitrate;
 			break;
@@ -1253,10 +1250,18 @@
 	    (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
 		return NULL;
 
-	if (!ieee80211_is_data_present(hdr->frame_control))
-		return NULL;
-
-	if (sta) {
+	if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) {
+		if ((!ieee80211_is_mgmt(hdr->frame_control) ||
+		     ieee80211_is_bufferable_mmpdu(hdr->frame_control) ||
+		     vif->type == NL80211_IFTYPE_STATION) &&
+		    sta && sta->uploaded) {
+			/*
+			 * This will be NULL if the driver didn't set the
+			 * opt-in hardware flag.
+			 */
+			txq = sta->sta.txq[IEEE80211_NUM_TIDS];
+		}
+	} else if (sta) {
 		u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
 
 		if (!sta->uploaded)
@@ -1391,11 +1396,15 @@
 {
 	struct fq *fq = &local->fq;
 	struct fq_tin *tin = &txqi->tin;
+	u32 flow_idx = fq_flow_idx(fq, skb);
 
 	ieee80211_set_skb_enqueue_time(skb);
-	fq_tin_enqueue(fq, tin, skb,
+
+	spin_lock_bh(&fq->lock);
+	fq_tin_enqueue(fq, tin, flow_idx, skb,
 		       fq_skb_free_func,
 		       fq_flow_get_default_func);
+	spin_unlock_bh(&fq->lock);
 }
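/*
 * For reference (approximation, not part of the patch): fq_flow_idx()
 * from include/net/fq_impl.h only hashes the skb, which is why it can
 * now run before fq->lock is taken:
 *
 *	static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
 *	{
 *		return reciprocal_scale(skb_get_hash(skb), fq->flows_cnt);
 *	}
 */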
 
 static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin,
@@ -1441,19 +1450,37 @@
 	codel_vars_init(&txqi->def_cvars);
 	codel_stats_init(&txqi->cstats);
 	__skb_queue_head_init(&txqi->frags);
+	INIT_LIST_HEAD(&txqi->schedule_order);
 
 	txqi->txq.vif = &sdata->vif;
 
-	if (sta) {
-		txqi->txq.sta = &sta->sta;
-		sta->sta.txq[tid] = &txqi->txq;
-		txqi->txq.tid = tid;
-		txqi->txq.ac = ieee80211_ac_from_tid(tid);
-	} else {
+	if (!sta) {
 		sdata->vif.txq = &txqi->txq;
 		txqi->txq.tid = 0;
 		txqi->txq.ac = IEEE80211_AC_BE;
+
+		return;
 	}
+
+	if (tid == IEEE80211_NUM_TIDS) {
+		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+			/* Drivers need to opt in to the management MPDU TXQ */
+			if (!ieee80211_hw_check(&sdata->local->hw,
+						STA_MMPDU_TXQ))
+				return;
+		} else if (!ieee80211_hw_check(&sdata->local->hw,
+					       BUFF_MMPDU_TXQ)) {
+			/* Drivers need to opt in to the bufferable MMPDU TXQ */
+			return;
+		}
+		txqi->txq.ac = IEEE80211_AC_VO;
+	} else {
+		txqi->txq.ac = ieee80211_ac_from_tid(tid);
+	}
+
+	txqi->txq.sta = &sta->sta;
+	txqi->txq.tid = tid;
+	sta->sta.txq[tid] = &txqi->txq;
 }
 
 void ieee80211_txq_purge(struct ieee80211_local *local,
@@ -1462,8 +1489,14 @@
 	struct fq *fq = &local->fq;
 	struct fq_tin *tin = &txqi->tin;
 
+	spin_lock_bh(&fq->lock);
 	fq_tin_reset(fq, tin, fq_skb_free_func);
 	ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
+	spin_unlock_bh(&fq->lock);
+
+	spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
+	list_del_init(&txqi->schedule_order);
+	spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
 }
 
 void ieee80211_txq_set_params(struct ieee80211_local *local)
@@ -1558,7 +1591,6 @@
 				struct sta_info *sta,
 				struct sk_buff *skb)
 {
-	struct fq *fq = &local->fq;
 	struct ieee80211_vif *vif;
 	struct txq_info *txqi;
 
@@ -1576,11 +1608,9 @@
 	if (!txqi)
 		return false;
 
-	spin_lock_bh(&fq->lock);
 	ieee80211_txq_enqueue(local, txqi, skb);
-	spin_unlock_bh(&fq->lock);
 
-	drv_wake_tx_queue(local, txqi);
+	schedule_and_wake_txq(local, txqi);
 
 	return true;
 }
@@ -1913,9 +1943,16 @@
 				int head_need, bool may_encrypt)
 {
 	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_hdr *hdr;
+	bool enc_tailroom;
 	int tail_need = 0;
 
-	if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) {
+	hdr = (struct ieee80211_hdr *) skb->data;
+	enc_tailroom = may_encrypt &&
+		       (sdata->crypto_tx_tailroom_needed_cnt ||
+			ieee80211_is_mgmt(hdr->frame_control));
+
+	if (enc_tailroom) {
 		tail_need = IEEE80211_ENCRYPT_TAILROOM;
 		tail_need -= skb_tailroom(skb);
 		tail_need = max_t(int, tail_need, 0);
@@ -1923,8 +1960,7 @@
 
 	if (skb_cloned(skb) &&
 	    (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) ||
-	     !skb_clone_writable(skb, ETH_HLEN) ||
-	     (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt)))
+	     !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom))
 		I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
 	else if (head_need || tail_need)
 		I802_DEBUG_INC(local->tx_expand_skb_head);
@@ -2393,6 +2429,7 @@
  * @sdata: virtual interface to build the header for
  * @skb: the skb to build the header in
  * @info_flags: skb flags to set
+ * @ctrl_flags: info control flags to set
  *
  * This function takes the skb with 802.3 header and reformats the header to
  * the appropriate IEEE 802.11 header based on which interface the packet is
@@ -2408,7 +2445,7 @@
  */
 static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 					   struct sk_buff *skb, u32 info_flags,
-					   struct sta_info *sta)
+					   struct sta_info *sta, u32 ctrl_flags)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_tx_info *info;
@@ -2432,6 +2469,11 @@
 	if (IS_ERR(sta))
 		sta = NULL;
 
+#ifdef CONFIG_MAC80211_DEBUGFS
+	if (local->force_tx_status)
+		info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+#endif
+
 	/* convert Ethernet header to proper 802.11 header (based on
 	 * operation mode) */
 	ethertype = (skb->data[12] << 8) | skb->data[13];
@@ -2562,6 +2604,13 @@
 			goto free;
 		}
 		band = chanctx_conf->def.chan->band;
+
+		/* For injected frames, fill RA right away as nexthop lookup
+		 * will be skipped.
+		 */
+		if ((ctrl_flags & IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP) &&
+		    is_zero_ether_addr(hdr.addr1))
+			memcpy(hdr.addr1, skb->data, ETH_ALEN);
 		break;
 #endif
 	case NL80211_IFTYPE_STATION:
@@ -2780,6 +2829,7 @@
 	info->flags = info_flags;
 	info->ack_frame_id = info_id;
 	info->band = band;
+	info->control.flags = ctrl_flags;
 
 	return skb;
  free:
@@ -2955,26 +3005,22 @@
 		if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
 			goto out;
 
+		/* Key is being removed */
+		if (build.key->flags & KEY_FLAG_TAINTED)
+			goto out;
+
 		switch (build.key->conf.cipher) {
 		case WLAN_CIPHER_SUITE_CCMP:
 		case WLAN_CIPHER_SUITE_CCMP_256:
-			/* add fixed key ID */
-			if (gen_iv) {
-				(build.hdr + build.hdr_len)[3] =
-					0x20 | (build.key->conf.keyidx << 6);
+			if (gen_iv)
 				build.pn_offs = build.hdr_len;
-			}
 			if (gen_iv || iv_spc)
 				build.hdr_len += IEEE80211_CCMP_HDR_LEN;
 			break;
 		case WLAN_CIPHER_SUITE_GCMP:
 		case WLAN_CIPHER_SUITE_GCMP_256:
-			/* add fixed key ID */
-			if (gen_iv) {
-				(build.hdr + build.hdr_len)[3] =
-					0x20 | (build.key->conf.keyidx << 6);
+			if (gen_iv)
 				build.pn_offs = build.hdr_len;
-			}
 			if (gen_iv || iv_spc)
 				build.hdr_len += IEEE80211_GCMP_HDR_LEN;
 			break;
@@ -3179,6 +3225,8 @@
 	u8 max_subframes = sta->sta.max_amsdu_subframes;
 	int max_frags = local->hw.max_tx_fragments;
 	int max_amsdu_len = sta->sta.max_amsdu_len;
+	int orig_truesize;
+	u32 flow_idx;
 	__be16 len;
 	void *data;
 	bool ret = false;
@@ -3189,6 +3237,9 @@
 	if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
 		return false;
 
+	if (skb_is_gso(skb))
+		return false;
+
 	if (!txq)
 		return false;
 
@@ -3200,6 +3251,12 @@
 		max_amsdu_len = min_t(int, max_amsdu_len,
 				      sta->sta.max_rc_amsdu_len);
 
+	if (sta->sta.max_tid_amsdu_len[tid])
+		max_amsdu_len = min_t(int, max_amsdu_len,
+				      sta->sta.max_tid_amsdu_len[tid]);
+
+	flow_idx = fq_flow_idx(fq, skb);
+
 	spin_lock_bh(&fq->lock);
 
 	/* TODO: Ideally aggregation should be done on dequeue to remain
@@ -3207,11 +3264,13 @@
 	 */
 
 	tin = &txqi->tin;
-	flow = fq_flow_classify(fq, tin, skb, fq_flow_get_default_func);
+	flow = fq_flow_classify(fq, tin, flow_idx, skb,
+				fq_flow_get_default_func);
 	head = skb_peek_tail(&flow->queue);
-	if (!head)
+	if (!head || skb_is_gso(head))
 		goto out;
 
+	orig_truesize = head->truesize;
 	orig_len = head->len;
 
 	if (skb->len + head->len > max_amsdu_len)
@@ -3232,6 +3291,9 @@
 	if (max_frags && nfrags > max_frags)
 		goto out;
 
+	if (!drv_can_aggregate_in_amsdu(local, head, skb))
+		goto out;
+
 	if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
 		goto out;
 
@@ -3266,6 +3328,7 @@
 	*frag_tail = skb;
 
 out_recalc:
+	fq->memory_usage += head->truesize - orig_truesize;
 	if (head->len != orig_len) {
 		flow->backlog += head->len - orig_len;
 		tin->backlog_bytes += head->len - orig_len;
@@ -3331,6 +3394,7 @@
 			pn = atomic64_inc_return(&key->conf.tx_pn);
 			crypto_hdr[0] = pn;
 			crypto_hdr[1] = pn >> 8;
+			crypto_hdr[3] = 0x20 | (key->conf.keyidx << 6);
 			crypto_hdr[4] = pn >> 16;
 			crypto_hdr[5] = pn >> 24;
 			crypto_hdr[6] = pn >> 32;
@@ -3423,6 +3487,11 @@
 		      (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
 	info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT;
 
+#ifdef CONFIG_MAC80211_DEBUGFS
+	if (local->force_tx_status)
+		info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+#endif
+
 	if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
 		tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
 		*ieee80211_get_qos_ctl(hdr) = tid;
@@ -3476,23 +3545,33 @@
 	struct ieee80211_tx_info *info;
 	struct ieee80211_tx_data tx;
 	ieee80211_tx_result r;
-	struct ieee80211_vif *vif;
+	struct ieee80211_vif *vif = txq->vif;
 
+	WARN_ON_ONCE(softirq_count() == 0);
+
+begin:
 	spin_lock_bh(&fq->lock);
 
-	if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
+	if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ||
+	    test_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags))
 		goto out;
 
+	if (vif->txqs_stopped[ieee80211_ac_from_tid(txq->tid)]) {
+		set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags);
+		goto out;
+	}
+
 	/* Make sure fragments stay together. */
 	skb = __skb_dequeue(&txqi->frags);
 	if (skb)
 		goto out;
 
-begin:
 	skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
 	if (!skb)
 		goto out;
 
+	spin_unlock_bh(&fq->lock);
+
 	hdr = (struct ieee80211_hdr *)skb->data;
 	info = IEEE80211_SKB_CB(skb);
 
@@ -3537,11 +3616,14 @@
 
 		skb = __skb_dequeue(&tx.skbs);
 
-		if (!skb_queue_empty(&tx.skbs))
+		if (!skb_queue_empty(&tx.skbs)) {
+			spin_lock_bh(&fq->lock);
 			skb_queue_splice_tail(&tx.skbs, &txqi->frags);
+			spin_unlock_bh(&fq->lock);
+		}
 	}
 
-	if (skb && skb_has_frag_list(skb) &&
+	if (skb_has_frag_list(skb) &&
 	    !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) {
 		if (skb_linearize(skb)) {
 			ieee80211_free_txskb(&local->hw, skb);
@@ -3577,6 +3659,8 @@
 	}
 
 	IEEE80211_SKB_CB(skb)->control.vif = vif;
+	return skb;
+
 out:
 	spin_unlock_bh(&fq->lock);
 
@@ -3584,11 +3668,148 @@
 }
 EXPORT_SYMBOL(ieee80211_tx_dequeue);
 
+struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_txq *ret = NULL;
+	struct txq_info *txqi = NULL;
+
+	spin_lock_bh(&local->active_txq_lock[ac]);
+
+ begin:
+	txqi = list_first_entry_or_null(&local->active_txqs[ac],
+					struct txq_info,
+					schedule_order);
+	if (!txqi)
+		goto out;
+
+	if (txqi->txq.sta) {
+		struct sta_info *sta = container_of(txqi->txq.sta,
+						struct sta_info, sta);
+
+		if (sta->airtime[txqi->txq.ac].deficit < 0) {
+			sta->airtime[txqi->txq.ac].deficit +=
+				sta->airtime_weight;
+			list_move_tail(&txqi->schedule_order,
+				       &local->active_txqs[txqi->txq.ac]);
+			goto begin;
+		}
+	}
+
+	if (txqi->schedule_round == local->schedule_round[ac])
+		goto out;
+
+	list_del_init(&txqi->schedule_order);
+	txqi->schedule_round = local->schedule_round[ac];
+	ret = &txqi->txq;
+
+out:
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+	return ret;
+}
+EXPORT_SYMBOL(ieee80211_next_txq);
+
+void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			      struct ieee80211_txq *txq,
+			      bool force)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+
+	spin_lock_bh(&local->active_txq_lock[txq->ac]);
+
+	if (list_empty(&txqi->schedule_order) &&
+	    (force || !skb_queue_empty(&txqi->frags) ||
+	     txqi->tin.backlog_packets)) {
+		/* If airtime accounting is active, always enqueue STAs at the
+		 * head of the list to ensure that they only get moved to the
+		 * back by the airtime DRR scheduler once they have a negative
+		 * deficit. A station that already has a negative deficit will
+		 * get immediately moved to the back of the list on the next
+		 * call to ieee80211_next_txq().
+		 */
+		if (txqi->txq.sta &&
+		    wiphy_ext_feature_isset(local->hw.wiphy,
+					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+			list_add(&txqi->schedule_order,
+				 &local->active_txqs[txq->ac]);
+		else
+			list_add_tail(&txqi->schedule_order,
+				      &local->active_txqs[txq->ac]);
+	}
+
+	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
+}
+EXPORT_SYMBOL(__ieee80211_schedule_txq);
+
+bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
+				struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *iter, *tmp, *txqi = to_txq_info(txq);
+	struct sta_info *sta;
+	u8 ac = txq->ac;
+
+	spin_lock_bh(&local->active_txq_lock[ac]);
+
+	if (!txqi->txq.sta)
+		goto out;
+
+	if (list_empty(&txqi->schedule_order))
+		goto out;
+
+	list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
+				 schedule_order) {
+		if (iter == txqi)
+			break;
+
+		if (!iter->txq.sta) {
+			list_move_tail(&iter->schedule_order,
+				       &local->active_txqs[ac]);
+			continue;
+		}
+		sta = container_of(iter->txq.sta, struct sta_info, sta);
+		if (sta->airtime[ac].deficit < 0)
+			sta->airtime[ac].deficit += sta->airtime_weight;
+		list_move_tail(&iter->schedule_order, &local->active_txqs[ac]);
+	}
+
+	sta = container_of(txqi->txq.sta, struct sta_info, sta);
+	if (sta->airtime[ac].deficit >= 0)
+		goto out;
+
+	sta->airtime[ac].deficit += sta->airtime_weight;
+	list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+
+	return false;
+out:
+	if (!list_empty(&txqi->schedule_order))
+		list_del_init(&txqi->schedule_order);
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+
+	return true;
+}
+EXPORT_SYMBOL(ieee80211_txq_may_transmit);
+
+void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	spin_lock_bh(&local->active_txq_lock[ac]);
+	local->schedule_round[ac]++;
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+}
+EXPORT_SYMBOL(ieee80211_txq_schedule_start);
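/*
 * Illustration only (not part of the patch): taken together, these
 * exports give wake_tx_queue drivers an airtime-fair pull loop along
 * these lines; drv_hw_queue_space() and drv_tx_one() are hypothetical.
 */
static void drv_pull_packets(struct ieee80211_hw *hw, u8 ac)
{
	struct ieee80211_txq *txq;
	struct sk_buff *skb;

	ieee80211_txq_schedule_start(hw, ac);	/* new scheduling round */

	while ((txq = ieee80211_next_txq(hw, ac))) {
		while (drv_hw_queue_space(hw, ac) &&	/* hypothetical */
		       (skb = ieee80211_tx_dequeue(hw, txq)))
			drv_tx_one(hw, skb);		/* hypothetical */

		/* put the TXQ back in rotation only if it still has traffic */
		__ieee80211_schedule_txq(hw, txq, false);
	}
}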
+
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
-				  u32 info_flags)
+				  u32 info_flags,
+				  u32 ctrl_flags)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
 	struct sk_buff *next;
 
@@ -3602,16 +3823,18 @@
 	if (ieee80211_lookup_ra_sta(sdata, skb, &sta))
 		goto out_free;
 
-	if (!IS_ERR_OR_NULL(sta)) {
+	if (IS_ERR(sta))
+		sta = NULL;
+
+	if (local->ops->wake_tx_queue) {
+		u16 queue = __ieee80211_select_queue(sdata, sta, skb);
+		skb_set_queue_mapping(skb, queue);
+	}
+
+	if (sta) {
 		struct ieee80211_fast_tx *fast_tx;
 
-		/* We need a bit of data queued to build aggregates properly, so
-		 * instruct the TCP stack to allow more than a single ms of data
-		 * to be queued in the stack. The value is a bit-shift of 1
-		 * second, so 8 is ~4ms of queued data. Only affects local TCP
-		 * sockets.
-		 */
-		sk_pacing_shift_update(skb->sk, 8);
+		sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift);
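		/*
		 * Pacing-shift arithmetic, for reference: the shift divides
		 * one second of data TCP may queue below the socket, so 8
		 * means 1/256 s (~4 ms) and 6 means 1/64 s (~16 ms). The
		 * default is presumably still 8 (set at hw allocation time),
		 * so behaviour is unchanged unless a driver tunes
		 * hw.tx_sk_pacing_shift for its aggregation needs.
		 */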
 
 		fast_tx = rcu_dereference(sta->fast_tx);
 
@@ -3657,7 +3880,8 @@
 		skb->prev = NULL;
 		skb->next = NULL;
 
-		skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
+		skb = ieee80211_build_hdr(sdata, skb, info_flags,
+					  sta, ctrl_flags);
 		if (IS_ERR(skb))
 			goto out;
 
@@ -3797,9 +4021,9 @@
 		__skb_queue_head_init(&queue);
 		ieee80211_convert_to_unicast(skb, dev, &queue);
 		while ((skb = __skb_dequeue(&queue)))
-			__ieee80211_subif_start_xmit(skb, dev, 0);
+			__ieee80211_subif_start_xmit(skb, dev, 0, 0);
 	} else {
-		__ieee80211_subif_start_xmit(skb, dev, 0);
+		__ieee80211_subif_start_xmit(skb, dev, 0, 0);
 	}
 
 	return NETDEV_TX_OK;
@@ -3824,7 +4048,7 @@
 		goto out;
 	}
 
-	skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
+	skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, 0);
 	if (IS_ERR(skb))
 		goto out;
 
@@ -4426,7 +4650,8 @@
 	if (!sband)
 		return bcn;
 
-	ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false);
+	ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false,
+			     NULL);
 
 	return bcn;
 }
@@ -4542,7 +4767,7 @@
 					      IEEE80211_STYPE_NULLFUNC |
 					      IEEE80211_FCTL_TODS);
 	if (qos) {
-		__le16 qos = cpu_to_le16(7);
+		__le16 qoshdr = cpu_to_le16(7);
 
 		BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC |
 			      IEEE80211_STYPE_NULLFUNC) !=
@@ -4551,7 +4776,7 @@
 			cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC);
 		skb->priority = 7;
 		skb_set_queue_mapping(skb, IEEE80211_AC_VO);
-		skb_put_data(skb, &qos, sizeof(qos));
+		skb_put_data(skb, &qoshdr, sizeof(qoshdr));
 	}
 
 	memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN);
@@ -4861,7 +5086,36 @@
 	skb_reset_mac_header(skb);
 
 	local_bh_disable();
-	__ieee80211_subif_start_xmit(skb, skb->dev, flags);
+	__ieee80211_subif_start_xmit(skb, skb->dev, flags, 0);
+	local_bh_enable();
+
+	return 0;
+}
+
+int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev,
+			      const u8 *buf, size_t len)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + len +
+			    30 + /* header size */
+			    18); /* 11s header size */
+	if (!skb)
+		return -ENOMEM;
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	skb_put_data(skb, buf, len);
+
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_802_3);
+	skb_reset_network_header(skb);
+	skb_reset_mac_header(skb);
+
+	local_bh_disable();
+	__ieee80211_subif_start_xmit(skb, skb->dev, 0,
+				     IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP);
 	local_bh_enable();
 
 	return 0;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 716cd64..32a7a53 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
@@ -5,11 +6,7 @@
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017	Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018-2019 Intel Corporation
  *
  * utilities for mac80211
  */
@@ -240,6 +237,114 @@
 }
 EXPORT_SYMBOL(ieee80211_ctstoself_duration);
 
+static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_vif *vif = &sdata->vif;
+	struct fq *fq = &local->fq;
+	struct ps_data *ps = NULL;
+	struct txq_info *txqi;
+	struct sta_info *sta;
+	int i;
+
+	local_bh_disable();
+	spin_lock(&fq->lock);
+
+	if (sdata->vif.type == NL80211_IFTYPE_AP)
+		ps = &sdata->bss->ps;
+
+	sdata->vif.txqs_stopped[ac] = false;
+
+	list_for_each_entry_rcu(sta, &local->sta_list, list) {
+		if (sdata != sta->sdata)
+			continue;
+
+		for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+			struct ieee80211_txq *txq = sta->sta.txq[i];
+
+			if (!txq)
+				continue;
+
+			txqi = to_txq_info(txq);
+
+			if (ac != txq->ac)
+				continue;
+
+			if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX,
+						&txqi->flags))
+				continue;
+
+			spin_unlock(&fq->lock);
+			drv_wake_tx_queue(local, txqi);
+			spin_lock(&fq->lock);
+		}
+	}
+
+	if (!vif->txq)
+		goto out;
+
+	txqi = to_txq_info(vif->txq);
+
+	if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags) ||
+	    (ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac)
+		goto out;
+
+	spin_unlock(&fq->lock);
+
+	drv_wake_tx_queue(local, txqi);
+	local_bh_enable();
+	return;
+out:
+	spin_unlock(&fq->lock);
+	local_bh_enable();
+}
+
+static void
+__releases(&local->queue_stop_reason_lock)
+__acquires(&local->queue_stop_reason_lock)
+_ieee80211_wake_txqs(struct ieee80211_local *local, unsigned long *flags)
+{
+	struct ieee80211_sub_if_data *sdata;
+	int n_acs = IEEE80211_NUM_ACS;
+	int i;
+
+	rcu_read_lock();
+
+	if (local->hw.queues < IEEE80211_NUM_ACS)
+		n_acs = 1;
+
+	for (i = 0; i < local->hw.queues; i++) {
+		if (local->queue_stop_reasons[i])
+			continue;
+
+		spin_unlock_irqrestore(&local->queue_stop_reason_lock, *flags);
+		list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+			int ac;
+
+			for (ac = 0; ac < n_acs; ac++) {
+				int ac_queue = sdata->vif.hw_queue[ac];
+
+				if (ac_queue == i ||
+				    sdata->vif.cab_queue == i)
+					__ieee80211_wake_txqs(sdata, ac);
+			}
+		}
+		spin_lock_irqsave(&local->queue_stop_reason_lock, *flags);
+	}
+
+	rcu_read_unlock();
+}
+
+void ieee80211_wake_txqs(unsigned long data)
+{
+	struct ieee80211_local *local = (struct ieee80211_local *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+	_ieee80211_wake_txqs(local, &flags);
+	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+}
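/*
 * For reference (not part of the patch): the unsigned long argument is
 * the old-style tasklet calling convention, matching a registration at
 * hw allocation time along the lines of:
 *
 *	tasklet_init(&local->wake_txqs_tasklet, ieee80211_wake_txqs,
 *		     (unsigned long)local);
 */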
+
 void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
 {
 	struct ieee80211_sub_if_data *sdata;
@@ -275,7 +380,8 @@
 
 static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
 				   enum queue_stop_reason reason,
-				   bool refcounted)
+				   bool refcounted,
+				   unsigned long *flags)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
@@ -308,6 +414,20 @@
 		rcu_read_unlock();
 	} else
 		tasklet_schedule(&local->tx_pending_tasklet);
+
+	/*
+	 * Calling _ieee80211_wake_txqs here can be a problem because it may
+	 * release queue_stop_reason_lock which has been taken by
+	 * __ieee80211_wake_queue's caller. Releasing someone else's lock is
+	 * not nice, but it is safe here because every caller of
+	 * __ieee80211_wake_queue invokes it right before releasing the lock.
+	 */
+	if (local->ops->wake_tx_queue) {
+		if (reason == IEEE80211_QUEUE_STOP_REASON_DRIVER)
+			tasklet_schedule(&local->wake_txqs_tasklet);
+		else
+			_ieee80211_wake_txqs(local, flags);
+	}
 }
 
 void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
@@ -318,7 +438,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
-	__ieee80211_wake_queue(hw, queue, reason, refcounted);
+	__ieee80211_wake_queue(hw, queue, reason, refcounted, &flags);
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 }
 
@@ -351,9 +471,6 @@
 	if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue]))
 		return;
 
-	if (local->ops->wake_tx_queue)
-		return;
-
 	if (local->hw.queues < IEEE80211_NUM_ACS)
 		n_acs = 1;
 
@@ -366,8 +483,15 @@
 
 		for (ac = 0; ac < n_acs; ac++) {
 			if (sdata->vif.hw_queue[ac] == queue ||
-			    sdata->vif.cab_queue == queue)
-				netif_stop_subqueue(sdata->dev, ac);
+			    sdata->vif.cab_queue == queue) {
+				if (!local->ops->wake_tx_queue) {
+					netif_stop_subqueue(sdata->dev, ac);
+					continue;
+				}
+				spin_lock(&local->fq.lock);
+				sdata->vif.txqs_stopped[ac] = true;
+				spin_unlock(&local->fq.lock);
+			}
 		}
 	}
 	rcu_read_unlock();
@@ -411,7 +535,7 @@
 			       false);
 	__skb_queue_tail(&local->pending[queue], skb);
 	__ieee80211_wake_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
-			       false);
+			       false, &flags);
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 }
 
@@ -444,7 +568,7 @@
 	for (i = 0; i < hw->queues; i++)
 		__ieee80211_wake_queue(hw, i,
 			IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
-			false);
+			false, &flags);
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 }
 
@@ -502,7 +626,7 @@
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 
 	for_each_set_bit(i, &queues, hw->queues)
-		__ieee80211_wake_queue(hw, i, reason, refcounted);
+		__ieee80211_wake_queue(hw, i, reason, refcounted, &flags);
 
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 }
@@ -767,33 +891,29 @@
 }
 EXPORT_SYMBOL(ieee80211_queue_delayed_work);
 
-u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
-			       struct ieee802_11_elems *elems,
-			       u64 filter, u32 crc)
+static u32
+_ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
+			    struct ieee802_11_elems *elems,
+			    u64 filter, u32 crc,
+			    const struct element *check_inherit)
 {
-	size_t left = len;
-	const u8 *pos = start;
+	const struct element *elem;
 	bool calc_crc = filter != 0;
 	DECLARE_BITMAP(seen_elems, 256);
 	const u8 *ie;
 
 	bitmap_zero(seen_elems, 256);
-	memset(elems, 0, sizeof(*elems));
-	elems->ie_start = start;
-	elems->total_len = len;
 
-	while (left >= 2) {
-		u8 id, elen;
+	for_each_element(elem, start, len) {
 		bool elem_parse_failed;
+		u8 id = elem->id;
+		u8 elen = elem->datalen;
+		const u8 *pos = elem->data;
 
-		id = *pos++;
-		elen = *pos++;
-		left -= 2;
-
-		if (elen > left) {
-			elems->parse_error = true;
-			break;
-		}
+		if (check_inherit &&
+		    !cfg80211_is_element_inherited(elem,
+						   check_inherit))
+			continue;
 
 		switch (id) {
 		case WLAN_EID_SSID:
@@ -836,8 +956,6 @@
 		 */
 			if (test_bit(id, seen_elems)) {
 				elems->parse_error = true;
-				left -= elen;
-				pos += elen;
 				continue;
 			}
 			break;
@@ -1085,6 +1203,13 @@
 
 			elems->cisco_dtpc_elem = pos;
 			break;
+		case WLAN_EID_ADDBA_EXT:
+			if (elen != sizeof(struct ieee80211_addba_ext_ie)) {
+				elem_parse_failed = true;
+				break;
+			}
+			elems->addba_ext_ie = (void *)pos;
+			break;
 		case WLAN_EID_TIMEOUT_INTERVAL:
 			if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
 				elems->timeout_int = (void *)pos;
@@ -1099,6 +1224,8 @@
 			if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA &&
 			    elen >= (sizeof(*elems->mu_edca_param_set) + 1)) {
 				elems->mu_edca_param_set = (void *)&pos[1];
+				if (calc_crc)
+					crc = crc32_be(crc, pos - 2, elen + 2);
 			} else if (pos[0] == WLAN_EID_EXT_HE_CAPABILITY) {
 				elems->he_cap = (void *)&pos[1];
 				elems->he_cap_len = elen - 1;
@@ -1108,6 +1235,18 @@
 				elems->he_operation = (void *)&pos[1];
 			} else if (pos[0] == WLAN_EID_EXT_UORA && elen >= 1) {
 				elems->uora_element = (void *)&pos[1];
+			} else if (pos[0] ==
+				   WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME &&
+				   elen == 4) {
+				elems->max_channel_switch_time = pos + 1;
+			} else if (pos[0] ==
+				   WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION &&
+				   elen == 3) {
+				elems->mbssid_config_ie = (void *)&pos[1];
+			} else if (pos[0] == WLAN_EID_EXT_HE_SPR &&
+				   elen >= sizeof(*elems->he_spr) &&
+				   elen >= ieee80211_he_spr_size(&pos[1])) {
+				elems->he_spr = (void *)&pos[1];
 			}
 			break;
 		default:
@@ -1118,17 +1257,140 @@
 			elems->parse_error = true;
 		else
 			__set_bit(id, seen_elems);
-
-		left -= elen;
-		pos += elen;
 	}
 
-	if (left != 0)
+	if (!for_each_element_completed(elem, start, len))
 		elems->parse_error = true;
 
 	return crc;
 }
 
+static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
+					    struct ieee802_11_elems *elems,
+					    u8 *transmitter_bssid,
+					    u8 *bss_bssid,
+					    u8 *nontransmitted_profile)
+{
+	const struct element *elem, *sub;
+	size_t profile_len = 0;
+	bool found = false;
+
+	if (!bss_bssid || !transmitter_bssid)
+		return profile_len;
+
+	for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) {
+		if (elem->datalen < 2)
+			continue;
+
+		for_each_element(sub, elem->data + 1, elem->datalen - 1) {
+			u8 new_bssid[ETH_ALEN];
+			const u8 *index;
+
+			if (sub->id != 0 || sub->datalen < 4) {
+				/* not a valid BSS profile */
+				continue;
+			}
+
+			if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP ||
+			    sub->data[1] != 2) {
+				/* The first element of the
+				 * Nontransmitted BSSID Profile is not
+				 * the Nontransmitted BSSID Capability
+				 * element.
+				 */
+				continue;
+			}
+
+			memset(nontransmitted_profile, 0, len);
+			profile_len = cfg80211_merge_profile(start, len,
+							     elem,
+							     sub,
+							     nontransmitted_profile,
+							     len);
+
+			/* found a Nontransmitted BSSID Profile */
+			index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX,
+						 nontransmitted_profile,
+						 profile_len);
+			if (!index || index[1] < 1 || index[2] == 0) {
+				/* Invalid MBSSID Index element */
+				continue;
+			}
+
+			cfg80211_gen_new_bssid(transmitter_bssid,
+					       elem->data[0],
+					       index[2],
+					       new_bssid);
+			if (ether_addr_equal(new_bssid, bss_bssid)) {
+				found = true;
+				elems->bssid_index_len = index[1];
+				elems->bssid_index = (void *)&index[2];
+				break;
+			}
+		}
+	}
+
+	return found ? profile_len : 0;
+}
+
+u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
+			       struct ieee802_11_elems *elems,
+			       u64 filter, u32 crc, u8 *transmitter_bssid,
+			       u8 *bss_bssid)
+{
+	const struct element *non_inherit = NULL;
+	u8 *nontransmitted_profile;
+	int nontransmitted_profile_len = 0;
+
+	memset(elems, 0, sizeof(*elems));
+	elems->ie_start = start;
+	elems->total_len = len;
+
+	nontransmitted_profile = kmalloc(len, GFP_ATOMIC);
+	if (nontransmitted_profile) {
+		nontransmitted_profile_len =
+			ieee802_11_find_bssid_profile(start, len, elems,
+						      transmitter_bssid,
+						      bss_bssid,
+						      nontransmitted_profile);
+		non_inherit =
+			cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+					       nontransmitted_profile,
+					       nontransmitted_profile_len);
+	}
+
+	crc = _ieee802_11_parse_elems_crc(start, len, action, elems, filter,
+					  crc, non_inherit);
+
+	/* Override with nontransmitted profile, if found */
+	if (nontransmitted_profile_len)
+		_ieee802_11_parse_elems_crc(nontransmitted_profile,
+					    nontransmitted_profile_len,
+					    action, elems, 0, 0, NULL);
+
+	if (elems->tim && !elems->parse_error) {
+		const struct ieee80211_tim_ie *tim_ie = elems->tim;
+
+		elems->dtim_period = tim_ie->dtim_period;
+		elems->dtim_count = tim_ie->dtim_count;
+	}
+
+	/* Override DTIM period and count if needed */
+	if (elems->bssid_index &&
+	    elems->bssid_index_len >=
+	    offsetofend(struct ieee80211_bssid_index, dtim_period))
+		elems->dtim_period = elems->bssid_index->dtim_period;
+
+	if (elems->bssid_index &&
+	    elems->bssid_index_len >=
+	    offsetofend(struct ieee80211_bssid_index, dtim_count))
+		elems->dtim_count = elems->bssid_index->dtim_count;
+
+	kfree(nontransmitted_profile);
+
+	return crc;
+}
+
 void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
 					   struct ieee80211_tx_queue_params
 					   *qparam, int ac)
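
For reference, the rewrite above replaces the hand-rolled pos/left
bookkeeping with the struct element iterators from linux/ieee80211.h,
which only yield TLVs that fit entirely inside the buffer. A minimal
standalone sketch of the pattern (walk_ies is illustrative):

#include <linux/ieee80211.h>
#include <linux/printk.h>

static void walk_ies(const u8 *start, size_t len)
{
	const struct element *elem;

	/* elem->id/datalen/data are already bounds-checked here */
	for_each_element(elem, start, len)
		pr_debug("IE %u, %u data bytes\n", elem->id, elem->datalen);

	/* false if the buffer ended inside a truncated element */
	if (!for_each_element_completed(elem, start, len))
		pr_debug("malformed trailing element\n");
}
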
@@ -1324,7 +1586,8 @@
 }
 
 void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
-				    const u8 *bssid, u16 stype, u16 reason,
+				    const u8 *da, const u8 *bssid,
+				    u16 stype, u16 reason,
 				    bool send_frame, u8 *frame_buf)
 {
 	struct ieee80211_local *local = sdata->local;
@@ -1335,7 +1598,7 @@
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype);
 	mgmt->duration = 0; /* initialize only */
 	mgmt->seq_ctrl = 0; /* initialize only */
-	memcpy(mgmt->da, bssid, ETH_ALEN);
+	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	memcpy(mgmt->bssid, bssid, ETH_ALEN);
 	/* u.deauth.reason_code == u.disassoc.reason_code */
@@ -2020,6 +2283,10 @@
 		case NL80211_IFTYPE_AP_VLAN:
 		case NL80211_IFTYPE_MONITOR:
 			break;
+		case NL80211_IFTYPE_ADHOC:
+			if (sdata->vif.bss_conf.ibss_joined)
+				WARN_ON(drv_join_ibss(local, sdata));
+			/* fall through */
 		default:
 			ieee80211_reconfig_stations(sdata);
 			/* fall through */
@@ -2075,6 +2342,11 @@
 		case NL80211_IFTYPE_AP:
 			changed |= BSS_CHANGED_SSID | BSS_CHANGED_P2P_PS;
 
+			if (sdata->vif.bss_conf.ftm_responder == 1 &&
+			    wiphy_ext_feature_isset(sdata->local->hw.wiphy,
+					NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER))
+				changed |= BSS_CHANGED_FTM_RESPONDER;
+
 			if (sdata->vif.type == NL80211_IFTYPE_AP) {
 				changed |= BSS_CHANGED_AP_PROBE_RESP;
 
@@ -2152,11 +2424,7 @@
 
 	/* add back keys */
 	list_for_each_entry(sdata, &local->interfaces, list)
-		ieee80211_reset_crypto_tx_tailroom(sdata);
-
-	list_for_each_entry(sdata, &local->interfaces, list)
-		if (ieee80211_sdata_running(sdata))
-			ieee80211_enable_keys(sdata);
+		ieee80211_reenable_keys(sdata);
 
 	/* Reconfigure sched scan if it was interrupted by FW restart */
 	mutex_lock(&local->mtx);
@@ -2220,6 +2488,10 @@
 		mutex_lock(&local->mtx);
 		ieee80211_start_next_roc(local);
 		mutex_unlock(&local->mtx);
+
+		/* Requeue all works */
+		list_for_each_entry(sdata, &local->interfaces, list)
+			ieee80211_queue_work(&local->hw, &sdata->work);
 	}
 
 	ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
@@ -2441,6 +2713,27 @@
 	return pos;
 }
 
+u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
+{
+	const struct ieee80211_sta_he_cap *he_cap;
+	struct ieee80211_supported_band *sband;
+	u8 n;
+
+	sband = ieee80211_get_sband(sdata);
+	if (!sband)
+		return 0;
+
+	he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
+	if (!he_cap)
+		return 0;
+
+	n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem);
+	return 2 + 1 +
+	       sizeof(he_cap->he_cap_elem) + n +
+	       ieee80211_he_ppe_size(he_cap->ppe_thres[0],
+				     he_cap->he_cap_elem.phy_cap_info);
+}
+
 u8 *ieee80211_ie_build_he_cap(u8 *pos,
 			      const struct ieee80211_sta_he_cap *he_cap,
 			      u8 *end)
@@ -2630,6 +2923,34 @@
 	return pos + sizeof(struct ieee80211_vht_operation);
 }
 
+u8 *ieee80211_ie_build_he_oper(u8 *pos)
+{
+	struct ieee80211_he_operation *he_oper;
+	u32 he_oper_params;
+
+	*pos++ = WLAN_EID_EXTENSION;
+	*pos++ = 1 + sizeof(struct ieee80211_he_operation);
+	*pos++ = WLAN_EID_EXT_HE_OPERATION;
+
+	he_oper_params = 0;
+	he_oper_params |= u32_encode_bits(1023, /* disabled */
+				IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK);
+	he_oper_params |= u32_encode_bits(1,
+				IEEE80211_HE_OPERATION_ER_SU_DISABLE);
+	he_oper_params |= u32_encode_bits(1,
+				IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED);
+
+	he_oper = (struct ieee80211_he_operation *)pos;
+	he_oper->he_oper_params = cpu_to_le32(he_oper_params);
+
+	/* don't require special HE peer rates */
+	he_oper->he_mcs_nss_set = cpu_to_le16(0xffff);
+
+	/* TODO add VHT operational and 6GHz operational subelement? */
+
+	return pos + sizeof(struct ieee80211_he_operation);
+}
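
The builder uses u32_encode_bits() from linux/bitfield.h, which derives
the shift from the mask at compile time and masks the result. A tiny
standalone example with a hypothetical field mask:

#include <linux/bitfield.h>

#define DEMO_FIELD_MASK	0x00003ff0	/* hypothetical 10-bit field, bits 4..13 */

static u32 demo_pack(u32 val)
{
	/* equivalent to (val << 4) & DEMO_FIELD_MASK */
	return u32_encode_bits(val, DEMO_FIELD_MASK);
}
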
+
 bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
 			       struct cfg80211_chan_def *chandef)
 {
@@ -2657,49 +2978,65 @@
 	return true;
 }
 
-bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
+bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
+				const struct ieee80211_vht_operation *oper,
+				const struct ieee80211_ht_operation *htop,
 				struct cfg80211_chan_def *chandef)
 {
 	struct cfg80211_chan_def new = *chandef;
-	int cf1, cf2;
+	int cf0, cf1;
+	int ccfs0, ccfs1, ccfs2;
+	int ccf0, ccf1;
 
-	if (!oper)
+	if (!oper || !htop)
 		return false;
 
-	cf1 = ieee80211_channel_to_frequency(oper->center_freq_seg0_idx,
-					     chandef->chan->band);
-	cf2 = ieee80211_channel_to_frequency(oper->center_freq_seg1_idx,
-					     chandef->chan->band);
+	ccfs0 = oper->center_freq_seg0_idx;
+	ccfs1 = oper->center_freq_seg1_idx;
+	ccfs2 = (le16_to_cpu(htop->operation_mode) &
+				IEEE80211_HT_OP_MODE_CCFS2_MASK)
+			>> IEEE80211_HT_OP_MODE_CCFS2_SHIFT;
+
+	/* when parsing (and we know how to parse) CCFS1 and CCFS2 are equivalent */
+	ccf0 = ccfs0;
+	ccf1 = ccfs1;
+	if (!ccfs1 && ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW))
+		ccf1 = ccfs2;
+
+	cf0 = ieee80211_channel_to_frequency(ccf0, chandef->chan->band);
+	cf1 = ieee80211_channel_to_frequency(ccf1, chandef->chan->band);
 
 	switch (oper->chan_width) {
 	case IEEE80211_VHT_CHANWIDTH_USE_HT:
+		/* just use HT information directly */
 		break;
 	case IEEE80211_VHT_CHANWIDTH_80MHZ:
 		new.width = NL80211_CHAN_WIDTH_80;
-		new.center_freq1 = cf1;
+		new.center_freq1 = cf0;
 		/* If needed, adjust based on the newer interop workaround. */
-		if (oper->center_freq_seg1_idx) {
+		if (ccf1) {
 			unsigned int diff;
 
-			diff = abs(oper->center_freq_seg1_idx -
-				   oper->center_freq_seg0_idx);
+			diff = abs(ccf1 - ccf0);
 			if (diff == 8) {
 				new.width = NL80211_CHAN_WIDTH_160;
-				new.center_freq1 = cf2;
+				new.center_freq1 = cf1;
 			} else if (diff > 8) {
 				new.width = NL80211_CHAN_WIDTH_80P80;
-				new.center_freq2 = cf2;
+				new.center_freq2 = cf1;
 			}
 		}
 		break;
 	case IEEE80211_VHT_CHANWIDTH_160MHZ:
+		/* deprecated encoding */
 		new.width = NL80211_CHAN_WIDTH_160;
-		new.center_freq1 = cf1;
+		new.center_freq1 = cf0;
 		break;
 	case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+		/* deprecated encoding */
 		new.width = NL80211_CHAN_WIDTH_80P80;
-		new.center_freq1 = cf1;
-		new.center_freq2 = cf2;
+		new.center_freq1 = cf0;
+		new.center_freq2 = cf1;
 		break;
 	default:
 		return false;
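
A worked example of the CCFS handling above: for a 160 MHz BSS spanning
5 GHz channels 36-64, an EXT_NSS_BW AP advertises CCFS0 = 42 (center of
the primary 80 MHz) and CCFS2 = 50 (center of the full 160 MHz), so:

/* ccfs1 == 0 and the hw supports EXT_NSS_BW, hence ccf1 = ccfs2 = 50;
 * diff = |50 - 42| = 8                  -> NL80211_CHAN_WIDTH_160
 * center_freq1 = ieee80211_channel_to_frequency(50, NL80211_BAND_5GHZ)
 *              = 5000 + 5 * 50 = 5250 MHz
 */
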
@@ -3519,7 +3856,7 @@
 	}
 
 	/* Always allow software iftypes */
-	if (local->hw.wiphy->software_iftypes & BIT(iftype)) {
+	if (cfg80211_iftype_allowed(local->hw.wiphy, iftype, 0, 1)) {
 		if (radar_detect)
 			return -EINVAL;
 		return 0;
@@ -3554,7 +3891,8 @@
 
 		if (sdata_iter == sdata ||
 		    !ieee80211_sdata_running(sdata_iter) ||
-		    local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
+		    cfg80211_iftype_allowed(local->hw.wiphy,
+					    wdev_iter->iftype, 0, 1))
 			continue;
 
 		params.iftype_num[wdev_iter->iftype]++;
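
Both hunks above funnel the old wiphy->software_iftypes bitmap test
through cfg80211_iftype_allowed(); called with is_4addr = 0 and
check_swif = 1 (parameter names per its cfg80211 declaration) it answers
"is this iftype handled purely in software here?". A hedged sketch of
such a caller (demo_* is illustrative):

#include <net/cfg80211.h>

/* software interfaces don't consume a slot in interface combinations */
static bool demo_is_software_iftype(struct wiphy *wiphy,
				    enum nl80211_iftype iftype)
{
	return cfg80211_iftype_allowed(wiphy, iftype, 0, 1);
}
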
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 259325c..ccdcb9a 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * VHT handling
  *
  * Portions of this file
  * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018 - 2019 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -231,6 +229,13 @@
 	memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs,
 	       sizeof(struct ieee80211_vht_mcs_info));
 
+	/* copy EXT_NSS_BW Support value or remove the capability */
+	if (ieee80211_hw_check(&sdata->local->hw, SUPPORTS_VHT_EXT_NSS_BW))
+		vht_cap->cap |= (cap_info & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK);
+	else
+		vht_cap->vht_mcs.tx_highest &=
+			~cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE);
+
 	/* but also restrict MCSes */
 	for (i = 0; i < 8; i++) {
 		u16 own_rx, own_tx, peer_rx, peer_tx;
@@ -294,6 +299,18 @@
 		break;
 	default:
 		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+
+		if (!(vht_cap->vht_mcs.tx_highest &
+				cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE)))
+			break;
+
+		/*
+		 * If this is non-zero, then it does support 160 MHz after all,
+		 * in one form or the other. We don't distinguish here (or even
+		 * above) between 160 and 80+80 yet.
+		 */
+		if (cap_info & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)
+			sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
 	}
 
 	sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
@@ -332,6 +349,14 @@
 	    cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
 		return IEEE80211_STA_RX_BW_160;
 
+	/*
+	 * If this is non-zero, then it does support 160 MHz after all,
+	 * in one form or the other. We don't distinguish here (or even
+	 * above) between 160 and 80+80 yet.
+	 */
+	if (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)
+		return IEEE80211_STA_RX_BW_160;
+
 	return IEEE80211_STA_RX_BW_80;
 }
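
The two vht.c hunks key off the same pair of bits: the EXT_NSS_BW
support field in vht_cap_info and the "extended NSS BW capable" bit in
the highest-TX-rate field. A hedged standalone sketch of the peer-side
test (field names per include/linux/ieee80211.h; demo_* is illustrative):

#include <linux/ieee80211.h>

/* a peer may do 160/80+80 via extended NSS signaling even when its
 * supported-channel-width field only advertises 80 MHz */
static bool demo_peer_ext_nss_160(const struct ieee80211_vht_cap *vht)
{
	u32 cap = le32_to_cpu(vht->vht_cap_info);

	return (vht->supp_mcs.tx_highest &
		cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE)) &&
	       (cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK);
}
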
 
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 73e8f34..b75c2c5 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Software WEP encryption implementation
  * Copyright 2002, Jouni Malinen <jkmaline@cc.hut.fi>
  * Copyright 2003, Instant802 Networks, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/netdevice.h>
@@ -30,30 +27,9 @@
 	/* start WEP IV from a random value */
 	get_random_bytes(&local->wep_iv, IEEE80211_WEP_IV_LEN);
 
-	local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(local->wep_tx_tfm)) {
-		local->wep_rx_tfm = ERR_PTR(-EINVAL);
-		return PTR_ERR(local->wep_tx_tfm);
-	}
-
-	local->wep_rx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(local->wep_rx_tfm)) {
-		crypto_free_cipher(local->wep_tx_tfm);
-		local->wep_tx_tfm = ERR_PTR(-EINVAL);
-		return PTR_ERR(local->wep_rx_tfm);
-	}
-
 	return 0;
 }
 
-void ieee80211_wep_free(struct ieee80211_local *local)
-{
-	if (!IS_ERR(local->wep_tx_tfm))
-		crypto_free_cipher(local->wep_tx_tfm);
-	if (!IS_ERR(local->wep_rx_tfm))
-		crypto_free_cipher(local->wep_rx_tfm);
-}
-
 static inline bool ieee80211_wep_weak_iv(u32 iv, int keylen)
 {
 	/*
@@ -131,21 +107,17 @@
 /* Perform WEP encryption using given key. data buffer must have tailroom
  * for 4-byte ICV. data_len must not include this ICV. Note: this function
  * does _not_ add IV. data = RC4(data | CRC32(data)) */
-int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
+int ieee80211_wep_encrypt_data(struct arc4_ctx *ctx, u8 *rc4key,
 			       size_t klen, u8 *data, size_t data_len)
 {
 	__le32 icv;
-	int i;
-
-	if (IS_ERR(tfm))
-		return -1;
 
 	icv = cpu_to_le32(~crc32_le(~0, data, data_len));
 	put_unaligned(icv, (__le32 *)(data + data_len));
 
-	crypto_cipher_setkey(tfm, rc4key, klen);
-	for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++)
-		crypto_cipher_encrypt_one(tfm, data + i, data + i);
+	arc4_setkey(ctx, rc4key, klen);
+	arc4_crypt(ctx, data, data, data_len + IEEE80211_WEP_ICV_LEN);
+	memzero_explicit(ctx, sizeof(*ctx));
 
 	return 0;
 }
@@ -184,7 +156,7 @@
 	/* Add room for ICV */
 	skb_put(skb, IEEE80211_WEP_ICV_LEN);
 
-	return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3,
+	return ieee80211_wep_encrypt_data(&local->wep_tx_ctx, rc4key, keylen + 3,
 					  iv + IEEE80211_WEP_IV_LEN, len);
 }
 
@@ -192,18 +164,14 @@
 /* Perform WEP decryption using given key. data buffer includes encrypted
  * payload, including 4-byte ICV, but _not_ IV. data_len must not include ICV.
  * Return 0 on success and -1 on ICV mismatch. */
-int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
+int ieee80211_wep_decrypt_data(struct arc4_ctx *ctx, u8 *rc4key,
 			       size_t klen, u8 *data, size_t data_len)
 {
 	__le32 crc;
-	int i;
 
-	if (IS_ERR(tfm))
-		return -1;
-
-	crypto_cipher_setkey(tfm, rc4key, klen);
-	for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++)
-		crypto_cipher_decrypt_one(tfm, data + i, data + i);
+	arc4_setkey(ctx, rc4key, klen);
+	arc4_crypt(ctx, data, data, data_len + IEEE80211_WEP_ICV_LEN);
+	memzero_explicit(ctx, sizeof(*ctx));
 
 	crc = cpu_to_le32(~crc32_le(~0, data, data_len));
 	if (memcmp(&crc, data + data_len, IEEE80211_WEP_ICV_LEN) != 0)
@@ -256,7 +224,7 @@
 	/* Copy rest of the WEP key (the secret part) */
 	memcpy(rc4key + 3, key->conf.key, key->conf.keylen);
 
-	if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen,
+	if (ieee80211_wep_decrypt_data(&local->wep_rx_ctx, rc4key, klen,
 				       skb->data + hdrlen +
 				       IEEE80211_WEP_IV_LEN, len))
 		ret = -1;
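
The conversion above moves WEP off a crypto_cipher tfm (an allocation
per direction, encrypted one byte at a time) onto the lib/crypto ARC4
library, whose context is plain embeddable state. Minimal usage sketch
(demo_arc4 is illustrative; the arc4_* calls are the API from
crypto/arc4.h):

#include <crypto/arc4.h>
#include <linux/string.h>

static void demo_arc4(u8 *buf, size_t len, const u8 *key, size_t klen)
{
	struct arc4_ctx ctx;	/* plain struct, nothing to allocate */

	arc4_setkey(&ctx, key, klen);
	arc4_crypt(&ctx, buf, buf, len);	/* in place; same op decrypts */
	memzero_explicit(&ctx, sizeof(ctx));	/* scrub the expanded key */
}
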
diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h
index 9615749..997a034 100644
--- a/net/mac80211/wep.h
+++ b/net/mac80211/wep.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Software WEP encryption implementation
  * Copyright 2002, Jouni Malinen <jkmaline@cc.hut.fi>
  * Copyright 2003, Instant802 Networks, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef WEP_H
@@ -17,13 +14,12 @@
 #include "key.h"
 
 int ieee80211_wep_init(struct ieee80211_local *local);
-void ieee80211_wep_free(struct ieee80211_local *local);
-int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
+int ieee80211_wep_encrypt_data(struct arc4_ctx *ctx, u8 *rc4key,
 				size_t klen, u8 *data, size_t data_len);
 int ieee80211_wep_encrypt(struct ieee80211_local *local,
 			  struct sk_buff *skb,
 			  const u8 *key, int keylen, int keyidx);
-int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
+int ieee80211_wep_decrypt_data(struct arc4_ctx *ctx, u8 *rc4key,
 			       size_t klen, u8 *data, size_t data_len);
 
 ieee80211_rx_result
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 5f7c963..72920d8 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2004, Instant802 Networks, Inc.
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/netdevice.h>
@@ -141,71 +138,24 @@
 	return ieee80211_downgrade_queue(sdata, NULL, skb);
 }
 
-/* Indicate which queue to use. */
-u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
-			   struct sk_buff *skb)
+u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
+			     struct sta_info *sta, struct sk_buff *skb)
 {
-	struct ieee80211_local *local = sdata->local;
-	struct sta_info *sta = NULL;
-	const u8 *ra = NULL;
-	bool qos = false;
 	struct mac80211_qos_map *qos_map;
-	u16 ret;
+	bool qos;
 
-	if (local->hw.queues < IEEE80211_NUM_ACS || skb->len < 6) {
-		skb->priority = 0; /* required for correct WPA/11i MIC */
-		return 0;
-	}
-
-	rcu_read_lock();
-	switch (sdata->vif.type) {
-	case NL80211_IFTYPE_AP_VLAN:
-		sta = rcu_dereference(sdata->u.vlan.sta);
-		if (sta) {
-			qos = sta->sta.wme;
-			break;
-		}
-		/* fall through */
-	case NL80211_IFTYPE_AP:
-		ra = skb->data;
-		break;
-	case NL80211_IFTYPE_WDS:
-		ra = sdata->u.wds.remote_addr;
-		break;
-#ifdef CONFIG_MAC80211_MESH
-	case NL80211_IFTYPE_MESH_POINT:
+	/* all mesh/ocb stations are required to support WME */
+	if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
+	    sdata->vif.type == NL80211_IFTYPE_OCB)
 		qos = true;
-		break;
-#endif
-	case NL80211_IFTYPE_STATION:
-		/* might be a TDLS station */
-		sta = sta_info_get(sdata, skb->data);
-		if (sta)
-			qos = sta->sta.wme;
-
-		ra = sdata->u.mgd.bssid;
-		break;
-	case NL80211_IFTYPE_ADHOC:
-		ra = skb->data;
-		break;
-	case NL80211_IFTYPE_OCB:
-		/* all stations are required to support WME */
-		qos = true;
-		break;
-	default:
-		break;
-	}
-
-	if (!sta && ra && !is_multicast_ether_addr(ra)) {
-		sta = sta_info_get(sdata, ra);
-		if (sta)
-			qos = sta->sta.wme;
-	}
+	else if (sta)
+		qos = sta->sta.wme;
+	else
+		qos = false;
 
 	if (!qos) {
 		skb->priority = 0; /* required for correct WPA/11i MIC */
-		ret = IEEE80211_AC_BE;
-		goto out;
+		return IEEE80211_AC_BE;
 	}
 
 	if (skb->protocol == sdata->control_port_protocol) {
@@ -220,8 +170,61 @@
 					       &qos_map->qos_map : NULL);
 
  downgrade:
-	ret = ieee80211_downgrade_queue(sdata, sta, skb);
- out:
+	return ieee80211_downgrade_queue(sdata, sta, skb);
+}
+
+/* Indicate which queue to use. */
+u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
+			   struct sk_buff *skb)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sta_info *sta = NULL;
+	const u8 *ra = NULL;
+	u16 ret;
+
+	/* when using iTXQ, we can do this later */
+	if (local->ops->wake_tx_queue)
+		return 0;
+
+	if (local->hw.queues < IEEE80211_NUM_ACS || skb->len < 6) {
+		skb->priority = 0; /* required for correct WPA/11i MIC */
+		return 0;
+	}
+
+	rcu_read_lock();
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_AP_VLAN:
+		sta = rcu_dereference(sdata->u.vlan.sta);
+		if (sta)
+			break;
+		/* fall through */
+	case NL80211_IFTYPE_AP:
+		ra = skb->data;
+		break;
+	case NL80211_IFTYPE_WDS:
+		ra = sdata->u.wds.remote_addr;
+		break;
+	case NL80211_IFTYPE_STATION:
+		/* might be a TDLS station */
+		sta = sta_info_get(sdata, skb->data);
+		if (sta)
+			break;
+
+		ra = sdata->u.mgd.bssid;
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		ra = skb->data;
+		break;
+	default:
+		break;
+	}
+
+	if (!sta && ra && !is_multicast_ether_addr(ra))
+		sta = sta_info_get(sdata, ra);
+
+	ret = __ieee80211_select_queue(sdata, sta, skb);
+
 	rcu_read_unlock();
 	return ret;
 }
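
Whichever path runs, ieee80211_downgrade_queue() finally maps the
802.1d priority in skb->priority to one of the four ACs. For reference,
the standard mapping it applies (AC numbers per include/net/mac80211.h,
where VO=0, VI=1, BE=2, BK=3):

/* 802.1d UP -> AC, mirroring cfg80211's ieee802_1d_to_ac[] */
static const u8 demo_1d_to_ac[8] = {
	[0] = 2,	/* BE */
	[1] = 3,	/* BK */
	[2] = 3,	/* BK */
	[3] = 2,	/* BE */
	[4] = 1,	/* VI */
	[5] = 1,	/* VI */
	[6] = 0,	/* VO */
	[7] = 0,	/* VO */
};
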
diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h
index 80151ed..2e3dec0 100644
--- a/net/mac80211/wme.h
+++ b/net/mac80211/wme.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2004, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _WME_H
@@ -16,6 +13,8 @@
 u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata,
 				 struct sk_buff *skb,
 				 struct ieee80211_hdr *hdr);
+u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
+			     struct sta_info *sta, struct sk_buff *skb);
 u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
 			   struct sk_buff *skb);
 void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 58d0b25..91bf32a 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2002-2004, Instant802 Networks, Inc.
  * Copyright 2008, Jouni Malinen <j@w1.fi>
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/netdevice.h>
@@ -242,7 +239,7 @@
 	/* Add room for ICV */
 	skb_put(skb, IEEE80211_TKIP_ICV_LEN);
 
-	return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm,
+	return ieee80211_tkip_encrypt_data(&tx->local->wep_tx_ctx,
 					   key, skb, pos, len);
 }
 
@@ -293,7 +290,7 @@
 	if (status->flag & RX_FLAG_DECRYPTED)
 		hwaccel = 1;
 
-	res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm,
+	res = ieee80211_tkip_decrypt_data(&rx->local->wep_rx_ctx,
 					  key, skb->data + hdrlen,
 					  skb->len - hdrlen, rx->sta->sta.addr,
 					  hdr->addr1, hwaccel, rx->security_idx,
@@ -949,7 +946,8 @@
 
 	info = IEEE80211_SKB_CB(skb);
 
-	if (info->control.hw_key)
+	if (info->control.hw_key &&
+	    !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE))
 		return TX_CONTINUE;
 
 	if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
@@ -965,6 +963,9 @@
 
 	bip_ipn_set64(mmie->sequence_number, pn64);
 
+	if (info->control.hw_key)
+		return TX_CONTINUE;
+
 	bip_aad(skb, aad);
 
 	/*
@@ -1175,7 +1176,7 @@
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct ieee80211_key *key = rx->key;
 	struct ieee80211_mmie_16 *mmie;
-	u8 aad[GMAC_AAD_LEN], mic[GMAC_MIC_LEN], ipn[6], nonce[GMAC_NONCE_LEN];
+	u8 aad[GMAC_AAD_LEN], *mic, ipn[6], nonce[GMAC_NONCE_LEN];
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
 	if (!ieee80211_is_mgmt(hdr->frame_control))
@@ -1206,13 +1207,18 @@
 		memcpy(nonce, hdr->addr2, ETH_ALEN);
 		memcpy(nonce + ETH_ALEN, ipn, 6);
 
+		mic = kmalloc(GMAC_MIC_LEN, GFP_ATOMIC);
+		if (!mic)
+			return RX_DROP_UNUSABLE;
 		if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce,
 				       skb->data + 24, skb->len - 24,
 				       mic) < 0 ||
 		    crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
 			key->u.aes_gmac.icverrors++;
+			kfree(mic);
 			return RX_DROP_UNUSABLE;
 		}
+		kfree(mic);
 	}
 
 	memcpy(key->u.aes_gmac.rx_pn, ipn, 6);
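
The MIC buffer moves off the stack because ieee80211_aes_gmac() hands
it to the crypto layer through a scatterlist, and with CONFIG_VMAP_STACK
a stack address is not necessarily linear kernel memory. A hedged sketch
of the constraint (demo_alloc_mic is illustrative):

#include <linux/scatterlist.h>
#include <linux/slab.h>

/* sg_init_one() needs a virt_to_page()-able buffer, so digests fed to
 * the crypto API via scatterlists must not live on a vmapped stack;
 * GFP_ATOMIC because RX runs in softirq context */
static u8 *demo_alloc_mic(struct scatterlist *sg, size_t len)
{
	u8 *mic = kmalloc(len, GFP_ATOMIC);

	if (mic)
		sg_init_one(sg, mic, len);
	return mic;
}
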
diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h
index d98011e..af32722 100644
--- a/net/mac80211/wpa.h
+++ b/net/mac80211/wpa.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2002-2004, Instant802 Networks, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef WPA_H
diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig
index fb45287..742624e 100644
--- a/net/mac802154/Kconfig
+++ b/net/mac802154/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config MAC802154
 	tristate "Generic IEEE 802.15.4 Soft Networking Stack (mac802154)"
 	depends on IEEE802154
diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile
index 5857bb1..4059295 100644
--- a/net/mac802154/Makefile
+++ b/net/mac802154/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_MAC802154)	+= mac802154.o
 mac802154-objs		:= main.o rx.o tx.o mac_cmd.o mib.o \
 			   iface.o llsec.o util.o cfg.o trace.o
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index 57b5e94..fbeebe3 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -1,11 +1,5 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0-only
+/*
  *
  * Authors:
  * Alexander Aring <aar@pengutronix.de>
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index 62141dc..702560a 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -1,15 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2007-2012 Siemens AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Pavel Smolenskiy <pavel.smolenskiy@gmail.com>
  * Maxim Gorbachyov <maxim.gorbachev@siemens.com>
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index bd88a9b..1cf5ac0 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2007-2012 Siemens AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
  * Sergey Lapin <slapin@ossfans.org>
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 2fb703d..c079ee6 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2014 Fraunhofer ITWM
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
  */
@@ -146,18 +138,18 @@
 			goto err_tfm;
 	}
 
-	key->tfm0 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
+	key->tfm0 = crypto_alloc_sync_skcipher("ctr(aes)", 0, 0);
 	if (IS_ERR(key->tfm0))
 		goto err_tfm;
 
-	if (crypto_skcipher_setkey(key->tfm0, template->key,
+	if (crypto_sync_skcipher_setkey(key->tfm0, template->key,
 				   IEEE802154_LLSEC_KEY_SIZE))
 		goto err_tfm0;
 
 	return key;
 
 err_tfm0:
-	crypto_free_skcipher(key->tfm0);
+	crypto_free_sync_skcipher(key->tfm0);
 err_tfm:
 	for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
 		if (key->tfm[i])
@@ -177,7 +169,7 @@
 	for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
 		crypto_free_aead(key->tfm[i]);
 
-	crypto_free_skcipher(key->tfm0);
+	crypto_free_sync_skcipher(key->tfm0);
 	kzfree(key);
 }
 
@@ -622,7 +614,7 @@
 {
 	u8 iv[16];
 	struct scatterlist src;
-	SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
 	int err, datalen;
 	unsigned char *data;
 
@@ -632,7 +624,7 @@
 	datalen = skb_tail_pointer(skb) - data;
 	sg_init_one(&src, data, datalen);
 
-	skcipher_request_set_tfm(req, key->tfm0);
+	skcipher_request_set_sync_tfm(req, key->tfm0);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &src, &src, datalen, iv);
 	err = crypto_skcipher_encrypt(req);
@@ -840,7 +832,7 @@
 	unsigned char *data;
 	int datalen;
 	struct scatterlist src;
-	SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
 	int err;
 
 	llsec_geniv(iv, dev_addr, &hdr->sec);
@@ -849,7 +841,7 @@
 
 	sg_init_one(&src, data, datalen);
 
-	skcipher_request_set_tfm(req, key->tfm0);
+	skcipher_request_set_sync_tfm(req, key->tfm0);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &src, &src, datalen, iv);
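
The tfm0 conversion follows the sync-skcipher pattern: a
crypto_sync_skcipher is guaranteed synchronous with a bounded request
size, which is what makes SYNC_SKCIPHER_REQUEST_ON_STACK() safe.
Standalone sketch, assuming a "ctr(aes)" tfm and a caller-provided IV
(demo_ctr_crypt is illustrative):

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>

static int demo_ctr_crypt(struct crypto_sync_skcipher *tfm,
			  u8 *buf, unsigned int len, u8 iv[16])
{
	struct scatterlist sg;
	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

	sg_init_one(&sg, buf, len);
	skcipher_request_set_sync_tfm(req, tfm);
	skcipher_request_set_callback(req, 0, NULL, NULL);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	return crypto_skcipher_encrypt(req);	/* CTR: same op decrypts */
}
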
 
diff --git a/net/mac802154/llsec.h b/net/mac802154/llsec.h
index 6f3b658..ddeb792 100644
--- a/net/mac802154/llsec.h
+++ b/net/mac802154/llsec.h
@@ -1,15 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2014 Fraunhofer ITWM
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
  */
@@ -29,7 +21,7 @@
 
 	/* one tfm for each authsize (4/8/16) */
 	struct crypto_aead *tfm[3];
-	struct crypto_skcipher *tfm0;
+	struct crypto_sync_skcipher *tfm0;
 
 	struct kref ref;
 };
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index 3db1634..8ea5b64 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * MAC commands interface
  *
  * Copyright 2007-2012 Siemens AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Sergey Lapin <slapin@ossfans.org>
  * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 87da85a..06ea0f8 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2007-2012 Siemens AG
  *
  * Written by:
  * Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index 73f94fb..81666e1 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2007-2012 Siemens AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
  * Sergey Lapin <slapin@ossfans.org>
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 4dcf6e1..b8ce846 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2007-2012 Siemens AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Pavel Smolenskiy <pavel.smolenskiy@gmail.com>
  * Maxim Gorbachyov <maxim.gorbachev@siemens.com>
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index bcd1a5e..ab52811 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2007-2012 Siemens AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by:
  * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
  * Sergey Lapin <slapin@ossfans.org>
diff --git a/net/mac802154/util.c b/net/mac802154/util.c
index 7c03fb0..f207823 100644
--- a/net/mac802154/util.c
+++ b/net/mac802154/util.c
@@ -1,11 +1,5 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0-only
+/*
  *
  * Authors:
  * Alexander Aring <aar@pengutronix.de>
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 801ea90..d1ad69b 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # MPLS configuration
 #
@@ -25,6 +26,7 @@
 config MPLS_ROUTING
 	tristate "MPLS: routing support"
 	depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n
+	depends on PROC_SYSCTL
 	---help---
 	 Add support for forwarding of MPLS packets.
 
diff --git a/net/mpls/Makefile b/net/mpls/Makefile
index 9ca9236..53e33b6 100644
--- a/net/mpls/Makefile
+++ b/net/mpls/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for MPLS.
 #
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 8fbe6cd..c312741 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/socket.h>
@@ -22,8 +23,8 @@
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
 #endif
-#include <net/addrconf.h>
-#include <net/nexthop.h>
+#include <net/ipv6_stubs.h>
+#include <net/rtnh.h>
 #include "internal.h"
 
 /* max memory we will use for mpls_route */
@@ -36,8 +37,6 @@
 
 #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
 
-static int zero = 0;
-static int one = 1;
 static int label_limit = (1 << 20) - 1;
 static int ttl_max = 255;
 
@@ -1209,21 +1208,59 @@
 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
 };
 
+static int mpls_netconf_valid_get_req(struct sk_buff *skb,
+				      const struct nlmsghdr *nlh,
+				      struct nlattr **tb,
+				      struct netlink_ext_ack *extack)
+{
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Invalid header for netconf get request");
+		return -EINVAL;
+	}
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
+					      tb, NETCONFA_MAX,
+					      devconf_mpls_policy, extack);
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
+					    tb, NETCONFA_MAX,
+					    devconf_mpls_policy, extack);
+	if (err)
+		return err;
+
+	for (i = 0; i <= NETCONFA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NETCONFA_IFINDEX:
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
 				    struct nlmsghdr *nlh,
 				    struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(in_skb->sk);
 	struct nlattr *tb[NETCONFA_MAX + 1];
-	struct netconfmsg *ncm;
 	struct net_device *dev;
 	struct mpls_dev *mdev;
 	struct sk_buff *skb;
 	int ifindex;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
-			  devconf_mpls_policy, NULL);
+	err = mpls_netconf_valid_get_req(in_skb, nlh, tb, extack);
 	if (err < 0)
 		goto errout;
 
@@ -1263,6 +1300,7 @@
 static int mpls_netconf_dump_devconf(struct sk_buff *skb,
 				     struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	struct hlist_head *head;
 	struct net_device *dev;
@@ -1270,6 +1308,21 @@
 	int idx, s_idx;
 	int h, s_h;
 
+	if (cb->strict_check) {
+		struct netlink_ext_ack *extack = cb->extack;
+		struct netconfmsg *ncm;
+
+		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
+			NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
+			return -EINVAL;
+		}
+
+		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
+			NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request");
+			return -EINVAL;
+		}
+	}
+
 	s_h = cb->args[0];
 	s_idx = idx = cb->args[1];
 
@@ -1286,7 +1339,7 @@
 				goto cont;
 			if (mpls_netconf_fill_devconf(skb, mdev,
 						      NETLINK_CB(cb->skb).portid,
-						      cb->nlh->nlmsg_seq,
+						      nlh->nlmsg_seq,
 						      RTM_NEWNETCONF,
 						      NLM_F_MULTI,
 						      NETCONFA_ALL) < 0) {
@@ -1736,8 +1789,8 @@
 	int index;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+				     rtm_mpls_policy, extack);
 	if (err < 0)
 		goto errout;
 
@@ -1822,6 +1875,9 @@
 				goto errout;
 			break;
 		}
+		case RTA_GATEWAY:
+			NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute");
+			goto errout;
 		case RTA_VIA:
 		{
 			if (nla_get_via(nla, &cfg->rc_via_alen,
@@ -1962,7 +2018,7 @@
 		u8 linkdown = 0;
 		u8 dead = 0;
 
-		mp = nla_nest_start(skb, RTA_MULTIPATH);
+		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
 		if (!mp)
 			goto nla_put_failure;
 
@@ -2015,30 +2071,140 @@
 	return -EMSGSIZE;
 }
 
+#if IS_ENABLED(CONFIG_INET)
+static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
+				   struct fib_dump_filter *filter,
+				   struct netlink_callback *cb)
+{
+	return ip_valid_fib_dump_req(net, nlh, filter, cb);
+}
+#else
+static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
+				   struct fib_dump_filter *filter,
+				   struct netlink_callback *cb)
+{
+	struct netlink_ext_ack *extack = cb->extack;
+	struct nlattr *tb[RTA_MAX + 1];
+	struct rtmsg *rtm;
+	int err, i;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request");
+		return -EINVAL;
+	}
+
+	rtm = nlmsg_data(nlh);
+	if (rtm->rtm_dst_len || rtm->rtm_src_len  || rtm->rtm_tos   ||
+	    rtm->rtm_table   || rtm->rtm_scope    || rtm->rtm_type  ||
+	    rtm->rtm_flags) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for FIB dump request");
+		return -EINVAL;
+	}
+
+	if (rtm->rtm_protocol) {
+		filter->protocol = rtm->rtm_protocol;
+		filter->filter_set = 1;
+		cb->answer_flags = NLM_F_DUMP_FILTERED;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+					    rtm_mpls_policy, extack);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i <= RTA_MAX; ++i) {
+		int ifindex;
+
+		if (i == RTA_OIF) {
+			ifindex = nla_get_u32(tb[i]);
+			filter->dev = __dev_get_by_index(net, ifindex);
+			if (!filter->dev)
+				return -ENODEV;
+			filter->filter_set = 1;
+		} else if (tb[i]) {
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static bool mpls_rt_uses_dev(struct mpls_route *rt,
+			     const struct net_device *dev)
+{
+	struct net_device *nh_dev;
+
+	if (rt->rt_nhn == 1) {
+		struct mpls_nh *nh = rt->rt_nh;
+
+		nh_dev = rtnl_dereference(nh->nh_dev);
+		if (dev == nh_dev)
+			return true;
+	} else {
+		for_nexthops(rt) {
+			nh_dev = rtnl_dereference(nh->nh_dev);
+			if (nh_dev == dev)
+				return true;
+		} endfor_nexthops(rt);
+	}
+
+	return false;
+}
+
 static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	struct mpls_route __rcu **platform_label;
+	struct fib_dump_filter filter = {};
+	unsigned int flags = NLM_F_MULTI;
 	size_t platform_labels;
 	unsigned int index;
 
 	ASSERT_RTNL();
 
+	if (cb->strict_check) {
+		int err;
+
+		err = mpls_valid_fib_dump_req(net, nlh, &filter, cb);
+		if (err < 0)
+			return err;
+
+		/* for MPLS, there is only 1 table with fixed type and flags.
+		 * If either are set in the filter then return nothing.
+		 */
+		if ((filter.table_id && filter.table_id != RT_TABLE_MAIN) ||
+		    (filter.rt_type && filter.rt_type != RTN_UNICAST) ||
+		     filter.flags)
+			return skb->len;
+	}
+
 	index = cb->args[0];
 	if (index < MPLS_LABEL_FIRST_UNRESERVED)
 		index = MPLS_LABEL_FIRST_UNRESERVED;
 
 	platform_label = rtnl_dereference(net->mpls.platform_label);
 	platform_labels = net->mpls.platform_labels;
+
+	if (filter.filter_set)
+		flags |= NLM_F_DUMP_FILTERED;
+
 	for (; index < platform_labels; index++) {
 		struct mpls_route *rt;
+
 		rt = rtnl_dereference(platform_label[index]);
 		if (!rt)
 			continue;
 
+		if ((filter.dev && !mpls_rt_uses_dev(rt, filter.dev)) ||
+		    (filter.protocol && rt->rt_protocol != filter.protocol))
+			continue;
+
 		if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
 				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
-				    index, rt, NLM_F_MULTI) < 0)
+				    index, rt, flags) < 0)
 			break;
 	}
 	cb->args[0] = index;
@@ -2110,6 +2276,64 @@
 		rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
 }
 
+static int mpls_valid_getroute_req(struct sk_buff *skb,
+				   const struct nlmsghdr *nlh,
+				   struct nlattr **tb,
+				   struct netlink_ext_ack *extack)
+{
+	struct rtmsg *rtm;
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Invalid header for get route request");
+		return -EINVAL;
+	}
+
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+					      rtm_mpls_policy, extack);
+
+	rtm = nlmsg_data(nlh);
+	if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) ||
+	    rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table ||
+	    rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
+		return -EINVAL;
+	}
+	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Invalid flags for get route request");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+					    rtm_mpls_policy, extack);
+	if (err)
+		return err;
+
+	if ((tb[RTA_DST] || tb[RTA_NEWDST]) && !rtm->rtm_dst_len) {
+		NL_SET_ERR_MSG_MOD(extack, "rtm_dst_len must be 20 for MPLS");
+		return -EINVAL;
+	}
+
+	for (i = 0; i <= RTA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case RTA_DST:
+		case RTA_NEWDST:
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 			 struct netlink_ext_ack *extack)
 {
@@ -2129,8 +2353,7 @@
 	u8 n_labels;
 	int err;
 
-	err = nlmsg_parse(in_nlh, sizeof(*rtm), tb, RTA_MAX,
-			  rtm_mpls_policy, extack);
+	err = mpls_valid_getroute_req(in_skb, in_nlh, tb, extack);
 	if (err < 0)
 		goto errout;
 
@@ -2382,7 +2605,7 @@
 		.data		= &platform_labels,
 		.maxlen		= sizeof(int),
 		.mode		= table->mode,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &label_limit,
 	};
 
@@ -2411,8 +2634,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "default_ttl",
@@ -2420,7 +2643,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= SYSCTL_ONE,
 		.extra2		= &ttl_max,
 	},
 	{ }
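
SYSCTL_ZERO and SYSCTL_ONE (linux/sysctl.h) are shared, never-written
clamp constants replacing the per-file "static int zero/one" pattern
removed above. A minimal table entry using them (example_flag is
hypothetical):

#include <linux/sysctl.h>

static int example_flag;	/* hypothetical 0/1 tunable */

static struct ctl_table demo_table[] = {
	{
		.procname	= "example_flag",
		.data		= &example_flag,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{ }
};
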
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index b4da6d8..b169014 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	MPLS GSO Support
  *
  *	Authors: Simon Horman (horms@verge.net.au)
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	Based on: GSO portions of net/ipv4/gre.c
  */
 
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 94f53a9..44b6750 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * mpls tunnels	An implementation of MPLS tunnels using the lightweight
  *		tunnel infrastructure
  *
  * Authors:	Roopa Prabhu, <roopa@cumulusnetworks.com>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  */
 #include <linux/types.h>
 #include <linux/skbuff.h>
@@ -28,7 +23,7 @@
 #include "internal.h"
 
 static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
-	[MPLS_IPTUNNEL_DST]	= { .type = NLA_U32 },
+	[MPLS_IPTUNNEL_DST]	= { .len = sizeof(u32) },
 	[MPLS_IPTUNNEL_TTL]	= { .type = NLA_U8 },
 };
 
@@ -137,12 +132,22 @@
 
 	mpls_stats_inc_outucastpkts(out_dev, skb);
 
-	if (rt)
-		err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway,
-				 skb);
-	else if (rt6)
-		err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway,
-				 skb);
+	if (rt) {
+		if (rt->rt_gw_family == AF_INET6)
+			err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt->rt_gw6,
+					 skb);
+		else
+			err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4,
+					 skb);
+	} else if (rt6) {
+		if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) {
+			/* 6PE (RFC 4798) */
+			err = neigh_xmit(NEIGH_ARP_TABLE, out_dev,
+					 &rt6->rt6i_gateway.s6_addr32[3],
+					 skb);
+		} else {
+			err = neigh_xmit(NEIGH_ND_TABLE, out_dev,
+					 &rt6->rt6i_gateway, skb);
+		}
+	}
 	if (err)
 		net_dbg_ratelimited("%s: packet transmission failed: %d\n",
 				    __func__, err);
@@ -168,8 +173,8 @@
 	u8 n_labels;
 	int ret;
 
-	ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
-			       mpls_iptunnel_policy, extack);
+	ret = nla_parse_nested_deprecated(tb, MPLS_IPTUNNEL_MAX, nla,
+					  mpls_iptunnel_policy, extack);
 	if (ret < 0)
 		return ret;
 
@@ -183,8 +188,8 @@
 			   &n_labels, NULL, extack))
 		return -EINVAL;
 
-	newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) +
-				     n_labels * sizeof(u32));
+	newts = lwtunnel_state_alloc(struct_size(tun_encap_info, label,
+						 n_labels));
 	if (!newts)
 		return -ENOMEM;
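
The allocation above now uses struct_size() from linux/overflow.h,
which computes sizeof(*p) plus a trailing flexible array and saturates
on overflow so the allocation fails instead of wrapping. Generic sketch
with a hypothetical struct:

#include <linux/overflow.h>
#include <linux/slab.h>

struct demo_labels {
	unsigned int n;
	u32 label[];		/* flexible trailing array */
};

static struct demo_labels *demo_alloc(unsigned int n)
{
	/* sizeof(*l) + n * sizeof(l->label[0]), overflow-checked */
	struct demo_labels *l = kzalloc(struct_size(l, label, n), GFP_KERNEL);

	if (l)
		l->n = n;
	return l;
}
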
 
diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig
index 08a8a60..2f1e575 100644
--- a/net/ncsi/Kconfig
+++ b/net/ncsi/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Configuration for NCSI support
 #
@@ -10,3 +11,9 @@
 	  support. Enable this only if your system connects to a network
 	  device via NCSI and the ethernet driver you're using supports
 	  the protocol explicitly.
+config NCSI_OEM_CMD_GET_MAC
+	bool "Get NCSI OEM MAC Address"
+	depends on NET_NCSI
+	---help---
+	  This allows getting the MAC address from the NCSI firmware and
+	  setting it back to the controller.
diff --git a/net/ncsi/Makefile b/net/ncsi/Makefile
index 436ef68..e205f3b 100644
--- a/net/ncsi/Makefile
+++ b/net/ncsi/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for NCSI API
 #
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 8055e39..ad3fd7f 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright Gavin Shan, IBM Corporation 2016.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __NCSI_INTERNAL_H__
@@ -68,6 +64,22 @@
 	NCSI_MODE_MAX
 };
 
+/* OEM Vendor Manufacture ID */
+#define NCSI_OEM_MFR_MLX_ID             0x8119
+#define NCSI_OEM_MFR_BCM_ID             0x113d
+/* Broadcom specific OEM Command */
+#define NCSI_OEM_BCM_CMD_GMA            0x01   /* CMD ID for Get MAC */
+/* Mellanox specific OEM Command */
+#define NCSI_OEM_MLX_CMD_GMA            0x00   /* CMD ID for Get MAC */
+#define NCSI_OEM_MLX_CMD_GMA_PARAM      0x1b   /* Parameter for GMA  */
+/* OEM Command payload lengths */
+#define NCSI_OEM_BCM_CMD_GMA_LEN        12
+#define NCSI_OEM_MLX_CMD_GMA_LEN        8
+/* MAC address offset in OEM response */
+#define BCM_MAC_ADDR_OFFSET             28
+#define MLX_MAC_ADDR_OFFSET             8
+
 struct ncsi_channel_version {
 	u32 version;		/* Supported BCD encoded NCSI version */
 	u32 alpha2;		/* Supported BCD encoded NCSI version */
@@ -171,6 +183,8 @@
 #define NCSI_RESERVED_CHANNEL	0x1f
 #define NCSI_CHANNEL_INDEX(c)	((c) & ((1 << NCSI_PACKAGE_SHIFT) - 1))
 #define NCSI_TO_CHANNEL(p, c)	(((p) << NCSI_PACKAGE_SHIFT) | (c))
+#define NCSI_MAX_PACKAGE	8
+#define NCSI_MAX_CHANNEL	32
 
 struct ncsi_channel {
 	unsigned char               id;
@@ -209,6 +223,10 @@
 	unsigned int         channel_num; /* Number of channels     */
 	struct list_head     channels;    /* List of channels       */
 	struct list_head     node;        /* Form list of packages  */
+
+	bool                 multi_channel; /* Enable multiple channels  */
+	u32                  channel_whitelist; /* Channels to configure */
+	struct ncsi_channel  *preferred_channel; /* Primary channel      */
 };
 
 struct ncsi_request {
@@ -216,11 +234,15 @@
 	bool                 used;    /* Request that has been assigned  */
 	unsigned int         flags;   /* NCSI request property           */
 #define NCSI_REQ_FLAG_EVENT_DRIVEN	1
+#define NCSI_REQ_FLAG_NETLINK_DRIVEN	2
 	struct ncsi_dev_priv *ndp;    /* Associated NCSI device          */
 	struct sk_buff       *cmd;    /* Associated NCSI command packet  */
 	struct sk_buff       *rsp;    /* Associated NCSI response packet */
 	struct timer_list    timer;   /* Timer on waiting for response   */
 	bool                 enabled; /* Timer has been enabled or not   */
+	u32                  snd_seq;     /* netlink sending sequence number */
+	u32                  snd_portid;  /* netlink portid of sender        */
+	struct nlmsghdr      nlhdr;       /* netlink message header          */
 };
 
 enum {
@@ -236,14 +258,13 @@
 	ncsi_dev_state_probe_dp,
 	ncsi_dev_state_config_sp	= 0x0301,
 	ncsi_dev_state_config_cis,
+	ncsi_dev_state_config_oem_gma,
 	ncsi_dev_state_config_clear_vids,
 	ncsi_dev_state_config_svf,
 	ncsi_dev_state_config_ev,
 	ncsi_dev_state_config_sma,
 	ncsi_dev_state_config_ebf,
-#if IS_ENABLED(CONFIG_IPV6)
-	ncsi_dev_state_config_egmf,
-#endif
+	ncsi_dev_state_config_dgmf,
 	ncsi_dev_state_config_ecnt,
 	ncsi_dev_state_config_ec,
 	ncsi_dev_state_config_ae,
@@ -269,15 +290,13 @@
 #define NCSI_DEV_PROBED		1            /* Finalized NCSI topology    */
 #define NCSI_DEV_HWA		2            /* Enabled HW arbitration     */
 #define NCSI_DEV_RESHUFFLE	4
+#define NCSI_DEV_RESET		8            /* Reset state of NC          */
+	unsigned int        gma_flag;        /* OEM GMA flag               */
 	spinlock_t          lock;            /* Protect the NCSI device    */
-#if IS_ENABLED(CONFIG_IPV6)
-	unsigned int        inet6_addr_num;  /* Number of IPv6 addresses   */
-#endif
+	unsigned int        package_probe_id;/* Current ID during probe    */
 	unsigned int        package_num;     /* Number of packages         */
 	struct list_head    packages;        /* List of packages           */
 	struct ncsi_channel *hot_channel;    /* Channel was ever active    */
-	struct ncsi_package *force_package;  /* Force a specific package   */
-	struct ncsi_channel *force_channel;  /* Force a specific channel   */
 	struct ncsi_request requests[256];   /* Request table              */
 	unsigned int        request_id;      /* Last used request ID       */
 #define NCSI_REQ_START_IDX	1
@@ -290,6 +309,9 @@
 	struct list_head    node;            /* Form NCSI device list      */
 #define NCSI_MAX_VLAN_VIDS	15
 	struct list_head    vlan_vids;       /* List of active VLAN IDs */
+
+	bool                multi_package;   /* Enable multiple packages   */
+	u32                 package_whitelist; /* Packages to configure    */
 };
 
 struct ncsi_cmd_arg {
@@ -305,6 +327,8 @@
 		unsigned short words[8];
 		unsigned int   dwords[4];
 	};
+	unsigned char        *data;       /* NCSI OEM data                 */
+	struct genl_info     *info;       /* Netlink information           */
 };
 
 extern struct list_head ncsi_dev_list;
@@ -320,6 +344,7 @@
 	list_for_each_entry_rcu(nc, &np->channels, node)
 
 /* Resources */
+int ncsi_reset_dev(struct ncsi_dev *nd);
 void ncsi_start_channel_monitor(struct ncsi_channel *nc);
 void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
 struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
@@ -340,6 +365,13 @@
 void ncsi_free_request(struct ncsi_request *nr);
 struct ncsi_dev *ncsi_find_dev(struct net_device *dev);
 int ncsi_process_next_channel(struct ncsi_dev_priv *ndp);
+bool ncsi_channel_has_link(struct ncsi_channel *channel);
+bool ncsi_channel_is_last(struct ncsi_dev_priv *ndp,
+			  struct ncsi_channel *channel);
+int ncsi_update_tx_channel(struct ncsi_dev_priv *ndp,
+			   struct ncsi_package *np,
+			   struct ncsi_channel *disable,
+			   struct ncsi_channel *enable);
 
 /* Packet handlers */
 u32 ncsi_calculate_checksum(unsigned char *data, int len);
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 25e483e..b635c19 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright Gavin Shan, IBM Corporation 2016.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -50,13 +46,15 @@
 static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
 				struct ncsi_aen_pkt_hdr *h)
 {
-	struct ncsi_aen_lsc_pkt *lsc;
-	struct ncsi_channel *nc;
+	struct ncsi_channel *nc, *tmp;
 	struct ncsi_channel_mode *ncm;
+	unsigned long old_data, data;
+	struct ncsi_aen_lsc_pkt *lsc;
+	struct ncsi_package *np;
+	bool had_link, has_link;
+	unsigned long flags;
 	bool chained;
 	int state;
-	unsigned long old_data, data;
-	unsigned long flags;
 
 	/* Find the NCSI channel */
 	ncsi_find_package_and_channel(ndp, h->common.channel, NULL, &nc);
@@ -73,6 +71,9 @@
 	ncm->data[2] = data;
 	ncm->data[4] = ntohl(lsc->oem_status);
 
+	had_link = !!(old_data & 0x1);
+	has_link = !!(data & 0x1);
+
 	netdev_dbg(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
 		   nc->id, data & 0x1 ? "up" : "down");
 
@@ -80,22 +81,60 @@
 	state = nc->state;
 	spin_unlock_irqrestore(&nc->lock, flags);
 
-	if (!((old_data ^ data) & 0x1) || chained)
-		return 0;
-	if (!(state == NCSI_CHANNEL_INACTIVE && (data & 0x1)) &&
-	    !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1)))
+	if (state == NCSI_CHANNEL_INACTIVE)
+		netdev_warn(ndp->ndev.dev,
+			    "NCSI: Inactive channel %u received AEN!\n",
+			    nc->id);
+
+	if ((had_link == has_link) || chained)
 		return 0;
 
-	if (!(ndp->flags & NCSI_DEV_HWA) &&
-	    state == NCSI_CHANNEL_ACTIVE)
-		ndp->flags |= NCSI_DEV_RESHUFFLE;
+	if (!ndp->multi_package && !nc->package->multi_channel) {
+		if (had_link) {
+			ndp->flags |= NCSI_DEV_RESHUFFLE;
+			ncsi_stop_channel_monitor(nc);
+			spin_lock_irqsave(&ndp->lock, flags);
+			list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+			spin_unlock_irqrestore(&ndp->lock, flags);
+			return ncsi_process_next_channel(ndp);
+		}
+		/* Configured channel came up */
+		return 0;
+	}
 
-	ncsi_stop_channel_monitor(nc);
-	spin_lock_irqsave(&ndp->lock, flags);
-	list_add_tail_rcu(&nc->link, &ndp->channel_queue);
-	spin_unlock_irqrestore(&ndp->lock, flags);
+	if (had_link) {
+		ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
+		if (ncsi_channel_is_last(ndp, nc)) {
+			/* No channels left, reconfigure */
+			return ncsi_reset_dev(&ndp->ndev);
+		} else if (ncm->enable) {
+			/* Need to failover Tx channel */
+			ncsi_update_tx_channel(ndp, nc->package, nc, NULL);
+		}
+	} else if (has_link && nc->package->preferred_channel == nc) {
+		/* Return Tx to preferred channel */
+		ncsi_update_tx_channel(ndp, nc->package, NULL, nc);
+	} else if (has_link) {
+		NCSI_FOR_EACH_PACKAGE(ndp, np) {
+			NCSI_FOR_EACH_CHANNEL(np, tmp) {
+				/* Enable Tx on this channel if the current Tx
+				 * channel is down.
+				 */
+				ncm = &tmp->modes[NCSI_MODE_TX_ENABLE];
+				if (ncm->enable &&
+				    !ncsi_channel_has_link(tmp)) {
+					ncsi_update_tx_channel(ndp, nc->package,
+							       tmp, nc);
+					break;
+				}
+			}
+		}
+	}
 
-	return ncsi_process_next_channel(ndp);
+	/* Leave configured channels active in a multi-channel scenario so
+	 * AEN events are still received.
+	 */
+	return 0;
 }
 
 static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp,
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 7567ca6..0187e65 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright Gavin Shan, IBM Corporation 2016.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -17,6 +13,7 @@
 #include <net/ncsi.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+#include <net/genetlink.h>
 
 #include "internal.h"
 #include "ncsi-pkt.h"
@@ -57,7 +54,7 @@
 	checksum = ncsi_calculate_checksum((unsigned char *)h,
 					   sizeof(*h) + nca->payload);
 	pchecksum = (__be32 *)((void *)h + sizeof(struct ncsi_pkt_hdr) +
-		    nca->payload);
+		    ALIGN(nca->payload, 4));
 	*pchecksum = htonl(checksum);
 }
 
@@ -211,6 +208,25 @@
 	return 0;
 }
 
+static int ncsi_cmd_handler_oem(struct sk_buff *skb,
+				struct ncsi_cmd_arg *nca)
+{
+	struct ncsi_cmd_oem_pkt *cmd;
+	unsigned int len;
+
+	len = sizeof(struct ncsi_cmd_pkt_hdr) + 4;
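+	/* Allocate at least 26 bytes of payload; shorter requests are zero-padded */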
+	if (nca->payload < 26)
+		len += 26;
+	else
+		len += nca->payload;
+
+	cmd = skb_put_zero(skb, len);
+	memcpy(&cmd->mfr_id, nca->data, nca->payload);
+	ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+	return 0;
+}
+
 static struct ncsi_cmd_handler {
 	unsigned char type;
 	int           payload;
@@ -244,7 +260,7 @@
 	{ NCSI_PKT_CMD_GNS,    0, ncsi_cmd_handler_default },
 	{ NCSI_PKT_CMD_GNPTS,  0, ncsi_cmd_handler_default },
 	{ NCSI_PKT_CMD_GPS,    0, ncsi_cmd_handler_default },
-	{ NCSI_PKT_CMD_OEM,    0, NULL                     },
+	{ NCSI_PKT_CMD_OEM,   -1, ncsi_cmd_handler_oem     },
 	{ NCSI_PKT_CMD_PLDM,   0, NULL                     },
 	{ NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default }
 };
@@ -293,14 +309,21 @@
 
 int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca)
 {
-	struct ncsi_request *nr;
-	struct ethhdr *eh;
 	struct ncsi_cmd_handler *nch = NULL;
+	struct ncsi_request *nr;
+	unsigned char type;
+	struct ethhdr *eh;
 	int i, ret;
 
+	/* Use OEM generic handler for Netlink request */
+	if (nca->req_flags == NCSI_REQ_FLAG_NETLINK_DRIVEN)
+		type = NCSI_PKT_CMD_OEM;
+	else
+		type = nca->type;
+
 	/* Search for the handler */
 	for (i = 0; i < ARRAY_SIZE(ncsi_cmd_handlers); i++) {
-		if (ncsi_cmd_handlers[i].type == nca->type) {
+		if (ncsi_cmd_handlers[i].type == type) {
 			if (ncsi_cmd_handlers[i].handler)
 				nch = &ncsi_cmd_handlers[i];
 			else
@@ -316,12 +339,24 @@
 		return -ENOENT;
 	}
 
-	/* Get packet payload length and allocate the request */
-	nca->payload = nch->payload;
+	/* Get packet payload length and allocate the request.
+	 * A negative length in the handler structure means the
+	 * caller initializes nca->payload itself before calling
+	 * this function.
+	 */
+	if (nch->payload >= 0)
+		nca->payload = nch->payload;
 	nr = ncsi_alloc_command(nca);
 	if (!nr)
 		return -ENOMEM;
 
+	/* track netlink information */
+	if (nca->req_flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) {
+		nr->snd_seq = nca->info->snd_seq;
+		nr->snd_portid = nca->info->snd_portid;
+		nr->nlhdr = *nca->info->nlhdr;
+	}
+
 	/* Prepare the packet */
 	nca->id = nr->id;
 	ret = nch->handler(nr->cmd, nca);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 0912847..70fe026 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright Gavin Shan, IBM Corporation 2016.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -18,7 +14,7 @@
 #include <net/sock.h>
 #include <net/addrconf.h>
 #include <net/ipv6.h>
-#include <net/if_inet6.h>
+#include <net/genetlink.h>
 
 #include "internal.h"
 #include "ncsi-pkt.h"
@@ -27,6 +23,29 @@
 LIST_HEAD(ncsi_dev_list);
 DEFINE_SPINLOCK(ncsi_dev_lock);
 
+bool ncsi_channel_has_link(struct ncsi_channel *channel)
+{
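+	/* Bit 0 of the link status word is the link-up flag */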
+	return !!(channel->modes[NCSI_MODE_LINK].data[2] & 0x1);
+}
+
+bool ncsi_channel_is_last(struct ncsi_dev_priv *ndp,
+			  struct ncsi_channel *channel)
+{
+	struct ncsi_package *np;
+	struct ncsi_channel *nc;
+
+	NCSI_FOR_EACH_PACKAGE(ndp, np)
+		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			if (nc == channel)
+				continue;
+			if (nc->state == NCSI_CHANNEL_ACTIVE &&
+			    ncsi_channel_has_link(nc))
+				return false;
+		}
+
+	return true;
+}
+
 static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
@@ -51,7 +70,7 @@
 				continue;
 			}
 
-			if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
+			if (ncsi_channel_has_link(nc)) {
 				spin_unlock_irqrestore(&nc->lock, flags);
 				nd->link_up = 1;
 				goto report;
@@ -112,10 +131,8 @@
 	default:
 		netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n",
 			   nc->id);
-		if (!(ndp->flags & NCSI_DEV_HWA)) {
-			ncsi_report_link(ndp, true);
-			ndp->flags |= NCSI_DEV_RESHUFFLE;
-		}
+		ncsi_report_link(ndp, true);
+		ndp->flags |= NCSI_DEV_RESHUFFLE;
 
 		ncsi_stop_channel_monitor(nc);
 
@@ -268,6 +285,7 @@
 	np->ndp = ndp;
 	spin_lock_init(&np->lock);
 	INIT_LIST_HEAD(&np->channels);
+	np->channel_whitelist = UINT_MAX;
 
 	spin_lock_irqsave(&ndp->lock, flags);
 	tmp = ncsi_find_package(ndp, id);
@@ -406,6 +424,9 @@
 {
 	struct ncsi_request *nr = from_timer(nr, t, timer);
 	struct ncsi_dev_priv *ndp = nr->ndp;
+	struct ncsi_cmd_pkt *cmd;
+	struct ncsi_package *np;
+	struct ncsi_channel *nc;
 	unsigned long flags;
 
 	/* If the request already had associated response,
@@ -419,6 +440,18 @@
 	}
 	spin_unlock_irqrestore(&ndp->lock, flags);
 
+	if (nr->flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) {
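+		/* Notify the netlink requester that its command timed out */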
+		if (nr->cmd) {
+			/* Find the package */
+			cmd = (struct ncsi_cmd_pkt *)
+			      skb_network_header(nr->cmd);
+			ncsi_find_package_and_channel(ndp,
+						      cmd->cmd.common.channel,
+						      &np, &nc);
+			ncsi_send_netlink_timeout(nr, np, nc);
+		}
+	}
+
 	/* Release the request */
 	ncsi_free_request(nr);
 }
@@ -426,12 +459,14 @@
 static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
-	struct ncsi_package *np = ndp->active_package;
-	struct ncsi_channel *nc = ndp->active_channel;
+	struct ncsi_package *np;
+	struct ncsi_channel *nc, *tmp;
 	struct ncsi_cmd_arg nca;
 	unsigned long flags;
 	int ret;
 
+	np = ndp->active_package;
+	nc = ndp->active_channel;
 	nca.ndp = ndp;
 	nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN;
 	switch (nd->state) {
@@ -507,6 +542,15 @@
 		if (ret)
 			goto error;
 
+		NCSI_FOR_EACH_CHANNEL(np, tmp) {
+			/* If there is another channel active on this package
+			 * do not deselect the package.
+			 */
+			if (tmp != nc && tmp->state == NCSI_CHANNEL_ACTIVE) {
+				nd->state = ncsi_dev_state_suspend_done;
+				break;
+			}
+		}
 		break;
 	case ncsi_dev_state_suspend_deselect:
 		ndp->pending_req_num = 1;
@@ -525,8 +569,10 @@
 		spin_lock_irqsave(&nc->lock, flags);
 		nc->state = NCSI_CHANNEL_INACTIVE;
 		spin_unlock_irqrestore(&nc->lock, flags);
-		ncsi_process_next_channel(ndp);
-
+		if (ndp->flags & NCSI_DEV_RESET)
+			ncsi_reset_dev(nd);
+		else
+			ncsi_process_next_channel(ndp);
 		break;
 	default:
 		netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
@@ -635,13 +681,236 @@
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
+
+/* NCSI OEM Command APIs */
+static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca)
+{
+	unsigned char data[NCSI_OEM_BCM_CMD_GMA_LEN];
+	int ret = 0;
+
+	nca->payload = NCSI_OEM_BCM_CMD_GMA_LEN;
+
+	memset(data, 0, NCSI_OEM_BCM_CMD_GMA_LEN);
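+	/* Manufacturer ID fills the first four bytes; the command ID goes in byte 5 */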
+	*(unsigned int *)data = ntohl(NCSI_OEM_MFR_BCM_ID);
+	data[5] = NCSI_OEM_BCM_CMD_GMA;
+
+	nca->data = data;
+
+	ret = ncsi_xmit_cmd(nca);
+	if (ret)
+		netdev_err(nca->ndp->ndev.dev,
+			   "NCSI: Failed to transmit cmd 0x%x during configure\n",
+			   nca->type);
+	return ret;
+}
+
+static int ncsi_oem_gma_handler_mlx(struct ncsi_cmd_arg *nca)
+{
+	union {
+		u8 data_u8[NCSI_OEM_MLX_CMD_GMA_LEN];
+		u32 data_u32[NCSI_OEM_MLX_CMD_GMA_LEN / sizeof(u32)];
+	} u;
+	int ret = 0;
+
+	nca->payload = NCSI_OEM_MLX_CMD_GMA_LEN;
+
+	memset(&u, 0, sizeof(u));
+	u.data_u32[0] = ntohl(NCSI_OEM_MFR_MLX_ID);
+	u.data_u8[5] = NCSI_OEM_MLX_CMD_GMA;
+	u.data_u8[6] = NCSI_OEM_MLX_CMD_GMA_PARAM;
+
+	nca->data = u.data_u8;
+
+	ret = ncsi_xmit_cmd(nca);
+	if (ret)
+		netdev_err(nca->ndp->ndev.dev,
+			   "NCSI: Failed to transmit cmd 0x%x during configure\n",
+			   nca->type);
+	return ret;
+}
+
+/* OEM Command handlers initialization */
+static struct ncsi_oem_gma_handler {
+	unsigned int	mfr_id;
+	int		(*handler)(struct ncsi_cmd_arg *nca);
+} ncsi_oem_gma_handlers[] = {
+	{ NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm },
+	{ NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx }
+};
+
+static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
+{
+	struct ncsi_oem_gma_handler *nch = NULL;
+	int i;
+
+	/* This function should only be called once, return if flag set */
+	if (nca->ndp->gma_flag == 1)
+		return -1;
+
+	/* Find gma handler for given manufacturer id */
+	for (i = 0; i < ARRAY_SIZE(ncsi_oem_gma_handlers); i++) {
+		if (ncsi_oem_gma_handlers[i].mfr_id == mf_id) {
+			if (ncsi_oem_gma_handlers[i].handler)
+				nch = &ncsi_oem_gma_handlers[i];
+			break;
+		}
+	}
+
+	if (!nch) {
+		netdev_err(nca->ndp->ndev.dev,
+			   "NCSI: No GMA handler available for MFR-ID (0x%x)\n",
+			   mf_id);
+		return -1;
+	}
+
+	/* Set the flag; the GMA command should only be issued once */
+	nca->ndp->gma_flag = 1;
+
+	/* Get MAC address from NCSI device */
+	return nch->handler(nca);
+}
+
+#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
+
+/* Determine if a given channel from the channel_queue should be used for Tx */
+static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp,
+			       struct ncsi_channel *nc)
+{
+	struct ncsi_channel_mode *ncm;
+	struct ncsi_channel *channel;
+	struct ncsi_package *np;
+
+	/* Check if any other channel has Tx enabled; a channel may have already
+	 * been configured and removed from the channel queue.
+	 */
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		if (!ndp->multi_package && np != nc->package)
+			continue;
+		NCSI_FOR_EACH_CHANNEL(np, channel) {
+			ncm = &channel->modes[NCSI_MODE_TX_ENABLE];
+			if (ncm->enable)
+				return false;
+		}
+	}
+
+	/* This channel is the preferred channel and has link */
+	list_for_each_entry_rcu(channel, &ndp->channel_queue, link) {
+		np = channel->package;
+		if (np->preferred_channel &&
+		    ncsi_channel_has_link(np->preferred_channel)) {
+			return np->preferred_channel == nc;
+		}
+	}
+
+	/* This channel has link */
+	if (ncsi_channel_has_link(nc))
+		return true;
+
+	list_for_each_entry_rcu(channel, &ndp->channel_queue, link)
+		if (ncsi_channel_has_link(channel))
+			return false;
+
+	/* No other channel has link; default to this one */
+	return true;
+}
+
+/* Change the active Tx channel in a multi-channel setup */
+int ncsi_update_tx_channel(struct ncsi_dev_priv *ndp,
+			   struct ncsi_package *package,
+			   struct ncsi_channel *disable,
+			   struct ncsi_channel *enable)
+{
+	struct ncsi_cmd_arg nca;
+	struct ncsi_channel *nc;
+	struct ncsi_package *np;
+	int ret = 0;
+
+	if (!package->multi_channel && !ndp->multi_package)
+		netdev_warn(ndp->ndev.dev,
+			    "NCSI: Trying to update Tx channel in single-channel mode\n");
+	nca.ndp = ndp;
+	nca.req_flags = 0;
+
+	/* Find current channel with Tx enabled */
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		if (disable)
+			break;
+		if (!ndp->multi_package && np != package)
+			continue;
+
+		NCSI_FOR_EACH_CHANNEL(np, nc)
+			if (nc->modes[NCSI_MODE_TX_ENABLE].enable) {
+				disable = nc;
+				break;
+			}
+	}
+
+	/* Find a suitable channel for Tx */
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		if (enable)
+			break;
+		if (!ndp->multi_package && np != package)
+			continue;
+		if (!(ndp->package_whitelist & (0x1 << np->id)))
+			continue;
+
+		if (np->preferred_channel &&
+		    ncsi_channel_has_link(np->preferred_channel)) {
+			enable = np->preferred_channel;
+			break;
+		}
+
+		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			if (!(np->channel_whitelist & (0x1 << nc->id)))
+				continue;
+			if (nc->state != NCSI_CHANNEL_ACTIVE)
+				continue;
+			if (ncsi_channel_has_link(nc)) {
+				enable = nc;
+				break;
+			}
+		}
+	}
+
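+	/* Nothing to do unless we found a usable channel distinct from the current one */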
+	if (disable == enable)
+		return -1;
+
+	if (!enable)
+		return -1;
+
+	if (disable) {
+		nca.channel = disable->id;
+		nca.package = disable->package->id;
+		nca.type = NCSI_PKT_CMD_DCNT;
+		ret = ncsi_xmit_cmd(&nca);
+		if (ret)
+			netdev_err(ndp->ndev.dev,
+				   "Error %d sending DCNT\n",
+				   ret);
+	}
+
+	netdev_info(ndp->ndev.dev, "NCSI: channel %u enables Tx\n", enable->id);
+
+	nca.channel = enable->id;
+	nca.package = enable->package->id;
+	nca.type = NCSI_PKT_CMD_ECNT;
+	ret = ncsi_xmit_cmd(&nca);
+	if (ret)
+		netdev_err(ndp->ndev.dev,
+			   "Error %d sending ECNT\n",
+			   ret);
+
+	return ret;
+}
+
 static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 {
-	struct ncsi_dev *nd = &ndp->ndev;
-	struct net_device *dev = nd->dev;
 	struct ncsi_package *np = ndp->active_package;
 	struct ncsi_channel *nc = ndp->active_channel;
 	struct ncsi_channel *hot_nc = NULL;
+	struct ncsi_dev *nd = &ndp->ndev;
+	struct net_device *dev = nd->dev;
 	struct ncsi_cmd_arg nca;
 	unsigned char index;
 	unsigned long flags;
@@ -685,16 +954,30 @@
 			goto error;
 		}
 
+		nd->state = ncsi_dev_state_config_oem_gma;
+		break;
+	case ncsi_dev_state_config_oem_gma:
 		nd->state = ncsi_dev_state_config_clear_vids;
+		ret = -1;
+
+#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
+		nca.type = NCSI_PKT_CMD_OEM;
+		nca.package = np->id;
+		nca.channel = nc->id;
+		ndp->pending_req_num = 1;
+		ret = ncsi_gma_handler(&nca, nc->version.mf_id);
+#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
+
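+		/* If no GMA command was sent, advance the state machine ourselves */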
+		if (ret < 0)
+			schedule_work(&ndp->work);
+
 		break;
 	case ncsi_dev_state_config_clear_vids:
 	case ncsi_dev_state_config_svf:
 	case ncsi_dev_state_config_ev:
 	case ncsi_dev_state_config_sma:
 	case ncsi_dev_state_config_ebf:
-#if IS_ENABLED(CONFIG_IPV6)
-	case ncsi_dev_state_config_egmf:
-#endif
+	case ncsi_dev_state_config_dgmf:
 	case ncsi_dev_state_config_ecnt:
 	case ncsi_dev_state_config_ec:
 	case ncsi_dev_state_config_ae:
@@ -747,20 +1030,29 @@
 		} else if (nd->state == ncsi_dev_state_config_ebf) {
 			nca.type = NCSI_PKT_CMD_EBF;
 			nca.dwords[0] = nc->caps[NCSI_CAP_BC].cap;
-			nd->state = ncsi_dev_state_config_ecnt;
-#if IS_ENABLED(CONFIG_IPV6)
-			if (ndp->inet6_addr_num > 0 &&
-			    (nc->caps[NCSI_CAP_GENERIC].cap &
-			     NCSI_CAP_GENERIC_MC))
-				nd->state = ncsi_dev_state_config_egmf;
-			else
+			/* If global multicast filtering is supported,
+			 * disable it so that all multicast packets are
+			 * forwarded to the management controller.
+			 */
+			if (nc->caps[NCSI_CAP_GENERIC].cap &
+			    NCSI_CAP_GENERIC_MC)
+				nd->state = ncsi_dev_state_config_dgmf;
+			else if (ncsi_channel_is_tx(ndp, nc))
 				nd->state = ncsi_dev_state_config_ecnt;
-		} else if (nd->state == ncsi_dev_state_config_egmf) {
-			nca.type = NCSI_PKT_CMD_EGMF;
-			nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
-			nd->state = ncsi_dev_state_config_ecnt;
-#endif /* CONFIG_IPV6 */
+			else
+				nd->state = ncsi_dev_state_config_ec;
+		} else if (nd->state == ncsi_dev_state_config_dgmf) {
+			nca.type = NCSI_PKT_CMD_DGMF;
+			if (ncsi_channel_is_tx(ndp, nc))
+				nd->state = ncsi_dev_state_config_ecnt;
+			else
+				nd->state = ncsi_dev_state_config_ec;
 		} else if (nd->state == ncsi_dev_state_config_ecnt) {
+			if (np->preferred_channel &&
+			    nc != np->preferred_channel)
+				netdev_info(ndp->ndev.dev,
+					    "NCSI: Tx failed over to channel %u\n",
+					    nc->id);
 			nca.type = NCSI_PKT_CMD_ECNT;
 			nd->state = ncsi_dev_state_config_ec;
 		} else if (nd->state == ncsi_dev_state_config_ec) {
@@ -791,6 +1083,16 @@
 		netdev_dbg(ndp->ndev.dev, "NCSI: channel %u config done\n",
 			   nc->id);
 		spin_lock_irqsave(&nc->lock, flags);
+		nc->state = NCSI_CHANNEL_ACTIVE;
+
+		if (ndp->flags & NCSI_DEV_RESET) {
+			/* A reset event happened during config, start it now */
+			nc->reconfigure_needed = false;
+			spin_unlock_irqrestore(&nc->lock, flags);
+			ncsi_reset_dev(nd);
+			break;
+		}
+
 		if (nc->reconfigure_needed) {
 			/* This channel's configuration has been updated
 			 * part-way during the config state - start the
@@ -811,10 +1113,8 @@
 
 		if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
 			hot_nc = nc;
-			nc->state = NCSI_CHANNEL_ACTIVE;
 		} else {
 			hot_nc = NULL;
-			nc->state = NCSI_CHANNEL_INACTIVE;
 			netdev_dbg(ndp->ndev.dev,
 				   "NCSI: channel %u link down after config\n",
 				   nc->id);
@@ -842,43 +1142,35 @@
 
 static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
 {
-	struct ncsi_package *np, *force_package;
-	struct ncsi_channel *nc, *found, *hot_nc, *force_channel;
+	struct ncsi_channel *nc, *found, *hot_nc;
 	struct ncsi_channel_mode *ncm;
-	unsigned long flags;
+	unsigned long flags, cflags;
+	struct ncsi_package *np;
+	bool with_link;
 
 	spin_lock_irqsave(&ndp->lock, flags);
 	hot_nc = ndp->hot_channel;
-	force_channel = ndp->force_channel;
-	force_package = ndp->force_package;
 	spin_unlock_irqrestore(&ndp->lock, flags);
 
-	/* Force a specific channel whether or not it has link if we have been
-	 * configured to do so
-	 */
-	if (force_package && force_channel) {
-		found = force_channel;
-		ncm = &found->modes[NCSI_MODE_LINK];
-		if (!(ncm->data[2] & 0x1))
-			netdev_info(ndp->ndev.dev,
-				    "NCSI: Channel %u forced, but it is link down\n",
-				    found->id);
-		goto out;
-	}
-
-	/* The search is done once an inactive channel with up
-	 * link is found.
+	/* By default the search stops once an inactive channel with link
+	 * up is found, unless a preferred channel is set.
+	 * If multi_package or multi_channel are configured, all channels in
+	 * the whitelist are added to the channel queue.
+	 */
 	found = NULL;
+	with_link = false;
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
-		if (ndp->force_package && np != ndp->force_package)
+		if (!(ndp->package_whitelist & (0x1 << np->id)))
 			continue;
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
-			spin_lock_irqsave(&nc->lock, flags);
+			if (!(np->channel_whitelist & (0x1 << nc->id)))
+				continue;
+
+			spin_lock_irqsave(&nc->lock, cflags);
 
 			if (!list_empty(&nc->link) ||
 			    nc->state != NCSI_CHANNEL_INACTIVE) {
-				spin_unlock_irqrestore(&nc->lock, flags);
+				spin_unlock_irqrestore(&nc->lock, cflags);
 				continue;
 			}
 
@@ -890,32 +1182,49 @@
 
 			ncm = &nc->modes[NCSI_MODE_LINK];
 			if (ncm->data[2] & 0x1) {
-				spin_unlock_irqrestore(&nc->lock, flags);
 				found = nc;
-				goto out;
+				with_link = true;
 			}
 
-			spin_unlock_irqrestore(&nc->lock, flags);
+			/* If multi_channel is enabled configure all valid
+			 * channels whether or not they currently have link
+			 * so they will have AENs enabled.
+			 */
+			if (with_link || np->multi_channel) {
+				spin_lock_irqsave(&ndp->lock, flags);
+				list_add_tail_rcu(&nc->link,
+						  &ndp->channel_queue);
+				spin_unlock_irqrestore(&ndp->lock, flags);
+
+				netdev_dbg(ndp->ndev.dev,
+					   "NCSI: Channel %u added to queue (link %s)\n",
+					   nc->id,
+					   ncm->data[2] & 0x1 ? "up" : "down");
+			}
+
+			spin_unlock_irqrestore(&nc->lock, cflags);
+
+			if (with_link && !np->multi_channel)
+				break;
 		}
+		if (with_link && !ndp->multi_package)
+			break;
 	}
 
-	if (!found) {
+	if (list_empty(&ndp->channel_queue) && found) {
+		netdev_info(ndp->ndev.dev,
+			    "NCSI: No channel with link found, configuring channel %u\n",
+			    found->id);
+		spin_lock_irqsave(&ndp->lock, flags);
+		list_add_tail_rcu(&found->link, &ndp->channel_queue);
+		spin_unlock_irqrestore(&ndp->lock, flags);
+	} else if (!found) {
 		netdev_warn(ndp->ndev.dev,
-			    "NCSI: No channel found with link\n");
+			    "NCSI: No channel found to configure!\n");
 		ncsi_report_link(ndp, true);
 		return -ENODEV;
 	}
 
-	ncm = &found->modes[NCSI_MODE_LINK];
-	netdev_dbg(ndp->ndev.dev,
-		   "NCSI: Channel %u added to queue (link %s)\n",
-		   found->id, ncm->data[2] & 0x1 ? "up" : "down");
-
-out:
-	spin_lock_irqsave(&ndp->lock, flags);
-	list_add_tail_rcu(&found->link, &ndp->channel_queue);
-	spin_unlock_irqrestore(&ndp->lock, flags);
-
 	return ncsi_process_next_channel(ndp);
 }
 
@@ -952,35 +1261,6 @@
 	return false;
 }
 
-static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
-{
-	struct ncsi_package *np;
-	struct ncsi_channel *nc;
-	unsigned long flags;
-
-	/* Move all available channels to processing queue */
-	spin_lock_irqsave(&ndp->lock, flags);
-	NCSI_FOR_EACH_PACKAGE(ndp, np) {
-		NCSI_FOR_EACH_CHANNEL(np, nc) {
-			WARN_ON_ONCE(nc->state != NCSI_CHANNEL_INACTIVE ||
-				     !list_empty(&nc->link));
-			ncsi_stop_channel_monitor(nc);
-			list_add_tail_rcu(&nc->link, &ndp->channel_queue);
-		}
-	}
-	spin_unlock_irqrestore(&ndp->lock, flags);
-
-	/* We can have no channels in extremely case */
-	if (list_empty(&ndp->channel_queue)) {
-		netdev_err(ndp->ndev.dev,
-			   "NCSI: No available channels for HWA\n");
-		ncsi_report_link(ndp, false);
-		return -ENOENT;
-	}
-
-	return ncsi_process_next_channel(ndp);
-}
-
 static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
@@ -1012,70 +1292,28 @@
 		nd->state = ncsi_dev_state_probe_package;
 		break;
 	case ncsi_dev_state_probe_package:
-		ndp->pending_req_num = 16;
-
-		/* Select all possible packages */
-		nca.type = NCSI_PKT_CMD_SP;
-		nca.bytes[0] = 1;
-		nca.channel = NCSI_RESERVED_CHANNEL;
-		for (index = 0; index < 8; index++) {
-			nca.package = index;
-			ret = ncsi_xmit_cmd(&nca);
-			if (ret)
-				goto error;
-		}
-
-		/* Disable all possible packages */
-		nca.type = NCSI_PKT_CMD_DP;
-		for (index = 0; index < 8; index++) {
-			nca.package = index;
-			ret = ncsi_xmit_cmd(&nca);
-			if (ret)
-				goto error;
-		}
-
-		nd->state = ncsi_dev_state_probe_channel;
-		break;
-	case ncsi_dev_state_probe_channel:
-		if (!ndp->active_package)
-			ndp->active_package = list_first_or_null_rcu(
-				&ndp->packages, struct ncsi_package, node);
-		else if (list_is_last(&ndp->active_package->node,
-				      &ndp->packages))
-			ndp->active_package = NULL;
-		else
-			ndp->active_package = list_next_entry(
-				ndp->active_package, node);
-
-		/* All available packages and channels are enumerated. The
-		 * enumeration happens for once when the NCSI interface is
-		 * started. So we need continue to start the interface after
-		 * the enumeration.
-		 *
-		 * We have to choose an active channel before configuring it.
-		 * Note that we possibly don't have active channel in extreme
-		 * situation.
-		 */
-		if (!ndp->active_package) {
-			ndp->flags |= NCSI_DEV_PROBED;
-			if (ncsi_check_hwa(ndp))
-				ncsi_enable_hwa(ndp);
-			else
-				ncsi_choose_active_channel(ndp);
-			return;
-		}
-
-		/* Select the active package */
 		ndp->pending_req_num = 1;
+
 		nca.type = NCSI_PKT_CMD_SP;
 		nca.bytes[0] = 1;
-		nca.package = ndp->active_package->id;
+		nca.package = ndp->package_probe_id;
 		nca.channel = NCSI_RESERVED_CHANNEL;
 		ret = ncsi_xmit_cmd(&nca);
 		if (ret)
 			goto error;
-
+		nd->state = ncsi_dev_state_probe_channel;
+		break;
+	case ncsi_dev_state_probe_channel:
+		ndp->active_package = ncsi_find_package(ndp,
+							ndp->package_probe_id);
+		if (!ndp->active_package) {
+			/* No response */
+			nd->state = ncsi_dev_state_probe_dp;
+			schedule_work(&ndp->work);
+			break;
+		}
 		nd->state = ncsi_dev_state_probe_cis;
+		schedule_work(&ndp->work);
 		break;
 	case ncsi_dev_state_probe_cis:
 		ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
@@ -1124,22 +1362,35 @@
 	case ncsi_dev_state_probe_dp:
 		ndp->pending_req_num = 1;
 
-		/* Deselect the active package */
+		/* Deselect the current package */
 		nca.type = NCSI_PKT_CMD_DP;
-		nca.package = ndp->active_package->id;
+		nca.package = ndp->package_probe_id;
 		nca.channel = NCSI_RESERVED_CHANNEL;
 		ret = ncsi_xmit_cmd(&nca);
 		if (ret)
 			goto error;
 
-		/* Scan channels in next package */
-		nd->state = ncsi_dev_state_probe_channel;
+		/* Probe next package */
+		ndp->package_probe_id++;
+		if (ndp->package_probe_id >= 8) {
+			/* Probe finished */
+			ndp->flags |= NCSI_DEV_PROBED;
+			break;
+		}
+		nd->state = ncsi_dev_state_probe_package;
+		ndp->active_package = NULL;
 		break;
 	default:
 		netdev_warn(nd->dev, "Wrong NCSI state 0x%0x in enumeration\n",
 			    nd->state);
 	}
 
+	if (ndp->flags & NCSI_DEV_PROBED) {
+		/* Check if all packages have HWA support */
+		ncsi_check_hwa(ndp);
+		ncsi_choose_active_channel(ndp);
+	}
+
 	return;
 error:
 	netdev_err(ndp->ndev.dev,
@@ -1229,70 +1480,6 @@
 	return -ENODEV;
 }
 
-#if IS_ENABLED(CONFIG_IPV6)
-static int ncsi_inet6addr_event(struct notifier_block *this,
-				unsigned long event, void *data)
-{
-	struct inet6_ifaddr *ifa = data;
-	struct net_device *dev = ifa->idev->dev;
-	struct ncsi_dev *nd = ncsi_find_dev(dev);
-	struct ncsi_dev_priv *ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
-	struct ncsi_package *np;
-	struct ncsi_channel *nc;
-	struct ncsi_cmd_arg nca;
-	bool action;
-	int ret;
-
-	if (!ndp || (ipv6_addr_type(&ifa->addr) &
-	    (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)))
-		return NOTIFY_OK;
-
-	switch (event) {
-	case NETDEV_UP:
-		action = (++ndp->inet6_addr_num) == 1;
-		nca.type = NCSI_PKT_CMD_EGMF;
-		break;
-	case NETDEV_DOWN:
-		action = (--ndp->inet6_addr_num == 0);
-		nca.type = NCSI_PKT_CMD_DGMF;
-		break;
-	default:
-		return NOTIFY_OK;
-	}
-
-	/* We might not have active channel or packages. The IPv6
-	 * required multicast will be enabled when active channel
-	 * or packages are chosen.
-	 */
-	np = ndp->active_package;
-	nc = ndp->active_channel;
-	if (!action || !np || !nc)
-		return NOTIFY_OK;
-
-	/* We needn't enable or disable it if the function isn't supported */
-	if (!(nc->caps[NCSI_CAP_GENERIC].cap & NCSI_CAP_GENERIC_MC))
-		return NOTIFY_OK;
-
-	nca.ndp = ndp;
-	nca.req_flags = 0;
-	nca.package = np->id;
-	nca.channel = nc->id;
-	nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
-	ret = ncsi_xmit_cmd(&nca);
-	if (ret) {
-		netdev_warn(dev, "Fail to %s global multicast filter (%d)\n",
-			    (event == NETDEV_UP) ? "enable" : "disable", ret);
-		return NOTIFY_DONE;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block ncsi_inet6addr_notifier = {
-	.notifier_call = ncsi_inet6addr_event,
-};
-#endif /* CONFIG_IPV6 */
-
 static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
@@ -1458,6 +1645,7 @@
 	INIT_LIST_HEAD(&ndp->channel_queue);
 	INIT_LIST_HEAD(&ndp->vlan_vids);
 	INIT_WORK(&ndp->work, ncsi_dev_work);
+	ndp->package_whitelist = UINT_MAX;
 
 	/* Initialize private NCSI device */
 	spin_lock_init(&ndp->lock);
@@ -1470,11 +1658,6 @@
 	}
 
 	spin_lock_irqsave(&ncsi_dev_lock, flags);
-#if IS_ENABLED(CONFIG_IPV6)
-	ndp->inet6_addr_num = 0;
-	if (list_empty(&ncsi_dev_list))
-		register_inet6addr_notifier(&ncsi_inet6addr_notifier);
-#endif
 	list_add_tail_rcu(&ndp->node, &ncsi_dev_list);
 	spin_unlock_irqrestore(&ncsi_dev_lock, flags);
 
@@ -1494,26 +1677,19 @@
 int ncsi_start_dev(struct ncsi_dev *nd)
 {
 	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
-	int ret;
 
 	if (nd->state != ncsi_dev_state_registered &&
 	    nd->state != ncsi_dev_state_functional)
 		return -ENOTTY;
 
 	if (!(ndp->flags & NCSI_DEV_PROBED)) {
+		ndp->package_probe_id = 0;
 		nd->state = ncsi_dev_state_probe;
 		schedule_work(&ndp->work);
 		return 0;
 	}
 
-	if (ndp->flags & NCSI_DEV_HWA) {
-		netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n");
-		ret = ncsi_enable_hwa(ndp);
-	} else {
-		ret = ncsi_choose_active_channel(ndp);
-	}
-
-	return ret;
+	return ncsi_reset_dev(nd);
 }
 EXPORT_SYMBOL_GPL(ncsi_start_dev);
 
@@ -1526,7 +1702,10 @@
 	int old_state;
 	unsigned long flags;
 
-	/* Stop the channel monitor and reset channel's state */
+	/* Stop the channel monitor on any active channels. Don't reset the
+	 * channel state so we know which were active when ncsi_start_dev()
+	 * is next called.
+	 */
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
 			ncsi_stop_channel_monitor(nc);
@@ -1534,7 +1713,6 @@
 			spin_lock_irqsave(&nc->lock, flags);
 			chained = !list_empty(&nc->link);
 			old_state = nc->state;
-			nc->state = NCSI_CHANNEL_INACTIVE;
 			spin_unlock_irqrestore(&nc->lock, flags);
 
 			WARN_ON_ONCE(chained ||
@@ -1547,6 +1725,92 @@
 }
 EXPORT_SYMBOL_GPL(ncsi_stop_dev);
 
+int ncsi_reset_dev(struct ncsi_dev *nd)
+{
+	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
+	struct ncsi_channel *nc, *active, *tmp;
+	struct ncsi_package *np;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ndp->lock, flags);
+
+	if (!(ndp->flags & NCSI_DEV_RESET)) {
+		/* Reset hasn't been requested yet; check the device state */
+		switch (nd->state & ncsi_dev_state_major) {
+		case ncsi_dev_state_registered:
+		case ncsi_dev_state_probe:
+			/* Not even probed yet - do nothing */
+			spin_unlock_irqrestore(&ndp->lock, flags);
+			return 0;
+		case ncsi_dev_state_suspend:
+		case ncsi_dev_state_config:
+			/* Wait for the channel to finish its suspend/config
+			 * operation; once it finishes it will check for
+			 * NCSI_DEV_RESET and reset the state.
+			 */
+			ndp->flags |= NCSI_DEV_RESET;
+			spin_unlock_irqrestore(&ndp->lock, flags);
+			return 0;
+		}
+	} else {
+		switch (nd->state) {
+		case ncsi_dev_state_suspend_done:
+		case ncsi_dev_state_config_done:
+		case ncsi_dev_state_functional:
+			/* Ok */
+			break;
+		default:
+			/* A reset operation is already in progress */
+			spin_unlock_irqrestore(&ndp->lock, flags);
+			return 0;
+		}
+	}
+
+	if (!list_empty(&ndp->channel_queue)) {
+		/* Clear any channel queue we may have interrupted */
+		list_for_each_entry_safe(nc, tmp, &ndp->channel_queue, link)
+			list_del_init(&nc->link);
+	}
+	spin_unlock_irqrestore(&ndp->lock, flags);
+
+	active = NULL;
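+	/* Suspend one active channel at a time; reset re-enters until none remain */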
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			spin_lock_irqsave(&nc->lock, flags);
+
+			if (nc->state == NCSI_CHANNEL_ACTIVE) {
+				active = nc;
+				nc->state = NCSI_CHANNEL_INVISIBLE;
+				spin_unlock_irqrestore(&nc->lock, flags);
+				ncsi_stop_channel_monitor(nc);
+				break;
+			}
+
+			spin_unlock_irqrestore(&nc->lock, flags);
+		}
+		if (active)
+			break;
+	}
+
+	if (!active) {
+		/* Done */
+		spin_lock_irqsave(&ndp->lock, flags);
+		ndp->flags &= ~NCSI_DEV_RESET;
+		spin_unlock_irqrestore(&ndp->lock, flags);
+		return ncsi_choose_active_channel(ndp);
+	}
+
+	spin_lock_irqsave(&ndp->lock, flags);
+	ndp->flags |= NCSI_DEV_RESET;
+	ndp->active_channel = active;
+	ndp->active_package = active->package;
+	spin_unlock_irqrestore(&ndp->lock, flags);
+
+	nd->state = ncsi_dev_state_suspend;
+	schedule_work(&ndp->work);
+	return 0;
+}
+
 void ncsi_unregister_dev(struct ncsi_dev *nd)
 {
 	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
@@ -1560,10 +1824,6 @@
 
 	spin_lock_irqsave(&ncsi_dev_lock, flags);
 	list_del_rcu(&ndp->node);
-#if IS_ENABLED(CONFIG_IPV6)
-	if (list_empty(&ncsi_dev_list))
-		unregister_inet6addr_notifier(&ncsi_inet6addr_notifier);
-#endif
 	spin_unlock_irqrestore(&ncsi_dev_lock, flags);
 
 	ncsi_unregister_netlink(nd->dev);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 45f33d6..8b386d7 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -12,7 +8,6 @@
 #include <linux/if_arp.h>
 #include <linux/rtnetlink.h>
 #include <linux/etherdevice.h>
-#include <linux/module.h>
 #include <net/genetlink.h>
 #include <net/ncsi.h>
 #include <linux/skbuff.h>
@@ -20,6 +15,7 @@
 #include <uapi/linux/ncsi.h>
 
 #include "internal.h"
+#include "ncsi-pkt.h"
 #include "ncsi-netlink.h"
 
 static struct genl_family ncsi_genl_family;
@@ -29,6 +25,10 @@
 	[NCSI_ATTR_PACKAGE_LIST] =	{ .type = NLA_NESTED },
 	[NCSI_ATTR_PACKAGE_ID] =	{ .type = NLA_U32 },
 	[NCSI_ATTR_CHANNEL_ID] =	{ .type = NLA_U32 },
+	[NCSI_ATTR_DATA] =		{ .type = NLA_BINARY, .len = 2048 },
+	[NCSI_ATTR_MULTI_FLAG] =	{ .type = NLA_FLAG },
+	[NCSI_ATTR_PACKAGE_MASK] =	{ .type = NLA_U32 },
+	[NCSI_ATTR_CHANNEL_MASK] =	{ .type = NLA_U32 },
 };
 
 static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex)
@@ -68,14 +68,14 @@
 	nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]);
 	if (nc->state == NCSI_CHANNEL_ACTIVE)
 		nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE);
-	if (ndp->force_channel == nc)
+	if (nc == nc->package->preferred_channel)
 		nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
 
 	nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
 	nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
 	nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
 
-	vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
+	vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
 	if (!vid_nest)
 		return -ENOMEM;
 	ncf = &nc->vlan_filter;
@@ -109,19 +109,19 @@
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
 		if (np->id != id)
 			continue;
-		pnest = nla_nest_start(skb, NCSI_PKG_ATTR);
+		pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR);
 		if (!pnest)
 			return -ENOMEM;
 		nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
-		if (ndp->force_package == np)
+		if ((0x1 << np->id) == ndp->package_whitelist)
 			nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
-		cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
+		cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
 		if (!cnest) {
 			nla_nest_cancel(skb, pnest);
 			return -ENOMEM;
 		}
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
-			nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR);
+			nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR);
 			if (!nest) {
 				nla_nest_cancel(skb, cnest);
 				nla_nest_cancel(skb, pnest);
@@ -183,7 +183,7 @@
 
 	package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
 
-	attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+	attr = nla_nest_start_noflag(skb, NCSI_ATTR_PACKAGE_LIST);
 	if (!attr) {
 		kfree_skb(skb);
 		return -EMSGSIZE;
@@ -216,8 +216,8 @@
 	void *hdr;
 	int rc;
 
-	rc = genlmsg_parse(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX,
-			   ncsi_genl_policy, NULL);
+	rc = genlmsg_parse_deprecated(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX,
+				      ncsi_genl_policy, NULL);
 	if (rc)
 		return rc;
 
@@ -246,7 +246,11 @@
 		goto err;
 	}
 
-	attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+	attr = nla_nest_start_noflag(skb, NCSI_ATTR_PACKAGE_LIST);
+	if (!attr) {
+		rc = -EMSGSIZE;
+		goto err;
+	}
 	rc = ncsi_write_package_info(skb, ndp, package->id);
 	if (rc) {
 		nla_nest_cancel(skb, attr);
@@ -289,49 +293,58 @@
 	package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
 	package = NULL;
 
-	spin_lock_irqsave(&ndp->lock, flags);
-
 	NCSI_FOR_EACH_PACKAGE(ndp, np)
 		if (np->id == package_id)
 			package = np;
 	if (!package) {
 		/* The user has set a package that does not exist */
-		spin_unlock_irqrestore(&ndp->lock, flags);
 		return -ERANGE;
 	}
 
 	channel = NULL;
-	if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
-		/* Allow any channel */
-		channel_id = NCSI_RESERVED_CHANNEL;
-	} else {
+	if (info->attrs[NCSI_ATTR_CHANNEL_ID]) {
 		channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
 		NCSI_FOR_EACH_CHANNEL(package, nc)
-			if (nc->id == channel_id)
+			if (nc->id == channel_id) {
 				channel = nc;
+				break;
+			}
+		if (!channel) {
+			netdev_info(ndp->ndev.dev,
+				    "NCSI: Channel %u does not exist!\n",
+				    channel_id);
+			return -ERANGE;
+		}
 	}
 
-	if (channel_id != NCSI_RESERVED_CHANNEL && !channel) {
-		/* The user has set a channel that does not exist on this
-		 * package
-		 */
-		spin_unlock_irqrestore(&ndp->lock, flags);
-		netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
-			    channel_id);
-		return -ERANGE;
-	}
-
-	ndp->force_package = package;
-	ndp->force_channel = channel;
+	spin_lock_irqsave(&ndp->lock, flags);
+	ndp->package_whitelist = 0x1 << package->id;
+	ndp->multi_package = false;
 	spin_unlock_irqrestore(&ndp->lock, flags);
 
-	netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x%s as preferred\n",
-		    package_id, channel_id,
-		    channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
+	spin_lock_irqsave(&package->lock, flags);
+	package->multi_channel = false;
+	if (channel) {
+		package->channel_whitelist = 0x1 << channel->id;
+		package->preferred_channel = channel;
+	} else {
+		/* Allow any channel */
+		package->channel_whitelist = UINT_MAX;
+		package->preferred_channel = NULL;
+	}
+	spin_unlock_irqrestore(&package->lock, flags);
 
-	/* Bounce the NCSI channel to set changes */
-	ncsi_stop_dev(&ndp->ndev);
-	ncsi_start_dev(&ndp->ndev);
+	if (channel)
+		netdev_info(ndp->ndev.dev,
+			    "Set package 0x%x, channel 0x%x as preferred\n",
+			    package_id, channel_id);
+	else
+		netdev_info(ndp->ndev.dev, "Set package 0x%x as preferred\n",
+			    package_id);
+
+	/* Update channel configuration */
+	if (!(ndp->flags & NCSI_DEV_RESET))
+		ncsi_reset_dev(&ndp->ndev);
 
 	return 0;
 }
@@ -339,6 +352,7 @@
 static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
 {
 	struct ncsi_dev_priv *ndp;
+	struct ncsi_package *np;
 	unsigned long flags;
 
 	if (!info || !info->attrs)
@@ -352,16 +366,352 @@
 	if (!ndp)
 		return -ENODEV;
 
-	/* Clear any override */
+	/* Reset any whitelists and disable multi mode */
 	spin_lock_irqsave(&ndp->lock, flags);
-	ndp->force_package = NULL;
-	ndp->force_channel = NULL;
+	ndp->package_whitelist = UINT_MAX;
+	ndp->multi_package = false;
 	spin_unlock_irqrestore(&ndp->lock, flags);
+
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		spin_lock_irqsave(&np->lock, flags);
+		np->multi_channel = false;
+		np->channel_whitelist = UINT_MAX;
+		np->preferred_channel = NULL;
+		spin_unlock_irqrestore(&np->lock, flags);
+	}
 	netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
 
-	/* Bounce the NCSI channel to set changes */
-	ncsi_stop_dev(&ndp->ndev);
-	ncsi_start_dev(&ndp->ndev);
+	/* Update channel configuration */
+	if (!(ndp->flags & NCSI_DEV_RESET))
+		ncsi_reset_dev(&ndp->ndev);
+
+	return 0;
+}
+
+static int ncsi_send_cmd_nl(struct sk_buff *msg, struct genl_info *info)
+{
+	struct ncsi_dev_priv *ndp;
+	struct ncsi_pkt_hdr *hdr;
+	struct ncsi_cmd_arg nca;
+	unsigned char *data;
+	u32 package_id;
+	u32 channel_id;
+	int len, ret;
+
+	if (!info || !info->attrs) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!info->attrs[NCSI_ATTR_IFINDEX]) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!info->attrs[NCSI_ATTR_PACKAGE_ID]) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!info->attrs[NCSI_ATTR_DATA]) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+			       nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+	if (!ndp) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+	channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+
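+	/* Reject package/channel IDs outside the supported range */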
+	if (package_id >= NCSI_MAX_PACKAGE || channel_id >= NCSI_MAX_CHANNEL) {
+		ret = -ERANGE;
+		goto out_netlink;
+	}
+
+	len = nla_len(info->attrs[NCSI_ATTR_DATA]);
+	if (len < sizeof(struct ncsi_pkt_hdr)) {
+		netdev_info(ndp->ndev.dev, "NCSI: no command to send %u\n",
+			    package_id);
+		ret = -EINVAL;
+		goto out_netlink;
+	} else {
+		data = (unsigned char *)nla_data(info->attrs[NCSI_ATTR_DATA]);
+	}
+
+	hdr = (struct ncsi_pkt_hdr *)data;
+
+	nca.ndp = ndp;
+	nca.package = (unsigned char)package_id;
+	nca.channel = (unsigned char)channel_id;
+	nca.type = hdr->type;
+	nca.req_flags = NCSI_REQ_FLAG_NETLINK_DRIVEN;
+	nca.info = info;
+	nca.payload = ntohs(hdr->length);
+	nca.data = data + sizeof(*hdr);
+
+	ret = ncsi_xmit_cmd(&nca);
+out_netlink:
+	if (ret != 0) {
+		netdev_err(ndp->ndev.dev,
+			   "NCSI: Error %d sending command\n",
+			   ret);
+		ncsi_send_netlink_err(ndp->ndev.dev,
+				      info->snd_seq,
+				      info->snd_portid,
+				      info->nlhdr,
+				      ret);
+	}
+out:
+	return ret;
+}
+
+int ncsi_send_netlink_rsp(struct ncsi_request *nr,
+			  struct ncsi_package *np,
+			  struct ncsi_channel *nc)
+{
+	struct sk_buff *skb;
+	struct net *net;
+	void *hdr;
+	int rc;
+
+	net = dev_net(nr->rsp->dev);
+
+	skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(skb, nr->snd_portid, nr->snd_seq,
+			  &ncsi_genl_family, 0, NCSI_CMD_SEND_CMD);
+	if (!hdr) {
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
+	nla_put_u32(skb, NCSI_ATTR_IFINDEX, nr->rsp->dev->ifindex);
+	if (np)
+		nla_put_u32(skb, NCSI_ATTR_PACKAGE_ID, np->id);
+	if (nc)
+		nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, nc->id);
+	else
+		nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, NCSI_RESERVED_CHANNEL);
+
+	rc = nla_put(skb, NCSI_ATTR_DATA, nr->rsp->len, (void *)nr->rsp->data);
+	if (rc)
+		goto err;
+
+	genlmsg_end(skb, hdr);
+	return genlmsg_unicast(net, skb, nr->snd_portid);
+
+err:
+	kfree_skb(skb);
+	return rc;
+}
+
+int ncsi_send_netlink_timeout(struct ncsi_request *nr,
+			      struct ncsi_package *np,
+			      struct ncsi_channel *nc)
+{
+	struct sk_buff *skb;
+	struct net *net;
+	void *hdr;
+
+	skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(skb, nr->snd_portid, nr->snd_seq,
+			  &ncsi_genl_family, 0, NCSI_CMD_SEND_CMD);
+	if (!hdr) {
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
+	net = dev_net(nr->cmd->dev);
+
+	nla_put_u32(skb, NCSI_ATTR_IFINDEX, nr->cmd->dev->ifindex);
+
+	if (np)
+		nla_put_u32(skb, NCSI_ATTR_PACKAGE_ID, np->id);
+	else
+		nla_put_u32(skb, NCSI_ATTR_PACKAGE_ID,
+			    NCSI_PACKAGE_INDEX((((struct ncsi_pkt_hdr *)
+						 nr->cmd->data)->channel)));
+
+	if (nc)
+		nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, nc->id);
+	else
+		nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, NCSI_RESERVED_CHANNEL);
+
+	genlmsg_end(skb, hdr);
+	return genlmsg_unicast(net, skb, nr->snd_portid);
+}
+
+int ncsi_send_netlink_err(struct net_device *dev,
+			  u32 snd_seq,
+			  u32 snd_portid,
+			  struct nlmsghdr *nlhdr,
+			  int err)
+{
+	struct nlmsghdr *nlh;
+	struct nlmsgerr *nle;
+	struct sk_buff *skb;
+	struct net *net;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	net = dev_net(dev);
+
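+	/* Build an NLMSG_ERROR reply echoing the original request header */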
+	nlh = nlmsg_put(skb, snd_portid, snd_seq,
+			NLMSG_ERROR, sizeof(*nle), 0);
+	nle = (struct nlmsgerr *)nlmsg_data(nlh);
+	nle->error = err;
+	memcpy(&nle->msg, nlhdr, sizeof(*nlh));
+
+	nlmsg_end(skb, nlh);
+
+	return nlmsg_unicast(net->genl_sock, skb, snd_portid);
+}
+
+static int ncsi_set_package_mask_nl(struct sk_buff *msg,
+				    struct genl_info *info)
+{
+	struct ncsi_dev_priv *ndp;
+	unsigned long flags;
+	int rc;
+
+	if (!info || !info->attrs)
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_IFINDEX])
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_PACKAGE_MASK])
+		return -EINVAL;
+
+	ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+			       nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+	if (!ndp)
+		return -ENODEV;
+
+	spin_lock_irqsave(&ndp->lock, flags);
+	if (nla_get_flag(info->attrs[NCSI_ATTR_MULTI_FLAG])) {
+		if (ndp->flags & NCSI_DEV_HWA) {
+			ndp->multi_package = true;
+			rc = 0;
+		} else {
+			netdev_err(ndp->ndev.dev,
+				   "NCSI: Can't use multiple packages without HWA\n");
+			rc = -EPERM;
+		}
+	} else {
+		ndp->multi_package = false;
+		rc = 0;
+	}
+
+	if (!rc)
+		ndp->package_whitelist =
+			nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_MASK]);
+	spin_unlock_irqrestore(&ndp->lock, flags);
+
+	if (!rc) {
+		/* Update channel configuration */
+		if (!(ndp->flags & NCSI_DEV_RESET))
+			ncsi_reset_dev(&ndp->ndev);
+	}
+
+	return rc;
+}
+
+static int ncsi_set_channel_mask_nl(struct sk_buff *msg,
+				    struct genl_info *info)
+{
+	struct ncsi_package *np, *package;
+	struct ncsi_channel *nc, *channel;
+	u32 package_id, channel_id;
+	struct ncsi_dev_priv *ndp;
+	unsigned long flags;
+
+	if (!info || !info->attrs)
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_IFINDEX])
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_CHANNEL_MASK])
+		return -EINVAL;
+
+	ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+			       nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+	if (!ndp)
+		return -ENODEV;
+
+	package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+	package = NULL;
+	NCSI_FOR_EACH_PACKAGE(ndp, np)
+		if (np->id == package_id) {
+			package = np;
+			break;
+		}
+	if (!package)
+		return -ERANGE;
+
+	spin_lock_irqsave(&package->lock, flags);
+
+	channel = NULL;
+	if (info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+		channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+		NCSI_FOR_EACH_CHANNEL(np, nc)
+			if (nc->id == channel_id) {
+				channel = nc;
+				break;
+			}
+		if (!channel) {
+			spin_unlock_irqrestore(&package->lock, flags);
+			return -ERANGE;
+		}
+		netdev_dbg(ndp->ndev.dev,
+			   "NCSI: Channel %u set as preferred channel\n",
+			   channel->id);
+	}
+
+	package->channel_whitelist =
+		nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_MASK]);
+	if (package->channel_whitelist == 0)
+		netdev_dbg(ndp->ndev.dev,
+			   "NCSI: Package %u set to all channels disabled\n",
+			   package->id);
+
+	package->preferred_channel = channel;
+
+	if (nla_get_flag(info->attrs[NCSI_ATTR_MULTI_FLAG])) {
+		package->multi_channel = true;
+		netdev_info(ndp->ndev.dev,
+			    "NCSI: Multi-channel enabled on package %u\n",
+			    package_id);
+	} else {
+		package->multi_channel = false;
+	}
+
+	spin_unlock_irqrestore(&package->lock, flags);
+
+	/* Update channel configuration */
+	if (!(ndp->flags & NCSI_DEV_RESET))
+		ncsi_reset_dev(&ndp->ndev);
 
 	return 0;
 }
@@ -369,29 +719,48 @@
 static const struct genl_ops ncsi_ops[] = {
 	{
 		.cmd = NCSI_CMD_PKG_INFO,
-		.policy = ncsi_genl_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = ncsi_pkg_info_nl,
 		.dumpit = ncsi_pkg_info_all_nl,
 		.flags = 0,
 	},
 	{
 		.cmd = NCSI_CMD_SET_INTERFACE,
-		.policy = ncsi_genl_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = ncsi_set_interface_nl,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = NCSI_CMD_CLEAR_INTERFACE,
-		.policy = ncsi_genl_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = ncsi_clear_interface_nl,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NCSI_CMD_SEND_CMD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = ncsi_send_cmd_nl,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NCSI_CMD_SET_PACKAGE_MASK,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = ncsi_set_package_mask_nl,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NCSI_CMD_SET_CHANNEL_MASK,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = ncsi_set_channel_mask_nl,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 static struct genl_family ncsi_genl_family __ro_after_init = {
 	.name = "NCSI",
 	.version = 0,
 	.maxattr = NCSI_ATTR_MAX,
+	.policy = ncsi_genl_policy,
 	.module = THIS_MODULE,
 	.ops = ncsi_ops,
 	.n_ops = ARRAY_SIZE(ncsi_ops),
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
index 91a5c25..7502723 100644
--- a/net/ncsi/ncsi-netlink.h
+++ b/net/ncsi/ncsi-netlink.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __NCSI_NETLINK_H__
@@ -14,6 +10,18 @@
 
 #include "internal.h"
 
+int ncsi_send_netlink_rsp(struct ncsi_request *nr,
+			  struct ncsi_package *np,
+			  struct ncsi_channel *nc);
+int ncsi_send_netlink_timeout(struct ncsi_request *nr,
+			      struct ncsi_package *np,
+			      struct ncsi_channel *nc);
+int ncsi_send_netlink_err(struct net_device *dev,
+			  u32 snd_seq,
+			  u32 snd_portid,
+			  struct nlmsghdr *nlhdr,
+			  int err);
+
 int ncsi_init_netlink(struct net_device *dev);
 int ncsi_unregister_netlink(struct net_device *dev);
 
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 91b4b66..80938b3 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright Gavin Shan, IBM Corporation 2016.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __NCSI_PKT_H__
@@ -151,6 +147,37 @@
 	unsigned char           pad[22];
 };
 
+/* OEM Request Command as per NCSI Specification */
+struct ncsi_cmd_oem_pkt {
+	struct ncsi_cmd_pkt_hdr cmd;         /* Command header    */
+	__be32                  mfr_id;      /* Manufacturer ID   */
+	unsigned char           data[];      /* OEM Payload Data  */
+};
+
+/* OEM Response Packet as per NCSI Specification */
+struct ncsi_rsp_oem_pkt {
+	struct ncsi_rsp_pkt_hdr rsp;         /* Response header   */
+	__be32                  mfr_id;      /* Manufacturer ID   */
+	unsigned char           data[];      /* Payload data      */
+};
+
+/* Mellanox Response Data */
+struct ncsi_rsp_oem_mlx_pkt {
+	unsigned char           cmd_rev;     /* Command Revision  */
+	unsigned char           cmd;         /* Command ID        */
+	unsigned char           param;       /* Parameter         */
+	unsigned char           optional;    /* Optional data     */
+	unsigned char           data[];      /* Data              */
+};
+
+/* Broadcom Response Data */
+struct ncsi_rsp_oem_bcm_pkt {
+	unsigned char           ver;         /* Payload Version   */
+	unsigned char           type;        /* OEM Command type  */
+	__be16                  len;         /* Payload Length    */
+	unsigned char           data[];      /* Cmd specific Data */
+};
+
 /* Get Link Status */
 struct ncsi_rsp_gls_pkt {
 	struct ncsi_rsp_pkt_hdr rsp;        /* Response header   */
@@ -360,6 +387,9 @@
 #define NCSI_PKT_CMD_OEM	0x50 /* OEM                              */
 #define NCSI_PKT_CMD_PLDM	0x51 /* PLDM request over NCSI over RBT  */
 #define NCSI_PKT_CMD_GPUUID	0x52 /* Get package UUID                 */
+#define NCSI_PKT_CMD_QPNPR	0x56 /* Query Pending NC PLDM request    */
+#define NCSI_PKT_CMD_SNPR	0x57 /* Send NC PLDM Reply               */
 
 /* NCSI packet responses */
 #define NCSI_PKT_RSP_CIS	(NCSI_PKT_CMD_CIS    + 0x80)
@@ -392,6 +422,8 @@
 #define NCSI_PKT_RSP_OEM	(NCSI_PKT_CMD_OEM    + 0x80)
 #define NCSI_PKT_RSP_PLDM	(NCSI_PKT_CMD_PLDM   + 0x80)
 #define NCSI_PKT_RSP_GPUUID	(NCSI_PKT_CMD_GPUUID + 0x80)
+#define NCSI_PKT_RSP_QPNPR	(NCSI_PKT_CMD_QPNPR   + 0x80)
+#define NCSI_PKT_RSP_SNPR	(NCSI_PKT_CMD_SNPR   + 0x80)
 
 /* NCSI response code/reason */
 #define NCSI_PKT_RSP_C_COMPLETED	0x0000 /* Command Completed        */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 930c1d3..d5611f0 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -1,24 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright Gavin Shan, IBM Corporation 2016.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/netdevice.h>
+#include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 
 #include <net/ncsi.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+#include <net/genetlink.h>
 
 #include "internal.h"
 #include "ncsi-pkt.h"
+#include "ncsi-netlink.h"
 
 static int ncsi_validate_rsp_pkt(struct ncsi_request *nr,
 				 unsigned short payload)
@@ -32,28 +31,44 @@
 	 * before calling this function.
 	 */
 	h = (struct ncsi_rsp_pkt_hdr *)skb_network_header(nr->rsp);
-	if (h->common.revision != NCSI_PKT_REVISION)
+
+	if (h->common.revision != NCSI_PKT_REVISION) {
+		netdev_dbg(nr->ndp->ndev.dev,
+			   "NCSI: unsupported header revision\n");
 		return -EINVAL;
-	if (ntohs(h->common.length) != payload)
+	}
+	if (ntohs(h->common.length) != payload) {
+		netdev_dbg(nr->ndp->ndev.dev,
+			   "NCSI: payload length mismatch\n");
 		return -EINVAL;
+	}
 
 	/* Check on code and reason */
 	if (ntohs(h->code) != NCSI_PKT_RSP_C_COMPLETED ||
-	    ntohs(h->reason) != NCSI_PKT_RSP_R_NO_ERROR)
-		return -EINVAL;
+	    ntohs(h->reason) != NCSI_PKT_RSP_R_NO_ERROR) {
+		netdev_dbg(nr->ndp->ndev.dev,
+			   "NCSI: non-zero response/reason code %04xh, %04xh\n",
+			    ntohs(h->code), ntohs(h->reason));
+		return -EPERM;
+	}
 
 	/* Validate checksum, which might be zeroes if the
 	 * sender doesn't support checksum according to NCSI
 	 * specification.
 	 */
-	pchecksum = (__be32 *)((void *)(h + 1) + payload - 4);
+	pchecksum = (__be32 *)((void *)(h + 1) + ALIGN(payload, 4) - 4);
 	if (ntohl(*pchecksum) == 0)
 		return 0;
 
 	checksum = ncsi_calculate_checksum((unsigned char *)h,
 					   sizeof(*h) + payload - 4);
-	if (*pchecksum != htonl(checksum))
+
+	if (*pchecksum != htonl(checksum)) {
+		netdev_dbg(nr->ndp->ndev.dev,
+			   "NCSI: checksum mismatch; recvd: %08x calc: %08x\n",
+			   *pchecksum, htonl(checksum));
 		return -EINVAL;
+	}
 
 	return 0;
 }
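
The ALIGN() change matters whenever the payload length is not a multiple of 4: the checksum is read from the last 4 bytes of the 4-byte-padded payload area, so e.g. a 26-byte payload is padded to 28 and the checksum starts 24 bytes past the header, not 22 as the old expression computed. The offset arithmetic in isolation:

	/* Sketch: checksum location after an NCSI response header h.
	 * payload = 26 -> ALIGN(26, 4) = 28 -> checksum at (h + 1) + 24.
	 */
	static __be32 *ncsi_checksum_ptr(struct ncsi_rsp_pkt_hdr *h,
					 unsigned short payload)
	{
		return (__be32 *)((void *)(h + 1) + ALIGN(payload, 4) - 4);
	}
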
@@ -241,7 +256,7 @@
 	if (!ncm->enable)
 		return 0;
 
-	ncm->enable = 1;
+	ncm->enable = 0;
 	return 0;
 }
 
@@ -596,6 +611,129 @@
 	return 0;
 }
 
+/* Response handler for Mellanox command Get Mac Address */
+static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr)
+{
+	struct ncsi_dev_priv *ndp = nr->ndp;
+	struct net_device *ndev = ndp->ndev.dev;
+	const struct net_device_ops *ops = ndev->netdev_ops;
+	struct ncsi_rsp_oem_pkt *rsp;
+	struct sockaddr saddr;
+	int ret = 0;
+
+	/* Get the response header */
+	rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+
+	saddr.sa_family = ndev->type;
+	ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	memcpy(saddr.sa_data, &rsp->data[MLX_MAC_ADDR_OFFSET], ETH_ALEN);
+	ret = ops->ndo_set_mac_address(ndev, &saddr);
+	if (ret < 0)
+		netdev_warn(ndev, "NCSI: Writing MAC address to device failed\n");
+
+	return ret;
+}
+
+/* Response handler for Mellanox card */
+static int ncsi_rsp_handler_oem_mlx(struct ncsi_request *nr)
+{
+	struct ncsi_rsp_oem_mlx_pkt *mlx;
+	struct ncsi_rsp_oem_pkt *rsp;
+
+	/* Get the response header */
+	rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+	mlx = (struct ncsi_rsp_oem_mlx_pkt *)(rsp->data);
+
+	if (mlx->cmd == NCSI_OEM_MLX_CMD_GMA &&
+	    mlx->param == NCSI_OEM_MLX_CMD_GMA_PARAM)
+		return ncsi_rsp_handler_oem_mlx_gma(nr);
+	return 0;
+}
+
+/* Response handler for Broadcom command Get Mac Address */
+static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr)
+{
+	struct ncsi_dev_priv *ndp = nr->ndp;
+	struct net_device *ndev = ndp->ndev.dev;
+	const struct net_device_ops *ops = ndev->netdev_ops;
+	struct ncsi_rsp_oem_pkt *rsp;
+	struct sockaddr saddr;
+	int ret = 0;
+
+	/* Get the response header */
+	rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+
+	saddr.sa_family = ndev->type;
+	ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN);
+	/* Increase MAC address by 1 for BMC's address */
+	eth_addr_inc((u8 *)saddr.sa_data);
+	if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+		return -ENXIO;
+
+	ret = ops->ndo_set_mac_address(ndev, &saddr);
+	if (ret < 0)
+		netdev_warn(ndev, "NCSI: Writing MAC address to device failed\n");
+
+	return ret;
+}
+
+/* Response handler for Broadcom card */
+static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr)
+{
+	struct ncsi_rsp_oem_bcm_pkt *bcm;
+	struct ncsi_rsp_oem_pkt *rsp;
+
+	/* Get the response header */
+	rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+	bcm = (struct ncsi_rsp_oem_bcm_pkt *)(rsp->data);
+
+	if (bcm->type == NCSI_OEM_BCM_CMD_GMA)
+		return ncsi_rsp_handler_oem_bcm_gma(nr);
+	return 0;
+}
+
+static struct ncsi_rsp_oem_handler {
+	unsigned int	mfr_id;
+	int		(*handler)(struct ncsi_request *nr);
+} ncsi_rsp_oem_handlers[] = {
+	{ NCSI_OEM_MFR_MLX_ID, ncsi_rsp_handler_oem_mlx },
+	{ NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm }
+};
+
+/* Response handler for OEM command */
+static int ncsi_rsp_handler_oem(struct ncsi_request *nr)
+{
+	struct ncsi_rsp_oem_handler *nrh = NULL;
+	struct ncsi_rsp_oem_pkt *rsp;
+	unsigned int mfr_id, i;
+
+	/* Get the response header */
+	rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+	mfr_id = ntohl(rsp->mfr_id);
+
+	/* Check the manufacturer ID and find the handler */
+	for (i = 0; i < ARRAY_SIZE(ncsi_rsp_oem_handlers); i++) {
+		if (ncsi_rsp_oem_handlers[i].mfr_id == mfr_id) {
+			if (ncsi_rsp_oem_handlers[i].handler)
+				nrh = &ncsi_rsp_oem_handlers[i];
+			else
+				nrh = NULL;
+
+			break;
+		}
+	}
+
+	if (!nrh) {
+		netdev_err(nr->ndp->ndev.dev, "Received unrecognized OEM packet with MFR-ID (0x%x)\n",
+			   mfr_id);
+		return -ENOENT;
+	}
+
+	/* Process the packet */
+	return nrh->handler(nr);
+}
+
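
Dispatch above is a linear scan over ncsi_rsp_oem_handlers[] keyed on the manufacturer ID, so supporting another vendor is one handler plus one table row. A hypothetical sketch (NCSI_OEM_MFR_FOO_ID and the handler name are made up, not part of this patch):

	/* Hypothetical vendor handler; rsp->data[] layout is vendor-defined. */
	static int ncsi_rsp_handler_oem_foo(struct ncsi_request *nr)
	{
		struct ncsi_rsp_oem_pkt *rsp;

		rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
		/* decode rsp->data[] per the vendor's spec here */
		return 0;
	}

	/* ... plus one more row in ncsi_rsp_oem_handlers[]:
	 *	{ NCSI_OEM_MFR_FOO_ID, ncsi_rsp_handler_oem_foo },
	 */
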
 static int ncsi_rsp_handler_gvi(struct ncsi_request *nr)
 {
 	struct ncsi_rsp_gvi_pkt *rsp;
@@ -900,6 +1038,31 @@
 	return 0;
 }
 
+static int ncsi_rsp_handler_pldm(struct ncsi_request *nr)
+{
+	return 0;
+}
+
+static int ncsi_rsp_handler_netlink(struct ncsi_request *nr)
+{
+	struct ncsi_dev_priv *ndp = nr->ndp;
+	struct ncsi_rsp_pkt *rsp;
+	struct ncsi_package *np;
+	struct ncsi_channel *nc;
+	int ret;
+
+	/* Find the package */
+	rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+	ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+				      &np, &nc);
+	if (!np)
+		return -ENODEV;
+
+	ret = ncsi_send_netlink_rsp(nr, np, nc);
+
+	return ret;
+}
+
 static struct ncsi_rsp_handler {
 	unsigned char	type;
 	int             payload;
@@ -928,13 +1091,15 @@
 	{ NCSI_PKT_RSP_GVI,    40, ncsi_rsp_handler_gvi     },
 	{ NCSI_PKT_RSP_GC,     32, ncsi_rsp_handler_gc      },
 	{ NCSI_PKT_RSP_GP,     -1, ncsi_rsp_handler_gp      },
-	{ NCSI_PKT_RSP_GCPS,  172, ncsi_rsp_handler_gcps    },
-	{ NCSI_PKT_RSP_GNS,   172, ncsi_rsp_handler_gns     },
-	{ NCSI_PKT_RSP_GNPTS, 172, ncsi_rsp_handler_gnpts   },
+	{ NCSI_PKT_RSP_GCPS,  204, ncsi_rsp_handler_gcps    },
+	{ NCSI_PKT_RSP_GNS,    32, ncsi_rsp_handler_gns     },
+	{ NCSI_PKT_RSP_GNPTS,  48, ncsi_rsp_handler_gnpts   },
 	{ NCSI_PKT_RSP_GPS,     8, ncsi_rsp_handler_gps     },
-	{ NCSI_PKT_RSP_OEM,     0, NULL                     },
-	{ NCSI_PKT_RSP_PLDM,    0, NULL                     },
-	{ NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid  }
+	{ NCSI_PKT_RSP_OEM,    -1, ncsi_rsp_handler_oem     },
+	{ NCSI_PKT_RSP_PLDM,   -1, ncsi_rsp_handler_pldm    },
+	{ NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid  },
+	{ NCSI_PKT_RSP_QPNPR,  -1, ncsi_rsp_handler_pldm    },
+	{ NCSI_PKT_RSP_SNPR,   -1, ncsi_rsp_handler_pldm    }
 };
 
 int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
@@ -1002,6 +1167,17 @@
 		netdev_warn(ndp->ndev.dev,
 			    "NCSI: 'bad' packet ignored for type 0x%x\n",
 			    hdr->type);
+
+		if (nr->flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) {
+			if (ret == -EPERM)
+				goto out_netlink;
+			else
+				ncsi_send_netlink_err(ndp->ndev.dev,
+						      nr->snd_seq,
+						      nr->snd_portid,
+						      &nr->nlhdr,
+						      ret);
+		}
 		goto out;
 	}
 
@@ -1011,6 +1187,17 @@
 		netdev_err(ndp->ndev.dev,
 			   "NCSI: Handler for packet type 0x%x returned %d\n",
 			   hdr->type, ret);
+
+out_netlink:
+	if (nr->flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) {
+		ret = ncsi_rsp_handler_netlink(nr);
+		if (ret) {
+			netdev_err(ndp->ndev.dev,
+				   "NCSI: Netlink handler for packet type 0x%x returned %d\n",
+				   hdr->type, ret);
+		}
+	}
+
 out:
 	ncsi_free_request(nr);
 	return ret;
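
One subtlety in the error handling above: for requests that originated from userspace (NCSI_REQ_FLAG_NETLINK_DRIVEN), a validation result of -EPERM means the command completed but carried a non-zero NCSI response/reason code, so the raw response is still forwarded to the requester via out_netlink; any other failure is reported with ncsi_send_netlink_err() instead. The reporting half as a sketch, using only fields visible in this patch:

	/* Sketch: report a local failure back to the netlink requester. */
	static void ncsi_report_local_err(struct ncsi_request *nr, int err)
	{
		struct ncsi_dev_priv *ndp = nr->ndp;

		if (nr->flags == NCSI_REQ_FLAG_NETLINK_DRIVEN)
			ncsi_send_netlink_err(ndp->ndev.dev, nr->snd_seq,
					      nr->snd_portid, &nr->nlhdr, err);
	}
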
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f61c306..91efae8 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 menu "Core Netfilter Configuration"
 	depends on NET && INET && NETFILTER
 
@@ -19,7 +20,7 @@
 	bool
 
 config NETFILTER_NETLINK_ACCT
-tristate "Netfilter NFACCT over NFNETLINK interface"
+	tristate "Netfilter NFACCT over NFNETLINK interface"
 	depends on NETFILTER_ADVANCED
 	select NETFILTER_NETLINK
 	help
@@ -33,7 +34,7 @@
 	help
 	  If this option is enabled, the kernel will include support
 	  for queueing packets via NFNETLINK.
-	  
+
 config NETFILTER_NETLINK_LOG
 	tristate "Netfilter LOG over NFNETLINK interface"
 	default m if NETFILTER_ADVANCED=n
@@ -174,7 +175,7 @@
 	  If unsure, say Y.
 
 config NF_CT_PROTO_GRE
-	tristate
+	bool
 
 config NF_CT_PROTO_SCTP
 	bool 'SCTP protocol connection tracking support'
@@ -222,8 +223,6 @@
 	  of Network Address Translation on them.
 
 	  This is FTP support on Layer 3 independent connection tracking.
-	  Layer 3 independent connection tracking is experimental scheme
-	  which generalize ip_conntrack to support other layer 3 protocols.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
@@ -337,7 +336,7 @@
 	help
 	  SIP is an application-layer control protocol that can establish,
 	  modify, and terminate multimedia sessions (conferences) such as
-	  Internet telephony calls. With the ip_conntrack_sip and
+	  Internet telephony calls. With the nf_conntrack_sip and
 	  the nf_nat_sip modules you can support the protocol on a connection
 	  tracking/NATing firewall.
 
@@ -396,27 +395,13 @@
 	  the enqueued via NFNETLINK.
 
 config NF_NAT
-	tristate
-
-config NF_NAT_NEEDED
-	bool
-	depends on NF_NAT
-	default y
-
-config NF_NAT_PROTO_DCCP
-	bool
-	depends on NF_NAT && NF_CT_PROTO_DCCP
-	default NF_NAT && NF_CT_PROTO_DCCP
-
-config NF_NAT_PROTO_UDPLITE
-	bool
-	depends on NF_NAT && NF_CT_PROTO_UDPLITE
-	default NF_NAT && NF_CT_PROTO_UDPLITE
-
-config NF_NAT_PROTO_SCTP
-	bool
-	default NF_NAT && NF_CT_PROTO_SCTP
-	depends on NF_NAT && NF_CT_PROTO_SCTP
+	tristate "Network Address Translation support"
+	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
+	help
+	  The NAT option allows masquerading, port forwarding and other
+	  forms of full Network Address Port Translation. This can be
+	  controlled by iptables, ip6tables or nft.
 
 config NF_NAT_AMANDA
 	tristate
@@ -446,6 +431,9 @@
 config NF_NAT_REDIRECT
 	bool
 
+config NF_NAT_MASQUERADE
+	bool
+
 config NETFILTER_SYNPROXY
 	tristate
 
@@ -538,6 +526,7 @@
 config NFT_MASQ
 	depends on NF_CONNTRACK
 	depends on NF_NAT
+	select NF_NAT_MASQUERADE
 	tristate "Netfilter nf_tables masquerade support"
 	help
 	  This option adds the "masquerade" expression that you can use
@@ -547,6 +536,7 @@
 	depends on NF_CONNTRACK
 	depends on NF_NAT
 	tristate "Netfilter nf_tables redirect support"
+	select NF_NAT_REDIRECT
 	help
 	  This option adds the "redirect" expression that you can use
 	  to perform NAT in the redirect flavour.
@@ -554,6 +544,7 @@
 config NFT_NAT
 	depends on NF_CONNTRACK
 	select NF_NAT
+	depends on NF_TABLES_IPV4 || NF_TABLES_IPV6
 	tristate "Netfilter nf_tables nat module"
 	help
 	  This option adds the "nat" expression that you can use to perform
@@ -625,6 +616,13 @@
 	  The lookup will be delegated to the IPv4 or IPv6 FIB depending
 	  on the protocol of the packet.
 
+config NFT_XFRM
+	tristate "Netfilter nf_tables xfrm/IPSec security association matching"
+	depends on XFRM
+	help
+	  This option adds an expression that you can use to extract properties
+	  of a packet's security association.
+
 config NFT_SOCKET
 	tristate "Netfilter nf_tables socket match support"
 	depends on IPV6 || IPV6=n
@@ -651,6 +649,17 @@
 	help
 	  This makes transparent proxy support available in nftables.
 
+config NFT_SYNPROXY
+	tristate "Netfilter nf_tables SYNPROXY expression support"
+	depends on NF_CONNTRACK && NETFILTER_ADVANCED
+	select NETFILTER_SYNPROXY
+	select SYN_COOKIES
+	help
+	  The SYNPROXY expression allows you to intercept TCP connections and
+	  establish them using syncookies before they are passed on to the
+	  server. This allows you to avoid conntrack and server resource usage
+	  during SYN-flood attacks.
+
 if NF_TABLES_NETDEV
 
 config NF_DUP_NETDEV
@@ -688,7 +697,7 @@
 	tristate "Netfilter flow table mixed IPv4/IPv6 module"
 	depends on NF_FLOW_TABLE
 	help
-          This option adds the flow table mixed IPv4/IPv6 support.
+	  This option adds the flow table mixed IPv4/IPv6 support.
 
 	  To compile it as a module, choose M here.
 
@@ -906,7 +915,7 @@
 	    echo netfilter-ssh > /sys/class/leds/<ledname>/trigger
 
 	  For more information on the LEDs available on your system, see
-	  Documentation/leds/leds-class.txt
+	  Documentation/leds/leds-class.rst
 
 config NETFILTER_XT_TARGET_LOG
 	tristate "LOG target support"
@@ -998,11 +1007,26 @@
 
 	To compile it as a module, choose M here. If unsure, say N.
 
+config NETFILTER_XT_TARGET_MASQUERADE
+	tristate "MASQUERADE target support"
+	depends on NF_NAT
+	default m if NETFILTER_ADVANCED=n
+	select NF_NAT_MASQUERADE
+	help
+	  Masquerading is a special case of NAT: all outgoing connections are
+	  changed to seem to come from a particular interface's address, and
+	  if the interface goes down, those connections are lost.  This is
+	  only useful for dialup accounts with a dynamic IP address (i.e. your
+	  IP address will be different on the next dialup).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_TEE
 	tristate '"TEE" - packet cloning to alternate destination'
 	depends on NETFILTER_ADVANCED
 	depends on IPV6 || IPV6=n
 	depends on !NF_CONNTRACK || NF_CONNTRACK
+	depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES
 	select NF_DUP_IPV4
 	select NF_DUP_IPV6 if IP6_NF_IPTABLES
 	---help---
@@ -1041,7 +1065,7 @@
 	  the tables, chains, rules.
 
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 config NETFILTER_XT_TARGET_SECMARK
 	tristate '"SECMARK" target support'
@@ -1100,7 +1124,7 @@
 	  eg. UNICAST, LOCAL, BROADCAST, ...
 
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_BPF
 	tristate '"bpf" match support'
@@ -1145,7 +1169,7 @@
 	  comments in your iptables ruleset.
 
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_CONNBYTES
 	tristate  '"connbytes" per-connection counter match support'
@@ -1156,7 +1180,7 @@
 	  number of bytes and/or packets for each direction within a connection.
 
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_CONNLABEL
 	tristate '"connlabel" match support'
@@ -1222,7 +1246,7 @@
 	  and DCCP flags.
 
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_DEVGROUP
 	tristate '"devgroup" match support'
@@ -1287,7 +1311,7 @@
 	depends on NETFILTER_ADVANCED
 	help
 	  Helper matching allows you to match packets in dynamic connections
-	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
+	  tracked by a conntrack-helper, ie. nf_conntrack_ftp
 
 	  To compile it as a module, choose M here.  If unsure, say Y.
 
@@ -1458,7 +1482,7 @@
 	  byte counter.
 
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_RATEEST
 	tristate '"rateest" match support'
@@ -1478,11 +1502,11 @@
 	  This option adds a `realm' match, which allows you to use the realm
 	  key from the routing subsystem inside iptables.
 
-	  This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option 
+	  This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
 	  in tc world.
 
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_RECENT
 	tristate '"recent" match support'
@@ -1499,12 +1523,12 @@
 	depends on NETFILTER_ADVANCED
 	default IP_SCTP
 	help
-	  With this option enabled, you will be able to use the 
+	  With this option enabled, you will be able to use the
 	  `sctp' match in order to match on SCTP source/destination ports
 	  and SCTP chunk types.
 
 	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_SOCKET
 	tristate '"socket" match support'
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 16895e0..4fc075b 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -13,6 +13,7 @@
 nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
@@ -25,8 +26,6 @@
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
 
-obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
-
 # netlink interface for nf_conntrack
 obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
 obj-$(CONFIG_NF_CT_NETLINK_TIMEOUT) += nfnetlink_cttimeout.o
@@ -47,12 +46,7 @@
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
 obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 
-nf_nat-y	:= nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
-		   nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
-
-# NAT protocols (nf_nat)
-nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
-nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
+nf_nat-y	:= nf_nat_core.o nf_nat_proto.o nf_nat_helper.o
 
 # generic transport layer logging
 obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
@@ -62,6 +56,7 @@
 
 obj-$(CONFIG_NF_NAT) += nf_nat.o
 nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
+nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o
 
 # NAT helpers
 obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
@@ -82,7 +77,8 @@
 nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
 		  nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
 		  nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
-		  nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o
+		  nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \
+		  nft_chain_route.o nf_tables_offload.o
 
 nf_tables_set-objs := nf_tables_set_core.o \
 		      nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o
@@ -113,6 +109,10 @@
 obj-$(CONFIG_NFT_SOCKET)	+= nft_socket.o
 obj-$(CONFIG_NFT_OSF)		+= nft_osf.o
 obj-$(CONFIG_NFT_TPROXY)	+= nft_tproxy.o
+obj-$(CONFIG_NFT_XFRM)		+= nft_xfrm.o
+obj-$(CONFIG_NFT_SYNPROXY)	+= nft_synproxy.o
+
+obj-$(CONFIG_NFT_NAT)		+= nft_chain_nat.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)	+= nft_dup_netdev.o
@@ -124,7 +124,7 @@
 
 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
 
-# generic X tables 
+# generic X tables
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
 # combos
@@ -149,6 +149,7 @@
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_REDIRECT) += xt_REDIRECT.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_MASQUERADE) += xt_MASQUERADE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index dc240cb..5d5bdf4 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/rcupdate.h>
 #include <net/net_namespace.h>
+#include <net/netfilter/nf_queue.h>
 #include <net/sock.h>
 
 #include "nf_internals.h"
@@ -33,7 +34,7 @@
 DEFINE_PER_CPU(bool, nf_skb_duplicated);
 EXPORT_SYMBOL_GPL(nf_skb_duplicated);
 
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
 struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 EXPORT_SYMBOL(nf_hooks_needed);
 #endif
@@ -162,7 +163,7 @@
 
 static void hooks_validate(const struct nf_hook_entries *hooks)
 {
-#ifdef CONFIG_DEBUG_KERNEL
+#ifdef CONFIG_DEBUG_MISC
 	struct nf_hook_ops **orig_ops;
 	int prio = INT_MIN;
 	size_t i = 0;
@@ -347,7 +348,7 @@
 	if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
 		net_inc_ingress_queue();
 #endif
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
 	static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
 #endif
 	BUG_ON(p == new_hooks);
@@ -405,7 +406,7 @@
 		if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
 			net_dec_ingress_queue();
 #endif
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
 		static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]);
 #endif
 	} else {
@@ -519,7 +520,7 @@
 				ret = -EPERM;
 			return ret;
 		case NF_QUEUE:
-			ret = nf_queue(skb, state, e, s, verdict);
+			ret = nf_queue(skb, state, s, verdict);
 			if (ret == 1)
 				continue;
 			return ret;
@@ -535,28 +536,6 @@
 }
 EXPORT_SYMBOL(nf_hook_slow);
 
-
-int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
-{
-	if (writable_len > skb->len)
-		return 0;
-
-	/* Not exclusive use of packet?  Must copy. */
-	if (!skb_cloned(skb)) {
-		if (writable_len <= skb_headlen(skb))
-			return 1;
-	} else if (skb_clone_writable(skb, writable_len))
-		return 1;
-
-	if (writable_len <= skb_headlen(skb))
-		writable_len = 0;
-	else
-		writable_len -= skb_headlen(skb);
-
-	return !!__pskb_pull_tail(skb, writable_len);
-}
-EXPORT_SYMBOL(skb_make_writable);
-
 /* This needs to be compiled in any case to avoid dependencies between the
  * nfnetlink_queue code and nf_conntrack.
  */
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 4083a80..3c27348 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 menuconfig IP_SET
 	tristate "IP set support"
 	depends on INET && NETFILTER
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 257ca39..063df74 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -1,9 +1,5 @@
-/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 #ifndef __IP_SET_BITMAP_IP_GEN_H
 #define __IP_SET_BITMAP_IP_GEN_H
@@ -99,7 +95,7 @@
 	struct nlattr *nested;
 	size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size;
 
-	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+	nested = nla_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
 		goto nla_put_failure;
 	if (mtype_do_head(skb, map) ||
@@ -109,7 +105,7 @@
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
-	ipset_nest_end(skb, nested);
+	nla_nest_end(skb, nested);
 
 	return 0;
 nla_put_failure:
@@ -213,7 +209,7 @@
 	u32 id, first = cb->args[IPSET_CB_ARG0];
 	int ret = 0;
 
-	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	adt = nla_nest_start(skb, IPSET_ATTR_ADT);
 	if (!adt)
 		return -EMSGSIZE;
 	/* Extensions may be replaced */
@@ -230,7 +226,7 @@
 #endif
 		     ip_set_timeout_expired(ext_timeout(x, set))))
 			continue;
-		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		nested = nla_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested) {
 			if (id == first) {
 				nla_nest_cancel(skb, adt);
@@ -244,9 +240,9 @@
 			goto nla_put_failure;
 		if (ip_set_put_extensions(skb, set, x, mtype_is_filled(x)))
 			goto nla_put_failure;
-		ipset_nest_end(skb, nested);
+		nla_nest_end(skb, nested);
 	}
-	ipset_nest_end(skb, adt);
+	nla_nest_end(skb, adt);
 
 	/* Set listing finished */
 	cb->args[IPSET_CB_ARG0] = 0;
@@ -259,7 +255,7 @@
 		cb->args[IPSET_CB_ARG0] = 0;
 		ret = -EMSGSIZE;
 	}
-	ipset_nest_end(skb, adt);
+	nla_nest_end(skb, adt);
 out:
 	rcu_read_unlock();
 	return ret;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 488d6d0..11ff9d4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
  *                         Patrick Schaaf <bof@bof.de>
- * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
  */
 
 /* Kernel module implementing an IP set type: the bitmap:ip type */
@@ -31,7 +28,7 @@
 #define IPSET_TYPE_REV_MAX	3	/* skbinfo support added */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip");
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index c00b6a2..1d4e633 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
  *                         Patrick Schaaf <bof@bof.de>
  *			   Martin Josefsson <gandalf@wlug.westbo.se>
- * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 /* Kernel module implementing an IP set type: the bitmap:ip,mac type */
@@ -31,7 +27,7 @@
 #define IPSET_TYPE_REV_MAX	3	/* skbinfo support added */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip,mac");
 
@@ -219,10 +215,6 @@
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;
 
-	/* MAC can be src only */
-	if (!(opt->flags & IPSET_DIM_TWO_SRC))
-		return 0;
-
 	ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
 	if (ip < map->first_ip || ip > map->last_ip)
 		return -IPSET_ERR_BITMAP_RANGE;
@@ -233,7 +225,14 @@
 		return -EINVAL;
 
 	e.id = ip_to_id(map, ip);
-	memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+
+	if (opt->flags & IPSET_DIM_TWO_SRC)
+		ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+	else
+		ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
+	if (is_zero_ether_addr(e.ether))
+		return -EINVAL;
 
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index b561ca8..704a0dd 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the bitmap:port type */
 
@@ -26,7 +22,7 @@
 #define IPSET_TYPE_REV_MAX	3	/* skbinfo support added */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:port");
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 1577f2f..d73d182 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
  *                         Patrick Schaaf <bof@bof.de>
- * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
  */
 
 /* Kernel module for IP set management */
@@ -51,7 +48,7 @@
 module_param(max_sets, int, 0600);
 MODULE_PARM_DESC(max_sets, "maximal number of sets");
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 MODULE_DESCRIPTION("core IP set support");
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
 
@@ -771,11 +768,21 @@
  * The commands are serialized by the nfnl mutex.
  */
 
+static inline u8 protocol(const struct nlattr * const tb[])
+{
+	return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]);
+}
+
 static inline bool
 protocol_failed(const struct nlattr * const tb[])
 {
-	return !tb[IPSET_ATTR_PROTOCOL] ||
-	       nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
+	return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL;
+}
+
+static inline bool
+protocol_min_failed(const struct nlattr * const tb[])
+{
+	return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN;
 }
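
protocol_min_failed() enforces only a lower bound: anything at or above IPSET_PROTOCOL_MIN is accepted for most commands, and replies echo the caller's protocol number back (the PROTOCOL command additionally advertises both IPSET_PROTOCOL and IPSET_PROTOCOL_MIN), which lets old and new ipset userspace negotiate a common dialect. The acceptance check in isolation:

	/* Sketch: a newer userspace is tolerated; only too-old is rejected. */
	static bool proto_acceptable(u8 v)
	{
		return v >= IPSET_PROTOCOL_MIN;
	}
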
 
 static inline u32
@@ -889,7 +896,7 @@
 	u32 flags = flag_exist(nlh);
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !attr[IPSET_ATTR_TYPENAME] ||
 		     !attr[IPSET_ATTR_REVISION] ||
@@ -1027,7 +1034,7 @@
 	ip_set_id_t i;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr)))
+	if (unlikely(protocol_min_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
 	/* Must wait for flush to be really finished in list:set */
@@ -1105,7 +1112,7 @@
 	struct ip_set *s;
 	ip_set_id_t i;
 
-	if (unlikely(protocol_failed(attr)))
+	if (unlikely(protocol_min_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
 	if (!attr[IPSET_ATTR_SETNAME]) {
@@ -1147,7 +1154,7 @@
 	ip_set_id_t i;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !attr[IPSET_ATTR_SETNAME2]))
 		return -IPSET_ERR_PROTOCOL;
@@ -1157,7 +1164,7 @@
 		return -ENOENT;
 
 	write_lock_bh(&ip_set_ref_lock);
-	if (set->ref != 0) {
+	if (set->ref != 0 || set->ref_netlink != 0) {
 		ret = -IPSET_ERR_REFERENCED;
 		goto out;
 	}
@@ -1196,7 +1203,7 @@
 	ip_set_id_t from_id, to_id;
 	char from_name[IPSET_MAXNAMELEN];
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !attr[IPSET_ATTR_SETNAME2]))
 		return -IPSET_ERR_PROTOCOL;
@@ -1277,6 +1284,14 @@
 	}
 }
 
+static const struct nla_policy
+ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = {
+	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
+	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
+				    .len = IPSET_MAXNAMELEN - 1 },
+	[IPSET_ATTR_FLAGS]	= { .type = NLA_U32 },
+};
+
 static int
 dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
 {
@@ -1286,11 +1301,15 @@
 	struct nlattr *attr = (void *)nlh + min_len;
 	u32 dump_type;
 	ip_set_id_t index;
+	int ret;
 
-	/* Second pass, so parser can't fail */
-	nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len,
-		  ip_set_setname_policy, NULL);
+	ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr,
+			nlh->nlmsg_len - min_len,
+			ip_set_dump_policy, NULL);
+	if (ret)
+		return ret;
 
+	cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
 	if (cda[IPSET_ATTR_SETNAME]) {
 		struct ip_set *set;
 
@@ -1392,7 +1411,8 @@
 			ret = -EMSGSIZE;
 			goto release_refcount;
 		}
-		if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
+		if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL,
+			       cb->args[IPSET_CB_PROTO]) ||
 		    nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
 			goto nla_put_failure;
 		if (dump_flags & IPSET_FLAG_LIST_SETNAME)
@@ -1407,6 +1427,9 @@
 			    nla_put_u8(skb, IPSET_ATTR_REVISION,
 				       set->revision))
 				goto nla_put_failure;
+			if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN &&
+			    nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index)))
+				goto nla_put_failure;
 			ret = set->variant->head(set, skb);
 			if (ret < 0)
 				goto release_refcount;
@@ -1466,7 +1489,7 @@
 		       const struct nlattr * const attr[],
 		       struct netlink_ext_ack *extack)
 {
-	if (unlikely(protocol_failed(attr)))
+	if (unlikely(protocol_min_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
 	{
@@ -1531,9 +1554,14 @@
 		memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
 		cmdattr = (void *)&errmsg->msg + min_len;
 
-		nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr,
-			  nlh->nlmsg_len - min_len, ip_set_adt_policy, NULL);
+		ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr,
+				nlh->nlmsg_len - min_len, ip_set_adt_policy,
+				NULL);
 
+		if (ret) {
+			nlmsg_free(skb2);
+			return ret;
+		}
 		errline = nla_data(cda[IPSET_ATTR_LINENO]);
 
 		*errline = lineno;
@@ -1547,10 +1575,12 @@
 	return ret;
 }
 
-static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
-		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[],
-		       struct netlink_ext_ack *extack)
+static int ip_set_ad(struct net *net, struct sock *ctnl,
+		     struct sk_buff *skb,
+		     enum ipset_adt adt,
+		     const struct nlmsghdr *nlh,
+		     const struct nlattr * const attr[],
+		     struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set;
@@ -1560,7 +1590,7 @@
 	bool use_lineno;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !((attr[IPSET_ATTR_DATA] != NULL) ^
 		       (attr[IPSET_ATTR_ADT] != NULL)) ||
@@ -1581,19 +1611,18 @@
 				     attr[IPSET_ATTR_DATA],
 				     set->type->adt_policy, NULL))
 			return -IPSET_ERR_PROTOCOL;
-		ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
+		ret = call_ad(ctnl, skb, set, tb, adt, flags,
 			      use_lineno);
 	} else {
 		int nla_rem;
 
 		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
-			memset(tb, 0, sizeof(tb));
 			if (nla_type(nla) != IPSET_ATTR_DATA ||
 			    !flag_nested(nla) ||
 			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
 					     set->type->adt_policy, NULL))
 				return -IPSET_ERR_PROTOCOL;
-			ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
+			ret = call_ad(ctnl, skb, set, tb, adt,
 				      flags, use_lineno);
 			if (ret < 0)
 				return ret;
@@ -1602,59 +1631,22 @@
 	return ret;
 }
 
-static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
-		       const struct nlmsghdr *nlh,
+static int ip_set_uadd(struct net *net, struct sock *ctnl,
+		       struct sk_buff *skb, const struct nlmsghdr *nlh,
 		       const struct nlattr * const attr[],
 		       struct netlink_ext_ack *extack)
 {
-	struct ip_set_net *inst = ip_set_pernet(net);
-	struct ip_set *set;
-	struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
-	const struct nlattr *nla;
-	u32 flags = flag_exist(nlh);
-	bool use_lineno;
-	int ret = 0;
+	return ip_set_ad(net, ctnl, skb,
+			 IPSET_ADD, nlh, attr, extack);
+}
 
-	if (unlikely(protocol_failed(attr) ||
-		     !attr[IPSET_ATTR_SETNAME] ||
-		     !((attr[IPSET_ATTR_DATA] != NULL) ^
-		       (attr[IPSET_ATTR_ADT] != NULL)) ||
-		     (attr[IPSET_ATTR_DATA] &&
-		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
-		     (attr[IPSET_ATTR_ADT] &&
-		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
-		       !attr[IPSET_ATTR_LINENO]))))
-		return -IPSET_ERR_PROTOCOL;
-
-	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
-	if (!set)
-		return -ENOENT;
-
-	use_lineno = !!attr[IPSET_ATTR_LINENO];
-	if (attr[IPSET_ATTR_DATA]) {
-		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
-				     attr[IPSET_ATTR_DATA],
-				     set->type->adt_policy, NULL))
-			return -IPSET_ERR_PROTOCOL;
-		ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
-			      use_lineno);
-	} else {
-		int nla_rem;
-
-		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
-			memset(tb, 0, sizeof(*tb));
-			if (nla_type(nla) != IPSET_ATTR_DATA ||
-			    !flag_nested(nla) ||
-			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
-					     set->type->adt_policy, NULL))
-				return -IPSET_ERR_PROTOCOL;
-			ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
-				      flags, use_lineno);
-			if (ret < 0)
-				return ret;
-		}
-	}
-	return ret;
+static int ip_set_udel(struct net *net, struct sock *ctnl,
+		       struct sk_buff *skb, const struct nlmsghdr *nlh,
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
+{
+	return ip_set_ad(net, ctnl, skb,
+			 IPSET_DEL, nlh, attr, extack);
 }
 
 static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
@@ -1667,7 +1659,7 @@
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !attr[IPSET_ATTR_DATA] ||
 		     !flag_nested(attr[IPSET_ATTR_DATA])))
@@ -1704,7 +1696,7 @@
 	struct nlmsghdr *nlh2;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME]))
 		return -IPSET_ERR_PROTOCOL;
 
@@ -1720,7 +1712,7 @@
 			 IPSET_CMD_HEADER);
 	if (!nlh2)
 		goto nlmsg_failure;
-	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
+	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
 	    nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
 	    nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
 	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
@@ -1761,7 +1753,7 @@
 	const char *typename;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_TYPENAME] ||
 		     !attr[IPSET_ATTR_FAMILY]))
 		return -IPSET_ERR_PROTOCOL;
@@ -1780,7 +1772,7 @@
 			 IPSET_CMD_TYPE);
 	if (!nlh2)
 		goto nlmsg_failure;
-	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
+	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
 	    nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
 	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
 	    nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
@@ -1831,6 +1823,111 @@
 		goto nlmsg_failure;
 	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
 		goto nla_put_failure;
+	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN))
+		goto nla_put_failure;
+	nlmsg_end(skb2, nlh2);
+
+	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+	kfree_skb(skb2);
+	return -EMSGSIZE;
+}
+
+/* Get set by name or index, from userspace */
+
+static int ip_set_byname(struct net *net, struct sock *ctnl,
+			 struct sk_buff *skb, const struct nlmsghdr *nlh,
+			 const struct nlattr * const attr[],
+			 struct netlink_ext_ack *extack)
+{
+	struct ip_set_net *inst = ip_set_pernet(net);
+	struct sk_buff *skb2;
+	struct nlmsghdr *nlh2;
+	ip_set_id_t id = IPSET_INVALID_ID;
+	const struct ip_set *set;
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     !attr[IPSET_ATTR_SETNAME]))
+		return -IPSET_ERR_PROTOCOL;
+
+	set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id);
+	if (id == IPSET_INVALID_ID)
+		return -ENOENT;
+
+	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+			 IPSET_CMD_GET_BYNAME);
+	if (!nlh2)
+		goto nlmsg_failure;
+	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
+	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
+	    nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id)))
+		goto nla_put_failure;
+	nlmsg_end(skb2, nlh2);
+
+	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+	kfree_skb(skb2);
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
+	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
+	[IPSET_ATTR_INDEX]	= { .type = NLA_U16 },
+};
+
+static int ip_set_byindex(struct net *net, struct sock *ctnl,
+			  struct sk_buff *skb, const struct nlmsghdr *nlh,
+			  const struct nlattr * const attr[],
+			  struct netlink_ext_ack *extack)
+{
+	struct ip_set_net *inst = ip_set_pernet(net);
+	struct sk_buff *skb2;
+	struct nlmsghdr *nlh2;
+	ip_set_id_t id = IPSET_INVALID_ID;
+	const struct ip_set *set;
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     !attr[IPSET_ATTR_INDEX]))
+		return -IPSET_ERR_PROTOCOL;
+
+	id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]);
+	if (id >= inst->ip_set_max)
+		return -ENOENT;
+	set = ip_set(inst, id);
+	if (set == NULL)
+		return -ENOENT;
+
+	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+			 IPSET_CMD_GET_BYINDEX);
+	if (!nlh2)
+		goto nlmsg_failure;
+	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
+	    nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name))
+		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
 	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
@@ -1879,7 +1976,7 @@
 	[IPSET_CMD_LIST]	= {
 		.call		= ip_set_dump,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
-		.policy		= ip_set_setname_policy,
+		.policy		= ip_set_dump_policy,
 	},
 	[IPSET_CMD_SAVE]	= {
 		.call		= ip_set_dump,
@@ -1916,6 +2013,16 @@
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_protocol_policy,
 	},
+	[IPSET_CMD_GET_BYNAME]	= {
+		.call		= ip_set_byname,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_setname_policy,
+	},
+	[IPSET_CMD_GET_BYINDEX]	= {
+		.call		= ip_set_byindex,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_index_policy,
+	},
 };
 
 static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
@@ -1961,7 +2068,7 @@
 			goto done;
 		}
 
-		if (req_version->version != IPSET_PROTOCOL) {
+		if (req_version->version < IPSET_PROTOCOL_MIN) {
 			ret = -EPROTO;
 			goto done;
 		}
@@ -1977,8 +2084,9 @@
 		}
 
 		req_version->version = IPSET_PROTOCOL;
-		ret = copy_to_user(user, req_version,
-				   sizeof(struct ip_set_req_version));
+		if (copy_to_user(user, req_version,
+				 sizeof(struct ip_set_req_version)))
+			ret = -EFAULT;
 		goto done;
 	}
 	case IP_SET_OP_GET_BYNAME: {
@@ -2024,9 +2132,11 @@
 		}
 		nfnl_lock(NFNL_SUBSYS_IPSET);
 		set = ip_set(inst, req_get->set.index);
-		strncpy(req_get->set.name, set ? set->name : "",
-			IPSET_MAXNAMELEN);
+		ret = strscpy(req_get->set.name, set ? set->name : "",
+			      IPSET_MAXNAMELEN);
 		nfnl_unlock(NFNL_SUBSYS_IPSET);
+		if (ret < 0)
+			goto done;
 		goto copy;
 	}
 	default:
@@ -2035,7 +2145,8 @@
 	}	/* end of switch(op) */
 
 copy:
-	ret = copy_to_user(user, data, copylen);
+	if (copy_to_user(user, data, copylen))
+		ret = -EFAULT;
 
 done:
 	vfree(data);
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 3f09cdb..2b8f959 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 8a33dac..d098d87 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1,9 +1,5 @@
-/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 #ifndef _IP_SET_HASH_GEN_H
 #define _IP_SET_HASH_GEN_H
@@ -11,11 +7,11 @@
 #include <linux/rcupdate.h>
 #include <linux/jhash.h>
 #include <linux/types.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set.h>
 
 #define __ipset_dereference_protected(p, c)	rcu_dereference_protected(p, c)
 #define ipset_dereference_protected(p, set) \
-	__ipset_dereference_protected(p, spin_is_locked(&(set)->lock))
+	__ipset_dereference_protected(p, lockdep_is_held(&(set)->lock))
 
 #define rcu_dereference_bh_nfnl(p)	rcu_dereference_bh_check(p, 1)
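
The switch from spin_is_locked() to lockdep_is_held() is the standard fix for this kind of RCU annotation: spin_is_locked() only proves that *some* context holds the lock (and is constant-false on !SMP builds), whereas lockdep_is_held() asserts that the *current* context holds it, and the check compiles away entirely without lockdep. Typical use in the hash type's update paths, where h is the type-private data holding the RCU-managed table:

	/* Sketch: dereference the hash table under set->lock; lockdep
	 * (when enabled) verifies the caller really holds that lock.
	 */
	spin_lock_bh(&set->lock);
	t = ipset_dereference_protected(h->table, set);
	/* ... resize or insert into t ... */
	spin_unlock_bh(&set->lock);
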
 
@@ -67,7 +63,7 @@
 
 /* A hash bucket */
 struct hbucket {
-	struct rcu_head rcu;	/* for call_rcu_bh */
+	struct rcu_head rcu;	/* for call_rcu */
 	/* Which positions are used in the array */
 	DECLARE_BITMAP(used, AHASH_MAX_TUNED);
 	u8 size;		/* size of the array */
@@ -625,7 +621,7 @@
 					goto cleanup;
 				}
 				m->size = AHASH_INIT_SIZE;
-				extsize = ext_size(AHASH_INIT_SIZE, dsize);
+				extsize += ext_size(AHASH_INIT_SIZE, dsize);
 				RCU_INIT_POINTER(hbucket(t, key), m);
 			} else if (m->pos >= m->size) {
 				struct hbucket *ht;
@@ -664,7 +660,7 @@
 	spin_unlock_bh(&set->lock);
 
 	/* Give time to other readers of the set */
-	synchronize_rcu_bh();
+	synchronize_rcu();
 
 	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
 		 orig->htable_bits, orig, t->htable_bits, t);
@@ -957,7 +953,7 @@
 		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
 #endif
 		key = HKEY(d, h->initval, t->htable_bits);
-		n =  rcu_dereference_bh(hbucket(t, key));
+		n = rcu_dereference_bh(hbucket(t, key));
 		if (!n)
 			continue;
 		for (i = 0; i < n->pos; i++) {
@@ -1057,7 +1053,7 @@
 	htable_bits = t->htable_bits;
 	rcu_read_unlock_bh();
 
-	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+	nested = nla_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
 		goto nla_put_failure;
 	if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
@@ -1079,7 +1075,7 @@
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
-	ipset_nest_end(skb, nested);
+	nla_nest_end(skb, nested);
 
 	return 0;
 nla_put_failure:
@@ -1124,7 +1120,7 @@
 	void *incomplete;
 	int i, ret = 0;
 
-	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	atd = nla_nest_start(skb, IPSET_ATTR_ADT);
 	if (!atd)
 		return -EMSGSIZE;
 
@@ -1150,7 +1146,7 @@
 				continue;
 			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
 				 cb->args[IPSET_CB_ARG0], n, i, e);
-			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+			nested = nla_nest_start(skb, IPSET_ATTR_DATA);
 			if (!nested) {
 				if (cb->args[IPSET_CB_ARG0] == first) {
 					nla_nest_cancel(skb, atd);
@@ -1163,10 +1159,10 @@
 				goto nla_put_failure;
 			if (ip_set_put_extensions(skb, set, e, true))
 				goto nla_put_failure;
-			ipset_nest_end(skb, nested);
+			nla_nest_end(skb, nested);
 		}
 	}
-	ipset_nest_end(skb, atd);
+	nla_nest_end(skb, atd);
 	/* Set listing finished */
 	cb->args[IPSET_CB_ARG0] = 0;
 
@@ -1180,7 +1176,7 @@
 		cb->args[IPSET_CB_ARG0] = 0;
 		ret = -EMSGSIZE;
 	} else {
-		ipset_nest_end(skb, atd);
+		nla_nest_end(skb, atd);
 	}
 out:
 	rcu_read_unlock();
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 613eb21..f4432d9 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the hash:ip type */
 
@@ -30,7 +26,7 @@
 #define IPSET_TYPE_REV_MAX	4	/* skbinfo support  */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip");
 
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
index 1ab5ed2..4ce563e 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2016 Tomasz Chilinski <tomasz.chilinski@chilan.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 /* Kernel module implementing an IP set type: the hash:ip,mac type */
@@ -36,9 +33,6 @@
 /* Type specific function prefix */
 #define HTYPE		hash_ipmac
 
-/* Zero valued element is not supported */
-static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
-
 /* IPv4 variant */
 
 /* Member elements */
@@ -95,16 +89,16 @@
 	struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-	 /* MAC can be src only */
-	if (!(opt->flags & IPSET_DIM_TWO_SRC))
-		return 0;
-
 	if (skb_mac_header(skb) < skb->head ||
 	    (skb_mac_header(skb) + ETH_HLEN) > skb->data)
 		return -EINVAL;
 
-	memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
-	if (ether_addr_equal(e.ether, invalid_ether))
+	if (opt->flags & IPSET_DIM_TWO_SRC)
+		ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+	else
+		ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
+	if (is_zero_ether_addr(e.ether))
 		return -EINVAL;
 
 	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
@@ -140,7 +134,7 @@
 	if (ret)
 		return ret;
 	memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
-	if (ether_addr_equal(e.ether, invalid_ether))
+	if (is_zero_ether_addr(e.ether))
 		return -IPSET_ERR_HASH_ELEM;
 
 	return adtfn(set, &e, &ext, &ext, flags);
@@ -211,16 +205,16 @@
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-	 /* MAC can be src only */
-	if (!(opt->flags & IPSET_DIM_TWO_SRC))
-		return 0;
-
 	if (skb_mac_header(skb) < skb->head ||
 	    (skb_mac_header(skb) + ETH_HLEN) > skb->data)
 		return -EINVAL;
 
-	memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
-	if (ether_addr_equal(e.ether, invalid_ether))
+	if (opt->flags & IPSET_DIM_TWO_SRC)
+		ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+	else
+		ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
+	if (is_zero_ether_addr(e.ether))
 		return -EINVAL;
 
 	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
@@ -260,7 +254,7 @@
 		return ret;
 
 	memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
-	if (ether_addr_equal(e.ether, invalid_ether))
+	if (is_zero_ether_addr(e.ether))
 		return -IPSET_ERR_HASH_ELEM;
 
 	return adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index f3ba834..7a1734a 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -1,10 +1,5 @@
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- * Copyright (C) 2013 Smoothwall Ltd. <vytas.dauksa@smoothwall.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the hash:ip,mark type */
 
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index ddb8039..32e2406 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the hash:ip,port type */
 
@@ -32,7 +28,7 @@
 #define IPSET_TYPE_REV_MAX	5 /* skbinfo support added */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port");
 
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index a7f4d7a..15d4193 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the hash:ip,port,ip type */
 
@@ -32,7 +28,7 @@
 #define IPSET_TYPE_REV_MAX	5 /* skbinfo support added */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,ip");
 
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 88b83d6..7a4d7af 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the hash:ip,port,net type */
 
@@ -34,7 +30,7 @@
 #define IPSET_TYPE_REV_MAX	7 /* skbinfo support added */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,net");
 
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index f9d5a2a..d94c585 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the hash:mac type */
 
@@ -23,7 +19,7 @@
 #define IPSET_TYPE_REV_MAX	0
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:mac");
 
@@ -81,15 +77,15 @@
 	struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-	 /* MAC can be src only */
-	if (!(opt->flags & IPSET_DIM_ONE_SRC))
-		return 0;
-
 	if (skb_mac_header(skb) < skb->head ||
 	    (skb_mac_header(skb) + ETH_HLEN) > skb->data)
 		return -EINVAL;
 
-	ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+	if (opt->flags & IPSET_DIM_ONE_SRC)
+		ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+	else
+		ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
 	if (is_zero_ether_addr(e.ether))
 		return -EINVAL;
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
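
The reworked hash_mac4_kadt() now checks the MAC header bounds before
touching the frame and honors IPSET_DIM_ONE_SRC to pick between source
and destination MAC, instead of silently matching nothing for non-src
dimensions. The bounds test guards the eth_hdr() dereference; factored
out as a sketch (not in the patch):

	static bool mac_header_ok(const struct sk_buff *skb)
	{
		/* A full Ethernet header must lie inside the skb head. */
		return skb_mac_header(skb) >= skb->head &&
		       skb_mac_header(skb) + ETH_HLEN <= skb->data;
	}
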
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 5449e23..3d932de 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the hash:net type */
 
@@ -31,7 +27,7 @@
 #define IPSET_TYPE_REV_MAX	6 /* skbinfo mapping support added */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net");
 
@@ -372,6 +368,7 @@
 		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
 		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
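
The new IPSET_ATTR_LINENO entry appears to be needed so that the line
numbers sent by "ipset restore" for error reporting pass the per-type
policy validation. Declaring such an attribute takes one designated
initializer in the policy array, e.g. (hypothetical array, for
illustration):

	static const struct nla_policy demo_policy[IPSET_ATTR_CADT_MAX + 1] = {
		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
	};
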
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index f5164c1..87b29f9 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the hash:net,iface type */
 
@@ -32,7 +28,7 @@
 #define IPSET_TYPE_REV_MAX	6 /* skbinfo support added */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net,iface");
 
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 5a2b923..4398322 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -1,9 +1,6 @@
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
  * Copyright (C) 2013 Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 /* Kernel module implementing an IP set type: the hash:net type */
@@ -479,6 +476,7 @@
 		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
 		[IPSET_ATTR_CIDR2]	= { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 1a187be..799f227 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the hash:net,port type */
 
@@ -33,7 +29,7 @@
 #define IPSET_TYPE_REV_MAX	7 /* skbinfo support added */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net,port");
 
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 613e18e..a82b70e 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the hash:ip,port,net type */
 
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 4eef55d..67ac501 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
 
 /* Kernel module implementing an IP set type: the list:set type */
 
@@ -22,7 +18,7 @@
 #define IPSET_TYPE_REV_MAX	3 /* skbinfo support added */
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_list:set");
 
@@ -292,7 +288,7 @@
 	if (n &&
 	    !(SET_WITH_TIMEOUT(set) &&
 	      ip_set_timeout_expired(ext_timeout(n, set))))
-		n =  NULL;
+		n = NULL;
 
 	e = kzalloc(set->dsize, GFP_ATOMIC);
 	if (!e)
@@ -466,7 +462,7 @@
 	struct nlattr *nested;
 	size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size;
 
-	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+	nested = nla_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
 		goto nla_put_failure;
 	if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
@@ -476,7 +472,7 @@
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
-	ipset_nest_end(skb, nested);
+	nla_nest_end(skb, nested);
 
 	return 0;
 nla_put_failure:
@@ -494,7 +490,7 @@
 	struct set_elem *e;
 	int ret = 0;
 
-	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	atd = nla_nest_start(skb, IPSET_ATTR_ADT);
 	if (!atd)
 		return -EMSGSIZE;
 
@@ -506,7 +502,7 @@
 			i++;
 			continue;
 		}
-		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		nested = nla_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested)
 			goto nla_put_failure;
 		ip_set_name_byindex(map->net, e->id, name);
@@ -514,11 +510,11 @@
 			goto nla_put_failure;
 		if (ip_set_put_extensions(skb, set, e, true))
 			goto nla_put_failure;
-		ipset_nest_end(skb, nested);
+		nla_nest_end(skb, nested);
 		i++;
 	}
 
-	ipset_nest_end(skb, atd);
+	nla_nest_end(skb, atd);
 	/* Set listing finished */
 	cb->args[IPSET_CB_ARG0] = 0;
 	goto out;
@@ -531,8 +527,8 @@
 		ret = -EMSGSIZE;
 	} else {
 		cb->args[IPSET_CB_ARG0] = i;
+		nla_nest_end(skb, atd);
 	}
-	ipset_nest_end(skb, atd);
 out:
 	rcu_read_unlock();
 	return ret;
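
Besides replacing ipset's private ipset_nest_start()/ipset_nest_end()
wrappers with the generic netlink helpers, the dump error path above is
fixed: the ADT container is now closed only on the partial-listing
branch, not after the message already overflowed on the first element.
The usual open/emit/close-or-cancel pattern looks roughly like this
(hypothetical helper, not from the patch):

	static int emit_one_nested(struct sk_buff *skb, int attrtype, u32 val)
	{
		struct nlattr *nest = nla_nest_start(skb, attrtype);

		if (!nest)
			return -EMSGSIZE;
		if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(val))) {
			nla_nest_cancel(skb, nest);	/* drop partial nest */
			return -EMSGSIZE;
		}
		nla_nest_end(skb, nest);
		return 0;
	}
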
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index d5be9c2..ff570bf 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/export.h>
 #include <linux/netfilter/ipset/pfxlen.h>
 
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index cad48d0..5b672e0 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # IP Virtual Server configuration
 #
@@ -29,6 +30,7 @@
 	bool "IPv6 support for IPVS"
 	depends on IPV6 = y || IP_VS = IPV6
 	select IP6_NF_IPTABLES
+	select NF_DEFRAG_IPV6
 	---help---
 	  Add IPv6 support to IPVS.
 
@@ -133,7 +135,7 @@
 	  module, choose M here. If unsure, say N.
 
 config	IP_VS_LC
-        tristate "least-connection scheduling"
+	tristate "least-connection scheduling"
 	---help---
 	  The least-connection scheduling algorithm directs network
 	  connections to the server with the least number of active 
@@ -143,7 +145,7 @@
 	  module, choose M here. If unsure, say N.
 
 config	IP_VS_WLC
-        tristate "weighted least-connection scheduling"
+	tristate "weighted least-connection scheduling"
 	---help---
 	  The weighted least-connection scheduling algorithm directs network
 	  connections to the server with the least active connections
@@ -331,7 +333,7 @@
 
 config	IP_VS_PE_SIP
 	tristate "SIP persistence engine"
-        depends on IP_VS_PROTO_UDP
+	depends on IP_VS_PROTO_UDP
 	depends on NF_CONNTRACK_SIP
 	---help---
 	  Allow persistence based on the SIP Call-ID
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 7588aea..f9b16f2 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * ip_vs_app.c: Application module support for IPVS
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
 * is that the ip_vs_app module handles the reverse direction (incoming requests
  * and outgoing responses).
@@ -15,7 +11,6 @@
  *		IP_MASQ_APP application masquerading module
  *
  * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -198,21 +193,29 @@
 
 	mutex_lock(&__ip_vs_app_mutex);
 
+	/* increase the module use count */
+	if (!ip_vs_use_count_inc()) {
+		err = -ENOENT;
+		goto out_unlock;
+	}
+
 	list_for_each_entry(a, &ipvs->app_list, a_list) {
 		if (!strcmp(app->name, a->name)) {
 			err = -EEXIST;
+			/* decrease the module use count */
+			ip_vs_use_count_dec();
 			goto out_unlock;
 		}
 	}
 	a = kmemdup(app, sizeof(*app), GFP_KERNEL);
 	if (!a) {
 		err = -ENOMEM;
+		/* decrease the module use count */
+		ip_vs_use_count_dec();
 		goto out_unlock;
 	}
 	INIT_LIST_HEAD(&a->incs_list);
 	list_add(&a->a_list, &ipvs->app_list);
-	/* increase the module use count */
-	ip_vs_use_count_inc();
 
 out_unlock:
 	mutex_unlock(&__ip_vs_app_mutex);
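
register_ip_vs_app() now takes the module reference before publishing
the new entry, and ip_vs_use_count_inc() has become fallible, so every
error exit pairs it with ip_vs_use_count_dec(). Presumably (the
primitive itself is not shown in this hunk) the pair is built on the
standard module refcounting, along these lines:

	/* Sketch of the assumed primitives; the patch only shows callers. */
	static inline bool use_count_inc_sketch(void)
	{
		return try_module_get(THIS_MODULE);
	}

	static inline void use_count_dec_sketch(void)
	{
		module_put(THIS_MODULE);
	}
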
@@ -363,7 +366,7 @@
 	struct tcphdr *th;
 	__u32 seq;
 
-	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+	if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
 		return 0;
 
 	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
@@ -440,7 +443,7 @@
 	struct tcphdr *th;
 	__u32 seq;
 
-	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+	if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
 		return 0;
 
 	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
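
Note the inverted tests in the skb_make_writable() ->
skb_ensure_writable() conversions here and throughout this update: the
old netfilter helper returned true on success, while the new core helper
returns 0 on success and a negative errno on failure. Expressed as a
compatibility sketch (illustrative only):

	static inline bool skb_make_writable_compat(struct sk_buff *skb,
						    unsigned int write_len)
	{
		return skb_ensure_writable(skb, write_len) == 0;
	}
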
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 5b2b178..02f2f63 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS         An implementation of the IP virtual server support for the
  *              LINUX operating system.  IPVS is now implemented as a module
@@ -9,17 +10,11 @@
  *              Peter Kese <peter.kese@ijs.si>
  *              Julian Anastasov <ja@ssi.bg>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
  * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
 * and others. Much of the code here is taken from the IP MASQ code of kernel 2.2.
  *
  * Changes:
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 7ca926a..8b80ab7 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS         An implementation of the IP virtual server support for the
  *              LINUX operating system.  IPVS is now implemented as a module
@@ -9,11 +10,6 @@
  *              Peter Kese <peter.kese@ijs.si>
  *              Julian Anastasov <ja@ssi.bg>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
  * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
  * and others.
@@ -21,7 +17,6 @@
  * Changes:
  *	Paul `Rusty' Russell		properly handle non-linear skbs
  *	Harald Welte			don't use nfcache
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -39,6 +34,8 @@
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <net/icmp.h>                   /* for icmp_send */
+#include <net/gue.h>
+#include <net/gre.h>
 #include <net/route.h>
 #include <net/ip6_checksum.h>
 #include <net/netns/generic.h>		/* net_generic() */
@@ -53,6 +50,7 @@
 #endif
 
 #include <net/ip_vs.h>
+#include <linux/indirect_call_wrapper.h>
 
 
 EXPORT_SYMBOL(register_ip_vs_scheduler);
@@ -70,6 +68,29 @@
 #endif
 EXPORT_SYMBOL(ip_vs_new_conn_out);
 
+#ifdef CONFIG_IP_VS_PROTO_TCP
+INDIRECT_CALLABLE_DECLARE(int
+	tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+			 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph));
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+INDIRECT_CALLABLE_DECLARE(int
+	udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+			 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph));
+#endif
+
+#if defined(CONFIG_IP_VS_PROTO_TCP) && defined(CONFIG_IP_VS_PROTO_UDP)
+#define SNAT_CALL(f, ...) \
+	INDIRECT_CALL_2(f, tcp_snat_handler, udp_snat_handler, __VA_ARGS__)
+#elif defined(CONFIG_IP_VS_PROTO_TCP)
+#define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, tcp_snat_handler, __VA_ARGS__)
+#elif defined(CONFIG_IP_VS_PROTO_UDP)
+#define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, udp_snat_handler, __VA_ARGS__)
+#else
+#define SNAT_CALL(f, ...) f(__VA_ARGS__)
+#endif
+
 static unsigned int ip_vs_net_id __read_mostly;
 /* netns cnt used for uniqueness */
 static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
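
The SNAT_CALL() wrapper uses the helpers from
<linux/indirect_call_wrapper.h> to turn the pp->snat_handler
function-pointer call into a direct call when the target is the TCP or
UDP handler, avoiding a retpoline on CONFIG_RETPOLINE builds.
Conceptually, INDIRECT_CALL_1() expands to something like this sketch
(the real macro adds a likely() hint):

	#define INDIRECT_CALL_1_SKETCH(f, f1, ...) \
		((f) == (f1) ? f1(__VA_ARGS__) : (f)(__VA_ARGS__))
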
@@ -478,7 +499,9 @@
 	 */
 	if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) {
 		iph->hdr_flags ^= IP_VS_HDR_INVERSE;
-		cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph);
+		cp = INDIRECT_CALL_1(pp->conn_in_get,
+				     ip_vs_conn_in_get_proto, svc->ipvs,
+				     svc->af, skb, iph);
 		iph->hdr_flags ^= IP_VS_HDR_INVERSE;
 
 		if (cp) {
@@ -594,7 +617,7 @@
 		unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
 				      iph->protocol == IPPROTO_UDP) ?
 				      IP_VS_CONN_F_ONE_PACKET : 0;
-		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
+		union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
 
 		/* create a new connection entry */
 		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
@@ -871,7 +894,7 @@
 	if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
 	    IPPROTO_SCTP == protocol)
 		offset += 2 * sizeof(__u16);
-	if (!skb_make_writable(skb, offset))
+	if (skb_ensure_writable(skb, offset))
 		goto out;
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -972,7 +995,8 @@
 	ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph);
 
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph);
+	cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
+			     ipvs, AF_INET, skb, &ciph);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -1028,7 +1052,8 @@
 		return NF_ACCEPT;
 
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph);
+	cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
+			     ipvs, AF_INET6, skb, &ciph);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -1259,11 +1284,12 @@
 
 	IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
 
-	if (!skb_make_writable(skb, iph->len))
+	if (skb_ensure_writable(skb, iph->len))
 		goto drop;
 
 	/* mangle the packet */
-	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph))
+	if (pp->snat_handler &&
+	    !SNAT_CALL(pp->snat_handler, skb, pp, cp, iph))
 		goto drop;
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1389,7 +1415,8 @@
 	/*
 	 * Check if the packet belongs to an existing entry
 	 */
-	cp = pp->conn_out_get(ipvs, af, skb, &iph);
+	cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
+			     ipvs, af, skb, &iph);
 
 	if (likely(cp)) {
 		if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
@@ -1536,14 +1563,12 @@
 		/* sorry, all this trouble for a no-hit :) */
 		IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
 			      "ip_vs_in: packet continues traversal as normal");
-		if (iph->fragoffs) {
-			/* Fragment that couldn't be mapped to a conn entry
-			 * is missing module nf_defrag_ipv6
-			 */
-			IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
+
+		/* Fragment couldn't be mapped to a conn entry */
+		if (iph->fragoffs)
 			IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
 				      "unhandled fragment");
-		}
+
 		*verdict = NF_ACCEPT;
 		return 0;
 	}
@@ -1551,6 +1576,73 @@
 	return 1;
 }
 
+/* Check the UDP tunnel and return its header length */
+static int ipvs_udp_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
+			  unsigned int offset, __u16 af,
+			  const union nf_inet_addr *daddr, __u8 *proto)
+{
+	struct udphdr _udph, *udph;
+	struct ip_vs_dest *dest;
+
+	udph = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+	if (!udph)
+		goto unk;
+	offset += sizeof(struct udphdr);
+	dest = ip_vs_find_tunnel(ipvs, af, daddr, udph->dest);
+	if (!dest)
+		goto unk;
+	if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+		struct guehdr _gueh, *gueh;
+
+		gueh = skb_header_pointer(skb, offset, sizeof(_gueh), &_gueh);
+		if (!gueh)
+			goto unk;
+		if (gueh->control != 0 || gueh->version != 0)
+			goto unk;
+		/* Later we can also support IPPROTO_IPV6 */
+		if (gueh->proto_ctype != IPPROTO_IPIP)
+			goto unk;
+		*proto = gueh->proto_ctype;
+		return sizeof(struct udphdr) + sizeof(struct guehdr) +
+		       (gueh->hlen << 2);
+	}
+
+unk:
+	return 0;
+}
+
+/* Check the GRE tunnel and return its header length */
+static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
+			  unsigned int offset, __u16 af,
+			  const union nf_inet_addr *daddr, __u8 *proto)
+{
+	struct gre_base_hdr _greh, *greh;
+	struct ip_vs_dest *dest;
+
+	greh = skb_header_pointer(skb, offset, sizeof(_greh), &_greh);
+	if (!greh)
+		goto unk;
+	dest = ip_vs_find_tunnel(ipvs, af, daddr, 0);
+	if (!dest)
+		goto unk;
+	if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+		__be16 type;
+
+		/* Only support version 0 and the C (checksum) flag */
+		if ((greh->flags & ~GRE_CSUM) != 0)
+			goto unk;
+		type = greh->protocol;
+		/* Later we can also support IPPROTO_IPV6 */
+		if (type != htons(ETH_P_IP))
+			goto unk;
+		*proto = IPPROTO_IPIP;
+		return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags));
+	}
+
+unk:
+	return 0;
+}
+
 /*
  *	Handle ICMP messages in the outside-to-inside direction (incoming).
  *	Find any that might be relevant, check against existing connections,
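
Both decap probes lean on skb_header_pointer(), which copies the
requested bytes into the caller's stack buffer whenever the header is
not linear in the skb, so the returned pointer is always safe to read.
For GUE, gueh->hlen counts 32-bit words of optional fields, hence the
hlen << 2 in the returned length. The probing pattern, as a minimal
sketch:

	static const struct udphdr *probe_udph(const struct sk_buff *skb,
					       unsigned int offset,
					       struct udphdr *buf)
	{
		/* Returns a pointer into skb data when linear, else buf. */
		return skb_header_pointer(skb, offset, sizeof(*buf), buf);
	}
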
@@ -1570,6 +1662,7 @@
 	struct ip_vs_proto_data *pd;
 	unsigned int offset, offset2, ihl, verdict;
 	bool ipip, new_cp = false;
+	union nf_inet_addr *raddr;
 
 	*related = 1;
 
@@ -1608,20 +1701,56 @@
 	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
 	if (cih == NULL)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
+	raddr = (union nf_inet_addr *)&cih->daddr;
 
 	/* Special case for errors for IPIP packets */
 	ipip = false;
 	if (cih->protocol == IPPROTO_IPIP) {
+		struct ip_vs_dest *dest;
+
 		if (unlikely(cih->frag_off & htons(IP_OFFSET)))
 			return NF_ACCEPT;
 		/* Error for our IPIP must arrive at LOCAL_IN */
 		if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
 			return NF_ACCEPT;
+		dest = ip_vs_find_tunnel(ipvs, AF_INET, raddr, 0);
+		/* Only for known tunnel */
+		if (!dest || dest->tun_type != IP_VS_CONN_F_TUNNEL_TYPE_IPIP)
+			return NF_ACCEPT;
 		offset += cih->ihl * 4;
 		cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
 		if (cih == NULL)
 			return NF_ACCEPT; /* The packet looks wrong, ignore */
 		ipip = true;
+	} else if ((cih->protocol == IPPROTO_UDP ||	/* Can be UDP encap */
+		    cih->protocol == IPPROTO_GRE) &&	/* Can be GRE encap */
+		   /* Error for our tunnel must arrive at LOCAL_IN */
+		   (skb_rtable(skb)->rt_flags & RTCF_LOCAL)) {
+		__u8 iproto;
+		int ulen;
+
+		/* Non-first fragment has no UDP header */
+		if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+			return NF_ACCEPT;
+		offset2 = offset + cih->ihl * 4;
+		if (cih->protocol == IPPROTO_UDP)
+			ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET,
+					      raddr, &iproto);
+		else
+			ulen = ipvs_gre_decap(ipvs, skb, offset2, AF_INET,
+					      raddr, &iproto);
+		if (ulen > 0) {
+			/* Skip IP and UDP/GRE tunnel headers */
+			offset = offset2 + ulen;
+			/* Now we should be at the original IP header */
+			cih = skb_header_pointer(skb, offset, sizeof(_ciph),
+						 &_ciph);
+			if (cih && cih->version == 4 && cih->ihl >= 5 &&
+			    iproto == IPPROTO_IPIP)
+				ipip = true;
+			else
+				return NF_ACCEPT;
+		}
 	}
 
 	pd = ip_vs_proto_data_get(ipvs, cih->protocol);
@@ -1644,12 +1773,13 @@
 	/* The embedded headers contain source and dest in reverse order.
 	 * For IPIP this is an error for a request, not for a reply.
 	 */
-	cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph);
+	cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
+			     ipvs, AF_INET, skb, &ciph);
 
 	if (!cp) {
 		int v;
 
-		if (!sysctl_schedule_icmp(ipvs))
+		if (ipip || !sysctl_schedule_icmp(ipvs))
 			return NF_ACCEPT;
 
 		if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
@@ -1686,8 +1816,7 @@
 			skb_reset_network_header(skb);
 			IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
 				&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
-			ipv4_update_pmtu(skb, ipvs->net,
-					 mtu, 0, 0, 0, 0);
+			ipv4_update_pmtu(skb, ipvs->net, mtu, 0, 0);
 			/* Client uses PMTUD? */
 			if (!(frag_off & htons(IP_DF)))
 				goto ignore_ipip;
@@ -1797,7 +1926,8 @@
 	/* The embedded headers contain source and dest in reverse order
 	 * if not from localhost
 	 */
-	cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph);
+	cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
+			     ipvs, AF_INET6, skb, &ciph);
 
 	if (!cp) {
 		int v;
@@ -1926,7 +2056,8 @@
 	/*
 	 * Check if the packet belongs to an existing connection entry
 	 */
-	cp = pp->conn_in_get(ipvs, af, skb, &iph);
+	cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
+			     ipvs, af, skb, &iph);
 
 	conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
 	if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
@@ -2220,7 +2351,6 @@
 static int __net_init __ip_vs_init(struct net *net)
 {
 	struct netns_ipvs *ipvs;
-	int ret;
 
 	ipvs = net_generic(net, ip_vs_net_id);
 	if (ipvs == NULL)
@@ -2252,17 +2382,11 @@
 	if (ip_vs_sync_net_init(ipvs) < 0)
 		goto sync_fail;
 
-	ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
-	if (ret < 0)
-		goto hook_fail;
-
 	return 0;
 /*
  * Error handling
  */
 
-hook_fail:
-	ip_vs_sync_net_cleanup(ipvs);
 sync_fail:
 	ip_vs_conn_net_cleanup(ipvs);
 conn_fail:
@@ -2282,7 +2406,6 @@
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	ip_vs_service_net_cleanup(ipvs);	/* ip_vs_flush() with locks */
 	ip_vs_conn_net_cleanup(ipvs);
 	ip_vs_app_net_cleanup(ipvs);
@@ -2293,10 +2416,24 @@
 	net->ipvs = NULL;
 }
 
+static int __net_init __ip_vs_dev_init(struct net *net)
+{
+	int ret;
+
+	ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	if (ret < 0)
+		goto hook_fail;
+	return 0;
+
+hook_fail:
+	return ret;
+}
+
 static void __net_exit __ip_vs_dev_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	EnterFunction(2);
+	nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	ipvs->enable = 0;	/* Disable packet reception */
 	smp_wmb();
 	ip_vs_sync_net_cleanup(ipvs);
@@ -2311,6 +2448,7 @@
 };
 
 static struct pernet_operations ipvs_core_dev_ops = {
+	.init = __ip_vs_dev_init,
 	.exit = __ip_vs_dev_cleanup,
 };
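
Moving nf_register_net_hooks() into ipvs_core_dev_ops pairs it with
__ip_vs_dev_cleanup(): pernet "device" operations registered after the
core subsystem also exit before it on netns teardown, so the hooks are
gone and packet processing has stopped before connections, apps and
sync threads are cleaned up. A sketch of the assumed registration order
(cleanup runs the pernet exits in reverse):

	static int __init init_order_sketch(void)
	{
		int ret = register_pernet_subsys(&ipvs_core_ops);

		if (ret < 0)
			return ret;
		ret = register_pernet_device(&ipvs_core_dev_ops);
		if (ret < 0)
			unregister_pernet_subsys(&ipvs_core_ops);
		return ret;
	}
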
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 518364f..3cccc88 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS         An implementation of the IP virtual server support for the
  *              LINUX operating system.  IPVS is now implemented as a module
@@ -9,13 +10,7 @@
  *              Peter Kese <peter.kese@ijs.si>
  *              Julian Anastasov <ja@ssi.bg>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -43,6 +38,7 @@
 #ifdef CONFIG_IP_VS_IPV6
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #endif
 #include <net/route.h>
 #include <net/sock.h>
@@ -97,7 +93,6 @@
 static void update_defense_level(struct netns_ipvs *ipvs)
 {
 	struct sysinfo i;
-	static int old_secure_tcp = 0;
 	int availmem;
 	int nomem;
 	int to_change = -1;
@@ -178,35 +173,35 @@
 	spin_lock(&ipvs->securetcp_lock);
 	switch (ipvs->sysctl_secure_tcp) {
 	case 0:
-		if (old_secure_tcp >= 2)
+		if (ipvs->old_secure_tcp >= 2)
 			to_change = 0;
 		break;
 	case 1:
 		if (nomem) {
-			if (old_secure_tcp < 2)
+			if (ipvs->old_secure_tcp < 2)
 				to_change = 1;
 			ipvs->sysctl_secure_tcp = 2;
 		} else {
-			if (old_secure_tcp >= 2)
+			if (ipvs->old_secure_tcp >= 2)
 				to_change = 0;
 		}
 		break;
 	case 2:
 		if (nomem) {
-			if (old_secure_tcp < 2)
+			if (ipvs->old_secure_tcp < 2)
 				to_change = 1;
 		} else {
-			if (old_secure_tcp >= 2)
+			if (ipvs->old_secure_tcp >= 2)
 				to_change = 0;
 			ipvs->sysctl_secure_tcp = 1;
 		}
 		break;
 	case 3:
-		if (old_secure_tcp < 2)
+		if (ipvs->old_secure_tcp < 2)
 			to_change = 1;
 		break;
 	}
-	old_secure_tcp = ipvs->sysctl_secure_tcp;
+	ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp;
 	if (to_change >= 0)
 		ip_vs_protocol_timeout_change(ipvs,
 					      ipvs->sysctl_secure_tcp > 1);
@@ -266,7 +261,7 @@
 ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
 		  const union nf_inet_addr *addr, __be16 port)
 {
-	register unsigned int porth = ntohs(port);
+	unsigned int porth = ntohs(port);
 	__be32 addr_fold = addr->ip;
 	__u32 ahash;
 
@@ -497,7 +492,7 @@
 					    const union nf_inet_addr *addr,
 					    __be16 port)
 {
-	register unsigned int porth = ntohs(port);
+	unsigned int porth = ntohs(port);
 	__be32 addr_fold = addr->ip;
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -514,15 +509,37 @@
 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 {
 	unsigned int hash;
+	__be16 port;
 
 	if (dest->in_rs_table)
 		return;
 
+	switch (IP_VS_DFWD_METHOD(dest)) {
+	case IP_VS_CONN_F_MASQ:
+		port = dest->port;
+		break;
+	case IP_VS_CONN_F_TUNNEL:
+		switch (dest->tun_type) {
+		case IP_VS_CONN_F_TUNNEL_TYPE_GUE:
+			port = dest->tun_port;
+			break;
+		case IP_VS_CONN_F_TUNNEL_TYPE_IPIP:
+		case IP_VS_CONN_F_TUNNEL_TYPE_GRE:
+			port = 0;
+			break;
+		default:
+			return;
+		}
+		break;
+	default:
+		return;
+	}
+
 	/*
 	 *	Hash by proto,addr,port,
 	 *	which are the parameters of the real service.
 	 */
-	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
+	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port);
 
 	hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
 	dest->in_rs_table = 1;
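
ip_vs_rs_hash() now derives the hash key port from the forwarding
method, so NAT destinations keep hashing by dest->port while tunnel
destinations hash by their tunnel port (GUE) or by zero (IPIP/GRE),
which is what lets ip_vs_find_tunnel() below locate them. The key
selection, condensed into a sketch (eliding the unknown-tunnel-type
case):

	static bool rs_hash_key_sketch(const struct ip_vs_dest *dest,
				       __be16 *port)
	{
		switch (IP_VS_DFWD_METHOD(dest)) {
		case IP_VS_CONN_F_MASQ:
			*port = dest->port;
			return true;
		case IP_VS_CONN_F_TUNNEL:
			*port = dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE ?
				dest->tun_port : 0;
			return true;
		default:
			return false;	/* other methods are not hashed */
		}
	}
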
@@ -554,7 +571,8 @@
 		if (dest->port == dport &&
 		    dest->af == af &&
 		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
-		    (dest->protocol == protocol || dest->vfwmark)) {
+		    (dest->protocol == protocol || dest->vfwmark) &&
+		    IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
 			/* HIT */
 			return true;
 		}
@@ -584,7 +602,37 @@
 		if (dest->port == dport &&
 		    dest->af == af &&
 		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
-			(dest->protocol == protocol || dest->vfwmark)) {
+		    (dest->protocol == protocol || dest->vfwmark) &&
+		    IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
+			/* HIT */
+			return dest;
+		}
+	}
+
+	return NULL;
+}
+
+/* Find real service record by <af,addr,tun_port>.
+ * In case of multiple records with the same <af,addr,tun_port>, only
+ * the first found record is returned.
+ *
+ * To be called under RCU lock.
+ */
+struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
+				     const union nf_inet_addr *daddr,
+				     __be16 tun_port)
+{
+	struct ip_vs_dest *dest;
+	unsigned int hash;
+
+	/* Check for "full" addressed entries */
+	hash = ip_vs_rs_hashkey(af, daddr, tun_port);
+
+	hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+		if (dest->tun_port == tun_port &&
+		    dest->af == af &&
+		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
+		    IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) {
 			/* HIT */
 			return dest;
 		}
@@ -830,20 +878,29 @@
 	conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
 	conn_flags |= IP_VS_CONN_F_INACTIVE;
 
+	/* Need to rehash? */
+	if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
+	    IP_VS_DFWD_METHOD(dest) ||
+	    udest->tun_type != dest->tun_type ||
+	    udest->tun_port != dest->tun_port)
+		ip_vs_rs_unhash(dest);
+
+	/* set the tunnel info */
+	dest->tun_type = udest->tun_type;
+	dest->tun_port = udest->tun_port;
+	dest->tun_flags = udest->tun_flags;
+
 	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
 	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
 		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 	} else {
-		/*
-		 *    Put the real service in rs_table if not present.
-		 *    For now only for NAT!
-		 */
-		ip_vs_rs_hash(ipvs, dest);
 		/* FTP-NAT requires conntrack for mangling */
 		if (svc->port == FTPPORT)
 			ip_vs_register_conntrack(svc);
 	}
 	atomic_set(&dest->conn_flags, conn_flags);
+	/* Put the real service in rs_table if not present. */
+	ip_vs_rs_hash(ipvs, dest);
 
 	/* bind the service */
 	old_svc = rcu_dereference_protected(dest->svc, 1);
@@ -900,11 +957,17 @@
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (udest->af == AF_INET6) {
+		int ret;
+
 		atype = ipv6_addr_type(&udest->addr.in6);
 		if ((!(atype & IPV6_ADDR_UNICAST) ||
 			atype & IPV6_ADDR_LINKLOCAL) &&
 			!__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
 			return -EINVAL;
+
+		ret = nf_defrag_ipv6_enable(svc->ipvs->net);
+		if (ret)
+			return ret;
 	} else
 #endif
 	{
@@ -980,6 +1043,13 @@
 		return -ERANGE;
 	}
 
+	if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+		if (udest->tun_port == 0) {
+			pr_err("%s(): tunnel port is zero\n", __func__);
+			return -EINVAL;
+		}
+	}
+
 	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
 
 	/* We use function that requires RCU lock */
@@ -1044,6 +1114,13 @@
 		return -ERANGE;
 	}
 
+	if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+		if (udest->tun_port == 0) {
+			pr_err("%s(): tunnel port is zero\n", __func__);
+			return -EINVAL;
+		}
+	}
+
 	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
 
 	/* We use function that requires RCU lock */
@@ -1197,7 +1274,8 @@
 	struct ip_vs_service *svc = NULL;
 
 	/* increase the module use count */
-	ip_vs_use_count_inc();
+	if (!ip_vs_use_count_inc())
+		return -ENOPROTOOPT;
 
 	/* Lookup the scheduler by 'u->sched_name' */
 	if (strcmp(u->sched_name, "none")) {
@@ -1228,6 +1306,10 @@
 			ret = -EINVAL;
 			goto out_err;
 		}
+
+		ret = nf_defrag_ipv6_enable(ipvs->net);
+		if (ret)
+			goto out_err;
 	}
 #endif
 
@@ -1644,7 +1726,6 @@
 
 #ifdef CONFIG_SYSCTL
 
-static int zero;
 static int three = 3;
 
 static int
@@ -1656,12 +1737,18 @@
 	int val = *valp;
 	int rc;
 
-	rc = proc_dointvec(table, write, buffer, lenp, ppos);
+	struct ctl_table tmp = {
+		.data = &val,
+		.maxlen = sizeof(int),
+		.mode = table->mode,
+	};
+
+	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
 	if (write && (*valp != val)) {
-		if ((*valp < 0) || (*valp > 3)) {
-			/* Restore the correct value */
-			*valp = val;
+		if (val < 0 || val > 3) {
+			rc = -EINVAL;
 		} else {
+			*valp = val;
 			update_defense_level(ipvs);
 		}
 	}
@@ -1675,33 +1762,20 @@
 	int *valp = table->data;
 	int val[2];
 	int rc;
+	struct ctl_table tmp = {
+		.data = &val,
+		.maxlen = table->maxlen,
+		.mode = table->mode,
+	};
 
-	/* backup the value first */
 	memcpy(val, valp, sizeof(val));
-
-	rc = proc_dointvec(table, write, buffer, lenp, ppos);
-	if (write && (valp[0] < 0 || valp[1] < 0 ||
-	    (valp[0] >= valp[1] && valp[1]))) {
-		/* Restore the correct value */
-		memcpy(valp, val, sizeof(val));
-	}
-	return rc;
-}
-
-static int
-proc_do_sync_mode(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int *valp = table->data;
-	int val = *valp;
-	int rc;
-
-	rc = proc_dointvec(table, write, buffer, lenp, ppos);
-	if (write && (*valp != val)) {
-		if ((*valp < 0) || (*valp > 1)) {
-			/* Restore the correct value */
-			*valp = val;
-		}
+	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+	if (write) {
+		if (val[0] < 0 || val[1] < 0 ||
+		    (val[0] >= val[1] && val[1]))
+			rc = -EINVAL;
+		else
+			memcpy(valp, val, sizeof(val));
 	}
 	return rc;
 }
@@ -1714,12 +1788,18 @@
 	int val = *valp;
 	int rc;
 
-	rc = proc_dointvec(table, write, buffer, lenp, ppos);
+	struct ctl_table tmp = {
+		.data = &val,
+		.maxlen = sizeof(int),
+		.mode = table->mode,
+	};
+
+	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
 	if (write && (*valp != val)) {
-		if (*valp < 1 || !is_power_of_2(*valp)) {
-			/* Restore the correct value */
+		if (val < 1 || !is_power_of_2(val))
+			rc = -EINVAL;
+		else
 			*valp = val;
-		}
 	}
 	return rc;
 }
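
All three proc handlers are converted to the same shape: parse the user
buffer into a scratch ctl_table backed by a stack variable, validate,
and only then commit to the live value, returning -EINVAL for
out-of-range writes instead of silently restoring the old value (the
old in-place restore was also racy against concurrent readers). The
common shape, as a hypothetical helper:

	static int proc_do_bounded_int(struct ctl_table *table, int write,
				       void __user *buffer, size_t *lenp,
				       loff_t *ppos, int lo, int hi)
	{
		int *valp = table->data;
		int val = *valp;
		struct ctl_table tmp = {
			.data	= &val,
			.maxlen	= sizeof(int),
			.mode	= table->mode,
		};
		int rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);

		if (rc || !write)
			return rc;
		if (val < lo || val > hi)
			return -EINVAL;
		*valp = val;
		return 0;
	}
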
@@ -1779,7 +1859,9 @@
 		.procname	= "sync_version",
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_do_sync_mode,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "sync_ports",
@@ -1853,7 +1935,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &three,
 	},
 	{
@@ -2221,6 +2303,18 @@
 		  u->udp_timeout);
 
 #ifdef CONFIG_IP_VS_PROTO_TCP
+	if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
+	    u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) {
+		return -EINVAL;
+	}
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
+		return -EINVAL;
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_TCP
 	if (u->tcp_timeout) {
 		pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
 		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
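
The new INT_MAX / HZ bounds exist because these timeouts arrive in
seconds and are stored as jiffies through the "* HZ" multiplications
just below; anything larger would overflow the signed multiply. The
check, isolated as a sketch:

	static int secs_to_jiffies_checked(int secs, int *jiffies_out)
	{
		if (secs < 0 || secs > INT_MAX / HZ)
			return -EINVAL;	/* secs * HZ would overflow */
		*jiffies_out = secs * HZ;
		return 0;
	}
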
@@ -2310,6 +2404,7 @@
 	udest->u_threshold	= udest_compat->u_threshold;
 	udest->l_threshold	= udest_compat->l_threshold;
 	udest->af		= AF_INET;
+	udest->tun_type		= IP_VS_CONN_F_TUNNEL_TYPE_IPIP;
 }
 
 static int
@@ -2340,9 +2435,6 @@
 	if (copy_from_user(arg, user, len) != 0)
 		return -EFAULT;
 
-	/* increase the module use count */
-	ip_vs_use_count_inc();
-
 	/* Handle daemons since they have another lock */
 	if (cmd == IP_VS_SO_SET_STARTDAEMON ||
 	    cmd == IP_VS_SO_SET_STOPDAEMON) {
@@ -2355,15 +2447,13 @@
 			ret = -EINVAL;
 			if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
 				    sizeof(cfg.mcast_ifn)) <= 0)
-				goto out_dec;
+				return ret;
 			cfg.syncid = dm->syncid;
 			ret = start_sync_thread(ipvs, &cfg, dm->state);
 		} else {
-			mutex_lock(&ipvs->sync_mutex);
 			ret = stop_sync_thread(ipvs, dm->state);
-			mutex_unlock(&ipvs->sync_mutex);
 		}
-		goto out_dec;
+		return ret;
 	}
 
 	mutex_lock(&__ip_vs_mutex);
@@ -2458,10 +2548,6 @@
 
   out_unlock:
 	mutex_unlock(&__ip_vs_mutex);
-  out_dec:
-	/* decrease the module use count */
-	ip_vs_use_count_dec();
-
 	return ret;
 }
 
@@ -2722,8 +2808,7 @@
 		int size;
 
 		get = (struct ip_vs_get_services *)arg;
-		size = sizeof(*get) +
-			sizeof(struct ip_vs_service_entry) * get->num_services;
+		size = struct_size(get, entrytable, get->num_services);
 		if (*len != size) {
 			pr_err("length: %u != %u\n", *len, size);
 			ret = -EINVAL;
@@ -2764,8 +2849,7 @@
 		int size;
 
 		get = (struct ip_vs_get_dests *)arg;
-		size = sizeof(*get) +
-			sizeof(struct ip_vs_dest_entry) * get->num_dests;
+		size = struct_size(get, entrytable, get->num_dests);
 		if (*len != size) {
 			pr_err("length: %u != %u\n", *len, size);
 			ret = -EINVAL;
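
struct_size() from <linux/overflow.h> replaces the open-coded
sizeof(*get) + n * sizeof(entry) arithmetic in both GET_SERVICES and
GET_DESTS. Since num_services/num_dests come from userspace, the
open-coded multiply could wrap; struct_size() saturates to SIZE_MAX on
overflow, so the *len != size comparison fails safely. Conceptually
(sketch):

	static size_t struct_size_sketch(size_t base, size_t elem_size,
					 size_t count)
	{
		size_t bytes;

		if (check_mul_overflow(count, elem_size, &bytes) ||
		    check_add_overflow(bytes, base, &bytes))
			return SIZE_MAX;
		return bytes;
	}
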
@@ -2869,12 +2953,15 @@
 	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
 	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
 	[IPVS_DEST_ATTR_ADDR_FAMILY]	= { .type = NLA_U16 },
+	[IPVS_DEST_ATTR_TUN_TYPE]	= { .type = NLA_U8 },
+	[IPVS_DEST_ATTR_TUN_PORT]	= { .type = NLA_U16 },
+	[IPVS_DEST_ATTR_TUN_FLAGS]	= { .type = NLA_U16 },
 };
 
 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
 				 struct ip_vs_kstats *kstats)
 {
-	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+	struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);
 
 	if (!nl_stats)
 		return -EMSGSIZE;
@@ -2904,7 +2991,7 @@
 static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
 				   struct ip_vs_kstats *kstats)
 {
-	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+	struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);
 
 	if (!nl_stats)
 		return -EMSGSIZE;
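
The nla_nest_start() -> nla_nest_start_noflag() renames in this file
are wire-format preserving: since the netlink rework in 5.2, plain
nla_nest_start() ORs NLA_F_NESTED into the attribute type, and the
_noflag variant keeps the bare type that existing ipvsadm binaries
expect. The relationship between the two, as defined in
<net/netlink.h> (shown for context):

	static inline struct nlattr *nla_nest_start(struct sk_buff *skb,
						    int attrtype)
	{
		return nla_nest_start_noflag(skb, attrtype | NLA_F_NESTED);
	}
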
@@ -2950,7 +3037,7 @@
 	struct ip_vs_kstats kstats;
 	char *sched_name;
 
-	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+	nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE);
 	if (!nl_service)
 		return -EMSGSIZE;
 
@@ -3065,7 +3152,7 @@
 
 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
 				    struct ip_vs_service_user_kern *usvc,
-				    struct nlattr *nla, int full_entry,
+				    struct nlattr *nla, bool full_entry,
 				    struct ip_vs_service **ret_svc)
 {
 	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
@@ -3074,8 +3161,7 @@
 
 	/* Parse mandatory identifying service fields first */
 	if (nla == NULL ||
-	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla,
-			     ip_vs_svc_policy, NULL))
+	    nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL))
 		return -EINVAL;
 
 	nla_af		= attrs[IPVS_SVC_ATTR_AF];
@@ -3152,7 +3238,7 @@
 	struct ip_vs_service *svc;
 	int ret;
 
-	ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc);
+	ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc);
 	return ret ? ERR_PTR(ret) : svc;
 }
 
@@ -3161,7 +3247,7 @@
 	struct nlattr *nl_dest;
 	struct ip_vs_kstats kstats;
 
-	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+	nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST);
 	if (!nl_dest)
 		return -EMSGSIZE;
 
@@ -3172,6 +3258,12 @@
 			 IP_VS_CONN_F_FWD_MASK)) ||
 	    nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
 			atomic_read(&dest->weight)) ||
+	    nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE,
+		       dest->tun_type) ||
+	    nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
+			 dest->tun_port) ||
+	    nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
+			dest->tun_flags) ||
 	    nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
 	    nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
 	    nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
@@ -3233,8 +3325,7 @@
 	mutex_lock(&__ip_vs_mutex);
 
 	/* Try to find the service for which to dump destinations */
-	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX,
-			ip_vs_cmd_policy, NULL))
+	if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack))
 		goto out_err;
 
 
@@ -3262,7 +3353,7 @@
 }
 
 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
-				 struct nlattr *nla, int full_entry)
+				 struct nlattr *nla, bool full_entry)
 {
 	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
 	struct nlattr *nla_addr, *nla_port;
@@ -3270,8 +3361,7 @@
 
 	/* Parse mandatory identifying destination fields first */
 	if (nla == NULL ||
-	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla,
-			     ip_vs_dest_policy, NULL))
+	    nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL))
 		return -EINVAL;
 
 	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
@@ -3294,12 +3384,16 @@
 	/* If a full entry was requested, check for the additional fields */
 	if (full_entry) {
 		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
-			      *nla_l_thresh;
+			      *nla_l_thresh, *nla_tun_type, *nla_tun_port,
+			      *nla_tun_flags;
 
 		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
 		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
 		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
 		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
+		nla_tun_type	= attrs[IPVS_DEST_ATTR_TUN_TYPE];
+		nla_tun_port	= attrs[IPVS_DEST_ATTR_TUN_PORT];
+		nla_tun_flags	= attrs[IPVS_DEST_ATTR_TUN_FLAGS];
 
 		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
 			return -EINVAL;
@@ -3309,6 +3403,15 @@
 		udest->weight = nla_get_u32(nla_weight);
 		udest->u_threshold = nla_get_u32(nla_u_thresh);
 		udest->l_threshold = nla_get_u32(nla_l_thresh);
+
+		if (nla_tun_type)
+			udest->tun_type = nla_get_u8(nla_tun_type);
+
+		if (nla_tun_port)
+			udest->tun_port = nla_get_be16(nla_tun_port);
+
+		if (nla_tun_flags)
+			udest->tun_flags = nla_get_u16(nla_tun_flags);
 	}
 
 	return 0;
@@ -3319,7 +3422,7 @@
 {
 	struct nlattr *nl_daemon;
 
-	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+	nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON);
 	if (!nl_daemon)
 		return -EMSGSIZE;
 
@@ -3469,10 +3572,8 @@
 	if (!attrs[IPVS_DAEMON_ATTR_STATE])
 		return -EINVAL;
 
-	mutex_lock(&ipvs->sync_mutex);
 	ret = stop_sync_thread(ipvs,
 			       nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
-	mutex_unlock(&ipvs->sync_mutex);
 	return ret;
 }
 
@@ -3507,9 +3608,7 @@
 		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
 
 		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
-		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
-				     info->attrs[IPVS_CMD_ATTR_DAEMON],
-				     ip_vs_daemon_policy, info->extack))
+		    nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack))
 			goto out;
 
 		if (cmd == IPVS_CMD_NEW_DAEMON)
@@ -3524,11 +3623,11 @@
 
 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 {
+	bool need_full_svc = false, need_full_dest = false;
 	struct ip_vs_service *svc = NULL;
 	struct ip_vs_service_user_kern usvc;
 	struct ip_vs_dest_user_kern udest;
 	int ret = 0, cmd;
-	int need_full_svc = 0, need_full_dest = 0;
 	struct net *net = sock_net(skb->sk);
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
@@ -3552,7 +3651,7 @@
 	 * received a valid one. We need a full service specification when
 	 * adding / editing a service. Only identifying members otherwise. */
 	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
-		need_full_svc = 1;
+		need_full_svc = true;
 
 	ret = ip_vs_genl_parse_service(ipvs, &usvc,
 				       info->attrs[IPVS_CMD_ATTR_SERVICE],
@@ -3572,7 +3671,7 @@
 	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
 	    cmd == IPVS_CMD_DEL_DEST) {
 		if (cmd != IPVS_CMD_DEL_DEST)
-			need_full_dest = 1;
+			need_full_dest = true;
 
 		ret = ip_vs_genl_parse_dest(&udest,
 					    info->attrs[IPVS_CMD_ATTR_DEST],
@@ -3753,94 +3852,98 @@
 static const struct genl_ops ip_vs_genl_ops[] = {
 	{
 		.cmd	= IPVS_CMD_NEW_SERVICE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.doit	= ip_vs_genl_set_cmd,
 	},
 	{
 		.cmd	= IPVS_CMD_SET_SERVICE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.doit	= ip_vs_genl_set_cmd,
 	},
 	{
 		.cmd	= IPVS_CMD_DEL_SERVICE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.doit	= ip_vs_genl_set_cmd,
 	},
 	{
 		.cmd	= IPVS_CMD_GET_SERVICE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
 		.doit	= ip_vs_genl_get_cmd,
 		.dumpit	= ip_vs_genl_dump_services,
-		.policy	= ip_vs_cmd_policy,
 	},
 	{
 		.cmd	= IPVS_CMD_NEW_DEST,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.doit	= ip_vs_genl_set_cmd,
 	},
 	{
 		.cmd	= IPVS_CMD_SET_DEST,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.doit	= ip_vs_genl_set_cmd,
 	},
 	{
 		.cmd	= IPVS_CMD_DEL_DEST,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.doit	= ip_vs_genl_set_cmd,
 	},
 	{
 		.cmd	= IPVS_CMD_GET_DEST,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.dumpit	= ip_vs_genl_dump_dests,
 	},
 	{
 		.cmd	= IPVS_CMD_NEW_DAEMON,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.doit	= ip_vs_genl_set_daemon,
 	},
 	{
 		.cmd	= IPVS_CMD_DEL_DAEMON,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.doit	= ip_vs_genl_set_daemon,
 	},
 	{
 		.cmd	= IPVS_CMD_GET_DAEMON,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
 		.dumpit	= ip_vs_genl_dump_daemons,
 	},
 	{
 		.cmd	= IPVS_CMD_SET_CONFIG,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.doit	= ip_vs_genl_set_cmd,
 	},
 	{
 		.cmd	= IPVS_CMD_GET_CONFIG,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
 		.doit	= ip_vs_genl_get_cmd,
 	},
 	{
 		.cmd	= IPVS_CMD_GET_INFO,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
 		.doit	= ip_vs_genl_get_cmd,
 	},
 	{
 		.cmd	= IPVS_CMD_ZERO,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
 		.doit	= ip_vs_genl_set_cmd,
 	},
 	{
 		.cmd	= IPVS_CMD_FLUSH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags	= GENL_ADMIN_PERM,
 		.doit	= ip_vs_genl_set_cmd,
 	},
@@ -3851,6 +3954,7 @@
 	.name		= IPVS_GENL_NAME,
 	.version	= IPVS_GENL_VERSION,
 	.maxattr	= IPVS_CMD_ATTR_MAX,
+	.policy = ip_vs_cmd_policy,
 	.netnsok        = true,         /* Make ipvsadm work on netns */
 	.module		= THIS_MODULE,
 	.ops		= ip_vs_genl_ops,
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 07459e7..5e6ec32 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Destination Hashing scheduling module
  *
@@ -6,13 +7,7 @@
  *              Inspired by the consistent hashing scheduler patch from
  *              Thomas Proell <proellt@gmx.de>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
- *
  */
 
 /*
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 4890550..05b8112 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * ip_vs_est.c: simple rate estimator for IPVS
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  *              Network name space (netns) aware.
 *              Global data moved to netns, i.e. struct netns_ipvs
diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c
index e09874d..b846cc3 100644
--- a/net/netfilter/ipvs/ip_vs_fo.c
+++ b/net/netfilter/ipvs/ip_vs_fo.c
@@ -1,16 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Weighted Fail Over module
  *
  * Authors:     Kenny Mathis <kmathis@chokepoint.net>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
  *     Kenny Mathis            :     added initial functionality based on weight
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 4398a72..cf92590 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * ip_vs_ftp.c: IPVS ftp application module
  *
@@ -5,12 +6,6 @@
  *
  * Changes:
  *
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  * Most code here is taken from ip_masq_ftp.c in kernel 2.2. The difference
  * is that ip_vs_ftp module handles the reverse direction to ip_masq_ftp.
  *
@@ -19,7 +14,6 @@
  * Version:	@(#)ip_masq_ftp.c 0.04   02/05/96
  *
  * Author:	Wouter Gadeyne
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -124,7 +118,7 @@
 	}
 	s = data + plen;
 	if (skip) {
-		int found = 0;
+		bool found = false;
 
 		for (;; s++) {
 			if (s == data_limit)
@@ -136,7 +130,7 @@
 				if (!ext && isdigit(*s))
 					break;
 				if (*s == skip)
-					found = 1;
+					found = true;
 			} else if (*s != skip) {
 				break;
 			}
@@ -273,7 +267,7 @@
 		return 1;
 
 	/* Linear packets are much easier to deal with. */
-	if (!skb_make_writable(skb, skb->len))
+	if (skb_ensure_writable(skb, skb->len))
 		return 0;
 
 	if (cp->app_data == (void *) IP_VS_FTP_PASV) {
@@ -439,7 +433,7 @@
 		return 1;
 
 	/* Linear packets are much easier to deal with. */
-	if (!skb_make_writable(skb, skb->len))
+	if (skb_ensure_writable(skb, skb->len))
 		return 0;
 
 	data = data_start = ip_vs_ftp_data_ptr(skb, ipvsh);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index b9f375e..7ac7473 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Locality-Based Least-Connection scheduling module
  *
  * Authors:     Wensong Zhang <wensong@gnuchina.org>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
  *     Martin Hamilton         :    fixed the terrible locking bugs
  *                                   *lock(tbl->lock) ==> *lock(&tbl->lock)
@@ -18,7 +14,6 @@
  *     Julian Anastasov        :    replaced del_timer call with del_timer_sync
  *                                   to avoid the possible race between timer
  *                                   handler and del_timer thread in SMP
- *
  */
 
 /*
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 542c494..77c323c 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -1,17 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Locality-Based Least-Connection with Replication scheduler
  *
  * Authors:     Wensong Zhang <wensong@gnuchina.org>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
  *     Julian Anastasov        :    Added the missing (dest->weight>0)
  *                                  condition in the ip_vs_dest_set_max.
- *
  */
 
 /*
@@ -165,7 +160,7 @@
 /* get weighted least-connection node in the destination set */
 static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 {
-	register struct ip_vs_dest_set_elem *e;
+	struct ip_vs_dest_set_elem *e;
 	struct ip_vs_dest *dest, *least;
 	int loh, doh;
 
@@ -214,7 +209,7 @@
 /* get weighted most-connection node in the destination set */
 static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 {
-	register struct ip_vs_dest_set_elem *e;
+	struct ip_vs_dest_set_elem *e;
 	struct ip_vs_dest *dest, *most;
 	int moh, doh;
 
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 19a0769..9d34d81 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -1,17 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Least-Connection Scheduling module
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
  *     Wensong Zhang            :     added the ip_vs_lc_update_svc
  *     Wensong Zhang            :     added any dest with weight=0 is quiesced
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
index 94d9d34..da0280c 100644
--- a/net/netfilter/ipvs/ip_vs_mh.c
+++ b/net/netfilter/ipvs/ip_vs_mh.c
@@ -174,8 +174,8 @@
 		return 0;
 	}
 
-	table =  kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
-			 sizeof(unsigned long), GFP_KERNEL);
+	table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
+			sizeof(unsigned long), GFP_KERNEL);
 	if (!table)
 		return -ENOMEM;
 
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index eb8b9c8..08adcb2 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * ip_vs_nfct.c:	Netfilter connection tracking support for IPVS
  *
@@ -7,27 +8,11 @@
  * Portions Copyright (C) 2003-2010
  * Julian Anastasov
  *
- *
- * This code is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- *
  * Authors:
  * Ben North <ben@redfrontdoor.org>
  * Julian Anastasov <ja@ssi.bg>		Reorganize and sync with latest kernels
  * Hannes Eder <heder@google.com>	Extend NFCT support for FTP, ipvs match
  *
- *
  * Current status:
  *
  * - provide conntrack confirmation for new and related connections, by
@@ -43,7 +28,6 @@
 * when a RELATED conntrack is created from the real server (Active FTP DATA)
 * - if iptables_nat is not loaded, Passive FTP will not work (the
 * PASV response cannot be NAT-ed) but Active FTP should work
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -247,7 +231,7 @@
 
 	IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
 		      __func__, ct, ARG_TUPLE(&exp->tuple));
-	nf_ct_expect_related(exp);
+	nf_ct_expect_related(exp, 0);
 	nf_ct_expect_put(exp);
 }
 EXPORT_SYMBOL(ip_vs_nfct_expect_related);
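
The extra argument to nf_ct_expect_related() above reflects an API extension
in this kernel range: the function gained a flags word, and passing 0 keeps
the previous behavior, so existing callers only needed this mechanical
update. A hypothetical user-space sketch of that extension pattern (the
names and the flag bit are ours, not the kernel's):

#include <stdio.h>

#define EXP_F_SKIP_MASTER 0x1	/* hypothetical opt-in flag */

struct expect { int id; };

/* Extended entry point: new behavior is opt-in via flags. */
static int expect_related(struct expect *exp, unsigned int flags)
{
	if (flags & EXP_F_SKIP_MASTER)
		printf("expect %d: skipping master lookup\n", exp->id);
	else
		printf("expect %d: default behavior\n", exp->id);
	return 0;
}

int main(void)
{
	struct expect e = { .id = 1 };

	/* Converted callers pass 0 and keep their old semantics. */
	return expect_related(&e, 0);
}
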
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 7d9d4ac..f56862a 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Never Queue scheduling module
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
- *
  */
 
 /*
diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c
index f7d62c3..78b074c 100644
--- a/net/netfilter/ipvs/ip_vs_ovf.c
+++ b/net/netfilter/ipvs/ip_vs_ovf.c
@@ -1,20 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Overflow-Connection Scheduling module
  *
  * Authors:     Raducu Deaconu <rhadoo_io@yahoo.com>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Scheduler implements "overflow" loadbalancing according to number of active
  * connections , will keep all conections to the node with the highest weight
  * and overflow to the next node if the number of connections exceeds the node's
  * weight.
  * Note that this scheduler might not be suitable for UDP because it only uses
  * active connections
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 0df17ca..166c669 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #define KMSG_COMPONENT "IPVS"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
@@ -67,7 +68,8 @@
 	struct ip_vs_pe *tmp;
 
 	/* increase the module use count */
-	ip_vs_use_count_inc();
+	if (!ip_vs_use_count_inc())
+		return -ENOENT;
 
 	mutex_lock(&ip_vs_pe_mutex);
 	/* Make sure that the pe with this name doesn't exist
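
ip_vs_use_count_inc() evidently changed from an unconditional increment to a
fallible one (presumably wrapping try_module_get()), so registration now
bails out with -ENOENT when the module is already being torn down. A sketch
of the try-get idea in plain, single-threaded C (the real kernel primitive
is atomic; this only shows the shape of the contract):

#include <stdbool.h>
#include <stdio.h>

struct module_ref {
	int count;
	bool unloading;
};

/* try-get: refuse to pin a module that is on its way out. */
static bool use_count_inc(struct module_ref *m)
{
	if (m->unloading)
		return false;
	m->count++;
	return true;
}

static int register_thing(struct module_ref *m)
{
	if (!use_count_inc(m))
		return -1;	/* the caller above maps this to -ENOENT */
	/* ... perform the registration under the mutex ... */
	return 0;
}

int main(void)
{
	struct module_ref m = { .count = 0, .unloading = false };

	printf("register: %d\n", register_thing(&m));
	m.unloading = true;
	printf("register while unloading: %d\n", register_thing(&m));
	return 0;
}
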
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index d07ef9e..0ac6705 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #define KMSG_COMPONENT "IPVS"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 54ee84a..f100da4 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -1,16 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * ip_vs_proto.c: transport protocol load balancing support for IPVS
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Julian Anastasov <ja@ssi.bg>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5320d39..89602c1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * ip_vs_proto_ah_esp.c:	AH/ESP IPSec load balancing support for IPVS
  *
  * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
  *		Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		version 2 as published by the Free Software Foundation;
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -129,7 +125,6 @@
 	.conn_out_get =		ah_esp_conn_out_get,
 	.snat_handler =		NULL,
 	.dnat_handler =		NULL,
-	.csum_check =		NULL,
 	.state_transition =	NULL,
 	.register_app =		NULL,
 	.unregister_app =	NULL,
@@ -152,7 +147,6 @@
 	.conn_out_get =		ah_esp_conn_out_get,
 	.snat_handler =		NULL,
 	.dnat_handler =		NULL,
-	.csum_check =		NULL,
 	.state_transition =	NULL,
 	.register_app =		NULL,
 	.unregister_app =	NULL,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index b0cd7d0..a0921ad 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -10,6 +10,9 @@
 #include <net/ip_vs.h>
 
 static int
+sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
+static int
 sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		   struct ip_vs_proto_data *pd,
 		   int *verdict, struct ip_vs_conn **cpp,
@@ -98,14 +101,14 @@
 #endif
 
 	/* csum_check requires unshared skb */
-	if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+	if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!sctp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
@@ -145,14 +148,14 @@
 #endif
 
 	/* csum_check requires unshared skb */
-	if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+	if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!sctp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
@@ -183,7 +186,7 @@
 sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 {
 	unsigned int sctphoff;
-	struct sctphdr *sh, _sctph;
+	struct sctphdr *sh;
 	__le32 cmp, val;
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -193,10 +196,7 @@
 #endif
 		sctphoff = ip_hdrlen(skb);
 
-	sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
-	if (sh == NULL)
-		return 0;
-
+	sh = (struct sctphdr *)(skb->data + sctphoff);
 	cmp = sh->checksum;
 	val = sctp_compute_cksum(skb, sctphoff);
 
@@ -587,7 +587,6 @@
 	.conn_out_get	= ip_vs_conn_out_get_proto,
 	.snat_handler	= sctp_snat_handler,
 	.dnat_handler	= sctp_dnat_handler,
-	.csum_check	= sctp_csum_check,
 	.state_name	= sctp_state_name,
 	.state_transition = sctp_state_transition,
 	.app_conn_bind	= sctp_app_conn_bind,
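
Two details in the SCTP hunks are worth spelling out. First, the forward
declaration of sctp_csum_check() lets the snat/dnat handlers call it
directly now that the csum_check method is dropped from the ops table (last
hunk). Second, replacing skb_header_pointer() with a direct
"skb->data + sctphoff" access is safe only because both callers first run
skb_ensure_writable() across the SCTP header, which guarantees that region
is in the linear area, making the stack copy _sctph and its NULL check
unnecessary. A user-space sketch of that precondition (stand-in types, not
the kernel's skb API):

#include <stdio.h>

struct pkt {
	unsigned char *data;		/* linear area */
	unsigned int linear_len;
};

/* Generic accessor: would copy into 'buf' when the range is not linear;
 * here we simply refuse, to keep the sketch short. */
static void *header_pointer(struct pkt *p, unsigned int off,
			    unsigned int len, void *buf)
{
	(void)buf;
	if (off + len > p->linear_len)
		return NULL;
	return p->data + off;
}

int main(void)
{
	unsigned char raw[64] = "payload";
	struct pkt p = { .data = raw, .linear_len = sizeof(raw) };
	char stack_buf[12];

	/* Once a caller guarantees the header is linear (as
	 * skb_ensure_writable() does above), both forms agree: */
	void *via_helper = header_pointer(&p, 0, 12, stack_buf);
	void *direct = p.data;

	printf("%s\n", via_helper == direct ? "same pointer" : "copy needed");
	return 0;
}
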
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 1770fc6..32b0288 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * ip_vs_proto_tcp.c:	TCP load balancing support for IPVS
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Julian Anastasov <ja@ssi.bg>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  *
  *              Network name space (netns) aware.
@@ -28,10 +24,14 @@
 #include <net/ip6_checksum.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include <net/ip_vs.h>
 
 static int
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
+static int
 tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		  struct ip_vs_proto_data *pd,
 		  int *verdict, struct ip_vs_conn **cpp,
@@ -143,14 +143,14 @@
 }
 
 
-static int
+INDIRECT_CALLABLE_SCOPE int
 tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 {
 	struct tcphdr *tcph;
 	unsigned int tcphoff = iph->len;
+	bool payload_csum = false;
 	int oldlen;
-	int payload_csum = 0;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -159,14 +159,14 @@
 	oldlen = skb->len - tcphoff;
 
 	/* csum_check requires unshared skb */
-	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+	if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!tcp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
@@ -176,7 +176,7 @@
 		if (ret == 1)
 			oldlen = skb->len - tcphoff;
 		else
-			payload_csum = 1;
+			payload_csum = true;
 	}
 
 	tcph = (void *)skb_network_header(skb) + tcphoff;
@@ -192,7 +192,7 @@
 		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = (cp->app && pp->csum_check) ?
+			skb->ip_summed = cp->app ?
 					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -227,8 +227,8 @@
 {
 	struct tcphdr *tcph;
 	unsigned int tcphoff = iph->len;
+	bool payload_csum = false;
 	int oldlen;
-	int payload_csum = 0;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -237,14 +237,14 @@
 	oldlen = skb->len - tcphoff;
 
 	/* csum_check requires unshared skb */
-	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+	if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!tcp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/*
@@ -257,7 +257,7 @@
 		if (ret == 1)
 			oldlen = skb->len - tcphoff;
 		else
-			payload_csum = 1;
+			payload_csum = true;
 	}
 
 	tcph = (void *)skb_network_header(skb) + tcphoff;
@@ -275,7 +275,7 @@
 		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = (cp->app && pp->csum_check) ?
+			skb->ip_summed = cp->app ?
 					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -710,7 +710,7 @@
 							sizeof(tcp_timeouts));
 	if (!pd->timeout_table)
 		return -ENOMEM;
-	pd->tcp_state_table =  tcp_states;
+	pd->tcp_state_table = tcp_states;
 	return 0;
 }
 
@@ -736,7 +736,6 @@
 	.conn_out_get =		ip_vs_conn_out_get_proto,
 	.snat_handler =		tcp_snat_handler,
 	.dnat_handler =		tcp_dnat_handler,
-	.csum_check =		tcp_csum_check,
 	.state_name =		tcp_state_name,
 	.state_transition =	tcp_state_transition,
 	.app_conn_bind =	tcp_app_conn_bind,
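
Three related changes meet in this file: the per-protocol csum_check method
disappears from struct ip_vs_protocol (each handler now calls its own
checker directly, hence the forward declaration of tcp_csum_check()), the
"pp->csum_check" guards collapse to just "cp->app", and
INDIRECT_CALLABLE_SCOPE widens tcp_snat_handler's linkage so the indirect
call wrappers from <linux/indirect_call_wrapper.h> can refer to it by name.
On retpoline builds those wrappers turn an indirect call through a function
pointer into a compare-and-direct-call. A minimal user-space rendition of
the idea (the macro is modeled on, but not identical to, the kernel's
INDIRECT_CALL_1):

#include <stdio.h>

/* If the pointer matches the expected function, make a direct call so
 * the CPU sees a plain call instead of a retpolined indirect one. */
#define INDIRECT_CALL_1(f, f1, ...) \
	((f) == (f1) ? f1(__VA_ARGS__) : (f)(__VA_ARGS__))

static int tcp_handler(int len) { return len + 20; }
static int udp_handler(int len) { return len + 8; }

int main(void)
{
	int (*h)(int) = tcp_handler;

	/* Common case resolves to a direct call: */
	printf("%d\n", INDIRECT_CALL_1(h, tcp_handler, 100));	/* 120 */

	h = udp_handler;	/* falls back to the indirect call */
	printf("%d\n", INDIRECT_CALL_1(h, tcp_handler, 100));	/* 108 */
	return 0;
}

The ip_vs_proto_udp.c hunks further down apply the same conversion to UDP.
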
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 0f53c49..153d896 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -1,17 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Julian Anastasov <ja@ssi.bg>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  *              Network name space (netns) aware.
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -23,12 +18,16 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/udp.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include <net/ip_vs.h>
 #include <net/ip.h>
 #include <net/ip6_checksum.h>
 
 static int
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
+static int
 udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		  struct ip_vs_proto_data *pd,
 		  int *verdict, struct ip_vs_conn **cpp,
@@ -133,14 +132,14 @@
 }
 
 
-static int
+INDIRECT_CALLABLE_SCOPE int
 udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 {
 	struct udphdr *udph;
 	unsigned int udphoff = iph->len;
+	bool payload_csum = false;
 	int oldlen;
-	int payload_csum = 0;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -149,14 +148,14 @@
 	oldlen = skb->len - udphoff;
 
 	/* csum_check requires unshared skb */
-	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+	if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!udp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/*
@@ -168,7 +167,7 @@
 		if (ret == 1)
 			oldlen = skb->len - udphoff;
 		else
-			payload_csum = 1;
+			payload_csum = true;
 	}
 
 	udph = (void *)skb_network_header(skb) + udphoff;
@@ -186,7 +185,7 @@
 		udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = (cp->app && pp->csum_check) ?
+			skb->ip_summed = cp->app ?
 					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -222,8 +221,8 @@
 {
 	struct udphdr *udph;
 	unsigned int udphoff = iph->len;
+	bool payload_csum = false;
 	int oldlen;
-	int payload_csum = 0;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -232,14 +231,14 @@
 	oldlen = skb->len - udphoff;
 
 	/* csum_check requires unshared skb */
-	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+	if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!udp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/*
@@ -252,7 +251,7 @@
 		if (ret == 1)
 			oldlen = skb->len - udphoff;
 		else
-			payload_csum = 1;
+			payload_csum = true;
 	}
 
 	udph = (void *)skb_network_header(skb) + udphoff;
@@ -270,7 +269,7 @@
 		udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = (cp->app && pp->csum_check) ?
+			skb->ip_summed = cp->app ?
 					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -494,7 +493,6 @@
 	.conn_out_get =		ip_vs_conn_out_get_proto,
 	.snat_handler =		udp_snat_handler,
 	.dnat_handler =		udp_dnat_handler,
-	.csum_check =		udp_csum_check,
 	.state_transition =	udp_state_transition,
 	.state_name =		udp_state_name,
 	.register_app =		udp_register_app,
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index ee0530d..38495c6 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Round-Robin Scheduling module
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Fixes/Changes:
  *     Wensong Zhang            :     changed the ip_vs_rr_schedule to return dest
  *     Julian Anastasov         :     fixed the NULL pointer access bug in debugging
@@ -16,7 +12,6 @@
  *     Wensong Zhang            :     changed for the d-linked destination list
  *     Wensong Zhang            :     added the ip_vs_rr_update_svc
  *     Wensong Zhang            :     added any dest with weight=0 is quiesced
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index a2ff7d7..d490372 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS         An implementation of the IP virtual server support for the
  *              LINUX operating system.  IPVS is now implemented as a module
@@ -8,13 +9,7 @@
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -184,7 +179,8 @@
 	}
 
 	/* increase the module use count */
-	ip_vs_use_count_inc();
+	if (!ip_vs_use_count_inc())
+		return -ENOENT;
 
 	mutex_lock(&ip_vs_sched_mutex);
 
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index ab23cf2..7663288 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Shortest Expected Delay scheduling module
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
- *
  */
 
 /*
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 1e01c78..c2028e4 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Source Hashing scheduling module
  *
  * Authors:     Wensong Zhang <wensong@gnuchina.org>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
- *
  */
 
 /*
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d4020c5..8dc892a 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -195,6 +195,7 @@
 #define IPVS_OPT_F_PARAM	(1 << (IPVS_OPT_PARAM-1))
 
 struct ip_vs_sync_thread_data {
+	struct task_struct *task;
 	struct netns_ipvs *ipvs;
 	struct socket *sock;
 	char *buf;
@@ -374,8 +375,11 @@
 					      max(IPVS_SYNC_SEND_DELAY, 1));
 		ms->sync_queue_len++;
 		list_add_tail(&sb->list, &ms->sync_queue);
-		if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
-			wake_up_process(ms->master_thread);
+		if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) {
+			int id = (int)(ms - ipvs->ms);
+
+			wake_up_process(ipvs->master_tinfo[id].task);
+		}
 	} else
 		ip_vs_sync_buff_release(sb);
 	spin_unlock(&ipvs->sync_lock);
@@ -1616,7 +1620,7 @@
 	EnterFunction(7);
 
 	/* Receive a packet */
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, buflen);
 	len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
 	if (len < 0)
 		return len;
@@ -1636,8 +1640,10 @@
 	spin_lock_bh(&ipvs->sync_lock);
 	if (ms->sync_queue_len &&
 	    ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
+		int id = (int)(ms - ipvs->ms);
+
 		ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
-		wake_up_process(ms->master_thread);
+		wake_up_process(ipvs->master_tinfo[id].task);
 	}
 	spin_unlock_bh(&ipvs->sync_lock);
 }
@@ -1703,10 +1709,6 @@
 	if (sb)
 		ip_vs_sync_buff_release(sb);
 
-	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
-	kfree(tinfo);
-
 	return 0;
 }
 
@@ -1740,11 +1742,6 @@
 		}
 	}
 
-	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
-	kfree(tinfo->buf);
-	kfree(tinfo);
-
 	return 0;
 }
 
@@ -1752,8 +1749,8 @@
 int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
 		      int state)
 {
-	struct ip_vs_sync_thread_data *tinfo = NULL;
-	struct task_struct **array = NULL, *task;
+	struct ip_vs_sync_thread_data *ti = NULL, *tinfo;
+	struct task_struct *task;
 	struct net_device *dev;
 	char *name;
 	int (*threadfn)(void *data);
@@ -1765,6 +1762,10 @@
 	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
 		  sizeof(struct ip_vs_sync_conn_v0));
 
+	/* increase the module use count */
+	if (!ip_vs_use_count_inc())
+		return -ENOPROTOOPT;
+
 	/* Do not hold one mutex and then to block on another */
 	for (;;) {
 		rtnl_lock();
@@ -1822,7 +1823,7 @@
 		threadfn = sync_thread_master;
 	} else if (state == IP_VS_STATE_BACKUP) {
 		result = -EEXIST;
-		if (ipvs->backup_threads)
+		if (ipvs->backup_tinfo)
 			goto out_early;
 
 		ipvs->bcfg = *c;
@@ -1849,28 +1850,22 @@
 					  master_wakeup_work_handler);
 			ms->ipvs = ipvs;
 		}
-	} else {
-		array = kcalloc(count, sizeof(struct task_struct *),
-				GFP_KERNEL);
-		result = -ENOMEM;
-		if (!array)
-			goto out;
 	}
+	result = -ENOMEM;
+	ti = kcalloc(count, sizeof(struct ip_vs_sync_thread_data),
+		     GFP_KERNEL);
+	if (!ti)
+		goto out;
 
 	for (id = 0; id < count; id++) {
-		result = -ENOMEM;
-		tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
-		if (!tinfo)
-			goto out;
+		tinfo = &ti[id];
 		tinfo->ipvs = ipvs;
-		tinfo->sock = NULL;
 		if (state == IP_VS_STATE_BACKUP) {
+			result = -ENOMEM;
 			tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
 					     GFP_KERNEL);
 			if (!tinfo->buf)
 				goto out;
-		} else {
-			tinfo->buf = NULL;
 		}
 		tinfo->id = id;
 		if (state == IP_VS_STATE_MASTER)
@@ -1885,17 +1880,15 @@
 			result = PTR_ERR(task);
 			goto out;
 		}
-		tinfo = NULL;
-		if (state == IP_VS_STATE_MASTER)
-			ipvs->ms[id].master_thread = task;
-		else
-			array[id] = task;
+		tinfo->task = task;
 	}
 
 	/* mark as active */
 
-	if (state == IP_VS_STATE_BACKUP)
-		ipvs->backup_threads = array;
+	if (state == IP_VS_STATE_MASTER)
+		ipvs->master_tinfo = ti;
+	else
+		ipvs->backup_tinfo = ti;
 	spin_lock_bh(&ipvs->sync_buff_lock);
 	ipvs->sync_state |= state;
 	spin_unlock_bh(&ipvs->sync_buff_lock);
@@ -1903,56 +1896,64 @@
 	mutex_unlock(&ipvs->sync_mutex);
 	rtnl_unlock();
 
-	/* increase the module use count */
-	ip_vs_use_count_inc();
-
 	return 0;
 
 out:
 	/* We do not need RTNL lock anymore, release it here so that
-	 * sock_release below and in the kthreads can use rtnl_lock
-	 * to leave the mcast group.
+	 * sock_release below can use rtnl_lock to leave the mcast group.
 	 */
 	rtnl_unlock();
-	count = id;
-	while (count-- > 0) {
-		if (state == IP_VS_STATE_MASTER)
-			kthread_stop(ipvs->ms[count].master_thread);
-		else
-			kthread_stop(array[count]);
+	id = min(id, count - 1);
+	if (ti) {
+		for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+			if (tinfo->task)
+				kthread_stop(tinfo->task);
+		}
 	}
 	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
 		kfree(ipvs->ms);
 		ipvs->ms = NULL;
 	}
 	mutex_unlock(&ipvs->sync_mutex);
-	if (tinfo) {
-		if (tinfo->sock)
-			sock_release(tinfo->sock);
-		kfree(tinfo->buf);
-		kfree(tinfo);
+
+	/* No more mutexes, release socks */
+	if (ti) {
+		for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+			if (tinfo->sock)
+				sock_release(tinfo->sock);
+			kfree(tinfo->buf);
+		}
+		kfree(ti);
 	}
-	kfree(array);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
 	return result;
 
 out_early:
 	mutex_unlock(&ipvs->sync_mutex);
 	rtnl_unlock();
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
 	return result;
 }
 
 
 int stop_sync_thread(struct netns_ipvs *ipvs, int state)
 {
-	struct task_struct **array;
+	struct ip_vs_sync_thread_data *ti, *tinfo;
 	int id;
 	int retc = -EINVAL;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 
+	mutex_lock(&ipvs->sync_mutex);
 	if (state == IP_VS_STATE_MASTER) {
+		retc = -ESRCH;
 		if (!ipvs->ms)
-			return -ESRCH;
+			goto err;
+		ti = ipvs->master_tinfo;
 
 		/*
 		 * The lock synchronizes with sb_queue_tail(), so that we don't
@@ -1971,38 +1972,56 @@
 			struct ipvs_master_sync_state *ms = &ipvs->ms[id];
 			int ret;
 
+			tinfo = &ti[id];
 			pr_info("stopping master sync thread %d ...\n",
-				task_pid_nr(ms->master_thread));
+				task_pid_nr(tinfo->task));
 			cancel_delayed_work_sync(&ms->master_wakeup_work);
-			ret = kthread_stop(ms->master_thread);
+			ret = kthread_stop(tinfo->task);
 			if (retc >= 0)
 				retc = ret;
 		}
 		kfree(ipvs->ms);
 		ipvs->ms = NULL;
+		ipvs->master_tinfo = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (!ipvs->backup_threads)
-			return -ESRCH;
+		retc = -ESRCH;
+		if (!ipvs->backup_tinfo)
+			goto err;
+		ti = ipvs->backup_tinfo;
 
 		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
-		array = ipvs->backup_threads;
 		retc = 0;
 		for (id = ipvs->threads_mask; id >= 0; id--) {
 			int ret;
 
+			tinfo = &ti[id];
 			pr_info("stopping backup sync thread %d ...\n",
-				task_pid_nr(array[id]));
-			ret = kthread_stop(array[id]);
+				task_pid_nr(tinfo->task));
+			ret = kthread_stop(tinfo->task);
 			if (retc >= 0)
 				retc = ret;
 		}
-		kfree(array);
-		ipvs->backup_threads = NULL;
+		ipvs->backup_tinfo = NULL;
+	} else {
+		goto err;
 	}
+	id = ipvs->threads_mask;
+	mutex_unlock(&ipvs->sync_mutex);
+
+	/* No more mutexes, release socks */
+	for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+		if (tinfo->sock)
+			sock_release(tinfo->sock);
+		kfree(tinfo->buf);
+	}
+	kfree(ti);
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
+	return retc;
 
+err:
+	mutex_unlock(&ipvs->sync_mutex);
 	return retc;
 }
 
@@ -2021,7 +2040,6 @@
 {
 	int retc;
 
-	mutex_lock(&ipvs->sync_mutex);
 	retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
 	if (retc && retc != -ESRCH)
 		pr_err("Failed to stop Master Daemon\n");
@@ -2029,5 +2047,4 @@
 	retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
 	if (retc && retc != -ESRCH)
 		pr_err("Failed to stop Backup Daemon\n");
-	mutex_unlock(&ipvs->sync_mutex);
 }
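
The sync-daemon rework above consolidates thread bookkeeping: instead of
ms->master_thread plus a separate backup task array, each state gets one
kcalloc'ed array of ip_vs_sync_thread_data whose entries carry their own
task pointer. Sockets and buffers are no longer released by the kthreads
themselves but by start_sync_thread()'s error path and by
stop_sync_thread(), always after all mutexes are dropped; the module use
count is taken before any thread is created and dropped on every failure
path; and stop_sync_thread() now takes sync_mutex itself, which is why
ip_vs_sync_net_cleanup() stops wrapping the calls in that lock. (The
iov_iter_kvec() change is an unrelated API update: the direction argument
no longer carries the ORed iterator type.) A compressed pthread sketch of
the "spawner owns all per-thread resources" shape, with names of our own:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct worker {
	pthread_t task;
	char *buf;		/* resource owned by the spawner */
	int started;
};

static void *work(void *arg)
{
	struct worker *w = arg;

	snprintf(w->buf, 16, "worker");
	return NULL;		/* the thread itself frees nothing */
}

int main(void)
{
	int count = 4, id, err = 0;
	struct worker *ti = calloc(count, sizeof(*ti));

	if (!ti)
		return 1;
	for (id = 0; id < count; id++) {
		ti[id].buf = malloc(16);
		if (!ti[id].buf || pthread_create(&ti[id].task, NULL,
						  work, &ti[id])) {
			err = 1;
			break;	/* unwind everything created so far */
		}
		ti[id].started = 1;
	}
	/* One cleanup path serves both success and partial failure: */
	for (id = count - 1; id >= 0; id--) {
		if (ti[id].started)
			pthread_join(ti[id].task, NULL);
		free(ti[id].buf);
	}
	free(ti);
	return err;
}
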
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 6add39e..09f584b 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Weighted Least-Connection Scheduling module
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
  *     Wensong Zhang            :     changed the ip_vs_wlc_schedule to return dest
  *     Wensong Zhang            :     changed to use the inactconns in scheduling
@@ -16,7 +12,6 @@
  *     Wensong Zhang            :     changed for the d-linked destination list
  *     Wensong Zhang            :     added the ip_vs_wlc_update_svc
  *     Wensong Zhang            :     added any dest with weight=0 is quiesced
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 62258dd..1bc7a07 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IPVS:        Weighted Round-Robin Scheduling module
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
  *     Wensong Zhang            :     changed the ip_vs_wrr_schedule to return dest
 *     Wensong Zhang            :     changed some cosmetic things for debugging
@@ -15,7 +11,6 @@
  *     Wensong Zhang            :     added the ip_vs_wrr_update_svc
  *     Julian Anastasov         :     fixed the bug of returning destination
  *                                    with weight 0 when all weights are zero
- *
  */
 
 #define KMSG_COMPONENT "IPVS"
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 473cce2..888d306 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * ip_vs_xmit.c: various packet transmitters for IPVS
  *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Julian Anastasov <ja@ssi.bg>
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Changes:
  *
  * Description of forwarding methods:
@@ -32,6 +28,8 @@
 #include <linux/slab.h>
 #include <linux/tcp.h>                  /* for tcphdr */
 #include <net/ip.h>
+#include <net/gue.h>
+#include <net/gre.h>
 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
 #include <net/udp.h>
 #include <net/icmp.h>                   /* for icmp_send */
@@ -39,6 +37,7 @@
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #include <net/ip_tunnels.h>
+#include <net/ip6_checksum.h>
 #include <net/addrconf.h>
 #include <linux/icmpv6.h>
 #include <linux/netfilter.h>
@@ -126,7 +125,7 @@
 {
 	struct flowi4 fl4;
 	struct rtable *rt;
-	int loop = 0;
+	bool loop = false;
 
 	memset(&fl4, 0, sizeof(fl4));
 	fl4.daddr = daddr;
@@ -149,7 +148,7 @@
 		ip_rt_put(rt);
 		*saddr = fl4.saddr;
 		flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
-		loop++;
+		loop = true;
 		goto retry;
 	}
 	*saddr = fl4.saddr;
@@ -278,7 +277,7 @@
 		}
 
 		/* don't propagate ttl change to cloned packets */
-		if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+		if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
 			return false;
 
 		ipv6_hdr(skb)->hop_limit--;
@@ -293,7 +292,7 @@
 		}
 
 		/* don't propagate ttl change to cloned packets */
-		if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		if (skb_ensure_writable(skb, sizeof(struct iphdr)))
 			return false;
 
 		/* Decrease ttl */
@@ -382,6 +381,21 @@
 		mtu = dst_mtu(&rt->dst);
 	} else {
 		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+		if (!dest)
+			goto err_put;
+		if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+			mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
+			if ((dest->tun_flags &
+			     IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+			    skb->ip_summed == CHECKSUM_PARTIAL)
+				mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+		} else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+			__be16 tflags = 0;
+
+			if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+				tflags |= TUNNEL_CSUM;
+			mtu -= gre_calc_hlen(tflags);
+		}
 		if (mtu < 68) {
 			IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
 			goto err_put;
@@ -533,6 +547,21 @@
 		mtu = dst_mtu(&rt->dst);
 	else {
 		mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
+		if (!dest)
+			goto err_put;
+		if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+			mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
+			if ((dest->tun_flags &
+			     IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+			    skb->ip_summed == CHECKSUM_PARTIAL)
+				mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+		} else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+			__be16 tflags = 0;
+
+			if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+				tflags |= TUNNEL_CSUM;
+			mtu -= gre_calc_hlen(tflags);
+		}
 		if (mtu < IPV6_MIN_MTU) {
 			IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
 				     IPV6_MIN_MTU);
@@ -584,7 +613,7 @@
 	if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
 		ret = ip_vs_confirm_conntrack(skb);
 	if (ret == NF_ACCEPT) {
-		nf_reset(skb);
+		nf_reset_ct(skb);
 		skb_forward_csum(skb);
 	}
 	return ret;
@@ -787,7 +816,7 @@
 	}
 
 	/* copy-on-write the packet before mangling it */
-	if (!skb_make_writable(skb, sizeof(struct iphdr)))
+	if (skb_ensure_writable(skb, sizeof(struct iphdr)))
 		goto tx_error;
 
 	if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -876,7 +905,7 @@
 	}
 
 	/* copy-on-write the packet before mangling it */
-	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+	if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
 		goto tx_error;
 
 	if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -989,6 +1018,98 @@
 	}
 }
 
+static int
+ipvs_gue_encap(struct net *net, struct sk_buff *skb,
+	       struct ip_vs_conn *cp, __u8 *next_protocol)
+{
+	__be16 dport;
+	__be16 sport = udp_flow_src_port(net, skb, 0, 0, false);
+	struct udphdr  *udph;	/* Our new UDP header */
+	struct guehdr  *gueh;	/* Our new GUE header */
+	size_t hdrlen, optlen = 0;
+	void *data;
+	bool need_priv = false;
+
+	if ((cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+	    skb->ip_summed == CHECKSUM_PARTIAL) {
+		optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+		need_priv = true;
+	}
+
+	hdrlen = sizeof(struct guehdr) + optlen;
+
+	skb_push(skb, hdrlen);
+
+	gueh = (struct guehdr *)skb->data;
+
+	gueh->control = 0;
+	gueh->version = 0;
+	gueh->hlen = optlen >> 2;
+	gueh->flags = 0;
+	gueh->proto_ctype = *next_protocol;
+
+	data = &gueh[1];
+
+	if (need_priv) {
+		__be32 *flags = data;
+		u16 csum_start = skb_checksum_start_offset(skb);
+		__be16 *pd;
+
+		gueh->flags |= GUE_FLAG_PRIV;
+		*flags = 0;
+		data += GUE_LEN_PRIV;
+
+		if (csum_start < hdrlen)
+			return -EINVAL;
+
+		csum_start -= hdrlen;
+		pd = data;
+		pd[0] = htons(csum_start);
+		pd[1] = htons(csum_start + skb->csum_offset);
+
+		if (!skb_is_gso(skb)) {
+			skb->ip_summed = CHECKSUM_NONE;
+			skb->encapsulation = 0;
+		}
+
+		*flags |= GUE_PFLAG_REMCSUM;
+		data += GUE_PLEN_REMCSUM;
+	}
+
+	skb_push(skb, sizeof(struct udphdr));
+	skb_reset_transport_header(skb);
+
+	udph = udp_hdr(skb);
+
+	dport = cp->dest->tun_port;
+	udph->dest = dport;
+	udph->source = sport;
+	udph->len = htons(skb->len);
+	udph->check = 0;
+
+	*next_protocol = IPPROTO_UDP;
+
+	return 0;
+}
+
+static void
+ipvs_gre_encap(struct net *net, struct sk_buff *skb,
+	       struct ip_vs_conn *cp, __u8 *next_protocol)
+{
+	__be16 proto = *next_protocol == IPPROTO_IPIP ?
+				htons(ETH_P_IP) : htons(ETH_P_IPV6);
+	__be16 tflags = 0;
+	size_t hdrlen;
+
+	if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+		tflags |= TUNNEL_CSUM;
+
+	hdrlen = gre_calc_hlen(tflags);
+	gre_build_header(skb, hdrlen, tflags, proto, 0, 0);
+
+	*next_protocol = IPPROTO_GRE;
+}
+
 /*
  *   IP Tunneling transmitter
  *
@@ -1025,6 +1146,8 @@
 	struct iphdr  *iph;			/* Our new IP header */
 	unsigned int max_headroom;		/* The extra header space needed */
 	int ret, local;
+	int tun_type, gso_type;
+	int tun_flags;
 
 	EnterFunction(10);
 
@@ -1046,6 +1169,30 @@
 	 */
 	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
 
+	tun_type = cp->dest->tun_type;
+	tun_flags = cp->dest->tun_flags;
+
+	if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+		size_t gue_hdrlen, gue_optlen = 0;
+
+		if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+		    skb->ip_summed == CHECKSUM_PARTIAL) {
+			gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+		}
+		gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
+
+		max_headroom += sizeof(struct udphdr) + gue_hdrlen;
+	} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+		size_t gre_hdrlen;
+		__be16 tflags = 0;
+
+		if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+			tflags |= TUNNEL_CSUM;
+		gre_hdrlen = gre_calc_hlen(tflags);
+
+		max_headroom += gre_hdrlen;
+	}
+
 	/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
 	dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
 	skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
@@ -1054,11 +1201,45 @@
 	if (IS_ERR(skb))
 		goto tx_error;
 
-	if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)))
+	gso_type = __tun_gso_type_mask(AF_INET, cp->af);
+	if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+		if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+		    (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+			gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+		else
+			gso_type |= SKB_GSO_UDP_TUNNEL;
+		if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+		    skb->ip_summed == CHECKSUM_PARTIAL) {
+			gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+		}
+	} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+		if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+			gso_type |= SKB_GSO_GRE_CSUM;
+		else
+			gso_type |= SKB_GSO_GRE;
+	}
+
+	if (iptunnel_handle_offloads(skb, gso_type))
 		goto tx_error;
 
 	skb->transport_header = skb->network_header;
 
+	skb_set_inner_ipproto(skb, next_protocol);
+
+	if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+		bool check = false;
+
+		if (ipvs_gue_encap(net, skb, cp, &next_protocol))
+			goto tx_error;
+
+		if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+		    (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+			check = true;
+
+		udp_set_csum(!check, skb, saddr, cp->daddr.ip, skb->len);
+	} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE)
+		ipvs_gre_encap(net, skb, cp, &next_protocol);
+
 	skb_push(skb, sizeof(struct iphdr));
 	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1102,6 +1283,8 @@
 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
 {
+	struct netns_ipvs *ipvs = cp->ipvs;
+	struct net *net = ipvs->net;
 	struct rt6_info *rt;		/* Route to the other host */
 	struct in6_addr saddr;		/* Source for tunnel */
 	struct net_device *tdev;	/* Device to other host */
@@ -1112,10 +1295,12 @@
 	struct ipv6hdr  *iph;		/* Our new IP header */
 	unsigned int max_headroom;	/* The extra header space needed */
 	int ret, local;
+	int tun_type, gso_type;
+	int tun_flags;
 
 	EnterFunction(10);
 
-	local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+	local = __ip_vs_get_out_rt_v6(ipvs, cp->af, skb, cp->dest,
 				      &cp->daddr.in6,
 				      &saddr, ipvsh, 1,
 				      IP_VS_RT_MODE_LOCAL |
@@ -1134,17 +1319,75 @@
 	 */
 	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
 
+	tun_type = cp->dest->tun_type;
+	tun_flags = cp->dest->tun_flags;
+
+	if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+		size_t gue_hdrlen, gue_optlen = 0;
+
+		if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+		    skb->ip_summed == CHECKSUM_PARTIAL) {
+			gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+		}
+		gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
+
+		max_headroom += sizeof(struct udphdr) + gue_hdrlen;
+	} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+		size_t gre_hdrlen;
+		__be16 tflags = 0;
+
+		if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+			tflags |= TUNNEL_CSUM;
+		gre_hdrlen = gre_calc_hlen(tflags);
+
+		max_headroom += gre_hdrlen;
+	}
+
 	skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
 					 &next_protocol, &payload_len,
 					 &dsfield, &ttl, NULL);
 	if (IS_ERR(skb))
 		goto tx_error;
 
-	if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)))
+	gso_type = __tun_gso_type_mask(AF_INET6, cp->af);
+	if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+		if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+		    (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+			gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+		else
+			gso_type |= SKB_GSO_UDP_TUNNEL;
+		if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+		    skb->ip_summed == CHECKSUM_PARTIAL) {
+			gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+		}
+	} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+		if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+			gso_type |= SKB_GSO_GRE_CSUM;
+		else
+			gso_type |= SKB_GSO_GRE;
+	}
+
+	if (iptunnel_handle_offloads(skb, gso_type))
 		goto tx_error;
 
 	skb->transport_header = skb->network_header;
 
+	skb_set_inner_ipproto(skb, next_protocol);
+
+	if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+		bool check = false;
+
+		if (ipvs_gue_encap(net, skb, cp, &next_protocol))
+			goto tx_error;
+
+		if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+		    (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+			check = true;
+
+		udp6_set_csum(!check, skb, &saddr, &cp->daddr.in6, skb->len);
+	} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE)
+		ipvs_gre_encap(net, skb, cp, &next_protocol);
+
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1167,7 +1410,7 @@
 
 	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
 	if (ret == NF_ACCEPT)
-		ip6_local_out(cp->ipvs->net, skb->sk, skb);
+		ip6_local_out(net, skb->sk, skb);
 	else if (ret == NF_DROP)
 		kfree_skb(skb);
 
@@ -1328,7 +1571,7 @@
 	}
 
 	/* copy-on-write the packet before mangling it */
-	if (!skb_make_writable(skb, offset))
+	if (skb_ensure_writable(skb, offset))
 		goto tx_error;
 
 	if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -1417,7 +1660,7 @@
 	}
 
 	/* copy-on-write the packet before mangling it */
-	if (!skb_make_writable(skb, offset))
+	if (skb_ensure_writable(skb, offset))
 		goto tx_error;
 
 	if (skb_cow(skb, rt->dst.dev->hard_header_len))
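
The ip_vs_xmit.c hunks add GUE and GRE encapsulation alongside plain IPIP
tunneling: the route code shrinks the usable MTU by the extra headers
(UDP + GUE, plus the remote-checksum option when it is active, or the GRE
header, whose size depends on TUNNEL_CSUM), max_headroom grows by the same
amount, the GSO type is chosen per encapsulation, and
ipvs_gue_encap()/ipvs_gre_encap() build the outer headers before the new IP
header is pushed. The nf_reset() -> nf_reset_ct() change is a rename
clarifying that only conntrack state is dropped. A small arithmetic sketch
of the IPv4 MTU accounting, using standard on-wire header sizes and a GUE
option length mirroring the hunks (treat the constants as illustrative):

#include <stdio.h>

enum tun_type { TUN_IPIP, TUN_GUE, TUN_GRE };

#define IPH		20	/* outer IPv4 header */
#define UDPH		8
#define GUEH		4
#define GUE_REMCSUM_OPT	8	/* private flags word + remcsum option */
#define GREH_BASE	4
#define GREH_CSUM	8	/* base + checksum/reserved word */

static int tunnel_mtu(int link_mtu, enum tun_type t, int csum, int remcsum)
{
	int mtu = link_mtu - IPH;	/* every mode pays the outer IP */

	if (t == TUN_GUE)
		mtu -= UDPH + GUEH + (remcsum ? GUE_REMCSUM_OPT : 0);
	else if (t == TUN_GRE)
		mtu -= csum ? GREH_CSUM : GREH_BASE;
	return mtu;
}

int main(void)
{
	printf("ipip:     %d\n", tunnel_mtu(1500, TUN_IPIP, 0, 0)); /* 1480 */
	printf("gue:      %d\n", tunnel_mtu(1500, TUN_GUE, 0, 0));  /* 1468 */
	printf("gre+csum: %d\n", tunnel_mtu(1500, TUN_GRE, 1, 0));  /* 1472 */
	return 0;
}
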
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index b6d0f6d..82f36be 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * count the number of connections matching an arbitrary key.
  *
@@ -33,12 +34,6 @@
 
 #define CONNCOUNT_SLOTS		256U
 
-#ifdef CONFIG_LOCKDEP
-#define CONNCOUNT_LOCK_SLOTS	8U
-#else
-#define CONNCOUNT_LOCK_SLOTS	256U
-#endif
-
 #define CONNCOUNT_GC_MAX_NODES	8
 #define MAX_KEYLEN		5
 
@@ -49,8 +44,6 @@
 	struct nf_conntrack_zone	zone;
 	int				cpu;
 	u32				jiffies32;
-	bool				dead;
-	struct rcu_head			rcu_head;
 };
 
 struct nf_conncount_rb {
@@ -60,7 +53,7 @@
 	struct rcu_head rcu_head;
 };
 
-static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
+static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp;
 
 struct nf_conncount_data {
 	unsigned int keylen;
@@ -89,79 +82,25 @@
 	return memcmp(a, b, klen * sizeof(u32));
 }
 
-enum nf_conncount_list_add
-nf_conncount_add(struct nf_conncount_list *list,
-		 const struct nf_conntrack_tuple *tuple,
-		 const struct nf_conntrack_zone *zone)
-{
-	struct nf_conncount_tuple *conn;
-
-	if (WARN_ON_ONCE(list->count > INT_MAX))
-		return NF_CONNCOUNT_ERR;
-
-	conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
-	if (conn == NULL)
-		return NF_CONNCOUNT_ERR;
-
-	conn->tuple = *tuple;
-	conn->zone = *zone;
-	conn->cpu = raw_smp_processor_id();
-	conn->jiffies32 = (u32)jiffies;
-	conn->dead = false;
-	spin_lock_bh(&list->list_lock);
-	if (list->dead == true) {
-		kmem_cache_free(conncount_conn_cachep, conn);
-		spin_unlock_bh(&list->list_lock);
-		return NF_CONNCOUNT_SKIP;
-	}
-	list_add_tail(&conn->node, &list->head);
-	list->count++;
-	spin_unlock_bh(&list->list_lock);
-	return NF_CONNCOUNT_ADDED;
-}
-EXPORT_SYMBOL_GPL(nf_conncount_add);
-
-static void __conn_free(struct rcu_head *h)
-{
-	struct nf_conncount_tuple *conn;
-
-	conn = container_of(h, struct nf_conncount_tuple, rcu_head);
-	kmem_cache_free(conncount_conn_cachep, conn);
-}
-
-static bool conn_free(struct nf_conncount_list *list,
+static void conn_free(struct nf_conncount_list *list,
 		      struct nf_conncount_tuple *conn)
 {
-	bool free_entry = false;
-
-	spin_lock_bh(&list->list_lock);
-
-	if (conn->dead) {
-		spin_unlock_bh(&list->list_lock);
-		return free_entry;
-	}
+	lockdep_assert_held(&list->list_lock);
 
 	list->count--;
-	conn->dead = true;
-	list_del_rcu(&conn->node);
-	if (list->count == 0) {
-		list->dead = true;
-		free_entry = true;
-	}
+	list_del(&conn->node);
 
-	spin_unlock_bh(&list->list_lock);
-	call_rcu(&conn->rcu_head, __conn_free);
-	return free_entry;
+	kmem_cache_free(conncount_conn_cachep, conn);
 }
 
 static const struct nf_conntrack_tuple_hash *
 find_or_evict(struct net *net, struct nf_conncount_list *list,
-	      struct nf_conncount_tuple *conn, bool *free_entry)
+	      struct nf_conncount_tuple *conn)
 {
 	const struct nf_conntrack_tuple_hash *found;
 	unsigned long a, b;
 	int cpu = raw_smp_processor_id();
-	__s32 age;
+	u32 age;
 
 	found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
 	if (found)
@@ -176,52 +115,45 @@
 	 */
 	age = a - b;
 	if (conn->cpu == cpu || age >= 2) {
-		*free_entry = conn_free(list, conn);
+		conn_free(list, conn);
 		return ERR_PTR(-ENOENT);
 	}
 
 	return ERR_PTR(-EAGAIN);
 }
 
-void nf_conncount_lookup(struct net *net,
-			 struct nf_conncount_list *list,
-			 const struct nf_conntrack_tuple *tuple,
-			 const struct nf_conntrack_zone *zone,
-			 bool *addit)
+static int __nf_conncount_add(struct net *net,
+			      struct nf_conncount_list *list,
+			      const struct nf_conntrack_tuple *tuple,
+			      const struct nf_conntrack_zone *zone)
 {
 	const struct nf_conntrack_tuple_hash *found;
 	struct nf_conncount_tuple *conn, *conn_n;
 	struct nf_conn *found_ct;
 	unsigned int collect = 0;
-	bool free_entry = false;
-
-	/* best effort only */
-	*addit = tuple ? true : false;
 
 	/* check the saved connections */
 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
 		if (collect > CONNCOUNT_GC_MAX_NODES)
 			break;
 
-		found = find_or_evict(net, list, conn, &free_entry);
+		found = find_or_evict(net, list, conn);
 		if (IS_ERR(found)) {
 			/* Not found, but might be about to be confirmed */
 			if (PTR_ERR(found) == -EAGAIN) {
-				if (!tuple)
-					continue;
-
 				if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
 				    nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
 				    nf_ct_zone_id(zone, zone->dir))
-					*addit = false;
-			} else if (PTR_ERR(found) == -ENOENT)
+					return 0; /* already exists */
+			} else {
 				collect++;
+			}
 			continue;
 		}
 
 		found_ct = nf_ct_tuplehash_to_ctrack(found);
 
-		if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
+		if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
 		    nf_ct_zone_equal(found_ct, zone, zone->dir)) {
 			/*
 			 * We should not see tuples twice unless someone hooks
@@ -229,7 +161,8 @@
 			 *
 			 * Attempt to avoid a re-add in this case.
 			 */
-			*addit = false;
+			nf_ct_put(found_ct);
+			return 0;
 		} else if (already_closed(found_ct)) {
 			/*
 			 * we do not care about connections which are
@@ -243,19 +176,48 @@
 
 		nf_ct_put(found_ct);
 	}
+
+	if (WARN_ON_ONCE(list->count > INT_MAX))
+		return -EOVERFLOW;
+
+	conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+	if (conn == NULL)
+		return -ENOMEM;
+
+	conn->tuple = *tuple;
+	conn->zone = *zone;
+	conn->cpu = raw_smp_processor_id();
+	conn->jiffies32 = (u32)jiffies;
+	list_add_tail(&conn->node, &list->head);
+	list->count++;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(nf_conncount_lookup);
+
+int nf_conncount_add(struct net *net,
+		     struct nf_conncount_list *list,
+		     const struct nf_conntrack_tuple *tuple,
+		     const struct nf_conntrack_zone *zone)
+{
+	int ret;
+
+	/* check the saved connections */
+	spin_lock_bh(&list->list_lock);
+	ret = __nf_conncount_add(net, list, tuple, zone);
+	spin_unlock_bh(&list->list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_add);
 
 void nf_conncount_list_init(struct nf_conncount_list *list)
 {
 	spin_lock_init(&list->list_lock);
 	INIT_LIST_HEAD(&list->head);
 	list->count = 0;
-	list->dead = false;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_list_init);
 
-/* Return true if the list is empty */
+/* Return true if the list is empty. Must be called with BH disabled. */
 bool nf_conncount_gc_list(struct net *net,
 			  struct nf_conncount_list *list)
 {
@@ -263,17 +225,17 @@
 	struct nf_conncount_tuple *conn, *conn_n;
 	struct nf_conn *found_ct;
 	unsigned int collected = 0;
-	bool free_entry = false;
 	bool ret = false;
 
+	/* don't bother if other cpu is already doing GC */
+	if (!spin_trylock(&list->list_lock))
+		return false;
+
 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
-		found = find_or_evict(net, list, conn, &free_entry);
+		found = find_or_evict(net, list, conn);
 		if (IS_ERR(found)) {
-			if (PTR_ERR(found) == -ENOENT)  {
-				if (free_entry)
-					return true;
+			if (PTR_ERR(found) == -ENOENT)
 				collected++;
-			}
 			continue;
 		}
 
@@ -284,23 +246,19 @@
 			 * closed already -> ditch it
 			 */
 			nf_ct_put(found_ct);
-			if (conn_free(list, conn))
-				return true;
+			conn_free(list, conn);
 			collected++;
 			continue;
 		}
 
 		nf_ct_put(found_ct);
 		if (collected > CONNCOUNT_GC_MAX_NODES)
-			return false;
+			break;
 	}
 
-	spin_lock_bh(&list->list_lock);
-	if (!list->count) {
-		list->dead = true;
+	if (!list->count)
 		ret = true;
-	}
-	spin_unlock_bh(&list->list_lock);
+	spin_unlock(&list->list_lock);
 
 	return ret;
 }
@@ -314,6 +272,7 @@
 	kmem_cache_free(conncount_rb_cachep, rbconn);
 }
 
+/* caller must hold tree nf_conncount_locks[] lock */
 static void tree_nodes_free(struct rb_root *root,
 			    struct nf_conncount_rb *gc_nodes[],
 			    unsigned int gc_count)
@@ -323,8 +282,10 @@
 	while (gc_count) {
 		rbconn = gc_nodes[--gc_count];
 		spin_lock(&rbconn->list.list_lock);
-		rb_erase(&rbconn->node, root);
-		call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+		if (!rbconn->list.count) {
+			rb_erase(&rbconn->node, root);
+			call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+		}
 		spin_unlock(&rbconn->list.list_lock);
 	}
 }
@@ -341,20 +302,19 @@
 	    struct rb_root *root,
 	    unsigned int hash,
 	    const u32 *key,
-	    u8 keylen,
 	    const struct nf_conntrack_tuple *tuple,
 	    const struct nf_conntrack_zone *zone)
 {
-	enum nf_conncount_list_add ret;
 	struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
 	struct rb_node **rbnode, *parent;
 	struct nf_conncount_rb *rbconn;
 	struct nf_conncount_tuple *conn;
 	unsigned int count = 0, gc_count = 0;
-	bool node_found = false;
+	u8 keylen = data->keylen;
+	bool do_gc = true;
 
-	spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
-
+	spin_lock_bh(&nf_conncount_locks[hash]);
+restart:
 	parent = NULL;
 	rbnode = &(root->rb_node);
 	while (*rbnode) {
@@ -368,45 +328,32 @@
 		} else if (diff > 0) {
 			rbnode = &((*rbnode)->rb_right);
 		} else {
-			/* unlikely: other cpu added node already */
-			node_found = true;
-			ret = nf_conncount_add(&rbconn->list, tuple, zone);
-			if (ret == NF_CONNCOUNT_ERR) {
+			int ret;
+
+			ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
+			if (ret)
 				count = 0; /* hotdrop */
-			} else if (ret == NF_CONNCOUNT_ADDED) {
+			else
 				count = rbconn->list.count;
-			} else {
-				/* NF_CONNCOUNT_SKIP, rbconn is already
-				 * reclaimed by gc, insert a new tree node
-				 */
-				node_found = false;
-			}
-			break;
+			tree_nodes_free(root, gc_nodes, gc_count);
+			goto out_unlock;
 		}
 
 		if (gc_count >= ARRAY_SIZE(gc_nodes))
 			continue;
 
-		if (nf_conncount_gc_list(net, &rbconn->list))
+		if (do_gc && nf_conncount_gc_list(net, &rbconn->list))
 			gc_nodes[gc_count++] = rbconn;
 	}
 
 	if (gc_count) {
 		tree_nodes_free(root, gc_nodes, gc_count);
-		/* tree_node_free before new allocation permits
-		 * allocator to re-use newly free'd object.
-		 *
-		 * This is a rare event; in most cases we will find
-		 * existing node to re-use. (or gc_count is 0).
-		 */
-
-		if (gc_count >= ARRAY_SIZE(gc_nodes))
-			schedule_gc_worker(data, hash);
+		schedule_gc_worker(data, hash);
+		gc_count = 0;
+		do_gc = false;
+		goto restart;
 	}
 
-	if (node_found)
-		goto out_unlock;
-
 	/* expected case: match, insert new node */
 	rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
 	if (rbconn == NULL)
@@ -427,10 +374,10 @@
 	count = 1;
 	rbconn->list.count = count;
 
-	rb_link_node(&rbconn->node, parent, rbnode);
+	rb_link_node_rcu(&rbconn->node, parent, rbnode);
 	rb_insert_color(&rbconn->node, root);
 out_unlock:
-	spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+	spin_unlock_bh(&nf_conncount_locks[hash]);
 	return count;
 }
 
@@ -441,7 +388,6 @@
 	   const struct nf_conntrack_tuple *tuple,
 	   const struct nf_conntrack_zone *zone)
 {
-	enum nf_conncount_list_add ret;
 	struct rb_root *root;
 	struct rb_node *parent;
 	struct nf_conncount_rb *rbconn;
@@ -454,7 +400,6 @@
 	parent = rcu_dereference_raw(root->rb_node);
 	while (parent) {
 		int diff;
-		bool addit;
 
 		rbconn = rb_entry(parent, struct nf_conncount_rb, node);
 
@@ -464,31 +409,36 @@
 		} else if (diff > 0) {
 			parent = rcu_dereference_raw(parent->rb_right);
 		} else {
-			/* same source network -> be counted! */
-			nf_conncount_lookup(net, &rbconn->list, tuple, zone,
-					    &addit);
+			int ret;
 
-			if (!addit)
+			if (!tuple) {
+				nf_conncount_gc_list(net, &rbconn->list);
 				return rbconn->list.count;
+			}
 
-			ret = nf_conncount_add(&rbconn->list, tuple, zone);
-			if (ret == NF_CONNCOUNT_ERR) {
-				return 0; /* hotdrop */
-			} else if (ret == NF_CONNCOUNT_ADDED) {
-				return rbconn->list.count;
-			} else {
-				/* NF_CONNCOUNT_SKIP, rbconn is already
-				 * reclaimed by gc, insert a new tree node
-				 */
+			spin_lock_bh(&rbconn->list.list_lock);
+			/* Node might be about to be free'd.
+			 * We need to defer to insert_tree() in this case.
+			 */
+			if (rbconn->list.count == 0) {
+				spin_unlock_bh(&rbconn->list.list_lock);
 				break;
 			}
+
+			/* same source network -> be counted! */
+			ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
+			spin_unlock_bh(&rbconn->list.list_lock);
+			if (ret)
+				return 0; /* hotdrop */
+			else
+				return rbconn->list.count;
 		}
 	}
 
 	if (!tuple)
 		return 0;
 
-	return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+	return insert_tree(net, data, root, hash, key, tuple, zone);
 }
 
 static void tree_gc_worker(struct work_struct *work)
@@ -499,27 +449,47 @@
 	struct rb_node *node;
 	unsigned int tree, next_tree, gc_count = 0;
 
-	tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
+	tree = data->gc_tree % CONNCOUNT_SLOTS;
 	root = &data->root[tree];
 
+	local_bh_disable();
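+	/* first pass, lockless: run per-list gc under RCU and count empty lists */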
 	rcu_read_lock();
 	for (node = rb_first(root); node != NULL; node = rb_next(node)) {
 		rbconn = rb_entry(node, struct nf_conncount_rb, node);
 		if (nf_conncount_gc_list(data->net, &rbconn->list))
-			gc_nodes[gc_count++] = rbconn;
+			gc_count++;
 	}
 	rcu_read_unlock();
+	local_bh_enable();
+
+	cond_resched();
 
 	spin_lock_bh(&nf_conncount_locks[tree]);
+	if (gc_count < ARRAY_SIZE(gc_nodes))
+		goto next; /* do not bother */
 
-	if (gc_count) {
-		tree_nodes_free(root, gc_nodes, gc_count);
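+	/* second pass, under the tree lock: batch-free nodes whose list is empty */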
+	gc_count = 0;
+	node = rb_first(root);
+	while (node != NULL) {
+		rbconn = rb_entry(node, struct nf_conncount_rb, node);
+		node = rb_next(node);
+
+		if (rbconn->list.count > 0)
+			continue;
+
+		gc_nodes[gc_count++] = rbconn;
+		if (gc_count >= ARRAY_SIZE(gc_nodes)) {
+			tree_nodes_free(root, gc_nodes, gc_count);
+			gc_count = 0;
+		}
 	}
 
+	tree_nodes_free(root, gc_nodes, gc_count);
+next:
 	clear_bit(tree, data->pending_trees);
 
 	next_tree = (tree + 1) % CONNCOUNT_SLOTS;
-	next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS);
+	next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
 
 	if (next_tree < CONNCOUNT_SLOTS) {
 		data->gc_tree = next_tree;
@@ -621,10 +591,7 @@
 {
 	int i;
 
-	BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS);
-	BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0);
-
-	for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i)
+	for (i = 0; i < CONNCOUNT_SLOTS; ++i)
 		spin_lock_init(&nf_conncount_locks[i]);
 
 	conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 1d66de5..2ccda8a 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Accounting handling for netfilter. */
 
 /*
  * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -25,102 +22,15 @@
 module_param_named(acct, nf_ct_acct, bool, 0644);
 MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table acct_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_acct",
-		.data		= &init_net.ct.sysctl_acct,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{}
-};
-#endif /* CONFIG_SYSCTL */
-
-unsigned int
-seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
-{
-	struct nf_conn_acct *acct;
-	struct nf_conn_counter *counter;
-
-	acct = nf_conn_acct_find(ct);
-	if (!acct)
-		return 0;
-
-	counter = acct->counter;
-	seq_printf(s, "packets=%llu bytes=%llu ",
-		   (unsigned long long)atomic64_read(&counter[dir].packets),
-		   (unsigned long long)atomic64_read(&counter[dir].bytes));
-
-	return 0;
-};
-EXPORT_SYMBOL_GPL(seq_print_acct);
-
 static const struct nf_ct_ext_type acct_extend = {
 	.len	= sizeof(struct nf_conn_acct),
 	.align	= __alignof__(struct nf_conn_acct),
 	.id	= NF_CT_EXT_ACCT,
 };
 
-#ifdef CONFIG_SYSCTL
-static int nf_conntrack_acct_init_sysctl(struct net *net)
-{
-	struct ctl_table *table;
-
-	table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table),
-			GFP_KERNEL);
-	if (!table)
-		goto out;
-
-	table[0].data = &net->ct.sysctl_acct;
-
-	/* Don't export sysctls to unprivileged users */
-	if (net->user_ns != &init_user_ns)
-		table[0].procname = NULL;
-
-	net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter",
-							 table);
-	if (!net->ct.acct_sysctl_header) {
-		pr_err("can't register to sysctl\n");
-		goto out_register;
-	}
-	return 0;
-
-out_register:
-	kfree(table);
-out:
-	return -ENOMEM;
-}
-
-static void nf_conntrack_acct_fini_sysctl(struct net *net)
-{
-	struct ctl_table *table;
-
-	table = net->ct.acct_sysctl_header->ctl_table_arg;
-	unregister_net_sysctl_table(net->ct.acct_sysctl_header);
-	kfree(table);
-}
-#else
-static int nf_conntrack_acct_init_sysctl(struct net *net)
-{
-	return 0;
-}
-
-static void nf_conntrack_acct_fini_sysctl(struct net *net)
-{
-}
-#endif
-
-int nf_conntrack_acct_pernet_init(struct net *net)
+void nf_conntrack_acct_pernet_init(struct net *net)
 {
 	net->ct.sysctl_acct = nf_ct_acct;
-	return nf_conntrack_acct_init_sysctl(net);
-}
-
-void nf_conntrack_acct_pernet_fini(struct net *net)
-{
-	nf_conntrack_acct_fini_sysctl(net);
 }
 
 int nf_conntrack_acct_init(void)
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 20edd58..d011d2e 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Amanda extension for IP connection tracking
  *
  * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
  * based on HW's ip_conntrack_irc.c as well as other modules
  * (C) 2006 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -28,11 +24,13 @@
 static unsigned int master_timeout __read_mostly = 300;
 static char *ts_algo = "kmp";
 
+#define HELPER_NAME "amanda"
+
 MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
 MODULE_DESCRIPTION("Amanda connection tracking module");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_conntrack_amanda");
-MODULE_ALIAS_NFCT_HELPER("amanda");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
 
 module_param(master_timeout, uint, 0600);
 MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
@@ -54,6 +52,7 @@
 	SEARCH_DATA,
 	SEARCH_MESG,
 	SEARCH_INDEX,
+	SEARCH_STATE,
 };
 
 static struct {
@@ -81,6 +80,10 @@
 		.string = "INDEX ",
 		.len	= 6,
 	},
+	[SEARCH_STATE] = {
+		.string = "STATE ",
+		.len	= 6,
+	},
 };
 
 static int amanda_help(struct sk_buff *skb,
@@ -124,7 +127,7 @@
 		goto out;
 	stop += start;
 
-	for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
+	for (i = SEARCH_DATA; i <= SEARCH_STATE; i++) {
 		off = skb_find_text(skb, start, stop, search[i].ts);
 		if (off == UINT_MAX)
 			continue;
@@ -156,7 +159,7 @@
 		if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
 			ret = nf_nat_amanda(skb, ctinfo, protoff,
 					    off - dataoff, len, exp);
-		else if (nf_ct_expect_related(exp) != 0) {
+		else if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		}
@@ -168,19 +171,20 @@
 }
 
 static const struct nf_conntrack_expect_policy amanda_exp_policy = {
-	.max_expected		= 3,
+	.max_expected		= 4,
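+	/* one expectation each for the DATA, MESG, INDEX and STATE streams */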
 	.timeout		= 180,
 };
 
 static struct nf_conntrack_helper amanda_helper[2] __read_mostly = {
 	{
-		.name			= "amanda",
+		.name			= HELPER_NAME,
 		.me			= THIS_MODULE,
 		.help			= amanda_help,
 		.tuple.src.l3num	= AF_INET,
 		.tuple.src.u.udp.port	= cpu_to_be16(10080),
 		.tuple.dst.protonum	= IPPROTO_UDP,
 		.expect_policy		= &amanda_exp_policy,
+		.nat_mod_name		= NF_NAT_HELPER_NAME(HELPER_NAME),
 	},
 	{
 		.name			= "amanda",
@@ -190,6 +194,7 @@
 		.tuple.src.u.udp.port	= cpu_to_be16(10080),
 		.tuple.dst.protonum	= IPPROTO_UDP,
 		.expect_policy		= &amanda_exp_policy,
+		.nat_mod_name		= NF_NAT_HELPER_NAME(HELPER_NAME),
 	},
 };
 
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 5423b19..1ba6bec 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *      broadcast connection tracking helper
  *
  *      (c) 2005 Patrick McHardy <kaber@trash.net>
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -41,12 +37,17 @@
 
 	in_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (in_dev != NULL) {
-		for_primary_ifa(in_dev) {
+		const struct in_ifaddr *ifa;
+
+		in_dev_for_each_ifa_rcu(ifa, in_dev) {
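+			/* skip secondaries, as the old for_primary_ifa() did */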
+			if (ifa->ifa_flags & IFA_F_SECONDARY)
+				continue;
+
 			if (ifa->ifa_broadcast == iph->daddr) {
 				mask = ifa->ifa_mask;
 				break;
 			}
-		} endfor_ifa(in_dev);
+		}
 	}
 
 	if (mask == 0)
@@ -67,7 +68,7 @@
 	exp->class		  = NF_CT_EXPECT_CLASS_DEFAULT;
 	exp->helper               = NULL;
 
-	nf_ct_expect_related(exp);
+	nf_ct_expect_related(exp, 0);
 	nf_ct_expect_put(exp);
 
 	nf_ct_refresh(ct, skb, timeout * HZ);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 277d02a..5cd610b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Connection state tracking for netfilter.  This is separated from,
    but required by, the NAT layer; it can also be used by an iptables
    extension. */
@@ -6,10 +7,6 @@
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
  * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -25,6 +22,7 @@
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/jhash.h>
+#include <linux/siphash.h>
 #include <linux/err.h>
 #include <linux/percpu.h>
 #include <linux/moduleparam.h>
@@ -51,7 +49,6 @@
 #include <net/netfilter/nf_conntrack_labels.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netns/hash.h>
 #include <net/ip.h>
@@ -76,8 +73,7 @@
 };
 
 static __read_mostly struct kmem_cache *nf_conntrack_cachep;
-static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
+static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;
 
 /* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
@@ -222,6 +218,24 @@
 	return scale_hash(hash_conntrack_raw(tuple, net));
 }
 
+static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
+				  unsigned int dataoff,
+				  struct nf_conntrack_tuple *tuple)
+{
+	struct {
+		__be16 sport;
+		__be16 dport;
+	} _inet_hdr, *inet_hdr;
+
+	/* Actually only need first 4 bytes to get ports. */
+	inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
+	if (!inet_hdr)
+		return false;
+
+	tuple->src.u.udp.port = inet_hdr->sport;
+	tuple->dst.u.udp.port = inet_hdr->dport;
+	return true;
+}
+
 static bool
 nf_ct_get_tuple(const struct sk_buff *skb,
 		unsigned int nhoff,
@@ -229,16 +243,11 @@
 		u_int16_t l3num,
 		u_int8_t protonum,
 		struct net *net,
-		struct nf_conntrack_tuple *tuple,
-		const struct nf_conntrack_l4proto *l4proto)
+		struct nf_conntrack_tuple *tuple)
 {
 	unsigned int size;
 	const __be32 *ap;
 	__be32 _addrs[8];
-	struct {
-		__be16 sport;
-		__be16 dport;
-	} _inet_hdr, *inet_hdr;
 
 	memset(tuple, 0, sizeof(*tuple));
 
@@ -274,16 +283,36 @@
 	tuple->dst.protonum = protonum;
 	tuple->dst.dir = IP_CT_DIR_ORIGINAL;
 
-	if (unlikely(l4proto->pkt_to_tuple))
-		return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
+	switch (protonum) {
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPPROTO_ICMPV6:
+		return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple);
+#endif
+	case IPPROTO_ICMP:
+		return icmp_pkt_to_tuple(skb, dataoff, net, tuple);
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	case IPPROTO_GRE:
+		return gre_pkt_to_tuple(skb, dataoff, net, tuple);
+#endif
+	case IPPROTO_TCP:
+	case IPPROTO_UDP: /* fallthrough */
+		return nf_ct_get_tuple_ports(skb, dataoff, tuple);
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+	case IPPROTO_UDPLITE:
+		return nf_ct_get_tuple_ports(skb, dataoff, tuple);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	case IPPROTO_SCTP:
+		return nf_ct_get_tuple_ports(skb, dataoff, tuple);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	case IPPROTO_DCCP:
+		return nf_ct_get_tuple_ports(skb, dataoff, tuple);
+#endif
+	default:
+		break;
+	}
 
-	/* Actually only need first 4 bytes to get ports. */
-	inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
-	if (!inet_hdr)
-		return false;
-
-	tuple->src.u.udp.port = inet_hdr->sport;
-	tuple->dst.u.udp.port = inet_hdr->dport;
 	return true;
 }
 
@@ -366,33 +395,20 @@
 		       u_int16_t l3num,
 		       struct net *net, struct nf_conntrack_tuple *tuple)
 {
-	const struct nf_conntrack_l4proto *l4proto;
 	u8 protonum;
 	int protoff;
-	int ret;
-
-	rcu_read_lock();
 
 	protoff = get_l4proto(skb, nhoff, l3num, &protonum);
-	if (protoff <= 0) {
-		rcu_read_unlock();
+	if (protoff <= 0)
 		return false;
-	}
 
-	l4proto = __nf_ct_l4proto_find(l3num, protonum);
-
-	ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
-			      l4proto);
-
-	rcu_read_unlock();
-	return ret;
+	return nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple);
 }
 EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
 
 bool
 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
-		   const struct nf_conntrack_tuple *orig,
-		   const struct nf_conntrack_l4proto *l4proto)
+		   const struct nf_conntrack_tuple *orig)
 {
 	memset(inverse, 0, sizeof(*inverse));
 
@@ -415,8 +431,14 @@
 
 	inverse->dst.protonum = orig->dst.protonum;
 
-	if (unlikely(l4proto->invert_tuple))
-		return l4proto->invert_tuple(inverse, orig);
+	switch (orig->dst.protonum) {
+	case IPPROTO_ICMP:
+		return nf_conntrack_invert_icmp_tuple(inverse, orig);
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPPROTO_ICMPV6:
+		return nf_conntrack_invert_icmpv6_tuple(inverse, orig);
+#endif
+	}
 
 	inverse->src.u.all = orig->dst.u.all;
 	inverse->dst.u.all = orig->src.u.all;
@@ -424,6 +446,40 @@
 }
 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
 
+/* Generate an almost-unique pseudo-id for a given conntrack.
+ *
+ * Intentionally doesn't re-use any of the seeds used for hash
+ * table location; we assume the id gets exposed to userspace.
+ *
+ * The following nf_conn items do not change throughout the lifetime
+ * of the nf_conn:
+ *
+ * 1. nf_conn address
+ * 2. nf_conn->master address (normally NULL)
+ * 3. the associated net namespace
+ * 4. the original direction tuple
+ */
+u32 nf_ct_get_id(const struct nf_conn *ct)
+{
+	static __read_mostly siphash_key_t ct_id_seed;
+	unsigned long a, b, c, d;
+
+	net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
+
+	a = (unsigned long)ct;
+	b = (unsigned long)ct->master;
+	c = (unsigned long)nf_ct_net(ct);
+	d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				   sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple),
+				   &ct_id_seed);
+#ifdef CONFIG_64BIT
+	return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
+#else
+	return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed);
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_ct_get_id);
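+/* Sketch of a consumer (illustrative, not part of this hunk): ctnetlink
+ * can export this id in place of the object address, e.g.:
+ *
+ *	__be32 id = htonl(nf_ct_get_id(ct));
+ *
+ *	if (nla_put_be32(skb, CTA_ID, id))
+ *		goto nla_put_failure;
+ */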
+
 static void
 clean_from_lists(struct nf_conn *ct)
 {
@@ -526,11 +582,20 @@
 }
 EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
 
+static void destroy_gre_conntrack(struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	struct nf_conn *master = ct->master;
+
+	if (master)
+		nf_ct_gre_keymap_destroy(master);
+#endif
+}
+
 static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct nf_conn *ct = (struct nf_conn *)nfct;
-	const struct nf_conntrack_l4proto *l4proto;
 
 	pr_debug("destroy_conntrack(%p)\n", ct);
 	WARN_ON(atomic_read(&nfct->use) != 0);
@@ -539,9 +604,9 @@
 		nf_ct_tmpl_free(ct);
 		return;
 	}
-	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-	if (l4proto->destroy)
-		l4proto->destroy(ct);
+
+	if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE))
+		destroy_gre_conntrack(ct);
 
 	local_bh_disable();
 	/* Expectations will have been removed in clean_from_lists,
@@ -683,9 +748,6 @@
 			continue;
 		}
 
-		if (nf_ct_is_dying(ct))
-			continue;
-
 		if (nf_ct_key_equal(h, tuple, zone, net))
 			return h;
 	}
@@ -711,20 +773,24 @@
 	struct nf_conn *ct;
 
 	rcu_read_lock();
-begin:
+
 	h = ____nf_conntrack_find(net, zone, tuple, hash);
 	if (h) {
+		/* We have a candidate that matches the tuple we're interested
+		 * in; try to obtain a reference and re-check the tuple.
+		 */
 		ct = nf_ct_tuplehash_to_ctrack(h);
-		if (unlikely(nf_ct_is_dying(ct) ||
-			     !atomic_inc_not_zero(&ct->ct_general.use)))
-			h = NULL;
-		else {
-			if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
-				nf_ct_put(ct);
-				goto begin;
-			}
+		if (likely(atomic_inc_not_zero(&ct->ct_general.use))) {
+			if (likely(nf_ct_key_equal(h, tuple, zone, net)))
+				goto found;
+
+			/* TYPESAFE_BY_RCU recycled the candidate */
+			nf_ct_put(ct);
 		}
+
+		h = NULL;
 	}
+found:
 	rcu_read_unlock();
 
 	return h;
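+/* The block above is the standard SLAB_TYPESAFE_BY_RCU lookup idiom; a
+ * minimal sketch of it outside conntrack (illustrative only):
+ *
+ *	rcu_read_lock();
+ *	obj = lookup(key);
+ *	if (obj && atomic_inc_not_zero(&obj->refcnt)) {
+ *		if (!key_equal(obj, key)) {
+ *			put_ref(obj);		(object was recycled)
+ *			obj = NULL;		(treat as a miss)
+ *		}
+ *	}
+ *	rcu_read_unlock();
+ */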
@@ -840,7 +906,7 @@
 	enum ip_conntrack_info oldinfo;
 	struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
 
-	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 	if (l4proto->allow_clash &&
 	    !nf_ct_is_dying(ct) &&
 	    atomic_inc_not_zero(&ct->ct_general.use)) {
@@ -901,10 +967,18 @@
 	 * REJECT will give spurious warnings here.
 	 */
 
-	/* No external references means no one else could have
-	 * confirmed us.
+	/* Another skb with the same unconfirmed conntrack may
+	 * win the race. This may happen for bridge (br_flood)
+	 * or broadcast/multicast packets that do skb_clone with
+	 * an unconfirmed conntrack.
 	 */
-	WARN_ON(nf_ct_is_confirmed(ct));
+	if (unlikely(nf_ct_is_confirmed(ct))) {
+		WARN_ON_ONCE(1);
+		nf_conntrack_double_unlock(hash, reply_hash);
+		local_bh_enable();
+		return NF_DROP;
+	}
+
 	pr_debug("Confirming conntrack %p\n", ct);
 	/* We have to check the DYING flag after unlink to prevent
 	 * a race against nf_ct_get_next_corpse() possibly called from
@@ -940,12 +1014,9 @@
 
 	/* set conntrack timestamp, if enabled. */
 	tstamp = nf_conn_tstamp_find(ct);
-	if (tstamp) {
-		if (skb->tstamp == 0)
-			__net_timestamp(skb);
+	if (tstamp)
+		tstamp->start = ktime_get_real_ns();
 
-		tstamp->start = ktime_to_ns(skb->tstamp);
-	}
 	/* Since the lookup is lockless, hash insertion must be done after
 	 * starting the timer and setting the CONFIRMED bit. The RCU barriers
 	 * guarantee that no other CPU can find the conntrack before the above
@@ -1007,6 +1078,22 @@
 		}
 
 		if (nf_ct_key_equal(h, tuple, zone, net)) {
+			/* Tuple is taken already, so caller will need to find
+			 * a new source port to use.
+			 *
+			 * Only exception:
+			 * If the *original tuples* are identical, then both
+			 * conntracks refer to the same flow.
+			 * This is a rare situation; it can occur e.g. when
+			 * more than one UDP packet is sent from the same
+			 * socket in different threads.
+			 *
+			 * Let nf_ct_resolve_clash() deal with this later.
+			 */
+			if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+					      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
+				continue;
+
 			NF_CT_STAT_INC_ATOMIC(net, found);
 			rcu_read_unlock();
 			return 1;
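+/* Hypothetical illustration (not kernel code): two threads sharing one
+ * unconnected UDP socket may send the first packets of the same flow
+ * concurrently; each packet gets its own unconfirmed conntrack with an
+ * identical original tuple, and nf_ct_resolve_clash() sorts it out when
+ * the second one is confirmed.
+ */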
@@ -1112,7 +1199,7 @@
 	if (!test_bit(IPS_ASSURED_BIT, &ct->status))
 		return true;
 
-	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 	if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
 		return true;
 
@@ -1292,6 +1379,7 @@
 	/* save hash for reusing when confirming */
 	*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
 	ct->status = 0;
+	ct->timeout = 0;
 	write_pnet(&ct->ct_net, net);
 	memset(&ct->__nfct_init_offset[0], 0,
 	       offsetof(struct nf_conn, proto) -
@@ -1342,7 +1430,6 @@
 static noinline struct nf_conntrack_tuple_hash *
 init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       const struct nf_conntrack_tuple *tuple,
-	       const struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
 	       unsigned int dataoff, u32 hash)
 {
@@ -1355,7 +1442,7 @@
 	struct nf_conn_timeout *timeout_ext;
 	struct nf_conntrack_zone tmp;
 
-	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
+	if (!nf_ct_invert_tuple(&repl_tuple, tuple)) {
 		pr_debug("Can't invert tuple.\n");
 		return NULL;
 	}
@@ -1373,12 +1460,6 @@
 
 	timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
 
-	if (!l4proto->new(ct, skb, dataoff)) {
-		nf_conntrack_free(ct);
-		pr_debug("can't track with proto module\n");
-		return NULL;
-	}
-
 	if (timeout_ext)
 		nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
 				      GFP_ATOMIC);
@@ -1439,12 +1520,11 @@
 
 /* On success, returns 0, sets skb->_nfct | ctinfo */
 static int
-resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
+resolve_normal_ct(struct nf_conn *tmpl,
 		  struct sk_buff *skb,
 		  unsigned int dataoff,
-		  u_int16_t l3num,
 		  u_int8_t protonum,
-		  const struct nf_conntrack_l4proto *l4proto)
+		  const struct nf_hook_state *state)
 {
 	const struct nf_conntrack_zone *zone;
 	struct nf_conntrack_tuple tuple;
@@ -1455,17 +1535,18 @@
 	u32 hash;
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
-			     dataoff, l3num, protonum, net, &tuple, l4proto)) {
+			     dataoff, state->pf, protonum, state->net,
+			     &tuple)) {
 		pr_debug("Can't get tuple\n");
 		return 0;
 	}
 
 	/* look for tuple match */
 	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-	hash = hash_conntrack_raw(&tuple, net);
-	h = __nf_conntrack_find_get(net, zone, &tuple, hash);
+	hash = hash_conntrack_raw(&tuple, state->net);
+	h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
 	if (!h) {
-		h = init_conntrack(net, tmpl, &tuple, l4proto,
+		h = init_conntrack(state->net, tmpl, &tuple,
 				   skb, dataoff, hash);
 		if (!h)
 			return 0;
@@ -1494,13 +1575,101 @@
 	return 0;
 }
 
-unsigned int
-nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
-		struct sk_buff *skb)
+/*
+ * icmp packets need special treatment to handle error messages that are
+ * related to a connection.
+ *
+ * Callers need to check if skb has a conntrack assigned when this
+ * helper returns; in that case the skb belongs to an already known
+ * connection.
+ */
+static unsigned int __cold
+nf_conntrack_handle_icmp(struct nf_conn *tmpl,
+			 struct sk_buff *skb,
+			 unsigned int dataoff,
+			 u8 protonum,
+			 const struct nf_hook_state *state)
 {
-	const struct nf_conntrack_l4proto *l4proto;
-	struct nf_conn *ct, *tmpl;
+	int ret;
+
+	if (state->pf == NFPROTO_IPV4 && protonum == IPPROTO_ICMP)
+		ret = nf_conntrack_icmpv4_error(tmpl, skb, dataoff, state);
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (state->pf == NFPROTO_IPV6 && protonum == IPPROTO_ICMPV6)
+		ret = nf_conntrack_icmpv6_error(tmpl, skb, dataoff, state);
+#endif
+	else
+		return NF_ACCEPT;
+
+	if (ret <= 0) {
+		NF_CT_STAT_INC_ATOMIC(state->net, error);
+		NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+	}
+
+	return ret;
+}
+
+static int generic_packet(struct nf_conn *ct, struct sk_buff *skb,
+			  enum ip_conntrack_info ctinfo)
+{
+	const unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+	if (!timeout)
+		timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;
+
+	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+	return NF_ACCEPT;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int nf_conntrack_handle_packet(struct nf_conn *ct,
+				      struct sk_buff *skb,
+				      unsigned int dataoff,
+				      enum ip_conntrack_info ctinfo,
+				      const struct nf_hook_state *state)
+{
+	switch (nf_ct_protonum(ct)) {
+	case IPPROTO_TCP:
+		return nf_conntrack_tcp_packet(ct, skb, dataoff,
+					       ctinfo, state);
+	case IPPROTO_UDP:
+		return nf_conntrack_udp_packet(ct, skb, dataoff,
+					       ctinfo, state);
+	case IPPROTO_ICMP:
+		return nf_conntrack_icmp_packet(ct, skb, ctinfo, state);
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPPROTO_ICMPV6:
+		return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+	case IPPROTO_UDPLITE:
+		return nf_conntrack_udplite_packet(ct, skb, dataoff,
+						   ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	case IPPROTO_SCTP:
+		return nf_conntrack_sctp_packet(ct, skb, dataoff,
+						ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	case IPPROTO_DCCP:
+		return nf_conntrack_dccp_packet(ct, skb, dataoff,
+						ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	case IPPROTO_GRE:
+		return nf_conntrack_gre_packet(ct, skb, dataoff,
+					       ctinfo, state);
+#endif
+	}
+
+	return generic_packet(ct, skb, ctinfo);
+}
+
+unsigned int
+nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
+{
 	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct, *tmpl;
 	u_int8_t protonum;
 	int dataoff, ret;
 
@@ -1509,32 +1678,26 @@
 		/* Previously seen (loopback or untracked)?  Ignore. */
 		if ((tmpl && !nf_ct_is_template(tmpl)) ||
 		     ctinfo == IP_CT_UNTRACKED) {
-			NF_CT_STAT_INC_ATOMIC(net, ignore);
+			NF_CT_STAT_INC_ATOMIC(state->net, ignore);
 			return NF_ACCEPT;
 		}
 		skb->_nfct = 0;
 	}
 
 	/* rcu_read_lock()ed by nf_hook_thresh */
-	dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
+	dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum);
 	if (dataoff <= 0) {
 		pr_debug("not prepared to track yet or error occurred\n");
-		NF_CT_STAT_INC_ATOMIC(net, error);
-		NF_CT_STAT_INC_ATOMIC(net, invalid);
+		NF_CT_STAT_INC_ATOMIC(state->net, error);
+		NF_CT_STAT_INC_ATOMIC(state->net, invalid);
 		ret = NF_ACCEPT;
 		goto out;
 	}
 
-	l4proto = __nf_ct_l4proto_find(pf, protonum);
-
-	/* It may be an special packet, error, unclean...
-	 * inverse of the return code tells to the netfilter
-	 * core what to do with the packet. */
-	if (l4proto->error != NULL) {
-		ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum);
+	if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) {
+		ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff,
+					       protonum, state);
 		if (ret <= 0) {
-			NF_CT_STAT_INC_ATOMIC(net, error);
-			NF_CT_STAT_INC_ATOMIC(net, invalid);
 			ret = -ret;
 			goto out;
 		}
@@ -1543,10 +1706,11 @@
 			goto out;
 	}
 repeat:
-	ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
+	ret = resolve_normal_ct(tmpl, skb, dataoff,
+				protonum, state);
 	if (ret < 0) {
 		/* Too stressed to deal. */
-		NF_CT_STAT_INC_ATOMIC(net, drop);
+		NF_CT_STAT_INC_ATOMIC(state->net, drop);
 		ret = NF_DROP;
 		goto out;
 	}
@@ -1554,21 +1718,21 @@
 	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct) {
 		/* Not valid part of a connection */
-		NF_CT_STAT_INC_ATOMIC(net, invalid);
+		NF_CT_STAT_INC_ATOMIC(state->net, invalid);
 		ret = NF_ACCEPT;
 		goto out;
 	}
 
-	ret = l4proto->packet(ct, skb, dataoff, ctinfo);
+	ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state);
 	if (ret <= 0) {
 		/* Invalid: inverse of the return code tells
 		 * the netfilter core what to do */
 		pr_debug("nf_conntrack_in: Can't track with proto module\n");
 		nf_conntrack_put(&ct->ct_general);
 		skb->_nfct = 0;
-		NF_CT_STAT_INC_ATOMIC(net, invalid);
+		NF_CT_STAT_INC_ATOMIC(state->net, invalid);
 		if (ret == -NF_DROP)
-			NF_CT_STAT_INC_ATOMIC(net, drop);
+			NF_CT_STAT_INC_ATOMIC(state->net, drop);
 		/* Special case: TCP tracker reports an attempt to reopen a
 		 * closed/aborted connection. We have to go back and create a
 		 * fresh conntrack.
@@ -1590,20 +1754,6 @@
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_in);
 
-bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
-			  const struct nf_conntrack_tuple *orig)
-{
-	bool ret;
-
-	rcu_read_lock();
-	ret = nf_ct_invert_tuple(inverse, orig,
-				 __nf_ct_l4proto_find(orig->src.l3num,
-						      orig->dst.protonum));
-	rcu_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
-
 /* Alter reply tuple (maybe alter helper).  This is for NAT, and is
    implicitly racy: see __nf_conntrack_confirm */
 void nf_conntrack_alter_reply(struct nf_conn *ct,
@@ -1631,11 +1781,9 @@
 void __nf_ct_refresh_acct(struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  const struct sk_buff *skb,
-			  unsigned long extra_jiffies,
-			  int do_acct)
+			  u32 extra_jiffies,
+			  bool do_acct)
 {
-	WARN_ON(!skb);
-
 	/* Only update if this is not a fixed timeout */
 	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
 		goto acct;
@@ -1644,7 +1792,8 @@
 	if (nf_ct_is_confirmed(ct))
 		extra_jiffies += nfct_time_stamp;
 
-	ct->timeout = extra_jiffies;
+	if (READ_ONCE(ct->timeout) != extra_jiffies)
+		WRITE_ONCE(ct->timeout, extra_jiffies);
 acct:
 	if (do_acct)
 		nf_ct_acct_update(ct, ctinfo, skb->len);
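+/* The conditional store above is a common idiom to avoid dirtying the
+ * cache line on every packet; in isolation (illustrative sketch):
+ *
+ *	if (READ_ONCE(*slot) != val)
+ *		WRITE_ONCE(*slot, val);		(store only on change)
+ */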
@@ -1667,9 +1816,7 @@
 #include <linux/netfilter/nfnetlink_conntrack.h>
 #include <linux/mutex.h>
 
-/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
+/* Generic function for tcp/udp/sctp/dccp and the like. */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
 			       const struct nf_conntrack_tuple *tuple)
 {
@@ -1734,7 +1881,6 @@
 
 static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 {
-	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
 	enum ip_conntrack_info ctinfo;
@@ -1755,10 +1901,8 @@
 	if (dataoff <= 0)
 		return -1;
 
-	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
-
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
-			     l4num, net, &tuple, l4proto))
+			     l4num, net, &tuple))
 		return -1;
 
 	if (ct->status & IPS_SRC_NAT) {
@@ -2087,10 +2231,7 @@
 
 	list_for_each_entry(net, net_exit_list, exit_list) {
 		nf_conntrack_proto_pernet_fini(net);
-		nf_conntrack_helper_pernet_fini(net);
 		nf_conntrack_ecache_pernet_fini(net);
-		nf_conntrack_tstamp_pernet_fini(net);
-		nf_conntrack_acct_pernet_fini(net);
 		nf_conntrack_expect_pernet_fini(net);
 		free_percpu(net->ct.stat);
 		free_percpu(net->ct.pcpu_lists);
@@ -2228,6 +2369,7 @@
 
 int nf_conntrack_init_start(void)
 {
+	unsigned long nr_pages = totalram_pages();
 	int max_factor = 8;
 	int ret = -ENOMEM;
 	int i;
@@ -2247,11 +2389,11 @@
 		 * >= 4GB machines have 65536 buckets.
 		 */
 		nf_conntrack_htable_size
-			= (((totalram_pages << PAGE_SHIFT) / 16384)
+			= (((nr_pages << PAGE_SHIFT) / 16384)
 			   / sizeof(struct hlist_head));
-		if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
+		if (nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
 			nf_conntrack_htable_size = 65536;
-		else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
+		else if (nr_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
 			nf_conntrack_htable_size = 16384;
 		if (nf_conntrack_htable_size < 32)
 			nf_conntrack_htable_size = 32;
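+/* Worked example (illustrative): with 4 KiB pages, 8-byte hlist_heads
+ * and 8 GiB of RAM, nr_pages = 2097152, so (nr_pages << 12) / 16384 / 8
+ * yields 65536 buckets; the > 4 GiB clamp then keeps the size at 65536.
+ */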
@@ -2366,6 +2508,7 @@
 	int cpu;
 
 	BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
+	BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS);
 	atomic_set(&net->ct.count, 0);
 
 	net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
@@ -2387,33 +2530,15 @@
 	ret = nf_conntrack_expect_pernet_init(net);
 	if (ret < 0)
 		goto err_expect;
-	ret = nf_conntrack_acct_pernet_init(net);
-	if (ret < 0)
-		goto err_acct;
-	ret = nf_conntrack_tstamp_pernet_init(net);
-	if (ret < 0)
-		goto err_tstamp;
-	ret = nf_conntrack_ecache_pernet_init(net);
-	if (ret < 0)
-		goto err_ecache;
-	ret = nf_conntrack_helper_pernet_init(net);
-	if (ret < 0)
-		goto err_helper;
-	ret = nf_conntrack_proto_pernet_init(net);
-	if (ret < 0)
-		goto err_proto;
+
+	nf_conntrack_acct_pernet_init(net);
+	nf_conntrack_tstamp_pernet_init(net);
+	nf_conntrack_ecache_pernet_init(net);
+	nf_conntrack_helper_pernet_init(net);
+	nf_conntrack_proto_pernet_init(net);
+
 	return 0;
 
-err_proto:
-	nf_conntrack_helper_pernet_fini(net);
-err_helper:
-	nf_conntrack_ecache_pernet_fini(net);
-err_ecache:
-	nf_conntrack_tstamp_pernet_fini(net);
-err_tstamp:
-	nf_conntrack_acct_pernet_fini(net);
-err_acct:
-	nf_conntrack_expect_pernet_fini(net);
 err_expect:
 	free_percpu(net->ct.stat);
 err_pcpu_lists:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index c11822a..6fba74b 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Event cache for netfilter. */
 
 /*
@@ -5,10 +6,6 @@
  * (C) 2005 Patrick McHardy <kaber@trash.net>
  * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -27,6 +24,7 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 
 static DEFINE_MUTEX(nf_ct_ecache_mutex);
@@ -336,85 +334,21 @@
 #define NF_CT_EVENTS_DEFAULT 1
 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table event_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_events",
-		.data		= &init_net.ct.sysctl_events,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{}
-};
-#endif /* CONFIG_SYSCTL */
-
 static const struct nf_ct_ext_type event_extend = {
 	.len	= sizeof(struct nf_conntrack_ecache),
 	.align	= __alignof__(struct nf_conntrack_ecache),
 	.id	= NF_CT_EXT_ECACHE,
 };
 
-#ifdef CONFIG_SYSCTL
-static int nf_conntrack_event_init_sysctl(struct net *net)
-{
-	struct ctl_table *table;
-
-	table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
-			GFP_KERNEL);
-	if (!table)
-		goto out;
-
-	table[0].data = &net->ct.sysctl_events;
-
-	/* Don't export sysctls to unprivileged users */
-	if (net->user_ns != &init_user_ns)
-		table[0].procname = NULL;
-
-	net->ct.event_sysctl_header =
-		register_net_sysctl(net, "net/netfilter", table);
-	if (!net->ct.event_sysctl_header) {
-		pr_err("can't register to sysctl\n");
-		goto out_register;
-	}
-	return 0;
-
-out_register:
-	kfree(table);
-out:
-	return -ENOMEM;
-}
-
-static void nf_conntrack_event_fini_sysctl(struct net *net)
-{
-	struct ctl_table *table;
-
-	table = net->ct.event_sysctl_header->ctl_table_arg;
-	unregister_net_sysctl_table(net->ct.event_sysctl_header);
-	kfree(table);
-}
-#else
-static int nf_conntrack_event_init_sysctl(struct net *net)
-{
-	return 0;
-}
-
-static void nf_conntrack_event_fini_sysctl(struct net *net)
-{
-}
-#endif /* CONFIG_SYSCTL */
-
-int nf_conntrack_ecache_pernet_init(struct net *net)
+void nf_conntrack_ecache_pernet_init(struct net *net)
 {
 	net->ct.sysctl_events = nf_ct_events;
 	INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
-	return nf_conntrack_event_init_sysctl(net);
 }
 
 void nf_conntrack_ecache_pernet_fini(struct net *net)
 {
 	cancel_delayed_work_sync(&net->ct.ecache_dwork);
-	nf_conntrack_event_fini_sysctl(net);
 }
 
 int nf_conntrack_ecache_init(void)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 27b8423..42557d2 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Expectation handling for nf_conntrack. */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
  * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -28,8 +25,10 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 
@@ -252,13 +251,22 @@
 static inline int expect_matches(const struct nf_conntrack_expect *a,
 				 const struct nf_conntrack_expect *b)
 {
-	return a->master == b->master &&
-	       nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+	return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
 	       nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
 	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
 	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
 }
 
+static bool master_matches(const struct nf_conntrack_expect *a,
+			   const struct nf_conntrack_expect *b,
+			   unsigned int flags)
+{
+	if (flags & NF_CT_EXP_F_SKIP_MASTER)
+		return true;
+
+	return a->master == b->master;
+}
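+
+/* NF_CT_EXP_F_SKIP_MASTER lets a caller treat an equivalent expectation
+ * installed by a different master as already present (-EALREADY) rather
+ * than registering a duplicate.
+ */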
+
 /* Generally a bad idea to call this: could have matched already. */
 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
 {
@@ -336,7 +344,7 @@
 
 	exp->tuple.dst.u.all = *dst;
 
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 	memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
 	memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
 #endif
@@ -402,7 +410,8 @@
 		nf_ct_remove_expect(last);
 }
 
-static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
+				       unsigned int flags)
 {
 	const struct nf_conntrack_expect_policy *p;
 	struct nf_conntrack_expect *i;
@@ -420,8 +429,10 @@
 	}
 	h = nf_ct_expect_dst_hash(net, &expect->tuple);
 	hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
-		if (expect_matches(i, expect)) {
-			if (i->class != expect->class)
+		if (master_matches(i, expect, flags) &&
+		    expect_matches(i, expect)) {
+			if (i->class != expect->class ||
+			    i->master != expect->master)
 				return -EALREADY;
 
 			if (nf_ct_remove_expect(i))
@@ -456,12 +467,12 @@
 }
 
 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
-				u32 portid, int report)
+				u32 portid, int report, unsigned int flags)
 {
 	int ret;
 
 	spin_lock_bh(&nf_conntrack_expect_lock);
-	ret = __nf_ct_expect_check(expect);
+	ret = __nf_ct_expect_check(expect, flags);
 	if (ret < 0)
 		goto out;
 
@@ -610,8 +621,7 @@
 		   expect->tuple.src.l3num,
 		   expect->tuple.dst.protonum);
 	print_tuple(s, &expect->tuple,
-		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
-				       expect->tuple.dst.protonum));
+		    nf_ct_l4proto_find(expect->tuple.dst.protonum));
 
 	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
 		seq_puts(s, "PERMANENT");
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 277bbfe..d4ed1e1 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Structure dynamic extension infrastructure
  * Copyright (C) 2004 Rusty Russell IBM Corporation
  * Copyright (C) 2007 Netfilter Core Team <coreteam@netfilter.org>
  * Copyright (C) 2007 USAGI/WIDE Project <http://www.linux-ipv6.org>
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
 #include <linux/kmemleak.h>
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index a11c304..9eca904 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* FTP extension for connection tracking. */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -29,11 +26,13 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <linux/netfilter/nf_conntrack_ftp.h>
 
+#define HELPER_NAME "ftp"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
 MODULE_DESCRIPTION("ftp connection tracking helper");
 MODULE_ALIAS("ip_conntrack_ftp");
-MODULE_ALIAS_NFCT_HELPER("ftp");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
 
 /* This is slow, but it's simple. --RR */
 static char *ftp_buffer;
@@ -163,7 +162,7 @@
 	if (length == 0)
 		return 0;
 
-	cmd->u3.ip =  htonl((array[0] << 24) | (array[1] << 16) |
+	cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) |
 				    (array[2] << 8) | array[3]);
 	cmd->u.tcp.port = htons((array[4] << 8) | array[5]);
 	return length;
@@ -323,7 +322,7 @@
 		i++;
 	}
 
-	pr_debug("Skipped up to `%c'!\n", skip);
+	pr_debug("Skipped up to 0x%hhx delimiter!\n", skip);
 
 	*numoff = i;
 	*numlen = getnum(data + i, dlen - i, cmd, term, numoff);
@@ -526,7 +525,7 @@
 				 protoff, matchoff, matchlen, exp);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
-		if (nf_ct_expect_related(exp) != 0) {
+		if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		} else
@@ -588,12 +587,14 @@
 	/* FIXME should be configurable whether IPv4 and IPv6 FTP connections
 		 are tracked or not - YK */
 	for (i = 0; i < ports_c; i++) {
-		nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, "ftp",
-				  FTP_PORT, ports[i], ports[i], &ftp_exp_policy,
-				  0, help, nf_ct_ftp_from_nlattr, THIS_MODULE);
-		nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp",
-				  FTP_PORT, ports[i], ports[i], &ftp_exp_policy,
-				  0, help, nf_ct_ftp_from_nlattr, THIS_MODULE);
+		nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP,
+				  HELPER_NAME, FTP_PORT, ports[i], ports[i],
+				  &ftp_exp_policy, 0, help,
+				  nf_ct_ftp_from_nlattr, THIS_MODULE);
+		nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP,
+				  HELPER_NAME, FTP_PORT, ports[i], ports[i],
+				  &ftp_exp_policy, 0, help,
+				  nf_ct_ftp_from_nlattr, THIS_MODULE);
 	}
 
 	ret = nf_conntrack_helpers_register(ftp, ports_c * 2);
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 1601275..573cb44 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323
- * 			      	     conntrack/NAT module.
+ * BER and PER decoding library for H.323 conntrack/NAT module.
  *
  * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
  *
- * This source code is licensed under General Public License version 2.
- *
- * See ip_conntrack_helper_h323_asn1.h for details.
- *
+ * See nf_conntrack_helper_h323_asn1.h for details.
  */
 
 #ifdef __KERNEL__
@@ -172,7 +169,7 @@
 	if (bits % BITS_PER_BYTE > 0)
 		bytes++;
 
-	if (*bs->cur + bytes > *bs->end)
+	if (bs->cur + bytes > bs->end)
 		return 1;
 
 	return 0;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 005589c..8ba037b 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1,13 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * H.323 connection tracking helper
  *
  * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
  * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
- * This source code is licensed under General Public License version 2.
- *
  * Based on the 'brute force' H.323 connection tracking module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Jozsef Kadlecsik <kadlec@netfilter.org>
  *
  * For more information, please see http://nath323.sourceforge.net/
  */
@@ -306,8 +305,8 @@
 		ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
 				   taddr, port, rtp_port, rtp_exp, rtcp_exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(rtp_exp) == 0) {
-			if (nf_ct_expect_related(rtcp_exp) == 0) {
+		if (nf_ct_expect_related(rtp_exp, 0) == 0) {
+			if (nf_ct_expect_related(rtcp_exp, 0) == 0) {
 				pr_debug("nf_ct_h323: expect RTP ");
 				nf_ct_dump_tuple(&rtp_exp->tuple);
 				pr_debug("nf_ct_h323: expect RTCP ");
@@ -365,7 +364,7 @@
 		ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_h323: expect T.120 ");
 			nf_ct_dump_tuple(&exp->tuple);
 		} else
@@ -702,7 +701,7 @@
 		ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_q931: expect H.245 ");
 			nf_ct_dump_tuple(&exp->tuple);
 		} else
@@ -748,24 +747,19 @@
 		}
 		break;
 	}
-#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6: {
-		const struct nf_ipv6_ops *v6ops;
 		struct rt6_info *rt1, *rt2;
 		struct flowi6 fl1, fl2;
 
-		v6ops = nf_get_ipv6_ops();
-		if (!v6ops)
-			return 0;
-
 		memset(&fl1, 0, sizeof(fl1));
 		fl1.daddr = src->in6;
 
 		memset(&fl2, 0, sizeof(fl2));
 		fl2.daddr = dst->in6;
-		if (!v6ops->route(net, (struct dst_entry **)&rt1,
+		if (!nf_ip6_route(net, (struct dst_entry **)&rt1,
 				  flowi6_to_flowi(&fl1), false)) {
-			if (!v6ops->route(net, (struct dst_entry **)&rt2,
+			if (!nf_ip6_route(net, (struct dst_entry **)&rt2,
 					  flowi6_to_flowi(&fl2), false)) {
 				if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
 						    rt6_nexthop(rt2, &fl2.daddr)) &&
@@ -831,7 +825,7 @@
 					 protoff, data, dataoff,
 					 taddr, port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_q931: expect Call Forwarding ");
 			nf_ct_dump_tuple(&exp->tuple);
 		} else
@@ -1290,7 +1284,7 @@
 		ret = nat_q931(skb, ct, ctinfo, protoff, data,
 			       taddr, i, port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_ras: expect Q.931 ");
 			nf_ct_dump_tuple(&exp->tuple);
 
@@ -1355,7 +1349,7 @@
 			  IPPROTO_UDP, NULL, &port);
 	exp->helper = nf_conntrack_helper_ras;
 
-	if (nf_ct_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp, 0) == 0) {
 		pr_debug("nf_ct_ras: expect RAS ");
 		nf_ct_dump_tuple(&exp->tuple);
 	} else
@@ -1567,7 +1561,7 @@
 	exp->flags = NF_CT_EXPECT_PERMANENT;
 	exp->helper = nf_conntrack_helper_q931;
 
-	if (nf_ct_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp, 0) == 0) {
 		pr_debug("nf_ct_ras: expect Q.931 ");
 		nf_ct_dump_tuple(&exp->tuple);
 	} else
@@ -1621,7 +1615,7 @@
 	exp->flags = NF_CT_EXPECT_PERMANENT;
 	exp->helper = nf_conntrack_helper_q931;
 
-	if (nf_ct_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp, 0) == 0) {
 		pr_debug("nf_ct_ras: expect Q.931 ");
 		nf_ct_dump_tuple(&exp->tuple);
 	} else
diff --git a/net/netfilter/nf_conntrack_h323_types.c b/net/netfilter/nf_conntrack_h323_types.c
index d880f35..fb1cb67 100644
--- a/net/netfilter/nf_conntrack_h323_types.c
+++ b/net/netfilter/nf_conntrack_h323_types.c
@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Generated by Jing Min Zhao's ASN.1 parser, May 16 2007
  *
  * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
- *
- * This source code is licensed under General Public License version 2.
  */
 
 static const struct field_t _TransportAddress_ipAddress[] = {	/* SEQUENCE */
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index e24b762..118f415 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Helper handling for netfilter. */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -24,10 +21,11 @@
 #include <linux/rtnetlink.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_log.h>
 
 static DEFINE_MUTEX(nf_ct_helper_mutex);
@@ -42,66 +40,8 @@
 MODULE_PARM_DESC(nf_conntrack_helper,
 		 "Enable automatic conntrack helper assignment (default 0)");
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table helper_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_helper",
-		.data		= &init_net.ct.sysctl_auto_assign_helper,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{}
-};
-
-static int nf_conntrack_helper_init_sysctl(struct net *net)
-{
-	struct ctl_table *table;
-
-	table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table),
-			GFP_KERNEL);
-	if (!table)
-		goto out;
-
-	table[0].data = &net->ct.sysctl_auto_assign_helper;
-
-	/* Don't export sysctls to unprivileged users */
-	if (net->user_ns != &init_user_ns)
-		table[0].procname = NULL;
-
-	net->ct.helper_sysctl_header =
-		register_net_sysctl(net, "net/netfilter", table);
-
-	if (!net->ct.helper_sysctl_header) {
-		pr_err("nf_conntrack_helper: can't register to sysctl.\n");
-		goto out_register;
-	}
-	return 0;
-
-out_register:
-	kfree(table);
-out:
-	return -ENOMEM;
-}
-
-static void nf_conntrack_helper_fini_sysctl(struct net *net)
-{
-	struct ctl_table *table;
-
-	table = net->ct.helper_sysctl_header->ctl_table_arg;
-	unregister_net_sysctl_table(net->ct.helper_sysctl_header);
-	kfree(table);
-}
-#else
-static int nf_conntrack_helper_init_sysctl(struct net *net)
-{
-	return 0;
-}
-
-static void nf_conntrack_helper_fini_sysctl(struct net *net)
-{
-}
-#endif /* CONFIG_SYSCTL */
+static DEFINE_MUTEX(nf_ct_nat_helpers_mutex);
+static struct list_head nf_ct_nat_helpers __read_mostly;
 
 /* Stupid hash, but collision free for the default registrations of the
  * helpers currently in the kernel. */
@@ -191,6 +131,70 @@
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
 
+static struct nf_conntrack_nat_helper *
+nf_conntrack_nat_helper_find(const char *mod_name)
+{
+	struct nf_conntrack_nat_helper *cur;
+	bool found = false;
+
+	list_for_each_entry_rcu(cur, &nf_ct_nat_helpers, list) {
+		if (!strcmp(cur->mod_name, mod_name)) {
+			found = true;
+			break;
+		}
+	}
+	return found ? cur : NULL;
+}
+
+int
+nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
+{
+	struct nf_conntrack_helper *h;
+	struct nf_conntrack_nat_helper *nat;
+	char mod_name[NF_CT_HELPER_NAME_LEN];
+	int ret = 0;
+
+	rcu_read_lock();
+	h = __nf_conntrack_helper_find(name, l3num, protonum);
+	if (!h) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
+
+	nat = nf_conntrack_nat_helper_find(h->nat_mod_name);
+	if (!nat) {
+		snprintf(mod_name, sizeof(mod_name), "%s", h->nat_mod_name);
+		rcu_read_unlock();
+		request_module(mod_name);
+
+		rcu_read_lock();
+		nat = nf_conntrack_nat_helper_find(mod_name);
+		if (!nat) {
+			rcu_read_unlock();
+			return -ENOENT;
+		}
+	}
+
+	if (!try_module_get(nat->module))
+		ret = -ENOENT;
+
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_helper_try_module_get);
+
+void nf_nat_helper_put(struct nf_conntrack_helper *helper)
+{
+	struct nf_conntrack_nat_helper *nat;
+
+	nat = nf_conntrack_nat_helper_find(helper->nat_mod_name);
+	if (WARN_ON_ONCE(!nat))
+		return;
+
+	module_put(nat->module);
+}
+EXPORT_SYMBOL_GPL(nf_nat_helper_put);
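+/* Intended pairing (illustrative sketch, not part of this hunk): a
+ * control-plane caller pins the NAT helper module before attaching the
+ * helper and releases it on detach, e.g.:
+ *
+ *	if (nf_nat_helper_try_module_get("ftp", AF_INET, IPPROTO_TCP) == 0) {
+ *		... attach the helper to the conntrack ...
+ *		nf_nat_helper_put(helper);
+ *	}
+ */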
+
 struct nf_conn_help *
 nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
 {
@@ -491,6 +495,8 @@
 	helper->help = help;
 	helper->from_nlattr = from_nlattr;
 	helper->me = module;
+	snprintf(helper->nat_mod_name, sizeof(helper->nat_mod_name),
+		 NF_NAT_HELPER_PREFIX "%s", name);
 
 	if (spec_port == default_port)
 		snprintf(helper->name, sizeof(helper->name), "%s", name);
@@ -527,22 +533,32 @@
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister);
 
+void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat)
+{
+	mutex_lock(&nf_ct_nat_helpers_mutex);
+	list_add_rcu(&nat->list, &nf_ct_nat_helpers);
+	mutex_unlock(&nf_ct_nat_helpers_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_helper_register);
+
+void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat)
+{
+	mutex_lock(&nf_ct_nat_helpers_mutex);
+	list_del_rcu(&nat->list);
+	mutex_unlock(&nf_ct_nat_helpers_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_helper_unregister);
+
 static const struct nf_ct_ext_type helper_extend = {
 	.len	= sizeof(struct nf_conn_help),
 	.align	= __alignof__(struct nf_conn_help),
 	.id	= NF_CT_EXT_HELPER,
 };
 
-int nf_conntrack_helper_pernet_init(struct net *net)
+void nf_conntrack_helper_pernet_init(struct net *net)
 {
 	net->ct.auto_assign_helper_warned = false;
 	net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
-	return nf_conntrack_helper_init_sysctl(net);
-}
-
-void nf_conntrack_helper_pernet_fini(struct net *net)
-{
-	nf_conntrack_helper_fini_sysctl(net);
 }
 
 int nf_conntrack_helper_init(void)
@@ -560,6 +576,7 @@
 		goto out_extend;
 	}
 
+	INIT_LIST_HEAD(&nf_ct_nat_helpers);
 	return 0;
 out_extend:
 	kvfree(nf_ct_helper_hash);
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 4099f4d..e40988a 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* IRC extension for IP connection tracking, Version 1.21
  * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
  * based on RR's ip_conntrack_ftp.c
  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -42,11 +38,13 @@
 				struct nf_conntrack_expect *exp) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_irc_hook);
 
+#define HELPER_NAME "irc"
+
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_conntrack_irc");
-MODULE_ALIAS_NFCT_HELPER("irc");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
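Pinning the name down in one macro is load-bearing here: nf_ct_helper_init() now derives the NAT companion's module name from the helper name (the snprintf of nat_mod_name above), so "irc" must yield exactly "nf_nat_irc" for the request_module() call in nf_nat_helper_try_module_get() to resolve. The companion module is expected to advertise a matching alias; illustratively:

	/* In the companion NAT module (nf_nat_irc); the literal shown is
	 * illustrative, not taken from that module's source. */
	MODULE_ALIAS("nf_nat_irc");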
 
 module_param_array(ports, ushort, &ports_c, 0400);
 MODULE_PARM_DESC(ports, "port numbers of IRC servers");
@@ -215,7 +213,7 @@
 						 addr_beg_p - ib_ptr,
 						 addr_end_p - addr_beg_p,
 						 exp);
-			else if (nf_ct_expect_related(exp) != 0) {
+			else if (nf_ct_expect_related(exp, 0) != 0) {
 				nf_ct_helper_log(skb, ct,
 						 "cannot add expectation");
 				ret = NF_DROP;
@@ -259,7 +257,7 @@
 		ports[ports_c++] = IRC_PORT;
 
 	for (i = 0; i < ports_c; i++) {
-		nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc",
+		nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, HELPER_NAME,
 				  IRC_PORT, ports[i], i, &irc_exp_policy,
 				  0, help, NULL, THIS_MODULE);
 	}
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index adf2198..5227925 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * test/set flag bits stored in conntrack extension area.
  *
  * (C) 2013 Astaro GmbH & Co KG
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/export.h>
@@ -14,7 +11,7 @@
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_labels.h>
 
-static spinlock_t nf_connlabels_lock;
+static DEFINE_SPINLOCK(nf_connlabels_lock);
 
 static int replace_u32(u32 *address, u32 mask, u32 new)
 {
@@ -92,7 +89,6 @@
 {
 	BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);
 
-	spin_lock_init(&nf_connlabels_lock);
 	return nf_ct_extend_register(&labels_extend);
 }
 
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index bac5848..7f19ee2 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *      NetBIOS name service broadcast connection tracking helper
  *
  *      (c) 2005 Patrick McHardy <kaber@trash.net>
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 /*
  *      This helper tracks locally originating NetBIOS name service
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 036207e..e2d13cd 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -29,6 +29,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/siphash.h>
 
 #include <linux/netfilter.h>
 #include <net/netlink.h>
@@ -45,9 +46,8 @@
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_conntrack_labels.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
-#ifdef CONFIG_NF_NAT_NEEDED
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l4proto.h>
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
 #endif
 
@@ -63,7 +63,7 @@
 	int ret = 0;
 	struct nlattr *nest_parms;
 
-	nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO);
 	if (!nest_parms)
 		goto nla_put_failure;
 	if (nla_put_u8(skb, CTA_PROTO_NUM, tuple->dst.protonum))
@@ -104,7 +104,7 @@
 	int ret = 0;
 	struct nlattr *nest_parms;
 
-	nest_parms = nla_nest_start(skb, CTA_TUPLE_IP | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_TUPLE_IP);
 	if (!nest_parms)
 		goto nla_put_failure;
 
@@ -135,8 +135,7 @@
 	ret = ctnetlink_dump_tuples_ip(skb, tuple);
 
 	if (ret >= 0) {
-		l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
-					       tuple->dst.protonum);
+		l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
 		ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto);
 	}
 	rcu_read_unlock();
@@ -184,11 +183,11 @@
 	struct nlattr *nest_proto;
 	int ret;
 
-	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 	if (!l4proto->to_nlattr)
 		return 0;
 
-	nest_proto = nla_nest_start(skb, CTA_PROTOINFO | NLA_F_NESTED);
+	nest_proto = nla_nest_start(skb, CTA_PROTOINFO);
 	if (!nest_proto)
 		goto nla_put_failure;
 
@@ -216,7 +215,7 @@
 	if (!helper)
 		goto out;
 
-	nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED);
+	nest_helper = nla_nest_start(skb, CTA_HELP);
 	if (!nest_helper)
 		goto nla_put_failure;
 	if (nla_put_string(skb, CTA_HELP_NAME, helper->name))
@@ -250,7 +249,7 @@
 		bytes = atomic64_read(&counter[dir].bytes);
 	}
 
-	nest_count = nla_nest_start(skb, attr | NLA_F_NESTED);
+	nest_count = nla_nest_start(skb, attr);
 	if (!nest_count)
 		goto nla_put_failure;
 
@@ -294,7 +293,7 @@
 	if (!tstamp)
 		return 0;
 
-	nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED);
+	nest_count = nla_nest_start(skb, CTA_TIMESTAMP);
 	if (!nest_count)
 		goto nla_put_failure;
 
@@ -338,7 +337,7 @@
 		return 0;
 
 	ret = -1;
-	nest_secctx = nla_nest_start(skb, CTA_SECCTX | NLA_F_NESTED);
+	nest_secctx = nla_nest_start(skb, CTA_SECCTX);
 	if (!nest_secctx)
 		goto nla_put_failure;
 
@@ -398,7 +397,7 @@
 	if (!(ct->status & IPS_EXPECTED))
 		return 0;
 
-	nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER);
 	if (!nest_parms)
 		goto nla_put_failure;
 	if (ctnetlink_dump_tuples(skb, master_tuple(ct)) < 0)
@@ -416,7 +415,7 @@
 {
 	struct nlattr *nest_parms;
 
-	nest_parms = nla_nest_start(skb, type | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, type);
 	if (!nest_parms)
 		goto nla_put_failure;
 
@@ -468,7 +467,7 @@
 	if (!synproxy)
 		return 0;
 
-	nest_parms = nla_nest_start(skb, CTA_SYNPROXY | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_SYNPROXY);
 	if (!nest_parms)
 		goto nla_put_failure;
 
@@ -487,7 +486,9 @@
 
 static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
 {
-	if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct)))
+	__be32 id = (__force __be32)nf_ct_get_id(ct);
+
+	if (nla_put_be32(skb, CTA_ID, id))
 		goto nla_put_failure;
 	return 0;
 
@@ -527,7 +528,7 @@
 
 	zone = nf_ct_zone(ct);
 
-	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
 	if (!nest_parms)
 		goto nla_put_failure;
 	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
@@ -537,7 +538,7 @@
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
-	nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY);
 	if (!nest_parms)
 		goto nla_put_failure;
 	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
@@ -552,10 +553,8 @@
 		goto nla_put_failure;
 
 	if (ctnetlink_dump_status(skb, ct) < 0 ||
-	    ctnetlink_dump_timeout(skb, ct) < 0 ||
 	    ctnetlink_dump_acct(skb, ct, type) < 0 ||
 	    ctnetlink_dump_timestamp(skb, ct) < 0 ||
-	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_mark(skb, ct) < 0 ||
 	    ctnetlink_dump_secctx(skb, ct) < 0 ||
@@ -567,6 +566,11 @@
 	    ctnetlink_dump_ct_synproxy(skb, ct) < 0)
 		goto nla_put_failure;
 
+	if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
+	    (ctnetlink_dump_timeout(skb, ct) < 0 ||
+	     ctnetlink_dump_protoinfo(skb, ct) < 0))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return skb->len;
 
@@ -592,7 +596,7 @@
 	len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
 	len *= 3u; /* ORIG, REPLY, MASTER */
 
-	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 	len += l4proto->nlattr_size;
 	if (l4proto->nlattr_tuple_size) {
 		len4 = l4proto->nlattr_tuple_size();
@@ -657,7 +661,7 @@
 	       + nla_total_size(0) /* CTA_HELP */
 	       + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
 	       + ctnetlink_secctx_size(ct)
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 	       + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
 	       + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
 #endif
@@ -719,7 +723,7 @@
 
 	zone = nf_ct_zone(ct);
 
-	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
 	if (!nest_parms)
 		goto nla_put_failure;
 	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
@@ -729,7 +733,7 @@
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
-	nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY);
 	if (!nest_parms)
 		goto nla_put_failure;
 	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
@@ -821,6 +825,7 @@
 }
 
 struct ctnetlink_filter {
+	u8 family;
 	struct {
 		u_int32_t val;
 		u_int32_t mask;
@@ -828,31 +833,39 @@
 };
 
 static struct ctnetlink_filter *
-ctnetlink_alloc_filter(const struct nlattr * const cda[])
+ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
 {
-#ifdef CONFIG_NF_CONNTRACK_MARK
 	struct ctnetlink_filter *filter;
 
+#ifndef CONFIG_NF_CONNTRACK_MARK
+	if (cda[CTA_MARK] && cda[CTA_MARK_MASK])
+		return ERR_PTR(-EOPNOTSUPP);
+#endif
+
 	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
 	if (filter == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
-	filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+	filter->family = family;
 
-	return filter;
-#else
-	return ERR_PTR(-EOPNOTSUPP);
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
+		filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
+		filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+	}
 #endif
+	return filter;
 }
 
 static int ctnetlink_start(struct netlink_callback *cb)
 {
 	const struct nlattr * const *cda = cb->data;
 	struct ctnetlink_filter *filter = NULL;
+	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	u8 family = nfmsg->nfgen_family;
 
-	if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
-		filter = ctnetlink_alloc_filter(cda);
+	if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
+		filter = ctnetlink_alloc_filter(cda, family);
 		if (IS_ERR(filter))
 			return PTR_ERR(filter);
 	}
@@ -866,13 +879,24 @@
 	struct ctnetlink_filter *filter = data;
 
 	if (filter == NULL)
-		return 1;
+		goto out;
+
+	/* Match entries of a given L3 protocol number.
+	 * If it is not specified, ie. l3proto == 0,
+	 * then match everything.
+	 */
+	if (filter->family && nf_ct_l3num(ct) != filter->family)
+		goto ignore_entry;
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	if ((ct->mark & filter->mark.mask) == filter->mark.val)
-		return 1;
+	if ((ct->mark & filter->mark.mask) != filter->mark.val)
+		goto ignore_entry;
 #endif
 
+out:
+	return 1;
+
+ignore_entry:
 	return 0;
 }
 
@@ -883,8 +907,6 @@
 	struct nf_conn *ct, *last;
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-	u_int8_t l3proto = nfmsg->nfgen_family;
 	struct nf_conn *nf_ct_evict[8];
 	int res, i;
 	spinlock_t *lockp;
@@ -923,11 +945,6 @@
 			if (!net_eq(net, nf_ct_net(ct)))
 				continue;
 
-			/* Dump entries of a given L3 protocol number.
-			 * If it is not specified, ie. l3proto == 0,
-			 * then dump everything. */
-			if (l3proto && nf_ct_l3num(ct) != l3proto)
-				continue;
 			if (cb->args[1]) {
 				if (ct != last)
 					continue;
@@ -1006,12 +1023,12 @@
 	struct nlattr *tb[CTA_IP_MAX+1];
 	int ret = 0;
 
-	ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL);
+	ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr, NULL, NULL);
 	if (ret < 0)
 		return ret;
 
-	ret = nla_validate_nested(attr, CTA_IP_MAX,
-				  cta_ip_nla_policy, NULL);
+	ret = nla_validate_nested_deprecated(attr, CTA_IP_MAX,
+					     cta_ip_nla_policy, NULL);
 	if (ret)
 		return ret;
 
@@ -1038,8 +1055,8 @@
 	struct nlattr *tb[CTA_PROTO_MAX+1];
 	int ret = 0;
 
-	ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy,
-			       NULL);
+	ret = nla_parse_nested_deprecated(tb, CTA_PROTO_MAX, attr,
+					  proto_nla_policy, NULL);
 	if (ret < 0)
 		return ret;
 
@@ -1048,11 +1065,12 @@
 	tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
 
 	rcu_read_lock();
-	l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
+	l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
 
 	if (likely(l4proto->nlattr_to_tuple)) {
-		ret = nla_validate_nested(attr, CTA_PROTO_MAX,
-					  l4proto->nla_policy, NULL);
+		ret = nla_validate_nested_deprecated(attr, CTA_PROTO_MAX,
+						     l4proto->nla_policy,
+						     NULL);
 		if (ret == 0)
 			ret = l4proto->nlattr_to_tuple(tb, tuple);
 	}
@@ -1115,8 +1133,8 @@
 
 	memset(tuple, 0, sizeof(*tuple));
 
-	err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(tb, CTA_TUPLE_MAX, cda[type],
+					  tuple_nla_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -1166,7 +1184,8 @@
 	int err;
 	struct nlattr *tb[CTA_HELP_MAX+1];
 
-	err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, CTA_HELP_MAX, attr,
+					  help_nla_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -1213,12 +1232,12 @@
 
 static int ctnetlink_flush_conntrack(struct net *net,
 				     const struct nlattr * const cda[],
-				     u32 portid, int report)
+				     u32 portid, int report, u8 family)
 {
 	struct ctnetlink_filter *filter = NULL;
 
-	if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
-		filter = ctnetlink_alloc_filter(cda);
+	if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
+		filter = ctnetlink_alloc_filter(cda, family);
 		if (IS_ERR(filter))
 			return PTR_ERR(filter);
 	}
@@ -1240,7 +1259,6 @@
 	struct nf_conntrack_tuple tuple;
 	struct nf_conn *ct;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_zone zone;
 	int err;
 
@@ -1250,14 +1268,16 @@
 
 	if (cda[CTA_TUPLE_ORIG])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
-					    u3, &zone);
+					    nfmsg->nfgen_family, &zone);
 	else if (cda[CTA_TUPLE_REPLY])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
-					    u3, &zone);
+					    nfmsg->nfgen_family, &zone);
 	else {
+		u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
+
 		return ctnetlink_flush_conntrack(net, cda,
 						 NETLINK_CB(skb).portid,
-						 nlmsg_report(nlh));
+						 nlmsg_report(nlh), u3);
 	}
 
 	if (err < 0)
@@ -1275,8 +1295,9 @@
 	}
 
 	if (cda[CTA_ID]) {
-		u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
-		if (id != (u32)(unsigned long)ct) {
+		__be32 id = nla_get_be32(cda[CTA_ID]);
+
+		if (id != (__force __be32)nf_ct_get_id(ct)) {
 			nf_ct_put(ct);
 			return -ENOENT;
 		}
@@ -1483,7 +1504,7 @@
 	return -EOPNOTSUPP;
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 static int
 ctnetlink_parse_nat_setup(struct nf_conn *ct,
 			  enum nf_nat_manip_type manip,
@@ -1575,7 +1596,7 @@
 static int
 ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
 {
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 	int ret;
 
 	if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC])
@@ -1676,6 +1697,22 @@
 	return 0;
 }
 
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+static void ctnetlink_change_mark(struct nf_conn *ct,
+				    const struct nlattr * const cda[])
+{
+	u32 mark, newmark, mask = 0;
+
+	if (cda[CTA_MARK_MASK])
+		mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+
+	mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+	newmark = (ct->mark & mask) ^ mark;
+	if (newmark != ct->mark)
+		ct->mark = newmark;
+}
+#endif
+
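The mask handling is easy to read backwards, so a worked example: the bits set in CTA_MARK_MASK are the ones CTA_MARK replaces (assuming CTA_MARK is confined to the masked bits, as usual); bits outside the mask keep their old value, and with no CTA_MARK_MASK at all, mask stays 0 and CTA_MARK wins outright. Illustrative values:

	/*   ct->mark      = 0x00ff00aa
	 *   CTA_MARK      = 0x00000042
	 *   CTA_MARK_MASK = 0x000000ff
	 *
	 *   mask    = ~0x000000ff = 0xffffff00
	 *   newmark = (0x00ff00aa & 0xffffff00) ^ 0x00000042
	 *           = 0x00ff0000 ^ 0x00000042
	 *           = 0x00ff0042	(only the masked byte changed)
	 */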
 static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = {
 	[CTA_PROTOINFO_TCP]	= { .type = NLA_NESTED },
 	[CTA_PROTOINFO_DCCP]	= { .type = NLA_NESTED },
@@ -1690,16 +1727,14 @@
 	struct nlattr *tb[CTA_PROTOINFO_MAX+1];
 	int err = 0;
 
-	err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_MAX, attr,
+					  protoinfo_policy, NULL);
 	if (err < 0)
 		return err;
 
-	rcu_read_lock();
-	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 	if (l4proto->from_nlattr)
 		err = l4proto->from_nlattr(tb, ct);
-	rcu_read_unlock();
 
 	return err;
 }
@@ -1716,7 +1751,8 @@
 	int err;
 	struct nlattr *cda[CTA_SEQADJ_MAX+1];
 
-	err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy, NULL);
+	err = nla_parse_nested_deprecated(cda, CTA_SEQADJ_MAX, attr,
+					  seqadj_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -1793,8 +1829,9 @@
 	if (!synproxy)
 		return 0;
 
-	err = nla_parse_nested(tb, CTA_SYNPROXY_MAX, cda[CTA_SYNPROXY],
-			       synproxy_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, CTA_SYNPROXY_MAX,
+					  cda[CTA_SYNPROXY], synproxy_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -1871,7 +1908,7 @@
 
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
-		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+		ctnetlink_change_mark(ct, cda);
 #endif
 
 	if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
@@ -2015,7 +2052,7 @@
 
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
-		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+		ctnetlink_change_mark(ct, cda);
 #endif
 
 	/* setup master conntrack: this is a confirmed expectation */
@@ -2344,7 +2381,7 @@
 	       + nla_total_size(0) /* CTA_HELP */
 	       + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
 	       + ctnetlink_secctx_size(ct)
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 	       + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
 	       + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
 #endif
@@ -2371,7 +2408,7 @@
 
 	zone = nf_ct_zone(ct);
 
-	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
 	if (!nest_parms)
 		goto nla_put_failure;
 	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
@@ -2381,7 +2418,7 @@
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
-	nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY);
 	if (!nest_parms)
 		goto nla_put_failure;
 	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
@@ -2443,7 +2480,7 @@
 {
 	struct nlattr *nest_parms;
 
-	nest_parms = nla_nest_start(skb, ct_attr | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, ct_attr);
 	if (!nest_parms)
 		goto nla_put_failure;
 
@@ -2512,14 +2549,7 @@
 	}
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK]) {
-		u32 mask = 0, mark, newmark;
-		if (cda[CTA_MARK_MASK])
-			mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
-
-		mark = ntohl(nla_get_be32(cda[CTA_MARK]));
-		newmark = (ct->mark & mask) ^ mark;
-		if (newmark != ct->mark)
-			ct->mark = newmark;
+		ctnetlink_change_mark(ct, cda);
 	}
 #endif
 	return 0;
@@ -2531,7 +2561,8 @@
 	struct nlattr *cda[CTA_MAX+1];
 	int ret;
 
-	ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy, NULL);
+	ret = nla_parse_nested_deprecated(cda, CTA_MAX, attr, ct_nla_policy,
+					  NULL);
 	if (ret < 0)
 		return ret;
 
@@ -2564,8 +2595,8 @@
 	struct nf_conntrack_expect *exp;
 	int err;
 
-	err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(cda, CTA_EXPECT_MAX, attr,
+					  exp_nla_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -2588,7 +2619,7 @@
 	if (IS_ERR(exp))
 		return PTR_ERR(exp);
 
-	err = nf_ct_expect_related_report(exp, portid, report);
+	err = nf_ct_expect_related_report(exp, portid, report, 0);
 	nf_ct_expect_put(exp);
 	return err;
 }
@@ -2622,7 +2653,7 @@
 {
 	struct nlattr *nest_parms;
 
-	nest_parms = nla_nest_start(skb, type | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, type);
 	if (!nest_parms)
 		goto nla_put_failure;
 	if (ctnetlink_dump_tuples(skb, tuple) < 0)
@@ -2649,16 +2680,15 @@
 	m.src.u.all = mask->src.u.all;
 	m.dst.protonum = tuple->dst.protonum;
 
-	nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK);
 	if (!nest_parms)
 		goto nla_put_failure;
 
 	rcu_read_lock();
 	ret = ctnetlink_dump_tuples_ip(skb, &m);
 	if (ret >= 0) {
-		l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
-					       tuple->dst.protonum);
-	ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
+		l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
+		ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
 	}
 	rcu_read_unlock();
 
@@ -2675,6 +2705,25 @@
 
 static const union nf_inet_addr any_addr;
 
+static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
+{
+	static __read_mostly siphash_key_t exp_id_seed;
+	unsigned long a, b, c, d;
+
+	net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));
+
+	a = (unsigned long)exp;
+	b = (unsigned long)exp->helper;
+	c = (unsigned long)exp->master;
+	d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed);
+
+#ifdef CONFIG_64BIT
+	return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed);
+#else
+	return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed);
+#endif
+}
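nf_expect_get_id() exists to stop leaking kernel pointers: the old code (removed further down) put htonl((unsigned long)exp) on the wire as CTA_EXPECT_ID. The pattern in isolation, as a minimal sketch with a hypothetical helper name:

	#include <linux/net.h>
	#include <linux/siphash.h>

	/* Derive a stable, opaque ID from an object's address without
	 * revealing the address: hash it with a boot-time random key. */
	static u32 example_object_id(const void *obj)
	{
		static siphash_key_t id_seed __read_mostly;

		net_get_random_once(&id_seed, sizeof(id_seed));
		return (u32)siphash_1u64((u64)(unsigned long)obj, &id_seed);
	}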
+
 static int
 ctnetlink_exp_dump_expect(struct sk_buff *skb,
 			  const struct nf_conntrack_expect *exp)
@@ -2682,7 +2731,7 @@
 	struct nf_conn *master = exp->master;
 	long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
 	struct nf_conn_help *help;
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 	struct nlattr *nest_parms;
 	struct nf_conntrack_tuple nat_tuple = {};
 #endif
@@ -2700,10 +2749,10 @@
 				 CTA_EXPECT_MASTER) < 0)
 		goto nla_put_failure;
 
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 	if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) ||
 	    exp->saved_proto.all) {
-		nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED);
+		nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT);
 		if (!nest_parms)
 			goto nla_put_failure;
 
@@ -2722,7 +2771,7 @@
 	}
 #endif
 	if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) ||
-	    nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) ||
+	    nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) ||
 	    nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
 	    nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
 		goto nla_put_failure;
@@ -3027,7 +3076,8 @@
 
 	if (cda[CTA_EXPECT_ID]) {
 		__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
-		if (ntohl(id) != (u32)(unsigned long)exp) {
+
+		if (id != nf_expect_get_id(exp)) {
 			nf_ct_expect_put(exp);
 			return -ENOENT;
 		}
@@ -3163,13 +3213,13 @@
 			   struct nf_conntrack_expect *exp,
 			   u_int8_t u3)
 {
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 	struct nlattr *tb[CTA_EXPECT_NAT_MAX+1];
 	struct nf_conntrack_tuple nat_tuple = {};
 	int err;
 
-	err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr,
-			       exp_nat_nla_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, CTA_EXPECT_NAT_MAX, attr,
+					  exp_nat_nla_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -3320,7 +3370,7 @@
 		goto err_rcu;
 	}
 
-	err = nf_ct_expect_related_report(exp, portid, report);
+	err = nf_ct_expect_related_report(exp, portid, report, 0);
 	nf_ct_expect_put(exp);
 err_rcu:
 	rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 11562f2..a971183 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
 * PPTP is a protocol for creating virtual private networks.
@@ -121,7 +122,7 @@
 		struct nf_conntrack_expect *exp_other;
 
 		/* obviously this tuple inversion only works until you do NAT */
-		nf_ct_invert_tuplepr(&inv_t, &exp->tuple);
+		nf_ct_invert_tuple(&inv_t, &exp->tuple);
 		pr_debug("trying to unexpect other dir: ");
 		nf_ct_dump_tuple(&inv_t);
 
@@ -233,9 +234,9 @@
 	nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
 	if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
 		nf_nat_pptp_exp_gre(exp_orig, exp_reply);
-	if (nf_ct_expect_related(exp_orig) != 0)
+	if (nf_ct_expect_related(exp_orig, 0) != 0)
 		goto out_put_both;
-	if (nf_ct_expect_related(exp_reply) != 0)
+	if (nf_ct_expect_related(exp_reply, 0) != 0)
 		goto out_unexpect_orig;
 
 	/* Add GRE keymap entries */
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 51c5d7e..a0560d1 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -16,6 +16,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
 #include <net/netfilter/nf_log.h>
 
 #include <linux/ip.h>
@@ -43,40 +44,9 @@
 
 extern unsigned int nf_conntrack_net_id;
 
-static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
-
 static DEFINE_MUTEX(nf_ct_proto_mutex);
 
 #ifdef CONFIG_SYSCTL
-static int
-nf_ct_register_sysctl(struct net *net,
-		      struct ctl_table_header **header,
-		      const char *path,
-		      struct ctl_table *table)
-{
-	if (*header == NULL) {
-		*header = register_net_sysctl(net, path, table);
-		if (*header == NULL)
-			return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static void
-nf_ct_unregister_sysctl(struct ctl_table_header **header,
-			struct ctl_table **table,
-			unsigned int users)
-{
-	if (users > 0)
-		return;
-
-	unregister_net_sysctl_table(*header);
-	kfree(*table);
-	*header = NULL;
-	*table = NULL;
-}
-
 __printf(5, 6)
 void nf_l4proto_log_invalid(const struct sk_buff *skb,
 			    struct net *net,
@@ -86,7 +56,7 @@
 	struct va_format vaf;
 	va_list args;
 
-	if (net->ct.sysctl_log_invalid != protonum ||
+	if (net->ct.sysctl_log_invalid != protonum &&
 	    net->ct.sysctl_log_invalid != IPPROTO_RAW)
 		return;
 
@@ -124,338 +94,88 @@
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
 #endif
 
-const struct nf_conntrack_l4proto *
-__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
 {
-	if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
-		return &nf_conntrack_l4proto_generic;
-
-	return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
-}
-EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
-
-const struct nf_conntrack_l4proto *
-nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
-{
-	const struct nf_conntrack_l4proto *p;
-
-	rcu_read_lock();
-	p = __nf_ct_l4proto_find(l3num, l4num);
-	if (!try_module_get(p->me))
-		p = &nf_conntrack_l4proto_generic;
-	rcu_read_unlock();
-
-	return p;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
-
-void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
-{
-	module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
-
-static int kill_l4proto(struct nf_conn *i, void *data)
-{
-	const struct nf_conntrack_l4proto *l4proto;
-	l4proto = data;
-	return nf_ct_protonum(i) == l4proto->l4proto &&
-	       nf_ct_l3num(i) == l4proto->l3proto;
-}
-
-static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
-				const struct nf_conntrack_l4proto *l4proto)
-{
-	if (l4proto->get_net_proto) {
-		/* statically built-in protocols use static per-net */
-		return l4proto->get_net_proto(net);
-	} else if (l4proto->net_id) {
-		/* ... and loadable protocols use dynamic per-net */
-		return net_generic(net, *l4proto->net_id);
-	}
-	return NULL;
-}
-
-static
-int nf_ct_l4proto_register_sysctl(struct net *net,
-				  struct nf_proto_net *pn,
-				  const struct nf_conntrack_l4proto *l4proto)
-{
-	int err = 0;
-
-#ifdef CONFIG_SYSCTL
-	if (pn->ctl_table != NULL) {
-		err = nf_ct_register_sysctl(net,
-					    &pn->ctl_table_header,
-					    "net/netfilter",
-					    pn->ctl_table);
-		if (err < 0) {
-			if (!pn->users) {
-				kfree(pn->ctl_table);
-				pn->ctl_table = NULL;
-			}
-		}
-	}
-#endif /* CONFIG_SYSCTL */
-	return err;
-}
-
-static
-void nf_ct_l4proto_unregister_sysctl(struct net *net,
-				struct nf_proto_net *pn,
-				const struct nf_conntrack_l4proto *l4proto)
-{
-#ifdef CONFIG_SYSCTL
-	if (pn->ctl_table_header != NULL)
-		nf_ct_unregister_sysctl(&pn->ctl_table_header,
-					&pn->ctl_table,
-					pn->users);
-#endif /* CONFIG_SYSCTL */
-}
-
-/* FIXME: Allow NULL functions and sub in pointers to generic for
-   them. --RR */
-int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
-{
-	int ret = 0;
-
-	if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
-		return -EBUSY;
-
-	if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
-	    (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
-		return -EINVAL;
-
-	mutex_lock(&nf_ct_proto_mutex);
-	if (!nf_ct_protos[l4proto->l3proto]) {
-		/* l3proto may be loaded latter. */
-		struct nf_conntrack_l4proto __rcu **proto_array;
-		int i;
-
-		proto_array =
-			kmalloc_array(MAX_NF_CT_PROTO,
-				      sizeof(struct nf_conntrack_l4proto *),
-				      GFP_KERNEL);
-		if (proto_array == NULL) {
-			ret = -ENOMEM;
-			goto out_unlock;
-		}
-
-		for (i = 0; i < MAX_NF_CT_PROTO; i++)
-			RCU_INIT_POINTER(proto_array[i],
-					 &nf_conntrack_l4proto_generic);
-
-		/* Before making proto_array visible to lockless readers,
-		 * we must make sure its content is committed to memory.
-		 */
-		smp_wmb();
-
-		nf_ct_protos[l4proto->l3proto] = proto_array;
-	} else if (rcu_dereference_protected(
-			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
-			lockdep_is_held(&nf_ct_proto_mutex)
-			) != &nf_conntrack_l4proto_generic) {
-		ret = -EBUSY;
-		goto out_unlock;
+	switch (l4proto) {
+	case IPPROTO_UDP: return &nf_conntrack_l4proto_udp;
+	case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp;
+	case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+	case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	case IPPROTO_GRE: return &nf_conntrack_l4proto_gre;
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6;
+#endif /* CONFIG_IPV6 */
 	}
 
-	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
-			   l4proto);
-out_unlock:
-	mutex_unlock(&nf_ct_proto_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
+	return &nf_conntrack_l4proto_generic;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
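With the per-family registration tables gone, resolving an L4 tracker is a pure function of the protocol number: no RCU dereference, no module refcount, no failure path. A one-line caller sketch:

	/* A lookup can no longer fail: protocols without a dedicated tracker
	 * (IPPROTO_AH here, purely as an example) resolve to the generic
	 * tracker, so callers need no NULL check and no nf_ct_l4proto_put(). */
	const struct nf_conntrack_l4proto *l4proto = nf_ct_l4proto_find(IPPROTO_AH);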
 
-int nf_ct_l4proto_pernet_register_one(struct net *net,
-				const struct nf_conntrack_l4proto *l4proto)
+unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
+			struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
-	int ret = 0;
-	struct nf_proto_net *pn = NULL;
-
-	if (l4proto->init_net) {
-		ret = l4proto->init_net(net, l4proto->l3proto);
-		if (ret < 0)
-			goto out;
-	}
-
-	pn = nf_ct_l4proto_net(net, l4proto);
-	if (pn == NULL)
-		goto out;
-
-	ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto);
-	if (ret < 0)
-		goto out;
-
-	pn->users++;
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
-
-static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
-
-{
-	BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
-
-	BUG_ON(rcu_dereference_protected(
-			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
-			lockdep_is_held(&nf_ct_proto_mutex)
-			) != l4proto);
-	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
-			   &nf_conntrack_l4proto_generic);
-}
-
-void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
-{
-	mutex_lock(&nf_ct_proto_mutex);
-	__nf_ct_l4proto_unregister_one(l4proto);
-	mutex_unlock(&nf_ct_proto_mutex);
-
-	synchronize_net();
-	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
-
-void nf_ct_l4proto_pernet_unregister_one(struct net *net,
-				const struct nf_conntrack_l4proto *l4proto)
-{
-	struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
-
-	if (pn == NULL)
-		return;
-
-	pn->users--;
-	nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
-
-static void
-nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
-			 unsigned int num_proto)
-{
-	int i;
-
-	mutex_lock(&nf_ct_proto_mutex);
-	for (i = 0; i < num_proto; i++)
-		__nf_ct_l4proto_unregister_one(l4proto[i]);
-	mutex_unlock(&nf_ct_proto_mutex);
-
-	synchronize_net();
-
-	for (i = 0; i < num_proto; i++)
-		nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]);
-}
-
-static int
-nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
-		       unsigned int num_proto)
-{
-	int ret = -EINVAL, ver;
-	unsigned int i;
-
-	for (i = 0; i < num_proto; i++) {
-		ret = nf_ct_l4proto_register_one(l4proto[i]);
-		if (ret < 0)
-			break;
-	}
-	if (i != num_proto) {
-		ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
-		pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
-		       ver, l4proto[i]->l4proto);
-		nf_ct_l4proto_unregister(l4proto, i);
-	}
-	return ret;
-}
-
-int nf_ct_l4proto_pernet_register(struct net *net,
-				  const struct nf_conntrack_l4proto *const l4proto[],
-				  unsigned int num_proto)
-{
-	int ret = -EINVAL;
-	unsigned int i;
-
-	for (i = 0; i < num_proto; i++) {
-		ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]);
-		if (ret < 0)
-			break;
-	}
-	if (i != num_proto) {
-		pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
-		       l4proto[i]->l4proto,
-		       l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
-		nf_ct_l4proto_pernet_unregister(net, l4proto, i);
-	}
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
-
-void nf_ct_l4proto_pernet_unregister(struct net *net,
-				const struct nf_conntrack_l4proto *const l4proto[],
-				unsigned int num_proto)
-{
-	while (num_proto-- != 0)
-		nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
-
-static unsigned int ipv4_helper(void *priv,
-				struct sk_buff *skb,
-				const struct nf_hook_state *state)
-{
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
 	const struct nf_conn_help *help;
-	const struct nf_conntrack_helper *helper;
-
-	/* This is where we call the helper: as the packet goes out. */
-	ct = nf_ct_get(skb, &ctinfo);
-	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-		return NF_ACCEPT;
 
 	help = nfct_help(ct);
-	if (!help)
-		return NF_ACCEPT;
+	if (help) {
+		const struct nf_conntrack_helper *helper;
+		int ret;
 
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	helper = rcu_dereference(help->helper);
-	if (!helper)
-		return NF_ACCEPT;
+		/* rcu_read_lock()ed by nf_hook_thresh */
+		helper = rcu_dereference(help->helper);
+		if (helper) {
+			ret = helper->help(skb,
+					   protoff,
+					   ct, ctinfo);
+			if (ret != NF_ACCEPT)
+				return ret;
+		}
+	}
 
-	return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
-			    ct, ctinfo);
+	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+	    !nf_is_loopback_packet(skb)) {
+		if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+			return NF_DROP;
+		}
+	}
+
+	/* We've seen it coming out the other side: confirm it */
+	return nf_conntrack_confirm(skb);
 }
+EXPORT_SYMBOL_GPL(nf_confirm);
 
 static unsigned int ipv4_confirm(void *priv,
 				 struct sk_buff *skb,
 				 const struct nf_hook_state *state)
 {
-	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-		goto out;
+		return nf_conntrack_confirm(skb);
 
-	/* adjust seqs for loopback traffic only in outgoing direction */
-	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
-	    !nf_is_loopback_packet(skb)) {
-		if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
-			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
-			return NF_DROP;
-		}
-	}
-out:
-	/* We've seen it coming out the other side: confirm it */
-	return nf_conntrack_confirm(skb);
+	return nf_confirm(skb,
+			  skb_network_offset(skb) + ip_hdrlen(skb),
+			  ct, ctinfo);
 }
 
 static unsigned int ipv4_conntrack_in(void *priv,
 				      struct sk_buff *skb,
 				      const struct nf_hook_state *state)
 {
-	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+	return nf_conntrack_in(skb, state);
 }
 
 static unsigned int ipv4_conntrack_local(void *priv,
@@ -477,7 +197,7 @@
 		return NF_ACCEPT;
 	}
 
-	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+	return nf_conntrack_in(skb, state);
 }
 
 /* Connection tracking may drop packets, but never alters them, so
@@ -497,24 +217,12 @@
 		.priority	= NF_IP_PRI_CONNTRACK,
 	},
 	{
-		.hook		= ipv4_helper,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
-	},
-	{
 		.hook		= ipv4_confirm,
 		.pf		= NFPROTO_IPV4,
 		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
 	},
 	{
-		.hook		= ipv4_helper,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
-	},
-	{
 		.hook		= ipv4_confirm,
 		.pf		= NFPROTO_IPV4,
 		.hooknum	= NF_INET_LOCAL_IN,
@@ -659,81 +367,35 @@
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
-	int protoff;
 	__be16 frag_off;
+	int protoff;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-		goto out;
+		return nf_conntrack_confirm(skb);
 
 	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
 				   &frag_off);
 	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
 		pr_debug("proto header not found\n");
-		goto out;
+		return nf_conntrack_confirm(skb);
 	}
 
-	/* adjust seqs for loopback traffic only in outgoing direction */
-	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
-	    !nf_is_loopback_packet(skb)) {
-		if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
-			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
-			return NF_DROP;
-		}
-	}
-out:
-	/* We've seen it coming out the other side: confirm it */
-	return nf_conntrack_confirm(skb);
+	return nf_confirm(skb, protoff, ct, ctinfo);
 }
 
 static unsigned int ipv6_conntrack_in(void *priv,
 				      struct sk_buff *skb,
 				      const struct nf_hook_state *state)
 {
-	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+	return nf_conntrack_in(skb, state);
 }
 
 static unsigned int ipv6_conntrack_local(void *priv,
 					 struct sk_buff *skb,
 					 const struct nf_hook_state *state)
 {
-	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
-}
-
-static unsigned int ipv6_helper(void *priv,
-				struct sk_buff *skb,
-				const struct nf_hook_state *state)
-{
-	struct nf_conn *ct;
-	const struct nf_conn_help *help;
-	const struct nf_conntrack_helper *helper;
-	enum ip_conntrack_info ctinfo;
-	__be16 frag_off;
-	int protoff;
-	u8 nexthdr;
-
-	/* This is where we call the helper: as the packet goes out. */
-	ct = nf_ct_get(skb, &ctinfo);
-	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-		return NF_ACCEPT;
-
-	help = nfct_help(ct);
-	if (!help)
-		return NF_ACCEPT;
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	helper = rcu_dereference(help->helper);
-	if (!helper)
-		return NF_ACCEPT;
-
-	nexthdr = ipv6_hdr(skb)->nexthdr;
-	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
-				   &frag_off);
-	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
-		pr_debug("proto header not found\n");
-		return NF_ACCEPT;
-	}
-
-	return helper->help(skb, protoff, ct, ctinfo);
+	return nf_conntrack_in(skb, state);
 }
 
 static const struct nf_hook_ops ipv6_conntrack_ops[] = {
@@ -750,24 +412,12 @@
 		.priority	= NF_IP6_PRI_CONNTRACK,
 	},
 	{
-		.hook		= ipv6_helper,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
-	},
-	{
 		.hook		= ipv6_confirm,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP6_PRI_LAST,
 	},
 	{
-		.hook		= ipv6_helper,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
-	},
-	{
 		.hook		= ipv6_confirm,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_LOCAL_IN,
@@ -792,12 +442,14 @@
 	return 0;
 }
 
+static struct nf_ct_bridge_info *nf_ct_bridge_info;
+
 static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
 {
 	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
-	bool fixup_needed = false;
+	bool fixup_needed = false, retry = true;
 	int err = 0;
-
+retry:
 	mutex_lock(&nf_ct_proto_mutex);
 
 	switch (nfproto) {
@@ -837,6 +489,32 @@
 			fixup_needed = true;
 		break;
 #endif
+	case NFPROTO_BRIDGE:
+		if (!nf_ct_bridge_info) {
+			if (!retry) {
+				err = -EPROTO;
+				goto out_unlock;
+			}
+			mutex_unlock(&nf_ct_proto_mutex);
+			request_module("nf_conntrack_bridge");
+			retry = false;
+			goto retry;
+		}
+		if (!try_module_get(nf_ct_bridge_info->me)) {
+			err = -EPROTO;
+			goto out_unlock;
+		}
+		cnet->users_bridge++;
+		if (cnet->users_bridge > 1)
+			goto out_unlock;
+
+		err = nf_register_net_hooks(net, nf_ct_bridge_info->ops,
+					    nf_ct_bridge_info->ops_size);
+		if (err)
+			cnet->users_bridge = 0;
+		else
+			fixup_needed = true;
+		break;
 	default:
 		err = -EPROTO;
 		break;
@@ -869,79 +547,102 @@
 						ARRAY_SIZE(ipv6_conntrack_ops));
 		break;
 #endif
-	}
+	case NFPROTO_BRIDGE:
+		if (!nf_ct_bridge_info)
+			break;
+		if (cnet->users_bridge && (--cnet->users_bridge == 0))
+			nf_unregister_net_hooks(net, nf_ct_bridge_info->ops,
+						nf_ct_bridge_info->ops_size);
 
+		module_put(nf_ct_bridge_info->me);
+		break;
+	}
 	mutex_unlock(&nf_ct_proto_mutex);
 }
 
+static int nf_ct_netns_inet_get(struct net *net)
+{
+	int err;
+
+	err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+	if (err < 0)
+		goto err1;
+	err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+	if (err < 0)
+		goto err2;
+
+	return err;
+err2:
+	nf_ct_netns_put(net, NFPROTO_IPV4);
+err1:
+	return err;
+}
+
 int nf_ct_netns_get(struct net *net, u8 nfproto)
 {
 	int err;
 
-	if (nfproto == NFPROTO_INET) {
-		err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+	switch (nfproto) {
+	case NFPROTO_INET:
+		err = nf_ct_netns_inet_get(net);
+		break;
+	case NFPROTO_BRIDGE:
+		err = nf_ct_netns_do_get(net, NFPROTO_BRIDGE);
 		if (err < 0)
-			goto err1;
-		err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
-		if (err < 0)
-			goto err2;
-	} else {
-		err = nf_ct_netns_do_get(net, nfproto);
-		if (err < 0)
-			goto err1;
-	}
-	return 0;
+			return err;
 
-err2:
-	nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
+		err = nf_ct_netns_inet_get(net);
+		if (err < 0) {
+			nf_ct_netns_put(net, NFPROTO_BRIDGE);
+			return err;
+		}
+		break;
+	default:
+		err = nf_ct_netns_do_get(net, nfproto);
+		break;
+	}
 	return err;
 }
 EXPORT_SYMBOL_GPL(nf_ct_netns_get);
 
 void nf_ct_netns_put(struct net *net, uint8_t nfproto)
 {
-	if (nfproto == NFPROTO_INET) {
+	switch (nfproto) {
+	case NFPROTO_BRIDGE:
+		nf_ct_netns_do_put(net, NFPROTO_BRIDGE);
+		/* fall through */
+	case NFPROTO_INET:
 		nf_ct_netns_do_put(net, NFPROTO_IPV4);
 		nf_ct_netns_do_put(net, NFPROTO_IPV6);
-	} else {
+		break;
+	default:
 		nf_ct_netns_do_put(net, nfproto);
+		break;
 	}
 }
 EXPORT_SYMBOL_GPL(nf_ct_netns_put);
 
-static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
-	&nf_conntrack_l4proto_tcp4,
-	&nf_conntrack_l4proto_udp4,
-	&nf_conntrack_l4proto_icmp,
-#ifdef CONFIG_NF_CT_PROTO_DCCP
-	&nf_conntrack_l4proto_dccp4,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
-	&nf_conntrack_l4proto_sctp4,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-	&nf_conntrack_l4proto_udplite4,
-#endif
-#if IS_ENABLED(CONFIG_IPV6)
-	&nf_conntrack_l4proto_tcp6,
-	&nf_conntrack_l4proto_udp6,
-	&nf_conntrack_l4proto_icmpv6,
-#ifdef CONFIG_NF_CT_PROTO_DCCP
-	&nf_conntrack_l4proto_dccp6,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
-	&nf_conntrack_l4proto_sctp6,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-	&nf_conntrack_l4proto_udplite6,
-#endif
-#endif /* CONFIG_IPV6 */
-};
+void nf_ct_bridge_register(struct nf_ct_bridge_info *info)
+{
+	WARN_ON(nf_ct_bridge_info);
+	mutex_lock(&nf_ct_proto_mutex);
+	nf_ct_bridge_info = info;
+	mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_register);
+
+void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info)
+{
+	WARN_ON(!nf_ct_bridge_info);
+	mutex_lock(&nf_ct_proto_mutex);
+	nf_ct_bridge_info = NULL;
+	mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_unregister);
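The expected consumer of this pair is the new nf_conntrack_bridge module. A sketch of its side, with the nf_ct_bridge_info fields inferred from their use above (ops, ops_size, me) and the hook array name illustrative:

	static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
		/* NFPROTO_BRIDGE PRE_ROUTING / POST_ROUTING conntrack hooks */
	};

	static struct nf_ct_bridge_info bridge_info = {
		.ops		= nf_ct_bridge_hook_ops,
		.ops_size	= ARRAY_SIZE(nf_ct_bridge_hook_ops),
		.me		= THIS_MODULE,
	};

	static int __init nf_conntrack_bridge_example_init(void)
	{
		nf_ct_bridge_register(&bridge_info);
		return 0;
	}

	static void __exit nf_conntrack_bridge_example_exit(void)
	{
		nf_ct_bridge_unregister(&bridge_info);
	}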
 
 int nf_conntrack_proto_init(void)
 {
-	int ret = 0;
+	int ret;
 
 	ret = nf_register_sockopt(&so_getorigdst);
 	if (ret < 0)
@@ -952,14 +653,9 @@
 	if (ret < 0)
 		goto cleanup_sockopt;
 #endif
-	ret = nf_ct_l4proto_register(builtin_l4proto,
-				     ARRAY_SIZE(builtin_l4proto));
-	if (ret < 0)
-		goto cleanup_sockopt2;
 
 	return ret;
-cleanup_sockopt2:
-	nf_unregister_sockopt(&so_getorigdst);
+
 #if IS_ENABLED(CONFIG_IPV6)
 cleanup_sockopt:
 	nf_unregister_sockopt(&so_getorigdst);
@@ -969,61 +665,39 @@
 
 void nf_conntrack_proto_fini(void)
 {
-	unsigned int i;
-
 	nf_unregister_sockopt(&so_getorigdst);
 #if IS_ENABLED(CONFIG_IPV6)
 	nf_unregister_sockopt(&so_getorigdst6);
 #endif
-	/* No need to call nf_ct_l4proto_unregister(), the register
-	 * tables are free'd here anyway.
-	 */
-	for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
-		kfree(nf_ct_protos[i]);
 }
 
-int nf_conntrack_proto_pernet_init(struct net *net)
+void nf_conntrack_proto_pernet_init(struct net *net)
 {
-	int err;
-	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
-					&nf_conntrack_l4proto_generic);
-
-	err = nf_conntrack_l4proto_generic.init_net(net,
-					nf_conntrack_l4proto_generic.l3proto);
-	if (err < 0)
-		return err;
-	err = nf_ct_l4proto_register_sysctl(net,
-					    pn,
-					    &nf_conntrack_l4proto_generic);
-	if (err < 0)
-		return err;
-
-	err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
-					    ARRAY_SIZE(builtin_l4proto));
-	if (err < 0) {
-		nf_ct_l4proto_unregister_sysctl(net, pn,
-						&nf_conntrack_l4proto_generic);
-		return err;
-	}
-
-	pn->users++;
-	return 0;
+	nf_conntrack_generic_init_net(net);
+	nf_conntrack_udp_init_net(net);
+	nf_conntrack_tcp_init_net(net);
+	nf_conntrack_icmp_init_net(net);
+#if IS_ENABLED(CONFIG_IPV6)
+	nf_conntrack_icmpv6_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	nf_conntrack_dccp_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	nf_conntrack_sctp_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	nf_conntrack_gre_init_net(net);
+#endif
 }
 
 void nf_conntrack_proto_pernet_fini(struct net *net)
 {
-	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
-					&nf_conntrack_l4proto_generic);
-
-	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
-					ARRAY_SIZE(builtin_l4proto));
-	pn->users--;
-	nf_ct_l4proto_unregister_sysctl(net,
-					pn,
-					&nf_conntrack_l4proto_generic);
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	nf_ct_gre_keymap_flush(net);
+#endif
 }
 
-
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 		  &nf_conntrack_htable_size, 0600);
 
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index f3f91ed..b6b14db 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * DCCP connection tracking protocol helper
  *
  * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -384,27 +380,19 @@
 	},
 };
 
-static inline struct nf_dccp_net *dccp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.dccp;
-}
-
-static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		     unsigned int dataoff)
+static noinline bool
+dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
+	 const struct dccp_hdr *dh)
 {
 	struct net *net = nf_ct_net(ct);
 	struct nf_dccp_net *dn;
-	struct dccp_hdr _dh, *dh;
 	const char *msg;
 	u_int8_t state;
 
-	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
-	BUG_ON(dh == NULL);
-
 	state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
 	switch (state) {
 	default:
-		dn = dccp_pernet(net);
+		dn = nf_dccp_pernet(net);
 		if (dn->dccp_loose == 0) {
 			msg = "not picking up existing connection ";
 			goto out_invalid;
@@ -438,8 +426,52 @@
 		     ntohl(dhack->dccph_ack_nr_low);
 }
 
-static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
-		       unsigned int dataoff, enum ip_conntrack_info ctinfo)
+static bool dccp_error(const struct dccp_hdr *dh,
+		       struct sk_buff *skb, unsigned int dataoff,
+		       const struct nf_hook_state *state)
+{
+	unsigned int dccp_len = skb->len - dataoff;
+	unsigned int cscov;
+	const char *msg;
+
+	if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
+	    dh->dccph_doff * 4 > dccp_len) {
+		msg = "nf_ct_dccp: truncated/malformed packet ";
+		goto out_invalid;
+	}
+
+	cscov = dccp_len;
+	if (dh->dccph_cscov) {
+		cscov = (dh->dccph_cscov - 1) * 4;
+		if (cscov > dccp_len) {
+			msg = "nf_ct_dccp: bad checksum coverage ";
+			goto out_invalid;
+		}
+	}
+
+	if (state->hook == NF_INET_PRE_ROUTING &&
+	    state->net->ct.sysctl_checksum &&
+	    nf_checksum_partial(skb, state->hook, dataoff, cscov,
+				IPPROTO_DCCP, state->pf)) {
+		msg = "nf_ct_dccp: bad checksum ";
+		goto out_invalid;
+	}
+
+	if (dh->dccph_type >= DCCP_PKT_INVALID) {
+		msg = "nf_ct_dccp: reserved packet type ";
+		goto out_invalid;
+	}
+	return false;
+out_invalid:
+	nf_l4proto_log_invalid(skb, state->net, state->pf,
+			       IPPROTO_DCCP, "%s", msg);
+	return true;
+}
+
+int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
+			     unsigned int dataoff,
+			     enum ip_conntrack_info ctinfo,
+			     const struct nf_hook_state *state)
 {
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	struct dccp_hdr _dh, *dh;
@@ -448,8 +480,15 @@
 	unsigned int *timeouts;
 
 	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
-	BUG_ON(dh == NULL);
+	if (!dh)
+		return NF_DROP;
+
+	if (dccp_error(dh, skb, dataoff, state))
+		return -NF_ACCEPT;
+
 	type = dh->dccph_type;
+	if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh))
+		return -NF_ACCEPT;
 
 	if (type == DCCP_PKT_RESET &&
 	    !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
@@ -521,61 +560,12 @@
 
 	timeouts = nf_ct_timeout_lookup(ct);
 	if (!timeouts)
-		timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
+		timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout;
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
 	return NF_ACCEPT;
 }
 
-static int dccp_error(struct net *net, struct nf_conn *tmpl,
-		      struct sk_buff *skb, unsigned int dataoff,
-		      u_int8_t pf, unsigned int hooknum)
-{
-	struct dccp_hdr _dh, *dh;
-	unsigned int dccp_len = skb->len - dataoff;
-	unsigned int cscov;
-	const char *msg;
-
-	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
-	if (dh == NULL) {
-		msg = "nf_ct_dccp: short packet ";
-		goto out_invalid;
-	}
-
-	if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
-	    dh->dccph_doff * 4 > dccp_len) {
-		msg = "nf_ct_dccp: truncated/malformed packet ";
-		goto out_invalid;
-	}
-
-	cscov = dccp_len;
-	if (dh->dccph_cscov) {
-		cscov = (dh->dccph_cscov - 1) * 4;
-		if (cscov > dccp_len) {
-			msg = "nf_ct_dccp: bad checksum coverage ";
-			goto out_invalid;
-		}
-	}
-
-	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP,
-				pf)) {
-		msg = "nf_ct_dccp: bad checksum ";
-		goto out_invalid;
-	}
-
-	if (dh->dccph_type >= DCCP_PKT_INVALID) {
-		msg = "nf_ct_dccp: reserved packet type ";
-		goto out_invalid;
-	}
-
-	return NF_ACCEPT;
-
-out_invalid:
-	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
-	return -NF_ACCEPT;
-}
-
 static bool dccp_can_early_drop(const struct nf_conn *ct)
 {
 	switch (ct->proto.dccp.state) {
@@ -604,7 +594,7 @@
 	struct nlattr *nest_parms;
 
 	spin_lock_bh(&ct->lock);
-	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP);
 	if (!nest_parms)
 		goto nla_put_failure;
 	if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state) ||
@@ -645,8 +635,8 @@
 	if (!attr)
 		return 0;
 
-	err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr,
-			       dccp_nla_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_DCCP_MAX, attr,
+					  dccp_nla_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -683,7 +673,7 @@
 static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				      struct net *net, void *data)
 {
-	struct nf_dccp_net *dn = dccp_pernet(net);
+	struct nf_dccp_net *dn = nf_dccp_pernet(net);
 	unsigned int *timeouts = data;
 	int i;
 
@@ -730,126 +720,28 @@
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-#ifdef CONFIG_SYSCTL
-/* template, data assigned later */
-static struct ctl_table dccp_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_dccp_timeout_request",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_dccp_timeout_respond",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_dccp_timeout_partopen",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_dccp_timeout_open",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_dccp_timeout_closereq",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_dccp_timeout_closing",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_dccp_timeout_timewait",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_dccp_loose",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{ }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
-				     struct nf_dccp_net *dn)
+void nf_conntrack_dccp_init_net(struct net *net)
 {
-#ifdef CONFIG_SYSCTL
-	if (pn->ctl_table)
-		return 0;
+	struct nf_dccp_net *dn = nf_dccp_pernet(net);
 
-	pn->ctl_table = kmemdup(dccp_sysctl_table,
-				sizeof(dccp_sysctl_table),
-				GFP_KERNEL);
-	if (!pn->ctl_table)
-		return -ENOMEM;
+	/* default values */
+	dn->dccp_loose = 1;
+	dn->dccp_timeout[CT_DCCP_REQUEST]	= 2 * DCCP_MSL;
+	dn->dccp_timeout[CT_DCCP_RESPOND]	= 4 * DCCP_MSL;
+	dn->dccp_timeout[CT_DCCP_PARTOPEN]	= 4 * DCCP_MSL;
+	dn->dccp_timeout[CT_DCCP_OPEN]		= 12 * 3600 * HZ;
+	dn->dccp_timeout[CT_DCCP_CLOSEREQ]	= 64 * HZ;
+	dn->dccp_timeout[CT_DCCP_CLOSING]	= 64 * HZ;
+	dn->dccp_timeout[CT_DCCP_TIMEWAIT]	= 2 * DCCP_MSL;
 
-	pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST];
-	pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND];
-	pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN];
-	pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN];
-	pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ];
-	pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING];
-	pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT];
-	pn->ctl_table[7].data = &dn->dccp_loose;
-
-	/* Don't export sysctls to unprivileged users */
-	if (net->user_ns != &init_user_ns)
-		pn->ctl_table[0].procname = NULL;
-#endif
-	return 0;
+	/* timeouts[0] is unused, make it the same as CT_DCCP_REQUEST so
+	 * ->timeouts[0] contains the 'new' timeout, like udp or icmp.
+	 */
+	dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST];
 }
 
-static int dccp_init_net(struct net *net, u_int16_t proto)
-{
-	struct nf_dccp_net *dn = dccp_pernet(net);
-	struct nf_proto_net *pn = &dn->pn;
-
-	if (!pn->users) {
-		/* default values */
-		dn->dccp_loose = 1;
-		dn->dccp_timeout[CT_DCCP_REQUEST]	= 2 * DCCP_MSL;
-		dn->dccp_timeout[CT_DCCP_RESPOND]	= 4 * DCCP_MSL;
-		dn->dccp_timeout[CT_DCCP_PARTOPEN]	= 4 * DCCP_MSL;
-		dn->dccp_timeout[CT_DCCP_OPEN]		= 12 * 3600 * HZ;
-		dn->dccp_timeout[CT_DCCP_CLOSEREQ]	= 64 * HZ;
-		dn->dccp_timeout[CT_DCCP_CLOSING]	= 64 * HZ;
-		dn->dccp_timeout[CT_DCCP_TIMEWAIT]	= 2 * DCCP_MSL;
-
-		/* timeouts[0] is unused, make it same as SYN_SENT so
-		 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
-		 */
-		dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST];
-	}
-
-	return dccp_kmemdup_sysctl_table(net, pn, dn);
-}
-
-static struct nf_proto_net *dccp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.dccp.pn;
-}
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
-	.l3proto		= AF_INET,
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
 	.l4proto		= IPPROTO_DCCP,
-	.new			= dccp_new,
-	.packet			= dccp_packet,
-	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= dccp_print_conntrack,
@@ -872,40 +764,4 @@
 		.nla_policy	= dccp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= dccp_init_net,
-	.get_net_proto		= dccp_get_net_proto,
 };
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
-	.l3proto		= AF_INET6,
-	.l4proto		= IPPROTO_DCCP,
-	.new			= dccp_new,
-	.packet			= dccp_packet,
-	.error			= dccp_error,
-	.can_early_drop		= dccp_can_early_drop,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
-	.print_conntrack	= dccp_print_conntrack,
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_size		= DCCP_NLATTR_SIZE,
-	.to_nlattr		= dccp_to_nlattr,
-	.from_nlattr		= nlattr_to_dccp,
-	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
-	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
-	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
-	.nla_policy		= nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-	.ctnl_timeout		= {
-		.nlattr_to_obj	= dccp_timeout_nlattr_to_obj,
-		.obj_to_nlattr	= dccp_timeout_obj_to_nlattr,
-		.nlattr_max	= CTA_TIMEOUT_DCCP_MAX,
-		.obj_size	= sizeof(unsigned int) * CT_DCCP_MAX,
-		.nla_policy	= dccp_timeout_nla_policy,
-	},
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= dccp_init_net,
-	.get_net_proto		= dccp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6);
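
The DCCP hunks above fold the separate v4/v6 trackers into one family-independent nf_conntrack_l4proto_dccp and drop the kmemdup'd sysctl table; per-netns defaults now come from nf_conntrack_dccp_init_net() alone. A minimal plain-C sketch of the timeout-table idiom follows. It is illustrative only: HZ and DCCP_MSL are values assumed for the sketch, and the enum is simplified relative to the kernel's.

#include <stdio.h>

enum ct_dccp_states {
	CT_DCCP_NONE,		/* slot 0: aliased to the 'new' timeout */
	CT_DCCP_REQUEST,
	CT_DCCP_RESPOND,
	CT_DCCP_PARTOPEN,
	CT_DCCP_OPEN,
	CT_DCCP_CLOSEREQ,
	CT_DCCP_CLOSING,
	CT_DCCP_TIMEWAIT,
	CT_DCCP_MAX
};

#define HZ		100U		/* assumed tick rate for the sketch */
#define DCCP_MSL	(2 * 60 * HZ)	/* assumed value for the sketch */

static unsigned int dccp_timeout[CT_DCCP_MAX];

static void dccp_init_net_sketch(void)
{
	dccp_timeout[CT_DCCP_REQUEST]	= 2 * DCCP_MSL;
	dccp_timeout[CT_DCCP_RESPOND]	= 4 * DCCP_MSL;
	dccp_timeout[CT_DCCP_PARTOPEN]	= 4 * DCCP_MSL;
	dccp_timeout[CT_DCCP_OPEN]	= 12 * 3600 * HZ;
	dccp_timeout[CT_DCCP_CLOSEREQ]	= 64 * HZ;
	dccp_timeout[CT_DCCP_CLOSING]	= 64 * HZ;
	dccp_timeout[CT_DCCP_TIMEWAIT]	= 2 * DCCP_MSL;

	/* Slot 0 is otherwise unused; mirroring REQUEST lets generic
	 * code treat timeouts[0] as the 'new' timeout. */
	dccp_timeout[CT_DCCP_NONE] = dccp_timeout[CT_DCCP_REQUEST];
}

int main(void)
{
	dccp_init_net_sketch();
	printf("'new' timeout: %u jiffies\n", dccp_timeout[CT_DCCP_NONE]);
	return 0;
}

Aliasing slot 0 is what lets protocol-independent core code read timeouts[0] without a DCCP special case.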
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 1df3244..e831637 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -15,61 +12,6 @@
 
 static const unsigned int nf_ct_generic_timeout = 600*HZ;
 
-static bool nf_generic_should_process(u8 proto)
-{
-	switch (proto) {
-#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
-	case IPPROTO_GRE:
-		return false;
-#endif
-	default:
-		return true;
-	}
-}
-
-static inline struct nf_generic_net *generic_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.generic;
-}
-
-static bool generic_pkt_to_tuple(const struct sk_buff *skb,
-				 unsigned int dataoff,
-				 struct net *net, struct nf_conntrack_tuple *tuple)
-{
-	tuple->src.u.all = 0;
-	tuple->dst.u.all = 0;
-
-	return true;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int generic_packet(struct nf_conn *ct,
-			  const struct sk_buff *skb,
-			  unsigned int dataoff,
-			  enum ip_conntrack_info ctinfo)
-{
-	const unsigned int *timeout = nf_ct_timeout_lookup(ct);
-
-	if (!timeout)
-		timeout = &generic_pernet(nf_ct_net(ct))->timeout;
-
-	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
-			unsigned int dataoff)
-{
-	bool ret;
-
-	ret = nf_generic_should_process(nf_ct_protonum(ct));
-	if (!ret)
-		pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
-			     nf_ct_protonum(ct));
-	return ret;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
@@ -78,7 +20,7 @@
 static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
 					 struct net *net, void *data)
 {
-	struct nf_generic_net *gn = generic_pernet(net);
+	struct nf_generic_net *gn = nf_generic_pernet(net);
 	unsigned int *timeout = data;
 
 	if (!timeout)
@@ -115,55 +57,16 @@
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table generic_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_generic_timeout",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
-					struct nf_generic_net *gn)
+void nf_conntrack_generic_init_net(struct net *net)
 {
-#ifdef CONFIG_SYSCTL
-	pn->ctl_table = kmemdup(generic_sysctl_table,
-				sizeof(generic_sysctl_table),
-				GFP_KERNEL);
-	if (!pn->ctl_table)
-		return -ENOMEM;
-
-	pn->ctl_table[0].data = &gn->timeout;
-#endif
-	return 0;
-}
-
-static int generic_init_net(struct net *net, u_int16_t proto)
-{
-	struct nf_generic_net *gn = generic_pernet(net);
-	struct nf_proto_net *pn = &gn->pn;
+	struct nf_generic_net *gn = nf_generic_pernet(net);
 
 	gn->timeout = nf_ct_generic_timeout;
-
-	return generic_kmemdup_sysctl_table(pn, gn);
-}
-
-static struct nf_proto_net *generic_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.generic.pn;
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 {
-	.l3proto		= PF_UNSPEC,
 	.l4proto		= 255,
-	.pkt_to_tuple		= generic_pkt_to_tuple,
-	.packet			= generic_packet,
-	.new			= generic_new,
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= generic_timeout_nlattr_to_obj,
@@ -173,6 +76,4 @@
 		.nla_policy	= generic_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= generic_init_net,
-	.get_net_proto		= generic_get_net_proto,
 };
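
generic_packet() moves into the conntrack core along with the other removed handlers above, but its timeout logic is unchanged: use a per-connection policy if one was attached via nfnetlink_cttimeout, else fall back to the per-netns default that nf_conntrack_generic_init_net() seeds. A standalone sketch of that fallback, with names local to the sketch:

#include <stdio.h>

/* Per-netns default; 600*HZ in the hunk above, with HZ assumed 100. */
static unsigned int generic_default_timeout = 600 * 100;

struct conn {
	/* Points at a policy installed via nfnetlink_cttimeout, or NULL. */
	const unsigned int *timeout_policy;
};

static unsigned int effective_timeout(const struct conn *ct)
{
	const unsigned int *timeout = ct->timeout_policy;

	if (!timeout)			/* no per-conn policy attached */
		timeout = &generic_default_timeout;
	return *timeout;
}

int main(void)
{
	unsigned int custom = 30 * 100;
	struct conn with_policy = { &custom }, without = { NULL };

	printf("%u %u\n", effective_timeout(&with_policy),
	       effective_timeout(&without));
	return 0;
}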
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 650eb4f..5b05487 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
  * Connection tracking protocol helper module for GRE.
  *
  * GRE is a generic encapsulation protocol, which is generally not very
@@ -43,41 +42,30 @@
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
-enum grep_conntrack {
-	GRE_CT_UNREPLIED,
-	GRE_CT_REPLIED,
-	GRE_CT_MAX
-};
-
 static const unsigned int gre_timeouts[GRE_CT_MAX] = {
 	[GRE_CT_UNREPLIED]	= 30*HZ,
 	[GRE_CT_REPLIED]	= 180*HZ,
 };
 
-static unsigned int proto_gre_net_id __read_mostly;
-struct netns_proto_gre {
-	struct nf_proto_net	nf;
-	rwlock_t		keymap_lock;
-	struct list_head	keymap_list;
-	unsigned int		gre_timeouts[GRE_CT_MAX];
-};
+/* used when expectation is added */
+static DEFINE_SPINLOCK(keymap_lock);
 
-static inline struct netns_proto_gre *gre_pernet(struct net *net)
+static inline struct nf_gre_net *gre_pernet(struct net *net)
 {
-	return net_generic(net, proto_gre_net_id);
+	return &net->ct.nf_ct_proto.gre;
 }
 
-static void nf_ct_gre_keymap_flush(struct net *net)
+void nf_ct_gre_keymap_flush(struct net *net)
 {
-	struct netns_proto_gre *net_gre = gre_pernet(net);
+	struct nf_gre_net *net_gre = gre_pernet(net);
 	struct nf_ct_gre_keymap *km, *tmp;
 
-	write_lock_bh(&net_gre->keymap_lock);
+	spin_lock_bh(&keymap_lock);
 	list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) {
-		list_del(&km->list);
-		kfree(km);
+		list_del_rcu(&km->list);
+		kfree_rcu(km, rcu);
 	}
-	write_unlock_bh(&net_gre->keymap_lock);
+	spin_unlock_bh(&keymap_lock);
 }
 
 static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
@@ -93,18 +81,16 @@
 /* look up the source key for a given tuple */
 static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t)
 {
-	struct netns_proto_gre *net_gre = gre_pernet(net);
+	struct nf_gre_net *net_gre = gre_pernet(net);
 	struct nf_ct_gre_keymap *km;
 	__be16 key = 0;
 
-	read_lock_bh(&net_gre->keymap_lock);
-	list_for_each_entry(km, &net_gre->keymap_list, list) {
+	list_for_each_entry_rcu(km, &net_gre->keymap_list, list) {
 		if (gre_key_cmpfn(km, t)) {
 			key = km->tuple.src.u.gre.key;
 			break;
 		}
 	}
-	read_unlock_bh(&net_gre->keymap_lock);
 
 	pr_debug("lookup src key 0x%x for ", key);
 	nf_ct_dump_tuple(t);
@@ -117,21 +103,17 @@
 			 struct nf_conntrack_tuple *t)
 {
 	struct net *net = nf_ct_net(ct);
-	struct netns_proto_gre *net_gre = gre_pernet(net);
+	struct nf_gre_net *net_gre = gre_pernet(net);
 	struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
 	struct nf_ct_gre_keymap **kmp, *km;
 
 	kmp = &ct_pptp_info->keymap[dir];
 	if (*kmp) {
 		/* check whether it's a retransmission */
-		read_lock_bh(&net_gre->keymap_lock);
-		list_for_each_entry(km, &net_gre->keymap_list, list) {
-			if (gre_key_cmpfn(km, t) && km == *kmp) {
-				read_unlock_bh(&net_gre->keymap_lock);
+		list_for_each_entry_rcu(km, &net_gre->keymap_list, list) {
+			if (gre_key_cmpfn(km, t) && km == *kmp)
 				return 0;
-			}
 		}
-		read_unlock_bh(&net_gre->keymap_lock);
 		pr_debug("trying to override keymap_%s for ct %p\n",
 			 dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
 		return -EEXIST;
@@ -146,9 +128,9 @@
 	pr_debug("adding new entry %p: ", km);
 	nf_ct_dump_tuple(&km->tuple);
 
-	write_lock_bh(&net_gre->keymap_lock);
+	spin_lock_bh(&keymap_lock);
 	list_add_tail(&km->list, &net_gre->keymap_list);
-	write_unlock_bh(&net_gre->keymap_lock);
+	spin_unlock_bh(&keymap_lock);
 
 	return 0;
 }
@@ -157,32 +139,30 @@
 /* destroy the keymap entries associated with specified master ct */
 void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
 {
-	struct net *net = nf_ct_net(ct);
-	struct netns_proto_gre *net_gre = gre_pernet(net);
 	struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
 	enum ip_conntrack_dir dir;
 
 	pr_debug("entering for ct %p\n", ct);
 
-	write_lock_bh(&net_gre->keymap_lock);
+	spin_lock_bh(&keymap_lock);
 	for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
 		if (ct_pptp_info->keymap[dir]) {
 			pr_debug("removing %p from list\n",
 				 ct_pptp_info->keymap[dir]);
-			list_del(&ct_pptp_info->keymap[dir]->list);
-			kfree(ct_pptp_info->keymap[dir]);
+			list_del_rcu(&ct_pptp_info->keymap[dir]->list);
+			kfree_rcu(ct_pptp_info->keymap[dir], rcu);
 			ct_pptp_info->keymap[dir] = NULL;
 		}
 	}
-	write_unlock_bh(&net_gre->keymap_lock);
+	spin_unlock_bh(&keymap_lock);
 }
 EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
 
 /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
 
 /* gre hdr info to tuple */
-static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-			     struct net *net, struct nf_conntrack_tuple *tuple)
+bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+		      struct net *net, struct nf_conntrack_tuple *tuple)
 {
 	const struct pptp_gre_header *pgrehdr;
 	struct pptp_gre_header _pgrehdr;
@@ -228,15 +208,31 @@
 
 static unsigned int *gre_get_timeouts(struct net *net)
 {
-	return gre_pernet(net)->gre_timeouts;
+	return gre_pernet(net)->timeouts;
 }
 
 /* Returns verdict for packet, and may modify conntrack */
-static int gre_packet(struct nf_conn *ct,
-		      const struct sk_buff *skb,
-		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo)
+int nf_conntrack_gre_packet(struct nf_conn *ct,
+			    struct sk_buff *skb,
+			    unsigned int dataoff,
+			    enum ip_conntrack_info ctinfo,
+			    const struct nf_hook_state *state)
 {
+	if (state->pf != NFPROTO_IPV4)
+		return -NF_ACCEPT;
+
+	if (!nf_ct_is_confirmed(ct)) {
+		unsigned int *timeouts = nf_ct_timeout_lookup(ct);
+
+		if (!timeouts)
+			timeouts = gre_get_timeouts(nf_ct_net(ct));
+
+	/* initialize to a sane value.  Ideally a conntrack helper
+	 * (e.g. in case of pptp) will increase them */
+		ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED];
+		ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
+	}
+
 	/* If we've seen traffic both ways, this is a GRE connection.
 	 * Extend timeout. */
 	if (ct->status & IPS_SEEN_REPLY) {
@@ -252,39 +248,6 @@
 	return NF_ACCEPT;
 }
 
-/* Called when a new connection for this protocol found. */
-static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff)
-{
-	unsigned int *timeouts = nf_ct_timeout_lookup(ct);
-
-	if (!timeouts)
-		timeouts = gre_get_timeouts(nf_ct_net(ct));
-
-	pr_debug(": ");
-	nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-
-	/* initialize to sane value.  Ideally a conntrack helper
-	 * (e.g. in case of pptp) is increasing them */
-	ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED];
-	ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
-
-	return true;
-}
-
-/* Called when a conntrack entry has already been removed from the hashes
- * and is about to be deleted from memory */
-static void gre_destroy(struct nf_conn *ct)
-{
-	struct nf_conn *master = ct->master;
-	pr_debug(" entering\n");
-
-	if (!master)
-		pr_debug("no master !?!\n");
-	else
-		nf_ct_gre_keymap_destroy(master);
-}
-
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
@@ -294,13 +257,13 @@
 				     struct net *net, void *data)
 {
 	unsigned int *timeouts = data;
-	struct netns_proto_gre *net_gre = gre_pernet(net);
+	struct nf_gre_net *net_gre = gre_pernet(net);
 
 	if (!timeouts)
 		timeouts = gre_get_timeouts(net);
 	/* set default timeouts for GRE. */
-	timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED];
-	timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED];
+	timeouts[GRE_CT_UNREPLIED] = net_gre->timeouts[GRE_CT_UNREPLIED];
+	timeouts[GRE_CT_REPLIED] = net_gre->timeouts[GRE_CT_REPLIED];
 
 	if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) {
 		timeouts[GRE_CT_UNREPLIED] =
@@ -336,31 +299,22 @@
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-static int gre_init_net(struct net *net, u_int16_t proto)
+void nf_conntrack_gre_init_net(struct net *net)
 {
-	struct netns_proto_gre *net_gre = gre_pernet(net);
+	struct nf_gre_net *net_gre = gre_pernet(net);
 	int i;
 
-	rwlock_init(&net_gre->keymap_lock);
 	INIT_LIST_HEAD(&net_gre->keymap_list);
 	for (i = 0; i < GRE_CT_MAX; i++)
-		net_gre->gre_timeouts[i] = gre_timeouts[i];
-
-	return 0;
+		net_gre->timeouts[i] = gre_timeouts[i];
 }
 
 /* protocol helper struct */
-static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
-	.l3proto	 = AF_INET,
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
 	.l4proto	 = IPPROTO_GRE,
-	.pkt_to_tuple	 = gre_pkt_to_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack = gre_print_conntrack,
 #endif
-	.packet		 = gre_packet,
-	.new		 = gre_new,
-	.destroy	 = gre_destroy,
-	.me 		 = THIS_MODULE,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
 	.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
@@ -376,59 +330,4 @@
 		.nla_policy	= gre_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.net_id		= &proto_gre_net_id,
-	.init_net	= gre_init_net,
 };
-
-static int proto_gre_net_init(struct net *net)
-{
-	int ret = 0;
-
-	ret = nf_ct_l4proto_pernet_register_one(net,
-						&nf_conntrack_l4proto_gre4);
-	if (ret < 0)
-		pr_err("nf_conntrack_gre4: pernet registration failed.\n");
-	return ret;
-}
-
-static void proto_gre_net_exit(struct net *net)
-{
-	nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4);
-	nf_ct_gre_keymap_flush(net);
-}
-
-static struct pernet_operations proto_gre_net_ops = {
-	.init = proto_gre_net_init,
-	.exit = proto_gre_net_exit,
-	.id   = &proto_gre_net_id,
-	.size = sizeof(struct netns_proto_gre),
-};
-
-static int __init nf_ct_proto_gre_init(void)
-{
-	int ret;
-
-	ret = register_pernet_subsys(&proto_gre_net_ops);
-	if (ret < 0)
-		goto out_pernet;
-	ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4);
-	if (ret < 0)
-		goto out_gre4;
-
-	return 0;
-out_gre4:
-	unregister_pernet_subsys(&proto_gre_net_ops);
-out_pernet:
-	return ret;
-}
-
-static void __exit nf_ct_proto_gre_fini(void)
-{
-	nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4);
-	unregister_pernet_subsys(&proto_gre_net_ops);
-}
-
-module_init(nf_ct_proto_gre_init);
-module_exit(nf_ct_proto_gre_fini);
-
-MODULE_LICENSE("GPL");
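
The GRE hunks above convert the keymap list from a per-netns rwlock to RCU-protected reads with a single global spinlock serializing writers, and defer frees through kfree_rcu(). A kernel-style sketch of that locking scheme follows; it is meant to be read next to the hunks rather than built standalone, and all names are local to the sketch (it also publishes with list_add_tail_rcu(), whereas the hunk itself keeps a plain list_add_tail() under the lock).

#include <linux/errno.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct keymap_entry {
	struct list_head list;
	u16 key;
	struct rcu_head rcu;
};

static LIST_HEAD(keymap_list);
static DEFINE_SPINLOCK(keymap_lock);

/* Reader: lockless, safe against concurrent add/del. */
static bool keymap_contains(u16 key)
{
	struct keymap_entry *e;
	bool found = false;

	rcu_read_lock();
	list_for_each_entry_rcu(e, &keymap_list, list) {
		if (e->key == key) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}

/* Writer: the spinlock orders concurrent updates; the _rcu publish
 * pairs with the lockless readers above. */
static int keymap_add(u16 key)
{
	struct keymap_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);

	if (!e)
		return -ENOMEM;
	e->key = key;
	spin_lock_bh(&keymap_lock);
	list_add_tail_rcu(&e->list, &keymap_list);
	spin_unlock_bh(&keymap_lock);
	return 0;
}

static void keymap_flush(void)
{
	struct keymap_entry *e, *tmp;

	spin_lock_bh(&keymap_lock);
	list_for_each_entry_safe(e, tmp, &keymap_list, list) {
		list_del_rcu(&e->list);
		kfree_rcu(e, rcu);	/* free after a grace period */
	}
	spin_unlock_bh(&keymap_lock);
}

Lookups no longer take any lock, which matters because gre_keymap_lookup() runs in the packet path.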
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 43c7e1a..097deba 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -25,13 +22,8 @@
 
 static const unsigned int nf_ct_icmp_timeout = 30*HZ;
 
-static inline struct nf_icmp_net *icmp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmp;
-}
-
-static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-			      struct net *net, struct nf_conntrack_tuple *tuple)
+bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+		       struct net *net, struct nf_conntrack_tuple *tuple)
 {
 	const struct icmphdr *hp;
 	struct icmphdr _hdr;
@@ -59,8 +51,8 @@
 	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
 };
 
-static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
-			      const struct nf_conntrack_tuple *orig)
+bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
+				    const struct nf_conntrack_tuple *orig)
 {
 	if (orig->dst.u.icmp.type >= sizeof(invmap) ||
 	    !invmap[orig->dst.u.icmp.type])
@@ -72,34 +64,16 @@
 	return true;
 }
 
-static unsigned int *icmp_get_timeouts(struct net *net)
-{
-	return &icmp_pernet(net)->timeout;
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
-static int icmp_packet(struct nf_conn *ct,
-		       const struct sk_buff *skb,
-		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo)
+int nf_conntrack_icmp_packet(struct nf_conn *ct,
+			     struct sk_buff *skb,
+			     enum ip_conntrack_info ctinfo,
+			     const struct nf_hook_state *state)
 {
 	/* Do not immediately delete the connection after the first
 	   successful reply to avoid excessive conntrackd traffic
 	   and also to correctly handle ICMP echo reply duplicates. */
 	unsigned int *timeout = nf_ct_timeout_lookup(ct);
-
-	if (!timeout)
-		timeout = icmp_get_timeouts(nf_ct_net(ct));
-
-	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		     unsigned int dataoff)
-{
 	static const u_int8_t valid_new[] = {
 		[ICMP_ECHO] = 1,
 		[ICMP_TIMESTAMP] = 1,
@@ -107,93 +81,145 @@
 		[ICMP_ADDRESS] = 1
 	};
 
+	if (state->pf != NFPROTO_IPV4)
+		return -NF_ACCEPT;
+
 	if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
 	    !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
 		/* Can't create a new ICMP `conn' with this. */
 		pr_debug("icmp: can't create new conn with type %u\n",
 			 ct->tuplehash[0].tuple.dst.u.icmp.type);
 		nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
-		return false;
+		return -NF_ACCEPT;
 	}
-	return true;
+
+	if (!timeout)
+		timeout = &nf_icmp_pernet(nf_ct_net(ct))->timeout;
+
+	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+	return NF_ACCEPT;
 }
 
-/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
-static int
-icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
-		 unsigned int hooknum)
+/* Check inner header is related to any of the existing connections */
+int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
+			    unsigned int dataoff,
+			    const struct nf_hook_state *state,
+			    u8 l4proto, union nf_inet_addr *outer_daddr)
 {
 	struct nf_conntrack_tuple innertuple, origtuple;
-	const struct nf_conntrack_l4proto *innerproto;
 	const struct nf_conntrack_tuple_hash *h;
 	const struct nf_conntrack_zone *zone;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conntrack_zone tmp;
+	union nf_inet_addr *ct_daddr;
+	enum ip_conntrack_dir dir;
+	struct nf_conn *ct;
 
 	WARN_ON(skb_nfct(skb));
 	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
 
 	/* Are they talking about one of our connections? */
-	if (!nf_ct_get_tuplepr(skb,
-			       skb_network_offset(skb) + ip_hdrlen(skb)
-						       + sizeof(struct icmphdr),
-			       PF_INET, net, &origtuple)) {
-		pr_debug("icmp_error_message: failed to get tuple\n");
+	if (!nf_ct_get_tuplepr(skb, dataoff,
+			       state->pf, state->net, &origtuple))
 		return -NF_ACCEPT;
-	}
-
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
 
 	/* Ordinarily, we'd expect the inverted tupleproto, but it's
 	   been preserved inside the ICMP. */
-	if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
-		pr_debug("icmp_error_message: no match\n");
+	if (!nf_ct_invert_tuple(&innertuple, &origtuple))
+		return -NF_ACCEPT;
+
+	h = nf_conntrack_find_get(state->net, zone, &innertuple);
+	if (!h)
+		return -NF_ACCEPT;
+
+	/* Consider: A -> T (=This machine) -> B
+	 *   Conntrack entry will look like this:
+	 *      Original:  A->B
+	 *      Reply:     B->T (SNAT case) OR A
+	 *
+	 * When this function runs, we got packet that looks like this:
+	 * iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..).
+	 *
+	 * Above nf_conntrack_find_get() makes lookup based on inner_hdr,
+	 * so we should expect that destination of the found connection
+	 * matches outer header destination address.
+	 *
+	 * In above example, we can consider these two cases:
+	 *  1. Error coming in reply direction from B or M (middle box) to
+	 *     T (SNAT case) or A.
+	 *     Inner saddr will be B, dst will be T or A.
+	 *     The found conntrack will be reply tuple (B->T/A).
+	 *  2. Error coming in original direction from A or M to B.
+	 *     Inner saddr will be A, inner daddr will be B.
+	 *     The found conntrack will be original tuple (A->B).
+	 *
+	 * In both cases, conntrack[dir].dst == inner.dst.
+	 *
+	 * A bogus packet could look like this:
+	 *   Inner: B->T
+	 *   Outer: B->X (other machine reachable by T).
+	 *
+	 * In this case, lookup yields connection A->B and will
+	 * set packet from B->X as *RELATED*, even though no connection
+	 * from X was ever seen.
+	 */
+	ct = nf_ct_tuplehash_to_ctrack(h);
+	dir = NF_CT_DIRECTION(h);
+	ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
+	if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
+		if (state->pf == AF_INET) {
+			nf_l4proto_log_invalid(skb, state->net, state->pf,
+					       l4proto,
+					       "outer daddr %pI4 != inner %pI4",
+					       &outer_daddr->ip, &ct_daddr->ip);
+		} else if (state->pf == AF_INET6) {
+			nf_l4proto_log_invalid(skb, state->net, state->pf,
+					       l4proto,
+					       "outer daddr %pI6 != inner %pI6",
+					       &outer_daddr->ip6, &ct_daddr->ip6);
+		}
+		nf_ct_put(ct);
 		return -NF_ACCEPT;
 	}
 
 	ctinfo = IP_CT_RELATED;
-
-	h = nf_conntrack_find_get(net, zone, &innertuple);
-	if (!h) {
-		pr_debug("icmp_error_message: no match\n");
-		return -NF_ACCEPT;
-	}
-
-	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+	if (dir == IP_CT_DIR_REPLY)
 		ctinfo += IP_CT_IS_REPLY;
 
 	/* Update skb to refer to this connection */
-	nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+	nf_ct_set(skb, ct, ctinfo);
 	return NF_ACCEPT;
 }
 
-static void icmp_error_log(const struct sk_buff *skb, struct net *net,
-			   u8 pf, const char *msg)
+static void icmp_error_log(const struct sk_buff *skb,
+			   const struct nf_hook_state *state,
+			   const char *msg)
 {
-	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+	nf_l4proto_log_invalid(skb, state->net, state->pf,
+			       IPPROTO_ICMP, "%s", msg);
 }
 
 /* Small and modified version of icmp_rcv */
-static int
-icmp_error(struct net *net, struct nf_conn *tmpl,
-	   struct sk_buff *skb, unsigned int dataoff,
-	   u8 pf, unsigned int hooknum)
+int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
+			      struct sk_buff *skb, unsigned int dataoff,
+			      const struct nf_hook_state *state)
 {
+	union nf_inet_addr outer_daddr;
 	const struct icmphdr *icmph;
 	struct icmphdr _ih;
 
 	/* Not enough header? */
-	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+	icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
 	if (icmph == NULL) {
-		icmp_error_log(skb, net, pf, "short packet");
+		icmp_error_log(skb, state, "short packet");
 		return -NF_ACCEPT;
 	}
 
-	/* See ip_conntrack_proto_tcp.c */
-	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
-		icmp_error_log(skb, net, pf, "bad hw icmp checksum");
+	/* See nf_conntrack_proto_tcp.c */
+	if (state->net->ct.sysctl_checksum &&
+	    state->hook == NF_INET_PRE_ROUTING &&
+	    nf_ip_checksum(skb, state->hook, dataoff, IPPROTO_ICMP)) {
+		icmp_error_log(skb, state, "bad hw icmp checksum");
 		return -NF_ACCEPT;
 	}
 
@@ -204,7 +230,7 @@
 	 *		  discarded.
 	 */
 	if (icmph->type > NR_ICMP_TYPES) {
-		icmp_error_log(skb, net, pf, "invalid icmp type");
+		icmp_error_log(skb, state, "invalid icmp type");
 		return -NF_ACCEPT;
 	}
 
@@ -216,7 +242,12 @@
 	    icmph->type != ICMP_REDIRECT)
 		return NF_ACCEPT;
 
-	return icmp_error_message(net, tmpl, skb, hooknum);
+	memset(&outer_daddr, 0, sizeof(outer_daddr));
+	outer_daddr.ip = ip_hdr(skb)->daddr;
+
+	dataoff += sizeof(*icmph);
+	return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+				       IPPROTO_ICMP, &outer_daddr);
 }
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -282,7 +313,7 @@
 				      struct net *net, void *data)
 {
 	unsigned int *timeout = data;
-	struct nf_icmp_net *in = icmp_pernet(net);
+	struct nf_icmp_net *in = nf_icmp_pernet(net);
 
 	if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
 		if (!timeout)
@@ -315,59 +346,16 @@
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table icmp_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_icmp_timeout",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
-				     struct nf_icmp_net *in)
+void nf_conntrack_icmp_init_net(struct net *net)
 {
-#ifdef CONFIG_SYSCTL
-	pn->ctl_table = kmemdup(icmp_sysctl_table,
-				sizeof(icmp_sysctl_table),
-				GFP_KERNEL);
-	if (!pn->ctl_table)
-		return -ENOMEM;
-
-	pn->ctl_table[0].data = &in->timeout;
-#endif
-	return 0;
-}
-
-static int icmp_init_net(struct net *net, u_int16_t proto)
-{
-	struct nf_icmp_net *in = icmp_pernet(net);
-	struct nf_proto_net *pn = &in->pn;
+	struct nf_icmp_net *in = nf_icmp_pernet(net);
 
 	in->timeout = nf_ct_icmp_timeout;
-
-	return icmp_kmemdup_sysctl_table(pn, in);
-}
-
-static struct nf_proto_net *icmp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmp.pn;
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
 {
-	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_ICMP,
-	.pkt_to_tuple		= icmp_pkt_to_tuple,
-	.invert_tuple		= icmp_invert_tuple,
-	.packet			= icmp_packet,
-	.new			= icmp_new,
-	.error			= icmp_error,
-	.destroy		= NULL,
-	.me			= NULL,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= icmp_tuple_to_nlattr,
 	.nlattr_tuple_size	= icmp_nlattr_tuple_size,
@@ -383,6 +371,4 @@
 		.nla_policy	= icmp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= icmp_init_net,
-	.get_net_proto		= icmp_get_net_proto,
 };
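
The long comment in nf_conntrack_inet_error() above carries the key invariant: whichever direction the inner-tuple lookup resolves to, that tuple's destination must equal the outer header's destination, otherwise the error could mark traffic to an unrelated host as RELATED. A plain-C sketch of just that check, with simplified stand-in types (IPv4 only, for brevity):

#include <stdbool.h>
#include <stdint.h>

struct addr { uint32_t ip; };

struct tuple { struct addr src, dst; };

struct conn {
	struct tuple tuple[2];		/* [0] original, [1] reply */
};

static bool addr_eq(struct addr a, struct addr b)
{
	return a.ip == b.ip;
}

/* dir is the direction the inner-tuple lookup matched. */
static bool icmp_error_is_related(const struct conn *ct, int dir,
				  struct addr outer_daddr)
{
	/* Bogus pattern rejected: inner says B->T, outer says B->X.
	 * Accepting it would mark traffic to X as RELATED although
	 * no connection from X was ever seen. */
	return addr_eq(outer_daddr, ct->tuple[dir].dst);
}

int main(void)
{
	struct conn ct = {
		.tuple = {
			[0] = { .src = { 0x0a000001 }, .dst = { 0x0a000002 } },
			[1] = { .src = { 0x0a000002 }, .dst = { 0x0a000001 } },
		},
	};
	struct addr outer_ok = { 0x0a000001 }, outer_bogus = { 0x0a000003 };

	/* lookup matched the reply direction (dir == 1) */
	return !(icmp_error_is_related(&ct, 1, outer_ok) &&
		 !icmp_error_is_related(&ct, 1, outer_bogus));
}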
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 97e40f7..6f9144e 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C)2003,2004 USAGI/WIDE Project
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Author:
  *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
  */
@@ -25,20 +22,14 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 #include <net/netfilter/nf_log.h>
 
 static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
 
-static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmpv6;
-}
-
-static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
-				unsigned int dataoff,
-				struct net *net,
-				struct nf_conntrack_tuple *tuple)
+bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+			 unsigned int dataoff,
+			 struct net *net,
+			 struct nf_conntrack_tuple *tuple)
 {
 	const struct icmp6hdr *hp;
 	struct icmp6hdr _hdr;
@@ -72,8 +63,8 @@
 	[ICMPV6_MLD2_REPORT - 130] = 1
 };
 
-static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
-				const struct nf_conntrack_tuple *orig)
+bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
+				      const struct nf_conntrack_tuple *orig)
 {
 	int type = orig->dst.u.icmp.type - 128;
 	if (type < 0 || type >= sizeof(invmap) || !invmap[type])
@@ -87,16 +78,35 @@
 
 static unsigned int *icmpv6_get_timeouts(struct net *net)
 {
-	return &icmpv6_pernet(net)->timeout;
+	return &nf_icmpv6_pernet(net)->timeout;
 }
 
 /* Returns verdict for packet, or -1 for invalid. */
-static int icmpv6_packet(struct nf_conn *ct,
-		       const struct sk_buff *skb,
-		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo)
+int nf_conntrack_icmpv6_packet(struct nf_conn *ct,
+			       struct sk_buff *skb,
+			       enum ip_conntrack_info ctinfo,
+			       const struct nf_hook_state *state)
 {
 	unsigned int *timeout = nf_ct_timeout_lookup(ct);
+	static const u8 valid_new[] = {
+		[ICMPV6_ECHO_REQUEST - 128] = 1,
+		[ICMPV6_NI_QUERY - 128] = 1
+	};
+
+	if (state->pf != NFPROTO_IPV6)
+		return -NF_ACCEPT;
+
+	if (!nf_ct_is_confirmed(ct)) {
+		int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
+
+		if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
+			/* Can't create a new ICMPv6 `conn' with this. */
+			pr_debug("icmpv6: can't create new conn with type %u\n",
+				 type + 128);
+			nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+			return -NF_ACCEPT;
+		}
+	}
 
 	if (!timeout)
 		timeout = icmpv6_get_timeouts(nf_ct_net(ct));
@@ -109,100 +119,35 @@
 	return NF_ACCEPT;
 }
 
-/* Called when a new connection for this protocol found. */
-static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
-		       unsigned int dataoff)
-{
-	static const u_int8_t valid_new[] = {
-		[ICMPV6_ECHO_REQUEST - 128] = 1,
-		[ICMPV6_NI_QUERY - 128] = 1
-	};
-	int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
 
-	if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
-		/* Can't create a new ICMPv6 `conn' with this. */
-		pr_debug("icmpv6: can't create new conn with type %u\n",
-			 type + 128);
-		nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
-		return false;
-	}
-	return true;
+static void icmpv6_error_log(const struct sk_buff *skb,
+			     const struct nf_hook_state *state,
+			     const char *msg)
+{
+	nf_l4proto_log_invalid(skb, state->net, state->pf,
+			       IPPROTO_ICMPV6, "%s", msg);
 }
 
-static int
-icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
-		     struct sk_buff *skb,
-		     unsigned int icmp6off)
+int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
+			      struct sk_buff *skb,
+			      unsigned int dataoff,
+			      const struct nf_hook_state *state)
 {
-	struct nf_conntrack_tuple intuple, origtuple;
-	const struct nf_conntrack_tuple_hash *h;
-	const struct nf_conntrack_l4proto *inproto;
-	enum ip_conntrack_info ctinfo;
-	struct nf_conntrack_zone tmp;
-
-	WARN_ON(skb_nfct(skb));
-
-	/* Are they talking about one of our connections? */
-	if (!nf_ct_get_tuplepr(skb,
-			       skb_network_offset(skb)
-				+ sizeof(struct ipv6hdr)
-				+ sizeof(struct icmp6hdr),
-			       PF_INET6, net, &origtuple)) {
-		pr_debug("icmpv6_error: Can't get tuple\n");
-		return -NF_ACCEPT;
-	}
-
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
-
-	/* Ordinarily, we'd expect the inverted tupleproto, but it's
-	   been preserved inside the ICMP. */
-	if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
-		pr_debug("icmpv6_error: Can't invert tuple\n");
-		return -NF_ACCEPT;
-	}
-
-	ctinfo = IP_CT_RELATED;
-
-	h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
-				  &intuple);
-	if (!h) {
-		pr_debug("icmpv6_error: no match\n");
-		return -NF_ACCEPT;
-	} else {
-		if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
-			ctinfo += IP_CT_IS_REPLY;
-	}
-
-	/* Update skb to refer to this connection */
-	nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
-	return NF_ACCEPT;
-}
-
-static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
-			     u8 pf, const char *msg)
-{
-	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
-}
-
-static int
-icmpv6_error(struct net *net, struct nf_conn *tmpl,
-	     struct sk_buff *skb, unsigned int dataoff,
-	     u8 pf, unsigned int hooknum)
-{
+	union nf_inet_addr outer_daddr;
 	const struct icmp6hdr *icmp6h;
 	struct icmp6hdr _ih;
 	int type;
 
 	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
 	if (icmp6h == NULL) {
-		icmpv6_error_log(skb, net, pf, "short packet");
+		icmpv6_error_log(skb, state, "short packet");
 		return -NF_ACCEPT;
 	}
 
-	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
-		icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
+	if (state->hook == NF_INET_PRE_ROUTING &&
+	    state->net->ct.sysctl_checksum &&
+	    nf_ip6_checksum(skb, state->hook, dataoff, IPPROTO_ICMPV6)) {
+		icmpv6_error_log(skb, state, "ICMPv6 checksum failed");
 		return -NF_ACCEPT;
 	}
 
@@ -217,7 +162,11 @@
 	if (icmp6h->icmp6_type >= 128)
 		return NF_ACCEPT;
 
-	return icmpv6_error_message(net, tmpl, skb, dataoff);
+	memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr,
+	       sizeof(outer_daddr.ip6));
+	dataoff += sizeof(*icmp6h);
+	return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+				       IPPROTO_ICMPV6, &outer_daddr);
 }
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -283,7 +232,7 @@
 					struct net *net, void *data)
 {
 	unsigned int *timeout = data;
-	struct nf_icmp_net *in = icmpv6_pernet(net);
+	struct nf_icmp_net *in = nf_icmpv6_pernet(net);
 
 	if (!timeout)
 		timeout = icmpv6_get_timeouts(net);
@@ -316,57 +265,16 @@
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table icmpv6_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_icmpv6_timeout",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
-				       struct nf_icmp_net *in)
+void nf_conntrack_icmpv6_init_net(struct net *net)
 {
-#ifdef CONFIG_SYSCTL
-	pn->ctl_table = kmemdup(icmpv6_sysctl_table,
-				sizeof(icmpv6_sysctl_table),
-				GFP_KERNEL);
-	if (!pn->ctl_table)
-		return -ENOMEM;
-
-	pn->ctl_table[0].data = &in->timeout;
-#endif
-	return 0;
-}
-
-static int icmpv6_init_net(struct net *net, u_int16_t proto)
-{
-	struct nf_icmp_net *in = icmpv6_pernet(net);
-	struct nf_proto_net *pn = &in->pn;
+	struct nf_icmp_net *in = nf_icmpv6_pernet(net);
 
 	in->timeout = nf_ct_icmpv6_timeout;
-
-	return icmpv6_kmemdup_sysctl_table(pn, in);
-}
-
-static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmpv6.pn;
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
 {
-	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_ICMPV6,
-	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
-	.invert_tuple		= icmpv6_invert_tuple,
-	.packet			= icmpv6_packet,
-	.new			= icmpv6_new,
-	.error			= icmpv6_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= icmpv6_tuple_to_nlattr,
 	.nlattr_tuple_size	= icmpv6_nlattr_tuple_size,
@@ -382,6 +290,4 @@
 		.nla_policy	= icmpv6_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= icmpv6_init_net,
-	.get_net_proto		= icmpv6_get_net_proto,
 };
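
With the ->new() callback gone, the ICMPv6 handler above performs the "may this open a connection?" check inline, guarded by nf_ct_is_confirmed(). A standalone sketch of the biased lookup table it uses; the type constants follow RFC 4443 and RFC 4620, and everything else is local to the sketch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ICMPV6_ECHO_REQUEST	128	/* RFC 4443 */
#define ICMPV6_NI_QUERY		139	/* RFC 4620 */

static bool icmpv6_type_may_create(uint8_t icmp6_type)
{
	/* Informational types start at 128; biasing by 128 keeps the
	 * table to a dozen bytes instead of 140. */
	static const uint8_t valid_new[] = {
		[ICMPV6_ECHO_REQUEST - 128] = 1,
		[ICMPV6_NI_QUERY - 128] = 1,
	};
	int type = (int)icmp6_type - 128;

	return type >= 0 && type < (int)sizeof(valid_new) && valid_new[type];
}

int main(void)
{
	printf("echo request: %d, dest unreachable: %d\n",
	       icmpv6_type_may_create(128), icmpv6_type_may_create(1));
	return 0;
}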
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index e4d738d..fce3d93 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Connection tracking protocol helper module for SCTP.
  *
@@ -6,10 +7,6 @@
  *
  * SCTP is defined in RFC 2960. References to various sections in this code
  * are to this RFC.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -146,11 +143,6 @@
 	}
 };
 
-static inline struct nf_sctp_net *sctp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.sctp;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -273,11 +265,100 @@
 	return sctp_conntracks[dir][i][cur_state];
 }
 
-/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
-static int sctp_packet(struct nf_conn *ct,
-		       const struct sk_buff *skb,
+/* Don't need lock here: this conntrack is not in circulation yet */
+static noinline bool
+sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
+	 const struct sctphdr *sh, unsigned int dataoff)
+{
+	enum sctp_conntrack new_state;
+	const struct sctp_chunkhdr *sch;
+	struct sctp_chunkhdr _sch;
+	u32 offset, count;
+
+	memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
+	new_state = SCTP_CONNTRACK_MAX;
+	for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) {
+		new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
+					   SCTP_CONNTRACK_NONE, sch->type);
+
+		/* Invalid: delete conntrack */
+		if (new_state == SCTP_CONNTRACK_NONE ||
+		    new_state == SCTP_CONNTRACK_MAX) {
+			pr_debug("nf_conntrack_sctp: invalid new conntrack, deleting.\n");
+			return false;
+		}
+
+		/* Copy the vtag into the state info */
+		if (sch->type == SCTP_CID_INIT) {
+			struct sctp_inithdr _inithdr, *ih;
+			/* Sec 8.5.1 (A) */
+			if (sh->vtag)
+				return false;
+
+			ih = skb_header_pointer(skb, offset + sizeof(_sch),
+						sizeof(_inithdr), &_inithdr);
+			if (!ih)
+				return false;
+
+			pr_debug("Setting vtag %x for new conn\n",
+				 ih->init_tag);
+
+			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
+		} else if (sch->type == SCTP_CID_HEARTBEAT) {
+			pr_debug("Setting vtag %x for secondary conntrack\n",
+				 sh->vtag);
+			ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
+		} else {
+			/* If it is a shutdown ack OOTB packet, we expect a return
+			 * shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
+			pr_debug("Setting vtag %x for new conn OOTB\n",
+				 sh->vtag);
+			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
+		}
+
+		ct->proto.sctp.state = new_state;
+	}
+
+	return true;
+}
+
+static bool sctp_error(struct sk_buff *skb,
 		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo)
+		       const struct nf_hook_state *state)
+{
+	const struct sctphdr *sh;
+	const char *logmsg;
+
+	if (skb->len < dataoff + sizeof(struct sctphdr)) {
+		logmsg = "nf_ct_sctp: short packet ";
+		goto out_invalid;
+	}
+	if (state->hook == NF_INET_PRE_ROUTING &&
+	    state->net->ct.sysctl_checksum &&
+	    skb->ip_summed == CHECKSUM_NONE) {
+		if (skb_ensure_writable(skb, dataoff + sizeof(*sh))) {
+			logmsg = "nf_ct_sctp: failed to read header ";
+			goto out_invalid;
+		}
+		sh = (const struct sctphdr *)(skb->data + dataoff);
+		if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
+			logmsg = "nf_ct_sctp: bad CRC ";
+			goto out_invalid;
+		}
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return false;
+out_invalid:
+	nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_SCTP, "%s", logmsg);
+	return true;
+}
+
+/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
+int nf_conntrack_sctp_packet(struct nf_conn *ct,
+			     struct sk_buff *skb,
+			     unsigned int dataoff,
+			     enum ip_conntrack_info ctinfo,
+			     const struct nf_hook_state *state)
 {
 	enum sctp_conntrack new_state, old_state;
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -289,6 +370,9 @@
 	unsigned int *timeouts;
 	unsigned long map[256 / sizeof(unsigned long)] = { 0 };
 
+	if (sctp_error(skb, dataoff, state))
+		return -NF_ACCEPT;
+
 	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
 	if (sh == NULL)
 		goto out;
@@ -296,6 +380,17 @@
 	if (do_basic_checks(ct, skb, dataoff, map) != 0)
 		goto out;
 
+	if (!nf_ct_is_confirmed(ct)) {
+		/* If an OOTB packet has any of these chunks, discard it (Sec 8.4) */
+		if (test_bit(SCTP_CID_ABORT, map) ||
+		    test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
+		    test_bit(SCTP_CID_COOKIE_ACK, map))
+			return -NF_ACCEPT;
+
+		if (!sctp_new(ct, skb, sh, dataoff))
+			return -NF_ACCEPT;
+	}
+
 	/* Check the verification tag (Sec 8.5) */
 	if (!test_bit(SCTP_CID_INIT, map) &&
 	    !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
@@ -377,7 +472,7 @@
 
 	timeouts = nf_ct_timeout_lookup(ct);
 	if (!timeouts)
-		timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
+		timeouts = nf_sctp_pernet(nf_ct_net(ct))->timeouts;
 
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
@@ -397,110 +492,6 @@
 	return -NF_ACCEPT;
 }
 
-/* Called when a new connection for this protocol found. */
-static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		     unsigned int dataoff)
-{
-	enum sctp_conntrack new_state;
-	const struct sctphdr *sh;
-	struct sctphdr _sctph;
-	const struct sctp_chunkhdr *sch;
-	struct sctp_chunkhdr _sch;
-	u_int32_t offset, count;
-	unsigned long map[256 / sizeof(unsigned long)] = { 0 };
-
-	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
-	if (sh == NULL)
-		return false;
-
-	if (do_basic_checks(ct, skb, dataoff, map) != 0)
-		return false;
-
-	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
-	if (test_bit(SCTP_CID_ABORT, map) ||
-	    test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
-	    test_bit(SCTP_CID_COOKIE_ACK, map))
-		return false;
-
-	memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
-	new_state = SCTP_CONNTRACK_MAX;
-	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
-		/* Don't need lock here: this conntrack not in circulation yet */
-		new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
-					   SCTP_CONNTRACK_NONE, sch->type);
-
-		/* Invalid: delete conntrack */
-		if (new_state == SCTP_CONNTRACK_NONE ||
-		    new_state == SCTP_CONNTRACK_MAX) {
-			pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
-			return false;
-		}
-
-		/* Copy the vtag into the state info */
-		if (sch->type == SCTP_CID_INIT) {
-			struct sctp_inithdr _inithdr, *ih;
-			/* Sec 8.5.1 (A) */
-			if (sh->vtag)
-				return false;
-
-			ih = skb_header_pointer(skb, offset + sizeof(_sch),
-						sizeof(_inithdr), &_inithdr);
-			if (!ih)
-				return false;
-
-			pr_debug("Setting vtag %x for new conn\n",
-				 ih->init_tag);
-
-			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
-		} else if (sch->type == SCTP_CID_HEARTBEAT) {
-			pr_debug("Setting vtag %x for secondary conntrack\n",
-				 sh->vtag);
-			ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
-		}
-		/* If it is a shutdown ack OOTB packet, we expect a return
-		   shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
-		else {
-			pr_debug("Setting vtag %x for new conn OOTB\n",
-				 sh->vtag);
-			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
-		}
-
-		ct->proto.sctp.state = new_state;
-	}
-
-	return true;
-}
-
-static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
-		      unsigned int dataoff,
-		      u8 pf, unsigned int hooknum)
-{
-	const struct sctphdr *sh;
-	const char *logmsg;
-
-	if (skb->len < dataoff + sizeof(struct sctphdr)) {
-		logmsg = "nf_ct_sctp: short packet ";
-		goto out_invalid;
-	}
-	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-	    skb->ip_summed == CHECKSUM_NONE) {
-		if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
-			logmsg = "nf_ct_sctp: failed to read header ";
-			goto out_invalid;
-		}
-		sh = (const struct sctphdr *)(skb->data + dataoff);
-		if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
-			logmsg = "nf_ct_sctp: bad CRC ";
-			goto out_invalid;
-		}
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
-	return NF_ACCEPT;
-out_invalid:
-	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
-	return -NF_ACCEPT;
-}
-
 static bool sctp_can_early_drop(const struct nf_conn *ct)
 {
 	switch (ct->proto.sctp.state) {
@@ -526,7 +517,7 @@
 	struct nlattr *nest_parms;
 
 	spin_lock_bh(&ct->lock);
-	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP);
 	if (!nest_parms)
 		goto nla_put_failure;
 
@@ -569,8 +560,8 @@
 	if (!attr)
 		return 0;
 
-	err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr,
-			       sctp_nla_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_SCTP_MAX, attr,
+					  sctp_nla_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -600,7 +591,7 @@
 				      struct net *net, void *data)
 {
 	unsigned int *timeouts = data;
-	struct nf_sctp_net *sn = sctp_pernet(net);
+	struct nf_sctp_net *sn = nf_sctp_pernet(net);
 	int i;
 
 	/* set default SCTP timeouts. */
@@ -648,129 +639,26 @@
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table sctp_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_sctp_timeout_closed",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_sctp_timeout_cookie_wait",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_sctp_timeout_cookie_echoed",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_sctp_timeout_established",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_sctp_timeout_shutdown_sent",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_sctp_timeout_shutdown_recd",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_sctp_timeout_shutdown_ack_sent",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_sctp_timeout_heartbeat_sent",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_sctp_timeout_heartbeat_acked",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif
-
-static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
-				     struct nf_sctp_net *sn)
+void nf_conntrack_sctp_init_net(struct net *net)
 {
-#ifdef CONFIG_SYSCTL
-	if (pn->ctl_table)
-		return 0;
+	struct nf_sctp_net *sn = nf_sctp_pernet(net);
+	int i;
 
-	pn->ctl_table = kmemdup(sctp_sysctl_table,
-				sizeof(sctp_sysctl_table),
-				GFP_KERNEL);
-	if (!pn->ctl_table)
-		return -ENOMEM;
+	for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
+		sn->timeouts[i] = sctp_timeouts[i];
 
-	pn->ctl_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED];
-	pn->ctl_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT];
-	pn->ctl_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED];
-	pn->ctl_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED];
-	pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
-	pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
-	pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
-	pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT];
-	pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED];
-#endif
-	return 0;
+	/* timeouts[0] is unused, init it so ->timeouts[0] contains the
+	 * 'new' timeout, like udp or icmp.
+	 */
+	sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED];
 }
 
-static int sctp_init_net(struct net *net, u_int16_t proto)
-{
-	struct nf_sctp_net *sn = sctp_pernet(net);
-	struct nf_proto_net *pn = &sn->pn;
-
-	if (!pn->users) {
-		int i;
-
-		for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
-			sn->timeouts[i] = sctp_timeouts[i];
-
-		/* timeouts[0] is unused, init it so ->timeouts[0] contains
-		 * 'new' timeout, like udp or icmp.
-		 */
-		sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED];
-	}
-
-	return sctp_kmemdup_sysctl_table(pn, sn);
-}
-
-static struct nf_proto_net *sctp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.sctp.pn;
-}
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
-	.l3proto		= PF_INET,
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
 	.l4proto 		= IPPROTO_SCTP,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= sctp_print_conntrack,
 #endif
-	.packet 		= sctp_packet,
-	.new 			= sctp_new,
-	.error			= sctp_error,
 	.can_early_drop		= sctp_can_early_drop,
-	.me 			= THIS_MODULE,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.nlattr_size		= SCTP_NLATTR_SIZE,
 	.to_nlattr		= sctp_to_nlattr,
@@ -789,41 +677,4 @@
 		.nla_policy	= sctp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= sctp_init_net,
-	.get_net_proto		= sctp_get_net_proto,
 };
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
-	.l3proto		= PF_INET6,
-	.l4proto 		= IPPROTO_SCTP,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
-	.print_conntrack	= sctp_print_conntrack,
-#endif
-	.packet 		= sctp_packet,
-	.new 			= sctp_new,
-	.error			= sctp_error,
-	.can_early_drop		= sctp_can_early_drop,
-	.me 			= THIS_MODULE,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_size		= SCTP_NLATTR_SIZE,
-	.to_nlattr		= sctp_to_nlattr,
-	.from_nlattr		= nlattr_to_sctp,
-	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
-	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
-	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
-	.nla_policy		= nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-	.ctnl_timeout		= {
-		.nlattr_to_obj	= sctp_timeout_nlattr_to_obj,
-		.obj_to_nlattr	= sctp_timeout_obj_to_nlattr,
-		.nlattr_max	= CTA_TIMEOUT_SCTP_MAX,
-		.obj_size	= sizeof(unsigned int) * SCTP_CONNTRACK_MAX,
-		.nla_policy	= sctp_timeout_nla_policy,
-	},
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= sctp_init_net,
-	.get_net_proto		= sctp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6);
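
sctp_error() above validates the CRC once, before any state handling, and marks the skb CHECKSUM_UNNECESSARY so it is not verified again. For reference, a self-contained sketch of the check itself: the kernel calls sctp_compute_cksum(), while the bitwise loop below is only a slow reference CRC32c (reflected polynomial 0x82F63B78, per RFC 4960 appendix B); the checksum field is treated as little-endian here, matching Linux's __le32 sctphdr.checksum.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static uint32_t crc32c(const uint8_t *buf, size_t len)
{
	uint32_t crc = 0xFFFFFFFFu;

	for (size_t i = 0; i < len; i++) {
		crc ^= buf[i];
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1u));
	}
	return ~crc;
}

/* SCTP common header: the checksum sits at offset 8 and is computed
 * with the field zeroed (RFC 4960, appendix B). */
static bool sctp_crc_ok(uint8_t *pkt, size_t len)
{
	uint32_t wire, calc;
	uint8_t saved[4];

	if (len < 12)			/* shorter than the common header */
		return false;

	memcpy(saved, pkt + 8, 4);
	wire = (uint32_t)saved[0] | (uint32_t)saved[1] << 8 |
	       (uint32_t)saved[2] << 16 | (uint32_t)saved[3] << 24;

	memset(pkt + 8, 0, 4);		/* zero the checksum field */
	calc = crc32c(pkt, len);
	memcpy(pkt + 8, saved, 4);	/* restore */

	return calc == wire;
}

int main(void)
{
	uint8_t pkt[12] = { 0 };	/* bare common header, all zero */
	uint32_t c = crc32c(pkt, sizeof(pkt));

	pkt[8] = c; pkt[9] = c >> 8; pkt[10] = c >> 16; pkt[11] = c >> 24;
	return sctp_crc_ok(pkt, sizeof(pkt)) ? 0 : 1;
}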
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 247b897..1926fd5 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * (C) 2002-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -272,11 +269,6 @@
 	}
 };
 
-static inline struct nf_tcp_net *tcp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.tcp;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -475,11 +467,12 @@
 			  const struct tcphdr *tcph)
 {
 	struct net *net = nf_ct_net(ct);
-	struct nf_tcp_net *tn = tcp_pernet(net);
+	struct nf_tcp_net *tn = nf_tcp_pernet(net);
 	struct ip_ct_tcp_state *sender = &state->seen[dir];
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 	__u32 seq, ack, sack, end, win, swin;
+	u16 win_raw;
 	s32 receiver_offset;
 	bool res, in_recv_win;
 
@@ -488,7 +481,8 @@
 	 */
 	seq = ntohl(tcph->seq);
 	ack = sack = ntohl(tcph->ack_seq);
-	win = ntohs(tcph->window);
+	win_raw = ntohs(tcph->window);
+	win = win_raw;
 	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
 
 	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
@@ -663,14 +657,14 @@
 			    && state->last_seq == seq
 			    && state->last_ack == ack
 			    && state->last_end == end
-			    && state->last_win == win)
+			    && state->last_win == win_raw)
 				state->retrans++;
 			else {
 				state->last_dir = dir;
 				state->last_seq = seq;
 				state->last_ack = ack;
 				state->last_end = end;
-				state->last_win = win;
+				state->last_win = win_raw;
 				state->retrans = 0;
 			}
 		}
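
The win_raw changes above keep the unscaled wire value for the retransmission bookkeeping: last_win is a 16-bit field in the state struct, while win can be shifted left by the window-scale factor further down in tcp_in_window() and then no longer fits in 16 bits. A tiny standalone illustration of the truncation this avoids (values arbitrary):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t last_win;		/* what the state struct can store */
	uint32_t win_raw = 0xFFFF;	/* window as seen on the wire */
	uint32_t win = win_raw << 7;	/* after applying td_scale == 7 */

	last_win = (uint16_t)win;	/* old code: truncates to 65408 */

	printf("scaled win=%u stored=%u raw=%u\n", win, last_win, win_raw);
	/* old comparison: last_win == win      -> effectively never true,
	 * new comparison: last_win == win_raw  -> exact. */
	return 0;
}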
@@ -717,35 +711,26 @@
 	[TCPHDR_ACK|TCPHDR_URG]			= 1,
 };
 
-static void tcp_error_log(const struct sk_buff *skb, struct net *net,
-			  u8 pf, const char *msg)
+static void tcp_error_log(const struct sk_buff *skb,
+			  const struct nf_hook_state *state,
+			  const char *msg)
 {
-	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
+	nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_TCP, "%s", msg);
 }
 
 /* Protect conntrack against broken packets. Code taken from ipt_unclean.c.  */
-static int tcp_error(struct net *net, struct nf_conn *tmpl,
-		     struct sk_buff *skb,
-		     unsigned int dataoff,
-		     u_int8_t pf,
-		     unsigned int hooknum)
+static bool tcp_error(const struct tcphdr *th,
+		      struct sk_buff *skb,
+		      unsigned int dataoff,
+		      const struct nf_hook_state *state)
 {
-	const struct tcphdr *th;
-	struct tcphdr _tcph;
 	unsigned int tcplen = skb->len - dataoff;
-	u_int8_t tcpflags;
-
-	/* Smaller that minimal TCP header? */
-	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		tcp_error_log(skb, net, pf, "short packet");
-		return -NF_ACCEPT;
-	}
+	u8 tcpflags;
 
 	/* Not whole TCP header or malformed packet */
 	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
-		tcp_error_log(skb, net, pf, "truncated packet");
-		return -NF_ACCEPT;
+		tcp_error_log(skb, state, "truncated packet");
+		return true;
 	}
 
 	/* Checksum invalid? Ignore.
@@ -753,30 +738,110 @@
 	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
-	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
-		tcp_error_log(skb, net, pf, "bad checksum");
-		return -NF_ACCEPT;
+	if (state->net->ct.sysctl_checksum &&
+	    state->hook == NF_INET_PRE_ROUTING &&
+	    nf_checksum(skb, state->hook, dataoff, IPPROTO_TCP, state->pf)) {
+		tcp_error_log(skb, state, "bad checksum");
+		return true;
 	}
 
 	/* Check TCP flags. */
 	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
 	if (!tcp_valid_flags[tcpflags]) {
-		tcp_error_log(skb, net, pf, "invalid tcp flag combination");
-		return -NF_ACCEPT;
+		tcp_error_log(skb, state, "invalid tcp flag combination");
+		return true;
 	}
 
-	return NF_ACCEPT;
+	return false;
+}
+
+static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
+			     unsigned int dataoff,
+			     const struct tcphdr *th)
+{
+	enum tcp_conntrack new_state;
+	struct net *net = nf_ct_net(ct);
+	const struct nf_tcp_net *tn = nf_tcp_pernet(net);
+	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
+	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
+
+	/* Don't need lock here: this conntrack not in circulation yet */
+	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
+
+	/* Invalid: delete conntrack */
+	if (new_state >= TCP_CONNTRACK_MAX) {
+		pr_debug("nf_ct_tcp: invalid new deleting.\n");
+		return false;
+	}
+
+	if (new_state == TCP_CONNTRACK_SYN_SENT) {
+		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
+		/* SYN packet */
+		ct->proto.tcp.seen[0].td_end =
+			segment_seq_plus_len(ntohl(th->seq), skb->len,
+					     dataoff, th);
+		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+		if (ct->proto.tcp.seen[0].td_maxwin == 0)
+			ct->proto.tcp.seen[0].td_maxwin = 1;
+		ct->proto.tcp.seen[0].td_maxend =
+			ct->proto.tcp.seen[0].td_end;
+
+		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
+	} else if (tn->tcp_loose == 0) {
+		/* Don't try to pick up connections. */
+		return false;
+	} else {
+		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
+		/*
+		 * We are in the middle of a connection,
+		 * its history is lost for us.
+		 * Let's try to use the data from the packet.
+		 */
+		ct->proto.tcp.seen[0].td_end =
+			segment_seq_plus_len(ntohl(th->seq), skb->len,
+					     dataoff, th);
+		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+		if (ct->proto.tcp.seen[0].td_maxwin == 0)
+			ct->proto.tcp.seen[0].td_maxwin = 1;
+		ct->proto.tcp.seen[0].td_maxend =
+			ct->proto.tcp.seen[0].td_end +
+			ct->proto.tcp.seen[0].td_maxwin;
+
+		/* We assume SACK and liberal window checking to handle
+		 * window scaling */
+		ct->proto.tcp.seen[0].flags =
+		ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
+					      IP_CT_TCP_FLAG_BE_LIBERAL;
+	}
+
+	/* tcp_packet will set them */
+	ct->proto.tcp.last_index = TCP_NONE_SET;
+
+	pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i "
+		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		 __func__,
+		 sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 sender->td_scale,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		 receiver->td_scale);
+	return true;
+}
+
+static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
+{
+	return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
+	       test_bit(IPS_ASSURED_BIT, &ct->status);
 }
 
 /* Returns verdict for packet, or -1 for invalid. */
-static int tcp_packet(struct nf_conn *ct,
-		      const struct sk_buff *skb,
-		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo)
+int nf_conntrack_tcp_packet(struct nf_conn *ct,
+			    struct sk_buff *skb,
+			    unsigned int dataoff,
+			    enum ip_conntrack_info ctinfo,
+			    const struct nf_hook_state *state)
 {
 	struct net *net = nf_ct_net(ct);
-	struct nf_tcp_net *tn = tcp_pernet(net);
+	struct nf_tcp_net *tn = nf_tcp_pernet(net);
 	struct nf_conntrack_tuple *tuple;
 	enum tcp_conntrack new_state, old_state;
 	unsigned int index, *timeouts;
@@ -786,7 +851,14 @@
 	unsigned long timeout;
 
 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
-	BUG_ON(th == NULL);
+	if (th == NULL)
+		return -NF_ACCEPT;
+
+	if (tcp_error(th, skb, dataoff, state))
+		return -NF_ACCEPT;
+
+	if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
+		return -NF_ACCEPT;
 
 	spin_lock_bh(&ct->lock);
 	old_state = ct->proto.tcp.state;
@@ -963,16 +1035,38 @@
 			new_state = TCP_CONNTRACK_ESTABLISHED;
 		break;
 	case TCP_CONNTRACK_CLOSE:
-		if (index == TCP_RST_SET
-		    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
-		    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
-			/* Invalid RST  */
-			spin_unlock_bh(&ct->lock);
-			nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
-			return -NF_ACCEPT;
+		if (index != TCP_RST_SET)
+			break;
+
+		if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
+			u32 seq = ntohl(th->seq);
+
+			if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
+				/* Invalid RST  */
+				spin_unlock_bh(&ct->lock);
+				nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
+				return -NF_ACCEPT;
+			}
+
+			if (!nf_conntrack_tcp_established(ct) ||
+			    seq == ct->proto.tcp.seen[!dir].td_maxack)
+				break;
+
+			/* Check if rst is part of train, such as
+			 *   foo:80 > bar:4379: P, 235946583:235946602(19) ack 42
+			 *   foo:80 > bar:4379: R, 235946602:235946602(0)  ack 42
+			 */
+			if (ct->proto.tcp.last_index == TCP_ACK_SET &&
+			    ct->proto.tcp.last_dir == dir &&
+			    seq == ct->proto.tcp.last_end)
+				break;
+
+			/* ... RST sequence number doesn't match exactly, keep
+			 * established state to allow a possible challenge ACK.
+			 */
+			new_state = old_state;
 		}
-		if (index == TCP_RST_SET
-		    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
+		if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
 			 && ct->proto.tcp.last_index == TCP_SYN_SET)
 			|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
 			    && ct->proto.tcp.last_index == TCP_ACK_SET))
@@ -988,7 +1082,7 @@
 			 * segments we ignored. */
 			goto in_window;
 		}
-		/* Just fall through */
+		break;
 	default:
 		/* Keep compilers happy. */
 		break;
@@ -1023,6 +1117,8 @@
 	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
 	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
+	else if (unlikely(index == TCP_RST_SET))
+		timeout = timeouts[TCP_CONNTRACK_CLOSE];
 	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
 		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
 		 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
@@ -1067,82 +1163,6 @@
 	return NF_ACCEPT;
 }
 
-/* Called when a new connection for this protocol found. */
-static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff)
-{
-	enum tcp_conntrack new_state;
-	const struct tcphdr *th;
-	struct tcphdr _tcph;
-	struct net *net = nf_ct_net(ct);
-	struct nf_tcp_net *tn = tcp_pernet(net);
-	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
-	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
-
-	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
-	BUG_ON(th == NULL);
-
-	/* Don't need lock here: this conntrack not in circulation yet */
-	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
-
-	/* Invalid: delete conntrack */
-	if (new_state >= TCP_CONNTRACK_MAX) {
-		pr_debug("nf_ct_tcp: invalid new deleting.\n");
-		return false;
-	}
-
-	if (new_state == TCP_CONNTRACK_SYN_SENT) {
-		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
-		/* SYN packet */
-		ct->proto.tcp.seen[0].td_end =
-			segment_seq_plus_len(ntohl(th->seq), skb->len,
-					     dataoff, th);
-		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-		if (ct->proto.tcp.seen[0].td_maxwin == 0)
-			ct->proto.tcp.seen[0].td_maxwin = 1;
-		ct->proto.tcp.seen[0].td_maxend =
-			ct->proto.tcp.seen[0].td_end;
-
-		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
-	} else if (tn->tcp_loose == 0) {
-		/* Don't try to pick up connections. */
-		return false;
-	} else {
-		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
-		/*
-		 * We are in the middle of a connection,
-		 * its history is lost for us.
-		 * Let's try to use the data from the packet.
-		 */
-		ct->proto.tcp.seen[0].td_end =
-			segment_seq_plus_len(ntohl(th->seq), skb->len,
-					     dataoff, th);
-		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-		if (ct->proto.tcp.seen[0].td_maxwin == 0)
-			ct->proto.tcp.seen[0].td_maxwin = 1;
-		ct->proto.tcp.seen[0].td_maxend =
-			ct->proto.tcp.seen[0].td_end +
-			ct->proto.tcp.seen[0].td_maxwin;
-
-		/* We assume SACK and liberal window checking to handle
-		 * window scaling */
-		ct->proto.tcp.seen[0].flags =
-		ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
-					      IP_CT_TCP_FLAG_BE_LIBERAL;
-	}
-
-	/* tcp_packet will set them */
-	ct->proto.tcp.last_index = TCP_NONE_SET;
-
-	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
-		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		 sender->td_end, sender->td_maxend, sender->td_maxwin,
-		 sender->td_scale,
-		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		 receiver->td_scale);
-	return true;
-}
-
 static bool tcp_can_early_drop(const struct nf_conn *ct)
 {
 	switch (ct->proto.tcp.state) {
@@ -1171,7 +1191,7 @@
 	struct nf_ct_tcp_flags tmp = {};
 
 	spin_lock_bh(&ct->lock);
-	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP);
 	if (!nest_parms)
 		goto nla_put_failure;
 
@@ -1207,7 +1227,7 @@
 	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
 	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
 	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
-	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len =  sizeof(struct nf_ct_tcp_flags) },
+	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len = sizeof(struct nf_ct_tcp_flags) },
 };
 
 #define TCP_NLATTR_SIZE	( \
@@ -1227,8 +1247,8 @@
 	if (!pattr)
 		return 0;
 
-	err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
-			       tcp_nla_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_TCP_MAX, pattr,
+					  tcp_nla_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -1287,7 +1307,7 @@
 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				     struct net *net, void *data)
 {
-	struct nf_tcp_net *tn = tcp_pernet(net);
+	struct nf_tcp_net *tn = nf_tcp_pernet(net);
 	unsigned int *timeouts = data;
 	int i;
 
@@ -1396,158 +1416,29 @@
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table tcp_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_timeout_established",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_timeout_close_wait",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_timeout_last_ack",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_timeout_time_wait",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_timeout_close",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_loose",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname       = "nf_conntrack_tcp_be_liberal",
-		.maxlen         = sizeof(unsigned int),
-		.mode           = 0644,
-		.proc_handler   = proc_dointvec,
-	},
-	{
-		.procname	= "nf_conntrack_tcp_max_retrans",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{ }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
-				    struct nf_tcp_net *tn)
+void nf_conntrack_tcp_init_net(struct net *net)
 {
-#ifdef CONFIG_SYSCTL
-	if (pn->ctl_table)
-		return 0;
+	struct nf_tcp_net *tn = nf_tcp_pernet(net);
+	int i;
 
-	pn->ctl_table = kmemdup(tcp_sysctl_table,
-				sizeof(tcp_sysctl_table),
-				GFP_KERNEL);
-	if (!pn->ctl_table)
-		return -ENOMEM;
+	for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
+		tn->timeouts[i] = tcp_timeouts[i];
 
-	pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
-	pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
-	pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
-	pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
-	pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
-	pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
-	pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
-	pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
-	pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
-	pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
-	pn->ctl_table[10].data = &tn->tcp_loose;
-	pn->ctl_table[11].data = &tn->tcp_be_liberal;
-	pn->ctl_table[12].data = &tn->tcp_max_retrans;
-#endif
-	return 0;
+	/* timeouts[0] is unused, make it same as SYN_SENT so
+	 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
+	 */
+	tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
+	tn->tcp_loose = nf_ct_tcp_loose;
+	tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
+	tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
 }
 
-static int tcp_init_net(struct net *net, u_int16_t proto)
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
 {
-	struct nf_tcp_net *tn = tcp_pernet(net);
-	struct nf_proto_net *pn = &tn->pn;
-
-	if (!pn->users) {
-		int i;
-
-		for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
-			tn->timeouts[i] = tcp_timeouts[i];
-
-		/* timeouts[0] is unused, make it same as SYN_SENT so
-		 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
-		 */
-		tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
-		tn->tcp_loose = nf_ct_tcp_loose;
-		tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
-		tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
-	}
-
-	return tcp_kmemdup_sysctl_table(pn, tn);
-}
-
-static struct nf_proto_net *tcp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.tcp.pn;
-}
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
-{
-	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_TCP,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack 	= tcp_print_conntrack,
 #endif
-	.packet 		= tcp_packet,
-	.new 			= tcp_new,
-	.error			= tcp_error,
 	.can_early_drop		= tcp_can_early_drop,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.to_nlattr		= tcp_to_nlattr,
@@ -1568,42 +1459,4 @@
 		.nla_policy	= tcp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= tcp_init_net,
-	.get_net_proto		= tcp_get_net_proto,
 };
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
-{
-	.l3proto		= PF_INET6,
-	.l4proto 		= IPPROTO_TCP,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
-	.print_conntrack 	= tcp_print_conntrack,
-#endif
-	.packet 		= tcp_packet,
-	.new 			= tcp_new,
-	.error			= tcp_error,
-	.can_early_drop		= tcp_can_early_drop,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_size		= TCP_NLATTR_SIZE,
-	.to_nlattr		= tcp_to_nlattr,
-	.from_nlattr		= nlattr_to_tcp,
-	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
-	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
-	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
-	.nla_policy		= nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-	.ctnl_timeout		= {
-		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
-		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
-		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
-		.obj_size	= sizeof(unsigned int) *
-					TCP_CONNTRACK_TIMEOUT_MAX,
-		.nla_policy	= tcp_timeout_nla_policy,
-	},
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= tcp_init_net,
-	.get_net_proto		= tcp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
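
For illustration only (not part of the patch): the reworked TCP_CONNTRACK_CLOSE hunk above validates RSTs in three steps — a stale RST below the peer's highest ACK stays invalid, an exact-match RST (or one riding directly behind the sender's own data, the "train" case) may close the flow, and anything else on an established, assured flow keeps the old state so a challenge ACK can still be tracked. A self-contained userspace model of that decision; all types and names below are simplified stand-ins:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct peer_state {
	uint32_t td_maxack;   /* highest ACK seen from this peer */
	bool     maxack_set;
};

struct ct_model {
	bool     established_and_assured;
	int      last_index;  /* IDX_ACK if the previous segment was an ACK */
	int      last_dir;
	uint32_t last_end;    /* end of the previous segment's data */
};

#define IDX_ACK 3

/* before(a, b): sequence a is earlier than b, modulo 2^32 */
static bool before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

enum rst_verdict { RST_INVALID, RST_CLOSES, RST_KEEP_STATE };

static enum rst_verdict check_rst(const struct ct_model *ct,
				  const struct peer_state *other,
				  int dir, uint32_t seq)
{
	if (!other->maxack_set)
		return RST_CLOSES;            /* nothing to validate against */
	if (before(seq, other->td_maxack))
		return RST_INVALID;           /* stale RST, drop as invalid */
	if (!ct->established_and_assured || seq == other->td_maxack)
		return RST_CLOSES;
	/* RST directly following data from the same direction ("train") */
	if (ct->last_index == IDX_ACK && ct->last_dir == dir &&
	    seq == ct->last_end)
		return RST_CLOSES;
	/* In-window but not exact: keep state, await a challenge ACK */
	return RST_KEEP_STATE;
}

int main(void)
{
	struct peer_state p = { .td_maxack = 1000, .maxack_set = true };
	struct ct_model ct = { .established_and_assured = true };

	printf("%d\n", check_rst(&ct, &p, 0, 999));   /* 0: RST_INVALID */
	printf("%d\n", check_rst(&ct, &p, 0, 1000));  /* 1: RST_CLOSES */
	printf("%d\n", check_rst(&ct, &p, 0, 1005));  /* 2: RST_KEEP_STATE */
	return 0;
}

Note also the companion hunk above that applies the CLOSE timeout to any RST that does pass validation, so accepted RSTs still age out quickly.
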
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3065fb8..7365b43 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -29,27 +26,165 @@
 
 static const unsigned int udp_timeouts[UDP_CT_MAX] = {
 	[UDP_CT_UNREPLIED]	= 30*HZ,
-	[UDP_CT_REPLIED]	= 180*HZ,
+	[UDP_CT_REPLIED]	= 120*HZ,
 };
 
-static inline struct nf_udp_net *udp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.udp;
-}
-
 static unsigned int *udp_get_timeouts(struct net *net)
 {
-	return udp_pernet(net)->timeouts;
+	return nf_udp_pernet(net)->timeouts;
+}
+
+static void udp_error_log(const struct sk_buff *skb,
+			  const struct nf_hook_state *state,
+			  const char *msg)
+{
+	nf_l4proto_log_invalid(skb, state->net, state->pf,
+			       IPPROTO_UDP, "%s", msg);
+}
+
+static bool udp_error(struct sk_buff *skb,
+		      unsigned int dataoff,
+		      const struct nf_hook_state *state)
+{
+	unsigned int udplen = skb->len - dataoff;
+	const struct udphdr *hdr;
+	struct udphdr _hdr;
+
+	/* Header is too small? */
+	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (!hdr) {
+		udp_error_log(skb, state, "short packet");
+		return true;
+	}
+
+	/* Truncated/malformed packets */
+	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
+		udp_error_log(skb, state, "truncated/malformed packet");
+		return true;
+	}
+
+	/* Packet with no checksum */
+	if (!hdr->check)
+		return false;
+
+	/* Checksum invalid? Ignore.
+	 * We skip checking packets on the outgoing path
+	 * because the checksum is assumed to be correct.
+	 * FIXME: Source route IP option packets --RR */
+	if (state->hook == NF_INET_PRE_ROUTING &&
+	    state->net->ct.sysctl_checksum &&
+	    nf_checksum(skb, state->hook, dataoff, IPPROTO_UDP, state->pf)) {
+		udp_error_log(skb, state, "bad checksum");
+		return true;
+	}
+
+	return false;
 }
 
 /* Returns verdict for packet, and may modify conntracktype */
-static int udp_packet(struct nf_conn *ct,
-		      const struct sk_buff *skb,
-		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo)
+int nf_conntrack_udp_packet(struct nf_conn *ct,
+			    struct sk_buff *skb,
+			    unsigned int dataoff,
+			    enum ip_conntrack_info ctinfo,
+			    const struct nf_hook_state *state)
 {
 	unsigned int *timeouts;
 
+	if (udp_error(skb, dataoff, state))
+		return -NF_ACCEPT;
+
+	timeouts = nf_ct_timeout_lookup(ct);
+	if (!timeouts)
+		timeouts = udp_get_timeouts(nf_ct_net(ct));
+
+	if (!nf_ct_is_confirmed(ct))
+		ct->proto.udp.stream_ts = 2 * HZ + jiffies;
+
+	/* If we've seen traffic both ways, this is some kind of UDP
+	 * stream. Set Assured.
+	 */
+	if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+		unsigned long extra = timeouts[UDP_CT_UNREPLIED];
+
+		/* Still active after two seconds? Extend timeout. */
+		if (time_after(jiffies, ct->proto.udp.stream_ts))
+			extra = timeouts[UDP_CT_REPLIED];
+
+		nf_ct_refresh_acct(ct, ctinfo, skb, extra);
+
+		/* Also, more likely to be important, and not a probe */
+		if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+			nf_conntrack_event_cache(IPCT_ASSURED, ct);
+	} else {
+		nf_ct_refresh_acct(ct, ctinfo, skb,
+				   timeouts[UDP_CT_UNREPLIED]);
+	}
+	return NF_ACCEPT;
+}
+
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+static void udplite_error_log(const struct sk_buff *skb,
+			      const struct nf_hook_state *state,
+			      const char *msg)
+{
+	nf_l4proto_log_invalid(skb, state->net, state->pf,
+			       IPPROTO_UDPLITE, "%s", msg);
+}
+
+static bool udplite_error(struct sk_buff *skb,
+			  unsigned int dataoff,
+			  const struct nf_hook_state *state)
+{
+	unsigned int udplen = skb->len - dataoff;
+	const struct udphdr *hdr;
+	struct udphdr _hdr;
+	unsigned int cscov;
+
+	/* Header is too small? */
+	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (!hdr) {
+		udplite_error_log(skb, state, "short packet");
+		return true;
+	}
+
+	cscov = ntohs(hdr->len);
+	if (cscov == 0) {
+		cscov = udplen;
+	} else if (cscov < sizeof(*hdr) || cscov > udplen) {
+		udplite_error_log(skb, state, "invalid checksum coverage");
+		return true;
+	}
+
+	/* UDPLITE mandates checksums */
+	if (!hdr->check) {
+		udplite_error_log(skb, state, "checksum missing");
+		return true;
+	}
+
+	/* Checksum invalid? Ignore. */
+	if (state->hook == NF_INET_PRE_ROUTING &&
+	    state->net->ct.sysctl_checksum &&
+	    nf_checksum_partial(skb, state->hook, dataoff, cscov, IPPROTO_UDP,
+				state->pf)) {
+		udplite_error_log(skb, state, "bad checksum");
+		return true;
+	}
+
+	return false;
+}
+
+/* Returns verdict for packet, and may modify conntracktype */
+int nf_conntrack_udplite_packet(struct nf_conn *ct,
+				struct sk_buff *skb,
+				unsigned int dataoff,
+				enum ip_conntrack_info ctinfo,
+				const struct nf_hook_state *state)
+{
+	unsigned int *timeouts;
+
+	if (udplite_error(skb, dataoff, state))
+		return -NF_ACCEPT;
+
 	timeouts = nf_ct_timeout_lookup(ct);
 	if (!timeouts)
 		timeouts = udp_get_timeouts(nf_ct_net(ct));
@@ -68,109 +203,8 @@
 	}
 	return NF_ACCEPT;
 }
-
-/* Called when a new connection for this protocol found. */
-static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff)
-{
-	return true;
-}
-
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-static void udplite_error_log(const struct sk_buff *skb, struct net *net,
-			      u8 pf, const char *msg)
-{
-	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
-}
-
-static int udplite_error(struct net *net, struct nf_conn *tmpl,
-			 struct sk_buff *skb,
-			 unsigned int dataoff,
-			 u8 pf, unsigned int hooknum)
-{
-	unsigned int udplen = skb->len - dataoff;
-	const struct udphdr *hdr;
-	struct udphdr _hdr;
-	unsigned int cscov;
-
-	/* Header is too small? */
-	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-	if (!hdr) {
-		udplite_error_log(skb, net, pf, "short packet");
-		return -NF_ACCEPT;
-	}
-
-	cscov = ntohs(hdr->len);
-	if (cscov == 0) {
-		cscov = udplen;
-	} else if (cscov < sizeof(*hdr) || cscov > udplen) {
-		udplite_error_log(skb, net, pf, "invalid checksum coverage");
-		return -NF_ACCEPT;
-	}
-
-	/* UDPLITE mandates checksums */
-	if (!hdr->check) {
-		udplite_error_log(skb, net, pf, "checksum missing");
-		return -NF_ACCEPT;
-	}
-
-	/* Checksum invalid? Ignore. */
-	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
-				pf)) {
-		udplite_error_log(skb, net, pf, "bad checksum");
-		return -NF_ACCEPT;
-	}
-
-	return NF_ACCEPT;
-}
 #endif
 
-static void udp_error_log(const struct sk_buff *skb, struct net *net,
-			  u8 pf, const char *msg)
-{
-	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
-}
-
-static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
-		     unsigned int dataoff,
-		     u_int8_t pf,
-		     unsigned int hooknum)
-{
-	unsigned int udplen = skb->len - dataoff;
-	const struct udphdr *hdr;
-	struct udphdr _hdr;
-
-	/* Header is too small? */
-	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-	if (hdr == NULL) {
-		udp_error_log(skb, net, pf, "short packet");
-		return -NF_ACCEPT;
-	}
-
-	/* Truncated/malformed packets */
-	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
-		udp_error_log(skb, net, pf, "truncated/malformed packet");
-		return -NF_ACCEPT;
-	}
-
-	/* Packet with no checksum */
-	if (!hdr->check)
-		return NF_ACCEPT;
-
-	/* Checksum invalid? Ignore.
-	 * We skip checking packets on the outgoing path
-	 * because the checksum is assumed to be correct.
-	 * FIXME: Source route IP option packets --RR */
-	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-	    nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
-		udp_error_log(skb, net, pf, "bad checksum");
-		return -NF_ACCEPT;
-	}
-
-	return NF_ACCEPT;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
@@ -180,7 +214,7 @@
 				     struct net *net, void *data)
 {
 	unsigned int *timeouts = data;
-	struct nf_udp_net *un = udp_pernet(net);
+	struct nf_udp_net *un = nf_udp_pernet(net);
 
 	if (!timeouts)
 		timeouts = un->timeouts;
@@ -223,69 +257,19 @@
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table udp_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_udp_timeout",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "nf_conntrack_udp_timeout_stream",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
-				    struct nf_udp_net *un)
+void nf_conntrack_udp_init_net(struct net *net)
 {
-#ifdef CONFIG_SYSCTL
-	if (pn->ctl_table)
-		return 0;
-	pn->ctl_table = kmemdup(udp_sysctl_table,
-				sizeof(udp_sysctl_table),
-				GFP_KERNEL);
-	if (!pn->ctl_table)
-		return -ENOMEM;
-	pn->ctl_table[0].data = &un->timeouts[UDP_CT_UNREPLIED];
-	pn->ctl_table[1].data = &un->timeouts[UDP_CT_REPLIED];
-#endif
-	return 0;
+	struct nf_udp_net *un = nf_udp_pernet(net);
+	int i;
+
+	for (i = 0; i < UDP_CT_MAX; i++)
+		un->timeouts[i] = udp_timeouts[i];
 }
 
-static int udp_init_net(struct net *net, u_int16_t proto)
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
 {
-	struct nf_udp_net *un = udp_pernet(net);
-	struct nf_proto_net *pn = &un->pn;
-
-	if (!pn->users) {
-		int i;
-
-		for (i = 0; i < UDP_CT_MAX; i++)
-			un->timeouts[i] = udp_timeouts[i];
-	}
-
-	return udp_kmemdup_sysctl_table(pn, un);
-}
-
-static struct nf_proto_net *udp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.udp.pn;
-}
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
-{
-	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDP,
 	.allow_clash		= true,
-	.packet			= udp_packet,
-	.new			= udp_new,
-	.error			= udp_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
@@ -301,20 +285,13 @@
 		.nla_policy	= udp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= udp_init_net,
-	.get_net_proto		= udp_get_net_proto,
 };
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
 
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite =
 {
-	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDPLITE,
 	.allow_clash		= true,
-	.packet			= udp_packet,
-	.new			= udp_new,
-	.error			= udplite_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
@@ -330,66 +307,5 @@
 		.nla_policy	= udp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= udp_init_net,
-	.get_net_proto		= udp_get_net_proto,
 };
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4);
-#endif
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
-{
-	.l3proto		= PF_INET6,
-	.l4proto		= IPPROTO_UDP,
-	.allow_clash		= true,
-	.packet			= udp_packet,
-	.new			= udp_new,
-	.error			= udp_error,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
-	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
-	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
-	.nla_policy		= nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-	.ctnl_timeout		= {
-		.nlattr_to_obj	= udp_timeout_nlattr_to_obj,
-		.obj_to_nlattr	= udp_timeout_obj_to_nlattr,
-		.nlattr_max	= CTA_TIMEOUT_UDP_MAX,
-		.obj_size	= sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
-		.nla_policy	= udp_timeout_nla_policy,
-	},
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= udp_init_net,
-	.get_net_proto		= udp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
-
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
-{
-	.l3proto		= PF_INET6,
-	.l4proto		= IPPROTO_UDPLITE,
-	.allow_clash		= true,
-	.packet			= udp_packet,
-	.new			= udp_new,
-	.error			= udplite_error,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
-	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
-	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
-	.nla_policy		= nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-	.ctnl_timeout		= {
-		.nlattr_to_obj	= udp_timeout_nlattr_to_obj,
-		.obj_to_nlattr	= udp_timeout_obj_to_nlattr,
-		.nlattr_max	= CTA_TIMEOUT_UDP_MAX,
-		.obj_size	= sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
-		.nla_policy	= udp_timeout_nla_policy,
-	},
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= udp_init_net,
-	.get_net_proto		= udp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
 #endif
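
For illustration only (not part of the patch): the UDP rework above lowers the default replied timeout from 180 to 120 seconds and gates it on a new stream_ts stamp — a flow only earns the long "stream" timeout once it has seen replies and has stayed active for more than two seconds after creation. A minimal userspace model of that policy; HZ and all names here are invented stand-ins:

#include <stdbool.h>
#include <stdio.h>

#define HZ 100  /* ticks per second, stand-in for the kernel constant */

struct udp_flow {
	unsigned long stream_ts;  /* creation time + 2s, in ticks */
	bool seen_reply;
};

static unsigned long pick_timeout(const struct udp_flow *fl,
				  unsigned long now,
				  unsigned long t_unreplied,
				  unsigned long t_replied)
{
	if (!fl->seen_reply)
		return t_unreplied;
	/* replied, but still young: keep the short timeout for now */
	if (now <= fl->stream_ts)
		return t_unreplied;
	return t_replied;  /* long-lived bidirectional stream */
}

int main(void)
{
	struct udp_flow fl = { .stream_ts = 2 * HZ, .seen_reply = true };

	printf("young: %lu ticks\n",
	       pick_timeout(&fl, 1 * HZ, 30 * HZ, 120 * HZ));  /* short */
	printf("old:   %lu ticks\n",
	       pick_timeout(&fl, 3 * HZ, 30 * HZ, 120 * HZ));  /* long */
	return 0;
}
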
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 5072ff9..1aebd65 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* SANE connection tracking helper
  * (SANE = Scanner Access Now Easy)
  * For documentation about the SANE network protocol see
@@ -11,10 +12,6 @@
  *  (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  *  (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
  *  (C) 2003 Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -30,10 +27,12 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_sane.h>
 
+#define HELPER_NAME "sane"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>");
 MODULE_DESCRIPTION("SANE connection tracking helper");
-MODULE_ALIAS_NFCT_HELPER("sane");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
 
 static char *sane_buffer;
 
@@ -154,7 +153,7 @@
 	nf_ct_dump_tuple(&exp->tuple);
 
 	/* Can't expect this?  Best to drop packet now. */
-	if (nf_ct_expect_related(exp) != 0) {
+	if (nf_ct_expect_related(exp, 0) != 0) {
 		nf_ct_helper_log(skb, ct, "cannot add expectation");
 		ret = NF_DROP;
 	}
@@ -195,12 +194,12 @@
 	/* FIXME should be configurable whether IPv4 and IPv6 connections
 		 are tracked or not - YK */
 	for (i = 0; i < ports_c; i++) {
-		nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, "sane",
-				  SANE_PORT, ports[i], ports[i],
+		nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP,
+				  HELPER_NAME, SANE_PORT, ports[i], ports[i],
 				  &sane_exp_policy, 0, help, NULL,
 				  THIS_MODULE);
-		nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, "sane",
-				  SANE_PORT, ports[i], ports[i],
+		nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP,
+				  HELPER_NAME, SANE_PORT, ports[i], ports[i],
 				  &sane_exp_policy, 0, help, NULL,
 				  THIS_MODULE);
 	}
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index a975efd..3066449 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/types.h>
 #include <linux/netfilter.h>
 #include <net/tcp.h>
@@ -115,19 +116,20 @@
 /* TCP SACK sequence number adjustment */
 static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
 				      unsigned int protoff,
-				      struct tcphdr *tcph,
 				      struct nf_conn *ct,
 				      enum ip_conntrack_info ctinfo)
 {
-	unsigned int dir, optoff, optend;
+	struct tcphdr *tcph = (void *)skb->data + protoff;
 	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	unsigned int dir, optoff, optend;
 
 	optoff = protoff + sizeof(struct tcphdr);
 	optend = protoff + tcph->doff * 4;
 
-	if (!skb_make_writable(skb, optend))
+	if (skb_ensure_writable(skb, optend))
 		return 0;
 
+	tcph = (void *)skb->data + protoff;
 	dir = CTINFO2DIR(ctinfo);
 
 	while (optoff < optend) {
@@ -174,7 +176,7 @@
 	this_way  = &seqadj->seq[dir];
 	other_way = &seqadj->seq[!dir];
 
-	if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
+	if (skb_ensure_writable(skb, protoff + sizeof(*tcph)))
 		return 0;
 
 	tcph = (void *)skb->data + protoff;
@@ -207,7 +209,7 @@
 		 ntohl(newack));
 	tcph->ack_seq = newack;
 
-	res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+	res = nf_ct_sack_adjust(skb, protoff, ct, ctinfo);
 out:
 	spin_unlock_bh(&ct->lock);
 
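For illustration only (not part of the patch): the seqadj hunks above replace skb_make_writable() (true on success) with skb_ensure_writable() (0 on success) and recompute tcph afterwards, because making the skb writable may reallocate its data and leave any earlier header pointer dangling. A self-contained sketch of the same pattern with realloc(); everything below is an invented stand-in:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct buf {
	unsigned char *data;
	size_t len;
};

/* Returns 0 on success (like skb_ensure_writable); may move ->data. */
static int buf_ensure_writable(struct buf *b, size_t need)
{
	if (need > b->len) {
		unsigned char *p = realloc(b->data, need);

		if (!p)
			return -1;
		memset(p + b->len, 0, need - b->len);
		b->data = p;
		b->len = need;
	}
	return 0;
}

static int adjust_header(struct buf *b, size_t protoff)
{
	unsigned char *hdr;

	if (buf_ensure_writable(b, protoff + 20))
		return 0;
	/* Recompute the header pointer only after the buffer is final. */
	hdr = b->data + protoff;
	hdr[0] ^= 0xff;  /* stand-in for rewriting a SACK option */
	return 1;
}

int main(void)
{
	struct buf b = { .data = calloc(8, 1), .len = 8 };

	printf("adjusted: %d\n", adjust_header(&b, 4));
	free(b.data);
	return 0;
}
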
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index c8d2b66..b83dc9b 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* SIP extension for IP connection tracking.
  *
  * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
  * based on RR's ip_conntrack_ftp.c and other modules.
  * (C) 2007 United Security Providers
  * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -20,6 +17,8 @@
 #include <linux/udp.h>
 #include <linux/tcp.h>
 #include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
@@ -28,11 +27,13 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_conntrack_sip.h>
 
+#define HELPER_NAME "sip"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
 MODULE_DESCRIPTION("SIP connection tracking helper");
 MODULE_ALIAS("ip_conntrack_sip");
-MODULE_ALIAS_NFCT_HELPER("sip");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
 
 #define MAX_PORTS	8
 static unsigned short ports[MAX_PORTS];
@@ -54,6 +55,11 @@
 MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
 				   "endpoints only (default 1)");
 
+static int sip_external_media __read_mostly = 0;
+module_param(sip_external_media, int, 0600);
+MODULE_PARM_DESC(sip_external_media, "Expect Media streams between external "
+				     "endpoints (default 0)");
+
 const struct nf_nat_sip_hooks *nf_nat_sip_hooks;
 EXPORT_SYMBOL_GPL(nf_nat_sip_hooks);
 
@@ -861,6 +867,36 @@
 		if (!nf_inet_addr_cmp(daddr, &ct->tuplehash[dir].tuple.src.u3))
 			return NF_ACCEPT;
 		saddr = &ct->tuplehash[!dir].tuple.src.u3;
+	} else if (sip_external_media) {
+		struct net_device *dev = skb_dst(skb)->dev;
+		struct net *net = dev_net(dev);
+		struct flowi fl;
+		struct dst_entry *dst = NULL;
+
+		memset(&fl, 0, sizeof(fl));
+
+		switch (nf_ct_l3num(ct)) {
+			case NFPROTO_IPV4:
+				fl.u.ip4.daddr = daddr->ip;
+				nf_ip_route(net, &dst, &fl, false);
+				break;
+
+			case NFPROTO_IPV6:
+				fl.u.ip6.daddr = daddr->in6;
+				nf_ip6_route(net, &dst, &fl, false);
+				break;
+		}
+
+		/* Don't predict any conntracks when media endpoint is reachable
+		 * through the same interface as the signalling peer.
+		 */
+		if (dst) {
+			bool external_media = (dst->dev == dev);
+
+			dst_release(dst);
+			if (external_media)
+				return NF_ACCEPT;
+		}
 	}
 
 	/* We need to check whether the registration exists before attempting
@@ -891,7 +927,7 @@
 		    nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
 		    exp->class != class)
 			break;
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 		if (!direct_rtp &&
 		    (!nf_inet_addr_cmp(&exp->saved_addr, &exp->tuple.dst.u3) ||
 		     exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) &&
@@ -941,11 +977,15 @@
 		/* -EALREADY handling works around end-points that send
 		 * SDP messages with identical port but different media type,
 		 * we pretend expectation was set up.
+		 * It also works in the case that SDP messages are sent with
+		 * identical expect tuples but for different master conntracks.
 		 */
-		int errp = nf_ct_expect_related(rtp_exp);
+		int errp = nf_ct_expect_related(rtp_exp,
+						NF_CT_EXP_F_SKIP_MASTER);
 
 		if (errp == 0 || errp == -EALREADY) {
-			int errcp = nf_ct_expect_related(rtcp_exp);
+			int errcp = nf_ct_expect_related(rtcp_exp,
+						NF_CT_EXP_F_SKIP_MASTER);
 
 			if (errcp == 0 || errcp == -EALREADY)
 				ret = NF_ACCEPT;
@@ -1260,7 +1300,7 @@
 		ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
 				    exp, matchoff, matchlen);
 	else {
-		if (nf_ct_expect_related(exp) != 0) {
+		if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		} else
@@ -1632,21 +1672,21 @@
 		ports[ports_c++] = SIP_PORT;
 
 	for (i = 0; i < ports_c; i++) {
-		nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip",
-				  SIP_PORT, ports[i], i, sip_exp_policy,
-				  SIP_EXPECT_MAX, sip_help_udp,
+		nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP,
+				  HELPER_NAME, SIP_PORT, ports[i], i,
+				  sip_exp_policy, SIP_EXPECT_MAX, sip_help_udp,
 				  NULL, THIS_MODULE);
-		nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, "sip",
-				  SIP_PORT, ports[i], i, sip_exp_policy,
-				  SIP_EXPECT_MAX, sip_help_tcp,
+		nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP,
+				  HELPER_NAME, SIP_PORT, ports[i], i,
+				  sip_exp_policy, SIP_EXPECT_MAX, sip_help_tcp,
 				  NULL, THIS_MODULE);
-		nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, "sip",
-				  SIP_PORT, ports[i], i, sip_exp_policy,
-				  SIP_EXPECT_MAX, sip_help_udp,
+		nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP,
+				  HELPER_NAME, SIP_PORT, ports[i], i,
+				  sip_exp_policy, SIP_EXPECT_MAX, sip_help_udp,
 				  NULL, THIS_MODULE);
-		nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, "sip",
-				  SIP_PORT, ports[i], i, sip_exp_policy,
-				  SIP_EXPECT_MAX, sip_help_tcp,
+		nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP,
+				  HELPER_NAME, SIP_PORT, ports[i], i,
+				  sip_exp_policy, SIP_EXPECT_MAX, sip_help_tcp,
 				  NULL, THIS_MODULE);
 	}
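
For illustration only (not part of the patch): the new sip_external_media branch above routes towards the media endpoint and, if the egress device matches the device the signalling arrived on, both endpoints sit on the same side of the firewall and no RTP expectation is set up. (The same file also passes the new NF_CT_EXP_F_SKIP_MASTER flag to nf_ct_expect_related(), which now takes a flags argument.) A toy userspace model of the routing decision; the /8-prefix "routing table" and all names are invented stand-ins:

#include <stdbool.h>
#include <stdio.h>

/* Pretend routing table: destination /8 prefix -> egress interface id */
static int route_lookup(unsigned int daddr)
{
	return (daddr >> 24) == 10 ? 1 /* lan0 */ : 2 /* wan0 */;
}

static bool skip_expectation(unsigned int media_daddr, int signalling_dev)
{
	/* Same egress as the signalling peer: media never crosses us. */
	return route_lookup(media_daddr) == signalling_dev;
}

int main(void)
{
	/* media endpoint 10.0.0.7, signalling arrived on lan0 (id 1) */
	printf("%s\n", skip_expectation(0x0a000007, 1) ? "skip" : "expect");
	return 0;
}
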
 
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
index b8e0a22..daacf20 100644
--- a/net/netfilter/nf_conntrack_snmp.c
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *      SNMP service broadcast connection tracking helper
  *
  *      (c) 2011 Jiri Olsa <jolsa@redhat.com>
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 13279f6..410809c 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -24,6 +24,10 @@
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <linux/rculist_nulls.h>
 
+static bool enable_hooks __read_mostly;
+MODULE_PARM_DESC(enable_hooks, "Always enable conntrack hooks");
+module_param(enable_hooks, bool, 0000);
+
 unsigned int nf_conntrack_net_id __read_mostly;
 
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
@@ -267,6 +271,24 @@
 	return "unknown";
 }
 
+static unsigned int
+seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
+{
+	struct nf_conn_acct *acct;
+	struct nf_conn_counter *counter;
+
+	acct = nf_conn_acct_find(ct);
+	if (!acct)
+		return 0;
+
+	counter = acct->counter;
+	seq_printf(s, "packets=%llu bytes=%llu ",
+		   (unsigned long long)atomic64_read(&counter[dir].packets),
+		   (unsigned long long)atomic64_read(&counter[dir].bytes));
+
+	return 0;
+}
+
 /* return 0 on success, 1 in case of error */
 static int ct_seq_show(struct seq_file *s, void *v)
 {
@@ -292,8 +314,7 @@
 	if (!net_eq(nf_ct_net(ct), net))
 		goto release;
 
-	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-	WARN_ON(!l4proto);
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 
 	ret = -ENOSPC;
 	seq_printf(s, "%-8s %u %-8s %u ",
@@ -514,36 +535,102 @@
 
 static struct ctl_table_header *nf_ct_netfilter_header;
 
+enum nf_ct_sysctl_index {
+	NF_SYSCTL_CT_MAX,
+	NF_SYSCTL_CT_COUNT,
+	NF_SYSCTL_CT_BUCKETS,
+	NF_SYSCTL_CT_CHECKSUM,
+	NF_SYSCTL_CT_LOG_INVALID,
+	NF_SYSCTL_CT_EXPECT_MAX,
+	NF_SYSCTL_CT_ACCT,
+	NF_SYSCTL_CT_HELPER,
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	NF_SYSCTL_CT_EVENTS,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	NF_SYSCTL_CT_TIMESTAMP,
+#endif
+	NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
+	NF_SYSCTL_CT_PROTO_TCP_LOOSE,
+	NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
+	NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT,
+	NF_SYSCTL_CT_PROTO_DCCP_LOOSE,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
+#endif
+
+	__NF_SYSCTL_CT_LAST_SYSCTL,
+};
+
+#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1)
+
 static struct ctl_table nf_ct_sysctl_table[] = {
-	{
+	[NF_SYSCTL_CT_MAX] = {
 		.procname	= "nf_conntrack_max",
 		.data		= &nf_conntrack_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
+	[NF_SYSCTL_CT_COUNT] = {
 		.procname	= "nf_conntrack_count",
 		.data		= &init_net.ct.count,
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
 	},
-	{
+	[NF_SYSCTL_CT_BUCKETS] = {
 		.procname       = "nf_conntrack_buckets",
 		.data           = &nf_conntrack_htable_size_user,
 		.maxlen         = sizeof(unsigned int),
 		.mode           = 0644,
 		.proc_handler   = nf_conntrack_hash_sysctl,
 	},
-	{
+	[NF_SYSCTL_CT_CHECKSUM] = {
 		.procname	= "nf_conntrack_checksum",
 		.data		= &init_net.ct.sysctl_checksum,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1 	= SYSCTL_ZERO,
+		.extra2 	= SYSCTL_ONE,
 	},
-	{
+	[NF_SYSCTL_CT_LOG_INVALID] = {
 		.procname	= "nf_conntrack_log_invalid",
 		.data		= &init_net.ct.sysctl_log_invalid,
 		.maxlen		= sizeof(unsigned int),
@@ -552,14 +639,288 @@
 		.extra1		= &log_invalid_proto_min,
 		.extra2		= &log_invalid_proto_max,
 	},
-	{
+	[NF_SYSCTL_CT_EXPECT_MAX] = {
 		.procname	= "nf_conntrack_expect_max",
 		.data		= &nf_ct_expect_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{ }
+	[NF_SYSCTL_CT_ACCT] = {
+		.procname	= "nf_conntrack_acct",
+		.data		= &init_net.ct.sysctl_acct,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1 	= SYSCTL_ZERO,
+		.extra2 	= SYSCTL_ONE,
+	},
+	[NF_SYSCTL_CT_HELPER] = {
+		.procname	= "nf_conntrack_helper",
+		.data		= &init_net.ct.sysctl_auto_assign_helper,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1 	= SYSCTL_ZERO,
+		.extra2 	= SYSCTL_ONE,
+	},
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	[NF_SYSCTL_CT_EVENTS] = {
+		.procname	= "nf_conntrack_events",
+		.data		= &init_net.ct.sysctl_events,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1 	= SYSCTL_ZERO,
+		.extra2 	= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	[NF_SYSCTL_CT_TIMESTAMP] = {
+		.procname	= "nf_conntrack_timestamp",
+		.data		= &init_net.ct.sysctl_tstamp,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1 	= SYSCTL_ZERO,
+		.extra2 	= SYSCTL_ONE,
+	},
+#endif
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = {
+		.procname	= "nf_conntrack_generic_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT] = {
+		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV] = {
+		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED] = {
+		.procname	= "nf_conntrack_tcp_timeout_established",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT] = {
+		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT] = {
+		.procname	= "nf_conntrack_tcp_timeout_close_wait",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK] = {
+		.procname	= "nf_conntrack_tcp_timeout_last_ack",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT] = {
+		.procname	= "nf_conntrack_tcp_timeout_time_wait",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE] = {
+		.procname	= "nf_conntrack_tcp_timeout_close",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS] = {
+		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK] = {
+		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
+		.procname	= "nf_conntrack_tcp_loose",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1 	= SYSCTL_ZERO,
+		.extra2 	= SYSCTL_ONE,
+	},
+	[NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = {
+		.procname       = "nf_conntrack_tcp_be_liberal",
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1 	= SYSCTL_ZERO,
+		.extra2 	= SYSCTL_ONE,
+	},
+	[NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = {
+		.procname	= "nf_conntrack_tcp_max_retrans",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP] = {
+		.procname	= "nf_conntrack_udp_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM] = {
+		.procname	= "nf_conntrack_udp_timeout_stream",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
+		.procname	= "nf_conntrack_icmp_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6] = {
+		.procname	= "nf_conntrack_icmpv6_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED] = {
+		.procname	= "nf_conntrack_sctp_timeout_closed",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT] = {
+		.procname	= "nf_conntrack_sctp_timeout_cookie_wait",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED] = {
+		.procname	= "nf_conntrack_sctp_timeout_cookie_echoed",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED] = {
+		.procname	= "nf_conntrack_sctp_timeout_established",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT] = {
+		.procname	= "nf_conntrack_sctp_timeout_shutdown_sent",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD] = {
+		.procname	= "nf_conntrack_sctp_timeout_shutdown_recd",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = {
+		.procname	= "nf_conntrack_sctp_timeout_shutdown_ack_sent",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT] = {
+		.procname	= "nf_conntrack_sctp_timeout_heartbeat_sent",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = {
+		.procname       = "nf_conntrack_sctp_timeout_heartbeat_acked",
+		.maxlen         = sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec_jiffies,
+	},
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = {
+		.procname	= "nf_conntrack_dccp_timeout_request",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND] = {
+		.procname	= "nf_conntrack_dccp_timeout_respond",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN] = {
+		.procname	= "nf_conntrack_dccp_timeout_partopen",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN] = {
+		.procname	= "nf_conntrack_dccp_timeout_open",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ] = {
+		.procname	= "nf_conntrack_dccp_timeout_closereq",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING] = {
+		.procname	= "nf_conntrack_dccp_timeout_closing",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT] = {
+		.procname	= "nf_conntrack_dccp_timeout_timewait",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = {
+		.procname	= "nf_conntrack_dccp_loose",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1 	= SYSCTL_ZERO,
+		.extra2 	= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE] = {
+		.procname       = "nf_conntrack_gre_timeout",
+		.maxlen         = sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM] = {
+		.procname       = "nf_conntrack_gre_timeout_stream",
+		.maxlen         = sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec_jiffies,
+	},
+#endif
+	{}
 };
 
 static struct ctl_table nf_ct_netfilter_table[] = {
@@ -573,25 +934,141 @@
 	{ }
 };
 
+static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
+						    struct ctl_table *table)
+{
+	struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+#define XASSIGN(XNAME, tn) \
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ ## XNAME].data = \
+			&(tn)->timeouts[TCP_CONNTRACK_ ## XNAME]
+
+	XASSIGN(SYN_SENT, tn);
+	XASSIGN(SYN_RECV, tn);
+	XASSIGN(ESTABLISHED, tn);
+	XASSIGN(FIN_WAIT, tn);
+	XASSIGN(CLOSE_WAIT, tn);
+	XASSIGN(LAST_ACK, tn);
+	XASSIGN(TIME_WAIT, tn);
+	XASSIGN(CLOSE, tn);
+	XASSIGN(RETRANS, tn);
+	XASSIGN(UNACK, tn);
+#undef XASSIGN
+#define XASSIGN(XNAME, rval) \
+	table[NF_SYSCTL_CT_PROTO_TCP_ ## XNAME].data = (rval)
+
+	XASSIGN(LOOSE, &tn->tcp_loose);
+	XASSIGN(LIBERAL, &tn->tcp_be_liberal);
+	XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
+#undef XASSIGN
+}
+
+static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
+						     struct ctl_table *table)
+{
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	struct nf_sctp_net *sn = nf_sctp_pernet(net);
+
+#define XASSIGN(XNAME, sn) \
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ ## XNAME].data = \
+			&(sn)->timeouts[SCTP_CONNTRACK_ ## XNAME]
+
+	XASSIGN(CLOSED, sn);
+	XASSIGN(COOKIE_WAIT, sn);
+	XASSIGN(COOKIE_ECHOED, sn);
+	XASSIGN(ESTABLISHED, sn);
+	XASSIGN(SHUTDOWN_SENT, sn);
+	XASSIGN(SHUTDOWN_RECD, sn);
+	XASSIGN(SHUTDOWN_ACK_SENT, sn);
+	XASSIGN(HEARTBEAT_SENT, sn);
+	XASSIGN(HEARTBEAT_ACKED, sn);
+#undef XASSIGN
+#endif
+}
+
+static void nf_conntrack_standalone_init_dccp_sysctl(struct net *net,
+						     struct ctl_table *table)
+{
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	struct nf_dccp_net *dn = nf_dccp_pernet(net);
+
+#define XASSIGN(XNAME, dn) \
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_ ## XNAME].data = \
+			&(dn)->dccp_timeout[CT_DCCP_ ## XNAME]
+
+	XASSIGN(REQUEST, dn);
+	XASSIGN(RESPOND, dn);
+	XASSIGN(PARTOPEN, dn);
+	XASSIGN(OPEN, dn);
+	XASSIGN(CLOSEREQ, dn);
+	XASSIGN(CLOSING, dn);
+	XASSIGN(TIMEWAIT, dn);
+#undef XASSIGN
+
+	table[NF_SYSCTL_CT_PROTO_DCCP_LOOSE].data = &dn->dccp_loose;
+#endif
+}
+
+static void nf_conntrack_standalone_init_gre_sysctl(struct net *net,
+						    struct ctl_table *table)
+{
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	struct nf_gre_net *gn = nf_gre_pernet(net);
+
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE].data = &gn->timeouts[GRE_CT_UNREPLIED];
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM].data = &gn->timeouts[GRE_CT_REPLIED];
+#endif
+}
+
 static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
+	struct nf_udp_net *un = nf_udp_pernet(net);
 	struct ctl_table *table;
 
+	BUILD_BUG_ON(ARRAY_SIZE(nf_ct_sysctl_table) != NF_SYSCTL_CT_LAST_SYSCTL);
+
 	table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
 			GFP_KERNEL);
 	if (!table)
-		goto out_kmemdup;
+		return -ENOMEM;
 
-	table[1].data = &net->ct.count;
-	table[3].data = &net->ct.sysctl_checksum;
-	table[4].data = &net->ct.sysctl_log_invalid;
+	table[NF_SYSCTL_CT_COUNT].data = &net->ct.count;
+	table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
+	table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid;
+	table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct;
+	table[NF_SYSCTL_CT_HELPER].data = &net->ct.sysctl_auto_assign_helper;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	table[NF_SYSCTL_CT_TIMESTAMP].data = &net->ct.sysctl_tstamp;
+#endif
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC].data = &nf_generic_pernet(net)->timeout;
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP].data = &nf_icmp_pernet(net)->timeout;
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
+
+	nf_conntrack_standalone_init_tcp_sysctl(net, table);
+	nf_conntrack_standalone_init_sctp_sysctl(net, table);
+	nf_conntrack_standalone_init_dccp_sysctl(net, table);
+	nf_conntrack_standalone_init_gre_sysctl(net, table);
 
 	/* Don't export sysctls to unprivileged users */
-	if (net->user_ns != &init_user_ns)
-		table[0].procname = NULL;
+	if (net->user_ns != &init_user_ns) {
+		table[NF_SYSCTL_CT_MAX].procname = NULL;
+		table[NF_SYSCTL_CT_ACCT].procname = NULL;
+		table[NF_SYSCTL_CT_HELPER].procname = NULL;
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+		table[NF_SYSCTL_CT_TIMESTAMP].procname = NULL;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+		table[NF_SYSCTL_CT_EVENTS].procname = NULL;
+#endif
+	}
 
 	if (!net_eq(&init_net, net))
-		table[2].mode = 0444;
+		table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
 
 	net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
 	if (!net->ct.sysctl_header)
@@ -601,7 +1078,6 @@
 
 out_unregister_netfilter:
 	kfree(table);
-out_kmemdup:
 	return -ENOMEM;
 }
 
@@ -624,31 +1100,47 @@
 }
 #endif /* CONFIG_SYSCTL */
 
+static void nf_conntrack_fini_net(struct net *net)
+{
+	if (enable_hooks)
+		nf_ct_netns_put(net, NFPROTO_INET);
+
+	nf_conntrack_standalone_fini_proc(net);
+	nf_conntrack_standalone_fini_sysctl(net);
+}
+
 static int nf_conntrack_pernet_init(struct net *net)
 {
 	int ret;
 
-	ret = nf_conntrack_init_net(net);
+	net->ct.sysctl_checksum = 1;
+
+	ret = nf_conntrack_standalone_init_sysctl(net);
 	if (ret < 0)
-		goto out_init;
+		return ret;
 
 	ret = nf_conntrack_standalone_init_proc(net);
 	if (ret < 0)
 		goto out_proc;
 
-	net->ct.sysctl_checksum = 1;
-	net->ct.sysctl_log_invalid = 0;
-	ret = nf_conntrack_standalone_init_sysctl(net);
+	ret = nf_conntrack_init_net(net);
 	if (ret < 0)
-		goto out_sysctl;
+		goto out_init_net;
+
+	if (enable_hooks) {
+		ret = nf_ct_netns_get(net, NFPROTO_INET);
+		if (ret < 0)
+			goto out_hooks;
+	}
 
 	return 0;
 
-out_sysctl:
+out_hooks:
+	nf_conntrack_cleanup_net(net);
+out_init_net:
 	nf_conntrack_standalone_fini_proc(net);
 out_proc:
-	nf_conntrack_cleanup_net(net);
-out_init:
+	nf_conntrack_standalone_fini_sysctl(net);
 	return ret;
 }
 
@@ -656,10 +1148,9 @@
 {
 	struct net *net;
 
-	list_for_each_entry(net, net_exit_list, exit_list) {
-		nf_conntrack_standalone_fini_sysctl(net);
-		nf_conntrack_standalone_fini_proc(net);
-	}
+	list_for_each_entry(net, net_exit_list, exit_list)
+		nf_conntrack_fini_net(net);
+
 	nf_conntrack_cleanup_net_list(net_exit_list);
 }
 
@@ -676,7 +1167,6 @@
 	if (ret < 0)
 		goto out_start;
 
-	BUILD_BUG_ON(SKB_NFCT_PTRMASK != NFCT_PTRMASK);
 	BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER);
 
 #ifdef CONFIG_SYSCTL
@@ -720,10 +1210,3 @@
 
 module_init(nf_conntrack_standalone_init);
 module_exit(nf_conntrack_standalone_fini);
-
-/* Some modules need us, but don't depend directly on any symbol.
-   They should call this. */
-void need_conntrack(void)
-{
-}
-EXPORT_SYMBOL_GPL(need_conntrack);
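
The sysctl consolidation above leans on two C idioms: a ctl_table indexed
by an enum through designated initializers, and a token-pasting XASSIGN
macro that wires each slot to its per-netns backing variable. A minimal,
self-contained user-space sketch of the same idioms (all names are
illustrative, not kernel API):

	#include <stdio.h>

	enum { TIMEOUT_OPEN, TIMEOUT_CLOSING, TIMEOUT_LAST };

	struct entry {
		const char *name;
		unsigned int *data;	/* filled in per instance, as in the patch */
	};

	static const struct entry table_template[TIMEOUT_LAST] = {
		[TIMEOUT_OPEN]    = { .name = "timeout_open" },
		[TIMEOUT_CLOSING] = { .name = "timeout_closing" },
	};

	struct timeouts { unsigned int open, closing; };

	int main(void)
	{
		struct timeouts tn = { .open = 30, .closing = 10 };
		struct entry table[TIMEOUT_LAST];
		int i;

		for (i = 0; i < TIMEOUT_LAST; i++)	/* the kmemdup() step */
			table[i] = table_template[i];

	/* As in the patch: paste the suffix onto both the array index and
	 * the struct member, so one macro line wires one slot. */
	#define XASSIGN(XNAME, field) \
		table[TIMEOUT_ ## XNAME].data = &(field)

		XASSIGN(OPEN, tn.open);
		XASSIGN(CLOSING, tn.closing);
	#undef XASSIGN

		for (i = 0; i < TIMEOUT_LAST; i++)
			printf("%s = %u\n", table[i].name, *table[i].data);
		return 0;
	}
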
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index 548b673..80ee53f 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -1,8 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -20,11 +18,13 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <linux/netfilter/nf_conntrack_tftp.h>
 
+#define HELPER_NAME "tftp"
+
 MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
 MODULE_DESCRIPTION("TFTP connection tracking helper");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_conntrack_tftp");
-MODULE_ALIAS_NFCT_HELPER("tftp");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
 
 #define MAX_PORTS 8
 static unsigned short ports[MAX_PORTS];
@@ -78,7 +78,7 @@
 		nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
 		if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
 			ret = nf_nat_tftp(skb, ctinfo, exp);
-		else if (nf_ct_expect_related(exp) != 0) {
+		else if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		}
@@ -119,12 +119,14 @@
 		ports[ports_c++] = TFTP_PORT;
 
 	for (i = 0; i < ports_c; i++) {
-		nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, "tftp",
-				  TFTP_PORT, ports[i], i, &tftp_exp_policy,
-				  0, tftp_help, NULL, THIS_MODULE);
-		nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, "tftp",
-				  TFTP_PORT, ports[i], i, &tftp_exp_policy,
-				  0, tftp_help, NULL, THIS_MODULE);
+		nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP,
+				  HELPER_NAME, TFTP_PORT, ports[i], i,
+				  &tftp_exp_policy, 0, tftp_help, NULL,
+				  THIS_MODULE);
+		nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP,
+				  HELPER_NAME, TFTP_PORT, ports[i], i,
+				  &tftp_exp_policy, 0, tftp_help, NULL,
+				  THIS_MODULE);
 	}
 
 	ret = nf_conntrack_helpers_register(tftp, ports_c * 2);
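
The registration loop above stores the IPv4 and IPv6 helper for each
configured port in one flat array, at indices 2*i and 2*i+1, so a single
nf_conntrack_helpers_register() call covers both families. A toy sketch of
that layout (the struct and family constants are simplified stand-ins):

	#include <stdio.h>

	#define MAX_PORTS 8

	struct helper { int family; unsigned short port; };

	/* One flat array, two address families per configured port. */
	static struct helper tftp[MAX_PORTS * 2];

	int main(void)
	{
		unsigned short ports[] = { 69, 10069 };
		int ports_c = 2, i;

		for (i = 0; i < ports_c; i++) {
			tftp[2 * i]     = (struct helper){ 4, ports[i] };
			tftp[2 * i + 1] = (struct helper){ 6, ports[i] };
		}

		for (i = 0; i < ports_c * 2; i++)
			printf("IPv%d helper on port %u\n",
			       tftp[i].family, tftp[i].port);
		return 0;
	}
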
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 91fbd18..14387e0 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
  * (C) 2012 by Vyatta Inc. <http://www.vyatta.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation (or any later at your option).
  */
 
 #include <linux/types.h>
@@ -22,6 +19,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
 
 struct nf_ct_timeout *
@@ -48,6 +46,95 @@
 }
 EXPORT_SYMBOL_GPL(nf_ct_untimeout);
 
+static void __nf_ct_timeout_put(struct nf_ct_timeout *timeout)
+{
+	typeof(nf_ct_timeout_put_hook) timeout_put;
+
+	timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
+	if (timeout_put)
+		timeout_put(timeout);
+}
+
+int nf_ct_set_timeout(struct net *net, struct nf_conn *ct,
+		      u8 l3num, u8 l4num, const char *timeout_name)
+{
+	typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+	struct nf_ct_timeout *timeout;
+	struct nf_conn_timeout *timeout_ext;
+	const char *errmsg = NULL;
+	int ret = 0;
+
+	rcu_read_lock();
+	timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
+	if (!timeout_find_get) {
+		ret = -ENOENT;
+		errmsg = "Timeout policy base is empty";
+		goto out;
+	}
+
+	timeout = timeout_find_get(net, timeout_name);
+	if (!timeout) {
+		ret = -ENOENT;
+		pr_info_ratelimited("No such timeout policy \"%s\"\n",
+				    timeout_name);
+		goto out;
+	}
+
+	if (timeout->l3num != l3num) {
+		ret = -EINVAL;
+		pr_info_ratelimited("Timeout policy `%s' can only be used by "
+				    "L%d protocol number %d\n",
+				    timeout_name, 3, timeout->l3num);
+		goto err_put_timeout;
+	}
+	/* Make sure the timeout policy matches any existing protocol tracker,
+	 * otherwise default to generic.
+	 */
+	if (timeout->l4proto->l4proto != l4num) {
+		ret = -EINVAL;
+		pr_info_ratelimited("Timeout policy `%s' can only be used by "
+				    "L%d protocol number %d\n",
+				    timeout_name, 4, timeout->l4proto->l4proto);
+		goto err_put_timeout;
+	}
+	timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
+	if (!timeout_ext) {
+		ret = -ENOMEM;
+		goto err_put_timeout;
+	}
+
+	rcu_read_unlock();
+	return ret;
+
+err_put_timeout:
+	__nf_ct_timeout_put(timeout);
+out:
+	rcu_read_unlock();
+	if (errmsg)
+		pr_info_ratelimited("%s\n", errmsg);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_set_timeout);
+
+void nf_ct_destroy_timeout(struct nf_conn *ct)
+{
+	struct nf_conn_timeout *timeout_ext;
+	typeof(nf_ct_timeout_put_hook) timeout_put;
+
+	rcu_read_lock();
+	timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
+
+	if (timeout_put) {
+		timeout_ext = nf_ct_timeout_find(ct);
+		if (timeout_ext) {
+			timeout_put(timeout_ext->timeout);
+			RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+		}
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_ct_destroy_timeout);
+
 static const struct nf_ct_ext_type timeout_extend = {
 	.len	= sizeof(struct nf_conn_timeout),
 	.align	= __alignof__(struct nf_conn_timeout),
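
nf_ct_set_timeout() and nf_ct_destroy_timeout() above follow the kernel's
optional-hook idiom: snapshot a published function pointer once under
rcu_read_lock(), test the local copy, then call through it (the typeof()
declarations simply clone the hook's pointer type). A rough user-space
analogue, with a C11 atomic standing in for the RCU-protected pointer;
this is only a sketch of the calling discipline, not of RCU itself:

	#include <stdatomic.h>
	#include <stdio.h>

	static void put_impl(int *obj)
	{
		printf("put %d\n", *obj);
	}

	/* The published hook; a module sets it at init, clears it at exit. */
	static _Atomic(void (*)(int *)) put_hook = put_impl;

	static void do_put(int *obj)
	{
		void (*put)(int *) = atomic_load(&put_hook);

		if (put)	/* hook may be absent: module not loaded */
			put(obj);
	}

	int main(void)
	{
		int token = 42;

		do_put(&token);			/* calls put_impl() */
		atomic_store(&put_hook, NULL);	/* "module unload" */
		do_put(&token);			/* silently skipped */
		return 0;
	}
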
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
index 56766cb..f656d39 100644
--- a/net/netfilter/nf_conntrack_timestamp.c
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation (or any later at your option).
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -22,83 +19,15 @@
 module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
 MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table tstamp_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_timestamp",
-		.data		= &init_net.ct.sysctl_tstamp,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{}
-};
-#endif /* CONFIG_SYSCTL */
-
 static const struct nf_ct_ext_type tstamp_extend = {
 	.len	= sizeof(struct nf_conn_tstamp),
 	.align	= __alignof__(struct nf_conn_tstamp),
 	.id	= NF_CT_EXT_TSTAMP,
 };
 
-#ifdef CONFIG_SYSCTL
-static int nf_conntrack_tstamp_init_sysctl(struct net *net)
-{
-	struct ctl_table *table;
-
-	table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
-			GFP_KERNEL);
-	if (!table)
-		goto out;
-
-	table[0].data = &net->ct.sysctl_tstamp;
-
-	/* Don't export sysctls to unprivileged users */
-	if (net->user_ns != &init_user_ns)
-		table[0].procname = NULL;
-
-	net->ct.tstamp_sysctl_header = register_net_sysctl(net,	"net/netfilter",
-							   table);
-	if (!net->ct.tstamp_sysctl_header) {
-		pr_err("can't register to sysctl\n");
-		goto out_register;
-	}
-	return 0;
-
-out_register:
-	kfree(table);
-out:
-	return -ENOMEM;
-}
-
-static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
-{
-	struct ctl_table *table;
-
-	table = net->ct.tstamp_sysctl_header->ctl_table_arg;
-	unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
-	kfree(table);
-}
-#else
-static int nf_conntrack_tstamp_init_sysctl(struct net *net)
-{
-	return 0;
-}
-
-static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
-{
-}
-#endif
-
-int nf_conntrack_tstamp_pernet_init(struct net *net)
+void nf_conntrack_tstamp_pernet_init(struct net *net)
 {
 	net->ct.sysctl_tstamp = nf_ct_tstamp;
-	return nf_conntrack_tstamp_init_sysctl(net);
-}
-
-void nf_conntrack_tstamp_pernet_fini(struct net *net)
-{
-	nf_conntrack_tstamp_fini_sysctl(net);
 }
 
 int nf_conntrack_tstamp_init(void)
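
With its private sysctl table gone (the knob now lives in the consolidated
nf_ct_sysctl_table earlier in this patch), nf_conntrack_tstamp_pernet_init()
reduces to seeding each new namespace from the module parameter. The shape
of that pattern as a standalone sketch, with invented names:

	#include <stdbool.h>
	#include <stdio.h>

	struct pernet { bool sysctl_tstamp; };

	/* Stands in for the nf_ct_tstamp module parameter. */
	static bool tstamp_default;

	static void pernet_init(struct pernet *p)
	{
		/* Each namespace starts from the module-wide default; the
		 * shared sysctl table can still flip it per namespace. */
		p->sysctl_tstamp = tstamp_default;
	}

	int main(void)
	{
		struct pernet ns;

		pernet_init(&ns);
		printf("tstamp=%d\n", ns.sysctl_tstamp);
		return 0;
	}
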
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index f4a566e..f108a76 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -13,6 +10,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 #include <net/netfilter/nf_dup_netdev.h>
 
 static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
@@ -53,5 +51,25 @@
 }
 EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
 
+int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx,
+			       struct nft_flow_rule *flow,
+			       enum flow_action_id id, int oif)
+{
+	struct flow_action_entry *entry;
+	struct net_device *dev;
+
+	/* nft_flow_rule_destroy() releases the reference on this device. */
+	dev = dev_get_by_index(ctx->net, oif);
+	if (!dev)
+		return -EOPNOTSUPP;
+
+	entry = &flow->rule->action.entries[ctx->num_actions++];
+	entry->id = id;
+	entry->dev = dev;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_fwd_dup_netdev_offload);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index d812561..128245e 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -10,6 +11,7 @@
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
 struct flow_offload_entry {
@@ -28,6 +30,7 @@
 {
 	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
 	struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+	struct dst_entry *other_dst = route->tuple[!dir].dst;
 	struct dst_entry *dst = route->tuple[dir].dst;
 
 	ft->dir = dir;
@@ -50,8 +53,7 @@
 	ft->src_port = ctt->src.u.tcp.port;
 	ft->dst_port = ctt->dst.u.tcp.port;
 
-	ft->iifidx = route->tuple[dir].ifindex;
-	ft->oifidx = route->tuple[!dir].ifindex;
+	ft->iifidx = other_dst->dev->ifindex;
 	ft->dst_cache = dst;
 }
 
@@ -110,17 +112,18 @@
 #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT	(120 * HZ)
 #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT	(30 * HZ)
 
-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
+{
+	return (__s32)(timeout - (u32)jiffies);
+}
+
+static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 {
 	const struct nf_conntrack_l4proto *l4proto;
+	int l4num = nf_ct_protonum(ct);
 	unsigned int timeout;
-	int l4num;
 
-	l4num = nf_ct_protonum(ct);
-	if (l4num == IPPROTO_TCP)
-		flow_offload_fixup_tcp(&ct->proto.tcp);
-
-	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
+	l4proto = nf_ct_l4proto_find(l4num);
 	if (!l4proto)
 		return;
 
@@ -131,7 +134,20 @@
 	else
 		return;
 
-	ct->timeout = nfct_time_stamp + timeout;
+	if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
+		ct->timeout = nfct_time_stamp + timeout;
+}
+
+static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+{
+	if (nf_ct_protonum(ct) == IPPROTO_TCP)
+		flow_offload_fixup_tcp(&ct->proto.tcp);
+}
+
+static void flow_offload_fixup_ct(struct nf_conn *ct)
+{
+	flow_offload_fixup_ct_state(ct);
+	flow_offload_fixup_ct_timeout(ct);
 }
 
 void flow_offload_free(struct flow_offload *flow)
@@ -184,18 +200,35 @@
 
 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 {
-	flow->timeout = (u32)jiffies;
+	int err;
 
-	rhashtable_insert_fast(&flow_table->rhashtable,
-			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
-			       nf_flow_offload_rhash_params);
-	rhashtable_insert_fast(&flow_table->rhashtable,
-			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
-			       nf_flow_offload_rhash_params);
+	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+
+	err = rhashtable_insert_fast(&flow_table->rhashtable,
+				     &flow->tuplehash[0].node,
+				     nf_flow_offload_rhash_params);
+	if (err < 0)
+		return err;
+
+	err = rhashtable_insert_fast(&flow_table->rhashtable,
+				     &flow->tuplehash[1].node,
+				     nf_flow_offload_rhash_params);
+	if (err < 0) {
+		rhashtable_remove_fast(&flow_table->rhashtable,
+				       &flow->tuplehash[0].node,
+				       nf_flow_offload_rhash_params);
+		return err;
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(flow_offload_add);
 
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+{
+	return nf_flow_timeout_delta(flow->timeout) <= 0;
+}
+
 static void flow_offload_del(struct nf_flowtable *flow_table,
 			     struct flow_offload *flow)
 {
@@ -211,6 +244,11 @@
 	e = container_of(flow, struct flow_offload_entry, flow);
 	clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
 
+	if (nf_flow_has_expired(flow))
+		flow_offload_fixup_ct(e->ct);
+	else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
+		flow_offload_fixup_ct_timeout(e->ct);
+
 	flow_offload_free(flow);
 }
 
@@ -231,10 +269,11 @@
 {
 	struct flow_offload_tuple_rhash *tuplehash;
 	struct flow_offload *flow;
+	struct flow_offload_entry *e;
 	int dir;
 
-	tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
-					   nf_flow_offload_rhash_params);
+	tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
+				      nf_flow_offload_rhash_params);
 	if (!tuplehash)
 		return NULL;
 
@@ -243,31 +282,33 @@
 	if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
 		return NULL;
 
+	e = container_of(flow, struct flow_offload_entry, flow);
+	if (unlikely(nf_ct_is_dying(e->ct)))
+		return NULL;
+
 	return tuplehash;
 }
 EXPORT_SYMBOL_GPL(flow_offload_lookup);
 
-int nf_flow_table_iterate(struct nf_flowtable *flow_table,
-			  void (*iter)(struct flow_offload *flow, void *data),
-			  void *data)
+static int
+nf_flow_table_iterate(struct nf_flowtable *flow_table,
+		      void (*iter)(struct flow_offload *flow, void *data),
+		      void *data)
 {
 	struct flow_offload_tuple_rhash *tuplehash;
 	struct rhashtable_iter hti;
 	struct flow_offload *flow;
-	int err;
+	int err = 0;
 
-	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
-	if (err)
-		return err;
-
+	rhashtable_walk_enter(&flow_table->rhashtable, &hti);
 	rhashtable_walk_start(&hti);
 
 	while ((tuplehash = rhashtable_walk_next(&hti))) {
 		if (IS_ERR(tuplehash)) {
-			err = PTR_ERR(tuplehash);
-			if (err != -EAGAIN)
-				goto out;
-
+			if (PTR_ERR(tuplehash) != -EAGAIN) {
+				err = PTR_ERR(tuplehash);
+				break;
+			}
 			continue;
 		}
 		if (tuplehash->tuple.dir)
@@ -277,55 +318,21 @@
 
 		iter(flow, data);
 	}
-out:
 	rhashtable_walk_stop(&hti);
 	rhashtable_walk_exit(&hti);
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
 
-static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
 {
-	return (__s32)(flow->timeout - (u32)jiffies) <= 0;
-}
+	struct nf_flowtable *flow_table = data;
+	struct flow_offload_entry *e;
 
-static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
-{
-	struct flow_offload_tuple_rhash *tuplehash;
-	struct rhashtable_iter hti;
-	struct flow_offload *flow;
-	int err;
-
-	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
-	if (err)
-		return 0;
-
-	rhashtable_walk_start(&hti);
-
-	while ((tuplehash = rhashtable_walk_next(&hti))) {
-		if (IS_ERR(tuplehash)) {
-			err = PTR_ERR(tuplehash);
-			if (err != -EAGAIN)
-				goto out;
-
-			continue;
-		}
-		if (tuplehash->tuple.dir)
-			continue;
-
-		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
-
-		if (nf_flow_has_expired(flow) ||
-		    (flow->flags & (FLOW_OFFLOAD_DYING |
-				    FLOW_OFFLOAD_TEARDOWN)))
-			flow_offload_del(flow_table, flow);
-	}
-out:
-	rhashtable_walk_stop(&hti);
-	rhashtable_walk_exit(&hti);
-
-	return 1;
+	e = container_of(flow, struct flow_offload_entry, flow);
+	if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) ||
+	    (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
+		flow_offload_del(flow_table, flow);
 }
 
 static void nf_flow_offload_work_gc(struct work_struct *work)
@@ -333,7 +340,7 @@
 	struct nf_flowtable *flow_table;
 
 	flow_table = container_of(work, struct nf_flowtable, gc_work.work);
-	nf_flow_offload_gc_step(flow_table);
+	nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
 	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
 }
 
@@ -478,14 +485,17 @@
 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
 {
 	struct net_device *dev = data;
+	struct flow_offload_entry *e;
+
+	e = container_of(flow, struct flow_offload_entry, flow);
 
 	if (!dev) {
 		flow_offload_teardown(flow);
 		return;
 	}
-
-	if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
-	    flow->tuplehash[1].tuple.iifidx == dev->ifindex)
+	if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
+	    (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
+	     flow->tuplehash[1].tuple.iifidx == dev->ifindex))
 		flow_offload_dead(flow);
 }
 
@@ -496,7 +506,7 @@
 	flush_delayed_work(&flowtable->gc_work);
 }
 
-void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+void nf_flow_table_cleanup(struct net_device *dev)
 {
 	struct nf_flowtable *flowtable;
 
@@ -514,7 +524,7 @@
 	mutex_unlock(&flowtable_lock);
 	cancel_delayed_work_sync(&flow_table->gc_work);
 	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
-	WARN_ON(!nf_flow_offload_gc_step(flow_table));
+	nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
 	rhashtable_destroy(&flow_table->rhashtable);
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_free);
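
nf_flow_timeout_delta() above is the standard wrap-safe tick comparison:
subtract in unsigned arithmetic, reinterpret as signed, and test the sign,
so a deadline that lands just past the 32-bit wrap still reads as being in
the future. A self-contained demonstration of why the naive comparison
breaks at the wrap:

	#include <stdint.h>
	#include <stdio.h>

	static int32_t timeout_delta(uint32_t timeout, uint32_t now)
	{
		return (int32_t)(timeout - now);	/* > 0: in the future */
	}

	int main(void)
	{
		uint32_t now = 0xfffffff0u;	/* just before the wrap */
		uint32_t deadline = now + 0x20;	/* wraps around to 0x10 */

		/* Naive ordering is wrong: 0x10 < 0xfffffff0. */
		printf("naive expired: %d\n", deadline < now);
		/* Signed delta is +0x20, so the flow correctly stays alive. */
		printf("delta expired: %d\n", timeout_delta(deadline, now) <= 0);
		return 0;
	}
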
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 99771aa..593357a 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 15ed913..b9e7dd6 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -181,6 +182,9 @@
 	    iph->protocol != IPPROTO_UDP)
 		return -1;
 
+	if (iph->ttl <= 1)
+		return -1;
+
 	thoff = iph->ihl * 4;
 	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
 		return -1;
@@ -210,6 +214,24 @@
 	return true;
 }
 
+static int nf_flow_offload_dst_check(struct dst_entry *dst)
+{
+	if (unlikely(dst_xfrm(dst)))
+		return dst_check(dst, 0) ? 0 : -1;
+
+	return 0;
+}
+
+static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
+				      const struct nf_hook_state *state,
+				      struct dst_entry *dst)
+{
+	skb_orphan(skb);
+	skb_dst_set_noref(skb, dst);
+	dst_output(state->net, state->sk, skb);
+	return NF_STOLEN;
+}
+
 unsigned int
 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 			const struct nf_hook_state *state)
@@ -235,16 +257,12 @@
 	if (tuplehash == NULL)
 		return NF_ACCEPT;
 
-	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
-	if (!outdev)
-		return NF_ACCEPT;
-
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+	outdev = rt->dst.dev;
 
-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
-	    (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
+	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
 
 	if (skb_try_make_writable(skb, sizeof(*iph)))
@@ -254,13 +272,25 @@
 	if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
 		return NF_ACCEPT;
 
-	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
-	    nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
+	if (nf_flow_offload_dst_check(&rt->dst)) {
+		flow_offload_teardown(flow);
+		return NF_ACCEPT;
+	}
+
+	if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
 		return NF_DROP;
 
 	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
 	iph = ip_hdr(skb);
 	ip_decrease_ttl(iph);
+	skb->tstamp = 0;
+
+	if (unlikely(dst_xfrm(&rt->dst))) {
+		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
+		IPCB(skb)->iif = skb->dev->ifindex;
+		IPCB(skb)->flags = IPSKB_FORWARDED;
+		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
+	}
 
 	skb->dev = outdev;
 	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
@@ -412,6 +442,9 @@
 	    ip6h->nexthdr != IPPROTO_UDP)
 		return -1;
 
+	if (ip6h->hop_limit <= 1)
+		return -1;
+
 	thoff = sizeof(*ip6h);
 	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
 		return -1;
@@ -437,9 +470,9 @@
 	struct nf_flowtable *flow_table = priv;
 	struct flow_offload_tuple tuple = {};
 	enum flow_offload_tuple_dir dir;
+	const struct in6_addr *nexthop;
 	struct flow_offload *flow;
 	struct net_device *outdev;
-	struct in6_addr *nexthop;
 	struct ipv6hdr *ip6h;
 	struct rt6_info *rt;
 
@@ -453,13 +486,10 @@
 	if (tuplehash == NULL)
 		return NF_ACCEPT;
 
-	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
-	if (!outdev)
-		return NF_ACCEPT;
-
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+	outdev = rt->dst.dev;
 
 	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
@@ -468,16 +498,28 @@
 				sizeof(*ip6h)))
 		return NF_ACCEPT;
 
+	if (nf_flow_offload_dst_check(&rt->dst)) {
+		flow_offload_teardown(flow);
+		return NF_ACCEPT;
+	}
+
 	if (skb_try_make_writable(skb, sizeof(*ip6h)))
 		return NF_DROP;
 
-	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
-	    nf_flow_nat_ipv6(flow, skb, dir) < 0)
+	if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
 		return NF_DROP;
 
 	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
 	ip6h = ipv6_hdr(skb);
 	ip6h->hop_limit--;
+	skb->tstamp = 0;
+
+	if (unlikely(dst_xfrm(&rt->dst))) {
+		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+		IP6CB(skb)->iif = skb->dev->ifindex;
+		IP6CB(skb)->flags = IP6SKB_FORWARDED;
+		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
+	}
 
 	skb->dev = outdev;
 	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
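
The new ttl/hop_limit <= 1 checks above keep such packets on the slow path:
the offload hook decrements the TTL itself and has no machinery to emit the
ICMP time-exceeded error that classic forwarding would generate.
ip_decrease_ttl() also patches the header checksum incrementally instead of
recomputing it; here is a user-space sketch of that RFC 1141-style update
on a toy header, not kernel code:

	#include <stdint.h>
	#include <stdio.h>

	/* One's-complement sum over the header, big-endian 16-bit words. */
	static uint32_t sum16(const uint8_t *p, unsigned int len)
	{
		uint32_t sum = 0;
		unsigned int i;

		for (i = 0; i < len; i += 2)
			sum += (uint32_t)p[i] << 8 | p[i + 1];
		while (sum >> 16)
			sum = (sum & 0xffff) + (sum >> 16);
		return sum;
	}

	int main(void)
	{
		uint8_t hdr[20] = { 0x45, 0x00, 0x00, 0x54, 0x12, 0x34,
				    0x40, 0x00, 64, 17, 0x00, 0x00,
				    10, 0, 0, 1, 10, 0, 0, 2 };
		uint32_t chk;

		chk = ~sum16(hdr, sizeof(hdr)) & 0xffff;	/* full csum */
		hdr[10] = chk >> 8;
		hdr[11] = chk & 0xff;
		printf("valid before: %d\n", sum16(hdr, sizeof(hdr)) == 0xffff);

		/* TTL down by one: the ttl/protocol word drops by 0x0100, so
		 * the stored complement rises by 0x0100, end-around carry. */
		hdr[8]--;
		chk = ((uint32_t)hdr[10] << 8 | hdr[11]) + 0x0100;
		chk += chk >> 16;
		hdr[10] = (chk >> 8) & 0xff;
		hdr[11] = chk & 0xff;
		printf("valid after:  %d\n", sum16(hdr, sizeof(hdr)) == 0xffff);
		return 0;
	}
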
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index e15779f..d6c4390 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -7,9 +7,6 @@
 #include <linux/netdevice.h>
 
 /* nf_queue.c */
-int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
-	     const struct nf_hook_entries *entries, unsigned int index,
-	     unsigned int verdict);
 void nf_queue_nf_hook_drop(struct net *net);
 
 /* nf_log.c */
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index a61d6df..bb25d4c 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -373,7 +374,7 @@
 			continue;
 
 		logger = nft_log_dereference(loggers[*pos][i]);
-		seq_printf(s, "%s", logger->name);
+		seq_puts(s, logger->name);
 		if (i == 0 && loggers[*pos][i + 1] != NULL)
 			seq_puts(s, ",");
 
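
The seq_printf() to seq_puts() change above is the usual cleanup for plain
string output: with no conversions to perform, the dedicated string call is
cheaper and avoids any chance of '%' characters in the data being treated
as a format. The same habit in portable C, with a hypothetical name:

	#include <stdio.h>

	int main(void)
	{
		const char *name = "100%_logger";	/* hypothetical */

		fputs(name, stdout);	/* cheap and safe, like seq_puts() */
		fputc('\n', stdout);
		/* printf(name) would misparse the '%'; printf("%s", name)
		 * is safe but still pays for format parsing. */
		return 0;
	}
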
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index a8c5c84..ae5628d 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -156,22 +153,20 @@
 			  const struct net_device *out,
 			  const struct nf_loginfo *loginfo, const char *prefix)
 {
+	const struct net_device *physoutdev __maybe_unused;
+	const struct net_device *physindev __maybe_unused;
+
 	nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
 	       '0' + loginfo->u.log.level, prefix,
 	       in ? in->name : "",
 	       out ? out->name : "");
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	if (skb->nf_bridge) {
-		const struct net_device *physindev;
-		const struct net_device *physoutdev;
-
-		physindev = nf_bridge_get_physindev(skb);
-		if (physindev && in != physindev)
-			nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
-		physoutdev = nf_bridge_get_physoutdev(skb);
-		if (physoutdev && out != physoutdev)
-			nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
-	}
+	physindev = nf_bridge_get_physindev(skb);
+	if (physindev && in != physindev)
+		nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
+	physoutdev = nf_bridge_get_physoutdev(skb);
+	if (physoutdev && out != physoutdev)
+		nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
 #endif
 }
 EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c
index 350eb14..968dafa 100644
--- a/net/netfilter/nf_log_netdev.c
+++ b/net/netfilter/nf_log_netdev.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index e4d61a7..3bc7e08 100644
--- a/net/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Amanda extension for TCP NAT alteration.
  * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
  * based on a copy of HW's ip_nat_irc.c as well as other modules
  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -19,10 +15,15 @@
 #include <net/netfilter/nf_nat_helper.h>
 #include <linux/netfilter/nf_conntrack_amanda.h>
 
+#define NAT_HELPER_NAME "amanda"
+
 MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
 MODULE_DESCRIPTION("Amanda NAT helper");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("ip_nat_amanda");
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);
+
+static struct nf_conntrack_nat_helper nat_helper_amanda =
+	NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);
 
 static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
@@ -47,7 +48,7 @@
 		int res;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		res = nf_ct_expect_related(exp);
+		res = nf_ct_expect_related(exp, 0);
 		if (res == 0)
 			break;
 		else if (res != -EBUSY) {
@@ -74,6 +75,7 @@
 
 static void __exit nf_nat_amanda_fini(void)
 {
+	nf_nat_helper_unregister(&nat_helper_amanda);
 	RCU_INIT_POINTER(nf_nat_amanda_hook, NULL);
 	synchronize_rcu();
 }
@@ -81,6 +83,7 @@
 static int __init nf_nat_amanda_init(void)
 {
 	BUG_ON(nf_nat_amanda_hook != NULL);
+	nf_nat_helper_register(&nat_helper_amanda);
 	RCU_INIT_POINTER(nf_nat_amanda_hook, help);
 	return 0;
 }
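
The port-probing loop in help() (context above) walks candidate ports until
the expectation either registers or fails with something other than -EBUSY,
which signals a hard error rather than a port collision. Schematically,
with a toy predicate in place of nf_ct_expect_related():

	#include <errno.h>
	#include <stdio.h>

	/* Toy stand-in: pretend ports below 1030 are already reserved. */
	static int try_reserve(unsigned short port)
	{
		return port < 1030 ? -EBUSY : 0;
	}

	int main(void)
	{
		unsigned short port;
		int res = -1;

		for (port = 1024; port != 0; port++) {
			res = try_reserve(port);
			if (res == 0)
				break;		/* got one */
			if (res != -EBUSY)
				break;		/* hard error: give up */
		}

		if (res == 0)
			printf("reserved port %u\n", port);
		else
			puts("no port available");
		return 0;
	}
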
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index e2b1960..bfc555f 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -21,25 +18,18 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_zones.h>
-#include <linux/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <uapi/linux/netfilter/nf_nat.h>
 
 #include "nf_internals.h"
 
 static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
 
 static DEFINE_MUTEX(nf_nat_proto_mutex);
-static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
-						__read_mostly;
-static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
-						__read_mostly;
 static unsigned int nat_net_id __read_mostly;
 
 static struct hlist_head *nf_nat_bysource __read_mostly;
@@ -61,23 +51,75 @@
 	struct nf_nat_hooks_net nat_proto_net[NFPROTO_NUMPROTO];
 };
 
-inline const struct nf_nat_l3proto *
-__nf_nat_l3proto_find(u8 family)
-{
-	return rcu_dereference(nf_nat_l3protos[family]);
-}
-
-inline const struct nf_nat_l4proto *
-__nf_nat_l4proto_find(u8 family, u8 protonum)
-{
-	return rcu_dereference(nf_nat_l4protos[family][protonum]);
-}
-EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find);
-
 #ifdef CONFIG_XFRM
+static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
+				       const struct nf_conn *ct,
+				       enum ip_conntrack_dir dir,
+				       unsigned long statusbit,
+				       struct flowi *fl)
+{
+	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+	struct flowi4 *fl4 = &fl->u.ip4;
+
+	if (ct->status & statusbit) {
+		fl4->daddr = t->dst.u3.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl4->fl4_dport = t->dst.u.all;
+	}
+
+	statusbit ^= IPS_NAT_MASK;
+
+	if (ct->status & statusbit) {
+		fl4->saddr = t->src.u3.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl4->fl4_sport = t->src.u.all;
+	}
+}
+
+static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
+				       const struct nf_conn *ct,
+				       enum ip_conntrack_dir dir,
+				       unsigned long statusbit,
+				       struct flowi *fl)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+	struct flowi6 *fl6 = &fl->u.ip6;
+
+	if (ct->status & statusbit) {
+		fl6->daddr = t->dst.u3.in6;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl6->fl6_dport = t->dst.u.all;
+	}
+
+	statusbit ^= IPS_NAT_MASK;
+
+	if (ct->status & statusbit) {
+		fl6->saddr = t->src.u3.in6;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl6->fl6_sport = t->src.u.all;
+	}
+#endif
+}
+
 static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
 {
-	const struct nf_nat_l3proto *l3proto;
 	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	enum ip_conntrack_dir dir;
@@ -89,17 +131,20 @@
 		return;
 
 	family = nf_ct_l3num(ct);
-	l3proto = __nf_nat_l3proto_find(family);
-	if (l3proto == NULL)
-		return;
-
 	dir = CTINFO2DIR(ctinfo);
 	if (dir == IP_CT_DIR_ORIGINAL)
 		statusbit = IPS_DST_NAT;
 	else
 		statusbit = IPS_SRC_NAT;
 
-	l3proto->decode_session(skb, ct, dir, statusbit, fl);
+	switch (family) {
+	case NFPROTO_IPV4:
+		nf_nat_ipv4_decode_session(skb, ct, dir, statusbit, fl);
+		return;
+	case NFPROTO_IPV6:
+		nf_nat_ipv6_decode_session(skb, ct, dir, statusbit, fl);
+		return;
+	}
 }
 
 int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
@@ -117,7 +162,8 @@
 	dst = skb_dst(skb);
 	if (dst->xfrm)
 		dst = ((struct xfrm_dst *)dst)->route;
-	dst_hold(dst);
+	if (!dst_hold_safe(dst))
+		return -EHOSTUNREACH;
 
 	if (sk && !net_eq(net, sock_net(sk)))
 		sk = NULL;
@@ -155,7 +201,7 @@
 }
 
 /* Is this tuple already taken? (not by us) */
-int
+static int
 nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 		  const struct nf_conn *ignored_conntrack)
 {
@@ -167,32 +213,70 @@
 	 */
 	struct nf_conntrack_tuple reply;
 
-	nf_ct_invert_tuplepr(&reply, tuple);
+	nf_ct_invert_tuple(&reply, tuple);
 	return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
 }
-EXPORT_SYMBOL(nf_nat_used_tuple);
+
+static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
+				 const struct nf_nat_range2 *range)
+{
+	if (t->src.l3num == NFPROTO_IPV4)
+		return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
+		       ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
+
+	return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
+	       ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
+}
+
+/* Is the manipulable part of the tuple between min and max, inclusive? */
+static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
+			     enum nf_nat_manip_type maniptype,
+			     const union nf_conntrack_man_proto *min,
+			     const union nf_conntrack_man_proto *max)
+{
+	__be16 port;
+
+	switch (tuple->dst.protonum) {
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6:
+		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
+		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
+	case IPPROTO_GRE: /* all fall through */
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
+	case IPPROTO_DCCP:
+	case IPPROTO_SCTP:
+		if (maniptype == NF_NAT_MANIP_SRC)
+			port = tuple->src.u.all;
+		else
+			port = tuple->dst.u.all;
+
+		return ntohs(port) >= ntohs(min->all) &&
+		       ntohs(port) <= ntohs(max->all);
+	default:
+		return true;
+	}
+}
 
 /* If we source map this tuple so reply looks like reply_tuple, will
  * that meet the constraints of range.
  */
-static int in_range(const struct nf_nat_l3proto *l3proto,
-		    const struct nf_nat_l4proto *l4proto,
-		    const struct nf_conntrack_tuple *tuple,
+static int in_range(const struct nf_conntrack_tuple *tuple,
 		    const struct nf_nat_range2 *range)
 {
 	/* If we are supposed to map IPs, then we must be in the
 	 * range specified, otherwise let this drag us onto a new src IP.
 	 */
 	if (range->flags & NF_NAT_RANGE_MAP_IPS &&
-	    !l3proto->in_range(tuple, range))
+	    !nf_nat_inet_in_range(tuple, range))
 		return 0;
 
-	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
-	    l4proto->in_range(tuple, NF_NAT_MANIP_SRC,
-			      &range->min_proto, &range->max_proto))
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
 		return 1;
 
-	return 0;
+	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
+				&range->min_proto, &range->max_proto);
 }
 
 static inline int
@@ -211,8 +295,6 @@
 static int
 find_appropriate_src(struct net *net,
 		     const struct nf_conntrack_zone *zone,
-		     const struct nf_nat_l3proto *l3proto,
-		     const struct nf_nat_l4proto *l4proto,
 		     const struct nf_conntrack_tuple *tuple,
 		     struct nf_conntrack_tuple *result,
 		     const struct nf_nat_range2 *range)
@@ -225,11 +307,11 @@
 		    net_eq(net, nf_ct_net(ct)) &&
 		    nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
 			/* Copy source part from reply tuple. */
-			nf_ct_invert_tuplepr(result,
+			nf_ct_invert_tuple(result,
 				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 			result->dst = tuple->dst;
 
-			if (in_range(l3proto, l4proto, result, range))
+			if (in_range(result, range))
 				return 1;
 		}
 	}
@@ -310,12 +392,134 @@
 	}
 }
 
+/* Alter the per-proto part of the tuple (depending on maniptype), to
+ * give a unique tuple in the given range if possible.
+ *
+ * Per-protocol part of tuple is initialized to the incoming packet.
+ */
+static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+					const struct nf_nat_range2 *range,
+					enum nf_nat_manip_type maniptype,
+					const struct nf_conn *ct)
+{
+	unsigned int range_size, min, max, i, attempts;
+	__be16 *keyptr;
+	u16 off;
+	static const unsigned int max_attempts = 128;
+
+	switch (tuple->dst.protonum) {
+	case IPPROTO_ICMP: /* fallthrough */
+	case IPPROTO_ICMPV6:
+		/* id is same for either direction... */
+		keyptr = &tuple->src.u.icmp.id;
+		if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+			min = 0;
+			range_size = 65536;
+		} else {
+			min = ntohs(range->min_proto.icmp.id);
+			range_size = ntohs(range->max_proto.icmp.id) -
+				     ntohs(range->min_proto.icmp.id) + 1;
+		}
+		goto find_free_id;
+#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
+	case IPPROTO_GRE:
+		/* If there is no master conntrack we are not PPTP;
+		 * do not change tuples. */
+		if (!ct->master)
+			return;
+
+		if (maniptype == NF_NAT_MANIP_SRC)
+			keyptr = &tuple->src.u.gre.key;
+		else
+			keyptr = &tuple->dst.u.gre.key;
+
+		if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+			min = 1;
+			range_size = 65535;
+		} else {
+			min = ntohs(range->min_proto.gre.key);
+			range_size = ntohs(range->max_proto.gre.key) - min + 1;
+		}
+		goto find_free_id;
+#endif
+	case IPPROTO_UDP:	/* fallthrough */
+	case IPPROTO_UDPLITE:	/* fallthrough */
+	case IPPROTO_TCP:	/* fallthrough */
+	case IPPROTO_SCTP:	/* fallthrough */
+	case IPPROTO_DCCP:	/* fallthrough */
+		if (maniptype == NF_NAT_MANIP_SRC)
+			keyptr = &tuple->src.u.all;
+		else
+			keyptr = &tuple->dst.u.all;
+
+		break;
+	default:
+		return;
+	}
+
+	/* If no range specified... */
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+		/* If it's dst rewrite, can't change port */
+		if (maniptype == NF_NAT_MANIP_DST)
+			return;
+
+		if (ntohs(*keyptr) < 1024) {
+			/* Loose convention: >> 512 is credential passing */
+			if (ntohs(*keyptr) < 512) {
+				min = 1;
+				range_size = 511 - min + 1;
+			} else {
+				min = 600;
+				range_size = 1023 - min + 1;
+			}
+		} else {
+			min = 1024;
+			range_size = 65535 - 1024 + 1;
+		}
+	} else {
+		min = ntohs(range->min_proto.all);
+		max = ntohs(range->max_proto.all);
+		if (unlikely(max < min))
+			swap(max, min);
+		range_size = max - min + 1;
+	}
+
+find_free_id:
+	if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
+		off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
+	else
+		off = prandom_u32();
+
+	attempts = range_size;
+	if (attempts > max_attempts)
+		attempts = max_attempts;
+
+	/* We are in softirq; doing a search of the entire range risks
+	 * soft lockup when all tuples are already used.
+	 *
+	 * If we can't find any free port from first offset, pick a new
+	 * one and try again, with ever smaller search window.
+	 */
+another_round:
+	for (i = 0; i < attempts; i++, off++) {
+		*keyptr = htons(min + off % range_size);
+		if (!nf_nat_used_tuple(tuple, ct))
+			return;
+	}
+
+	if (attempts >= range_size || attempts < 16)
+		return;
+	attempts /= 2;
+	off = prandom_u32();
+	goto another_round;
+}
+
 /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
  * we change the source to map into the range. For NF_INET_PRE_ROUTING
  * and NF_INET_LOCAL_OUT, we change the destination to map into the
  * range. It might not be possible to get a unique tuple, but we try.
  * At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
+ * __nf_conntrack_confirm and drop the packet. */
 static void
 get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_conntrack_tuple *orig_tuple,
@@ -324,17 +528,10 @@
 		 enum nf_nat_manip_type maniptype)
 {
 	const struct nf_conntrack_zone *zone;
-	const struct nf_nat_l3proto *l3proto;
-	const struct nf_nat_l4proto *l4proto;
 	struct net *net = nf_ct_net(ct);
 
 	zone = nf_ct_zone(ct);
 
-	rcu_read_lock();
-	l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
-	l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num,
-					orig_tuple->dst.protonum);
-
 	/* 1) If this srcip/proto/src-proto-part is currently mapped,
 	 * and that same mapping gives a unique tuple within the given
 	 * range, use that.
@@ -346,16 +543,16 @@
 	if (maniptype == NF_NAT_MANIP_SRC &&
 	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
 		/* try the original tuple first */
-		if (in_range(l3proto, l4proto, orig_tuple, range)) {
+		if (in_range(orig_tuple, range)) {
 			if (!nf_nat_used_tuple(orig_tuple, ct)) {
 				*tuple = *orig_tuple;
-				goto out;
+				return;
 			}
-		} else if (find_appropriate_src(net, zone, l3proto, l4proto,
+		} else if (find_appropriate_src(net, zone,
 						orig_tuple, tuple, range)) {
 			pr_debug("get_unique_tuple: Found current src map\n");
 			if (!nf_nat_used_tuple(tuple, ct))
-				goto out;
+				return;
 		}
 	}
 
@@ -371,21 +568,19 @@
 	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
-			    l4proto->in_range(tuple, maniptype,
+			    l4proto_in_range(tuple, maniptype,
 			          &range->min_proto,
 			          &range->max_proto) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
-				goto out;
+				return;
 		} else if (!nf_nat_used_tuple(tuple, ct)) {
-			goto out;
+			return;
 		}
 	}
 
 	/* Last chance: get protocol to try to obtain unique tuple. */
-	l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
-out:
-	rcu_read_unlock();
+	nf_nat_l4proto_unique_tuple(tuple, range, maniptype, ct);
 }
 
 struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
@@ -424,8 +619,8 @@
 	 * manipulations (future optimization: if num_manips == 0,
 	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
 	 */
-	nf_ct_invert_tuplepr(&curr_tuple,
-			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	nf_ct_invert_tuple(&curr_tuple,
+			   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
 
@@ -433,7 +628,7 @@
 		struct nf_conntrack_tuple reply;
 
 		/* Alter conntrack table so will recognize replies. */
-		nf_ct_invert_tuplepr(&reply, &new_tuple);
+		nf_ct_invert_tuple(&reply, &new_tuple);
 		nf_conntrack_alter_reply(ct, &reply);
 
 		/* Non-atomic: we own this at the moment. */
@@ -496,26 +691,6 @@
 }
 EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
 
-static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
-				     enum nf_nat_manip_type mtype,
-				     enum ip_conntrack_dir dir)
-{
-	const struct nf_nat_l3proto *l3proto;
-	const struct nf_nat_l4proto *l4proto;
-	struct nf_conntrack_tuple target;
-
-	/* We are aiming to look like inverse of other direction. */
-	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-
-	l3proto = __nf_nat_l3proto_find(target.src.l3num);
-	l4proto = __nf_nat_l4proto_find(target.src.l3num,
-					target.dst.protonum);
-	if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
-		return NF_DROP;
-
-	return NF_ACCEPT;
-}
-
 /* Do packet manipulations according to nf_nat_setup_info. */
 unsigned int nf_nat_packet(struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
@@ -666,117 +841,6 @@
 	return 0;
 }
 
-static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
-{
-	struct nf_nat_proto_clean clean = {
-		.l3proto = l3proto,
-		.l4proto = l4proto,
-	};
-
-	nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
-}
-
-static void nf_nat_l3proto_clean(u8 l3proto)
-{
-	struct nf_nat_proto_clean clean = {
-		.l3proto = l3proto,
-	};
-
-	nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
-}
-
-/* Protocol registration. */
-int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
-{
-	const struct nf_nat_l4proto **l4protos;
-	unsigned int i;
-	int ret = 0;
-
-	mutex_lock(&nf_nat_proto_mutex);
-	if (nf_nat_l4protos[l3proto] == NULL) {
-		l4protos = kmalloc_array(IPPROTO_MAX,
-					 sizeof(struct nf_nat_l4proto *),
-					 GFP_KERNEL);
-		if (l4protos == NULL) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		for (i = 0; i < IPPROTO_MAX; i++)
-			RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown);
-
-		/* Before making proto_array visible to lockless readers,
-		 * we must make sure its content is committed to memory.
-		 */
-		smp_wmb();
-
-		nf_nat_l4protos[l3proto] = l4protos;
-	}
-
-	if (rcu_dereference_protected(
-			nf_nat_l4protos[l3proto][l4proto->l4proto],
-			lockdep_is_held(&nf_nat_proto_mutex)
-			) != &nf_nat_l4proto_unknown) {
-		ret = -EBUSY;
-		goto out;
-	}
-	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
- out:
-	mutex_unlock(&nf_nat_proto_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);
-
-/* No one stores the protocol anywhere; simply delete it. */
-void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto)
-{
-	mutex_lock(&nf_nat_proto_mutex);
-	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
-			 &nf_nat_l4proto_unknown);
-	mutex_unlock(&nf_nat_proto_mutex);
-	synchronize_rcu();
-
-	nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
-}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
-
-int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
-{
-	mutex_lock(&nf_nat_proto_mutex);
-	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
-			 &nf_nat_l4proto_tcp);
-	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
-			 &nf_nat_l4proto_udp);
-#ifdef CONFIG_NF_NAT_PROTO_DCCP
-	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
-			 &nf_nat_l4proto_dccp);
-#endif
-#ifdef CONFIG_NF_NAT_PROTO_SCTP
-	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP],
-			 &nf_nat_l4proto_sctp);
-#endif
-#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
-	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE],
-			 &nf_nat_l4proto_udplite);
-#endif
-	mutex_unlock(&nf_nat_proto_mutex);
-
-	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_register);
-
-void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
-{
-	mutex_lock(&nf_nat_proto_mutex);
-	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL);
-	mutex_unlock(&nf_nat_proto_mutex);
-	synchronize_rcu();
-
-	nf_nat_l3proto_clean(l3proto->l3proto);
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
-
 /* No one using conntrack by the time this called. */
 static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 {
@@ -801,24 +865,34 @@
 	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
 };
 
+static int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
+					  struct nf_nat_range2 *range)
+{
+	if (tb[CTA_PROTONAT_PORT_MIN]) {
+		range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
+		range->max_proto.all = range->min_proto.all;
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+	if (tb[CTA_PROTONAT_PORT_MAX]) {
+		range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+	return 0;
+}
+
 static int nfnetlink_parse_nat_proto(struct nlattr *attr,
 				     const struct nf_conn *ct,
 				     struct nf_nat_range2 *range)
 {
 	struct nlattr *tb[CTA_PROTONAT_MAX+1];
-	const struct nf_nat_l4proto *l4proto;
 	int err;
 
-	err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr,
-			       protonat_nla_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, CTA_PROTONAT_MAX, attr,
+					  protonat_nla_policy, NULL);
 	if (err < 0)
 		return err;
 
-	l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-	if (l4proto->nlattr_to_range)
-		err = l4proto->nlattr_to_range(tb, range);
-
-	return err;
+	return nf_nat_l4proto_nlattr_to_range(tb, range);
 }
 
 static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
@@ -829,22 +903,67 @@
 	[CTA_NAT_PROTO]		= { .type = NLA_NESTED },
 };
 
+static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
+				       struct nf_nat_range2 *range)
+{
+	if (tb[CTA_NAT_V4_MINIP]) {
+		range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (tb[CTA_NAT_V4_MAXIP])
+		range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
+	else
+		range->max_addr.ip = range->min_addr.ip;
+
+	return 0;
+}
+
+static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
+				       struct nf_nat_range2 *range)
+{
+	if (tb[CTA_NAT_V6_MINIP]) {
+		nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
+			   sizeof(struct in6_addr));
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (tb[CTA_NAT_V6_MAXIP])
+		nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
+			   sizeof(struct in6_addr));
+	else
+		range->max_addr = range->min_addr;
+
+	return 0;
+}
+
 static int
 nfnetlink_parse_nat(const struct nlattr *nat,
-		    const struct nf_conn *ct, struct nf_nat_range2 *range,
-		    const struct nf_nat_l3proto *l3proto)
+		    const struct nf_conn *ct, struct nf_nat_range2 *range)
 {
 	struct nlattr *tb[CTA_NAT_MAX+1];
 	int err;
 
 	memset(range, 0, sizeof(*range));
 
-	err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, CTA_NAT_MAX, nat,
+					  nat_nla_policy, NULL);
 	if (err < 0)
 		return err;
 
-	err = l3proto->nlattr_to_range(tb, range);
-	if (err < 0)
+	switch (nf_ct_l3num(ct)) {
+	case NFPROTO_IPV4:
+		err = nf_nat_ipv4_nlattr_to_range(tb, range);
+		break;
+	case NFPROTO_IPV6:
+		err = nf_nat_ipv6_nlattr_to_range(tb, range);
+		break;
+	default:
+		err = -EPROTONOSUPPORT;
+		break;
+	}
+
+	if (err)
 		return err;
 
 	if (!tb[CTA_NAT_PROTO])
@@ -860,7 +979,6 @@
 			  const struct nlattr *attr)
 {
 	struct nf_nat_range2 range;
-	const struct nf_nat_l3proto *l3proto;
 	int err;
 
 	/* Should not happen, restricted to creating new conntracks
@@ -869,18 +987,11 @@
 	if (WARN_ON_ONCE(nf_nat_initialized(ct, manip)))
 		return -EEXIST;
 
-	/* Make sure that L3 NAT is there by when we call nf_nat_setup_info to
-	 * attach the null binding, otherwise this may oops.
-	 */
-	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
-	if (l3proto == NULL)
-		return -EAGAIN;
-
 	/* No NAT information has been passed, allocate the null-binding */
 	if (attr == NULL)
 		return __nf_nat_alloc_null_binding(ct, manip) == NF_DROP ? -ENOMEM : 0;
 
-	err = nfnetlink_parse_nat(attr, ct, &range, l3proto);
+	err = nfnetlink_parse_nat(attr, ct, &range);
 	if (err < 0)
 		return err;
 
@@ -901,7 +1012,7 @@
 	.expectfn	= nf_nat_follow_master,
 };
 
-int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops,
+int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
 		       const struct nf_hook_ops *orig_nat_ops, unsigned int ops_count)
 {
 	struct nat_net *nat_net = net_generic(net, nat_net_id);
@@ -911,14 +1022,12 @@
 	struct nf_hook_ops *nat_ops;
 	int i, ret;
 
-	if (WARN_ON_ONCE(ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net)))
+	if (WARN_ON_ONCE(pf >= ARRAY_SIZE(nat_net->nat_proto_net)))
 		return -EINVAL;
 
-	nat_proto_net = &nat_net->nat_proto_net[ops->pf];
+	nat_proto_net = &nat_net->nat_proto_net[pf];
 
 	for (i = 0; i < ops_count; i++) {
-		if (WARN_ON(orig_nat_ops[i].pf != ops->pf))
-			return -EINVAL;
 		if (orig_nat_ops[i].hooknum == hooknum) {
 			hooknum = i;
 			break;
@@ -977,10 +1086,9 @@
 	mutex_unlock(&nf_nat_proto_mutex);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_nat_register_fn);
 
-void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops,
-		          unsigned int ops_count)
+void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
+			  unsigned int ops_count)
 {
 	struct nat_net *nat_net = net_generic(net, nat_net_id);
 	struct nf_nat_hooks_net *nat_proto_net;
@@ -989,10 +1097,10 @@
 	int hooknum = ops->hooknum;
 	int i;
 
-	if (ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net))
+	if (pf >= ARRAY_SIZE(nat_net->nat_proto_net))
 		return;
 
-	nat_proto_net = &nat_net->nat_proto_net[ops->pf];
+	nat_proto_net = &nat_net->nat_proto_net[pf];
 
 	mutex_lock(&nf_nat_proto_mutex);
 	if (WARN_ON(nat_proto_net->users == 0))
@@ -1026,7 +1134,6 @@
 unlock:
 	mutex_unlock(&nf_nat_proto_mutex);
 }
-EXPORT_SYMBOL_GPL(nf_nat_unregister_fn);
 
 static struct pernet_operations nat_net_ops = {
 	.id = &nat_net_id,
@@ -1081,7 +1188,6 @@
 static void __exit nf_nat_cleanup(void)
 {
 	struct nf_nat_proto_clean clean = {};
-	unsigned int i;
 
 	nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
 
@@ -1089,10 +1195,6 @@
 	nf_ct_helper_expectfn_unregister(&follow_master_nat);
 	RCU_INIT_POINTER(nf_nat_hook, NULL);
 
-	synchronize_rcu();
-
-	for (i = 0; i < NFPROTO_NUMPROTO; i++)
-		kfree(nf_nat_l4protos[i]);
 	synchronize_net();
 	kvfree(nf_nat_bysource);
 	unregister_pernet_subsys(&nat_net_ops);
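
The API change threaded through the hunks above: nf_nat_register_fn() and
nf_nat_unregister_fn() now take the protocol family as an explicit pf
argument instead of reading ops->pf. That is what lets an NFPROTO_INET
caller (see nf_nat_inet_register_fn() later in this patch) attach one ops
structure to both the IPv4 and IPv6 base hooks. The per-family bookkeeping
itself is a plain use count; a toy userspace model with invented names,
offered as a sketch rather than the kernel logic:

#include <stdio.h>

struct toy_proto_net {
	int users;
	const char *installed;	/* stands in for the kernel hook array */
};

static void toy_register(struct toy_proto_net *pn, const char *base_ops)
{
	if (pn->users++ == 0)
		pn->installed = base_ops;	/* first user installs hooks */
}

static void toy_unregister(struct toy_proto_net *pn)
{
	if (--pn->users == 0)
		pn->installed = NULL;		/* last user tears down */
}

int main(void)
{
	struct toy_proto_net v4 = { 0 };

	toy_register(&v4, "nf_nat_ipv4_ops");
	toy_register(&v4, "nf_nat_ipv4_ops");	/* second user: count only */
	toy_unregister(&v4);
	printf("users=%d installed=%s\n", v4.users, v4.installed);
	return 0;
}
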
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index 5063cbf..aace676 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* FTP extension for TCP NAT alteration. */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -21,13 +18,18 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_ftp.h>
 
+#define NAT_HELPER_NAME "ftp"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
 MODULE_DESCRIPTION("ftp NAT helper");
-MODULE_ALIAS("ip_nat_ftp");
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);
 
 /* FIXME: Time out? --RR */
 
+static struct nf_conntrack_nat_helper nat_helper_ftp =
+	NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);
+
 static int nf_nat_ftp_fmt_cmd(struct nf_conn *ct, enum nf_ct_ftp_type type,
 			      char *buffer, size_t buflen,
 			      union nf_inet_addr *addr, u16 port)
@@ -89,7 +91,7 @@
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -124,6 +126,7 @@
 
 static void __exit nf_nat_ftp_fini(void)
 {
+	nf_nat_helper_unregister(&nat_helper_ftp);
 	RCU_INIT_POINTER(nf_nat_ftp_hook, NULL);
 	synchronize_rcu();
 }
@@ -131,6 +134,7 @@
 static int __init nf_nat_ftp_init(void)
 {
 	BUG_ON(nf_nat_ftp_hook != NULL);
+	nf_nat_helper_register(&nat_helper_ftp);
 	RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp);
 	return 0;
 }
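
The ftp helper now registers a struct nf_conntrack_nat_helper and derives
its module alias from the helper name via MODULE_ALIAS_NF_NAT_HELPER(), so
conntrack can autoload NAT helpers by name. The macro concatenates a fixed
prefix with the name; the sketch below shows only the string-pasting idea,
and the "ip_nat_" prefix in it is an assumption, not quoted from the kernel
header:

#include <stdio.h>

/* assumed prefix, for illustration only */
#define TOY_NAT_HELPER_PREFIX "ip_nat_"
#define TOY_NAT_HELPER_ALIAS(name) (TOY_NAT_HELPER_PREFIX name)

int main(void)
{
	puts(TOY_NAT_HELPER_ALIAS("ftp"));	/* prints "ip_nat_ftp" */
	return 0;
}
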
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 99606ba..a263505 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* nf_nat_helper.c - generic support functions for NAT helpers
  *
  * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
  * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2007-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/gfp.h>
@@ -22,9 +19,6 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-#include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 
 /* Frobs data inside this packet, which is linear. */
@@ -37,7 +31,7 @@
 {
 	unsigned char *data;
 
-	BUG_ON(skb_is_nonlinear(skb));
+	SKB_LINEAR_ASSERT(skb);
 	data = skb_network_header(skb) + dataoff;
 
 	/* move post-replacement */
@@ -98,11 +92,10 @@
 				const char *rep_buffer,
 				unsigned int rep_len, bool adjust)
 {
-	const struct nf_nat_l3proto *l3proto;
 	struct tcphdr *tcph;
 	int oldlen, datalen;
 
-	if (!skb_make_writable(skb, skb->len))
+	if (skb_ensure_writable(skb, skb->len))
 		return false;
 
 	if (rep_len > match_len &&
@@ -110,8 +103,6 @@
 	    !enlarge_skb(skb, rep_len - match_len))
 		return false;
 
-	SKB_LINEAR_ASSERT(skb);
-
 	tcph = (void *)skb->data + protoff;
 
 	oldlen = skb->len - protoff;
@@ -120,9 +111,8 @@
 
 	datalen = skb->len - protoff;
 
-	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
-	l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check,
-			     datalen, oldlen);
+	nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP,
+			   tcph, &tcph->check, datalen, oldlen);
 
 	if (adjust && rep_len != match_len)
 		nf_ct_seqadj_set(ct, ctinfo, tcph->seq,
@@ -152,11 +142,10 @@
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
-	const struct nf_nat_l3proto *l3proto;
 	struct udphdr *udph;
 	int datalen, oldlen;
 
-	if (!skb_make_writable(skb, skb->len))
+	if (skb_ensure_writable(skb, skb->len))
 		return false;
 
 	if (rep_len > match_len &&
@@ -178,9 +167,8 @@
 	if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
 		return true;
 
-	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
-	l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check,
-			     datalen, oldlen);
+	nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_UDP,
+			   udph, &udph->check, datalen, oldlen);
 
 	return true;
 }
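
With the per-l3proto ops table gone, both manglers above pass the
conntrack's layer-3 number straight to nf_nat_csum_recalc(), which selects
the IPv4 or IPv6 path. When the skb is already set up for checksum offload
(CHECKSUM_PARTIAL), only the length change is folded into the stored
checksum incrementally (RFC 1624: HC' = ~(~HC + ~m + m')); otherwise the
pseudo-header sum is rebuilt. A standalone model of that incremental
arithmetic, not the kernel helper itself:

#include <stdint.h>
#include <stdio.h>

static uint16_t csum_adjust(uint16_t check, uint16_t old, uint16_t new)
{
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~old;			/* fold the old value out */
	sum += new;				/* fold the new value in */
	sum = (sum & 0xffff) + (sum >> 16);	/* end-around carry */
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* adjust a checksum after the covered length grew from 100 to 120 */
	printf("0x%04x\n", csum_adjust(0x1c46, 100, 120));
	return 0;
}
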
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 3aa35a4..c691ab8 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* IRC extension for TCP NAT alteration.
  *
  * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
  * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
  * based on a copy of RR's ip_nat_ftp.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -23,10 +19,15 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_irc.h>
 
+#define NAT_HELPER_NAME "irc"
+
 MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
 MODULE_DESCRIPTION("IRC (DCC) NAT helper");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("ip_nat_irc");
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);
+
+static struct nf_conntrack_nat_helper nat_helper_irc =
+	NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);
 
 static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
@@ -52,7 +53,7 @@
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -96,6 +97,7 @@
 
 static void __exit nf_nat_irc_fini(void)
 {
+	nf_nat_helper_unregister(&nat_helper_irc);
 	RCU_INIT_POINTER(nf_nat_irc_hook, NULL);
 	synchronize_rcu();
 }
@@ -103,6 +105,7 @@
 static int __init nf_nat_irc_init(void)
 {
 	BUG_ON(nf_nat_irc_hook != NULL);
+	nf_nat_helper_register(&nat_helper_irc);
 	RCU_INIT_POINTER(nf_nat_irc_hook, help);
 	return 0;
 }
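
ftp and irc now share the same lifecycle ordering: on init, register the
NAT-helper bookkeeping first and publish the global hook pointer last; on
exit, unregister, clear the pointer, then synchronize_rcu() so no in-flight
reader still holds it. A userspace-flavoured sketch of that publish/retire
ordering, using C11 atomics as a stand-in for the kernel's RCU pointer
primitives (all names invented):

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

typedef unsigned int (*nat_hook_t)(const char *pkt);

static _Atomic(nat_hook_t) nat_hook;

static unsigned int irc_help(const char *pkt)
{
	printf("mangling %s\n", pkt);
	return 0;
}

static void helper_init(void)
{
	/* register bookkeeping first, publish the hook pointer last */
	atomic_store_explicit(&nat_hook, irc_help, memory_order_release);
}

static void helper_exit(void)
{
	/* retire the pointer first; the kernel then waits
	 * (synchronize_rcu) until no reader can still hold the old value */
	atomic_store_explicit(&nat_hook, NULL, memory_order_release);
}

int main(void)
{
	helper_init();
	nat_hook_t h = atomic_load_explicit(&nat_hook, memory_order_acquire);
	if (h)
		h("DCC SEND request");
	helper_exit();
	return 0;
}
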
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
new file mode 100644
index 0000000..8e8a65d
--- /dev/null
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/atomic.h>
+#include <linux/inetdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/netfilter/nf_nat_masquerade.h>
+
+static DEFINE_MUTEX(masq_mutex);
+static unsigned int masq_refcnt __read_mostly;
+
+unsigned int
+nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
+		       const struct nf_nat_range2 *range,
+		       const struct net_device *out)
+{
+	struct nf_conn *ct;
+	struct nf_conn_nat *nat;
+	enum ip_conntrack_info ctinfo;
+	struct nf_nat_range2 newrange;
+	const struct rtable *rt;
+	__be32 newsrc, nh;
+
+	WARN_ON(hooknum != NF_INET_POST_ROUTING);
+
+	ct = nf_ct_get(skb, &ctinfo);
+
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			 ctinfo == IP_CT_RELATED_REPLY)));
+
+	/* Source address is 0.0.0.0 - locally generated packet that is
+	 * probably not supposed to be masqueraded.
+	 */
+	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
+		return NF_ACCEPT;
+
+	rt = skb_rtable(skb);
+	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
+	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
+	if (!newsrc) {
+		pr_info("%s ate my IP address\n", out->name);
+		return NF_DROP;
+	}
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat)
+		nat->masq_index = out->ifindex;
+
+	/* Transfer from original range. */
+	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.ip = newsrc;
+	newrange.max_addr.ip = newsrc;
+	newrange.min_proto   = range->min_proto;
+	newrange.max_proto   = range->max_proto;
+
+	/* Hand modified range to generic setup. */
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
+
+static int device_cmp(struct nf_conn *i, void *ifindex)
+{
+	const struct nf_conn_nat *nat = nfct_nat(i);
+
+	if (!nat)
+		return 0;
+	return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+			     unsigned long event,
+			     void *ptr)
+{
+	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+
+	if (event == NETDEV_DOWN) {
+		/* Device was downed.  Search entire table for
+		 * conntracks which were associated with that device,
+		 * and forget them.
+		 */
+
+		nf_ct_iterate_cleanup_net(net, device_cmp,
+					  (void *)(long)dev->ifindex, 0, 0);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int inet_cmp(struct nf_conn *ct, void *ptr)
+{
+	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+	struct net_device *dev = ifa->ifa_dev->dev;
+	struct nf_conntrack_tuple *tuple;
+
+	if (!device_cmp(ct, (void *)(long)dev->ifindex))
+		return 0;
+
+	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	return ifa->ifa_address == tuple->dst.u3.ip;
+}
+
+static int masq_inet_event(struct notifier_block *this,
+			   unsigned long event,
+			   void *ptr)
+{
+	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
+	struct net *net = dev_net(idev->dev);
+
+	/* The masq_dev_notifier will catch the case of the device going
+	 * down.  So if the inetdev is dead and being destroyed we have
+	 * no work to do.  Otherwise this is an individual address removal
+	 * and we have to perform the flush.
+	 */
+	if (idev->dead)
+		return NOTIFY_DONE;
+
+	if (event == NETDEV_DOWN)
+		nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+	.notifier_call	= masq_device_event,
+};
+
+static struct notifier_block masq_inet_notifier = {
+	.notifier_call	= masq_inet_event,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static atomic_t v6_worker_count __read_mostly;
+
+static int
+nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
+		       const struct in6_addr *daddr, unsigned int srcprefs,
+		       struct in6_addr *saddr)
+{
+#ifdef CONFIG_IPV6_MODULE
+	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+	if (!v6_ops)
+		return -EHOSTUNREACH;
+
+	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#else
+	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#endif
+}
+
+unsigned int
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
+		       const struct net_device *out)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_nat *nat;
+	struct in6_addr src;
+	struct nf_conn *ct;
+	struct nf_nat_range2 newrange;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			 ctinfo == IP_CT_RELATED_REPLY)));
+
+	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
+				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+		return NF_DROP;
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat)
+		nat->masq_index = out->ifindex;
+
+	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.in6	= src;
+	newrange.max_addr.in6	= src;
+	newrange.min_proto	= range->min_proto;
+	newrange.max_proto	= range->max_proto;
+
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
+
+struct masq_dev_work {
+	struct work_struct work;
+	struct net *net;
+	struct in6_addr addr;
+	int ifindex;
+};
+
+static int inet6_cmp(struct nf_conn *ct, void *work)
+{
+	struct masq_dev_work *w = (struct masq_dev_work *)work;
+	struct nf_conntrack_tuple *tuple;
+
+	if (!device_cmp(ct, (void *)(long)w->ifindex))
+		return 0;
+
+	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
+}
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+	struct masq_dev_work *w;
+
+	w = container_of(work, struct masq_dev_work, work);
+
+	nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
+
+	put_net(w->net);
+	kfree(w);
+	atomic_dec(&v6_worker_count);
+	module_put(THIS_MODULE);
+}
+
+/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
+ *
+ * Defer it to the system workqueue.
+ *
+ * As we can have 'a lot' of inet_events (depending on the number of ipv6
+ * addresses being deleted), we also need to limit the work item queue.
+ */
+static int masq_inet6_event(struct notifier_block *this,
+			    unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *ifa = ptr;
+	const struct net_device *dev;
+	struct masq_dev_work *w;
+	struct net *net;
+
+	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
+		return NOTIFY_DONE;
+
+	dev = ifa->idev->dev;
+	net = maybe_get_net(dev_net(dev));
+	if (!net)
+		return NOTIFY_DONE;
+
+	if (!try_module_get(THIS_MODULE))
+		goto err_module;
+
+	w = kmalloc(sizeof(*w), GFP_ATOMIC);
+	if (w) {
+		atomic_inc(&v6_worker_count);
+
+		INIT_WORK(&w->work, iterate_cleanup_work);
+		w->ifindex = dev->ifindex;
+		w->net = net;
+		w->addr = ifa->addr;
+		schedule_work(&w->work);
+
+		return NOTIFY_DONE;
+	}
+
+	module_put(THIS_MODULE);
+ err_module:
+	put_net(net);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_inet6_notifier = {
+	.notifier_call	= masq_inet6_event,
+};
+
+static int nf_nat_masquerade_ipv6_register_notifier(void)
+{
+	return register_inet6addr_notifier(&masq_inet6_notifier);
+}
+#else
+static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
+#endif
+
+int nf_nat_masquerade_inet_register_notifiers(void)
+{
+	int ret = 0;
+
+	mutex_lock(&masq_mutex);
+	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
+		ret = -EOVERFLOW;
+		goto out_unlock;
+	}
+
+	/* check if the notifier was already set */
+	if (++masq_refcnt > 1)
+		goto out_unlock;
+
+	/* Register for device down reports */
+	ret = register_netdevice_notifier(&masq_dev_notifier);
+	if (ret)
+		goto err_dec;
+	/* Register IP address change reports */
+	ret = register_inetaddr_notifier(&masq_inet_notifier);
+	if (ret)
+		goto err_unregister;
+
+	ret = nf_nat_masquerade_ipv6_register_notifier();
+	if (ret)
+		goto err_unreg_inet;
+
+	mutex_unlock(&masq_mutex);
+	return ret;
+err_unreg_inet:
+	unregister_inetaddr_notifier(&masq_inet_notifier);
+err_unregister:
+	unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+	masq_refcnt--;
+out_unlock:
+	mutex_unlock(&masq_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);
+
+void nf_nat_masquerade_inet_unregister_notifiers(void)
+{
+	mutex_lock(&masq_mutex);
+	/* check if the notifiers still have clients */
+	if (--masq_refcnt > 0)
+		goto out_unlock;
+
+	unregister_netdevice_notifier(&masq_dev_notifier);
+	unregister_inetaddr_notifier(&masq_inet_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+	unregister_inet6addr_notifier(&masq_inet6_notifier);
+#endif
+out_unlock:
+	mutex_unlock(&masq_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
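
masq_inet6_event() runs from an atomic notifier, so it cannot walk the
conntrack table in place; it queues a work item and refuses to queue more
than 16 at once (v6_worker_count). Events dropped by that cap are harmless,
since the affected conntrack entries simply age out. A toy model of the
guard, names invented, offered only as a sketch of the pattern:

#include <stdbool.h>
#include <stdio.h>

#define MAX_WORKERS 16

static int worker_count;

static bool toy_schedule_cleanup(int ifindex)
{
	if (worker_count >= MAX_WORKERS)
		return false;		/* skip; entries will age out */
	worker_count++;			/* decremented when the work runs */
	printf("queued cleanup for ifindex %d\n", ifindex);
	return true;
}

int main(void)
{
	for (int i = 0; i < 20; i++)
		toy_schedule_cleanup(i);	/* only 16 are accepted */
	printf("queued=%d\n", worker_count);
	return 0;
}
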
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
new file mode 100644
index 0000000..0a59c14
--- /dev/null
+++ b/net/netfilter/nf_nat_proto.c
@@ -0,0 +1,1037 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+
+#include <linux/dccp.h>
+#include <linux/sctp.h>
+#include <net/sctp/checksum.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/xfrm.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static void nf_csum_update(struct sk_buff *skb,
+			   unsigned int iphdroff, __sum16 *check,
+			   const struct nf_conntrack_tuple *t,
+			   enum nf_nat_manip_type maniptype);
+
+static void
+__udp_manip_pkt(struct sk_buff *skb,
+	        unsigned int iphdroff, struct udphdr *hdr,
+	        const struct nf_conntrack_tuple *tuple,
+	        enum nf_nat_manip_type maniptype, bool do_csum)
+{
+	__be16 *portptr, newport;
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		/* Get rid of src port */
+		newport = tuple->src.u.udp.port;
+		portptr = &hdr->source;
+	} else {
+		/* Get rid of dst port */
+		newport = tuple->dst.u.udp.port;
+		portptr = &hdr->dest;
+	}
+	if (do_csum) {
+		nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
+		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
+					 false);
+		if (!hdr->check)
+			hdr->check = CSUM_MANGLED_0;
+	}
+	*portptr = newport;
+}
+
+static bool udp_manip_pkt(struct sk_buff *skb,
+			  unsigned int iphdroff, unsigned int hdroff,
+			  const struct nf_conntrack_tuple *tuple,
+			  enum nf_nat_manip_type maniptype)
+{
+	struct udphdr *hdr;
+	bool do_csum;
+
+	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct udphdr *)(skb->data + hdroff);
+	do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
+
+	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
+	return true;
+}
+
+static bool udplite_manip_pkt(struct sk_buff *skb,
+			      unsigned int iphdroff, unsigned int hdroff,
+			      const struct nf_conntrack_tuple *tuple,
+			      enum nf_nat_manip_type maniptype)
+{
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+	struct udphdr *hdr;
+
+	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct udphdr *)(skb->data + hdroff);
+	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
+#endif
+	return true;
+}
+
+static bool
+sctp_manip_pkt(struct sk_buff *skb,
+	       unsigned int iphdroff, unsigned int hdroff,
+	       const struct nf_conntrack_tuple *tuple,
+	       enum nf_nat_manip_type maniptype)
+{
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	struct sctphdr *hdr;
+	int hdrsize = 8;
+
+	/* This could be an inner header returned in an icmp packet; in such
+	 * cases we cannot update the checksum field since it is outside
+	 * of the 8 bytes of transport layer headers we are guaranteed.
+	 */
+	if (skb->len >= hdroff + sizeof(*hdr))
+		hdrsize = sizeof(*hdr);
+
+	if (skb_ensure_writable(skb, hdroff + hdrsize))
+		return false;
+
+	hdr = (struct sctphdr *)(skb->data + hdroff);
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		/* Get rid of src port */
+		hdr->source = tuple->src.u.sctp.port;
+	} else {
+		/* Get rid of dst port */
+		hdr->dest = tuple->dst.u.sctp.port;
+	}
+
+	if (hdrsize < sizeof(*hdr))
+		return true;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		hdr->checksum = sctp_compute_cksum(skb, hdroff);
+		skb->ip_summed = CHECKSUM_NONE;
+	}
+
+#endif
+	return true;
+}
+
+static bool
+tcp_manip_pkt(struct sk_buff *skb,
+	      unsigned int iphdroff, unsigned int hdroff,
+	      const struct nf_conntrack_tuple *tuple,
+	      enum nf_nat_manip_type maniptype)
+{
+	struct tcphdr *hdr;
+	__be16 *portptr, newport, oldport;
+	int hdrsize = 8; /* TCP connection tracking guarantees this much */
+
+	/* this could be an inner header returned in an icmp packet; in such
+	   cases we cannot update the checksum field since it is outside of
+	   the 8 bytes of transport layer headers we are guaranteed */
+	if (skb->len >= hdroff + sizeof(struct tcphdr))
+		hdrsize = sizeof(struct tcphdr);
+
+	if (skb_ensure_writable(skb, hdroff + hdrsize))
+		return false;
+
+	hdr = (struct tcphdr *)(skb->data + hdroff);
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		/* Get rid of src port */
+		newport = tuple->src.u.tcp.port;
+		portptr = &hdr->source;
+	} else {
+		/* Get rid of dst port */
+		newport = tuple->dst.u.tcp.port;
+		portptr = &hdr->dest;
+	}
+
+	oldport = *portptr;
+	*portptr = newport;
+
+	if (hdrsize < sizeof(*hdr))
+		return true;
+
+	nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
+	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
+	return true;
+}
+
+static bool
+dccp_manip_pkt(struct sk_buff *skb,
+	       unsigned int iphdroff, unsigned int hdroff,
+	       const struct nf_conntrack_tuple *tuple,
+	       enum nf_nat_manip_type maniptype)
+{
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	struct dccp_hdr *hdr;
+	__be16 *portptr, oldport, newport;
+	int hdrsize = 8; /* DCCP connection tracking guarantees this much */
+
+	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
+		hdrsize = sizeof(struct dccp_hdr);
+
+	if (skb_ensure_writable(skb, hdroff + hdrsize))
+		return false;
+
+	hdr = (struct dccp_hdr *)(skb->data + hdroff);
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		newport = tuple->src.u.dccp.port;
+		portptr = &hdr->dccph_sport;
+	} else {
+		newport = tuple->dst.u.dccp.port;
+		portptr = &hdr->dccph_dport;
+	}
+
+	oldport = *portptr;
+	*portptr = newport;
+
+	if (hdrsize < sizeof(*hdr))
+		return true;
+
+	nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
+	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
+				 false);
+#endif
+	return true;
+}
+
+static bool
+icmp_manip_pkt(struct sk_buff *skb,
+	       unsigned int iphdroff, unsigned int hdroff,
+	       const struct nf_conntrack_tuple *tuple,
+	       enum nf_nat_manip_type maniptype)
+{
+	struct icmphdr *hdr;
+
+	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct icmphdr *)(skb->data + hdroff);
+	inet_proto_csum_replace2(&hdr->checksum, skb,
+				 hdr->un.echo.id, tuple->src.u.icmp.id, false);
+	hdr->un.echo.id = tuple->src.u.icmp.id;
+	return true;
+}
+
+static bool
+icmpv6_manip_pkt(struct sk_buff *skb,
+		 unsigned int iphdroff, unsigned int hdroff,
+		 const struct nf_conntrack_tuple *tuple,
+		 enum nf_nat_manip_type maniptype)
+{
+	struct icmp6hdr *hdr;
+
+	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct icmp6hdr *)(skb->data + hdroff);
+	nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
+	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
+		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
+					 hdr->icmp6_identifier,
+					 tuple->src.u.icmp.id, false);
+		hdr->icmp6_identifier = tuple->src.u.icmp.id;
+	}
+	return true;
+}
+
+/* manipulate a GRE packet according to maniptype */
+static bool
+gre_manip_pkt(struct sk_buff *skb,
+	      unsigned int iphdroff, unsigned int hdroff,
+	      const struct nf_conntrack_tuple *tuple,
+	      enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
+	const struct gre_base_hdr *greh;
+	struct pptp_gre_header *pgreh;
+
+	/* pgreh includes two optional 32-bit fields which are not required
+	 * to be there.  That's where the magic '8' comes from. */
+	if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
+		return false;
+
+	greh = (void *)skb->data + hdroff;
+	pgreh = (struct pptp_gre_header *)greh;
+
+	/* we can only manipulate the destination of the packet, since the
+	 * 'source key' is not present in the packet itself */
+	if (maniptype != NF_NAT_MANIP_DST)
+		return true;
+
+	switch (greh->flags & GRE_VERSION) {
+	case GRE_VERSION_0:
+		/* We do not currently NAT any GREv0 packets.
+		 * Try to behave like "nf_nat_proto_unknown" */
+		break;
+	case GRE_VERSION_1:
+		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
+		pgreh->call_id = tuple->dst.u.gre.key;
+		break;
+	default:
+		pr_debug("can't nat unknown GRE version\n");
+		return false;
+	}
+#endif
+	return true;
+}
+
+static bool l4proto_manip_pkt(struct sk_buff *skb,
+			      unsigned int iphdroff, unsigned int hdroff,
+			      const struct nf_conntrack_tuple *tuple,
+			      enum nf_nat_manip_type maniptype)
+{
+	switch (tuple->dst.protonum) {
+	case IPPROTO_TCP:
+		return tcp_manip_pkt(skb, iphdroff, hdroff,
+				     tuple, maniptype);
+	case IPPROTO_UDP:
+		return udp_manip_pkt(skb, iphdroff, hdroff,
+				     tuple, maniptype);
+	case IPPROTO_UDPLITE:
+		return udplite_manip_pkt(skb, iphdroff, hdroff,
+					 tuple, maniptype);
+	case IPPROTO_SCTP:
+		return sctp_manip_pkt(skb, iphdroff, hdroff,
+				      tuple, maniptype);
+	case IPPROTO_ICMP:
+		return icmp_manip_pkt(skb, iphdroff, hdroff,
+				      tuple, maniptype);
+	case IPPROTO_ICMPV6:
+		return icmpv6_manip_pkt(skb, iphdroff, hdroff,
+					tuple, maniptype);
+	case IPPROTO_DCCP:
+		return dccp_manip_pkt(skb, iphdroff, hdroff,
+				      tuple, maniptype);
+	case IPPROTO_GRE:
+		return gre_manip_pkt(skb, iphdroff, hdroff,
+				     tuple, maniptype);
+	}
+
+	/* If we don't know the protocol -- no error, pass it unmodified. */
+	return true;
+}
+
+static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
+				  unsigned int iphdroff,
+				  const struct nf_conntrack_tuple *target,
+				  enum nf_nat_manip_type maniptype)
+{
+	struct iphdr *iph;
+	unsigned int hdroff;
+
+	if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
+		return false;
+
+	iph = (void *)skb->data + iphdroff;
+	hdroff = iphdroff + iph->ihl * 4;
+
+	if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
+		return false;
+	iph = (void *)skb->data + iphdroff;
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+		iph->saddr = target->src.u3.ip;
+	} else {
+		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+		iph->daddr = target->dst.u3.ip;
+	}
+	return true;
+}
+
+static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
+				  unsigned int iphdroff,
+				  const struct nf_conntrack_tuple *target,
+				  enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct ipv6hdr *ipv6h;
+	__be16 frag_off;
+	int hdroff;
+	u8 nexthdr;
+
+	if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
+		return false;
+
+	ipv6h = (void *)skb->data + iphdroff;
+	nexthdr = ipv6h->nexthdr;
+	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
+				  &nexthdr, &frag_off);
+	if (hdroff < 0)
+		goto manip_addr;
+
+	if ((frag_off & htons(~0x7)) == 0 &&
+	    !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
+		return false;
+
+	/* must reload, offset might have changed */
+	ipv6h = (void *)skb->data + iphdroff;
+
+manip_addr:
+	if (maniptype == NF_NAT_MANIP_SRC)
+		ipv6h->saddr = target->src.u3.in6;
+	else
+		ipv6h->daddr = target->dst.u3.in6;
+
+#endif
+	return true;
+}
+
+unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
+			      enum nf_nat_manip_type mtype,
+			      enum ip_conntrack_dir dir)
+{
+	struct nf_conntrack_tuple target;
+
+	/* We are aiming to look like the inverse of the other direction. */
+	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+
+	switch (target.src.l3num) {
+	case NFPROTO_IPV6:
+		if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
+			return NF_ACCEPT;
+		break;
+	case NFPROTO_IPV4:
+		if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
+			return NF_ACCEPT;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+	return NF_DROP;
+}
+
+static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
+				    unsigned int iphdroff, __sum16 *check,
+				    const struct nf_conntrack_tuple *t,
+				    enum nf_nat_manip_type maniptype)
+{
+	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+	__be32 oldip, newip;
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		oldip = iph->saddr;
+		newip = t->src.u3.ip;
+	} else {
+		oldip = iph->daddr;
+		newip = t->dst.u3.ip;
+	}
+	inet_proto_csum_replace4(check, skb, oldip, newip, true);
+}
+
+static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
+				    unsigned int iphdroff, __sum16 *check,
+				    const struct nf_conntrack_tuple *t,
+				    enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
+	const struct in6_addr *oldip, *newip;
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		oldip = &ipv6h->saddr;
+		newip = &t->src.u3.in6;
+	} else {
+		oldip = &ipv6h->daddr;
+		newip = &t->dst.u3.in6;
+	}
+	inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
+				  newip->s6_addr32, true);
+#endif
+}
+
+static void nf_csum_update(struct sk_buff *skb,
+			   unsigned int iphdroff, __sum16 *check,
+			   const struct nf_conntrack_tuple *t,
+			   enum nf_nat_manip_type maniptype)
+{
+	switch (t->src.l3num) {
+	case NFPROTO_IPV4:
+		nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
+		return;
+	case NFPROTO_IPV6:
+		nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
+		return;
+	}
+}
+
+static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
+				    u8 proto, void *data, __sum16 *check,
+				    int datalen, int oldlen)
+{
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
+			ip_hdrlen(skb);
+		skb->csum_offset = (void *)check - data;
+		*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
+					    proto, 0);
+	} else {
+		inet_proto_csum_replace2(check, skb,
+					 htons(oldlen), htons(datalen), true);
+	}
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
+				    u8 proto, void *data, __sum16 *check,
+				    int datalen, int oldlen)
+{
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
+			(data - (void *)skb->data);
+		skb->csum_offset = (void *)check - data;
+		*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+					  datalen, proto, 0);
+	} else {
+		inet_proto_csum_replace2(check, skb,
+					 htons(oldlen), htons(datalen), true);
+	}
+}
+#endif
+
+void nf_nat_csum_recalc(struct sk_buff *skb,
+			u8 nfproto, u8 proto, void *data, __sum16 *check,
+			int datalen, int oldlen)
+{
+	switch (nfproto) {
+	case NFPROTO_IPV4:
+		nf_nat_ipv4_csum_recalc(skb, proto, data, check,
+					datalen, oldlen);
+		return;
+#if IS_ENABLED(CONFIG_IPV6)
+	case NFPROTO_IPV6:
+		nf_nat_ipv6_csum_recalc(skb, proto, data, check,
+					datalen, oldlen);
+		return;
+#endif
+	}
+
+	WARN_ON_ONCE(1);
+}
+
+int nf_nat_icmp_reply_translation(struct sk_buff *skb,
+				  struct nf_conn *ct,
+				  enum ip_conntrack_info ctinfo,
+				  unsigned int hooknum)
+{
+	struct {
+		struct icmphdr	icmp;
+		struct iphdr	ip;
+	} *inside;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+	unsigned int hdrlen = ip_hdrlen(skb);
+	struct nf_conntrack_tuple target;
+	unsigned long statusbit;
+
+	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
+
+	if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
+		return 0;
+	if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
+		return 0;
+
+	inside = (void *)skb->data + hdrlen;
+	if (inside->icmp.type == ICMP_REDIRECT) {
+		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+			return 0;
+		if (ct->status & IPS_NAT_MASK)
+			return 0;
+	}
+
+	if (manip == NF_NAT_MANIP_SRC)
+		statusbit = IPS_SRC_NAT;
+	else
+		statusbit = IPS_DST_NAT;
+
+	/* Invert if this is reply direction */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	if (!(ct->status & statusbit))
+		return 1;
+
+	if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
+				   &ct->tuplehash[!dir].tuple, !manip))
+		return 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		/* Reloading "inside" here since manip_pkt may reallocate */
+		inside = (void *)skb->data + hdrlen;
+		inside->icmp.checksum = 0;
+		inside->icmp.checksum =
+			csum_fold(skb_checksum(skb, hdrlen,
+					       skb->len - hdrlen, 0));
+	}
+
+	/* Change outer to look like the reply to an incoming packet */
+	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+	target.dst.protonum = IPPROTO_ICMP;
+	if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+static unsigned int
+nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
+	       const struct nf_hook_state *state)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return NF_ACCEPT;
+
+	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
+		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+							   state->hook))
+				return NF_DROP;
+			else
+				return NF_ACCEPT;
+		}
+	}
+
+	return nf_nat_inet_fn(priv, skb, state);
+}
+
+static unsigned int
+nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
+	       const struct nf_hook_state *state)
+{
+	unsigned int ret;
+	__be32 daddr = ip_hdr(skb)->daddr;
+
+	ret = nf_nat_ipv4_fn(priv, skb, state);
+	if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
+		skb_dst_drop(skb);
+
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
+		const struct nf_hook_state *state)
+{
+#ifdef CONFIG_XFRM
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	int err;
+#endif
+	unsigned int ret;
+
+	ret = nf_nat_ipv4_fn(priv, skb, state);
+#ifdef CONFIG_XFRM
+	if (ret != NF_ACCEPT)
+		return ret;
+
+	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
+		return ret;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.src.u3.ip !=
+		     ct->tuplehash[!dir].tuple.dst.u3.ip ||
+		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+		     ct->tuplehash[dir].tuple.src.u.all !=
+		     ct->tuplehash[!dir].tuple.dst.u.all)) {
+			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+	}
+#endif
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
+		     const struct nf_hook_state *state)
+{
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned int ret;
+	int err;
+
+	ret = nf_nat_ipv4_fn(priv, skb, state);
+	if (ret != NF_ACCEPT)
+		return ret;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+		    ct->tuplehash[!dir].tuple.src.u3.ip) {
+			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+#ifdef CONFIG_XFRM
+		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+			 ct->tuplehash[dir].tuple.dst.u.all !=
+			 ct->tuplehash[!dir].tuple.src.u.all) {
+			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+#endif
+	}
+	return ret;
+}
+
+static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
+	/* Before packet filtering, change destination */
+	{
+		.hook		= nf_nat_ipv4_in,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP_PRI_NAT_DST,
+	},
+	/* After packet filtering, change source */
+	{
+		.hook		= nf_nat_ipv4_out,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_NAT_SRC,
+	},
+	/* Before packet filtering, change destination */
+	{
+		.hook		= nf_nat_ipv4_local_fn,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP_PRI_NAT_DST,
+	},
+	/* After packet filtering, change source */
+	{
+		.hook		= nf_nat_ipv4_fn,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_NAT_SRC,
+	},
+};
+
+int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+	return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
+				  ARRAY_SIZE(nf_nat_ipv4_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
+
+void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+	nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
+				    struct nf_conn *ct,
+				    enum ip_conntrack_info ctinfo,
+				    unsigned int hooknum,
+				    unsigned int hdrlen)
+{
+	struct {
+		struct icmp6hdr	icmp6;
+		struct ipv6hdr	ip6;
+	} *inside;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+	struct nf_conntrack_tuple target;
+	unsigned long statusbit;
+
+	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
+
+	if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
+		return 0;
+	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
+		return 0;
+
+	inside = (void *)skb->data + hdrlen;
+	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
+		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+			return 0;
+		if (ct->status & IPS_NAT_MASK)
+			return 0;
+	}
+
+	if (manip == NF_NAT_MANIP_SRC)
+		statusbit = IPS_SRC_NAT;
+	else
+		statusbit = IPS_DST_NAT;
+
+	/* Invert if this is reply direction */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	if (!(ct->status & statusbit))
+		return 1;
+
+	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
+				   &ct->tuplehash[!dir].tuple, !manip))
+		return 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+		inside = (void *)skb->data + hdrlen;
+		inside->icmp6.icmp6_cksum = 0;
+		inside->icmp6.icmp6_cksum =
+			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+					skb->len - hdrlen, IPPROTO_ICMPV6,
+					skb_checksum(skb, hdrlen,
+						     skb->len - hdrlen, 0));
+	}
+
+	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+	target.dst.protonum = IPPROTO_ICMPV6;
+	if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+
+static unsigned int
+nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
+	       const struct nf_hook_state *state)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	__be16 frag_off;
+	int hdrlen;
+	u8 nexthdr;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	/* Can't track?  It's not due to stress, or conntrack would
+	 * have dropped it.  Hence it's the user's responsibility to
+	 * packet filter it out, or implement conntrack/NAT for that
+	 * protocol. 8) --RR
+	 */
+	if (!ct)
+		return NF_ACCEPT;
+
+	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
+		nexthdr = ipv6_hdr(skb)->nexthdr;
+		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+					  &nexthdr, &frag_off);
+
+		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+							     state->hook,
+							     hdrlen))
+				return NF_DROP;
+			else
+				return NF_ACCEPT;
+		}
+	}
+
+	return nf_nat_inet_fn(priv, skb, state);
+}
+
+static unsigned int
+nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
+	       const struct nf_hook_state *state)
+{
+	unsigned int ret;
+	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+	ret = nf_nat_ipv6_fn(priv, skb, state);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+		skb_dst_drop(skb);
+
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
+		const struct nf_hook_state *state)
+{
+#ifdef CONFIG_XFRM
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	int err;
+#endif
+	unsigned int ret;
+
+	ret = nf_nat_ipv6_fn(priv, skb, state);
+#ifdef CONFIG_XFRM
+	if (ret != NF_ACCEPT)
+		return ret;
+
+	if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
+		return ret;
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+				      &ct->tuplehash[!dir].tuple.dst.u3) ||
+		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+		     ct->tuplehash[dir].tuple.src.u.all !=
+		     ct->tuplehash[!dir].tuple.dst.u.all)) {
+			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+	}
+#endif
+
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
+		     const struct nf_hook_state *state)
+{
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned int ret;
+	int err;
+
+	ret = nf_nat_ipv6_fn(priv, skb, state);
+	if (ret != NF_ACCEPT)
+		return ret;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+				      &ct->tuplehash[!dir].tuple.src.u3)) {
+			err = nf_ip6_route_me_harder(state->net, skb);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+#ifdef CONFIG_XFRM
+		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+			 ct->tuplehash[dir].tuple.dst.u.all !=
+			 ct->tuplehash[!dir].tuple.src.u.all) {
+			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+#endif
+	}
+
+	return ret;
+}
+
+static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
+	/* Before packet filtering, change destination */
+	{
+		.hook		= nf_nat_ipv6_in,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP6_PRI_NAT_DST,
+	},
+	/* After packet filtering, change source */
+	{
+		.hook		= nf_nat_ipv6_out,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP6_PRI_NAT_SRC,
+	},
+	/* Before packet filtering, change destination */
+	{
+		.hook		= nf_nat_ipv6_local_fn,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_NAT_DST,
+	},
+	/* After packet filtering, change source */
+	{
+		.hook		= nf_nat_ipv6_fn,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP6_PRI_NAT_SRC,
+	},
+};
+
+int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+	return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
+				  ARRAY_SIZE(nf_nat_ipv6_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
+
+void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+	nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
+#endif /* CONFIG_IPV6 */
+
+#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
+int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+	int ret;
+
+	if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
+		return -EINVAL;
+
+	ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
+				 ARRAY_SIZE(nf_nat_ipv6_ops));
+	if (ret)
+		return ret;
+
+	ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
+				 ARRAY_SIZE(nf_nat_ipv4_ops));
+	if (ret)
+		nf_nat_ipv6_unregister_fn(net, ops);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
+
+void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+	nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+	nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
+#endif /* NFT INET NAT */
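
This new file folds the former per-transport nf_nat_l4proto modules into
the single static switch l4proto_manip_pkt() above. A userspace model of
the dispatch shape (invented names, not kernel code); note that an
unrecognised transport deliberately succeeds without touching the packet,
so address-only NAT still covers protocols the switch does not list:

#include <stdbool.h>
#include <stdio.h>

enum { TOY_TCP = 6, TOY_UDP = 17, TOY_GRE = 47 };

static bool toy_manip(int protonum)
{
	switch (protonum) {
	case TOY_TCP:
		printf("rewrite TCP ports, update checksum\n");
		return true;
	case TOY_UDP:
		printf("rewrite UDP ports, checksum only if non-zero\n");
		return true;
	case TOY_GRE:
		printf("rewrite GREv1 call id\n");
		return true;
	}
	/* unknown protocol: no error, pass it unmodified */
	return true;
}

int main(void)
{
	return toy_manip(132) ? 0 : 1;	/* SCTP not listed in this toy */
}
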
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
deleted file mode 100644
index 5d849d8..0000000
--- a/net/netfilter/nf_nat_proto_common.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/random.h>
-#include <linux/netfilter.h>
-#include <linux/export.h>
-
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
-			     enum nf_nat_manip_type maniptype,
-			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
-{
-	__be16 port;
-
-	if (maniptype == NF_NAT_MANIP_SRC)
-		port = tuple->src.u.all;
-	else
-		port = tuple->dst.u.all;
-
-	return ntohs(port) >= ntohs(min->all) &&
-	       ntohs(port) <= ntohs(max->all);
-}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range);
-
-void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
-				 struct nf_conntrack_tuple *tuple,
-				 const struct nf_nat_range2 *range,
-				 enum nf_nat_manip_type maniptype,
-				 const struct nf_conn *ct,
-				 u16 *rover)
-{
-	unsigned int range_size, min, max, i;
-	__be16 *portptr;
-	u_int16_t off;
-
-	if (maniptype == NF_NAT_MANIP_SRC)
-		portptr = &tuple->src.u.all;
-	else
-		portptr = &tuple->dst.u.all;
-
-	/* If no range specified... */
-	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
-		/* If it's dst rewrite, can't change port */
-		if (maniptype == NF_NAT_MANIP_DST)
-			return;
-
-		if (ntohs(*portptr) < 1024) {
-			/* Loose convention: >> 512 is credential passing */
-			if (ntohs(*portptr) < 512) {
-				min = 1;
-				range_size = 511 - min + 1;
-			} else {
-				min = 600;
-				range_size = 1023 - min + 1;
-			}
-		} else {
-			min = 1024;
-			range_size = 65535 - 1024 + 1;
-		}
-	} else {
-		min = ntohs(range->min_proto.all);
-		max = ntohs(range->max_proto.all);
-		if (unlikely(max < min))
-			swap(max, min);
-		range_size = max - min + 1;
-	}
-
-	if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
-		off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC
-						  ? tuple->dst.u.all
-						  : tuple->src.u.all);
-	} else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
-		off = prandom_u32();
-	} else if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) {
-		off = (ntohs(*portptr) - ntohs(range->base_proto.all));
-	} else {
-		off = *rover;
-	}
-
-	for (i = 0; ; ++off) {
-		*portptr = htons(min + off % range_size);
-		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
-			continue;
-		if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL|
-					NF_NAT_RANGE_PROTO_OFFSET)))
-			*rover = off;
-		return;
-	}
-}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
-				   struct nf_nat_range2 *range)
-{
-	if (tb[CTA_PROTONAT_PORT_MIN]) {
-		range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
-		range->max_proto.all = range->min_proto.all;
-		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
-	}
-	if (tb[CTA_PROTONAT_PORT_MAX]) {
-		range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
-		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_nlattr_to_range);
-#endif
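
The deleted nf_nat_l4proto_unique_tuple() held the actual port-search loop;
its equivalent appears to move into nf_nat_core.c with this series (the
caller side is outside this hunk, so that location is inferred rather than
quoted). The loop walks the configured range from a starting offset,
wrapping modulo the range size, until it finds a free port or completes a
full sweep. A standalone model with an invented in-use predicate:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* stand-in for nf_nat_used_tuple(): pretend 1025 is taken */
static bool port_in_use(uint16_t port)
{
	return port == 1025;
}

static uint16_t pick_port(uint16_t min, uint16_t range_size, uint16_t off)
{
	for (uint16_t i = 0; ; ++off) {
		uint16_t port = min + off % range_size;

		/* give up after a full sweep, even if the last try is busy */
		if (++i != range_size && port_in_use(port))
			continue;
		return port;
	}
}

int main(void)
{
	/* range 1024..65535, starting offset 1: 1025 is busy, 1026 wins */
	printf("%u\n", pick_port(1024, 64512, 1));
	return 0;
}
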
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
deleted file mode 100644
index 67ea0d8..0000000
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * DCCP NAT protocol helper
- *
- * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/dccp.h>
-
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static u_int16_t dccp_port_rover;
-
-static void
-dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
-		  struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range2 *range,
-		  enum nf_nat_manip_type maniptype,
-		  const struct nf_conn *ct)
-{
-	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
-				    &dccp_port_rover);
-}
-
-static bool
-dccp_manip_pkt(struct sk_buff *skb,
-	       const struct nf_nat_l3proto *l3proto,
-	       unsigned int iphdroff, unsigned int hdroff,
-	       const struct nf_conntrack_tuple *tuple,
-	       enum nf_nat_manip_type maniptype)
-{
-	struct dccp_hdr *hdr;
-	__be16 *portptr, oldport, newport;
-	int hdrsize = 8; /* DCCP connection tracking guarantees this much */
-
-	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
-		hdrsize = sizeof(struct dccp_hdr);
-
-	if (!skb_make_writable(skb, hdroff + hdrsize))
-		return false;
-
-	hdr = (struct dccp_hdr *)(skb->data + hdroff);
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		newport = tuple->src.u.dccp.port;
-		portptr = &hdr->dccph_sport;
-	} else {
-		newport = tuple->dst.u.dccp.port;
-		portptr = &hdr->dccph_dport;
-	}
-
-	oldport = *portptr;
-	*portptr = newport;
-
-	if (hdrsize < sizeof(*hdr))
-		return true;
-
-	l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
-			     tuple, maniptype);
-	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
-				 false);
-	return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
-	.l4proto		= IPPROTO_DCCP,
-	.manip_pkt		= dccp_manip_pkt,
-	.in_range		= nf_nat_l4proto_in_range,
-	.unique_tuple		= dccp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
-#endif
-};
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
deleted file mode 100644
index 1c5d9b6..0000000
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/sctp.h>
-#include <net/sctp/checksum.h>
-
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static u_int16_t nf_sctp_port_rover;
-
-static void
-sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
-		  struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range2 *range,
-		  enum nf_nat_manip_type maniptype,
-		  const struct nf_conn *ct)
-{
-	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
-				    &nf_sctp_port_rover);
-}
-
-static bool
-sctp_manip_pkt(struct sk_buff *skb,
-	       const struct nf_nat_l3proto *l3proto,
-	       unsigned int iphdroff, unsigned int hdroff,
-	       const struct nf_conntrack_tuple *tuple,
-	       enum nf_nat_manip_type maniptype)
-{
-	struct sctphdr *hdr;
-	int hdrsize = 8;
-
-	/* This could be an inner header returned in imcp packet; in such
-	 * cases we cannot update the checksum field since it is outside
-	 * of the 8 bytes of transport layer headers we are guaranteed.
-	 */
-	if (skb->len >= hdroff + sizeof(*hdr))
-		hdrsize = sizeof(*hdr);
-
-	if (!skb_make_writable(skb, hdroff + hdrsize))
-		return false;
-
-	hdr = (struct sctphdr *)(skb->data + hdroff);
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		/* Get rid of src port */
-		hdr->source = tuple->src.u.sctp.port;
-	} else {
-		/* Get rid of dst port */
-		hdr->dest = tuple->dst.u.sctp.port;
-	}
-
-	if (hdrsize < sizeof(*hdr))
-		return true;
-
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		hdr->checksum = sctp_compute_cksum(skb, hdroff);
-		skb->ip_summed = CHECKSUM_NONE;
-	}
-
-	return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
-	.l4proto		= IPPROTO_SCTP,
-	.manip_pkt		= sctp_manip_pkt,
-	.in_range		= nf_nat_l4proto_in_range,
-	.unique_tuple		= sctp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
-#endif
-};
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
deleted file mode 100644
index f15fcd4..0000000
--- a/net/netfilter/nf_nat_proto_tcp.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/tcp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-#include <net/netfilter/nf_nat_core.h>
-
-static u16 tcp_port_rover;
-
-static void
-tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
-		 struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range2 *range,
-		 enum nf_nat_manip_type maniptype,
-		 const struct nf_conn *ct)
-{
-	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
-				    &tcp_port_rover);
-}
-
-static bool
-tcp_manip_pkt(struct sk_buff *skb,
-	      const struct nf_nat_l3proto *l3proto,
-	      unsigned int iphdroff, unsigned int hdroff,
-	      const struct nf_conntrack_tuple *tuple,
-	      enum nf_nat_manip_type maniptype)
-{
-	struct tcphdr *hdr;
-	__be16 *portptr, newport, oldport;
-	int hdrsize = 8; /* TCP connection tracking guarantees this much */
-
-	/* this could be a inner header returned in icmp packet; in such
-	   cases we cannot update the checksum field since it is outside of
-	   the 8 bytes of transport layer headers we are guaranteed */
-	if (skb->len >= hdroff + sizeof(struct tcphdr))
-		hdrsize = sizeof(struct tcphdr);
-
-	if (!skb_make_writable(skb, hdroff + hdrsize))
-		return false;
-
-	hdr = (struct tcphdr *)(skb->data + hdroff);
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		/* Get rid of src port */
-		newport = tuple->src.u.tcp.port;
-		portptr = &hdr->source;
-	} else {
-		/* Get rid of dst port */
-		newport = tuple->dst.u.tcp.port;
-		portptr = &hdr->dest;
-	}
-
-	oldport = *portptr;
-	*portptr = newport;
-
-	if (hdrsize < sizeof(*hdr))
-		return true;
-
-	l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
-	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
-	return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_tcp = {
-	.l4proto		= IPPROTO_TCP,
-	.manip_pkt		= tcp_manip_pkt,
-	.in_range		= nf_nat_l4proto_in_range,
-	.unique_tuple		= tcp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
-#endif
-};
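
The TCP path never recomputes the checksum over the payload: the l3proto->csum_update() callback folds the address delta from the L3 rewrite into the pseudo-header sum, and inet_proto_csum_replace2() folds the 16-bit port change. Both rest on RFC 1624's incremental update, HC' = ~(~HC + ~m + m'); a self-contained user-space sketch of that arithmetic:

	#include <stdint.h>

	/* Update a 16-bit one's-complement checksum when one 16-bit
	 * field changes from old_val to new_val (RFC 1624, eqn. 3).
	 */
	static uint16_t csum_replace16(uint16_t csum, uint16_t old_val,
				       uint16_t new_val)
	{
		uint32_t sum = (uint16_t)~csum;

		sum += (uint16_t)~old_val;
		sum += new_val;
		sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
		sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)~sum;
	}
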
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
deleted file mode 100644
index 5790f70..0000000
--- a/net/netfilter/nf_nat_proto_udp.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/export.h>
-#include <linux/init.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static u16 udp_port_rover;
-
-static void
-udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
-		 struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range2 *range,
-		 enum nf_nat_manip_type maniptype,
-		 const struct nf_conn *ct)
-{
-	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
-				    &udp_port_rover);
-}
-
-static void
-__udp_manip_pkt(struct sk_buff *skb,
-	        const struct nf_nat_l3proto *l3proto,
-	        unsigned int iphdroff, struct udphdr *hdr,
-	        const struct nf_conntrack_tuple *tuple,
-	        enum nf_nat_manip_type maniptype, bool do_csum)
-{
-	__be16 *portptr, newport;
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		/* Get rid of src port */
-		newport = tuple->src.u.udp.port;
-		portptr = &hdr->source;
-	} else {
-		/* Get rid of dst port */
-		newport = tuple->dst.u.udp.port;
-		portptr = &hdr->dest;
-	}
-	if (do_csum) {
-		l3proto->csum_update(skb, iphdroff, &hdr->check,
-				     tuple, maniptype);
-		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
-					 false);
-		if (!hdr->check)
-			hdr->check = CSUM_MANGLED_0;
-	}
-	*portptr = newport;
-}
-
-static bool udp_manip_pkt(struct sk_buff *skb,
-			  const struct nf_nat_l3proto *l3proto,
-			  unsigned int iphdroff, unsigned int hdroff,
-			  const struct nf_conntrack_tuple *tuple,
-			  enum nf_nat_manip_type maniptype)
-{
-	struct udphdr *hdr;
-	bool do_csum;
-
-	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
-		return false;
-
-	hdr = (struct udphdr *)(skb->data + hdroff);
-	do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
-
-	__udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum);
-	return true;
-}
-
-#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
-static u16 udplite_port_rover;
-
-static bool udplite_manip_pkt(struct sk_buff *skb,
-			      const struct nf_nat_l3proto *l3proto,
-			      unsigned int iphdroff, unsigned int hdroff,
-			      const struct nf_conntrack_tuple *tuple,
-			      enum nf_nat_manip_type maniptype)
-{
-	struct udphdr *hdr;
-
-	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
-		return false;
-
-	hdr = (struct udphdr *)(skb->data + hdroff);
-	__udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true);
-	return true;
-}
-
-static void
-udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
-		     struct nf_conntrack_tuple *tuple,
-		     const struct nf_nat_range2 *range,
-		     enum nf_nat_manip_type maniptype,
-		     const struct nf_conn *ct)
-{
-	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
-				    &udplite_port_rover);
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
-	.l4proto		= IPPROTO_UDPLITE,
-	.manip_pkt		= udplite_manip_pkt,
-	.in_range		= nf_nat_l4proto_in_range,
-	.unique_tuple		= udplite_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
-#endif
-};
-#endif /* CONFIG_NF_NAT_PROTO_UDPLITE */
-
-const struct nf_nat_l4proto nf_nat_l4proto_udp = {
-	.l4proto		= IPPROTO_UDP,
-	.manip_pkt		= udp_manip_pkt,
-	.in_range		= nf_nat_l4proto_in_range,
-	.unique_tuple		= udp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
-#endif
-};
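
The do_csum logic encodes two UDP quirks: over IPv4 a checksum field of zero means "sender computed no checksum", so a genuinely computed sum of zero must be transmitted as its one's-complement alias 0xffff (CSUM_MANGLED_0); and for UDP-Lite the checksum is mandatory, which is why udplite_manip_pkt() passes do_csum = true unconditionally. The invariant, as a one-line sketch:

	/* 0x0000 and 0xffff encode the same one's-complement value; 0 is
	 * reserved for "no checksum" on UDP/IPv4, so map it away.
	 */
	static inline __sum16 udp_nonzero_csum(__sum16 check)
	{
		return check ? check : CSUM_MANGLED_0;
	}
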
diff --git a/net/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c
deleted file mode 100644
index c5db3e2..0000000
--- a/net/netfilter/nf_nat_proto_unknown.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/* The "unknown" protocol.  This is what is used for protocols we
- * don't understand.  It's returned by ip_ct_find_proto().
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
-			     enum nf_nat_manip_type manip_type,
-			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
-{
-	return true;
-}
-
-static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto,
-				 struct nf_conntrack_tuple *tuple,
-				 const struct nf_nat_range2 *range,
-				 enum nf_nat_manip_type maniptype,
-				 const struct nf_conn *ct)
-{
-	/* Sorry: we can't help you; if it's not unique, we can't frob
-	 * anything.
-	 */
-	return;
-}
-
-static bool
-unknown_manip_pkt(struct sk_buff *skb,
-		  const struct nf_nat_l3proto *l3proto,
-		  unsigned int iphdroff, unsigned int hdroff,
-		  const struct nf_conntrack_tuple *tuple,
-		  enum nf_nat_manip_type maniptype)
-{
-	return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_unknown = {
-	.manip_pkt		= unknown_manip_pkt,
-	.in_range		= unknown_in_range,
-	.unique_tuple		= unknown_unique_tuple,
-};
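
nf_nat_l4proto_unknown was the fallback for transport protocols without port-like selectors: every tuple is "in range", nothing can be uniquified, and manip_pkt() succeeds without touching the packet, so only the L3 addresses get rewritten. In the consolidated nf_nat_proto.c this presumably survives as the default arm of the protocol dispatch, roughly:

	switch (tuple->dst.protonum) {
	case IPPROTO_TCP:
		return tcp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype);
	case IPPROTO_UDP:
		return udp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype);
	/* ... SCTP, UDP-Lite, ICMP, GRE ... */
	default:
		/* address-only NAT: nothing to mangle at L4 */
		return true;
	}
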
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index adee04a..f91579c 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6
  * NAT funded by Astaro.
  */
@@ -47,18 +44,18 @@
 	if (hooknum == NF_INET_LOCAL_OUT) {
 		newdst = htonl(0x7F000001);
 	} else {
-		struct in_device *indev;
-		struct in_ifaddr *ifa;
+		const struct in_device *indev;
 
 		newdst = 0;
 
-		rcu_read_lock();
 		indev = __in_dev_get_rcu(skb->dev);
-		if (indev && indev->ifa_list) {
-			ifa = indev->ifa_list;
-			newdst = ifa->ifa_local;
+		if (indev) {
+			const struct in_ifaddr *ifa;
+
+			ifa = rcu_dereference(indev->ifa_list);
+			if (ifa)
+				newdst = ifa->ifa_local;
 		}
-		rcu_read_unlock();
 
 		if (!newdst)
 			return NF_DROP;
@@ -97,7 +94,6 @@
 		struct inet6_ifaddr *ifa;
 		bool addr = false;
 
-		rcu_read_lock();
 		idev = __in6_dev_get(skb->dev);
 		if (idev != NULL) {
 			read_lock_bh(&idev->lock);
@@ -108,7 +104,6 @@
 			}
 			read_unlock_bh(&idev->lock);
 		}
-		rcu_read_unlock();
 
 		if (!addr)
 			return NF_DROP;
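
Two independent cleanups meet in the nf_nat_redirect.c hunks above: netfilter hooks already run inside an RCU read-side critical section, which makes the explicit rcu_read_lock()/rcu_read_unlock() pairs redundant, and in_device->ifa_list became an __rcu pointer in v5.3, so it must now be loaded through rcu_dereference(). The resulting consumer pattern, sketched on the assumption that the caller holds the RCU read lock:

	/* Return the first local IPv4 address of @dev, or 0 if none;
	 * caller must be in an RCU read-side critical section.
	 */
	static __be32 first_local_addr(const struct net_device *dev)
	{
		const struct in_device *indev = __in_dev_get_rcu(dev);
		const struct in_ifaddr *ifa;

		if (!indev)
			return 0;

		ifa = rcu_dereference(indev->ifa_list);	/* __rcu since v5.3 */
		return ifa ? ifa->ifa_local : 0;
	}
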
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 1f30860..f0a735e 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* SIP extension for NAT alteration.
  *
  * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
  * based on RR's ip_nat_ftp.c and other modules.
  * (C) 2007 United Security Providers
  * (C) 2007, 2008, 2011, 2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -18,16 +15,21 @@
 
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <linux/netfilter/nf_conntrack_sip.h>
 
+#define NAT_HELPER_NAME "sip"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
 MODULE_DESCRIPTION("SIP NAT helper");
-MODULE_ALIAS("ip_nat_sip");
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);
 
+static struct nf_conntrack_nat_helper nat_helper_sip =
+	NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);
 
 static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff,
 				  unsigned int dataoff,
@@ -280,7 +282,7 @@
 	if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) {
 		struct udphdr *uh;
 
-		if (!skb_make_writable(skb, skb->len)) {
+		if (skb_ensure_writable(skb, skb->len)) {
 			nf_ct_helper_log(skb, ct, "cannot mangle packet");
 			return NF_DROP;
 		}
@@ -316,6 +318,9 @@
 static void nf_nat_sip_expected(struct nf_conn *ct,
 				struct nf_conntrack_expect *exp)
 {
+	struct nf_conn_help *help = nfct_help(ct->master);
+	struct nf_conntrack_expect *pair_exp;
+	int range_set_for_snat = 0;
 	struct nf_nat_range2 range;
 
 	/* This must be a fresh one. */
@@ -327,15 +332,42 @@
 	range.min_addr = range.max_addr = exp->saved_addr;
 	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 
-	/* Change src to where master sends to, but only if the connection
-	 * actually came from the same source. */
-	if (nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
+	/* Do SRC manip for media streams according to the parameters
+	 * found in the paired expectation.
+	 */
+	if (exp->class != SIP_EXPECT_SIGNALLING) {
+		spin_lock_bh(&nf_conntrack_expect_lock);
+		hlist_for_each_entry(pair_exp, &help->expectations, lnode) {
+			if (pair_exp->tuple.src.l3num == nf_ct_l3num(ct) &&
+			    pair_exp->tuple.dst.protonum == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum &&
+			    nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &pair_exp->saved_addr) &&
+			    ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all == pair_exp->saved_proto.all) {
+				range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+				range.min_proto.all = range.max_proto.all = pair_exp->tuple.dst.u.all;
+				range.min_addr = range.max_addr = pair_exp->tuple.dst.u3;
+				range_set_for_snat = 1;
+				break;
+			}
+		}
+		spin_unlock_bh(&nf_conntrack_expect_lock);
+	}
+
+	/* When no paired expectation has been found, change src to
+	 * where master sends to, but only if the connection actually came
+	 * from the same source.
+	 */
+	if (!range_set_for_snat &&
+	    nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
 			     &ct->master->tuplehash[exp->dir].tuple.src.u3)) {
 		range.flags = NF_NAT_RANGE_MAP_IPS;
 		range.min_addr = range.max_addr
 			= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
-		nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+		range_set_for_snat = 1;
 	}
+
+	/* Perform SRC manip. */
+	if (range_set_for_snat)
+		nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 }
 
 static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
@@ -382,7 +414,7 @@
 		int ret;
 
 		exp->tuple.dst.u.udp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, NF_CT_EXP_F_SKIP_MASTER);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -575,7 +607,8 @@
 		int ret;
 
 		rtp_exp->tuple.dst.u.udp.port = htons(port);
-		ret = nf_ct_expect_related(rtp_exp);
+		ret = nf_ct_expect_related(rtp_exp,
+					   NF_CT_EXP_F_SKIP_MASTER);
 		if (ret == -EBUSY)
 			continue;
 		else if (ret < 0) {
@@ -583,7 +616,8 @@
 			break;
 		}
 		rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
-		ret = nf_ct_expect_related(rtcp_exp);
+		ret = nf_ct_expect_related(rtcp_exp,
+					   NF_CT_EXP_F_SKIP_MASTER);
 		if (ret == 0)
 			break;
 		else if (ret == -EBUSY) {
@@ -625,8 +659,8 @@
 
 static void __exit nf_nat_sip_fini(void)
 {
+	nf_nat_helper_unregister(&nat_helper_sip);
 	RCU_INIT_POINTER(nf_nat_sip_hooks, NULL);
-
 	nf_ct_helper_expectfn_unregister(&sip_nat);
 	synchronize_rcu();
 }
@@ -644,6 +678,7 @@
 static int __init nf_nat_sip_init(void)
 {
 	BUG_ON(nf_nat_sip_hooks != NULL);
+	nf_nat_helper_register(&nat_helper_sip);
 	RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks);
 	nf_ct_helper_expectfn_register(&sip_nat);
 	return 0;
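
Every skb_make_writable() call site in this patch inverts its test because the replacement, skb_ensure_writable(), follows the usual kernel convention of returning 0 on success rather than a boolean. The mechanical conversion rule, side by side:

	/* old: boolean, true on success */
	if (!skb_make_writable(skb, len))
		return NF_DROP;

	/* new: 0 on success, negative errno on failure */
	if (skb_ensure_writable(skb, len))
		return NF_DROP;
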
diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c
index 7f67e1d..1a59113 100644
--- a/net/netfilter/nf_nat_tftp.c
+++ b/net/netfilter/nf_nat_tftp.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -13,10 +10,15 @@
 #include <net/netfilter/nf_nat_helper.h>
 #include <linux/netfilter/nf_conntrack_tftp.h>
 
+#define NAT_HELPER_NAME "tftp"
+
 MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
 MODULE_DESCRIPTION("TFTP NAT helper");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("ip_nat_tftp");
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);
+
+static struct nf_conntrack_nat_helper nat_helper_tftp =
+	NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);
 
 static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
@@ -28,7 +30,7 @@
 		= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
 	exp->dir = IP_CT_DIR_REPLY;
 	exp->expectfn = nf_nat_follow_master;
-	if (nf_ct_expect_related(exp) != 0) {
+	if (nf_ct_expect_related(exp, 0) != 0) {
 		nf_ct_helper_log(skb, exp->master, "cannot add expectation");
 		return NF_DROP;
 	}
@@ -37,6 +39,7 @@
 
 static void __exit nf_nat_tftp_fini(void)
 {
+	nf_nat_helper_unregister(&nat_helper_tftp);
 	RCU_INIT_POINTER(nf_nat_tftp_hook, NULL);
 	synchronize_rcu();
 }
@@ -44,6 +47,7 @@
 static int __init nf_nat_tftp_init(void)
 {
 	BUG_ON(nf_nat_tftp_hook != NULL);
+	nf_nat_helper_register(&nat_helper_tftp);
 	RCU_INIT_POINTER(nf_nat_tftp_hook, help);
 	return 0;
 }
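
The SIP and TFTP conversions follow one pattern: MODULE_ALIAS_NF_NAT_HELPER() publishes an "nf-nat-<name>" module alias, and nf_nat_helper_register() makes the NAT side discoverable by name so it can be demand-loaded alongside the conntrack helper. A hedged template for a NAT helper module under this scheme (the "foo" identifiers are hypothetical):

	#define NAT_HELPER_NAME "foo"			/* hypothetical name */

	MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);	/* "nf-nat-foo" */

	static struct nf_conntrack_nat_helper nat_helper_foo =
		NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);

	static int __init nf_nat_foo_init(void)
	{
		nf_nat_helper_register(&nat_helper_foo);
		RCU_INIT_POINTER(nf_nat_foo_hook, help);  /* hypothetical hook */
		return 0;
	}

	static void __exit nf_nat_foo_fini(void)
	{
		nf_nat_helper_unregister(&nat_helper_foo);
		RCU_INIT_POINTER(nf_nat_foo_hook, NULL);
		synchronize_rcu();	/* let in-flight readers drain */
	}
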
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index d67a96a..a2b58de 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -46,6 +46,24 @@
 }
 EXPORT_SYMBOL(nf_unregister_queue_handler);
 
+static void nf_queue_entry_release_br_nf_refs(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+	if (nf_bridge) {
+		struct net_device *physdev;
+
+		physdev = nf_bridge_get_physindev(skb);
+		if (physdev)
+			dev_put(physdev);
+		physdev = nf_bridge_get_physoutdev(skb);
+		if (physdev)
+			dev_put(physdev);
+	}
+#endif
+}
+
 void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 {
 	struct nf_hook_state *state = &entry->state;
@@ -57,20 +75,28 @@
 		dev_put(state->out);
 	if (state->sk)
 		sock_put(state->sk);
+
+	nf_queue_entry_release_br_nf_refs(entry->skb);
+}
+EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
+
+static void nf_queue_entry_get_br_nf_refs(struct sk_buff *skb)
+{
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	if (entry->skb->nf_bridge) {
+	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+	if (nf_bridge) {
 		struct net_device *physdev;
 
-		physdev = nf_bridge_get_physindev(entry->skb);
+		physdev = nf_bridge_get_physindev(skb);
 		if (physdev)
-			dev_put(physdev);
-		physdev = nf_bridge_get_physoutdev(entry->skb);
+			dev_hold(physdev);
+		physdev = nf_bridge_get_physoutdev(skb);
 		if (physdev)
-			dev_put(physdev);
+			dev_hold(physdev);
 	}
 #endif
 }
-EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
 
 /* Bump dev refs so they don't vanish while packet is out */
 void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
@@ -83,18 +109,8 @@
 		dev_hold(state->out);
 	if (state->sk)
 		sock_hold(state->sk);
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	if (entry->skb->nf_bridge) {
-		struct net_device *physdev;
 
-		physdev = nf_bridge_get_physindev(entry->skb);
-		if (physdev)
-			dev_hold(physdev);
-		physdev = nf_bridge_get_physoutdev(entry->skb);
-		if (physdev)
-			dev_hold(physdev);
-	}
-#endif
+	nf_queue_entry_get_br_nf_refs(entry->skb);
 }
 EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
 
@@ -140,7 +156,6 @@
 }
 
 static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
-		      const struct nf_hook_entries *entries,
 		      unsigned int index, unsigned int queuenum)
 {
 	int status = -ENOENT;
@@ -174,6 +189,11 @@
 		goto err;
 	}
 
+	if (!skb_dst_force(skb) && state->hook != NF_INET_PRE_ROUTING) {
+		status = -ENETDOWN;
+		goto err;
+	}
+
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
 		.state	= *state,
@@ -182,7 +202,6 @@
 	};
 
 	nf_queue_entry_get_refs(entry);
-	skb_dst_force(skb);
 
 	switch (entry->state.pf) {
 	case AF_INET:
@@ -209,12 +228,11 @@
 
 /* Packets leaving via this function must come back through nf_reinject(). */
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
-	     const struct nf_hook_entries *entries, unsigned int index,
-	     unsigned int verdict)
+	     unsigned int index, unsigned int verdict)
 {
 	int ret;
 
-	ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS);
+	ret = __nf_queue(skb, state, index, verdict >> NF_VERDICT_QBITS);
 	if (ret < 0) {
 		if (ret == -ESRCH &&
 		    (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
@@ -224,6 +242,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nf_queue);
 
 static unsigned int nf_iterate(struct sk_buff *skb,
 			       struct nf_hook_state *state,
@@ -238,6 +257,7 @@
 repeat:
 		verdict = nf_hook_entry_hookfn(hook, skb, state);
 		if (verdict != NF_ACCEPT) {
+			*index = i;
 			if (verdict != NF_REPEAT)
 				return verdict;
 			goto repeat;
@@ -318,7 +338,7 @@
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
-		err = nf_queue(skb, &entry->state, hooks, i, verdict);
+		err = nf_queue(skb, &entry->state, i, verdict);
 		if (err == 1)
 			goto next_hook;
 		break;
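
Two changes land in nf_queue.c: the bridge-netfilter physdev reference handling is factored into matching get/put helpers so the hold and release paths stay symmetric, and skb_dst_force() moves ahead of the queue operation with its return value checked, because a packet whose dst has already been dropped must not be parked in userspace only to be reinjected with a dangling route (PRE_ROUTING is exempt, as packets legitimately have no dst yet there). The pinning discipline, reduced to a sketch:

	/* Everything a queued nf_queue_entry points at must stay pinned
	 * for as long as the packet sits in a userspace queue.
	 */
	static void entry_refs_get(const struct nf_hook_state *state)
	{
		if (state->in)
			dev_hold(state->in);
		if (state->out)
			dev_hold(state->out);
		if (state->sk)
			sock_hold(state->sk);
	}

	static void entry_refs_put(const struct nf_hook_state *state)
	{
		if (state->in)		/* exact mirror of the get side */
			dev_put(state->in);
		if (state->out)
			dev_put(state->out);
		if (state->sk)
			sock_put(state->sk);
	}
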
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 8ff4d22..b0930d4 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -13,16 +10,16 @@
 #include <net/netns/generic.h>
 #include <linux/proc_fs.h>
 
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_tcpudp.h>
-#include <linux/netfilter/xt_SYNPROXY.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nf_synproxy.h>
 
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_synproxy.h>
 
 unsigned int synproxy_net_id;
 EXPORT_SYMBOL_GPL(synproxy_net_id);
@@ -59,8 +56,8 @@
 			switch (opcode) {
 			case TCPOPT_MSS:
 				if (opsize == TCPOLEN_MSS) {
-					opts->mss = get_unaligned_be16(ptr);
-					opts->options |= XT_SYNPROXY_OPT_MSS;
+					opts->mss_option = get_unaligned_be16(ptr);
+					opts->options |= NF_SYNPROXY_OPT_MSS;
 				}
 				break;
 			case TCPOPT_WINDOW:
@@ -68,19 +65,19 @@
 					opts->wscale = *ptr;
 					if (opts->wscale > TCP_MAX_WSCALE)
 						opts->wscale = TCP_MAX_WSCALE;
-					opts->options |= XT_SYNPROXY_OPT_WSCALE;
+					opts->options |= NF_SYNPROXY_OPT_WSCALE;
 				}
 				break;
 			case TCPOPT_TIMESTAMP:
 				if (opsize == TCPOLEN_TIMESTAMP) {
 					opts->tsval = get_unaligned_be32(ptr);
 					opts->tsecr = get_unaligned_be32(ptr + 4);
-					opts->options |= XT_SYNPROXY_OPT_TIMESTAMP;
+					opts->options |= NF_SYNPROXY_OPT_TIMESTAMP;
 				}
 				break;
 			case TCPOPT_SACK_PERM:
 				if (opsize == TCPOLEN_SACK_PERM)
-					opts->options |= XT_SYNPROXY_OPT_SACK_PERM;
+					opts->options |= NF_SYNPROXY_OPT_SACK_PERM;
 				break;
 			}
 
@@ -92,36 +89,36 @@
 }
 EXPORT_SYMBOL_GPL(synproxy_parse_options);
 
-unsigned int synproxy_options_size(const struct synproxy_options *opts)
+static unsigned int
+synproxy_options_size(const struct synproxy_options *opts)
 {
 	unsigned int size = 0;
 
-	if (opts->options & XT_SYNPROXY_OPT_MSS)
+	if (opts->options & NF_SYNPROXY_OPT_MSS)
 		size += TCPOLEN_MSS_ALIGNED;
-	if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
 		size += TCPOLEN_TSTAMP_ALIGNED;
-	else if (opts->options & XT_SYNPROXY_OPT_SACK_PERM)
+	else if (opts->options & NF_SYNPROXY_OPT_SACK_PERM)
 		size += TCPOLEN_SACKPERM_ALIGNED;
-	if (opts->options & XT_SYNPROXY_OPT_WSCALE)
+	if (opts->options & NF_SYNPROXY_OPT_WSCALE)
 		size += TCPOLEN_WSCALE_ALIGNED;
 
 	return size;
 }
-EXPORT_SYMBOL_GPL(synproxy_options_size);
 
-void
+static void
 synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
 {
 	__be32 *ptr = (__be32 *)(th + 1);
 	u8 options = opts->options;
 
-	if (options & XT_SYNPROXY_OPT_MSS)
+	if (options & NF_SYNPROXY_OPT_MSS)
 		*ptr++ = htonl((TCPOPT_MSS << 24) |
 			       (TCPOLEN_MSS << 16) |
-			       opts->mss);
+			       opts->mss_option);
 
-	if (options & XT_SYNPROXY_OPT_TIMESTAMP) {
-		if (options & XT_SYNPROXY_OPT_SACK_PERM)
+	if (options & NF_SYNPROXY_OPT_TIMESTAMP) {
+		if (options & NF_SYNPROXY_OPT_SACK_PERM)
 			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
 				       (TCPOLEN_SACK_PERM << 16) |
 				       (TCPOPT_TIMESTAMP << 8) |
@@ -134,58 +131,56 @@
 
 		*ptr++ = htonl(opts->tsval);
 		*ptr++ = htonl(opts->tsecr);
-	} else if (options & XT_SYNPROXY_OPT_SACK_PERM)
+	} else if (options & NF_SYNPROXY_OPT_SACK_PERM)
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_SACK_PERM << 8) |
 			       TCPOLEN_SACK_PERM);
 
-	if (options & XT_SYNPROXY_OPT_WSCALE)
+	if (options & NF_SYNPROXY_OPT_WSCALE)
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_WINDOW << 16) |
 			       (TCPOLEN_WINDOW << 8) |
 			       opts->wscale);
 }
-EXPORT_SYMBOL_GPL(synproxy_build_options);
 
-void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
+void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info,
 				    struct synproxy_options *opts)
 {
 	opts->tsecr = opts->tsval;
 	opts->tsval = tcp_time_stamp_raw() & ~0x3f;
 
-	if (opts->options & XT_SYNPROXY_OPT_WSCALE) {
+	if (opts->options & NF_SYNPROXY_OPT_WSCALE) {
 		opts->tsval |= opts->wscale;
 		opts->wscale = info->wscale;
 	} else
 		opts->tsval |= 0xf;
 
-	if (opts->options & XT_SYNPROXY_OPT_SACK_PERM)
+	if (opts->options & NF_SYNPROXY_OPT_SACK_PERM)
 		opts->tsval |= 1 << 4;
 
-	if (opts->options & XT_SYNPROXY_OPT_ECN)
+	if (opts->options & NF_SYNPROXY_OPT_ECN)
 		opts->tsval |= 1 << 5;
 }
 EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie);
 
-void synproxy_check_timestamp_cookie(struct synproxy_options *opts)
+static void
+synproxy_check_timestamp_cookie(struct synproxy_options *opts)
 {
 	opts->wscale = opts->tsecr & 0xf;
 	if (opts->wscale != 0xf)
-		opts->options |= XT_SYNPROXY_OPT_WSCALE;
+		opts->options |= NF_SYNPROXY_OPT_WSCALE;
 
-	opts->options |= opts->tsecr & (1 << 4) ? XT_SYNPROXY_OPT_SACK_PERM : 0;
+	opts->options |= opts->tsecr & (1 << 4) ? NF_SYNPROXY_OPT_SACK_PERM : 0;
 
-	opts->options |= opts->tsecr & (1 << 5) ? XT_SYNPROXY_OPT_ECN : 0;
+	opts->options |= opts->tsecr & (1 << 5) ? NF_SYNPROXY_OPT_ECN : 0;
 }
-EXPORT_SYMBOL_GPL(synproxy_check_timestamp_cookie);
 
-unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
-				    unsigned int protoff,
-				    struct tcphdr *th,
-				    struct nf_conn *ct,
-				    enum ip_conntrack_info ctinfo,
-				    const struct nf_conn_synproxy *synproxy)
+static unsigned int
+synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff,
+		       struct tcphdr *th, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       const struct nf_conn_synproxy *synproxy)
 {
 	unsigned int optoff, optend;
 	__be32 *ptr, old;
@@ -196,7 +191,7 @@
 	optoff = protoff + sizeof(struct tcphdr);
 	optend = protoff + th->doff * 4;
 
-	if (!skb_make_writable(skb, optend))
+	if (skb_ensure_writable(skb, optend))
 		return 0;
 
 	while (optoff < optend) {
@@ -235,7 +230,6 @@
 	}
 	return 1;
 }
-EXPORT_SYMBOL_GPL(synproxy_tstamp_adjust);
 
 static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = {
 	.len		= sizeof(struct nf_conn_synproxy),
@@ -416,5 +410,830 @@
 module_init(synproxy_core_init);
 module_exit(synproxy_core_exit);
 
+static struct iphdr *
+synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
+		  __be32 daddr)
+{
+	struct iphdr *iph;
+
+	skb_reset_network_header(skb);
+	iph = skb_put(skb, sizeof(*iph));
+	iph->version	= 4;
+	iph->ihl	= sizeof(*iph) / 4;
+	iph->tos	= 0;
+	iph->id		= 0;
+	iph->frag_off	= htons(IP_DF);
+	iph->ttl	= net->ipv4.sysctl_ip_default_ttl;
+	iph->protocol	= IPPROTO_TCP;
+	iph->check	= 0;
+	iph->saddr	= saddr;
+	iph->daddr	= daddr;
+
+	return iph;
+}
+
+static void
+synproxy_send_tcp(struct net *net,
+		  const struct sk_buff *skb, struct sk_buff *nskb,
+		  struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+		  struct iphdr *niph, struct tcphdr *nth,
+		  unsigned int tcp_hdr_size)
+{
+	nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
+	nskb->ip_summed   = CHECKSUM_PARTIAL;
+	nskb->csum_start  = (unsigned char *)nth - nskb->head;
+	nskb->csum_offset = offsetof(struct tcphdr, check);
+
+	skb_dst_set_noref(nskb, skb_dst(skb));
+	nskb->protocol = htons(ETH_P_IP);
+	if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
+		goto free_nskb;
+
+	if (nfct) {
+		nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo);
+		nf_conntrack_get(nfct);
+	}
+
+	ip_local_out(net, nskb->sk, nskb);
+	return;
+
+free_nskb:
+	kfree_skb(nskb);
+}
+
+void
+synproxy_send_client_synack(struct net *net,
+			    const struct sk_buff *skb, const struct tcphdr *th,
+			    const struct synproxy_options *opts)
+{
+	struct sk_buff *nskb;
+	struct iphdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+	u16 mss = opts->mss_encode;
+
+	iph = ip_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
+
+	skb_reset_transport_header(nskb);
+	nth = skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->dest;
+	nth->dest	= th->source;
+	nth->seq	= htonl(__cookie_v4_init_sequence(iph, th, &mss));
+	nth->ack_seq	= htonl(ntohl(th->seq) + 1);
+	tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+	if (opts->options & NF_SYNPROXY_OPT_ECN)
+		tcp_flag_word(nth) |= TCP_FLAG_ECE;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= 0;
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
+			  IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
+}
+EXPORT_SYMBOL_GPL(synproxy_send_client_synack);
+
+static void
+synproxy_send_server_syn(struct net *net,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 const struct synproxy_options *opts, u32 recv_seq)
+{
+	struct synproxy_net *snet = synproxy_pernet(net);
+	struct sk_buff *nskb;
+	struct iphdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+
+	iph = ip_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
+
+	skb_reset_transport_header(nskb);
+	nth = skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->source;
+	nth->dest	= th->dest;
+	nth->seq	= htonl(recv_seq - 1);
+	/* ack_seq is used to relay our ISN to the synproxy hook to initialize
+	 * sequence number translation once a connection tracking entry exists.
+	 */
+	nth->ack_seq	= htonl(ntohl(th->ack_seq) - 1);
+	tcp_flag_word(nth) = TCP_FLAG_SYN;
+	if (opts->options & NF_SYNPROXY_OPT_ECN)
+		tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= th->window;
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+			  niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(struct net *net,
+			 const struct ip_ct_tcp *state,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 const struct synproxy_options *opts)
+{
+	struct sk_buff *nskb;
+	struct iphdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+
+	iph = ip_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
+
+	skb_reset_transport_header(nskb);
+	nth = skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->dest;
+	nth->dest	= th->source;
+	nth->seq	= htonl(ntohl(th->ack_seq));
+	nth->ack_seq	= htonl(ntohl(th->seq) + 1);
+	tcp_flag_word(nth) = TCP_FLAG_ACK;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack(struct net *net,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 const struct synproxy_options *opts)
+{
+	struct sk_buff *nskb;
+	struct iphdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+
+	iph = ip_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
+
+	skb_reset_transport_header(nskb);
+	nth = skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->source;
+	nth->dest	= th->dest;
+	nth->seq	= htonl(ntohl(th->seq) + 1);
+	nth->ack_seq	= th->ack_seq;
+	tcp_flag_word(nth) = TCP_FLAG_ACK;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= htons(ntohs(th->window) >> opts->wscale);
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
+			  IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
+}
+
+bool
+synproxy_recv_client_ack(struct net *net,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 struct synproxy_options *opts, u32 recv_seq)
+{
+	struct synproxy_net *snet = synproxy_pernet(net);
+	int mss;
+
+	mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
+	if (mss == 0) {
+		this_cpu_inc(snet->stats->cookie_invalid);
+		return false;
+	}
+
+	this_cpu_inc(snet->stats->cookie_valid);
+	opts->mss_option = mss;
+	opts->options |= NF_SYNPROXY_OPT_MSS;
+
+	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
+		synproxy_check_timestamp_cookie(opts);
+
+	synproxy_send_server_syn(net, skb, th, opts, recv_seq);
+	return true;
+}
+EXPORT_SYMBOL_GPL(synproxy_recv_client_ack);
+
+unsigned int
+ipv4_synproxy_hook(void *priv, struct sk_buff *skb,
+		   const struct nf_hook_state *nhs)
+{
+	struct net *net = nhs->net;
+	struct synproxy_net *snet = synproxy_pernet(net);
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	struct nf_conn_synproxy *synproxy;
+	struct synproxy_options opts = {};
+	const struct ip_ct_tcp *state;
+	struct tcphdr *th, _th;
+	unsigned int thoff;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return NF_ACCEPT;
+
+	synproxy = nfct_synproxy(ct);
+	if (!synproxy)
+		return NF_ACCEPT;
+
+	if (nf_is_loopback_packet(skb) ||
+	    ip_hdr(skb)->protocol != IPPROTO_TCP)
+		return NF_ACCEPT;
+
+	thoff = ip_hdrlen(skb);
+	th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+	if (!th)
+		return NF_DROP;
+
+	state = &ct->proto.tcp;
+	switch (state->state) {
+	case TCP_CONNTRACK_CLOSE:
+		if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
+			nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+						      ntohl(th->seq) + 1);
+			break;
+		}
+
+		if (!th->syn || th->ack ||
+		    CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+			break;
+
+		/* Reopened connection - reset the sequence number and timestamp
+		 * adjustments; they will get initialized once the connection is
+		 * reestablished.
+		 */
+		nf_ct_seqadj_init(ct, ctinfo, 0);
+		synproxy->tsoff = 0;
+		this_cpu_inc(snet->stats->conn_reopened);
+
+		/* fall through */
+	case TCP_CONNTRACK_SYN_SENT:
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
+		if (!th->syn && th->ack &&
+		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+			/* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+			 * therefore we need to add 1 to make the SYN sequence
+			 * number match that of the first SYN.
+			 */
+			if (synproxy_recv_client_ack(net, skb, th, &opts,
+						     ntohl(th->seq) + 1)) {
+				this_cpu_inc(snet->stats->cookie_retrans);
+				consume_skb(skb);
+				return NF_STOLEN;
+			} else {
+				return NF_DROP;
+			}
+		}
+
+		synproxy->isn = ntohl(th->ack_seq);
+		if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP)
+			synproxy->its = opts.tsecr;
+
+		nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+		break;
+	case TCP_CONNTRACK_SYN_RECV:
+		if (!th->syn || !th->ack)
+			break;
+
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
+		if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) {
+			synproxy->tsoff = opts.tsval - synproxy->its;
+			nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+		}
+
+		opts.options &= ~(NF_SYNPROXY_OPT_MSS |
+				  NF_SYNPROXY_OPT_WSCALE |
+				  NF_SYNPROXY_OPT_SACK_PERM);
+
+		swap(opts.tsval, opts.tsecr);
+		synproxy_send_server_ack(net, state, skb, th, &opts);
+
+		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+		nf_conntrack_event_cache(IPCT_SEQADJ, ct);
+
+		swap(opts.tsval, opts.tsecr);
+		synproxy_send_client_ack(net, skb, th, &opts);
+
+		consume_skb(skb);
+		return NF_STOLEN;
+	default:
+		break;
+	}
+
+	synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(ipv4_synproxy_hook);
+
+static const struct nf_hook_ops ipv4_synproxy_ops[] = {
+	{
+		.hook		= ipv4_synproxy_hook,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+	},
+	{
+		.hook		= ipv4_synproxy_hook,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+	},
+};
+
+int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net)
+{
+	int err;
+
+	if (snet->hook_ref4 == 0) {
+		err = nf_register_net_hooks(net, ipv4_synproxy_ops,
+					    ARRAY_SIZE(ipv4_synproxy_ops));
+		if (err)
+			return err;
+	}
+
+	snet->hook_ref4++;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_init);
+
+void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net)
+{
+	snet->hook_ref4--;
+	if (snet->hook_ref4 == 0)
+		nf_unregister_net_hooks(net, ipv4_synproxy_ops,
+					ARRAY_SIZE(ipv4_synproxy_ops));
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_fini);
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct ipv6hdr *
+synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb,
+		       const struct in6_addr *saddr,
+		       const struct in6_addr *daddr)
+{
+	struct ipv6hdr *iph;
+
+	skb_reset_network_header(skb);
+	iph = skb_put(skb, sizeof(*iph));
+	ip6_flow_hdr(iph, 0, 0);
+	iph->hop_limit	= net->ipv6.devconf_all->hop_limit;
+	iph->nexthdr	= IPPROTO_TCP;
+	iph->saddr	= *saddr;
+	iph->daddr	= *daddr;
+
+	return iph;
+}
+
+static void
+synproxy_send_tcp_ipv6(struct net *net,
+		       const struct sk_buff *skb, struct sk_buff *nskb,
+		       struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+		       struct ipv6hdr *niph, struct tcphdr *nth,
+		       unsigned int tcp_hdr_size)
+{
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+	int err;
+
+	nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
+	nskb->ip_summed   = CHECKSUM_PARTIAL;
+	nskb->csum_start  = (unsigned char *)nth - nskb->head;
+	nskb->csum_offset = offsetof(struct tcphdr, check);
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_TCP;
+	fl6.saddr = niph->saddr;
+	fl6.daddr = niph->daddr;
+	fl6.fl6_sport = nth->source;
+	fl6.fl6_dport = nth->dest;
+	security_skb_classify_flow((struct sk_buff *)skb,
+				   flowi6_to_flowi(&fl6));
+	err = nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false);
+	if (err)
+		goto free_nskb;
+
+	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+	if (IS_ERR(dst))
+		goto free_nskb;
+
+	skb_dst_set(nskb, dst);
+
+	if (nfct) {
+		nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo);
+		nf_conntrack_get(nfct);
+	}
+
+	ip6_local_out(net, nskb->sk, nskb);
+	return;
+
+free_nskb:
+	kfree_skb(nskb);
+}
+
+void
+synproxy_send_client_synack_ipv6(struct net *net,
+				 const struct sk_buff *skb,
+				 const struct tcphdr *th,
+				 const struct synproxy_options *opts)
+{
+	struct sk_buff *nskb;
+	struct ipv6hdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+	u16 mss = opts->mss_encode;
+
+	iph = ipv6_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip_ipv6(net, nskb, &iph->daddr, &iph->saddr);
+
+	skb_reset_transport_header(nskb);
+	nth = skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->dest;
+	nth->dest	= th->source;
+	nth->seq	= htonl(nf_ipv6_cookie_init_sequence(iph, th, &mss));
+	nth->ack_seq	= htonl(ntohl(th->seq) + 1);
+	tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+	if (opts->options & NF_SYNPROXY_OPT_ECN)
+		tcp_flag_word(nth) |= TCP_FLAG_ECE;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= 0;
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp_ipv6(net, skb, nskb, skb_nfct(skb),
+			       IP_CT_ESTABLISHED_REPLY, niph, nth,
+			       tcp_hdr_size);
+}
+EXPORT_SYMBOL_GPL(synproxy_send_client_synack_ipv6);
+
+static void
+synproxy_send_server_syn_ipv6(struct net *net, const struct sk_buff *skb,
+			      const struct tcphdr *th,
+			      const struct synproxy_options *opts, u32 recv_seq)
+{
+	struct synproxy_net *snet = synproxy_pernet(net);
+	struct sk_buff *nskb;
+	struct ipv6hdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+
+	iph = ipv6_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip_ipv6(net, nskb, &iph->saddr, &iph->daddr);
+
+	skb_reset_transport_header(nskb);
+	nth = skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->source;
+	nth->dest	= th->dest;
+	nth->seq	= htonl(recv_seq - 1);
+	/* ack_seq is used to relay our ISN to the synproxy hook to initialize
+	 * sequence number translation once a connection tracking entry exists.
+	 */
+	nth->ack_seq	= htonl(ntohl(th->ack_seq) - 1);
+	tcp_flag_word(nth) = TCP_FLAG_SYN;
+	if (opts->options & NF_SYNPROXY_OPT_ECN)
+		tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= th->window;
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp_ipv6(net, skb, nskb, &snet->tmpl->ct_general,
+			       IP_CT_NEW, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack_ipv6(struct net *net, const struct ip_ct_tcp *state,
+			      const struct sk_buff *skb,
+			      const struct tcphdr *th,
+			      const struct synproxy_options *opts)
+{
+	struct sk_buff *nskb;
+	struct ipv6hdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+
+	iph = ipv6_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip_ipv6(net, nskb, &iph->daddr, &iph->saddr);
+
+	skb_reset_transport_header(nskb);
+	nth = skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->dest;
+	nth->dest	= th->source;
+	nth->seq	= htonl(ntohl(th->ack_seq));
+	nth->ack_seq	= htonl(ntohl(th->seq) + 1);
+	tcp_flag_word(nth) = TCP_FLAG_ACK;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp_ipv6(net, skb, nskb, NULL, 0, niph, nth,
+			       tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack_ipv6(struct net *net, const struct sk_buff *skb,
+			      const struct tcphdr *th,
+			      const struct synproxy_options *opts)
+{
+	struct sk_buff *nskb;
+	struct ipv6hdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+
+	iph = ipv6_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip_ipv6(net, nskb, &iph->saddr, &iph->daddr);
+
+	skb_reset_transport_header(nskb);
+	nth = skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->source;
+	nth->dest	= th->dest;
+	nth->seq	= htonl(ntohl(th->seq) + 1);
+	nth->ack_seq	= th->ack_seq;
+	tcp_flag_word(nth) = TCP_FLAG_ACK;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= htons(ntohs(th->window) >> opts->wscale);
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp_ipv6(net, skb, nskb, skb_nfct(skb),
+			       IP_CT_ESTABLISHED_REPLY, niph, nth,
+			       tcp_hdr_size);
+}
+
+bool
+synproxy_recv_client_ack_ipv6(struct net *net,
+			      const struct sk_buff *skb,
+			      const struct tcphdr *th,
+			      struct synproxy_options *opts, u32 recv_seq)
+{
+	struct synproxy_net *snet = synproxy_pernet(net);
+	int mss;
+
+	mss = nf_cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
+	if (mss == 0) {
+		this_cpu_inc(snet->stats->cookie_invalid);
+		return false;
+	}
+
+	this_cpu_inc(snet->stats->cookie_valid);
+	opts->mss_option = mss;
+	opts->options |= NF_SYNPROXY_OPT_MSS;
+
+	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
+		synproxy_check_timestamp_cookie(opts);
+
+	synproxy_send_server_syn_ipv6(net, skb, th, opts, recv_seq);
+	return true;
+}
+EXPORT_SYMBOL_GPL(synproxy_recv_client_ack_ipv6);
+
+unsigned int
+ipv6_synproxy_hook(void *priv, struct sk_buff *skb,
+		   const struct nf_hook_state *nhs)
+{
+	struct net *net = nhs->net;
+	struct synproxy_net *snet = synproxy_pernet(net);
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	struct nf_conn_synproxy *synproxy;
+	struct synproxy_options opts = {};
+	const struct ip_ct_tcp *state;
+	struct tcphdr *th, _th;
+	__be16 frag_off;
+	u8 nexthdr;
+	int thoff;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return NF_ACCEPT;
+
+	synproxy = nfct_synproxy(ct);
+	if (!synproxy)
+		return NF_ACCEPT;
+
+	if (nf_is_loopback_packet(skb))
+		return NF_ACCEPT;
+
+	nexthdr = ipv6_hdr(skb)->nexthdr;
+	thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+				 &frag_off);
+	if (thoff < 0 || nexthdr != IPPROTO_TCP)
+		return NF_ACCEPT;
+
+	th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+	if (!th)
+		return NF_DROP;
+
+	state = &ct->proto.tcp;
+	switch (state->state) {
+	case TCP_CONNTRACK_CLOSE:
+		if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
+			nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+						      ntohl(th->seq) + 1);
+			break;
+		}
+
+		if (!th->syn || th->ack ||
+		    CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+			break;
+
+		/* Reopened connection - reset the sequence number and timestamp
+		 * adjustments; they will get initialized once the connection is
+		 * reestablished.
+		 */
+		nf_ct_seqadj_init(ct, ctinfo, 0);
+		synproxy->tsoff = 0;
+		this_cpu_inc(snet->stats->conn_reopened);
+
+		/* fall through */
+	case TCP_CONNTRACK_SYN_SENT:
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
+		if (!th->syn && th->ack &&
+		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+			/* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+			 * therefore we need to add 1 to make the SYN sequence
+			 * number match that of the first SYN.
+			 */
+			if (synproxy_recv_client_ack_ipv6(net, skb, th, &opts,
+							  ntohl(th->seq) + 1)) {
+				this_cpu_inc(snet->stats->cookie_retrans);
+				consume_skb(skb);
+				return NF_STOLEN;
+			} else {
+				return NF_DROP;
+			}
+		}
+
+		synproxy->isn = ntohl(th->ack_seq);
+		if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP)
+			synproxy->its = opts.tsecr;
+
+		nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+		break;
+	case TCP_CONNTRACK_SYN_RECV:
+		if (!th->syn || !th->ack)
+			break;
+
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
+		if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) {
+			synproxy->tsoff = opts.tsval - synproxy->its;
+			nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+		}
+
+		opts.options &= ~(NF_SYNPROXY_OPT_MSS |
+				  NF_SYNPROXY_OPT_WSCALE |
+				  NF_SYNPROXY_OPT_SACK_PERM);
+
+		swap(opts.tsval, opts.tsecr);
+		synproxy_send_server_ack_ipv6(net, state, skb, th, &opts);
+
+		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+		nf_conntrack_event_cache(IPCT_SEQADJ, ct);
+
+		swap(opts.tsval, opts.tsecr);
+		synproxy_send_client_ack_ipv6(net, skb, th, &opts);
+
+		consume_skb(skb);
+		return NF_STOLEN;
+	default:
+		break;
+	}
+
+	synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(ipv6_synproxy_hook);
+
+static const struct nf_hook_ops ipv6_synproxy_ops[] = {
+	{
+		.hook		= ipv6_synproxy_hook,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+	},
+	{
+		.hook		= ipv6_synproxy_hook,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+	},
+};
+
+int
+nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net)
+{
+	int err;
+
+	if (snet->hook_ref6 == 0) {
+		err = nf_register_net_hooks(net, ipv6_synproxy_ops,
+					    ARRAY_SIZE(ipv6_synproxy_ops));
+		if (err)
+			return err;
+	}
+
+	snet->hook_ref6++;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_init);
+
+void
+nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net)
+{
+	snet->hook_ref6--;
+	if (snet->hook_ref6 == 0)
+		nf_unregister_net_hooks(net, ipv6_synproxy_ops,
+					ARRAY_SIZE(ipv6_synproxy_ops));
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini);
+#endif /* CONFIG_IPV6 */
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fe0558b..712a428 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -21,12 +18,15 @@
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
 static LIST_HEAD(nf_tables_expressions);
 static LIST_HEAD(nf_tables_objects);
 static LIST_HEAD(nf_tables_flowtables);
+static LIST_HEAD(nf_tables_destroy_list);
+static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
 static u64 table_handle;
 
 enum {
@@ -35,17 +35,31 @@
 	NFT_VALIDATE_DO,
 };
 
+static struct rhltable nft_objname_ht;
+
 static u32 nft_chain_hash(const void *data, u32 len, u32 seed);
 static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed);
 static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);
 
+static u32 nft_objname_hash(const void *data, u32 len, u32 seed);
+static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed);
+static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *);
+
 static const struct rhashtable_params nft_chain_ht_params = {
 	.head_offset		= offsetof(struct nft_chain, rhlhead),
 	.key_offset		= offsetof(struct nft_chain, name),
 	.hashfn			= nft_chain_hash,
 	.obj_hashfn		= nft_chain_hash_obj,
 	.obj_cmpfn		= nft_chain_hash_cmp,
-	.locks_mul		= 1,
+	.automatic_shrinking	= true,
+};
+
+static const struct rhashtable_params nft_objname_ht_params = {
+	.head_offset		= offsetof(struct nft_object, rhlhead),
+	.key_offset		= offsetof(struct nft_object, key),
+	.hashfn			= nft_objname_hash,
+	.obj_hashfn		= nft_objname_hash_obj,
+	.obj_cmpfn		= nft_objname_hash_cmp,
 	.automatic_shrinking	= true,
 };
 
@@ -64,6 +78,8 @@
 
 	net->nft.validate_state = new_validate_state;
 }
+static void nf_tables_trans_destroy_work(struct work_struct *w);
+static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
 
 static void nft_ctx_init(struct nft_ctx *ctx,
 			 struct net *net,
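
The nf_tables_destroy_list/trans_destroy_work pair declared here defers object teardown off the commit path: released transactions are parked on a spinlock-protected list and freed from a workqueue after one shared RCU grace period, instead of each commit blocking in synchronize_rcu(). The worker has roughly this shape:

	static void nf_tables_trans_destroy_work(struct work_struct *w)
	{
		struct nft_trans *trans, *next;
		LIST_HEAD(head);

		spin_lock(&nf_tables_destroy_list_lock);
		list_splice_init(&nf_tables_destroy_list, &head);
		spin_unlock(&nf_tables_destroy_list_lock);

		if (list_empty(&head))
			return;

		synchronize_rcu();	/* one grace period for the batch */

		list_for_each_entry_safe(trans, next, &head, list) {
			list_del(&trans->list);
			nft_commit_release(trans);
		}
	}
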
@@ -82,6 +98,7 @@
 	ctx->nla   	= nla;
 	ctx->portid	= NETLINK_CB(skb).portid;
 	ctx->report	= nlmsg_report(nlh);
+	ctx->flags	= nlh->nlmsg_flags;
 	ctx->seq	= nlh->nlmsg_seq;
 }
 
@@ -112,6 +129,28 @@
 	kfree(trans);
 }
 
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+	struct net *net = ctx->net;
+	struct nft_trans *trans;
+
+	if (!nft_set_is_anonymous(set))
+		return;
+
+	list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
+		switch (trans->msg_type) {
+		case NFT_MSG_NEWSET:
+			if (nft_trans_set(trans) == set)
+				nft_trans_set_bound(trans) = true;
+			break;
+		case NFT_MSG_NEWSETELEM:
+			if (nft_trans_elem_set(trans) == set)
+				nft_trans_elem_set_bound(trans) = true;
+			break;
+		}
+	}
+}
+
 static int nf_tables_register_hook(struct net *net,
 				   const struct nft_table *table,
 				   struct nft_chain *chain)
@@ -178,33 +217,33 @@
 	return err;
 }
 
-static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
 {
 	struct nft_trans *trans;
 
 	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
 	if (trans == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	if (msg_type == NFT_MSG_NEWCHAIN)
 		nft_activate_next(ctx->net, ctx->chain);
 
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
-	return 0;
+	return trans;
 }
 
 static int nft_delchain(struct nft_ctx *ctx)
 {
-	int err;
+	struct nft_trans *trans;
 
-	err = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN);
-	if (err < 0)
-		return err;
+	trans = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
 
 	ctx->table->use--;
 	nft_deactivate_next(ctx->net, ctx->chain);
 
-	return err;
+	return 0;
 }
 
 static void nft_rule_expr_activate(const struct nft_ctx *ctx,
@@ -222,14 +261,15 @@
 }
 
 static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
-				     struct nft_rule *rule)
+				     struct nft_rule *rule,
+				     enum nft_trans_phase phase)
 {
 	struct nft_expr *expr;
 
 	expr = nft_expr_first(rule);
 	while (expr != nft_expr_last(rule) && expr->ops) {
 		if (expr->ops->deactivate)
-			expr->ops->deactivate(ctx, expr);
+			expr->ops->deactivate(ctx, expr, phase);
 
 		expr = nft_expr_next(expr);
 	}
@@ -280,7 +320,7 @@
 		nft_trans_destroy(trans);
 		return err;
 	}
-	nft_rule_expr_deactivate(ctx, rule);
+	nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE);
 
 	return 0;
 }
@@ -291,6 +331,9 @@
 	int err;
 
 	list_for_each_entry(rule, &ctx->chain->rules, list) {
+		if (!nft_is_active_next(ctx->net, rule))
+			continue;
+
 		err = nft_delrule(ctx, rule);
 		if (err < 0)
 			return err;
@@ -298,7 +341,7 @@
 	return 0;
 }
 
-static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
 			     struct nft_set *set)
 {
 	struct nft_trans *trans;
@@ -318,7 +361,7 @@
 	return 0;
 }
 
-static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
+static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
 {
 	int err;
 
@@ -798,6 +841,34 @@
 	return strcmp(chain->name, name);
 }
 
+static u32 nft_objname_hash(const void *data, u32 len, u32 seed)
+{
+	const struct nft_object_hash_key *k = data;
+
+	seed ^= hash_ptr(k->table, 32);
+
+	return jhash(k->name, strlen(k->name), seed);
+}
+
+static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed)
+{
+	const struct nft_object *obj = data;
+
+	return nft_objname_hash(&obj->key, 0, seed);
+}
+
+static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg,
+				const void *ptr)
+{
+	const struct nft_object_hash_key *k = arg->key;
+	const struct nft_object *obj = ptr;
+
+	if (obj->key.table != k->table)
+		return -1;
+
+	return strcmp(obj->key.name, k->name);
+}
+
 static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[],
@@ -1005,7 +1076,8 @@
 
 static void nf_tables_table_destroy(struct nft_ctx *ctx)
 {
-	BUG_ON(ctx->table->use > 0);
+	if (WARN_ON(ctx->table->use > 0))
+		return;
 
 	rhltable_destroy(&ctx->table->chains_ht);
 	kfree(ctx->table->name);
@@ -1053,7 +1125,7 @@
 	return ERR_PTR(-ENOENT);
 }
 
-static bool lockdep_commit_lock_is_held(struct net *net)
+static bool lockdep_commit_lock_is_held(const struct net *net)
 {
 #ifdef CONFIG_PROVE_LOCKING
 	return lockdep_is_held(&net->nft.commit_mutex);
@@ -1104,6 +1176,7 @@
 	[NFTA_CHAIN_POLICY]	= { .type = NLA_U32 },
 	[NFTA_CHAIN_TYPE]	= { .type = NLA_STRING },
 	[NFTA_CHAIN_COUNTERS]	= { .type = NLA_NESTED },
+	[NFTA_CHAIN_FLAGS]	= { .type = NLA_U32 },
 };
 
 static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
@@ -1121,6 +1194,9 @@
 	u64 pkts, bytes;
 	int cpu;
 
+	if (!stats)
+		return 0;
+
 	memset(&total, 0, sizeof(total));
 	for_each_possible_cpu(cpu) {
 		cpu_stats = per_cpu_ptr(stats, cpu);
@@ -1132,7 +1208,7 @@
 		total.pkts += pkts;
 		total.bytes += bytes;
 	}
-	nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS);
+	nest = nla_nest_start_noflag(skb, NFTA_CHAIN_COUNTERS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -1178,9 +1254,10 @@
 	if (nft_is_base_chain(chain)) {
 		const struct nft_base_chain *basechain = nft_base_chain(chain);
 		const struct nf_hook_ops *ops = &basechain->ops;
+		struct nft_stats __percpu *stats;
 		struct nlattr *nest;
 
-		nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
+		nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
 		if (nest == NULL)
 			goto nla_put_failure;
 		if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
@@ -1199,7 +1276,9 @@
 		if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
 			goto nla_put_failure;
 
-		if (basechain->stats && nft_dump_stats(skb, basechain->stats))
+		stats = rcu_dereference_check(basechain->stats,
+					      lockdep_commit_lock_is_held(net));
+		if (nft_dump_stats(skb, stats))
 			goto nla_put_failure;
 	}
 
@@ -1351,8 +1430,8 @@
 	struct nft_stats *stats;
 	int err;
 
-	err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr,
+					  nft_counter_policy, NULL);
 	if (err < 0)
 		return ERR_PTR(err);
 
@@ -1375,23 +1454,18 @@
 	return newstats;
 }
 
-static void nft_chain_stats_replace(struct nft_base_chain *chain,
-				    struct nft_stats __percpu *newstats)
+static void nft_chain_stats_replace(struct nft_trans *trans)
 {
-	struct nft_stats __percpu *oldstats;
+	struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain);
 
-	if (newstats == NULL)
+	if (!nft_trans_chain_stats(trans))
 		return;
 
-	if (chain->stats) {
-		oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES);
-		rcu_assign_pointer(chain->stats, newstats);
-		synchronize_rcu();
-		free_percpu(oldstats);
-	} else {
-		rcu_assign_pointer(chain->stats, newstats);
+	rcu_swap_protected(chain->stats, nft_trans_chain_stats(trans),
+			   lockdep_commit_lock_is_held(trans->ctx.net));
+
+	if (!nft_trans_chain_stats(trans))
 		static_branch_inc(&nft_counters_enabled);
-	}
 }
 
 static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
@@ -1412,7 +1486,8 @@
 {
 	struct nft_chain *chain = ctx->chain;
 
-	BUG_ON(chain->use > 0);
+	if (WARN_ON(chain->use > 0))
+		return;
 
 	/* no concurrent access possible anymore */
 	nf_tables_chain_free_chain_rules(chain);
@@ -1421,9 +1496,10 @@
 		struct nft_base_chain *basechain = nft_base_chain(chain);
 
 		module_put(basechain->type->owner);
-		free_percpu(basechain->stats);
-		if (basechain->stats)
+		if (rcu_access_pointer(basechain->stats)) {
 			static_branch_dec(&nft_counters_enabled);
+			free_percpu(rcu_dereference_raw(basechain->stats));
+		}
 		kfree(chain->name);
 		kfree(basechain);
 	} else {
@@ -1452,8 +1528,9 @@
 	lockdep_assert_held(&net->nft.commit_mutex);
 	lockdep_nfnl_nft_mutex_not_held();
 
-	err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
-			       nft_hook_policy, NULL);
+	err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX,
+					  nla[NFTA_CHAIN_HOOK],
+					  nft_hook_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -1471,7 +1548,7 @@
 		if (IS_ERR(type))
 			return PTR_ERR(type);
 	}
-	if (!(type->hook_mask & (1 << hook->num)))
+	if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
 		return -EOPNOTSUPP;
 
 	if (type->type == NFT_CHAIN_T_NAT &&
@@ -1534,13 +1611,14 @@
 }
 
 static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
-			      u8 policy)
+			      u8 policy, u32 flags)
 {
 	const struct nlattr * const *nla = ctx->nla;
 	struct nft_table *table = ctx->table;
 	struct nft_base_chain *basechain;
 	struct nft_stats __percpu *stats;
 	struct net *net = ctx->net;
+	struct nft_trans *trans;
 	struct nft_chain *chain;
 	struct nft_rule **rules;
 	int err;
@@ -1572,7 +1650,7 @@
 				kfree(basechain);
 				return PTR_ERR(stats);
 			}
-			basechain->stats = stats;
+			rcu_assign_pointer(basechain->stats, stats);
 			static_branch_inc(&nft_counters_enabled);
 		}
 
@@ -1587,8 +1665,13 @@
 		ops->hook	= hook.type->hooks[ops->hooknum];
 		ops->dev	= hook.dev;
 
-		chain->flags |= NFT_BASE_CHAIN;
-		basechain->policy = policy;
+		chain->flags |= NFT_BASE_CHAIN | flags;
+		basechain->policy = NF_ACCEPT;
+		if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+		    nft_chain_offload_priority(basechain) < 0)
+			return -EOPNOTSUPP;
+
+		flow_block_init(&basechain->flow_block);
 	} else {
 		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
 		if (chain == NULL)
@@ -1624,13 +1707,18 @@
 	if (err)
 		goto err2;
 
-	err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
-	if (err < 0) {
+	trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
+	if (IS_ERR(trans)) {
+		err = PTR_ERR(trans);
 		rhltable_remove(&table->chains_ht, &chain->rhlhead,
 				nft_chain_ht_params);
 		goto err2;
 	}
 
+	nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
+	if (nft_is_base_chain(chain))
+		nft_trans_chain_policy(trans) = policy;
+
 	table->use++;
 	list_add_tail_rcu(&chain->list, &table->chains);
 
@@ -1643,7 +1731,8 @@
 	return err;
 }
 
-static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy)
+static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
+			      u32 flags)
 {
 	const struct nlattr * const *nla = ctx->nla;
 	struct nft_table *table = ctx->table;
@@ -1655,6 +1744,9 @@
 	struct nft_trans *trans;
 	int err;
 
+	if (chain->flags ^ flags)
+		return -EOPNOTSUPP;
+
 	if (nla[NFTA_CHAIN_HOOK]) {
 		if (!nft_is_base_chain(chain))
 			return -EBUSY;
@@ -1760,6 +1852,7 @@
 	u8 policy = NF_ACCEPT;
 	struct nft_ctx ctx;
 	u64 handle = 0;
+	u32 flags = 0;
 
 	lockdep_assert_held(&net->nft.commit_mutex);
 
@@ -1814,6 +1907,11 @@
 		}
 	}
 
+	if (nla[NFTA_CHAIN_FLAGS])
+		flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS]));
+	else if (chain)
+		flags = chain->flags;
+
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
 
 	if (chain != NULL) {
@@ -1824,10 +1922,11 @@
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
 
-		return nf_tables_updchain(&ctx, genmask, policy);
+		flags |= chain->flags & NFT_BASE_CHAIN;
+		return nf_tables_updchain(&ctx, genmask, policy, flags);
 	}
 
-	return nf_tables_addchain(&ctx, family, genmask, policy);
+	return nf_tables_addchain(&ctx, family, genmask, policy, flags);
 }
 
 static int nf_tables_delchain(struct net *net, struct sock *nlsk,
@@ -1934,16 +2033,32 @@
 static const struct nft_expr_type *__nft_expr_type_get(u8 family,
 						       struct nlattr *nla)
 {
-	const struct nft_expr_type *type;
+	const struct nft_expr_type *type, *candidate = NULL;
 
 	list_for_each_entry(type, &nf_tables_expressions, list) {
-		if (!nla_strcmp(nla, type->name) &&
-		    (!type->family || type->family == family))
-			return type;
+		if (!nla_strcmp(nla, type->name)) {
+			if (!type->family && !candidate)
+				candidate = type;
+			else if (type->family == family)
+				candidate = type;
+		}
 	}
-	return NULL;
+	return candidate;
 }
 
+#ifdef CONFIG_MODULES
+static int nft_expr_type_request_module(struct net *net, u8 family,
+					struct nlattr *nla)
+{
+	nft_request_module(net, "nft-expr-%u-%.*s", family,
+			   nla_len(nla), (char *)nla_data(nla));
+	if (__nft_expr_type_get(family, nla))
+		return -EAGAIN;
+
+	return 0;
+}
+#endif
+
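The "nft-expr-%u-%.*s" and "nft-expr-%.*s" strings above are module aliases
rather than file names. Expression modules publish them via alias macros
(assumed context, from include/net/netfilter/nf_tables.h; the expression
names below are hypothetical examples):

	MODULE_ALIAS_NFT_EXPR("example");	/* -> "nft-expr-example" */
	MODULE_ALIAS_NFT_AF_EXPR(2, "example");	/* -> "nft-expr-2-example" */

so that request_module() can autoload the matching module; returning -EAGAIN
tells the caller to restart the lookup once the module is in place.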
 static const struct nft_expr_type *nft_expr_type_get(struct net *net,
 						     u8 family,
 						     struct nlattr *nla)
@@ -1960,9 +2075,7 @@
 	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (type == NULL) {
-		nft_request_module(net, "nft-expr-%u-%.*s", family,
-				   nla_len(nla), (char *)nla_data(nla));
-		if (__nft_expr_type_get(family, nla))
+		if (nft_expr_type_request_module(net, family, nla) == -EAGAIN)
 			return ERR_PTR(-EAGAIN);
 
 		nft_request_module(net, "nft-expr-%.*s",
@@ -1986,7 +2099,8 @@
 		goto nla_put_failure;
 
 	if (expr->ops->dump) {
-		struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA);
+		struct nlattr *data = nla_nest_start_noflag(skb,
+							    NFTA_EXPR_DATA);
 		if (data == NULL)
 			goto nla_put_failure;
 		if (expr->ops->dump(skb, expr) < 0)
@@ -2005,7 +2119,7 @@
 {
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, attr);
+	nest = nla_nest_start_noflag(skb, attr);
 	if (!nest)
 		goto nla_put_failure;
 	if (nf_tables_fill_expr_info(skb, expr) < 0)
@@ -2031,7 +2145,8 @@
 	struct nlattr *tb[NFTA_EXPR_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_EXPR_MAX, nla,
+					  nft_expr_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -2040,8 +2155,9 @@
 		return PTR_ERR(type);
 
 	if (tb[NFTA_EXPR_DATA]) {
-		err = nla_parse_nested(info->tb, type->maxattr,
-				       tb[NFTA_EXPR_DATA], type->policy, NULL);
+		err = nla_parse_nested_deprecated(info->tb, type->maxattr,
+						  tb[NFTA_EXPR_DATA],
+						  type->policy, NULL);
 		if (err < 0)
 			goto err1;
 	} else
@@ -2052,6 +2168,12 @@
 				       (const struct nlattr * const *)info->tb);
 		if (IS_ERR(ops)) {
 			err = PTR_ERR(ops);
+#ifdef CONFIG_MODULES
+			if (err == -EAGAIN)
+				nft_expr_type_request_module(ctx->net,
+							     ctx->family,
+							     tb[NFTA_EXPR_NAME]);
+#endif
 			goto err1;
 		}
 	} else
@@ -2088,9 +2210,11 @@
 static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
 				   struct nft_expr *expr)
 {
+	const struct nft_expr_type *type = expr->ops->type;
+
 	if (expr->ops->destroy)
 		expr->ops->destroy(ctx, expr);
-	module_put(expr->ops->type->owner);
+	module_put(type->owner);
 }
 
 struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
@@ -2098,6 +2222,7 @@
 {
 	struct nft_expr_info info;
 	struct nft_expr *expr;
+	struct module *owner;
 	int err;
 
 	err = nf_tables_expr_parse(ctx, nla, &info);
@@ -2117,7 +2242,11 @@
 err3:
 	kfree(expr);
 err2:
-	module_put(info.ops->type->owner);
+	owner = info.ops->type->owner;
+	if (info.ops->type->release_ops)
+		info.ops->type->release_ops(info.ops);
+
+	module_put(owner);
 err1:
 	return ERR_PTR(err);
 }
@@ -2167,6 +2296,7 @@
 	[NFTA_RULE_USERDATA]	= { .type = NLA_BINARY,
 				    .len = NFT_USERDATA_MAXLEN },
 	[NFTA_RULE_ID]		= { .type = NLA_U32 },
+	[NFTA_RULE_POSITION_ID]	= { .type = NLA_U32 },
 };
 
 static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
@@ -2174,13 +2304,13 @@
 				    u32 flags, int family,
 				    const struct nft_table *table,
 				    const struct nft_chain *chain,
-				    const struct nft_rule *rule)
+				    const struct nft_rule *rule,
+				    const struct nft_rule *prule)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	const struct nft_expr *expr, *next;
 	struct nlattr *list;
-	const struct nft_rule *prule;
 	u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
 
 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags);
@@ -2200,15 +2330,14 @@
 			 NFTA_RULE_PAD))
 		goto nla_put_failure;
 
-	if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) {
-		prule = list_prev_entry(rule, list);
+	if (event != NFT_MSG_DELRULE && prule) {
 		if (nla_put_be64(skb, NFTA_RULE_POSITION,
 				 cpu_to_be64(prule->handle),
 				 NFTA_RULE_PAD))
 			goto nla_put_failure;
 	}
 
-	list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS);
+	list = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS);
 	if (list == NULL)
 		goto nla_put_failure;
 	nft_rule_for_each_expr(expr, next, rule) {
@@ -2248,7 +2377,7 @@
 
 	err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
 				       event, 0, ctx->family, ctx->table,
-				       ctx->chain, rule);
+				       ctx->chain, rule, NULL);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto err;
@@ -2266,15 +2395,51 @@
 	char *chain;
 };
 
+static int __nf_tables_dump_rules(struct sk_buff *skb,
+				  unsigned int *idx,
+				  struct netlink_callback *cb,
+				  const struct nft_table *table,
+				  const struct nft_chain *chain)
+{
+	struct net *net = sock_net(skb->sk);
+	const struct nft_rule *rule, *prule;
+	unsigned int s_idx = cb->args[0];
+
+	prule = NULL;
+	list_for_each_entry_rcu(rule, &chain->rules, list) {
+		if (!nft_is_active(net, rule))
+			goto cont_skip;
+		if (*idx < s_idx)
+			goto cont;
+		if (*idx > s_idx) {
+			memset(&cb->args[1], 0,
+					sizeof(cb->args) - sizeof(cb->args[0]));
+		}
+		if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
+					cb->nlh->nlmsg_seq,
+					NFT_MSG_NEWRULE,
+					NLM_F_MULTI | NLM_F_APPEND,
+					table->family,
+					table, chain, rule, prule) < 0)
+			return 1;
+
+		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+		prule = rule;
+cont_skip:
+		(*idx)++;
+	}
+	return 0;
+}
+
 static int nf_tables_dump_rules(struct sk_buff *skb,
 				struct netlink_callback *cb)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
 	const struct nft_rule_dump_ctx *ctx = cb->data;
-	const struct nft_table *table;
+	struct nft_table *table;
 	const struct nft_chain *chain;
-	const struct nft_rule *rule;
-	unsigned int idx = 0, s_idx = cb->args[0];
+	unsigned int idx = 0;
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
 
@@ -2288,32 +2453,31 @@
 		if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
 			continue;
 
-		list_for_each_entry_rcu(chain, &table->chains, list) {
-			if (ctx && ctx->chain &&
-			    strcmp(ctx->chain, chain->name) != 0)
-				continue;
+		if (ctx && ctx->table && ctx->chain) {
+			struct rhlist_head *list, *tmp;
 
-			list_for_each_entry_rcu(rule, &chain->rules, list) {
-				if (!nft_is_active(net, rule))
-					goto cont;
-				if (idx < s_idx)
-					goto cont;
-				if (idx > s_idx)
-					memset(&cb->args[1], 0,
-					       sizeof(cb->args) - sizeof(cb->args[0]));
-				if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
-							      cb->nlh->nlmsg_seq,
-							      NFT_MSG_NEWRULE,
-							      NLM_F_MULTI | NLM_F_APPEND,
-							      table->family,
-							      table, chain, rule) < 0)
-					goto done;
+			list = rhltable_lookup(&table->chains_ht, ctx->chain,
+					       nft_chain_ht_params);
+			if (!list)
+				goto done;
 
-				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
-				idx++;
+			rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) {
+				if (!nft_is_active(net, chain))
+					continue;
+				__nf_tables_dump_rules(skb, &idx,
+						       cb, table, chain);
+				break;
 			}
+			goto done;
 		}
+
+		list_for_each_entry_rcu(chain, &table->chains, list) {
+			if (__nf_tables_dump_rules(skb, &idx, cb, table, chain))
+				goto done;
+		}
+
+		if (ctx && ctx->table)
+			break;
 	}
 done:
 	rcu_read_unlock();
@@ -2418,7 +2582,7 @@
 
 	err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
 				       nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
-				       family, table, chain, rule);
+				       family, table, chain, rule, NULL);
 	if (err < 0)
 		goto err;
 
@@ -2434,7 +2598,6 @@
 {
 	struct nft_expr *expr, *next;
 
-	lockdep_assert_held(&ctx->net->nft.commit_mutex);
 	/*
 	 * Careful: some expressions might not be initialized in case this
 	 * is called on error from nf_tables_newrule().
@@ -2451,7 +2614,7 @@
 static void nf_tables_rule_release(const struct nft_ctx *ctx,
 				   struct nft_rule *rule)
 {
-	nft_rule_expr_deactivate(ctx, rule);
+	nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
 	nf_tables_rule_destroy(ctx, rule);
 }
 
@@ -2505,6 +2668,9 @@
 	return 0;
 }
 
+static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+					     const struct nlattr *nla);
+
 #define NFT_RULE_MAXEXPRS	128
 
 static int nf_tables_newrule(struct net *net, struct sock *nlsk,
@@ -2516,6 +2682,7 @@
 	u8 genmask = nft_genmask_next(net);
 	struct nft_expr_info *info = NULL;
 	int family = nfmsg->nfgen_family;
+	struct nft_flow_rule *flow;
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *old_rule = NULL;
@@ -2566,17 +2733,20 @@
 
 		if (chain->use == UINT_MAX)
 			return -EOVERFLOW;
-	}
 
-	if (nla[NFTA_RULE_POSITION]) {
-		if (!(nlh->nlmsg_flags & NLM_F_CREATE))
-			return -EOPNOTSUPP;
-
-		pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
-		old_rule = __nft_rule_lookup(chain, pos_handle);
-		if (IS_ERR(old_rule)) {
-			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
-			return PTR_ERR(old_rule);
+		if (nla[NFTA_RULE_POSITION]) {
+			pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+			old_rule = __nft_rule_lookup(chain, pos_handle);
+			if (IS_ERR(old_rule)) {
+				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
+				return PTR_ERR(old_rule);
+			}
+		} else if (nla[NFTA_RULE_POSITION_ID]) {
+			old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]);
+			if (IS_ERR(old_rule)) {
+				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]);
+				return PTR_ERR(old_rule);
+			}
 		}
 	}
 
@@ -2659,7 +2829,8 @@
 
 		list_add_tail_rcu(&rule->list, &old_rule->list);
 	} else {
-		if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
+		trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
+		if (!trans) {
 			err = -ENOMEM;
 			goto err2;
 		}
@@ -2682,13 +2853,24 @@
 	if (net->nft.validate_state == NFT_VALIDATE_DO)
 		return nft_table_validate(net, table);
 
+	if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
+		flow = nft_flow_rule_create(net, rule);
+		if (IS_ERR(flow))
+			return PTR_ERR(flow);
+
+		nft_trans_flow_rule(trans) = flow;
+	}
+
 	return 0;
 err2:
 	nf_tables_rule_release(&ctx, rule);
 err1:
 	for (i = 0; i < n; i++) {
-		if (info[i].ops != NULL)
+		if (info[i].ops) {
 			module_put(info[i].ops->type->owner);
+			if (info[i].ops->type->release_ops)
+				info[i].ops->type->release_ops(info[i].ops);
+		}
 	}
 	kvfree(info);
 	return err;
@@ -3072,9 +3254,7 @@
 
 static __be64 nf_jiffies64_to_msecs(u64 input)
 {
-	u64 ms = jiffies64_to_nsecs(input);
-
-	return cpu_to_be64(div_u64(ms, NSEC_PER_MSEC));
+	return cpu_to_be64(jiffies64_to_msecs(input));
 }
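Its counterpart used by the set-element code further down,
nf_msecs_to_jiffies64(), performs the reverse conversion from a netlink u64
millisecond attribute, with a range check so the nanosecond multiplication
cannot overflow. A minimal sketch, assuming the signature visible at its call
sites in this patch (the exact bound is an assumption):

	static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
	{
		u64 ms = be64_to_cpu(nla_get_be64(nla));

		if (ms >= div_u64(U64_MAX, NSEC_PER_MSEC))
			return -ERANGE;	/* ms * NSEC_PER_MSEC would overflow */

		*result = nsecs_to_jiffies64(ms * NSEC_PER_MSEC);
		return 0;
	}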
 
 static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
@@ -3139,7 +3319,7 @@
 	if (nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata))
 		goto nla_put_failure;
 
-	desc = nla_nest_start(skb, NFTA_SET_DESC);
+	desc = nla_nest_start_noflag(skb, NFTA_SET_DESC);
 	if (desc == NULL)
 		goto nla_put_failure;
 	if (set->size &&
@@ -3317,15 +3497,14 @@
 	return err;
 }
 
-static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
-				    struct nft_set_desc *desc,
+static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
 				    const struct nlattr *nla)
 {
 	struct nlattr *da[NFTA_SET_DESC_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla,
-			       nft_set_desc_policy, NULL);
+	err = nla_parse_nested_deprecated(da, NFTA_SET_DESC_MAX, nla,
+					  nft_set_desc_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -3384,8 +3563,11 @@
 			      NFT_SET_OBJECT))
 			return -EINVAL;
 		/* Only one of these operations is supported */
-		if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) ==
-			     (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT))
+		if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
+			     (NFT_SET_MAP | NFT_SET_OBJECT))
+			return -EOPNOTSUPP;
+		if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
+			     (NFT_SET_EVAL | NFT_SET_OBJECT))
 			return -EOPNOTSUPP;
 	}
 
@@ -3444,7 +3626,7 @@
 		policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
 
 	if (nla[NFTA_SET_DESC] != NULL) {
-		err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
+		err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
 		if (err < 0)
 			return err;
 	}
@@ -3555,19 +3737,15 @@
 
 static void nft_set_destroy(struct nft_set *set)
 {
+	if (WARN_ON(set->use > 0))
+		return;
+
 	set->ops->destroy(set);
 	module_put(to_set_type(set->ops)->owner);
 	kfree(set->name);
 	kvfree(set);
 }
 
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
-{
-	list_del_rcu(&set->list);
-	nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
-	nft_set_destroy(set);
-}
-
 static int nf_tables_delset(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[],
@@ -3602,7 +3780,7 @@
 		NL_SET_BAD_ATTR(extack, attr);
 		return PTR_ERR(set);
 	}
-	if (!list_empty(&set->bindings) ||
+	if (set->use ||
 	    (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
 		NL_SET_BAD_ATTR(extack, attr);
 		return -EBUSY;
@@ -3632,6 +3810,9 @@
 	struct nft_set_binding *i;
 	struct nft_set_iter iter;
 
+	if (set->use == UINT_MAX)
+		return -EOVERFLOW;
+
 	if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
 		return -EBUSY;
 
@@ -3658,20 +3839,51 @@
 bind:
 	binding->chain = ctx->chain;
 	list_add_tail_rcu(&binding->list, &set->bindings);
+	nft_set_trans_bind(ctx, set);
+	set->use++;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nf_tables_bind_set);
 
-void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
-			  struct nft_set_binding *binding)
+static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+				 struct nft_set_binding *binding, bool event)
 {
 	list_del_rcu(&binding->list);
 
-	if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
-	    nft_is_active(ctx->net, set))
-		nf_tables_set_destroy(ctx, set);
+	if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
+		list_del_rcu(&set->list);
+		if (event)
+			nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
+					     GFP_KERNEL);
+	}
 }
-EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
+
+void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+			      struct nft_set_binding *binding,
+			      enum nft_trans_phase phase)
+{
+	switch (phase) {
+	case NFT_TRANS_PREPARE:
+		set->use--;
+		return;
+	case NFT_TRANS_ABORT:
+	case NFT_TRANS_RELEASE:
+		set->use--;
+		/* fall through */
+	default:
+		nf_tables_unbind_set(ctx, set, binding,
+				     phase == NFT_TRANS_COMMIT);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_set);
+
+void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
+{
+	if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
+		nft_set_destroy(set);
+}
+EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
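For reference, the transaction phases that this switch (and the flowtable
variant below) dispatches on, as assumed from include/net/netfilter/nf_tables.h
in this same update:

	enum nft_trans_phase {
		NFT_TRANS_PREPARE,	/* object deactivated within the batch */
		NFT_TRANS_ABORT,	/* batch is being rolled back */
		NFT_TRANS_COMMIT,	/* batch has been committed */
		NFT_TRANS_RELEASE,	/* teardown outside a transaction */
	};

PREPARE only drops the use count, because an abort may still restore the
binding; ABORT and RELEASE drop the count and fall through to the unbind;
COMMIT unbinds (emitting the DELSET notification for anonymous sets) without
touching the count, which was already dropped at PREPARE time.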
 
 const struct nft_set_ext_type nft_set_ext_types[] = {
 	[NFT_SET_EXT_KEY]		= {
@@ -3715,6 +3927,7 @@
 	[NFTA_SET_ELEM_DATA]		= { .type = NLA_NESTED },
 	[NFTA_SET_ELEM_FLAGS]		= { .type = NLA_U32 },
 	[NFTA_SET_ELEM_TIMEOUT]		= { .type = NLA_U64 },
+	[NFTA_SET_ELEM_EXPIRATION]	= { .type = NLA_U64 },
 	[NFTA_SET_ELEM_USERDATA]	= { .type = NLA_BINARY,
 					    .len = NFT_USERDATA_MAXLEN },
 	[NFTA_SET_ELEM_EXPR]		= { .type = NLA_NESTED },
@@ -3760,7 +3973,7 @@
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+	nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -3780,7 +3993,7 @@
 
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
 	    nla_put_string(skb, NFTA_SET_ELEM_OBJREF,
-			   (*nft_set_ext_obj(ext))->name) < 0)
+			   (*nft_set_ext_obj(ext))->key.name) < 0)
 		goto nla_put_failure;
 
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
@@ -3904,7 +4117,7 @@
 	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
 		goto nla_put_failure;
 
-	nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+	nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -3976,7 +4189,7 @@
 	if (nla_put_string(skb, NFTA_SET_NAME, set->name))
 		goto nla_put_failure;
 
-	nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+	nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -4021,8 +4234,8 @@
 	void *priv;
 	int err;
 
-	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
-			       nft_set_elem_policy, NULL);
+	err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
+					  nft_set_elem_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -4168,7 +4381,7 @@
 void *nft_set_elem_init(const struct nft_set *set,
 			const struct nft_set_ext_tmpl *tmpl,
 			const u32 *key, const u32 *data,
-			u64 timeout, gfp_t gfp)
+			u64 timeout, u64 expiration, gfp_t gfp)
 {
 	struct nft_set_ext *ext;
 	void *elem;
@@ -4183,9 +4396,11 @@
 	memcpy(nft_set_ext_key(ext), key, set->klen);
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
 		memcpy(nft_set_ext_data(ext), data, set->dlen);
-	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
-		*nft_set_ext_expiration(ext) =
-			get_jiffies_64() + timeout;
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+		*nft_set_ext_expiration(ext) = get_jiffies_64() + expiration;
+		if (expiration == 0)
+			*nft_set_ext_expiration(ext) += timeout;
+	}
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
 		*nft_set_ext_timeout(ext) = timeout;
 
@@ -4250,11 +4465,12 @@
 	struct nft_trans *trans;
 	u32 flags = 0;
 	u64 timeout;
+	u64 expiration;
 	u8 ulen;
 	int err;
 
-	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
-			       nft_set_elem_policy, NULL);
+	err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
+					  nft_set_elem_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -4293,6 +4509,16 @@
 		timeout = set->timeout;
 	}
 
+	expiration = 0;
+	if (nla[NFTA_SET_ELEM_EXPIRATION] != NULL) {
+		if (!(set->flags & NFT_SET_TIMEOUT))
+			return -EINVAL;
+		err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_EXPIRATION],
+					    &expiration);
+		if (err)
+			return err;
+	}
+
 	err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1,
 			    nla[NFTA_SET_ELEM_KEY]);
 	if (err < 0)
@@ -4313,7 +4539,8 @@
 			err = -EINVAL;
 			goto err2;
 		}
-		obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
+		obj = nft_obj_lookup(ctx->net, ctx->table,
+				     nla[NFTA_SET_ELEM_OBJREF],
 				     set->objtype, genmask);
 		if (IS_ERR(obj)) {
 			err = PTR_ERR(obj);
@@ -4374,7 +4601,7 @@
 
 	err = -ENOMEM;
 	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data,
-				      timeout, GFP_KERNEL);
+				      timeout, expiration, GFP_KERNEL);
 	if (elem.priv == NULL)
 		goto err3;
 
@@ -4435,6 +4662,8 @@
 err5:
 	kfree(trans);
 err4:
+	if (obj)
+		obj->use--;
 	kfree(elem.priv);
 err3:
 	if (nla[NFTA_SET_ELEM_DATA] != NULL)
@@ -4544,8 +4773,8 @@
 	void *priv;
 	int err;
 
-	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
-			       nft_set_elem_policy, NULL);
+	err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
+					  nft_set_elem_policy, NULL);
 	if (err < 0)
 		goto err1;
 
@@ -4574,7 +4803,7 @@
 
 	err = -ENOMEM;
 	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0,
-				      GFP_KERNEL);
+				      0, GFP_KERNEL);
 	if (elem.priv == NULL)
 		goto err2;
 
@@ -4746,18 +4975,36 @@
 }
 EXPORT_SYMBOL_GPL(nft_unregister_obj);
 
-struct nft_object *nft_obj_lookup(const struct nft_table *table,
+struct nft_object *nft_obj_lookup(const struct net *net,
+				  const struct nft_table *table,
 				  const struct nlattr *nla, u32 objtype,
 				  u8 genmask)
 {
+	struct nft_object_hash_key k = { .table = table };
+	char search[NFT_OBJ_MAXNAMELEN];
+	struct rhlist_head *tmp, *list;
 	struct nft_object *obj;
 
-	list_for_each_entry_rcu(obj, &table->objects, list) {
-		if (!nla_strcmp(nla, obj->name) &&
-		    objtype == obj->ops->type->type &&
-		    nft_active_genmask(obj, genmask))
+	nla_strlcpy(search, nla, sizeof(search));
+	k.name = search;
+
+	WARN_ON_ONCE(!rcu_read_lock_held() &&
+		     !lockdep_commit_lock_is_held(net));
+
+	rcu_read_lock();
+	list = rhltable_lookup(&nft_objname_ht, &k, nft_objname_ht_params);
+	if (!list)
+		goto out;
+
+	rhl_for_each_entry_rcu(obj, tmp, list, rhlhead) {
+		if (objtype == obj->ops->type->type &&
+		    nft_active_genmask(obj, genmask)) {
+			rcu_read_unlock();
 			return obj;
+		}
 	}
+out:
+	rcu_read_unlock();
 	return ERR_PTR(-ENOENT);
 }
 EXPORT_SYMBOL_GPL(nft_obj_lookup);
@@ -4801,8 +5048,8 @@
 		goto err1;
 
 	if (attr) {
-		err = nla_parse_nested(tb, type->maxattr, attr, type->policy,
-				       NULL);
+		err = nla_parse_nested_deprecated(tb, type->maxattr, attr,
+						  type->policy, NULL);
 		if (err < 0)
 			goto err2;
 	} else {
@@ -4845,7 +5092,7 @@
 {
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, attr);
+	nest = nla_nest_start_noflag(skb, attr);
 	if (!nest)
 		goto nla_put_failure;
 	if (obj->ops->dump(skb, obj, reset) < 0)
@@ -4888,6 +5135,38 @@
 	return ERR_PTR(-ENOENT);
 }
 
+static int nf_tables_updobj(const struct nft_ctx *ctx,
+			    const struct nft_object_type *type,
+			    const struct nlattr *attr,
+			    struct nft_object *obj)
+{
+	struct nft_object *newobj;
+	struct nft_trans *trans;
+	int err;
+
+	trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
+				sizeof(struct nft_trans_obj));
+	if (!trans)
+		return -ENOMEM;
+
+	newobj = nft_obj_init(ctx, type, attr);
+	if (IS_ERR(newobj)) {
+		err = PTR_ERR(newobj);
+		goto err_free_trans;
+	}
+
+	nft_trans_obj(trans) = obj;
+	nft_trans_obj_update(trans) = true;
+	nft_trans_obj_newobj(trans) = newobj;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+	return 0;
+
+err_free_trans:
+	kfree(trans);
+	return err;
+}
+
 static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[],
@@ -4915,7 +5194,7 @@
 	}
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-	obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+	obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		if (err != -ENOENT) {
@@ -4927,7 +5206,13 @@
 			NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
 			return -EEXIST;
 		}
-		return 0;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+
+		type = nft_obj_type_get(net, objtype);
+		nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+
+		return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
 	}
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
@@ -4941,11 +5226,11 @@
 		err = PTR_ERR(obj);
 		goto err1;
 	}
-	obj->table = table;
+	obj->key.table = table;
 	obj->handle = nf_tables_alloc_handle(table);
 
-	obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
-	if (!obj->name) {
+	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+	if (!obj->key.name) {
 		err = -ENOMEM;
 		goto err2;
 	}
@@ -4954,11 +5239,20 @@
 	if (err < 0)
 		goto err3;
 
+	err = rhltable_insert(&nft_objname_ht, &obj->rhlhead,
+			      nft_objname_ht_params);
+	if (err < 0)
+		goto err4;
+
 	list_add_tail_rcu(&obj->list, &table->objects);
 	table->use++;
 	return 0;
+err4:
+	/* queued in transaction log */
+	INIT_LIST_HEAD(&obj->list);
+	return err;
 err3:
-	kfree(obj->name);
+	kfree(obj->key.name);
 err2:
 	if (obj->ops->destroy)
 		obj->ops->destroy(&ctx, obj);
@@ -4987,7 +5281,7 @@
 	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
 
 	if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
-	    nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
+	    nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) ||
 	    nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
 	    nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
 	    nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) ||
@@ -5142,7 +5436,7 @@
 	}
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-	obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+	obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
 	if (IS_ERR(obj)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
 		return PTR_ERR(obj);
@@ -5173,7 +5467,7 @@
 		obj->ops->destroy(ctx, obj);
 
 	module_put(obj->ops->type->owner);
-	kfree(obj->name);
+	kfree(obj->key.name);
 	kfree(obj);
 }
 
@@ -5207,7 +5501,7 @@
 		obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask);
 	} else {
 		attr = nla[NFTA_OBJ_NAME];
-		obj = nft_obj_lookup(table, attr, objtype, genmask);
+		obj = nft_obj_lookup(net, table, attr, objtype, genmask);
 	}
 
 	if (IS_ERR(obj)) {
@@ -5224,7 +5518,7 @@
 	return nft_delobj(&ctx, obj);
 }
 
-void nft_obj_notify(struct net *net, struct nft_table *table,
+void nft_obj_notify(struct net *net, const struct nft_table *table,
 		    struct nft_object *obj, u32 portid, u32 seq, int event,
 		    int family, int report, gfp_t gfp)
 {
@@ -5302,6 +5596,22 @@
 }
 EXPORT_SYMBOL_GPL(nft_flowtable_lookup);
 
+void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+				    struct nft_flowtable *flowtable,
+				    enum nft_trans_phase phase)
+{
+	switch (phase) {
+	case NFT_TRANS_PREPARE:
+	case NFT_TRANS_ABORT:
+	case NFT_TRANS_RELEASE:
+		flowtable->use--;
+		/* fall through */
+	default:
+		return;
+	}
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_flowtable);
+
 static struct nft_flowtable *
 nft_flowtable_lookup_byhandle(const struct nft_table *table,
 			      const struct nlattr *nla, u8 genmask)
@@ -5369,8 +5679,8 @@
 	int hooknum, priority;
 	int err, n = 0, i;
 
-	err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
-			       nft_flowtable_hook_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
+					  nft_flowtable_hook_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -5653,12 +5963,14 @@
 			 NFTA_FLOWTABLE_PAD))
 		goto nla_put_failure;
 
-	nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
+	nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK);
+	if (!nest)
+		goto nla_put_failure;
 	if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
 	    nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
 		goto nla_put_failure;
 
-	nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS);
+	nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS);
 	if (!nest_devs)
 		goto nla_put_failure;
 
@@ -6126,6 +6438,27 @@
 	return 0;
 }
 
+/* a drop policy has to be deferred until all rules have been activated,
+ * otherwise a large ruleset that contains a drop-policy base chain will
+ * cause all packets to get dropped until the full transaction has been
+ * processed.
+ *
+ * We defer the drop policy until the transaction has been finalized.
+ */
+static void nft_chain_commit_drop_policy(struct nft_trans *trans)
+{
+	struct nft_base_chain *basechain;
+
+	if (nft_trans_chain_policy(trans) != NF_DROP)
+		return;
+
+	if (!nft_is_base_chain(trans->ctx.chain))
+		return;
+
+	basechain = nft_base_chain(trans->ctx.chain);
+	basechain->policy = NF_DROP;
+}
+
 static void nft_chain_commit_update(struct nft_trans *trans)
 {
 	struct nft_base_chain *basechain;
@@ -6144,8 +6477,9 @@
 	if (!nft_is_base_chain(trans->ctx.chain))
 		return;
 
+	nft_chain_stats_replace(trans);
+
 	basechain = nft_base_chain(trans->ctx.chain);
-	nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
 
 	switch (nft_trans_chain_policy(trans)) {
 	case NF_DROP:
@@ -6155,6 +6489,20 @@
 	}
 }
 
+static void nft_obj_commit_update(struct nft_trans *trans)
+{
+	struct nft_object *newobj;
+	struct nft_object *obj;
+
+	obj = nft_trans_obj(trans);
+	newobj = nft_trans_obj_newobj(trans);
+
+	if (obj->ops->update)
+		obj->ops->update(obj, newobj);
+
+	kfree(newobj);
+}
+
 static void nft_commit_release(struct nft_trans *trans)
 {
 	switch (trans->msg_type) {
@@ -6162,6 +6510,7 @@
 		nf_tables_table_destroy(&trans->ctx);
 		break;
 	case NFT_MSG_NEWCHAIN:
+		free_percpu(nft_trans_chain_stats(trans));
 		kfree(nft_trans_chain_name(trans));
 		break;
 	case NFT_MSG_DELCHAIN:
@@ -6185,19 +6534,28 @@
 		nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
 		break;
 	}
+
+	if (trans->put_net)
+		put_net(trans->ctx.net);
+
 	kfree(trans);
 }
 
-static void nf_tables_commit_release(struct net *net)
+static void nf_tables_trans_destroy_work(struct work_struct *w)
 {
 	struct nft_trans *trans, *next;
+	LIST_HEAD(head);
 
-	if (list_empty(&net->nft.commit_list))
+	spin_lock(&nf_tables_destroy_list_lock);
+	list_splice_init(&nf_tables_destroy_list, &head);
+	spin_unlock(&nf_tables_destroy_list_lock);
+
+	if (list_empty(&head))
 		return;
 
 	synchronize_rcu();
 
-	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+	list_for_each_entry_safe(trans, next, &head, list) {
 		list_del(&trans->list);
 		nft_commit_release(trans);
 	}
@@ -6319,6 +6677,12 @@
 		nf_tables_commit_chain_free_rules_old(g0);
 }
 
+static void nft_obj_del(struct nft_object *obj)
+{
+	rhltable_remove(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params);
+	list_del_rcu(&obj->list);
+}
+
 static void nft_chain_del(struct nft_chain *chain)
 {
 	struct nft_table *table = chain->table;
@@ -6328,17 +6692,58 @@
 	list_del_rcu(&chain->list);
 }
 
+static void nf_tables_commit_release(struct net *net)
+{
+	struct nft_trans *trans;
+
+	/* all side effects have to be made visible.
+	 * For example, if a chain named 'foo' has been deleted, a
+	 * new transaction must not find it anymore.
+	 *
+	 * Memory reclaim happens asynchronously from work queue
+	 * to prevent expensive synchronize_rcu() in commit phase.
+	 */
+	if (list_empty(&net->nft.commit_list)) {
+		mutex_unlock(&net->nft.commit_mutex);
+		return;
+	}
+
+	trans = list_last_entry(&net->nft.commit_list,
+				struct nft_trans, list);
+	get_net(trans->ctx.net);
+	WARN_ON_ONCE(trans->put_net);
+
+	trans->put_net = true;
+	spin_lock(&nf_tables_destroy_list_lock);
+	list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list);
+	spin_unlock(&nf_tables_destroy_list_lock);
+
+	mutex_unlock(&net->nft.commit_mutex);
+
+	schedule_work(&trans_destroy_work);
+}
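Distilled, the pattern the comment above describes is: move the batch onto a
global list under a spinlock, then let a work item pay for the RCU grace
period. A minimal self-contained sketch with hypothetical names, mirroring
the logic of nf_tables_trans_destroy_work() above:

	struct item {
		struct list_head list;
	};

	static LIST_HEAD(reclaim_list);
	static DEFINE_SPINLOCK(reclaim_lock);

	static void reclaim_work_fn(struct work_struct *w)
	{
		struct item *it, *next;
		LIST_HEAD(head);

		spin_lock(&reclaim_lock);
		list_splice_init(&reclaim_list, &head);
		spin_unlock(&reclaim_lock);

		if (list_empty(&head))
			return;

		synchronize_rcu();	/* waited for here, not in the commit path */

		list_for_each_entry_safe(it, next, &head, list) {
			list_del(&it->list);
			kfree(it);
		}
	}
	static DECLARE_WORK(reclaim_work, reclaim_work_fn);

The get_net()/put_net() pairing matters here: the last transaction of the
batch pins the netns before the splice, and nft_commit_release() drops that
reference only after reclaim, so trans->ctx.net stays valid for the worker.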
+
 static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 {
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
 	struct nft_chain *chain;
 	struct nft_table *table;
+	int err;
+
+	if (list_empty(&net->nft.commit_list)) {
+		mutex_unlock(&net->nft.commit_mutex);
+		return 0;
+	}
 
 	/* 0. Validate ruleset, otherwise roll back for error reporting. */
 	if (nf_tables_validate(net) < 0)
 		return -EAGAIN;
 
+	err = nft_flow_rule_offload_commit(net);
+	if (err < 0)
+		return err;
+
 	/* 1.  Allocate space for next generation rules_gen_X[] */
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		int ret;
@@ -6395,6 +6800,7 @@
 				nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
 				/* trans destroyed after rcu grace period */
 			} else {
+				nft_chain_commit_drop_policy(trans);
 				nft_clear(net, trans->ctx.chain);
 				nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
 				nft_trans_destroy(trans);
@@ -6419,6 +6825,9 @@
 			nf_tables_rule_notify(&trans->ctx,
 					      nft_trans_rule(trans),
 					      NFT_MSG_DELRULE);
+			nft_rule_expr_deactivate(&trans->ctx,
+						 nft_trans_rule(trans),
+						 NFT_TRANS_COMMIT);
 			break;
 		case NFT_MSG_NEWSET:
 			nft_clear(net, nft_trans_set(trans));
@@ -6458,13 +6867,21 @@
 			te->set->ndeact--;
 			break;
 		case NFT_MSG_NEWOBJ:
-			nft_clear(net, nft_trans_obj(trans));
-			nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
-					     NFT_MSG_NEWOBJ);
-			nft_trans_destroy(trans);
+			if (nft_trans_obj_update(trans)) {
+				nft_obj_commit_update(trans);
+				nf_tables_obj_notify(&trans->ctx,
+						     nft_trans_obj(trans),
+						     NFT_MSG_NEWOBJ);
+			} else {
+				nft_clear(net, nft_trans_obj(trans));
+				nf_tables_obj_notify(&trans->ctx,
+						     nft_trans_obj(trans),
+						     NFT_MSG_NEWOBJ);
+				nft_trans_destroy(trans);
+			}
 			break;
 		case NFT_MSG_DELOBJ:
-			list_del_rcu(&nft_trans_obj(trans)->list);
+			nft_obj_del(nft_trans_obj(trans));
 			nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
 					     NFT_MSG_DELOBJ);
 			break;
@@ -6486,9 +6903,8 @@
 		}
 	}
 
-	nf_tables_commit_release(net);
 	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
-	mutex_unlock(&net->nft.commit_mutex);
+	nf_tables_commit_release(net);
 
 	return 0;
 }
@@ -6567,7 +6983,9 @@
 		case NFT_MSG_NEWRULE:
 			trans->ctx.chain->use--;
 			list_del_rcu(&nft_trans_rule(trans)->list);
-			nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans));
+			nft_rule_expr_deactivate(&trans->ctx,
+						 nft_trans_rule(trans),
+						 NFT_TRANS_ABORT);
 			break;
 		case NFT_MSG_DELRULE:
 			trans->ctx.chain->use++;
@@ -6577,6 +6995,10 @@
 			break;
 		case NFT_MSG_NEWSET:
 			trans->ctx.table->use--;
+			if (nft_trans_set_bound(trans)) {
+				nft_trans_destroy(trans);
+				break;
+			}
 			list_del_rcu(&nft_trans_set(trans)->list);
 			break;
 		case NFT_MSG_DELSET:
@@ -6585,8 +7007,11 @@
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWSETELEM:
+			if (nft_trans_elem_set_bound(trans)) {
+				nft_trans_destroy(trans);
+				break;
+			}
 			te = (struct nft_trans_elem *)trans->data;
-
 			te->set->ops->remove(net, te->set, &te->elem);
 			atomic_dec(&te->set->nelems);
 			break;
@@ -6600,8 +7025,13 @@
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWOBJ:
-			trans->ctx.table->use--;
-			list_del_rcu(&nft_trans_obj(trans)->list);
+			if (nft_trans_obj_update(trans)) {
+				kfree(nft_trans_obj_newobj(trans));
+				nft_trans_destroy(trans);
+			} else {
+				trans->ctx.table->use--;
+				nft_obj_del(nft_trans_obj(trans));
+			}
 			break;
 		case NFT_MSG_DELOBJ:
 			trans->ctx.table->use++;
@@ -6962,8 +7392,8 @@
 	struct nft_chain *chain;
 	int err;
 
-	err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_VERDICT_MAX, nla,
+					  nft_verdict_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -7021,7 +7451,7 @@
 {
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, type);
+	nest = nla_nest_start_noflag(skb, type);
 	if (!nest)
 		goto nla_put_failure;
 
@@ -7093,7 +7523,8 @@
 	struct nlattr *tb[NFTA_DATA_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla,
+					  nft_data_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -7134,7 +7565,7 @@
 	struct nlattr *nest;
 	int err;
 
-	nest = nla_nest_start(skb, attr);
+	nest = nla_nest_start_noflag(skb, attr);
 	if (nest == NULL)
 		return -1;
 
@@ -7159,7 +7590,8 @@
 {
 	struct nft_rule *rule, *nr;
 
-	BUG_ON(!nft_is_base_chain(ctx->chain));
+	if (WARN_ON(!nft_is_base_chain(ctx->chain)))
+		return 0;
 
 	nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
 	list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
@@ -7193,9 +7625,6 @@
 
 		list_for_each_entry(chain, &table->chains, list)
 			nf_tables_unregister_hook(net, table, chain);
-		list_for_each_entry(flowtable, &table->flowtables, list)
-			nf_unregister_net_hooks(net, flowtable->ops,
-						flowtable->ops_len);
 		/* No packets are walking on these chains anymore. */
 		ctx.table = table;
 		list_for_each_entry(chain, &table->chains, list) {
@@ -7217,7 +7646,7 @@
 			nft_set_destroy(set);
 		}
 		list_for_each_entry_safe(obj, ne, &table->objects, list) {
-			list_del(&obj->list);
+			nft_obj_del(obj);
 			table->use--;
 			nft_obj_destroy(&ctx, obj);
 		}
@@ -7262,6 +7691,7 @@
 {
 	int err;
 
+	spin_lock_init(&nf_tables_destroy_list_lock);
 	err = register_pernet_subsys(&nf_tables_net_ops);
 	if (err < 0)
 		return err;
@@ -7278,12 +7708,26 @@
 	if (err < 0)
 		goto err3;
 
-	/* must be last */
-	err = nfnetlink_subsys_register(&nf_tables_subsys);
+	err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params);
 	if (err < 0)
 		goto err4;
 
+	err = nft_offload_init();
+	if (err < 0)
+		goto err5;
+
+	/* must be last */
+	err = nfnetlink_subsys_register(&nf_tables_subsys);
+	if (err < 0)
+		goto err6;
+
+	nft_chain_route_init();
+
 	return err;
+err6:
+	nft_offload_exit();
+err5:
+	rhltable_destroy(&nft_objname_ht);
 err4:
 	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
 err3:
@@ -7298,10 +7742,14 @@
 static void __exit nf_tables_module_exit(void)
 {
 	nfnetlink_subsys_unregister(&nf_tables_subsys);
+	nft_offload_exit();
 	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
 	nft_chain_filter_fini();
+	nft_chain_route_fini();
 	unregister_pernet_subsys(&nf_tables_net_ops);
+	cancel_work_sync(&trans_destroy_work);
 	rcu_barrier();
+	rhltable_destroy(&nft_objname_ht);
 	nf_tables_core_module_exit();
 }
 
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index ffd5c0f..96c74c4 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -22,6 +19,7 @@
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_log.h>
+#include <net/netfilter/nft_meta.h>
 
 static noinline void __nft_trace_packet(struct nft_traceinfo *info,
 					const struct nft_chain *chain,
@@ -98,21 +96,23 @@
 					    const struct nft_pktinfo *pkt)
 {
 	struct nft_base_chain *base_chain;
+	struct nft_stats __percpu *pstats;
 	struct nft_stats *stats;
 
 	base_chain = nft_base_chain(chain);
-	if (!base_chain->stats)
-		return;
 
-	local_bh_disable();
-	stats = this_cpu_ptr(rcu_dereference(base_chain->stats));
-	if (stats) {
+	rcu_read_lock();
+	pstats = READ_ONCE(base_chain->stats);
+	if (pstats) {
+		local_bh_disable();
+		stats = this_cpu_ptr(pstats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->pkts++;
 		stats->bytes += pkt->skb->len;
 		u64_stats_update_end(&stats->syncp);
+		local_bh_enable();
 	}
-	local_bh_enable();
+	rcu_read_unlock();
 }
 
 struct nft_jumpstack {
@@ -124,14 +124,25 @@
 			       struct nft_regs *regs,
 			       struct nft_pktinfo *pkt)
 {
+#ifdef CONFIG_RETPOLINE
 	unsigned long e = (unsigned long)expr->ops->eval;
+#define X(e, fun) \
+	do { if ((e) == (unsigned long)(fun)) \
+		return fun(expr, regs, pkt); } while (0)
 
-	if (e == (unsigned long)nft_meta_get_eval)
-		nft_meta_get_eval(expr, regs, pkt);
-	else if (e == (unsigned long)nft_lookup_eval)
-		nft_lookup_eval(expr, regs, pkt);
-	else
-		expr->ops->eval(expr, regs, pkt);
+	X(e, nft_payload_eval);
+	X(e, nft_cmp_eval);
+	X(e, nft_meta_get_eval);
+	X(e, nft_lookup_eval);
+	X(e, nft_range_eval);
+	X(e, nft_immediate_eval);
+	X(e, nft_byteorder_eval);
+	X(e, nft_dynset_eval);
+	X(e, nft_rt_get_eval);
+	X(e, nft_bitwise_eval);
+#undef  X
+#endif /* CONFIG_RETPOLINE */
+	expr->ops->eval(expr, regs, pkt);
 }
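With retpolines enabled, every expr->ops->eval() would otherwise go through
an indirect-branch thunk. The X() macro above devirtualizes the hot
evaluators by comparing the function pointer against a known list and calling
the match directly. The same trick as a self-contained sketch (hypothetical
handler names):

	typedef void (*eval_fn)(int pkt);

	static void eval_a(int pkt) { (void)pkt; /* ... */ }
	static void eval_b(int pkt) { (void)pkt; /* ... */ }

	static void dispatch(eval_fn e, int pkt)
	{
	#define X(e, fun) \
		do { if ((e) == (fun)) { fun(pkt); return; } } while (0)
		X(e, eval_a);	/* match -> direct call, no retpoline thunk */
		X(e, eval_b);
	#undef X
		e(pkt);		/* miss -> fall back to the indirect call */
	}

Each miss costs only a pointer compare, so the list is worth keeping short
and ordered with the most frequently hit evaluators first.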
 
 unsigned int
@@ -210,7 +221,6 @@
 		chain = regs.verdict.chain;
 		goto do_chain;
 	case NFT_CONTINUE:
-		/* fall through */
 	case NFT_RETURN:
 		nft_trace_packet(&info, chain, rule,
 				 NFT_TRACETYPE_RETURN);
@@ -249,12 +259,24 @@
 	&nft_exthdr_type,
 };
 
+static struct nft_object_type *nft_basic_objects[] = {
+#ifdef CONFIG_NETWORK_SECMARK
+	&nft_secmark_obj_type,
+#endif
+};
+
 int __init nf_tables_core_module_init(void)
 {
-	int err, i;
+	int err, i, j = 0;
 
-	for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) {
-		err = nft_register_expr(nft_basic_types[i]);
+	for (i = 0; i < ARRAY_SIZE(nft_basic_objects); i++) {
+		err = nft_register_obj(nft_basic_objects[i]);
+		if (err)
+			goto err;
+	}
+
+	for (j = 0; j < ARRAY_SIZE(nft_basic_types); j++) {
+		err = nft_register_expr(nft_basic_types[j]);
 		if (err)
 			goto err;
 	}
@@ -262,8 +284,12 @@
 	return 0;
 
 err:
+	while (j-- > 0)
+		nft_unregister_expr(nft_basic_types[j]);
+
 	while (i-- > 0)
-		nft_unregister_expr(nft_basic_types[i]);
+		nft_unregister_obj(nft_basic_objects[i]);
+
 	return err;
 }
 
@@ -274,4 +300,8 @@
 	i = ARRAY_SIZE(nft_basic_types);
 	while (i-- > 0)
 		nft_unregister_expr(nft_basic_types[i]);
+
+	i = ARRAY_SIZE(nft_basic_objects);
+	while (i-- > 0)
+		nft_unregister_obj(nft_basic_objects[i]);
 }
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
new file mode 100644
index 0000000..e25dab8
--- /dev/null
+++ b/net/netfilter/nf_tables_offload.c
@@ -0,0 +1,484 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
+#include <net/pkt_cls.h>
+
+static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
+{
+	struct nft_flow_rule *flow;
+
+	flow = kzalloc(sizeof(struct nft_flow_rule), GFP_KERNEL);
+	if (!flow)
+		return NULL;
+
+	flow->rule = flow_rule_alloc(num_actions);
+	if (!flow->rule) {
+		kfree(flow);
+		return NULL;
+	}
+
+	flow->rule->match.dissector	= &flow->match.dissector;
+	flow->rule->match.mask		= &flow->match.mask;
+	flow->rule->match.key		= &flow->match.key;
+
+	return flow;
+}
+
+struct nft_flow_rule *nft_flow_rule_create(struct net *net,
+					   const struct nft_rule *rule)
+{
+	struct nft_offload_ctx *ctx;
+	struct nft_flow_rule *flow;
+	int num_actions = 0, err;
+	struct nft_expr *expr;
+
+	expr = nft_expr_first(rule);
+	while (expr->ops && expr != nft_expr_last(rule)) {
+		if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
+			num_actions++;
+
+		expr = nft_expr_next(expr);
+	}
+
+	flow = nft_flow_rule_alloc(num_actions);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	expr = nft_expr_first(rule);
+
+	ctx = kzalloc(sizeof(struct nft_offload_ctx), GFP_KERNEL);
+	if (!ctx) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	ctx->net = net;
+	ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
+
+	while (expr->ops && expr != nft_expr_last(rule)) {
+		if (!expr->ops->offload) {
+			err = -EOPNOTSUPP;
+			goto err_out;
+		}
+		err = expr->ops->offload(ctx, flow, expr);
+		if (err < 0)
+			goto err_out;
+
+		expr = nft_expr_next(expr);
+	}
+	flow->proto = ctx->dep.l3num;
+	kfree(ctx);
+
+	return flow;
+err_out:
+	kfree(ctx);
+	nft_flow_rule_destroy(flow);
+
+	return ERR_PTR(err);
+}
+
+void nft_flow_rule_destroy(struct nft_flow_rule *flow)
+{
+	struct flow_action_entry *entry;
+	int i;
+
+	flow_action_for_each(i, entry, &flow->rule->action) {
+		switch (entry->id) {
+		case FLOW_ACTION_REDIRECT:
+		case FLOW_ACTION_MIRRED:
+			dev_put(entry->dev);
+			break;
+		default:
+			break;
+		}
+	}
+	kfree(flow->rule);
+	kfree(flow);
+}
+
+void nft_offload_set_dependency(struct nft_offload_ctx *ctx,
+				enum nft_offload_dep_type type)
+{
+	ctx->dep.type = type;
+}
+
+void nft_offload_update_dependency(struct nft_offload_ctx *ctx,
+				   const void *data, u32 len)
+{
+	switch (ctx->dep.type) {
+	case NFT_OFFLOAD_DEP_NETWORK:
+		WARN_ON(len != sizeof(__u16));
+		memcpy(&ctx->dep.l3num, data, sizeof(__u16));
+		break;
+	case NFT_OFFLOAD_DEP_TRANSPORT:
+		WARN_ON(len != sizeof(__u8));
+		memcpy(&ctx->dep.protonum, data, sizeof(__u8));
+		break;
+	default:
+		break;
+	}
+	ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
+}
+
+static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
+					 __be16 proto, int priority,
+					 struct netlink_ext_ack *extack)
+{
+	common->protocol = proto;
+	common->prio = priority;
+	common->extack = extack;
+}
+
+static int nft_setup_cb_call(struct nft_base_chain *basechain,
+			     enum tc_setup_type type, void *type_data)
+{
+	struct flow_block_cb *block_cb;
+	int err;
+
+	list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
+		err = block_cb->cb(type, type_data, block_cb->cb_priv);
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
+int nft_chain_offload_priority(struct nft_base_chain *basechain)
+{
+	if (basechain->ops.priority <= 0 ||
+	    basechain->ops.priority > USHRT_MAX)
+		return -1;
+
+	return 0;
+}
+
+static int nft_flow_offload_rule(struct nft_chain *chain,
+				 struct nft_rule *rule,
+				 struct nft_flow_rule *flow,
+				 enum flow_cls_command command)
+{
+	struct flow_cls_offload cls_flow = {};
+	struct nft_base_chain *basechain;
+	struct netlink_ext_ack extack;
+	__be16 proto = ETH_P_ALL;
+
+	if (!nft_is_base_chain(chain))
+		return -EOPNOTSUPP;
+
+	basechain = nft_base_chain(chain);
+
+	if (flow)
+		proto = flow->proto;
+
+	nft_flow_offload_common_init(&cls_flow.common, proto,
+				     basechain->ops.priority, &extack);
+	cls_flow.command = command;
+	cls_flow.cookie = (unsigned long) rule;
+	if (flow)
+		cls_flow.rule = flow->rule;
+
+	return nft_setup_cb_call(basechain, TC_SETUP_CLSFLOWER, &cls_flow);
+}
+
+static int nft_flow_offload_bind(struct flow_block_offload *bo,
+				 struct nft_base_chain *basechain)
+{
+	list_splice(&bo->cb_list, &basechain->flow_block.cb_list);
+	return 0;
+}
+
+static int nft_flow_offload_unbind(struct flow_block_offload *bo,
+				   struct nft_base_chain *basechain)
+{
+	struct flow_block_cb *block_cb, *next;
+
+	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+		list_del(&block_cb->list);
+		flow_block_cb_free(block_cb);
+	}
+
+	return 0;
+}
+
+static int nft_block_setup(struct nft_base_chain *basechain,
+			   struct flow_block_offload *bo,
+			   enum flow_block_command cmd)
+{
+	int err;
+
+	switch (cmd) {
+	case FLOW_BLOCK_BIND:
+		err = nft_flow_offload_bind(bo, basechain);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		err = nft_flow_offload_unbind(bo, basechain);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static int nft_block_offload_cmd(struct nft_base_chain *chain,
+				 struct net_device *dev,
+				 enum flow_block_command cmd)
+{
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+	int err;
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	if (err < 0)
+		return err;
+
+	return nft_block_setup(chain, &bo, cmd);
+}
+
+static void nft_indr_block_ing_cmd(struct net_device *dev,
+				   struct nft_base_chain *chain,
+				   flow_indr_block_bind_cb_t *cb,
+				   void *cb_priv,
+				   enum flow_block_command cmd)
+{
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+
+	if (!chain)
+		return;
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
+
+	nft_block_setup(chain, &bo, cmd);
+}
+
+static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
+				      struct net_device *dev,
+				      enum flow_block_command cmd)
+{
+	struct flow_block_offload bo = {};
+	struct netlink_ext_ack extack = {};
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	flow_indr_block_call(dev, &bo, cmd);
+
+	if (list_empty(&bo.cb_list))
+		return -EOPNOTSUPP;
+
+	return nft_block_setup(chain, &bo, cmd);
+}
+
+#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
+
+static int nft_flow_offload_chain(struct nft_chain *chain,
+				  u8 *ppolicy,
+				  enum flow_block_command cmd)
+{
+	struct nft_base_chain *basechain;
+	struct net_device *dev;
+	u8 policy;
+
+	if (!nft_is_base_chain(chain))
+		return -EOPNOTSUPP;
+
+	basechain = nft_base_chain(chain);
+	dev = basechain->ops.dev;
+	if (!dev)
+		return -EOPNOTSUPP;
+
+	policy = ppolicy ? *ppolicy : basechain->policy;
+
+	/* Only default policy to accept is supported for now. */
+	if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP)
+		return -EOPNOTSUPP;
+
+	if (dev->netdev_ops->ndo_setup_tc)
+		return nft_block_offload_cmd(basechain, dev, cmd);
+	else
+		return nft_indr_block_offload_cmd(basechain, dev, cmd);
+}
+
+int nft_flow_rule_offload_commit(struct net *net)
+{
+	struct nft_trans *trans;
+	int err = 0;
+	u8 policy;
+
+	list_for_each_entry(trans, &net->nft.commit_list, list) {
+		if (trans->ctx.family != NFPROTO_NETDEV)
+			continue;
+
+		switch (trans->msg_type) {
+		case NFT_MSG_NEWCHAIN:
+			if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ||
+			    nft_trans_chain_update(trans))
+				continue;
+
+			policy = nft_trans_chain_policy(trans);
+			err = nft_flow_offload_chain(trans->ctx.chain, &policy,
+						     FLOW_BLOCK_BIND);
+			break;
+		case NFT_MSG_DELCHAIN:
+			if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+				continue;
+
+			policy = nft_trans_chain_policy(trans);
+			err = nft_flow_offload_chain(trans->ctx.chain, &policy,
+						     FLOW_BLOCK_UNBIND);
+			break;
+		case NFT_MSG_NEWRULE:
+			if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+				continue;
+
+			if (trans->ctx.flags & NLM_F_REPLACE ||
+			    !(trans->ctx.flags & NLM_F_APPEND))
+				return -EOPNOTSUPP;
+
+			err = nft_flow_offload_rule(trans->ctx.chain,
+						    nft_trans_rule(trans),
+						    nft_trans_flow_rule(trans),
+						    FLOW_CLS_REPLACE);
+			nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+			break;
+		case NFT_MSG_DELRULE:
+			if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+				continue;
+
+			err = nft_flow_offload_rule(trans->ctx.chain,
+						    nft_trans_rule(trans),
+						    nft_trans_flow_rule(trans),
+						    FLOW_CLS_DESTROY);
+			break;
+		}
+
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
+{
+	struct nft_base_chain *basechain;
+	struct net *net = dev_net(dev);
+	const struct nft_table *table;
+	struct nft_chain *chain;
+
+	list_for_each_entry(table, &net->nft.tables, list) {
+		if (table->family != NFPROTO_NETDEV)
+			continue;
+
+		list_for_each_entry(chain, &table->chains, list) {
+			if (!nft_is_base_chain(chain) ||
+			    !(chain->flags & NFT_CHAIN_HW_OFFLOAD))
+				continue;
+
+			basechain = nft_base_chain(chain);
+			if (strncmp(basechain->dev_name, dev->name, IFNAMSIZ))
+				continue;
+
+			return chain;
+		}
+	}
+
+	return NULL;
+}
+
+static void nft_indr_block_cb(struct net_device *dev,
+			      flow_indr_block_bind_cb_t *cb, void *cb_priv,
+			      enum flow_block_command cmd)
+{
+	struct net *net = dev_net(dev);
+	struct nft_chain *chain;
+
+	mutex_lock(&net->nft.commit_mutex);
+	chain = __nft_offload_get_chain(dev);
+	if (chain) {
+		struct nft_base_chain *basechain;
+
+		basechain = nft_base_chain(chain);
+		nft_indr_block_ing_cmd(dev, basechain, cb, cb_priv, cmd);
+	}
+	mutex_unlock(&net->nft.commit_mutex);
+}
+
+static void nft_offload_chain_clean(struct nft_chain *chain)
+{
+	struct nft_rule *rule;
+
+	list_for_each_entry(rule, &chain->rules, list) {
+		nft_flow_offload_rule(chain, rule,
+				      NULL, FLOW_CLS_DESTROY);
+	}
+
+	nft_flow_offload_chain(chain, NULL, FLOW_BLOCK_UNBIND);
+}
+
+static int nft_offload_netdev_event(struct notifier_block *this,
+				    unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+	struct nft_chain *chain;
+
+	mutex_lock(&net->nft.commit_mutex);
+	chain = __nft_offload_get_chain(dev);
+	if (chain)
+		nft_offload_chain_clean(chain);
+	mutex_unlock(&net->nft.commit_mutex);
+
+	return NOTIFY_DONE;
+}
+
+static struct flow_indr_block_ing_entry block_ing_entry = {
+	.cb	= nft_indr_block_cb,
+	.list	= LIST_HEAD_INIT(block_ing_entry.list),
+};
+
+static struct notifier_block nft_offload_netdev_notifier = {
+	.notifier_call	= nft_offload_netdev_event,
+};
+
+int nft_offload_init(void)
+{
+	int err;
+
+	err = register_netdevice_notifier(&nft_offload_netdev_notifier);
+	if (err < 0)
+		return err;
+
+	flow_indr_add_block_ing_cb(&block_ing_entry);
+
+	return 0;
+}
+
+void nft_offload_exit(void)
+{
+	flow_indr_del_block_ing_cb(&block_ing_entry);
+	unregister_netdevice_notifier(&nft_offload_netdev_notifier);
+}
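
The split in nft_flow_offload_chain() above mirrors the rest of the flow-block infrastructure: a device whose driver implements ndo_setup_tc gets the block bound directly via nft_block_offload_cmd(), everything else (e.g. tunnel netdevices) is reached through the indirect block machinery. For orientation, a minimal sketch of the driver side of the direct path — the foo_* names are hypothetical, flow_block_cb_setup_simple() is the real v5.4 helper:

static LIST_HEAD(foo_block_cb_list);

/* receives FLOW_CLS_REPLACE/DESTROY commands for individual rules */
static int foo_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	return -EOPNOTSUPP;	/* illustrative stub */
}

static int foo_setup_tc(struct net_device *dev, enum tc_setup_type type,
			void *type_data)
{
	switch (type) {
	case TC_SETUP_BLOCK:
		/* binds/unbinds the flow block and registers the rule cb */
		return flow_block_cb_setup_simple(type_data,
						  &foo_block_cb_list,
						  foo_setup_tc_block_cb,
						  dev, dev, true);
	default:
		return -EOPNOTSUPP;
	}
}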
diff --git a/net/netfilter/nf_tables_set_core.c b/net/netfilter/nf_tables_set_core.c
index 8147896..a9fce8d 100644
--- a/net/netfilter/nf_tables_set_core.c
+++ b/net/netfilter/nf_tables_set_core.c
@@ -1,4 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/module.h>
 #include <net/netfilter/nf_tables_core.h>
 
 static int __init nf_tables_set_module_init(void)
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index e1dc527..87b36da 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 2015 Red Hat GmbH
  * Author: Florian Westphal <fw@strlen.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 9169134..4abbb45 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -206,8 +206,9 @@
 			return -ENOMEM;
 		}
 
-		err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen,
-				ss->cb[cb_id].policy, extack);
+		err = nla_parse_deprecated(cda, ss->cb[cb_id].attr_count,
+					   attr, attrlen,
+					   ss->cb[cb_id].policy, extack);
 		if (err < 0) {
 			rcu_read_unlock();
 			return err;
@@ -421,8 +422,10 @@
 				goto ack;
 			}
 
-			err = nla_parse(cda, ss->cb[cb_id].attr_count, attr,
-					attrlen, ss->cb[cb_id].policy, NULL);
+			err = nla_parse_deprecated(cda,
+						   ss->cb[cb_id].attr_count,
+						   attr, attrlen,
+						   ss->cb[cb_id].policy, NULL);
 			if (err < 0)
 				goto ack;
 
@@ -520,8 +523,8 @@
 	if (skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
 		return;
 
-	err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy,
-			NULL);
+	err = nla_parse_deprecated(cda, NFNL_BATCH_MAX, attr, attrlen,
+				   nfnl_batch_policy, NULL);
 	if (err < 0) {
 		netlink_ack(skb, nlh, err, NULL);
 		return;
@@ -575,7 +578,7 @@
 	ss = nfnetlink_get_subsys(type << 8);
 	rcu_read_unlock();
 	if (!ss)
-		request_module("nfnetlink-subsys-%d", type);
+		request_module_nowait("nfnetlink-subsys-%d", type);
 	return 0;
 }
 #endif
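
The nla_parse() → nla_parse_deprecated() conversions in this file and the ones below come from the netlink strict-validation rework: plain nla_parse() now validates strictly (unknown attribute types and malformed nests are rejected), while the _deprecated variants keep the historical liberal behaviour for ABIs that predate strict mode. The v5.4 wrapper is roughly:

static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype,
				       const struct nlattr *head, int len,
				       const struct nla_policy *policy,
				       struct netlink_ext_ack *extack)
{
	/* NL_VALIDATE_LIBERAL: tolerate unknown attribute types */
	return __nla_parse(tb, maxtype, head, len, policy,
			   NL_VALIDATE_LIBERAL, extack);
}

The request_module_nowait() switch in the same file avoids blocking the nfnetlink receive path while a subsystem module loads.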
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 8fa8bf7..2481470 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
  * (C) 2011 Intra2net AG <http://www.intra2net.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation (or any later at your option).
  */
 #include <linux/init.h>
 #include <linux/module.h>
@@ -248,8 +245,8 @@
 	if (!attr)
 		return 0;
 
-	err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(tb, NFACCT_FILTER_MAX, attr,
+					  filter_policy, NULL);
 	if (err < 0)
 		return err;
 
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index e5d27b2..7525063 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * (C) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation (or any later at your option).
- *
  * This software has been sponsored by Vyatta Inc. <http://www.vyatta.com>
  */
 #include <linux/init.h>
@@ -78,8 +75,8 @@
 	int err;
 	struct nlattr *tb[NFCTH_TUPLE_MAX+1];
 
-	err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr,
-			       nfnl_cthelper_tuple_pol, NULL);
+	err = nla_parse_nested_deprecated(tb, NFCTH_TUPLE_MAX, attr,
+					  nfnl_cthelper_tuple_pol, NULL);
 	if (err < 0)
 		return err;
 
@@ -139,8 +136,8 @@
 	int err;
 	struct nlattr *tb[NFCTH_POLICY_MAX+1];
 
-	err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
-			       nfnl_cthelper_expect_pol, NULL);
+	err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr,
+					  nfnl_cthelper_expect_pol, NULL);
 	if (err < 0)
 		return err;
 
@@ -176,8 +173,9 @@
 	struct nlattr *tb[NFCTH_POLICY_SET_MAX+1];
 	unsigned int class_max;
 
-	ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
-			       nfnl_cthelper_expect_policy_set, NULL);
+	ret = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr,
+					  nfnl_cthelper_expect_policy_set,
+					  NULL);
 	if (ret < 0)
 		return ret;
 
@@ -289,8 +287,8 @@
 	struct nlattr *tb[NFCTH_POLICY_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
-			       nfnl_cthelper_expect_pol, NULL);
+	err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr,
+					  nfnl_cthelper_expect_pol, NULL);
 	if (err < 0)
 		return err;
 
@@ -361,8 +359,9 @@
 	unsigned int class_max;
 	int err;
 
-	err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
-			       nfnl_cthelper_expect_policy_set, NULL);
+	err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr,
+					  nfnl_cthelper_expect_policy_set,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -462,7 +461,7 @@
 {
 	struct nlattr *nest_parms;
 
-	nest_parms = nla_nest_start(skb, NFCTH_TUPLE | NLA_F_NESTED);
+	nest_parms = nla_nest_start(skb, NFCTH_TUPLE);
 	if (nest_parms == NULL)
 		goto nla_put_failure;
 
@@ -487,7 +486,7 @@
 	int i;
 	struct nlattr *nest_parms1, *nest_parms2;
 
-	nest_parms1 = nla_nest_start(skb, NFCTH_POLICY | NLA_F_NESTED);
+	nest_parms1 = nla_nest_start(skb, NFCTH_POLICY);
 	if (nest_parms1 == NULL)
 		goto nla_put_failure;
 
@@ -496,8 +495,7 @@
 		goto nla_put_failure;
 
 	for (i = 0; i < helper->expect_class_max + 1; i++) {
-		nest_parms2 = nla_nest_start(skb,
-				(NFCTH_POLICY_SET+i) | NLA_F_NESTED);
+		nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET + i));
 		if (nest_parms2 == NULL)
 			goto nla_put_failure;
 
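
Dropping the explicit NLA_F_NESTED in the dump paths here is not a behavioural change: nla_nest_start() now sets the flag itself, and the old flag-less semantics live on as nla_nest_start_noflag(). The v5.4 definition is simply:

static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
{
	return nla_nest_start_noflag(skb, attrtype | NLA_F_NESTED);
}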
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index a30f8ba..da915c2 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
  * (C) 2012 by Vyatta Inc. <http://www.vyatta.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation (or any later at your option).
  */
 #include <linux/init.h>
 #include <linux/module.h>
@@ -53,17 +50,17 @@
 	struct nlattr **tb;
 	int ret = 0;
 
-	if (!l4proto->ctnl_timeout.nlattr_to_obj)
-		return 0;
-
 	tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
 		     GFP_KERNEL);
 
 	if (!tb)
 		return -ENOMEM;
 
-	ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr,
-			       l4proto->ctnl_timeout.nla_policy, NULL);
+	ret = nla_parse_nested_deprecated(tb,
+					  l4proto->ctnl_timeout.nlattr_max,
+					  attr,
+					  l4proto->ctnl_timeout.nla_policy,
+					  NULL);
 	if (ret < 0)
 		goto err;
 
@@ -125,7 +122,7 @@
 		return -EBUSY;
 	}
 
-	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+	l4proto = nf_ct_l4proto_find(l4num);
 
 	/* This protocol is not supported, skip. */
 	if (l4proto->l4proto != l4num) {
@@ -155,7 +152,6 @@
 err:
 	kfree(timeout);
 err_proto_put:
-	nf_ct_l4proto_put(l4proto);
 	return ret;
 }
 
@@ -167,6 +163,8 @@
 	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
 	const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto;
+	struct nlattr *nest_parms;
+	int ret;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -186,22 +184,15 @@
 			 htonl(refcount_read(&timeout->refcnt))))
 		goto nla_put_failure;
 
-	if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
-		struct nlattr *nest_parms;
-		int ret;
+	nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA);
+	if (!nest_parms)
+		goto nla_put_failure;
 
-		nest_parms = nla_nest_start(skb,
-					    CTA_TIMEOUT_DATA | NLA_F_NESTED);
-		if (!nest_parms)
-			goto nla_put_failure;
+	ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->timeout.data);
+	if (ret < 0)
+		goto nla_put_failure;
 
-		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb,
-							&timeout->timeout.data);
-		if (ret < 0)
-			goto nla_put_failure;
-
-		nla_nest_end(skb, nest_parms);
-	}
+	nla_nest_end(skb, nest_parms);
 
 	nlmsg_end(skb, nlh);
 	return skb->len;
@@ -310,7 +301,6 @@
 	if (refcount_dec_if_one(&timeout->refcnt)) {
 		/* We are protected by nfnl mutex. */
 		list_del_rcu(&timeout->head);
-		nf_ct_l4proto_put(timeout->timeout.l4proto);
 		nf_ct_untimeout(net, &timeout->timeout);
 		kfree_rcu(timeout, rcu_head);
 	} else {
@@ -358,7 +348,6 @@
 				 struct netlink_ext_ack *extack)
 {
 	const struct nf_conntrack_l4proto *l4proto;
-	__u16 l3num;
 	__u8 l4num;
 	int ret;
 
@@ -367,9 +356,8 @@
 	    !cda[CTA_TIMEOUT_DATA])
 		return -EINVAL;
 
-	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
 	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
-	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+	l4proto = nf_ct_l4proto_find(l4num);
 
 	/* This protocol is not supported, skip. */
 	if (l4proto->l4proto != l4num) {
@@ -382,21 +370,22 @@
 	if (ret < 0)
 		goto err;
 
-	nf_ct_l4proto_put(l4proto);
 	return 0;
 err:
-	nf_ct_l4proto_put(l4proto);
 	return ret;
 }
 
 static int
 cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
-			    u32 seq, u32 type, int event,
-			    const struct nf_conntrack_l4proto *l4proto)
+			    u32 seq, u32 type, int event, u16 l3num,
+			    const struct nf_conntrack_l4proto *l4proto,
+			    const unsigned int *timeouts)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
+	struct nlattr *nest_parms;
+	int ret;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -408,25 +397,19 @@
 	nfmsg->version = NFNETLINK_V0;
 	nfmsg->res_id = 0;
 
-	if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) ||
+	if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) ||
 	    nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
 		goto nla_put_failure;
 
-	if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
-		struct nlattr *nest_parms;
-		int ret;
+	nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA);
+	if (!nest_parms)
+		goto nla_put_failure;
 
-		nest_parms = nla_nest_start(skb,
-					    CTA_TIMEOUT_DATA | NLA_F_NESTED);
-		if (!nest_parms)
-			goto nla_put_failure;
+	ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
+	if (ret < 0)
+		goto nla_put_failure;
 
-		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
-		if (ret < 0)
-			goto nla_put_failure;
-
-		nla_nest_end(skb, nest_parms);
-	}
+	nla_nest_end(skb, nest_parms);
 
 	nlmsg_end(skb, nlh);
 	return skb->len;
@@ -444,6 +427,7 @@
 				 struct netlink_ext_ack *extack)
 {
 	const struct nf_conntrack_l4proto *l4proto;
+	unsigned int *timeouts = NULL;
 	struct sk_buff *skb2;
 	int ret, err;
 	__u16 l3num;
@@ -454,14 +438,52 @@
 
 	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
 	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
-	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+	l4proto = nf_ct_l4proto_find(l4num);
 
-	/* This protocol is not supported, skip. */
-	if (l4proto->l4proto != l4num) {
-		err = -EOPNOTSUPP;
+	err = -EOPNOTSUPP;
+	if (l4proto->l4proto != l4num)
 		goto err;
+
+	switch (l4proto->l4proto) {
+	case IPPROTO_ICMP:
+		timeouts = &nf_icmp_pernet(net)->timeout;
+		break;
+	case IPPROTO_TCP:
+		timeouts = nf_tcp_pernet(net)->timeouts;
+		break;
+	case IPPROTO_UDP: /* fallthrough */
+	case IPPROTO_UDPLITE:
+		timeouts = nf_udp_pernet(net)->timeouts;
+		break;
+	case IPPROTO_DCCP:
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+		timeouts = nf_dccp_pernet(net)->dccp_timeout;
+#endif
+		break;
+	case IPPROTO_ICMPV6:
+		timeouts = &nf_icmpv6_pernet(net)->timeout;
+		break;
+	case IPPROTO_SCTP:
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+		timeouts = nf_sctp_pernet(net)->timeouts;
+#endif
+		break;
+	case IPPROTO_GRE:
+#ifdef CONFIG_NF_CT_PROTO_GRE
+		timeouts = nf_gre_pernet(net)->timeouts;
+#endif
+		break;
+	case 255: /* l4proto of the generic conntrack tracker */
+		timeouts = &nf_generic_pernet(net)->timeout;
+		break;
+	default:
+		WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
+		break;
 	}
 
+	if (!timeouts)
+		goto err;
+
 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (skb2 == NULL) {
 		err = -ENOMEM;
@@ -472,7 +494,7 @@
 					  nlh->nlmsg_seq,
 					  NFNL_MSG_TYPE(nlh->nlmsg_type),
 					  IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
-					  l4proto);
+					  l3num, l4proto, timeouts);
 	if (ret <= 0) {
 		kfree_skb(skb2);
 		err = -ENOMEM;
@@ -485,7 +507,6 @@
 	/* this avoids a loop in nfnetlink. */
 	return ret == -EAGAIN ? -ENOBUFS : ret;
 err:
-	nf_ct_l4proto_put(l4proto);
 	return err;
 }
 
@@ -566,7 +587,6 @@
 
 	list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
 		list_del_rcu(&cur->head);
-		nf_ct_l4proto_put(cur->timeout.l4proto);
 
 		if (refcount_dec_and_test(&cur->refcnt))
 			kfree_rcu(cur, rcu_head);
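
With the l4proto get/put refcounting gone (the protocol trackers are no longer separate modules), the default timeouts are read straight out of per-netns storage; the nf_*_pernet() accessors used in the switch above are one-line wrappers, e.g.:

static inline struct nf_tcp_net *nf_tcp_pernet(struct net *net)
{
	return &net->ct.nf_ct_proto.tcp;
}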
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 332c69d..0ba020c 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is a module which is used for logging packets to userspace via
 * nfnetlink.
@@ -7,10 +8,6 @@
  *
  * Based on the old ipv4-only ipt_ULOG.c:
  * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -148,7 +145,7 @@
 instance_put(struct nfulnl_instance *inst)
 {
 	if (inst && refcount_dec_and_test(&inst->use))
-		call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
+		call_rcu(&inst->rcu, nfulnl_instance_free_rcu);
 }
 
 static void nfulnl_timer(struct timer_list *t);
@@ -388,6 +385,57 @@
 	instance_put(inst);
 }
 
+static u32 nfulnl_get_bridge_size(const struct sk_buff *skb)
+{
+	u32 size = 0;
+
+	if (!skb_mac_header_was_set(skb))
+		return 0;
+
+	if (skb_vlan_tag_present(skb)) {
+		size += nla_total_size(0); /* nested */
+		size += nla_total_size(sizeof(u16)); /* id */
+		size += nla_total_size(sizeof(u16)); /* tag */
+	}
+
+	if (skb->network_header > skb->mac_header)
+		size += nla_total_size(skb->network_header - skb->mac_header);
+
+	return size;
+}
+
+static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb)
+{
+	if (!skb_mac_header_was_set(skb))
+		return 0;
+
+	if (skb_vlan_tag_present(skb)) {
+		struct nlattr *nest;
+
+		nest = nla_nest_start(inst->skb, NFULA_VLAN);
+		if (!nest)
+			goto nla_put_failure;
+
+		if (nla_put_be16(inst->skb, NFULA_VLAN_TCI, htons(skb->vlan_tci)) ||
+		    nla_put_be16(inst->skb, NFULA_VLAN_PROTO, skb->vlan_proto))
+			goto nla_put_failure;
+
+		nla_nest_end(inst->skb, nest);
+	}
+
+	if (skb->mac_header < skb->network_header) {
+		int len = (int)(skb->network_header - skb->mac_header);
+
+		if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb)))
+			goto nla_put_failure;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
 /* This is an inline function; we don't really care about a long
  * list of arguments */
 static inline int
@@ -540,7 +588,7 @@
 			goto nla_put_failure;
 	}
 
-	if (skb->tstamp) {
+	if (hooknum <= NF_INET_FORWARD && skb->tstamp) {
 		struct nfulnl_msg_packet_timestamp ts;
 		struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
 		ts.sec = cpu_to_be64(kts.tv_sec);
@@ -583,6 +631,10 @@
 				 NFULA_CT, NFULA_CT_INFO) < 0)
 		goto nla_put_failure;
 
+	if ((pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) &&
+	    nfulnl_put_bridge(inst, skb) < 0)
+		goto nla_put_failure;
+
 	if (data_len) {
 		struct nlattr *nla;
 		int size = nla_attr_size(data_len);
@@ -654,7 +706,7 @@
 	/* FIXME: do we want to make the size calculation conditional based on
 	 * what is actually present?  way more branches and checks, but more
 	 * memory efficient... */
-	size =    nlmsg_total_size(sizeof(struct nfgenmsg))
+	size = nlmsg_total_size(sizeof(struct nfgenmsg))
 		+ nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
 		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
 		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
@@ -671,7 +723,7 @@
 		+ nla_total_size(sizeof(struct nfgenmsg));	/* NLMSG_DONE */
 
 	if (in && skb_mac_header_was_set(skb)) {
-		size +=   nla_total_size(skb->dev->hard_header_len)
+		size += nla_total_size(skb->dev->hard_header_len)
 			+ nla_total_size(sizeof(u_int16_t))	/* hwtype */
 			+ nla_total_size(sizeof(u_int16_t));	/* hwlen */
 	}
@@ -690,6 +742,8 @@
 				size += nfnl_ct->build_size(ct);
 		}
 	}
+	if (pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE)
+		size += nfulnl_get_bridge_size(skb);
 
 	qthreshold = inst->qthreshold;
 	/* per-rule qthreshold overrides per-instance */
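
Two notes on the hunks above. The hooknum <= NF_INET_FORWARD guard on the timestamp reflects that, on the output hooks, skb->tstamp may carry a scheduled (future) transmit time rather than an arrival stamp, so it is only reported for prerouting/input/forward. And on the receiving side, the new NFULA_VLAN nest carries NFULA_VLAN_PROTO and NFULA_VLAN_TCI as big-endian u16s; a userspace sketch with libmnl, assuming attrs[] was already filled by the usual NFULA_MAX attribute walk (only the attribute names come from the uapi header):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <libmnl/libmnl.h>
#include <linux/netfilter/nfnetlink_log.h>

static int vlan_attr_cb(const struct nlattr *attr, void *data)
{
	uint16_t *tci = data;

	if (mnl_attr_get_type(attr) == NFULA_VLAN_TCI)
		*tci = ntohs(mnl_attr_get_u16(attr));
	return MNL_CB_OK;
}

/* ... inside the NFULNL_MSG_PACKET handler ... */
	if (attrs[NFULA_VLAN]) {
		uint16_t tci = 0;

		mnl_attr_parse_nested(attrs[NFULA_VLAN], vlan_attr_cb, &tci);
		printf("vlan id %u pcp %u\n", tci & 0x0fff, tci >> 13);
	}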
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 00db27d..9f5dea0 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -30,32 +31,26 @@
 static inline int nf_osf_ttl(const struct sk_buff *skb,
 			     int ttl_check, unsigned char f_ttl)
 {
+	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
 	const struct iphdr *ip = ip_hdr(skb);
+	const struct in_ifaddr *ifa;
+	int ret = 0;
 
-	if (ttl_check != -1) {
-		if (ttl_check == NF_OSF_TTL_TRUE)
-			return ip->ttl == f_ttl;
-		if (ttl_check == NF_OSF_TTL_NOCHECK)
-			return 1;
-		else if (ip->ttl <= f_ttl)
-			return 1;
-		else {
-			struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
-			int ret = 0;
+	if (ttl_check == NF_OSF_TTL_TRUE)
+		return ip->ttl == f_ttl;
+	if (ttl_check == NF_OSF_TTL_NOCHECK)
+		return 1;
+	else if (ip->ttl <= f_ttl)
+		return 1;
 
-			for_ifa(in_dev) {
-				if (inet_ifa_match(ip->saddr, ifa)) {
-					ret = (ip->ttl == f_ttl);
-					break;
-				}
-			}
-			endfor_ifa(in_dev);
-
-			return ret;
+	in_dev_for_each_ifa_rcu(ifa, in_dev) {
+		if (inet_ifa_match(ip->saddr, ifa)) {
+			ret = (ip->ttl == f_ttl);
+			break;
 		}
 	}
 
-	return ip->ttl == f_ttl;
+	return ret;
 }
 
 struct nf_osf_hdr_ctx {
@@ -71,6 +66,7 @@
 			     int ttl_check,
 			     struct nf_osf_hdr_ctx *ctx)
 {
+	const __u8 *optpinit = ctx->optp;
 	unsigned int check_WSS = 0;
 	int fmatch = FMATCH_WRONG;
 	int foptsize, optnum;
@@ -160,6 +156,9 @@
 		}
 	}
 
+	if (fmatch != FMATCH_OK)
+		ctx->optp = optpinit;
+
 	return fmatch == FMATCH_OK;
 }
 
@@ -213,7 +212,7 @@
 	if (!tcp)
 		return false;
 
-	ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1;
+	ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : 0;
 
 	list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
 
@@ -256,8 +255,9 @@
 }
 EXPORT_SYMBOL_GPL(nf_osf_match);
 
-const char *nf_osf_find(const struct sk_buff *skb,
-			const struct list_head *nf_osf_fingers)
+bool nf_osf_find(const struct sk_buff *skb,
+		 const struct list_head *nf_osf_fingers,
+		 const int ttl_check, struct nf_osf_data *data)
 {
 	const struct iphdr *ip = ip_hdr(skb);
 	const struct nf_osf_user_finger *f;
@@ -265,24 +265,24 @@
 	const struct nf_osf_finger *kf;
 	struct nf_osf_hdr_ctx ctx;
 	const struct tcphdr *tcp;
-	const char *genre = NULL;
 
 	memset(&ctx, 0, sizeof(ctx));
 
 	tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
 	if (!tcp)
-		return NULL;
+		return false;
 
 	list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
 		f = &kf->finger;
-		if (!nf_osf_match_one(skb, f, -1, &ctx))
+		if (!nf_osf_match_one(skb, f, ttl_check, &ctx))
 			continue;
 
-		genre = f->genre;
+		data->genre = f->genre;
+		data->version = f->version;
 		break;
 	}
 
-	return genre;
+	return true;
 }
 EXPORT_SYMBOL_GPL(nf_osf_find);
 
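
The for_ifa()/endfor_ifa() macro pair was retired tree-wide in favour of in_dev_for_each_ifa_rcu(), which makes the RCU requirement explicit in the name; the v5.4 macro is roughly:

#define in_dev_for_each_ifa_rcu(ifa, in_dev)			\
	for (ifa = rcu_dereference((in_dev)->ifa_list);		\
	     ifa;						\
	     ifa = rcu_dereference(ifa->ifa_next))

Note also the optpinit save/restore added to nf_osf_match_one(): the option pointer is advanced while TCP options are compared, so a failed match must rewind it before the next fingerprint is tried.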
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d33094f..feabdfb 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is a module which is used for queueing packets and communicating with
  * userspace via nfnetlink.
@@ -8,11 +9,6 @@
  * Based on the old ipv4-only ip_queue.c:
  * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
  * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -351,7 +347,7 @@
 	if (skb_vlan_tag_present(entskb)) {
 		struct nlattr *nest;
 
-		nest = nla_nest_start(skb, NFQA_VLAN | NLA_F_NESTED);
+		nest = nla_nest_start(skb, NFQA_VLAN);
 		if (!nest)
 			goto nla_put_failure;
 
@@ -398,7 +394,7 @@
 	char *secdata = NULL;
 	u32 seclen = 0;
 
-	size =    nlmsg_total_size(sizeof(struct nfgenmsg))
+	size = nlmsg_total_size(sizeof(struct nfgenmsg))
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
 		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
 		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
@@ -457,7 +453,7 @@
 	}
 
 	if (queue->flags & NFQA_CFG_F_UID_GID) {
-		size +=  (nla_total_size(sizeof(u_int32_t))	/* uid */
+		size += (nla_total_size(sizeof(u_int32_t))	/* uid */
 			+ nla_total_size(sizeof(u_int32_t)));	/* gid */
 	}
 
@@ -582,7 +578,7 @@
 	if (nfqnl_put_bridge(entry, skb) < 0)
 		goto nla_put_failure;
 
-	if (entskb->tstamp) {
+	if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) {
 		struct nfqnl_msg_packet_timestamp ts;
 		struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
 
@@ -727,13 +723,13 @@
  */
 static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
 {
-	if (skb->nf_bridge)
+	if (nf_bridge_info_get(skb))
 		__skb_push(skb, skb->network_header - skb->mac_header);
 }
 
 static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
 {
-	if (skb->nf_bridge)
+	if (nf_bridge_info_get(skb))
 		__skb_pull(skb, skb->network_header - skb->mac_header);
 }
 #else
@@ -765,7 +761,7 @@
 		return ret;
 	}
 
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 
 	entry_seg = nf_queue_entry_dup(entry);
 	if (entry_seg) {
@@ -863,7 +859,7 @@
 		}
 		skb_put(e->skb, diff);
 	}
-	if (!skb_make_writable(e->skb, data_len))
+	if (skb_ensure_writable(e->skb, data_len))
 		return -ENOMEM;
 	skb_copy_to_linear_data(e->skb, data, data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
@@ -904,23 +900,22 @@
 static int
 dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
 {
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	int physinif, physoutif;
+
+	physinif = nf_bridge_get_physinif(entry->skb);
+	physoutif = nf_bridge_get_physoutif(entry->skb);
+
+	if (physinif == ifindex || physoutif == ifindex)
+		return 1;
+#endif
 	if (entry->state.in)
 		if (entry->state.in->ifindex == ifindex)
 			return 1;
 	if (entry->state.out)
 		if (entry->state.out->ifindex == ifindex)
 			return 1;
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	if (entry->skb->nf_bridge) {
-		int physinif, physoutif;
 
-		physinif = nf_bridge_get_physinif(entry->skb);
-		physoutif = nf_bridge_get_physoutif(entry->skb);
-
-		if (physinif == ifindex || physoutif == ifindex)
-			return 1;
-	}
-#endif
 	return 0;
 }
 
@@ -1140,16 +1135,18 @@
 		struct nlattr *tb[NFQA_VLAN_MAX + 1];
 		int err;
 
-		err = nla_parse_nested(tb, NFQA_VLAN_MAX, nfqa[NFQA_VLAN],
-				       nfqa_vlan_policy, NULL);
+		err = nla_parse_nested_deprecated(tb, NFQA_VLAN_MAX,
+						  nfqa[NFQA_VLAN],
+						  nfqa_vlan_policy, NULL);
 		if (err < 0)
 			return err;
 
 		if (!tb[NFQA_VLAN_TCI] || !tb[NFQA_VLAN_PROTO])
 			return -EINVAL;
 
-		entry->skb->vlan_tci = ntohs(nla_get_be16(tb[NFQA_VLAN_TCI]));
-		entry->skb->vlan_proto = nla_get_be16(tb[NFQA_VLAN_PROTO]);
+		__vlan_hwaccel_put_tag(entry->skb,
+			nla_get_be16(tb[NFQA_VLAN_PROTO]),
+			ntohs(nla_get_be16(tb[NFQA_VLAN_TCI])));
 	}
 
 	if (nfqa[NFQA_L2HDR]) {
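
The __vlan_hwaccel_put_tag() conversion in the verdict path above is a fix, not just cleanup: the open-coded field writes never set skb->vlan_present, so skb_vlan_tag_present() would not see the tag injected via NFQA_VLAN. The v5.4 helper sets all three fields:

static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb,
					  __be16 vlan_proto, u16 vlan_tci)
{
	skb->vlan_proto = vlan_proto;
	skb->vlan_tci = vlan_tci;
	skb->vlan_present = 1;
}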
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index fff8073..02afa75 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -16,6 +13,7 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 
 struct nft_bitwise {
 	enum nft_registers	sreg:8;
@@ -25,9 +23,8 @@
 	struct nft_data		xor;
 };
 
-static void nft_bitwise_eval(const struct nft_expr *expr,
-			     struct nft_regs *regs,
-			     const struct nft_pktinfo *pkt)
+void nft_bitwise_eval(const struct nft_expr *expr,
+		      struct nft_regs *regs, const struct nft_pktinfo *pkt)
 {
 	const struct nft_bitwise *priv = nft_expr_priv(expr);
 	const u32 *src = &regs->data[priv->sreg];
@@ -130,12 +127,31 @@
 	return -1;
 }
 
+/* all-zeroes template: offload supports only the mask, not the xor part */
+static struct nft_data zero;
+
+static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
+                               struct nft_flow_rule *flow,
+                               const struct nft_expr *expr)
+{
+	const struct nft_bitwise *priv = nft_expr_priv(expr);
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) ||
+	    priv->sreg != priv->dreg || priv->len != reg->len)
+		return -EOPNOTSUPP;
+
+	memcpy(&reg->mask, &priv->mask, sizeof(priv->mask));
+
+	return 0;
+}
+
 static const struct nft_expr_ops nft_bitwise_ops = {
 	.type		= &nft_bitwise_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
 	.eval		= nft_bitwise_eval,
 	.init		= nft_bitwise_init,
 	.dump		= nft_bitwise_dump,
+	.offload	= nft_bitwise_offload,
 };
 
 struct nft_expr_type nft_bitwise_type __read_mostly = {
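
For context on the offload constraint above: nft_bitwise_eval() computes dst = (src & mask) ^ xor word by word, and a flow dissector match can only express the mask half, hence the requirement that the xor part be all-zero and that the operation happen in place (sreg == dreg) with matching length. The eval loop, unchanged by this hunk, is essentially:

	for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
		dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];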
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 13d4e42..12bed3f 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -26,9 +23,9 @@
 	u8			size;
 };
 
-static void nft_byteorder_eval(const struct nft_expr *expr,
-			       struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
+void nft_byteorder_eval(const struct nft_expr *expr,
+			struct nft_regs *regs,
+			const struct nft_pktinfo *pkt)
 {
 	const struct nft_byteorder *priv = nft_expr_priv(expr);
 	u32 *src = &regs->data[priv->sreg];
@@ -46,14 +43,15 @@
 		switch (priv->op) {
 		case NFT_BYTEORDER_NTOH:
 			for (i = 0; i < priv->len / 8; i++) {
-				src64 = get_unaligned((u64 *)&src[i]);
-				put_unaligned_be64(src64, &dst[i]);
+				src64 = nft_reg_load64(&src[i]);
+				nft_reg_store64(&dst[i], be64_to_cpu(src64));
 			}
 			break;
 		case NFT_BYTEORDER_HTON:
 			for (i = 0; i < priv->len / 8; i++) {
-				src64 = get_unaligned_be64(&src[i]);
-				put_unaligned(src64, (u64 *)&dst[i]);
+				src64 = (__force __u64)
+					cpu_to_be64(nft_reg_load64(&src[i]));
+				nft_reg_store64(&dst[i], src64);
 			}
 			break;
 		}
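
nft_reg_load64()/nft_reg_store64() wrap the unaligned 64-bit register accesses the old code open-coded, leaving only the byte-order conversion visible at the call site; the helpers are roughly:

static inline u64 nft_reg_load64(u32 *sreg)
{
	return get_unaligned((u64 *)sreg);
}

static inline void nft_reg_store64(u32 *dreg, u64 val)
{
	put_unaligned(val, (u64 *)dreg);
}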
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 3fd540b..b5d5d07 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -193,7 +193,7 @@
 static inline void nft_chain_filter_inet_fini(void) {}
 #endif /* CONFIG_NF_TABLES_IPV6 */
 
-#ifdef CONFIG_NF_TABLES_BRIDGE
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
 static unsigned int
 nft_do_chain_bridge(void *priv,
 		    struct sk_buff *skb,
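
The #ifdef → IS_ENABLED() change matters because CONFIG_NF_TABLES_BRIDGE is now a tristate: a plain #ifdef is false when the symbol is =m (only CONFIG_NF_TABLES_BRIDGE_MODULE is defined then), which would silently compile the bridge hook out. From include/linux/kconfig.h:

/* evaluates to 1 when the option is set to 'y' or 'm' */
#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))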
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
new file mode 100644
index 0000000..ff9ac8a
--- /dev/null
+++ b/net/netfilter/nft_chain_nat.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/module.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb,
+				     const struct nf_hook_state *state)
+{
+	struct nft_pktinfo pkt;
+
+	nft_set_pktinfo(&pkt, skb, state);
+
+	switch (state->pf) {
+#ifdef CONFIG_NF_TABLES_IPV4
+	case NFPROTO_IPV4:
+		nft_set_pktinfo_ipv4(&pkt, skb);
+		break;
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+	case NFPROTO_IPV6:
+		nft_set_pktinfo_ipv6(&pkt, skb);
+		break;
+#endif
+	default:
+		break;
+	}
+
+	return nft_do_chain(&pkt, priv);
+}
+
+#ifdef CONFIG_NF_TABLES_IPV4
+static const struct nft_chain_type nft_chain_nat_ipv4 = {
+	.name		= "nat",
+	.type		= NFT_CHAIN_T_NAT,
+	.family		= NFPROTO_IPV4,
+	.owner		= THIS_MODULE,
+	.hook_mask	= (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN),
+	.hooks		= {
+		[NF_INET_PRE_ROUTING]	= nft_nat_do_chain,
+		[NF_INET_POST_ROUTING]	= nft_nat_do_chain,
+		[NF_INET_LOCAL_OUT]	= nft_nat_do_chain,
+		[NF_INET_LOCAL_IN]	= nft_nat_do_chain,
+	},
+	.ops_register = nf_nat_ipv4_register_fn,
+	.ops_unregister = nf_nat_ipv4_unregister_fn,
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static const struct nft_chain_type nft_chain_nat_ipv6 = {
+	.name		= "nat",
+	.type		= NFT_CHAIN_T_NAT,
+	.family		= NFPROTO_IPV6,
+	.owner		= THIS_MODULE,
+	.hook_mask	= (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN),
+	.hooks		= {
+		[NF_INET_PRE_ROUTING]	= nft_nat_do_chain,
+		[NF_INET_POST_ROUTING]	= nft_nat_do_chain,
+		[NF_INET_LOCAL_OUT]	= nft_nat_do_chain,
+		[NF_INET_LOCAL_IN]	= nft_nat_do_chain,
+	},
+	.ops_register		= nf_nat_ipv6_register_fn,
+	.ops_unregister		= nf_nat_ipv6_unregister_fn,
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_INET
+static int nft_nat_inet_reg(struct net *net, const struct nf_hook_ops *ops)
+{
+	return nf_nat_inet_register_fn(net, ops);
+}
+
+static void nft_nat_inet_unreg(struct net *net, const struct nf_hook_ops *ops)
+{
+	nf_nat_inet_unregister_fn(net, ops);
+}
+
+static const struct nft_chain_type nft_chain_nat_inet = {
+	.name		= "nat",
+	.type		= NFT_CHAIN_T_NAT,
+	.family		= NFPROTO_INET,
+	.hook_mask	= (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_LOCAL_IN) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_POST_ROUTING),
+	.hooks		= {
+		[NF_INET_PRE_ROUTING]	= nft_nat_do_chain,
+		[NF_INET_LOCAL_IN]	= nft_nat_do_chain,
+		[NF_INET_LOCAL_OUT]	= nft_nat_do_chain,
+		[NF_INET_POST_ROUTING]	= nft_nat_do_chain,
+	},
+	.ops_register		= nft_nat_inet_reg,
+	.ops_unregister		= nft_nat_inet_unreg,
+};
+#endif
+
+static int __init nft_chain_nat_init(void)
+{
+#ifdef CONFIG_NF_TABLES_IPV6
+	nft_register_chain_type(&nft_chain_nat_ipv6);
+#endif
+#ifdef CONFIG_NF_TABLES_IPV4
+	nft_register_chain_type(&nft_chain_nat_ipv4);
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+	nft_register_chain_type(&nft_chain_nat_inet);
+#endif
+
+	return 0;
+}
+
+static void __exit nft_chain_nat_exit(void)
+{
+#ifdef CONFIG_NF_TABLES_IPV4
+	nft_unregister_chain_type(&nft_chain_nat_ipv4);
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+	nft_unregister_chain_type(&nft_chain_nat_ipv6);
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+	nft_unregister_chain_type(&nft_chain_nat_inet);
+#endif
+}
+
+module_init(nft_chain_nat_init);
+module_exit(nft_chain_nat_exit);
+
+MODULE_LICENSE("GPL");
+#ifdef CONFIG_NF_TABLES_IPV4
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+MODULE_ALIAS_NFT_CHAIN(1, "nat");	/* NFPROTO_INET */
+#endif
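
The literal 1 in the last alias is deliberate. MODULE_ALIAS_NFT_CHAIN() stringifies its family argument, and __stringify() only expands preprocessor macros: AF_INET and AF_INET6 are #defines (2 and 10), but NFPROTO_INET is an enum constant, so it would have produced the useless alias "nft-chain-NFPROTO_INET-nat". The macro reads:

#define MODULE_ALIAS_NFT_CHAIN(family, name) \
	MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)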
diff --git a/net/netfilter/nft_chain_route.c b/net/netfilter/nft_chain_route.c
new file mode 100644
index 0000000..8826bbe
--- /dev/null
+++ b/net/netfilter/nft_chain_route.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#ifdef CONFIG_NF_TABLES_IPV4
+static unsigned int nf_route_table_hook4(void *priv,
+					 struct sk_buff *skb,
+					 const struct nf_hook_state *state)
+{
+	const struct iphdr *iph;
+	struct nft_pktinfo pkt;
+	__be32 saddr, daddr;
+	unsigned int ret;
+	u32 mark;
+	int err;
+	u8 tos;
+
+	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_ipv4(&pkt, skb);
+
+	mark = skb->mark;
+	iph = ip_hdr(skb);
+	saddr = iph->saddr;
+	daddr = iph->daddr;
+	tos = iph->tos;
+
+	ret = nft_do_chain(&pkt, priv);
+	if (ret == NF_ACCEPT) {
+		iph = ip_hdr(skb);
+
+		if (iph->saddr != saddr ||
+		    iph->daddr != daddr ||
+		    skb->mark != mark ||
+		    iph->tos != tos) {
+			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+	}
+	return ret;
+}
+
+static const struct nft_chain_type nft_chain_route_ipv4 = {
+	.name		= "route",
+	.type		= NFT_CHAIN_T_ROUTE,
+	.family		= NFPROTO_IPV4,
+	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nf_route_table_hook4,
+	},
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static unsigned int nf_route_table_hook6(void *priv,
+					 struct sk_buff *skb,
+					 const struct nf_hook_state *state)
+{
+	struct in6_addr saddr, daddr;
+	struct nft_pktinfo pkt;
+	u32 mark, flowlabel;
+	unsigned int ret;
+	u8 hop_limit;
+	int err;
+
+	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_ipv6(&pkt, skb);
+
+	/* save source/dest address, mark, hoplimit, flowlabel, priority */
+	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+	mark = skb->mark;
+	hop_limit = ipv6_hdr(skb)->hop_limit;
+
+	/* flowlabel and prio (includes version, which shouldn't change either) */
+	flowlabel = *((u32 *)ipv6_hdr(skb));
+
+	ret = nft_do_chain(&pkt, priv);
+	if (ret == NF_ACCEPT &&
+	    (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+	     memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+	     skb->mark != mark ||
+	     ipv6_hdr(skb)->hop_limit != hop_limit ||
+	     flowlabel != *((u32 *)ipv6_hdr(skb)))) {
+		err = nf_ip6_route_me_harder(state->net, skb);
+		if (err < 0)
+			ret = NF_DROP_ERR(err);
+	}
+
+	return ret;
+}
+
+static const struct nft_chain_type nft_chain_route_ipv6 = {
+	.name		= "route",
+	.type		= NFT_CHAIN_T_ROUTE,
+	.family		= NFPROTO_IPV6,
+	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nf_route_table_hook6,
+	},
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_INET
+static unsigned int nf_route_table_inet(void *priv,
+					struct sk_buff *skb,
+					const struct nf_hook_state *state)
+{
+	struct nft_pktinfo pkt;
+
+	switch (state->pf) {
+	case NFPROTO_IPV4:
+		return nf_route_table_hook4(priv, skb, state);
+	case NFPROTO_IPV6:
+		return nf_route_table_hook6(priv, skb, state);
+	default:
+		nft_set_pktinfo(&pkt, skb, state);
+		break;
+	}
+
+	return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_route_inet = {
+	.name		= "route",
+	.type		= NFT_CHAIN_T_ROUTE,
+	.family		= NFPROTO_INET,
+	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nf_route_table_inet,
+	},
+};
+#endif
+
+void __init nft_chain_route_init(void)
+{
+#ifdef CONFIG_NF_TABLES_IPV6
+	nft_register_chain_type(&nft_chain_route_ipv6);
+#endif
+#ifdef CONFIG_NF_TABLES_IPV4
+	nft_register_chain_type(&nft_chain_route_ipv4);
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+	nft_register_chain_type(&nft_chain_route_inet);
+#endif
+}
+
+void __exit nft_chain_route_fini(void)
+{
+#ifdef CONFIG_NF_TABLES_IPV6
+	nft_unregister_chain_type(&nft_chain_route_ipv6);
+#endif
+#ifdef CONFIG_NF_TABLES_IPV4
+	nft_unregister_chain_type(&nft_chain_route_ipv4);
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+	nft_unregister_chain_type(&nft_chain_route_inet);
+#endif
+}
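
NF_DROP_ERR(), used by both route hooks above, packs an errno into the upper bits of the verdict so the reroute failure is propagated to the caller instead of being flattened into a plain drop:

/* from include/linux/netfilter.h */
#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)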
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index fa90a84..0744b2b 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -15,6 +12,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables_offload.h>
 #include <net/netfilter/nf_tables.h>
 
 struct nft_cmp_expr {
@@ -24,9 +22,9 @@
 	enum nft_cmp_ops	op:8;
 };
 
-static void nft_cmp_eval(const struct nft_expr *expr,
-			 struct nft_regs *regs,
-			 const struct nft_pktinfo *pkt)
+void nft_cmp_eval(const struct nft_expr *expr,
+		  struct nft_regs *regs,
+		  const struct nft_pktinfo *pkt)
 {
 	const struct nft_cmp_expr *priv = nft_expr_priv(expr);
 	int d;
@@ -79,7 +77,8 @@
 
 	err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
 			    tb[NFTA_CMP_DATA]);
-	BUG_ON(err < 0);
+	if (err < 0)
+		return err;
 
 	priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
 	err = nft_validate_register_load(priv->sreg, desc.len);
@@ -109,12 +108,44 @@
 	return -1;
 }
 
+static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
+			     struct nft_flow_rule *flow,
+			     const struct nft_cmp_expr *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->sreg];
+	u8 *mask = (u8 *)&flow->match.mask;
+	u8 *key = (u8 *)&flow->match.key;
+
+	if (priv->op != NFT_CMP_EQ || reg->len != priv->len)
+		return -EOPNOTSUPP;
+
+	memcpy(key + reg->offset, &priv->data, priv->len);
+	memcpy(mask + reg->offset, &reg->mask, priv->len);
+
+	flow->match.dissector.used_keys |= BIT(reg->key);
+	flow->match.dissector.offset[reg->key] = reg->base_offset;
+
+	nft_offload_update_dependency(ctx, &priv->data, priv->len);
+
+	return 0;
+}
+
+static int nft_cmp_offload(struct nft_offload_ctx *ctx,
+			   struct nft_flow_rule *flow,
+			   const struct nft_expr *expr)
+{
+	const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+
+	return __nft_cmp_offload(ctx, flow, priv);
+}
+
 static const struct nft_expr_ops nft_cmp_ops = {
 	.type		= &nft_cmp_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
 	.eval		= nft_cmp_eval,
 	.init		= nft_cmp_init,
 	.dump		= nft_cmp_dump,
+	.offload	= nft_cmp_offload,
 };
 
 static int nft_cmp_fast_init(const struct nft_ctx *ctx,
@@ -129,7 +160,8 @@
 
 	err = nft_data_init(NULL, &data, sizeof(data), &desc,
 			    tb[NFTA_CMP_DATA]);
-	BUG_ON(err < 0);
+	if (err < 0)
+		return err;
 
 	priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
 	err = nft_validate_register_load(priv->sreg, desc.len);
@@ -144,6 +176,25 @@
 	return 0;
 }
 
+static int nft_cmp_fast_offload(struct nft_offload_ctx *ctx,
+				struct nft_flow_rule *flow,
+				const struct nft_expr *expr)
+{
+	const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+	struct nft_cmp_expr cmp = {
+		.data	= {
+			.data	= {
+				[0] = priv->data,
+			},
+		},
+		.sreg	= priv->sreg,
+		.len	= priv->len / BITS_PER_BYTE,
+		.op	= NFT_CMP_EQ,
+	};
+
+	return __nft_cmp_offload(ctx, flow, &cmp);
+}
+
 static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
@@ -170,6 +221,7 @@
 	.eval		= NULL,	/* inlined */
 	.init		= nft_cmp_fast_init,
 	.dump		= nft_cmp_fast_dump,
+	.offload	= nft_cmp_fast_offload,
 };
 
 static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 29d6fc7..f9adca6 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 2012-2013 by Pablo Neira Ayuso <pablo@netfilter.org>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * This software has been sponsored by Sophos Astaro <http://www.sophos.com>
  */
 
@@ -23,19 +20,6 @@
 #include <linux/netfilter_arp/arp_tables.h>
 #include <net/netfilter/nf_tables.h>
 
-struct nft_xt {
-	struct list_head	head;
-	struct nft_expr_ops	ops;
-	unsigned int		refcnt;
-
-	/* Unlike other expressions, ops doesn't have static storage duration.
-	 * nft core assumes they do.  We use kfree_rcu so that nft core can
-	 * can check expr->ops->size even after nft_compat->destroy() frees
-	 * the nft_xt struct that holds the ops structure.
-	 */
-	struct rcu_head		rcu_head;
-};
-
 /* Used for matches where *info is larger than X bytes */
 #define NFT_MATCH_LARGE_THRESH	192
 
@@ -43,17 +27,6 @@
 	void *info;
 };
 
-static bool nft_xt_put(struct nft_xt *xt)
-{
-	if (--xt->refcnt == 0) {
-		list_del(&xt->head);
-		kfree_rcu(xt, rcu_head);
-		return true;
-	}
-
-	return false;
-}
-
 static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx,
 						const char *tablename)
 {
@@ -222,8 +195,8 @@
 	u32 flags;
 	int err;
 
-	err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr,
-			       nft_rule_compat_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_RULE_COMPAT_MAX, attr,
+					  nft_rule_compat_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -248,7 +221,6 @@
 	struct xt_target *target = expr->ops->data;
 	struct xt_tgchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
-	struct nft_xt *nft_xt;
 	u16 proto = 0;
 	bool inv = false;
 	union nft_entry e = {};
@@ -272,8 +244,6 @@
 	if (!target->target)
 		return -EINVAL;
 
-	nft_xt = container_of(expr->ops, struct nft_xt, ops);
-	nft_xt->refcnt++;
 	return 0;
 }
 
@@ -282,6 +252,7 @@
 {
 	struct xt_target *target = expr->ops->data;
 	void *info = nft_expr_priv(expr);
+	struct module *me = target->me;
 	struct xt_tgdtor_param par;
 
 	par.net = ctx->net;
@@ -291,8 +262,26 @@
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 
-	if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
-		module_put(target->me);
+	module_put(me);
+	kfree(expr->ops);
+}
+
+static int nft_extension_dump_info(struct sk_buff *skb, int attr,
+				   const void *info,
+				   unsigned int size, unsigned int user_size)
+{
+	unsigned int info_size, aligned_size = XT_ALIGN(size);
+	struct nlattr *nla;
+
+	nla = nla_reserve(skb, attr, aligned_size);
+	if (!nla)
+		return -1;
+
+	info_size = user_size ? : size;
+	memcpy(nla_data(nla), info, info_size);
+	memset(nla_data(nla) + info_size, 0, aligned_size - info_size);
+
+	return 0;
 }
 
 static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -302,7 +291,8 @@
 
 	if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) ||
 	    nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) ||
-	    nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(target->targetsize), info))
+	    nft_extension_dump_info(skb, NFTA_TARGET_INFO, info,
+				    target->targetsize, target->usersize))
 		goto nla_put_failure;
 
 	return 0;
@@ -446,7 +436,6 @@
 	struct xt_match *match = expr->ops->data;
 	struct xt_mtchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
-	struct nft_xt *nft_xt;
 	u16 proto = 0;
 	bool inv = false;
 	union nft_entry e = {};
@@ -462,13 +451,7 @@
 
 	nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
 
-	ret = xt_check_match(&par, size, proto, inv);
-	if (ret < 0)
-		return ret;
-
-	nft_xt = container_of(expr->ops, struct nft_xt, ops);
-	nft_xt->refcnt++;
-	return 0;
+	return xt_check_match(&par, size, proto, inv);
 }
 
 static int
@@ -511,8 +494,8 @@
 	if (par.match->destroy != NULL)
 		par.match->destroy(&par);
 
-	if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
-		module_put(me);
+	module_put(me);
+	kfree(expr->ops);
 }
 
 static void
@@ -537,7 +520,8 @@
 
 	if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
 	    nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) ||
-	    nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(match->matchsize), info))
+	    nft_extension_dump_info(skb, NFTA_MATCH_INFO, info,
+				    match->matchsize, match->usersize))
 		goto nla_put_failure;
 
 	return 0;
@@ -714,22 +698,13 @@
 	.cb		= nfnl_nft_compat_cb,
 };
 
-static LIST_HEAD(nft_match_list);
-
 static struct nft_expr_type nft_match_type;
 
-static bool nft_match_cmp(const struct xt_match *match,
-			  const char *name, u32 rev, u32 family)
-{
-	return strcmp(match->name, name) == 0 && match->revision == rev &&
-	       (match->family == NFPROTO_UNSPEC || match->family == family);
-}
-
 static const struct nft_expr_ops *
 nft_match_select_ops(const struct nft_ctx *ctx,
 		     const struct nlattr * const tb[])
 {
-	struct nft_xt *nft_match;
+	struct nft_expr_ops *ops;
 	struct xt_match *match;
 	unsigned int matchsize;
 	char *mt_name;
@@ -745,14 +720,6 @@
 	rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
 	family = ctx->family;
 
-	/* Re-use the existing match if it's already loaded. */
-	list_for_each_entry(nft_match, &nft_match_list, head) {
-		struct xt_match *match = nft_match->ops.data;
-
-		if (nft_match_cmp(match, mt_name, rev, family))
-			return &nft_match->ops;
-	}
-
 	match = xt_request_find_match(family, mt_name, rev);
 	if (IS_ERR(match))
 		return ERR_PTR(-ENOENT);
@@ -762,66 +729,62 @@
 		goto err;
 	}
 
-	/* This is the first time we use this match, allocate operations */
-	nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
-	if (nft_match == NULL) {
+	ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+	if (!ops) {
 		err = -ENOMEM;
 		goto err;
 	}
 
-	nft_match->refcnt = 0;
-	nft_match->ops.type = &nft_match_type;
-	nft_match->ops.eval = nft_match_eval;
-	nft_match->ops.init = nft_match_init;
-	nft_match->ops.destroy = nft_match_destroy;
-	nft_match->ops.dump = nft_match_dump;
-	nft_match->ops.validate = nft_match_validate;
-	nft_match->ops.data = match;
+	ops->type = &nft_match_type;
+	ops->eval = nft_match_eval;
+	ops->init = nft_match_init;
+	ops->destroy = nft_match_destroy;
+	ops->dump = nft_match_dump;
+	ops->validate = nft_match_validate;
+	ops->data = match;
 
 	matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
 	if (matchsize > NFT_MATCH_LARGE_THRESH) {
 		matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv));
 
-		nft_match->ops.eval = nft_match_large_eval;
-		nft_match->ops.init = nft_match_large_init;
-		nft_match->ops.destroy = nft_match_large_destroy;
-		nft_match->ops.dump = nft_match_large_dump;
+		ops->eval = nft_match_large_eval;
+		ops->init = nft_match_large_init;
+		ops->destroy = nft_match_large_destroy;
+		ops->dump = nft_match_large_dump;
 	}
 
-	nft_match->ops.size = matchsize;
+	ops->size = matchsize;
 
-	list_add(&nft_match->head, &nft_match_list);
-
-	return &nft_match->ops;
+	return ops;
 err:
 	module_put(match->me);
 	return ERR_PTR(err);
 }
 
+static void nft_match_release_ops(const struct nft_expr_ops *ops)
+{
+	struct xt_match *match = ops->data;
+
+	module_put(match->me);
+	kfree(ops);
+}
+
 static struct nft_expr_type nft_match_type __read_mostly = {
 	.name		= "match",
 	.select_ops	= nft_match_select_ops,
+	.release_ops	= nft_match_release_ops,
 	.policy		= nft_match_policy,
 	.maxattr	= NFTA_MATCH_MAX,
 	.owner		= THIS_MODULE,
 };
 
-static LIST_HEAD(nft_target_list);
-
 static struct nft_expr_type nft_target_type;
 
-static bool nft_target_cmp(const struct xt_target *tg,
-			   const char *name, u32 rev, u32 family)
-{
-	return strcmp(tg->name, name) == 0 && tg->revision == rev &&
-	       (tg->family == NFPROTO_UNSPEC || tg->family == family);
-}
-
 static const struct nft_expr_ops *
 nft_target_select_ops(const struct nft_ctx *ctx,
 		      const struct nlattr * const tb[])
 {
-	struct nft_xt *nft_target;
+	struct nft_expr_ops *ops;
 	struct xt_target *target;
 	char *tg_name;
 	u32 rev, family;
@@ -841,17 +804,6 @@
 	    strcmp(tg_name, "standard") == 0)
 		return ERR_PTR(-EINVAL);
 
-	/* Re-use the existing target if it's already loaded. */
-	list_for_each_entry(nft_target, &nft_target_list, head) {
-		struct xt_target *target = nft_target->ops.data;
-
-		if (!target->target)
-			continue;
-
-		if (nft_target_cmp(target, tg_name, rev, family))
-			return &nft_target->ops;
-	}
-
 	target = xt_request_find_target(family, tg_name, rev);
 	if (IS_ERR(target))
 		return ERR_PTR(-ENOENT);
@@ -866,38 +818,43 @@
 		goto err;
 	}
 
-	/* This is the first time we use this target, allocate operations */
-	nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
-	if (nft_target == NULL) {
+	ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+	if (!ops) {
 		err = -ENOMEM;
 		goto err;
 	}
 
-	nft_target->refcnt = 0;
-	nft_target->ops.type = &nft_target_type;
-	nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
-	nft_target->ops.init = nft_target_init;
-	nft_target->ops.destroy = nft_target_destroy;
-	nft_target->ops.dump = nft_target_dump;
-	nft_target->ops.validate = nft_target_validate;
-	nft_target->ops.data = target;
+	ops->type = &nft_target_type;
+	ops->size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
+	ops->init = nft_target_init;
+	ops->destroy = nft_target_destroy;
+	ops->dump = nft_target_dump;
+	ops->validate = nft_target_validate;
+	ops->data = target;
 
 	if (family == NFPROTO_BRIDGE)
-		nft_target->ops.eval = nft_target_eval_bridge;
+		ops->eval = nft_target_eval_bridge;
 	else
-		nft_target->ops.eval = nft_target_eval_xt;
+		ops->eval = nft_target_eval_xt;
 
-	list_add(&nft_target->head, &nft_target_list);
-
-	return &nft_target->ops;
+	return ops;
 err:
 	module_put(target->me);
 	return ERR_PTR(err);
 }
 
+static void nft_target_release_ops(const struct nft_expr_ops *ops)
+{
+	struct xt_target *target = ops->data;
+
+	module_put(target->me);
+	kfree(ops);
+}
+
 static struct nft_expr_type nft_target_type __read_mostly = {
 	.name		= "target",
 	.select_ops	= nft_target_select_ops,
+	.release_ops	= nft_target_release_ops,
 	.policy		= nft_target_policy,
 	.maxattr	= NFTA_TARGET_MAX,
 	.owner		= THIS_MODULE,
@@ -922,7 +879,6 @@
 	}
 
 	return ret;
-
 err_target:
 	nft_unregister_expr(&nft_target_type);
 err_match:
@@ -932,32 +888,6 @@
 
 static void __exit nft_compat_module_exit(void)
 {
-	struct nft_xt *xt, *next;
-
-	/* list should be empty here, it can be non-empty only in case there
-	 * was an error that caused nft_xt expr to not be initialized fully
-	 * and noone else requested the same expression later.
-	 *
-	 * In this case, the lists contain 0-refcount entries that still
-	 * hold module reference.
-	 */
-	list_for_each_entry_safe(xt, next, &nft_target_list, head) {
-		struct xt_target *target = xt->ops.data;
-
-		if (WARN_ON_ONCE(xt->refcnt))
-			continue;
-		module_put(target->me);
-		kfree(xt);
-	}
-
-	list_for_each_entry_safe(xt, next, &nft_match_list, head) {
-		struct xt_match *match = xt->ops.data;
-
-		if (WARN_ON_ONCE(xt->refcnt))
-			continue;
-		module_put(match->me);
-		kfree(xt);
-	}
 	nfnetlink_subsys_unregister(&nfnl_compat_subsys);
 	nft_unregister_expr(&nft_target_type);
 	nft_unregister_expr(&nft_match_type);
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index b90d96b..69d6173 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -30,7 +30,6 @@
 	enum ip_conntrack_info ctinfo;
 	const struct nf_conn *ct;
 	unsigned int count;
-	bool addit;
 
 	tuple_ptr = &tuple;
 
@@ -44,19 +43,12 @@
 		return;
 	}
 
-	nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
-			    &addit);
-	count = priv->list.count;
-
-	if (!addit)
-		goto out;
-
-	if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
+	if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
 		regs->verdict.code = NF_DROP;
 		return;
 	}
-	count++;
-out:
+
+	count = priv->list.count;
 
 	if ((count > priv->limit) ^ priv->invert) {
 		regs->verdict.code = NFT_BREAK;
@@ -226,8 +218,13 @@
 static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
 {
 	struct nft_connlimit *priv = nft_expr_priv(expr);
+	bool ret;
 
-	return nf_conncount_gc_list(net, &priv->list);
+	local_bh_disable();
+	ret = nf_conncount_gc_list(net, &priv->list);
+	local_bh_enable();
+
+	return ret;
 }
 
 static struct nft_expr_type nft_connlimit_type;
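
The connlimit rewrite tracks an nf_conncount API change that folded the old lookup-then-add sequence into a single call; a nonzero return means the connection could not be accounted, which the eval path turns into NF_DROP. The v5.4 prototype is:

int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
		     const struct nf_conntrack_tuple *tuple,
		     const struct nf_conntrack_zone *zone);

The local_bh_disable() around nf_conncount_gc_list() is needed because the conncount list lock is also taken from the packet path in softirq context.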
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index a61d7ed..f6d4d0f 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -104,7 +101,7 @@
 	nft_counter_do_destroy(priv);
 }
 
-static void nft_counter_reset(struct nft_counter_percpu_priv __percpu *priv,
+static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
 			      struct nft_counter *total)
 {
 	struct nft_counter *this_cpu;
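
The dropped __percpu annotation fixes a sparse address-space warning: the priv argument is a plain kernel pointer, and the per-cpu annotation belongs on the counter member inside it:

struct nft_counter_percpu_priv {
	struct nft_counter __percpu *counter;
};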
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 5dd8774..46ca8bc 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -24,6 +21,7 @@
 #include <net/netfilter/nf_conntrack_labels.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
 
 struct nft_ct {
 	enum nft_ct_keys	key:8;
@@ -178,6 +176,11 @@
 		return;
 	}
 #endif
+	case NFT_CT_ID:
+		if (!nf_ct_is_confirmed(ct))
+			goto err;
+		*dest = nf_ct_get_id(ct);
+		return;
 	default:
 		break;
 	}
@@ -279,7 +282,7 @@
 {
 	const struct nft_ct *priv = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
-#ifdef CONFIG_NF_CONNTRACK_MARK
+#if defined(CONFIG_NF_CONNTRACK_MARK) || defined(CONFIG_NF_CONNTRACK_SECMARK)
 	u32 value = regs->data[priv->sreg];
 #endif
 	enum ip_conntrack_info ctinfo;
@@ -298,6 +301,14 @@
 		}
 		break;
 #endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	case NFT_CT_SECMARK:
+		if (ct->secmark != value) {
+			ct->secmark = value;
+			nf_conntrack_event_cache(IPCT_SECMARK, ct);
+		}
+		break;
+#endif
 #ifdef CONFIG_NF_CONNTRACK_LABELS
 	case NFT_CT_LABELS:
 		nf_connlabels_replace(ct,
@@ -471,6 +482,9 @@
 		len = sizeof(u16);
 		break;
 #endif
+	case NFT_CT_ID:
+		len = sizeof(u32);
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -565,6 +579,13 @@
 		len = sizeof(u32);
 		break;
 #endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	case NFT_CT_SECMARK:
+		if (tb[NFTA_CT_DIRECTION])
+			return -EINVAL;
+		len = sizeof(u32);
+		break;
+#endif
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -776,18 +797,17 @@
 	struct nlattr **tb;
 	int ret = 0;
 
-	if (!l4proto->ctnl_timeout.nlattr_to_obj)
-		return 0;
-
 	tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
 		     GFP_KERNEL);
 
 	if (!tb)
 		return -ENOMEM;
 
-	ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
-			       attr, l4proto->ctnl_timeout.nla_policy,
-			       NULL);
+	ret = nla_parse_nested_deprecated(tb,
+					  l4proto->ctnl_timeout.nlattr_max,
+					  attr,
+					  l4proto->ctnl_timeout.nla_policy,
+					  NULL);
 	if (ret < 0)
 		goto err;
 
@@ -858,7 +878,7 @@
 	l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
 	priv->l4proto = l4num;
 
-	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+	l4proto = nf_ct_l4proto_find(l4num);
 
 	if (l4proto->l4proto != l4num) {
 		ret = -EOPNOTSUPP;
@@ -890,7 +910,6 @@
 err_free_timeout:
 	kfree(timeout);
 err_proto_put:
-	nf_ct_l4proto_put(l4proto);
 	return ret;
 }
 
@@ -901,7 +920,6 @@
 	struct nf_ct_timeout *timeout = priv->timeout;
 
 	nf_ct_untimeout(ctx->net, timeout);
-	nf_ct_l4proto_put(timeout->l4proto);
 	nf_ct_netns_put(ctx->net, ctx->family);
 	kfree(priv->timeout);
 }
@@ -918,7 +936,7 @@
 	    nla_put_be16(skb, NFTA_CT_TIMEOUT_L3PROTO, htons(timeout->l3num)))
 		return -1;
 
-	nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA | NLA_F_NESTED);
+	nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA);
 	if (!nest_params)
 		return -1;
 
@@ -1136,6 +1154,135 @@
 	.owner		= THIS_MODULE,
 };
 
+struct nft_ct_expect_obj {
+	u16		l3num;
+	__be16		dport;
+	u8		l4proto;
+	u8		size;
+	u32		timeout;
+};
+
+static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
+				  const struct nlattr * const tb[],
+				  struct nft_object *obj)
+{
+	struct nft_ct_expect_obj *priv = nft_obj_data(obj);
+
+	if (!tb[NFTA_CT_EXPECT_L4PROTO] ||
+	    !tb[NFTA_CT_EXPECT_DPORT] ||
+	    !tb[NFTA_CT_EXPECT_TIMEOUT] ||
+	    !tb[NFTA_CT_EXPECT_SIZE])
+		return -EINVAL;
+
+	priv->l3num = ctx->family;
+	if (tb[NFTA_CT_EXPECT_L3PROTO])
+		priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO]));
+
+	priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
+	priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
+	priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
+	priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
+
+	return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_ct_expect_obj_destroy(const struct nft_ctx *ctx,
+				       struct nft_object *obj)
+{
+	nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static int nft_ct_expect_obj_dump(struct sk_buff *skb,
+				  struct nft_object *obj, bool reset)
+{
+	const struct nft_ct_expect_obj *priv = nft_obj_data(obj);
+
+	if (nla_put_be16(skb, NFTA_CT_EXPECT_L3PROTO, htons(priv->l3num)) ||
+	    nla_put_u8(skb, NFTA_CT_EXPECT_L4PROTO, priv->l4proto) ||
+	    nla_put_be16(skb, NFTA_CT_EXPECT_DPORT, priv->dport) ||
+	    nla_put_u32(skb, NFTA_CT_EXPECT_TIMEOUT, priv->timeout) ||
+	    nla_put_u8(skb, NFTA_CT_EXPECT_SIZE, priv->size))
+		return -1;
+
+	return 0;
+}
+
+static void nft_ct_expect_obj_eval(struct nft_object *obj,
+				   struct nft_regs *regs,
+				   const struct nft_pktinfo *pkt)
+{
+	const struct nft_ct_expect_obj *priv = nft_obj_data(obj);
+	struct nf_conntrack_expect *exp;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_help *help;
+	enum ip_conntrack_dir dir;
+	u16 l3num = priv->l3num;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(pkt->skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_UNTRACKED) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+	dir = CTINFO2DIR(ctinfo);
+
+	help = nfct_help(ct);
+	if (!help)
+		help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+	if (!help) {
+		regs->verdict.code = NF_DROP;
+		return;
+	}
+
+	if (help->expecting[NF_CT_EXPECT_CLASS_DEFAULT] >= priv->size) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+	if (l3num == NFPROTO_INET)
+		l3num = nf_ct_l3num(ct);
+
+	exp = nf_ct_expect_alloc(ct);
+	if (exp == NULL) {
+		regs->verdict.code = NF_DROP;
+		return;
+	}
+	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, l3num,
+		          &ct->tuplehash[!dir].tuple.src.u3,
+		          &ct->tuplehash[!dir].tuple.dst.u3,
+		          priv->l4proto, NULL, &priv->dport);
+	exp->timeout.expires = jiffies + priv->timeout * HZ;
+
+	if (nf_ct_expect_related(exp, 0) != 0)
+		regs->verdict.code = NF_DROP;
+}
+
+static const struct nla_policy nft_ct_expect_policy[NFTA_CT_EXPECT_MAX + 1] = {
+	[NFTA_CT_EXPECT_L3PROTO]	= { .type = NLA_U16 },
+	[NFTA_CT_EXPECT_L4PROTO]	= { .type = NLA_U8 },
+	[NFTA_CT_EXPECT_DPORT]		= { .type = NLA_U16 },
+	[NFTA_CT_EXPECT_TIMEOUT]	= { .type = NLA_U32 },
+	[NFTA_CT_EXPECT_SIZE]		= { .type = NLA_U8 },
+};
+
+static struct nft_object_type nft_ct_expect_obj_type;
+
+static const struct nft_object_ops nft_ct_expect_obj_ops = {
+	.type		= &nft_ct_expect_obj_type,
+	.size		= sizeof(struct nft_ct_expect_obj),
+	.eval		= nft_ct_expect_obj_eval,
+	.init		= nft_ct_expect_obj_init,
+	.destroy	= nft_ct_expect_obj_destroy,
+	.dump		= nft_ct_expect_obj_dump,
+};
+
+static struct nft_object_type nft_ct_expect_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_CT_EXPECT,
+	.ops		= &nft_ct_expect_obj_ops,
+	.maxattr	= NFTA_CT_EXPECT_MAX,
+	.policy		= nft_ct_expect_policy,
+	.owner		= THIS_MODULE,
+};
+
 static int __init nft_ct_module_init(void)
 {
 	int err;
@@ -1153,17 +1300,23 @@
 	err = nft_register_obj(&nft_ct_helper_obj_type);
 	if (err < 0)
 		goto err2;
+
+	err = nft_register_obj(&nft_ct_expect_obj_type);
+	if (err < 0)
+		goto err3;
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	err = nft_register_obj(&nft_ct_timeout_obj_type);
 	if (err < 0)
-		goto err3;
+		goto err4;
 #endif
 	return 0;
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+err4:
+	nft_unregister_obj(&nft_ct_expect_obj_type);
+#endif
 err3:
 	nft_unregister_obj(&nft_ct_helper_obj_type);
-#endif
 err2:
 	nft_unregister_expr(&nft_notrack_type);
 err1:
@@ -1176,6 +1329,7 @@
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	nft_unregister_obj(&nft_ct_timeout_obj_type);
 #endif
+	nft_unregister_obj(&nft_ct_expect_obj_type);
 	nft_unregister_obj(&nft_ct_helper_obj_type);
 	nft_unregister_expr(&nft_notrack_type);
 	nft_unregister_expr(&nft_ct_type);
@@ -1190,3 +1344,4 @@
 MODULE_ALIAS_NFT_EXPR("notrack");
 MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
 MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT);
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_EXPECT);
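
The nft_ct init path above gains an err4 label so that each successful
registration has a matching unwind step, executed in reverse order on
failure. The same goto-ladder idiom in a self-contained sketch (the
register_*/unregister_* pairs are stand-ins):

#include <stdio.h>

static int register_a(void) { return 0; }
static void unregister_a(void) { puts("unregister a"); }
static int register_b(void) { return 0; }
static void unregister_b(void) { puts("unregister b"); }
static int register_c(void) { return -1; }	/* simulate failure */

static int module_init_sketch(void)
{
	int err;

	err = register_a();
	if (err < 0)
		goto err1;
	err = register_b();
	if (err < 0)
		goto err2;
	err = register_c();
	if (err < 0)
		goto err3;
	return 0;

err3:	/* undo in reverse order of registration */
	unregister_b();
err2:
	unregister_a();
err1:
	return err;
}

int main(void)
{
	printf("init -> %d\n", module_init_sketch());
	return 0;
}
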
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index 2cc1e0e..c2e78c1 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -13,6 +10,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 #include <net/netfilter/nf_dup_netdev.h>
 
 struct nft_dup_netdev {
@@ -46,8 +44,6 @@
 	return nft_validate_register_load(priv->sreg_dev, sizeof(int));
 }
 
-static const struct nft_expr_ops nft_dup_netdev_ingress_ops;
-
 static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	struct nft_dup_netdev *priv = nft_expr_priv(expr);
@@ -61,6 +57,16 @@
 	return -1;
 }
 
+static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_expr *expr)
+{
+	const struct nft_dup_netdev *priv = nft_expr_priv(expr);
+	int oif = ctx->regs[priv->sreg_dev].data.data[0];
+
+	return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif);
+}
+
 static struct nft_expr_type nft_dup_netdev_type;
 static const struct nft_expr_ops nft_dup_netdev_ops = {
 	.type		= &nft_dup_netdev_type,
@@ -68,6 +74,7 @@
 	.eval		= nft_dup_netdev_eval,
 	.init		= nft_dup_netdev_init,
 	.dump		= nft_dup_netdev_dump,
+	.offload	= nft_dup_netdev_offload,
 };
 
 static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 6e91a37..8887295 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/kernel.h>
@@ -28,6 +24,23 @@
 	struct nft_set_binding		binding;
 };
 
+static int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
+{
+	int err;
+
+	if (src->ops->clone) {
+		dst->ops = src->ops;
+		err = src->ops->clone(dst, src);
+		if (err < 0)
+			return err;
+	} else {
+		memcpy(dst, src, src->ops->size);
+	}
+
+	__module_get(src->ops->type->owner);
+	return 0;
+}
+
 static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
 			    struct nft_regs *regs)
 {
@@ -43,7 +56,7 @@
 	elem = nft_set_elem_init(set, &priv->tmpl,
 				 &regs->data[priv->sreg_key],
 				 &regs->data[priv->sreg_data],
-				 timeout, GFP_ATOMIC);
+				 timeout, 0, GFP_ATOMIC);
 	if (elem == NULL)
 		goto err1;
 
@@ -62,9 +75,8 @@
 	return NULL;
 }
 
-static void nft_dynset_eval(const struct nft_expr *expr,
-			    struct nft_regs *regs,
-			    const struct nft_pktinfo *pkt)
+void nft_dynset_eval(const struct nft_expr *expr,
+		     struct nft_regs *regs, const struct nft_pktinfo *pkt)
 {
 	const struct nft_dynset *priv = nft_expr_priv(expr);
 	struct nft_set *set = priv->set;
@@ -72,6 +84,11 @@
 	const struct nft_expr *sexpr;
 	u64 timeout;
 
+	if (priv->op == NFT_DYNSET_OP_DELETE) {
+		set->ops->delete(set, &regs->data[priv->sreg_key]);
+		return;
+	}
+
 	if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
 			     expr, regs, &ext)) {
 		sexpr = NULL;
@@ -149,6 +166,7 @@
 	priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
 	switch (priv->op) {
 	case NFT_DYNSET_OP_ADD:
+	case NFT_DYNSET_OP_DELETE:
 		break;
 	case NFT_DYNSET_OP_UPDATE:
 		if (!(set->flags & NFT_SET_TIMEOUT))
@@ -235,14 +253,32 @@
 	return err;
 }
 
+static void nft_dynset_deactivate(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr,
+				  enum nft_trans_phase phase)
+{
+	struct nft_dynset *priv = nft_expr_priv(expr);
+
+	nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_dynset_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_dynset *priv = nft_expr_priv(expr);
+
+	priv->set->use++;
+}
+
 static void nft_dynset_destroy(const struct nft_ctx *ctx,
 			       const struct nft_expr *expr)
 {
 	struct nft_dynset *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
 	if (priv->expr != NULL)
 		nft_expr_destroy(ctx, priv->expr);
+
+	nf_tables_destroy_set(ctx, priv->set);
 }
 
 static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -279,6 +315,8 @@
 	.eval		= nft_dynset_eval,
 	.init		= nft_dynset_init,
 	.destroy	= nft_dynset_destroy,
+	.activate	= nft_dynset_activate,
+	.deactivate	= nft_dynset_deactivate,
 	.dump		= nft_dynset_dump,
 };
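
nft_dynset's new nft_expr_clone() helper copies an expression through a
type-provided clone callback when one exists and falls back to a bitwise
memcpy otherwise, then pins the owning module (the refcount step is omitted
below). A userspace sketch of the clone-op-with-memcpy-fallback shape, all
names hypothetical:

#include <stdio.h>
#include <string.h>

struct ops {
	size_t size;
	int (*clone)(void *dst, const void *src);	/* optional */
};

struct expr {
	const struct ops *ops;
	int state;
};

static int expr_clone(struct expr *dst, const struct expr *src)
{
	if (src->ops->clone) {
		dst->ops = src->ops;
		return src->ops->clone(dst, src);
	}
	/* no special state: a bitwise copy is sufficient */
	memcpy(dst, src, src->ops->size);
	return 0;
}

static const struct ops plain_ops = { .size = sizeof(struct expr) };

int main(void)
{
	struct expr a = { .ops = &plain_ops, .state = 42 }, b;

	if (expr_clone(&b, &a) == 0)
		printf("cloned state=%d\n", b.state);
	return 0;
}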
 
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a940c9f..a5e8469 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -62,6 +59,103 @@
 	regs->verdict.code = NFT_BREAK;
 }
 
+/* Find the offset of the specified option.
+ *
+ * If the target option is found, its offset is stored in *offset and the
+ * option number is returned.  Otherwise, a negative error is returned.
+ *
+ * If the first fragment doesn't contain the End of Options it is
+ * considered invalid.
+ */
+static int ipv4_find_option(struct net *net, struct sk_buff *skb,
+			    unsigned int *offset, int target)
+{
+	unsigned char optbuf[sizeof(struct ip_options) + 40];
+	struct ip_options *opt = (struct ip_options *)optbuf;
+	struct iphdr *iph, _iph;
+	unsigned int start;
+	bool found = false;
+	__be32 info;
+	int optlen;
+
+	iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+	if (!iph)
+		return -EBADMSG;
+	start = sizeof(struct iphdr);
+
+	optlen = iph->ihl * 4 - (int)sizeof(struct iphdr);
+	if (optlen <= 0)
+		return -ENOENT;
+
+	memset(opt, 0, sizeof(struct ip_options));
+	/* Copy the options since __ip_options_compile() modifies
+	 * the options.
+	 */
+	if (skb_copy_bits(skb, start, opt->__data, optlen))
+		return -EBADMSG;
+	opt->optlen = optlen;
+
+	if (__ip_options_compile(net, opt, NULL, &info))
+		return -EBADMSG;
+
+	switch (target) {
+	case IPOPT_SSRR:
+	case IPOPT_LSRR:
+		if (!opt->srr)
+			break;
+		found = target == IPOPT_SSRR ? opt->is_strictroute :
+					       !opt->is_strictroute;
+		if (found)
+			*offset = opt->srr + start;
+		break;
+	case IPOPT_RR:
+		if (!opt->rr)
+			break;
+		*offset = opt->rr + start;
+		found = true;
+		break;
+	case IPOPT_RA:
+		if (!opt->router_alert)
+			break;
+		*offset = opt->router_alert + start;
+		found = true;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return found ? target : -ENOENT;
+}
+
+static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	u32 *dest = &regs->data[priv->dreg];
+	struct sk_buff *skb = pkt->skb;
+	unsigned int offset;
+	int err;
+
+	if (skb->protocol != htons(ETH_P_IP))
+		goto err;
+
+	err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type);
+	if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+		*dest = (err >= 0);
+		return;
+	} else if (err < 0) {
+		goto err;
+	}
+	offset += priv->offset;
+
+	dest[priv->len / NFT_REG32_SIZE] = 0;
+	if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+		goto err;
+	return;
+err:
+	regs->verdict.code = NFT_BREAK;
+}
+
 static void *
 nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
 		       unsigned int len, void *buffer, unsigned int *tcphdr_len)
@@ -156,7 +250,8 @@
 		if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
 			return;
 
-		if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len))
+		if (skb_ensure_writable(pkt->skb,
+					pkt->xt.thoff + i + priv->len))
 			return;
 
 		tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
@@ -314,6 +409,28 @@
 	return nft_validate_register_load(priv->sreg, priv->len);
 }
 
+static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
+				const struct nft_expr *expr,
+				const struct nlattr * const tb[])
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	int err = nft_exthdr_init(ctx, expr, tb);
+
+	if (err < 0)
+		return err;
+
+	switch (priv->type) {
+	case IPOPT_SSRR:
+	case IPOPT_LSRR:
+	case IPOPT_RR:
+	case IPOPT_RA:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
 static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
 {
 	if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
@@ -360,6 +477,14 @@
 	.dump		= nft_exthdr_dump,
 };
 
+static const struct nft_expr_ops nft_exthdr_ipv4_ops = {
+	.type		= &nft_exthdr_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+	.eval		= nft_exthdr_ipv4_eval,
+	.init		= nft_exthdr_ipv4_init,
+	.dump		= nft_exthdr_dump,
+};
+
 static const struct nft_expr_ops nft_exthdr_tcp_ops = {
 	.type		= &nft_exthdr_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
@@ -400,6 +525,12 @@
 		if (tb[NFTA_EXTHDR_DREG])
 			return &nft_exthdr_ipv6_ops;
 		break;
+	case NFT_EXTHDR_OP_IPV4:
+		if (ctx->family != NFPROTO_IPV6) {
+			if (tb[NFTA_EXTHDR_DREG])
+				return &nft_exthdr_ipv4_ops;
+		}
+		break;
 	}
 
 	return ERR_PTR(-EOPNOTSUPP);
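
Note how ipv4_find_option() above never dereferences packet memory directly:
skb_header_pointer() and skb_copy_bits() fail cleanly when a read would run
past the buffer. A flat-buffer analogue of that bounds-checked accessor
(pkt_copy_bits is a made-up name):

#include <stdio.h>
#include <string.h>

/* Copy len bytes at offset from pkt into out; -1 if out of range. */
static int pkt_copy_bits(const unsigned char *pkt, size_t pktlen,
			 size_t offset, void *out, size_t len)
{
	if (offset > pktlen || len > pktlen - offset)
		return -1;
	memcpy(out, pkt + offset, len);
	return 0;
}

int main(void)
{
	unsigned char pkt[20] = { 0x45, 0x00 };	/* stub IPv4 header start */
	unsigned char ihl_byte;

	if (pkt_copy_bits(pkt, sizeof(pkt), 0, &ihl_byte, 1) == 0)
		printf("header length = %d words\n", ihl_byte & 0x0f);
	if (pkt_copy_bits(pkt, sizeof(pkt), 18, &ihl_byte, 4) < 0)
		puts("read past end rejected");
	return 0;
}
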
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 21df8cc..cfac096 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -1,7 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  *
  * Generic part shared by ipv4 and ipv6 backends.
  */
@@ -135,17 +133,17 @@
 EXPORT_SYMBOL_GPL(nft_fib_dump);
 
 void nft_fib_store_result(void *reg, const struct nft_fib *priv,
-			  const struct nft_pktinfo *pkt, int index)
+			  const struct net_device *dev)
 {
-	struct net_device *dev;
 	u32 *dreg = reg;
+	int index;
 
 	switch (priv->result) {
 	case NFT_FIB_RESULT_OIF:
+		index = dev ? dev->ifindex : 0;
 		*dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index;
 		break;
 	case NFT_FIB_RESULT_OIFNAME:
-		dev = dev_get_by_index_rcu(nft_net(pkt), index);
 		if (priv->flags & NFTA_FIB_F_PRESENT)
 			*dreg = !!dev;
 		else
diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
index 9120fc7..465432e 100644
--- a/net/netfilter/nft_fib_inet.c
+++ b/net/netfilter/nft_fib_inet.c
@@ -1,8 +1,4 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/kernel.h>
 #include <linux/init.h>
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
index 3997ee3..a2e726a 100644
--- a/net/netfilter/nft_fib_netdev.c
+++ b/net/netfilter/nft_fib_netdev.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2017 Pablo M. Bermudo Garay <pablombg@gmail.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * This code is based on net/netfilter/nft_fib_inet.c, written by
  * Florian Westphal <fw@strlen.de>.
  */
@@ -17,6 +14,7 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/ipv6.h>
 
 #include <net/netfilter/nft_fib.h>
 
@@ -37,6 +35,8 @@
 		}
 		break;
 	case ETH_P_IPV6:
+		if (!ipv6_mod_enabled())
+			break;
 		switch (priv->result) {
 		case NFT_FIB_RESULT_OIF:
 		case NFT_FIB_RESULT_OIFNAME:
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 5fd4c57..f29bbc7 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -5,12 +6,13 @@
 #include <linux/netfilter.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
+#include <linux/netfilter/nf_conntrack_common.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/ip.h> /* for ipv4 options. */
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <linux/netfilter/nf_conntrack_common.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_flow_table.h>
 
 struct nft_flow_offload {
@@ -29,10 +31,12 @@
 	memset(&fl, 0, sizeof(fl));
 	switch (nft_pf(pkt)) {
 	case NFPROTO_IPV4:
-		fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+		fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+		fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
 		break;
 	case NFPROTO_IPV6:
-		fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+		fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+		fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
 		break;
 	}
 
@@ -41,22 +45,25 @@
 		return -ENOENT;
 
 	route->tuple[dir].dst		= this_dst;
-	route->tuple[dir].ifindex	= nft_in(pkt)->ifindex;
 	route->tuple[!dir].dst		= other_dst;
-	route->tuple[!dir].ifindex	= nft_out(pkt)->ifindex;
 
 	return 0;
 }
 
-static bool nft_flow_offload_skip(struct sk_buff *skb)
+static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
 {
-	struct ip_options *opt  = &(IPCB(skb)->opt);
-
-	if (unlikely(opt->optlen))
-		return true;
 	if (skb_sec_path(skb))
 		return true;
 
+	if (family == NFPROTO_IPV4) {
+		const struct ip_options *opt;
+
+		opt = &(IPCB(skb)->opt);
+
+		if (unlikely(opt->optlen))
+			return true;
+	}
+
 	return false;
 }
 
@@ -66,6 +73,7 @@
 {
 	struct nft_flow_offload *priv = nft_expr_priv(expr);
 	struct nf_flowtable *flowtable = &priv->flowtable->data;
+	struct tcphdr _tcph, *tcph = NULL;
 	enum ip_conntrack_info ctinfo;
 	struct nf_flow_route route;
 	struct flow_offload *flow;
@@ -73,7 +81,7 @@
 	struct nf_conn *ct;
 	int ret;
 
-	if (nft_flow_offload_skip(pkt->skb))
+	if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt)))
 		goto out;
 
 	ct = nf_ct_get(pkt->skb, &ctinfo);
@@ -82,17 +90,22 @@
 
 	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
 	case IPPROTO_TCP:
+		tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff,
+					  sizeof(_tcph), &_tcph);
+		if (unlikely(!tcph || tcph->fin || tcph->rst))
+			goto out;
+		break;
 	case IPPROTO_UDP:
 		break;
 	default:
 		goto out;
 	}
 
-	if (test_bit(IPS_HELPER_BIT, &ct->status))
+	if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
+	    ct->status & IPS_SEQ_ADJUST)
 		goto out;
 
-	if (ctinfo == IP_CT_NEW ||
-	    ctinfo == IP_CT_RELATED)
+	if (!nf_ct_is_confirmed(ct))
 		goto out;
 
 	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
@@ -106,10 +119,16 @@
 	if (!flow)
 		goto err_flow_alloc;
 
+	if (tcph) {
+		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+		ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+	}
+
 	ret = flow_offload_add(flowtable, flow);
 	if (ret < 0)
 		goto err_flow_add;
 
+	dst_release(route.tuple[!dir].dst);
 	return;
 
 err_flow_add:
@@ -131,6 +150,11 @@
 	return nft_chain_validate_hooks(ctx->chain, hook_mask);
 }
 
+static const struct nla_policy nft_flow_offload_policy[NFTA_FLOW_MAX + 1] = {
+	[NFTA_FLOW_TABLE_NAME]	= { .type = NLA_STRING,
+				    .len = NFT_NAME_MAXLEN - 1 },
+};
+
 static int nft_flow_offload_init(const struct nft_ctx *ctx,
 				 const struct nft_expr *expr,
 				 const struct nlattr * const tb[])
@@ -153,6 +177,23 @@
 	return nf_ct_netns_get(ctx->net, ctx->family);
 }
 
+static void nft_flow_offload_deactivate(const struct nft_ctx *ctx,
+					const struct nft_expr *expr,
+					enum nft_trans_phase phase)
+{
+	struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+	nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase);
+}
+
+static void nft_flow_offload_activate(const struct nft_ctx *ctx,
+				      const struct nft_expr *expr)
+{
+	struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+	priv->flowtable->use++;
+}
+
 static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
 				     const struct nft_expr *expr)
 {
@@ -181,6 +222,8 @@
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
 	.eval		= nft_flow_offload_eval,
 	.init		= nft_flow_offload_init,
+	.activate	= nft_flow_offload_activate,
+	.deactivate	= nft_flow_offload_deactivate,
 	.destroy	= nft_flow_offload_destroy,
 	.validate	= nft_flow_offload_validate,
 	.dump		= nft_flow_offload_dump,
@@ -189,6 +232,7 @@
 static struct nft_expr_type nft_flow_offload_type __read_mostly = {
 	.name		= "flow_offload",
 	.ops		= &nft_flow_offload_ops,
+	.policy		= nft_flow_offload_policy,
 	.maxattr	= NFTA_FLOW_MAX,
 	.owner		= THIS_MODULE,
 };
@@ -201,7 +245,7 @@
 	if (event != NETDEV_DOWN)
 		return NOTIFY_DONE;
 
-	nf_flow_table_cleanup(dev_net(dev), dev);
+	nf_flow_table_cleanup(dev);
 
 	return NOTIFY_DONE;
 }
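
The flow-offload eval above declines to offload in several situations: a
security path is attached, an IPv4 header carries options, or a TCP packet
already has FIN/RST set. A sketch of that eligibility predicate over a
simplified packet descriptor (all fields hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct pkt_desc {
	int family;		/* 4 or 6 */
	bool has_secpath;
	unsigned int ip_optlen;	/* IPv4 only */
	bool tcp, fin, rst;
};

static bool can_offload(const struct pkt_desc *p)
{
	if (p->has_secpath)
		return false;
	if (p->family == 4 && p->ip_optlen)
		return false;
	if (p->tcp && (p->fin || p->rst))
		return false;	/* connection is closing; keep it slow-path */
	return true;
}

int main(void)
{
	struct pkt_desc ok = { .family = 4, .tcp = true };
	struct pkt_desc closing = { .family = 4, .tcp = true, .fin = true };

	printf("ok: %d, closing: %d\n", can_offload(&ok),
	       can_offload(&closing));
	return 0;
}
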
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 8abb989..aba11c2 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -15,6 +12,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 #include <net/netfilter/nf_dup_netdev.h>
 #include <net/neighbour.h>
 #include <net/ip.h>
@@ -53,8 +51,6 @@
 	return nft_validate_register_load(priv->sreg_dev, sizeof(int));
 }
 
-static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
-
 static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	struct nft_fwd_netdev *priv = nft_expr_priv(expr);
@@ -68,6 +64,16 @@
 	return -1;
 }
 
+static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_expr *expr)
+{
+	const struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+	int oif = ctx->regs[priv->sreg_dev].data.data[0];
+
+	return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif);
+}
+
 struct nft_fwd_neigh {
 	enum nft_registers	sreg_dev:8;
 	enum nft_registers	sreg_addr:8;
@@ -169,8 +175,6 @@
 	return nft_validate_register_load(priv->sreg_addr, addr_len);
 }
 
-static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
-
 static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	struct nft_fwd_neigh *priv = nft_expr_priv(expr);
@@ -201,6 +205,7 @@
 	.eval		= nft_fwd_netdev_eval,
 	.init		= nft_fwd_netdev_init,
 	.dump		= nft_fwd_netdev_dump,
+	.offload	= nft_fwd_netdev_offload,
 };
 
 static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index c2d2371..b836d55 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/kernel.h>
@@ -25,7 +21,6 @@
 	u32			modulus;
 	u32			seed;
 	u32			offset;
-	struct nft_set		*map;
 };
 
 static void nft_jhash_eval(const struct nft_expr *expr,
@@ -42,33 +37,10 @@
 	regs->data[priv->dreg] = h + priv->offset;
 }
 
-static void nft_jhash_map_eval(const struct nft_expr *expr,
-			       struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
-{
-	struct nft_jhash *priv = nft_expr_priv(expr);
-	const void *data = &regs->data[priv->sreg];
-	const struct nft_set *map = priv->map;
-	const struct nft_set_ext *ext;
-	u32 result;
-	bool found;
-
-	result = reciprocal_scale(jhash(data, priv->len, priv->seed),
-					priv->modulus) + priv->offset;
-
-	found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
-	if (!found)
-		return;
-
-	nft_data_copy(&regs->data[priv->dreg],
-		      nft_set_ext_data(ext), map->dlen);
-}
-
 struct nft_symhash {
 	enum nft_registers      dreg:8;
 	u32			modulus;
 	u32			offset;
-	struct nft_set		*map;
 };
 
 static void nft_symhash_eval(const struct nft_expr *expr,
@@ -84,28 +56,6 @@
 	regs->data[priv->dreg] = h + priv->offset;
 }
 
-static void nft_symhash_map_eval(const struct nft_expr *expr,
-				 struct nft_regs *regs,
-				 const struct nft_pktinfo *pkt)
-{
-	struct nft_symhash *priv = nft_expr_priv(expr);
-	struct sk_buff *skb = pkt->skb;
-	const struct nft_set *map = priv->map;
-	const struct nft_set_ext *ext;
-	u32 result;
-	bool found;
-
-	result = reciprocal_scale(__skb_get_hash_symmetric(skb),
-				  priv->modulus) + priv->offset;
-
-	found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
-	if (!found)
-		return;
-
-	nft_data_copy(&regs->data[priv->dreg],
-		      nft_set_ext_data(ext), map->dlen);
-}
-
 static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
 	[NFTA_HASH_SREG]	= { .type = NLA_U32 },
 	[NFTA_HASH_DREG]	= { .type = NLA_U32 },
@@ -114,9 +64,6 @@
 	[NFTA_HASH_SEED]	= { .type = NLA_U32 },
 	[NFTA_HASH_OFFSET]	= { .type = NLA_U32 },
 	[NFTA_HASH_TYPE]	= { .type = NLA_U32 },
-	[NFTA_HASH_SET_NAME]	= { .type = NLA_STRING,
-				    .len = NFT_SET_MAXNAMELEN - 1 },
-	[NFTA_HASH_SET_ID]	= { .type = NLA_U32 },
 };
 
 static int nft_jhash_init(const struct nft_ctx *ctx,
@@ -166,20 +113,6 @@
 					   NFT_DATA_VALUE, sizeof(u32));
 }
 
-static int nft_jhash_map_init(const struct nft_ctx *ctx,
-			      const struct nft_expr *expr,
-			      const struct nlattr * const tb[])
-{
-	struct nft_jhash *priv = nft_expr_priv(expr);
-	u8 genmask = nft_genmask_next(ctx->net);
-
-	nft_jhash_init(ctx, expr, tb);
-	priv->map = nft_set_lookup_global(ctx->net, ctx->table,
-					  tb[NFTA_HASH_SET_NAME],
-					  tb[NFTA_HASH_SET_ID], genmask);
-	return PTR_ERR_OR_ZERO(priv->map);
-}
-
 static int nft_symhash_init(const struct nft_ctx *ctx,
 			    const struct nft_expr *expr,
 			    const struct nlattr * const tb[])
@@ -196,7 +129,7 @@
 	priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
 
 	priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
-	if (priv->modulus <= 1)
+	if (priv->modulus < 1)
 		return -ERANGE;
 
 	if (priv->offset + priv->modulus - 1 < priv->offset)
@@ -206,20 +139,6 @@
 					   NFT_DATA_VALUE, sizeof(u32));
 }
 
-static int nft_symhash_map_init(const struct nft_ctx *ctx,
-				const struct nft_expr *expr,
-				const struct nlattr * const tb[])
-{
-	struct nft_jhash *priv = nft_expr_priv(expr);
-	u8 genmask = nft_genmask_next(ctx->net);
-
-	nft_symhash_init(ctx, expr, tb);
-	priv->map = nft_set_lookup_global(ctx->net, ctx->table,
-					  tb[NFTA_HASH_SET_NAME],
-					  tb[NFTA_HASH_SET_ID], genmask);
-	return PTR_ERR_OR_ZERO(priv->map);
-}
-
 static int nft_jhash_dump(struct sk_buff *skb,
 			  const struct nft_expr *expr)
 {
@@ -247,18 +166,6 @@
 	return -1;
 }
 
-static int nft_jhash_map_dump(struct sk_buff *skb,
-			       const struct nft_expr *expr)
-{
-	const struct nft_jhash *priv = nft_expr_priv(expr);
-
-	if (nft_jhash_dump(skb, expr) ||
-	    nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name))
-		return -1;
-
-	return 0;
-}
-
 static int nft_symhash_dump(struct sk_buff *skb,
 			    const struct nft_expr *expr)
 {
@@ -279,18 +186,6 @@
 	return -1;
 }
 
-static int nft_symhash_map_dump(struct sk_buff *skb,
-				const struct nft_expr *expr)
-{
-	const struct nft_symhash *priv = nft_expr_priv(expr);
-
-	if (nft_symhash_dump(skb, expr) ||
-	    nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name))
-		return -1;
-
-	return 0;
-}
-
 static struct nft_expr_type nft_hash_type;
 static const struct nft_expr_ops nft_jhash_ops = {
 	.type		= &nft_hash_type,
@@ -300,14 +195,6 @@
 	.dump		= nft_jhash_dump,
 };
 
-static const struct nft_expr_ops nft_jhash_map_ops = {
-	.type		= &nft_hash_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_jhash)),
-	.eval		= nft_jhash_map_eval,
-	.init		= nft_jhash_map_init,
-	.dump		= nft_jhash_map_dump,
-};
-
 static const struct nft_expr_ops nft_symhash_ops = {
 	.type		= &nft_hash_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
@@ -316,14 +203,6 @@
 	.dump		= nft_symhash_dump,
 };
 
-static const struct nft_expr_ops nft_symhash_map_ops = {
-	.type		= &nft_hash_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
-	.eval		= nft_symhash_map_eval,
-	.init		= nft_symhash_map_init,
-	.dump		= nft_symhash_map_dump,
-};
-
 static const struct nft_expr_ops *
 nft_hash_select_ops(const struct nft_ctx *ctx,
 		    const struct nlattr * const tb[])
@@ -336,12 +215,8 @@
 	type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE]));
 	switch (type) {
 	case NFT_HASH_SYM:
-		if (tb[NFTA_HASH_SET_NAME])
-			return &nft_symhash_map_ops;
 		return &nft_symhash_ops;
 	case NFT_HASH_JENKINS:
-		if (tb[NFTA_HASH_SET_NAME])
-			return &nft_jhash_map_ops;
 		return &nft_jhash_ops;
 	default:
 		break;
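
Both hash expressions that remain above map a 32-bit hash into
[offset, offset + modulus - 1] via reciprocal_scale(), and init rejects
parameter pairs whose upper bound would wrap a u32; the relaxed check now
permits modulus == 1 for symhash. A userspace sketch of the scaling and the
wrap check (the hash value is a stand-in for jhash):

#include <stdint.h>
#include <stdio.h>

/* Same formula as the kernel's reciprocal_scale(): maps val into
 * [0, range) without a division. */
static uint32_t reciprocal_scale(uint32_t val, uint32_t range)
{
	return (uint32_t)(((uint64_t)val * range) >> 32);
}

static int hash_params_ok(uint32_t offset, uint32_t modulus)
{
	if (modulus < 1)
		return 0;
	if (offset + modulus - 1 < offset)	/* would wrap around */
		return 0;
	return 1;
}

int main(void)
{
	uint32_t h = 0x9e3779b9;	/* stand-in for jhash(data, len, seed) */

	if (hash_params_ok(100, 16))
		printf("slot = %u\n", reciprocal_scale(h, 16) + 100);
	printf("wrapping params ok? %d\n", hash_params_ok(~0u, 2));
	return 0;
}
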
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 0777a93..c7f0ef7 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -16,10 +13,11 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 
-static void nft_immediate_eval(const struct nft_expr *expr,
-			       struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
+void nft_immediate_eval(const struct nft_expr *expr,
+			struct nft_regs *regs,
+			const struct nft_pktinfo *pkt)
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
 
@@ -72,10 +70,14 @@
 }
 
 static void nft_immediate_deactivate(const struct nft_ctx *ctx,
-				     const struct nft_expr *expr)
+				     const struct nft_expr *expr,
+				     enum nft_trans_phase phase)
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
 
+	if (phase == NFT_TRANS_COMMIT)
+		return;
+
 	return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
 }
 
@@ -123,6 +125,44 @@
 	return 0;
 }
 
+static int nft_immediate_offload_verdict(struct nft_offload_ctx *ctx,
+					 struct nft_flow_rule *flow,
+					 const struct nft_immediate_expr *priv)
+{
+	struct flow_action_entry *entry;
+	const struct nft_data *data;
+
+	entry = &flow->rule->action.entries[ctx->num_actions++];
+
+	data = &priv->data;
+	switch (data->verdict.code) {
+	case NF_ACCEPT:
+		entry->id = FLOW_ACTION_ACCEPT;
+		break;
+	case NF_DROP:
+		entry->id = FLOW_ACTION_DROP;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int nft_immediate_offload(struct nft_offload_ctx *ctx,
+				 struct nft_flow_rule *flow,
+				 const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	if (priv->dreg == NFT_REG_VERDICT)
+		return nft_immediate_offload_verdict(ctx, flow, priv);
+
+	memcpy(&ctx->regs[priv->dreg].data, &priv->data, sizeof(priv->data));
+
+	return 0;
+}
+
 static const struct nft_expr_ops nft_imm_ops = {
 	.type		= &nft_imm_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
@@ -132,6 +172,8 @@
 	.deactivate	= nft_immediate_deactivate,
 	.dump		= nft_immediate_dump,
 	.validate	= nft_immediate_validate,
+	.offload	= nft_immediate_offload,
+	.offload_flags	= NFT_OFFLOAD_F_ACTION,
 };
 
 struct nft_expr_type nft_imm_type __read_mostly = {
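
nft_immediate_offload_verdict() above translates an nft verdict into a
flow-action id and reports -EOPNOTSUPP for anything the hardware path cannot
express. The same translate-or-reject switch in miniature, with illustrative
enums:

#include <errno.h>
#include <stdio.h>

enum verdict { V_ACCEPT, V_DROP, V_QUEUE };
enum action { ACT_ACCEPT, ACT_DROP };

static int verdict_to_action(enum verdict v, enum action *out)
{
	switch (v) {
	case V_ACCEPT:
		*out = ACT_ACCEPT;
		return 0;
	case V_DROP:
		*out = ACT_DROP;
		return 0;
	default:
		return -EOPNOTSUPP;	/* cannot be expressed in hardware */
	}
}

int main(void)
{
	enum action a;

	printf("accept -> %d\n", verdict_to_action(V_ACCEPT, &a));
	printf("queue  -> %d\n", verdict_to_action(V_QUEUE, &a));
	return 0;
}
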
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 72f13a1..35b67d7 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 655187b..fe4831f 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  * Copyright (c) 2012-2014 Pablo Neira Ayuso <pablo@netfilter.org>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index ad13e86..660bad6 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -76,9 +73,6 @@
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
-	if (set->flags & NFT_SET_EVAL)
-		return -EOPNOTSUPP;
-
 	priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
 	err = nft_validate_register_load(priv->sreg, set->klen);
 	if (err < 0)
@@ -121,12 +115,29 @@
 	return 0;
 }
 
+static void nft_lookup_deactivate(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr,
+				  enum nft_trans_phase phase)
+{
+	struct nft_lookup *priv = nft_expr_priv(expr);
+
+	nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_lookup_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_lookup *priv = nft_expr_priv(expr);
+
+	priv->set->use++;
+}
+
 static void nft_lookup_destroy(const struct nft_ctx *ctx,
 			       const struct nft_expr *expr)
 {
 	struct nft_lookup *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+	nf_tables_destroy_set(ctx, priv->set);
 }
 
 static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -209,6 +220,8 @@
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
 	.eval		= nft_lookup_eval,
 	.init		= nft_lookup_init,
+	.activate	= nft_lookup_activate,
+	.deactivate	= nft_lookup_deactivate,
 	.destroy	= nft_lookup_destroy,
 	.dump		= nft_lookup_dump,
 	.validate	= nft_lookup_validate,
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 9d8655b..39dc94f 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -14,18 +11,23 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nft_masq.h>
+#include <net/netfilter/nf_nat_masquerade.h>
 
-const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
+struct nft_masq {
+	u32			flags;
+	enum nft_registers      sreg_proto_min:8;
+	enum nft_registers      sreg_proto_max:8;
+};
+
+static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
 	[NFTA_MASQ_FLAGS]		= { .type = NLA_U32 },
 	[NFTA_MASQ_REG_PROTO_MIN]	= { .type = NLA_U32 },
 	[NFTA_MASQ_REG_PROTO_MAX]	= { .type = NLA_U32 },
 };
-EXPORT_SYMBOL_GPL(nft_masq_policy);
 
-int nft_masq_validate(const struct nft_ctx *ctx,
-		      const struct nft_expr *expr,
-		      const struct nft_data **data)
+static int nft_masq_validate(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nft_data **data)
 {
 	int err;
 
@@ -36,11 +38,10 @@
 	return nft_chain_validate_hooks(ctx->chain,
 				        (1 << NF_INET_POST_ROUTING));
 }
-EXPORT_SYMBOL_GPL(nft_masq_validate);
 
-int nft_masq_init(const struct nft_ctx *ctx,
-		  const struct nft_expr *expr,
-		  const struct nlattr * const tb[])
+static int nft_masq_init(const struct nft_ctx *ctx,
+			 const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
 {
 	u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
 	struct nft_masq *priv = nft_expr_priv(expr);
@@ -75,9 +76,8 @@
 
 	return nf_ct_netns_get(ctx->net, ctx->family);
 }
-EXPORT_SYMBOL_GPL(nft_masq_init);
 
-int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_masq *priv = nft_expr_priv(expr);
 
@@ -98,7 +98,210 @@
 nla_put_failure:
 	return -1;
 }
-EXPORT_SYMBOL_GPL(nft_masq_dump);
+
+static void nft_masq_ipv4_eval(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	struct nft_masq *priv = nft_expr_priv(expr);
+	struct nf_nat_range2 range;
+
+	memset(&range, 0, sizeof(range));
+	range.flags = priv->flags;
+	if (priv->sreg_proto_min) {
+		range.min_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		range.max_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
+	}
+	regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
+						    &range, nft_out(pkt));
+}
+
+static void
+nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
+}
+
+static struct nft_expr_type nft_masq_ipv4_type;
+static const struct nft_expr_ops nft_masq_ipv4_ops = {
+	.type		= &nft_masq_ipv4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+	.eval		= nft_masq_ipv4_eval,
+	.init		= nft_masq_init,
+	.destroy	= nft_masq_ipv4_destroy,
+	.dump		= nft_masq_dump,
+	.validate	= nft_masq_validate,
+};
+
+static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
+	.family		= NFPROTO_IPV4,
+	.name		= "masq",
+	.ops		= &nft_masq_ipv4_ops,
+	.policy		= nft_masq_policy,
+	.maxattr	= NFTA_MASQ_MAX,
+	.owner		= THIS_MODULE,
+};
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static void nft_masq_ipv6_eval(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	struct nft_masq *priv = nft_expr_priv(expr);
+	struct nf_nat_range2 range;
+
+	memset(&range, 0, sizeof(range));
+	range.flags = priv->flags;
+	if (priv->sreg_proto_min) {
+		range.min_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		range.max_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
+	}
+	regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
+						    nft_out(pkt));
+}
+
+static void
+nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
+}
+
+static struct nft_expr_type nft_masq_ipv6_type;
+static const struct nft_expr_ops nft_masq_ipv6_ops = {
+	.type		= &nft_masq_ipv6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+	.eval		= nft_masq_ipv6_eval,
+	.init		= nft_masq_init,
+	.destroy	= nft_masq_ipv6_destroy,
+	.dump		= nft_masq_dump,
+	.validate	= nft_masq_validate,
+};
+
+static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.name		= "masq",
+	.ops		= &nft_masq_ipv6_ops,
+	.policy		= nft_masq_policy,
+	.maxattr	= NFTA_MASQ_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_masq_module_init_ipv6(void)
+{
+	return nft_register_expr(&nft_masq_ipv6_type);
+}
+
+static void nft_masq_module_exit_ipv6(void)
+{
+	nft_unregister_expr(&nft_masq_ipv6_type);
+}
+#else
+static inline int nft_masq_module_init_ipv6(void) { return 0; }
+static inline void nft_masq_module_exit_ipv6(void) {}
+#endif
+
+#ifdef CONFIG_NF_TABLES_INET
+static void nft_masq_inet_eval(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	switch (nft_pf(pkt)) {
+	case NFPROTO_IPV4:
+		return nft_masq_ipv4_eval(expr, regs, pkt);
+	case NFPROTO_IPV6:
+		return nft_masq_ipv6_eval(expr, regs, pkt);
+	}
+
+	WARN_ON_ONCE(1);
+}
+
+static void
+nft_masq_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	nf_ct_netns_put(ctx->net, NFPROTO_INET);
+}
+
+static struct nft_expr_type nft_masq_inet_type;
+static const struct nft_expr_ops nft_masq_inet_ops = {
+	.type		= &nft_masq_inet_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+	.eval		= nft_masq_inet_eval,
+	.init		= nft_masq_init,
+	.destroy	= nft_masq_inet_destroy,
+	.dump		= nft_masq_dump,
+	.validate	= nft_masq_validate,
+};
+
+static struct nft_expr_type nft_masq_inet_type __read_mostly = {
+	.family		= NFPROTO_INET,
+	.name		= "masq",
+	.ops		= &nft_masq_inet_ops,
+	.policy		= nft_masq_policy,
+	.maxattr	= NFTA_MASQ_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_masq_module_init_inet(void)
+{
+	return nft_register_expr(&nft_masq_inet_type);
+}
+
+static void nft_masq_module_exit_inet(void)
+{
+	nft_unregister_expr(&nft_masq_inet_type);
+}
+#else
+static inline int nft_masq_module_init_inet(void) { return 0; }
+static inline void nft_masq_module_exit_inet(void) {}
+#endif
+
+static int __init nft_masq_module_init(void)
+{
+	int ret;
+
+	ret = nft_masq_module_init_ipv6();
+	if (ret < 0)
+		return ret;
+
+	ret = nft_masq_module_init_inet();
+	if (ret < 0) {
+		nft_masq_module_exit_ipv6();
+		return ret;
+	}
+
+	ret = nft_register_expr(&nft_masq_ipv4_type);
+	if (ret < 0) {
+		nft_masq_module_exit_inet();
+		nft_masq_module_exit_ipv6();
+		return ret;
+	}
+
+	ret = nf_nat_masquerade_inet_register_notifiers();
+	if (ret < 0) {
+		nft_masq_module_exit_ipv6();
+		nft_masq_module_exit_inet();
+		nft_unregister_expr(&nft_masq_ipv4_type);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void __exit nft_masq_module_exit(void)
+{
+	nft_masq_module_exit_ipv6();
+	nft_masq_module_exit_inet();
+	nft_unregister_expr(&nft_masq_ipv4_type);
+	nf_nat_masquerade_inet_unregister_notifiers();
+}
+
+module_init(nft_masq_module_init);
+module_exit(nft_masq_module_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
+MODULE_ALIAS_NFT_EXPR("masq");
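
The masq eval paths above load a source-port range from two 16-bit registers
into an nf_nat_range2 in network byte order. A rough analogue of that
boundary conversion, using htons() where the kernel uses the
nft_reg_load16() accessor (the register layout here is illustrative):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct port_range { uint16_t min_be, max_be; };	/* network byte order */

static void load_range(struct port_range *r, const uint32_t *regs,
		       unsigned int sreg_min, unsigned int sreg_max)
{
	/* for illustration, treat registers as host-order and convert
	 * once at the boundary */
	r->min_be = htons((uint16_t)regs[sreg_min]);
	r->max_be = htons((uint16_t)regs[sreg_max]);
}

int main(void)
{
	uint32_t regs[4] = { 0, 1024, 2048, 0 };
	struct port_range r;

	load_range(&r, regs, 1, 2);
	printf("min=%u max=%u\n", ntohs(r.min_be), ntohs(r.max_be));
	return 0;
}
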
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 297fe7d..317e3a9 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  * Copyright (c) 2014 Intel Corporation
  * Author: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -24,22 +21,40 @@
 #include <net/tcp_states.h> /* for TCP_TIME_WAIT */
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nft_meta.h>
+#include <net/netfilter/nf_tables_offload.h>
 
 #include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
 
-struct nft_meta {
-	enum nft_meta_keys	key:8;
-	union {
-		enum nft_registers	dreg:8;
-		enum nft_registers	sreg:8;
-	};
-};
+#define NFT_META_SECS_PER_MINUTE	60
+#define NFT_META_SECS_PER_HOUR		3600
+#define NFT_META_SECS_PER_DAY		86400
+#define NFT_META_DAYS_PER_WEEK		7
 
 static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
 
-#ifdef CONFIG_NF_TABLES_BRIDGE
-#include "../bridge/br_private.h"
-#endif
+static u8 nft_meta_weekday(unsigned long secs)
+{
+	unsigned int dse;
+	u8 wday;
+
+	secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest;
+	dse = secs / NFT_META_SECS_PER_DAY;
+	wday = (4 + dse) % NFT_META_DAYS_PER_WEEK;
+
+	return wday;
+}
+
+static u32 nft_meta_hour(unsigned long secs)
+{
+	struct tm tm;
+
+	time64_to_tm(secs, 0, &tm);
+
+	return tm.tm_hour * NFT_META_SECS_PER_HOUR
+		+ tm.tm_min * NFT_META_SECS_PER_MINUTE
+		+ tm.tm_sec;
+}
 
 void nft_meta_get_eval(const struct nft_expr *expr,
 		       struct nft_regs *regs,
@@ -50,9 +65,6 @@
 	const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
 	struct sock *sk;
 	u32 *dest = &regs->data[priv->dreg];
-#ifdef CONFIG_NF_TABLES_BRIDGE
-	const struct net_bridge_port *p;
-#endif
 
 	switch (priv->key) {
 	case NFT_META_LEN:
@@ -76,24 +88,16 @@
 		*dest = skb->mark;
 		break;
 	case NFT_META_IIF:
-		if (in == NULL)
-			goto err;
-		*dest = in->ifindex;
+		*dest = in ? in->ifindex : 0;
 		break;
 	case NFT_META_OIF:
-		if (out == NULL)
-			goto err;
-		*dest = out->ifindex;
+		*dest = out ? out->ifindex : 0;
 		break;
 	case NFT_META_IIFNAME:
-		if (in == NULL)
-			goto err;
-		strncpy((char *)dest, in->name, IFNAMSIZ);
+		strncpy((char *)dest, in ? in->name : "", IFNAMSIZ);
 		break;
 	case NFT_META_OIFNAME:
-		if (out == NULL)
-			goto err;
-		strncpy((char *)dest, out->name, IFNAMSIZ);
+		strncpy((char *)dest, out ? out->name : "", IFNAMSIZ);
 		break;
 	case NFT_META_IIFTYPE:
 		if (in == NULL)
@@ -229,21 +233,28 @@
 	}
 #ifdef CONFIG_XFRM
 	case NFT_META_SECPATH:
-		nft_reg_store8(dest, !!skb->sp);
+		nft_reg_store8(dest, secpath_exists(skb));
 		break;
 #endif
-#ifdef CONFIG_NF_TABLES_BRIDGE
-	case NFT_META_BRI_IIFNAME:
-		if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
+	case NFT_META_IIFKIND:
+		if (in == NULL || in->rtnl_link_ops == NULL)
 			goto err;
-		strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
-		return;
-	case NFT_META_BRI_OIFNAME:
-		if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
+		strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
+		break;
+	case NFT_META_OIFKIND:
+		if (out == NULL || out->rtnl_link_ops == NULL)
 			goto err;
-		strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
-		return;
-#endif
+		strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
+		break;
+	case NFT_META_TIME_NS:
+		nft_reg_store64(dest, ktime_get_real_ns());
+		break;
+	case NFT_META_TIME_DAY:
+		nft_reg_store8(dest, nft_meta_weekday(get_seconds()));
+		break;
+	case NFT_META_TIME_HOUR:
+		*dest = nft_meta_hour(get_seconds());
+		break;
 	default:
 		WARN_ON(1);
 		goto err;
@@ -253,10 +264,11 @@
 err:
 	regs->verdict.code = NFT_BREAK;
 }
+EXPORT_SYMBOL_GPL(nft_meta_get_eval);
 
-static void nft_meta_set_eval(const struct nft_expr *expr,
-			      struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
+void nft_meta_set_eval(const struct nft_expr *expr,
+		       struct nft_regs *regs,
+		       const struct nft_pktinfo *pkt)
 {
 	const struct nft_meta *meta = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
@@ -284,20 +296,27 @@
 
 		skb->nf_trace = !!value8;
 		break;
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+		skb->secmark = value;
+		break;
+#endif
 	default:
 		WARN_ON(1);
 	}
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_eval);
 
-static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
 	[NFTA_META_DREG]	= { .type = NLA_U32 },
 	[NFTA_META_KEY]		= { .type = NLA_U32 },
 	[NFTA_META_SREG]	= { .type = NLA_U32 },
 };
+EXPORT_SYMBOL_GPL(nft_meta_policy);
 
-static int nft_meta_get_init(const struct nft_ctx *ctx,
-			     const struct nft_expr *expr,
-			     const struct nlattr * const tb[])
+int nft_meta_get_init(const struct nft_ctx *ctx,
+		      const struct nft_expr *expr,
+		      const struct nlattr * const tb[])
 {
 	struct nft_meta *priv = nft_expr_priv(expr);
 	unsigned int len;
@@ -335,6 +354,8 @@
 		break;
 	case NFT_META_IIFNAME:
 	case NFT_META_OIFNAME:
+	case NFT_META_IIFKIND:
+	case NFT_META_OIFKIND:
 		len = IFNAMSIZ;
 		break;
 	case NFT_META_PRANDOM:
@@ -346,14 +367,15 @@
 		len = sizeof(u8);
 		break;
 #endif
-#ifdef CONFIG_NF_TABLES_BRIDGE
-	case NFT_META_BRI_IIFNAME:
-	case NFT_META_BRI_OIFNAME:
-		if (ctx->family != NFPROTO_BRIDGE)
-			return -EOPNOTSUPP;
-		len = IFNAMSIZ;
+	case NFT_META_TIME_NS:
+		len = sizeof(u64);
 		break;
-#endif
+	case NFT_META_TIME_DAY:
+		len = sizeof(u8);
+		break;
+	case NFT_META_TIME_HOUR:
+		len = sizeof(u32);
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -362,6 +384,7 @@
 	return nft_validate_register_store(ctx, priv->dreg, NULL,
 					   NFT_DATA_VALUE, len);
 }
+EXPORT_SYMBOL_GPL(nft_meta_get_init);
 
 static int nft_meta_get_validate(const struct nft_ctx *ctx,
 				 const struct nft_expr *expr,
@@ -395,9 +418,9 @@
 #endif
 }
 
-static int nft_meta_set_validate(const struct nft_ctx *ctx,
-				 const struct nft_expr *expr,
-				 const struct nft_data **data)
+int nft_meta_set_validate(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nft_data **data)
 {
 	struct nft_meta *priv = nft_expr_priv(expr);
 	unsigned int hooks;
@@ -423,10 +446,11 @@
 
 	return nft_chain_validate_hooks(ctx->chain, hooks);
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_validate);
 
-static int nft_meta_set_init(const struct nft_ctx *ctx,
-			     const struct nft_expr *expr,
-			     const struct nlattr * const tb[])
+int nft_meta_set_init(const struct nft_ctx *ctx,
+		      const struct nft_expr *expr,
+		      const struct nlattr * const tb[])
 {
 	struct nft_meta *priv = nft_expr_priv(expr);
 	unsigned int len;
@@ -436,6 +460,9 @@
 	switch (priv->key) {
 	case NFT_META_MARK:
 	case NFT_META_PRIORITY:
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+#endif
 		len = sizeof(u32);
 		break;
 	case NFT_META_NFTRACE:
@@ -458,9 +485,10 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_init);
 
-static int nft_meta_get_dump(struct sk_buff *skb,
-			     const struct nft_expr *expr)
+int nft_meta_get_dump(struct sk_buff *skb,
+		      const struct nft_expr *expr)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 
@@ -473,8 +501,9 @@
 nla_put_failure:
 	return -1;
 }
+EXPORT_SYMBOL_GPL(nft_meta_get_dump);
 
-static int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 
@@ -488,15 +517,42 @@
 nla_put_failure:
 	return -1;
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_dump);
 
-static void nft_meta_set_destroy(const struct nft_ctx *ctx,
-				 const struct nft_expr *expr)
+void nft_meta_set_destroy(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 
 	if (priv->key == NFT_META_NFTRACE)
 		static_branch_dec(&nft_trace_enabled);
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_destroy);
+
+static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
+				struct nft_flow_rule *flow,
+				const struct nft_expr *expr)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	switch (priv->key) {
+	case NFT_META_PROTOCOL:
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto,
+				  sizeof(__u16), reg);
+		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
+		break;
+	case NFT_META_L4PROTO:
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
+				  sizeof(__u8), reg);
+		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
 
 static const struct nft_expr_ops nft_meta_get_ops = {
 	.type		= &nft_meta_type,
@@ -505,6 +561,7 @@
 	.init		= nft_meta_get_init,
 	.dump		= nft_meta_get_dump,
 	.validate	= nft_meta_get_validate,
+	.offload	= nft_meta_get_offload,
 };
 
 static const struct nft_expr_ops nft_meta_set_ops = {
@@ -527,6 +584,10 @@
 	if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
 		return ERR_PTR(-EINVAL);
 
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) && IS_MODULE(CONFIG_NFT_BRIDGE_META)
+	if (ctx->family == NFPROTO_BRIDGE)
+		return ERR_PTR(-EAGAIN);
+#endif
 	if (tb[NFTA_META_DREG])
 		return &nft_meta_get_ops;
 
@@ -543,3 +604,111 @@
 	.maxattr	= NFTA_META_MAX,
 	.owner		= THIS_MODULE,
 };
+
+#ifdef CONFIG_NETWORK_SECMARK
+struct nft_secmark {
+	u32 secid;
+	char *ctx;
+};
+
+static const struct nla_policy nft_secmark_policy[NFTA_SECMARK_MAX + 1] = {
+	[NFTA_SECMARK_CTX]     = { .type = NLA_STRING, .len = NFT_SECMARK_CTX_MAXLEN },
+};
+
+static int nft_secmark_compute_secid(struct nft_secmark *priv)
+{
+	u32 tmp_secid = 0;
+	int err;
+
+	err = security_secctx_to_secid(priv->ctx, strlen(priv->ctx), &tmp_secid);
+	if (err)
+		return err;
+
+	if (!tmp_secid)
+		return -ENOENT;
+
+	err = security_secmark_relabel_packet(tmp_secid);
+	if (err)
+		return err;
+
+	priv->secid = tmp_secid;
+	return 0;
+}
+
+static void nft_secmark_obj_eval(struct nft_object *obj, struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
+{
+	const struct nft_secmark *priv = nft_obj_data(obj);
+	struct sk_buff *skb = pkt->skb;
+
+	skb->secmark = priv->secid;
+}
+
+static int nft_secmark_obj_init(const struct nft_ctx *ctx,
+				const struct nlattr * const tb[],
+				struct nft_object *obj)
+{
+	struct nft_secmark *priv = nft_obj_data(obj);
+	int err;
+
+	if (tb[NFTA_SECMARK_CTX] == NULL)
+		return -EINVAL;
+
+	priv->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL);
+	if (!priv->ctx)
+		return -ENOMEM;
+
+	err = nft_secmark_compute_secid(priv);
+	if (err) {
+		kfree(priv->ctx);
+		return err;
+	}
+
+	security_secmark_refcount_inc();
+
+	return 0;
+}
+
+static int nft_secmark_obj_dump(struct sk_buff *skb, struct nft_object *obj,
+				bool reset)
+{
+	struct nft_secmark *priv = nft_obj_data(obj);
+	int err;
+
+	if (nla_put_string(skb, NFTA_SECMARK_CTX, priv->ctx))
+		return -1;
+
+	if (reset) {
+		err = nft_secmark_compute_secid(priv);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void nft_secmark_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
+{
+	struct nft_secmark *priv = nft_obj_data(obj);
+
+	security_secmark_refcount_dec();
+
+	kfree(priv->ctx);
+}
+
+static const struct nft_object_ops nft_secmark_obj_ops = {
+	.type		= &nft_secmark_obj_type,
+	.size		= sizeof(struct nft_secmark),
+	.init		= nft_secmark_obj_init,
+	.eval		= nft_secmark_obj_eval,
+	.dump		= nft_secmark_obj_dump,
+	.destroy	= nft_secmark_obj_destroy,
+};
+struct nft_object_type nft_secmark_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_SECMARK,
+	.ops		= &nft_secmark_obj_ops,
+	.maxattr	= NFTA_SECMARK_MAX,
+	.policy		= nft_secmark_policy,
+	.owner		= THIS_MODULE,
+};
+#endif /* CONFIG_NETWORK_SECMARK */
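
The secmark object above resolves its context string to a numeric secid once, at configuration time, so the per-packet path is a single store into skb->secmark. A standalone sketch of that two-phase split follows; resolve_ctx() and its mapping are toy stand-ins for security_secctx_to_secid(), not the LSM API.

/* Standalone sketch (not kernel code): resolve a label once at setup,
 * stamp packets cheaply at evaluation time, as nft_secmark does. */
#include <stdio.h>
#include <string.h>

struct secmark_stub {
	unsigned int secid;
	const char *ctx;
};

struct pkt_stub { unsigned int secmark; };

/* Toy resolver standing in for security_secctx_to_secid(). */
static int resolve_ctx(const char *ctx, unsigned int *secid)
{
	if (strcmp(ctx, "system_u:object_r:ssh_packet_t:s0") == 0) {
		*secid = 42;
		return 0;
	}
	return -1;	/* unknown label fails at config time, not per packet */
}

static void eval(const struct secmark_stub *priv, struct pkt_stub *pkt)
{
	pkt->secmark = priv->secid;	/* hot path: one store */
}

int main(void)
{
	struct secmark_stub obj = { .ctx = "system_u:object_r:ssh_packet_t:s0" };
	struct pkt_stub pkt = { 0 };

	if (resolve_ctx(obj.ctx, &obj.secid))
		return 1;
	eval(&obj, &pkt);
	printf("packet secmark = %u\n", pkt.secmark);
	return 0;
}
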
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index c15807d..c3c93e9 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
  * Copyright (c) 2012 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
  */
 
 #include <linux/module.h>
@@ -21,9 +17,7 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat_l3proto.h>
 #include <net/ip.h>
 
 struct nft_nat {
@@ -142,7 +136,7 @@
 		return -EINVAL;
 
 	family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
-	if (family != ctx->family)
+	if (ctx->family != NFPROTO_INET && ctx->family != family)
 		return -EOPNOTSUPP;
 
 	switch (family) {
@@ -280,13 +274,67 @@
 	.owner          = THIS_MODULE,
 };
 
+#ifdef CONFIG_NF_TABLES_INET
+static void nft_nat_inet_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	const struct nft_nat *priv = nft_expr_priv(expr);
+
+	if (priv->family == nft_pf(pkt))
+		nft_nat_eval(expr, regs, pkt);
+}
+
+static const struct nft_expr_ops nft_nat_inet_ops = {
+	.type           = &nft_nat_type,
+	.size           = NFT_EXPR_SIZE(sizeof(struct nft_nat)),
+	.eval           = nft_nat_inet_eval,
+	.init           = nft_nat_init,
+	.destroy        = nft_nat_destroy,
+	.dump           = nft_nat_dump,
+	.validate	= nft_nat_validate,
+};
+
+static struct nft_expr_type nft_inet_nat_type __read_mostly = {
+	.name           = "nat",
+	.family		= NFPROTO_INET,
+	.ops            = &nft_nat_inet_ops,
+	.policy         = nft_nat_policy,
+	.maxattr        = NFTA_NAT_MAX,
+	.owner          = THIS_MODULE,
+};
+
+static int nft_nat_inet_module_init(void)
+{
+	return nft_register_expr(&nft_inet_nat_type);
+}
+
+static void nft_nat_inet_module_exit(void)
+{
+	nft_unregister_expr(&nft_inet_nat_type);
+}
+#else
+static int nft_nat_inet_module_init(void) { return 0; }
+static void nft_nat_inet_module_exit(void) { }
+#endif
+
 static int __init nft_nat_module_init(void)
 {
-	return nft_register_expr(&nft_nat_type);
+	int ret = nft_nat_inet_module_init();
+
+	if (ret)
+		return ret;
+
+	ret = nft_register_expr(&nft_nat_type);
+	if (ret)
+		nft_nat_inet_module_exit();
+
+	return ret;
 }
 
 static void __exit nft_nat_module_exit(void)
 {
+	nft_nat_inet_module_exit();
 	nft_unregister_expr(&nft_nat_type);
 }
 
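nft_nat_module_init() above registers the optional inet expression first and unwinds it if registering the base type fails, so a failed load never leaves a half-registered module behind. A minimal userspace sketch of the same register-then-unwind idiom, with stubs standing in for nft_register_expr()/nft_unregister_expr() and a simulated failure:

/* Standalone sketch of the init/unwind ordering in nft_nat_module_init(). */
#include <stdio.h>

static int register_inet(void)    { puts("inet expr registered");   return 0; }
static void unregister_inet(void) { puts("inet expr unregistered"); }
static int register_base(void)    { puts("base expr failed");       return -1; } /* simulated */

static int module_init(void)
{
	int ret = register_inet();

	if (ret)
		return ret;

	ret = register_base();
	if (ret)
		unregister_inet();	/* unwind the earlier registration */

	return ret;
}

int main(void)
{
	return module_init() ? 1 : 0;
}
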
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 3cc1b3d..48edb9d 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/kernel.h>
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index cdf348f..bfd18d2 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2012-2016 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 #include <linux/init.h>
@@ -38,7 +35,8 @@
 		return -EINVAL;
 
 	objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
-	obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
+	obj = nft_obj_lookup(ctx->net, ctx->table,
+			     tb[NFTA_OBJREF_IMM_NAME], objtype,
 			     genmask);
 	if (IS_ERR(obj))
 		return -ENOENT;
@@ -53,7 +51,7 @@
 {
 	const struct nft_object *obj = nft_objref_priv(expr);
 
-	if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
+	if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->key.name) ||
 	    nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE,
 			 htonl(obj->ops->type->type)))
 		goto nla_put_failure;
@@ -64,21 +62,34 @@
 	return -1;
 }
 
-static void nft_objref_destroy(const struct nft_ctx *ctx,
-			       const struct nft_expr *expr)
+static void nft_objref_deactivate(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr,
+				  enum nft_trans_phase phase)
 {
 	struct nft_object *obj = nft_objref_priv(expr);
 
+	if (phase == NFT_TRANS_COMMIT)
+		return;
+
 	obj->use--;
 }
 
+static void nft_objref_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_object *obj = nft_objref_priv(expr);
+
+	obj->use++;
+}
+
 static struct nft_expr_type nft_objref_type;
 static const struct nft_expr_ops nft_objref_ops = {
 	.type		= &nft_objref_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_object *)),
 	.eval		= nft_objref_eval,
 	.init		= nft_objref_init,
-	.destroy	= nft_objref_destroy,
+	.activate	= nft_objref_activate,
+	.deactivate	= nft_objref_deactivate,
 	.dump		= nft_objref_dump,
 };
 
@@ -155,12 +166,29 @@
 	return -1;
 }
 
+static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
+				      const struct nft_expr *expr,
+				      enum nft_trans_phase phase)
+{
+	struct nft_objref_map *priv = nft_expr_priv(expr);
+
+	nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_objref_map_activate(const struct nft_ctx *ctx,
+				    const struct nft_expr *expr)
+{
+	struct nft_objref_map *priv = nft_expr_priv(expr);
+
+	priv->set->use++;
+}
+
 static void nft_objref_map_destroy(const struct nft_ctx *ctx,
 				   const struct nft_expr *expr)
 {
 	struct nft_objref_map *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+	nf_tables_destroy_set(ctx, priv->set);
 }
 
 static struct nft_expr_type nft_objref_type;
@@ -169,6 +197,8 @@
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
 	.eval		= nft_objref_map_eval,
 	.init		= nft_objref_map_init,
+	.activate	= nft_objref_map_activate,
+	.deactivate	= nft_objref_map_deactivate,
 	.destroy	= nft_objref_map_destroy,
 	.dump		= nft_objref_map_dump,
 };
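
nft_objref now pairs .activate/.deactivate hooks instead of destroying on every removal: the object's use counter is dropped when a rule is deleted or an insertion is aborted, but the commit-phase call returns early so the counter is not dropped twice. A toy model of that guard; the enum mirrors the shape of nft_trans_phase, not its exact values.

/* Standalone sketch of the deactivate guard in nft_objref_deactivate(). */
#include <stdio.h>

enum trans_phase { TRANS_PREPARE, TRANS_ABORT, TRANS_COMMIT };

struct object_stub { int use; };

static void activate(struct object_stub *obj)
{
	obj->use++;
}

static void deactivate(struct object_stub *obj, enum trans_phase phase)
{
	if (phase == TRANS_COMMIT)
		return;		/* already dropped in the prepare phase */
	obj->use--;
}

int main(void)
{
	struct object_stub obj = { .use = 0 };

	activate(&obj);				/* rule bound to the object */
	deactivate(&obj, TRANS_PREPARE);	/* deletion prepared: use drops */
	deactivate(&obj, TRANS_COMMIT);		/* commit: guarded, no double drop */
	printf("use = %d\n", obj.use);		/* 0, not -1 */
	return 0;
}
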
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index a35fb59..f54d6ae 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <net/ip.h>
 #include <net/tcp.h>
 
@@ -6,10 +7,14 @@
 
 struct nft_osf {
 	enum nft_registers	dreg:8;
+	u8			ttl;
+	u32			flags;
 };
 
 static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = {
 	[NFTA_OSF_DREG]		= { .type = NLA_U32 },
+	[NFTA_OSF_TTL]		= { .type = NLA_U8 },
+	[NFTA_OSF_FLAGS]	= { .type = NLA_U32 },
 };
 
 static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
@@ -18,9 +23,10 @@
 	struct nft_osf *priv = nft_expr_priv(expr);
 	u32 *dest = &regs->data[priv->dreg];
 	struct sk_buff *skb = pkt->skb;
+	char os_match[NFT_OSF_MAXGENRELEN + 1];
 	const struct tcphdr *tcp;
+	struct nf_osf_data data;
 	struct tcphdr _tcph;
-	const char *os_name;
 
 	tcp = skb_header_pointer(skb, ip_hdrlen(skb),
 				 sizeof(struct tcphdr), &_tcph);
@@ -33,11 +39,17 @@
 		return;
 	}
 
-	os_name = nf_osf_find(skb, nf_osf_fingers);
-	if (!os_name)
+	if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) {
 		strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
-	else
-		strncpy((char *)dest, os_name, NFT_OSF_MAXGENRELEN);
+	} else {
+		if (priv->flags & NFT_OSF_F_VERSION)
+			snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s",
+				 data.genre, data.version);
+		else
+			strlcpy(os_match, data.genre, NFT_OSF_MAXGENRELEN);
+
+		strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
+	}
 }
 
 static int nft_osf_init(const struct nft_ctx *ctx,
@@ -45,7 +57,23 @@
 			const struct nlattr * const tb[])
 {
 	struct nft_osf *priv = nft_expr_priv(expr);
+	u32 flags;
 	int err;
+	u8 ttl;
+
+	if (tb[NFTA_OSF_TTL]) {
+		ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
+		if (ttl > 2)
+			return -EINVAL;
+		priv->ttl = ttl;
+	}
+
+	if (tb[NFTA_OSF_FLAGS]) {
+		flags = ntohl(nla_get_be32(tb[NFTA_OSF_FLAGS]));
+		if (flags != NFT_OSF_F_VERSION)
+			return -EINVAL;
+		priv->flags = flags;
+	}
 
 	priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]);
 	err = nft_validate_register_store(ctx, priv->dreg, NULL,
@@ -60,6 +88,12 @@
 {
 	const struct nft_osf *priv = nft_expr_priv(expr);
 
+	if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl))
+		goto nla_put_failure;
+
+	if (nla_put_be32(skb, NFTA_OSF_FLAGS, ntohl(priv->flags)))
+		goto nla_put_failure;
+
 	if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg))
 		goto nla_put_failure;
 
@@ -69,6 +103,15 @@
 	return -1;
 }
 
+static int nft_osf_validate(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nft_data **data)
+{
+	return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
+						    (1 << NF_INET_PRE_ROUTING) |
+						    (1 << NF_INET_FORWARD));
+}
+
 static struct nft_expr_type nft_osf_type;
 static const struct nft_expr_ops nft_osf_op = {
 	.eval		= nft_osf_eval,
@@ -76,6 +119,7 @@
 	.init		= nft_osf_init,
 	.dump		= nft_osf_dump,
 	.type		= &nft_osf_type,
+	.validate	= nft_osf_validate,
 };
 
 static struct nft_expr_type nft_osf_type __read_mostly = {
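
With the new NFT_OSF_F_VERSION flag, nft_osf_eval() joins genre and version as "genre:version" before copying the result into the destination register; without it only the genre is stored. A standalone sketch of that formatting step, with an illustrative buffer size (the kernel uses strlcpy for the plain-genre case):

#include <stdio.h>
#include <string.h>

#define MAXGENRELEN 16			/* stand-in for NFT_OSF_MAXGENRELEN */

static void format_os(char *dest, const char *genre, const char *version,
		      int want_version)
{
	char os_match[MAXGENRELEN + 1];

	if (want_version)
		snprintf(os_match, MAXGENRELEN, "%s:%s", genre, version);
	else
		snprintf(os_match, MAXGENRELEN, "%s", genre);

	strncpy(dest, os_match, MAXGENRELEN);
}

int main(void)
{
	char dest[MAXGENRELEN + 1] = { 0 };

	format_os(dest, "Linux", "3.11", 1);
	printf("%s\n", dest);		/* Linux:3.11 */
	format_os(dest, "Linux", "3.11", 0);
	printf("%s\n", dest);		/* Linux */
	return 0;
}
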
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index e110b0e..5cb2d89 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -18,10 +15,13 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 /* For layer 4 checksum field offset. */
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmpv6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
 
 /* add vlan header into the user buffer for if tag was removed by offloads */
 static bool
@@ -70,9 +70,9 @@
 	return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
 }
 
-static void nft_payload_eval(const struct nft_expr *expr,
-			     struct nft_regs *regs,
-			     const struct nft_pktinfo *pkt)
+void nft_payload_eval(const struct nft_expr *expr,
+		      struct nft_regs *regs,
+		      const struct nft_pktinfo *pkt)
 {
 	const struct nft_payload *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
@@ -153,12 +153,233 @@
 	return -1;
 }
 
+static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_payload *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	switch (priv->offset) {
+	case offsetof(struct ethhdr, h_source):
+		if (priv->len != ETH_ALEN)
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
+				  src, ETH_ALEN, reg);
+		break;
+	case offsetof(struct ethhdr, h_dest):
+		if (priv->len != ETH_ALEN)
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
+				  dst, ETH_ALEN, reg);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int nft_payload_offload_ip(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_payload *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	switch (priv->offset) {
+	case offsetof(struct iphdr, saddr):
+		if (priv->len != sizeof(struct in_addr))
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src,
+				  sizeof(struct in_addr), reg);
+		break;
+	case offsetof(struct iphdr, daddr):
+		if (priv->len != sizeof(struct in_addr))
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst,
+				  sizeof(struct in_addr), reg);
+		break;
+	case offsetof(struct iphdr, protocol):
+		if (priv->len != sizeof(__u8))
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
+				  sizeof(__u8), reg);
+		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_payload *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	switch (priv->offset) {
+	case offsetof(struct ipv6hdr, saddr):
+		if (priv->len != sizeof(struct in6_addr))
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src,
+				  sizeof(struct in6_addr), reg);
+		break;
+	case offsetof(struct ipv6hdr, daddr):
+		if (priv->len != sizeof(struct in6_addr))
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst,
+				  sizeof(struct in6_addr), reg);
+		break;
+	case offsetof(struct ipv6hdr, nexthdr):
+		if (priv->len != sizeof(__u8))
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
+				  sizeof(__u8), reg);
+		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int nft_payload_offload_nh(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_payload *priv)
+{
+	int err;
+
+	switch (ctx->dep.l3num) {
+	case htons(ETH_P_IP):
+		err = nft_payload_offload_ip(ctx, flow, priv);
+		break;
+	case htons(ETH_P_IPV6):
+		err = nft_payload_offload_ip6(ctx, flow, priv);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx,
+				   struct nft_flow_rule *flow,
+				   const struct nft_payload *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	switch (priv->offset) {
+	case offsetof(struct tcphdr, source):
+		if (priv->len != sizeof(__be16))
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
+				  sizeof(__be16), reg);
+		break;
+	case offsetof(struct tcphdr, dest):
+		if (priv->len != sizeof(__be16))
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
+				  sizeof(__be16), reg);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int nft_payload_offload_udp(struct nft_offload_ctx *ctx,
+				   struct nft_flow_rule *flow,
+				   const struct nft_payload *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	switch (priv->offset) {
+	case offsetof(struct udphdr, source):
+		if (priv->len != sizeof(__be16))
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
+				  sizeof(__be16), reg);
+		break;
+	case offsetof(struct udphdr, dest):
+		if (priv->len != sizeof(__be16))
+			return -EOPNOTSUPP;
+
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
+				  sizeof(__be16), reg);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int nft_payload_offload_th(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_payload *priv)
+{
+	int err;
+
+	switch (ctx->dep.protonum) {
+	case IPPROTO_TCP:
+		err = nft_payload_offload_tcp(ctx, flow, priv);
+		break;
+	case IPPROTO_UDP:
+		err = nft_payload_offload_udp(ctx, flow, priv);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static int nft_payload_offload(struct nft_offload_ctx *ctx,
+			       struct nft_flow_rule *flow,
+			       const struct nft_expr *expr)
+{
+	const struct nft_payload *priv = nft_expr_priv(expr);
+	int err;
+
+	switch (priv->base) {
+	case NFT_PAYLOAD_LL_HEADER:
+		err = nft_payload_offload_ll(ctx, flow, priv);
+		break;
+	case NFT_PAYLOAD_NETWORK_HEADER:
+		err = nft_payload_offload_nh(ctx, flow, priv);
+		break;
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		err = nft_payload_offload_th(ctx, flow, priv);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+	return err;
+}
+
 static const struct nft_expr_ops nft_payload_ops = {
 	.type		= &nft_payload_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_payload)),
 	.eval		= nft_payload_eval,
 	.init		= nft_payload_init,
 	.dump		= nft_payload_dump,
+	.offload	= nft_payload_offload,
 };
 
 const struct nft_expr_ops nft_payload_fast_ops = {
@@ -167,6 +388,7 @@
 	.eval		= nft_payload_eval,
 	.init		= nft_payload_init,
 	.dump		= nft_payload_dump,
+	.offload	= nft_payload_offload,
 };
 
 static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum)
@@ -243,7 +465,7 @@
 					  tsum));
 	}
 
-	if (!skb_make_writable(skb, l4csum_offset + sizeof(sum)) ||
+	if (skb_ensure_writable(skb, l4csum_offset + sizeof(sum)) ||
 	    skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
 		return -1;
 
@@ -259,7 +481,7 @@
 		return -1;
 
 	nft_csum_replace(&sum, fsum, tsum);
-	if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
+	if (skb_ensure_writable(skb, csum_offset + sizeof(sum)) ||
 	    skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
 		return -1;
 
@@ -312,7 +534,7 @@
 			goto err;
 	}
 
-	if (!skb_make_writable(skb, max(offset + priv->len, 0)) ||
+	if (skb_ensure_writable(skb, max(offset + priv->len, 0)) ||
 	    skb_store_bits(skb, offset, src, priv->len) < 0)
 		goto err;
 
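The offload callbacks added above translate a payload expression's base/offset/length triple into flow-dissector keys; any offset that is not an exact, well-known header field fails with -EOPNOTSUPP and the rule stays in software. A standalone sketch of the switch-on-offsetof pattern from nft_payload_offload_ip(); the struct is a simplified stand-in for the kernel's struct iphdr.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct ip4hdr {				/* simplified stand-in for struct iphdr */
	uint8_t  ver_ihl, tos;
	uint16_t tot_len, id, frag_off;
	uint8_t  ttl, protocol;
	uint16_t check;
	uint32_t saddr, daddr;
};

static const char *map_ip_field(size_t offset, size_t len)
{
	switch (offset) {
	case offsetof(struct ip4hdr, saddr):
		return len == sizeof(uint32_t) ? "IPV4_ADDRS.src" : NULL;
	case offsetof(struct ip4hdr, daddr):
		return len == sizeof(uint32_t) ? "IPV4_ADDRS.dst" : NULL;
	case offsetof(struct ip4hdr, protocol):
		return len == sizeof(uint8_t) ? "BASIC.ip_proto" : NULL;
	default:
		return NULL;		/* not offloadable: -EOPNOTSUPP */
	}
}

int main(void)
{
	const char *key = map_ip_field(offsetof(struct ip4hdr, saddr), 4);

	printf("offset 12, len 4 -> %s\n", key ? key : "EOPNOTSUPP");
	return 0;
}
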
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 9861365..5ece0a6 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2013 Eric Leblond <eric@regit.org>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code partly funded by OISF
  * (http://www.openinfosecfoundation.org/)
  */
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 0ed124a..4413690 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -16,7 +13,7 @@
 #include <net/netfilter/nf_tables.h>
 
 struct nft_quota {
-	u64		quota;
+	atomic64_t	quota;
 	unsigned long	flags;
 	atomic64_t	consumed;
 };
@@ -24,7 +21,8 @@
 static inline bool nft_overquota(struct nft_quota *priv,
 				 const struct sk_buff *skb)
 {
-	return atomic64_add_return(skb->len, &priv->consumed) >= priv->quota;
+	return atomic64_add_return(skb->len, &priv->consumed) >=
+	       atomic64_read(&priv->quota);
 }
 
 static inline bool nft_quota_invert(struct nft_quota *priv)
@@ -61,7 +59,7 @@
 
 	if (overquota &&
 	    !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
-		nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0,
+		nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
 			       NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC);
 }
 
@@ -92,7 +90,7 @@
 			return -EOPNOTSUPP;
 	}
 
-	priv->quota = quota;
+	atomic64_set(&priv->quota, quota);
 	priv->flags = flags;
 	atomic64_set(&priv->consumed, consumed);
 
@@ -108,10 +106,22 @@
 	return nft_quota_do_init(tb, priv);
 }
 
+static void nft_quota_obj_update(struct nft_object *obj,
+				 struct nft_object *newobj)
+{
+	struct nft_quota *newpriv = nft_obj_data(newobj);
+	struct nft_quota *priv = nft_obj_data(obj);
+	u64 newquota;
+
+	newquota = atomic64_read(&newpriv->quota);
+	atomic64_set(&priv->quota, newquota);
+	priv->flags = newpriv->flags;
+}
+
 static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
 			     bool reset)
 {
-	u64 consumed, consumed_cap;
+	u64 consumed, consumed_cap, quota;
 	u32 flags = priv->flags;
 
 	/* Since we inconditionally increment consumed quota for each packet
@@ -119,14 +129,15 @@
 	 * userspace.
 	 */
 	consumed = atomic64_read(&priv->consumed);
-	if (consumed >= priv->quota) {
-		consumed_cap = priv->quota;
+	quota = atomic64_read(&priv->quota);
+	if (consumed >= quota) {
+		consumed_cap = quota;
 		flags |= NFT_QUOTA_F_DEPLETED;
 	} else {
 		consumed_cap = consumed;
 	}
 
-	if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
+	if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(quota),
 			 NFTA_QUOTA_PAD) ||
 	    nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed_cap),
 			 NFTA_QUOTA_PAD) ||
@@ -158,6 +169,7 @@
 	.init		= nft_quota_obj_init,
 	.eval		= nft_quota_obj_eval,
 	.dump		= nft_quota_obj_dump,
+	.update		= nft_quota_obj_update,
 };
 
 static struct nft_object_type nft_quota_obj_type __read_mostly = {
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index cedb96c..4701fa8 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -23,9 +20,8 @@
 	enum nft_range_ops	op:8;
 };
 
-static void nft_range_eval(const struct nft_expr *expr,
-			 struct nft_regs *regs,
-			 const struct nft_pktinfo *pkt)
+void nft_range_eval(const struct nft_expr *expr,
+		    struct nft_regs *regs, const struct nft_pktinfo *pkt)
 {
 	const struct nft_range_expr *priv = nft_expr_priv(expr);
 	int d1, d2;
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index c64cbe7..43eeb1f 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -13,19 +10,24 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_redirect.h>
 #include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nft_redir.h>
 
-const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
+struct nft_redir {
+	enum nft_registers	sreg_proto_min:8;
+	enum nft_registers	sreg_proto_max:8;
+	u16			flags;
+};
+
+static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
 	[NFTA_REDIR_REG_PROTO_MIN]	= { .type = NLA_U32 },
 	[NFTA_REDIR_REG_PROTO_MAX]	= { .type = NLA_U32 },
 	[NFTA_REDIR_FLAGS]		= { .type = NLA_U32 },
 };
-EXPORT_SYMBOL_GPL(nft_redir_policy);
 
-int nft_redir_validate(const struct nft_ctx *ctx,
-		       const struct nft_expr *expr,
-		       const struct nft_data **data)
+static int nft_redir_validate(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nft_data **data)
 {
 	int err;
 
@@ -37,11 +39,10 @@
 					(1 << NF_INET_PRE_ROUTING) |
 					(1 << NF_INET_LOCAL_OUT));
 }
-EXPORT_SYMBOL_GPL(nft_redir_validate);
 
-int nft_redir_init(const struct nft_ctx *ctx,
-		   const struct nft_expr *expr,
-		   const struct nlattr * const tb[])
+static int nft_redir_init(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
 {
 	struct nft_redir *priv = nft_expr_priv(expr);
 	unsigned int plen;
@@ -77,9 +78,8 @@
 
 	return nf_ct_netns_get(ctx->net, ctx->family);
 }
-EXPORT_SYMBOL_GPL(nft_redir_init);
 
-int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_redir *priv = nft_expr_priv(expr);
 
@@ -101,7 +101,194 @@
 nla_put_failure:
 	return -1;
 }
-EXPORT_SYMBOL_GPL(nft_redir_dump);
+
+static void nft_redir_ipv4_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	struct nft_redir *priv = nft_expr_priv(expr);
+	struct nf_nat_ipv4_multi_range_compat mr;
+
+	memset(&mr, 0, sizeof(mr));
+	if (priv->sreg_proto_min) {
+		mr.range[0].min.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		mr.range[0].max.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
+		mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+
+	mr.range[0].flags |= priv->flags;
+
+	regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt));
+}
+
+static void
+nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
+}
+
+static struct nft_expr_type nft_redir_ipv4_type;
+static const struct nft_expr_ops nft_redir_ipv4_ops = {
+	.type		= &nft_redir_ipv4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+	.eval		= nft_redir_ipv4_eval,
+	.init		= nft_redir_init,
+	.destroy	= nft_redir_ipv4_destroy,
+	.dump		= nft_redir_dump,
+	.validate	= nft_redir_validate,
+};
+
+static struct nft_expr_type nft_redir_ipv4_type __read_mostly = {
+	.family		= NFPROTO_IPV4,
+	.name		= "redir",
+	.ops		= &nft_redir_ipv4_ops,
+	.policy		= nft_redir_policy,
+	.maxattr	= NFTA_REDIR_MAX,
+	.owner		= THIS_MODULE,
+};
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static void nft_redir_ipv6_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	struct nft_redir *priv = nft_expr_priv(expr);
+	struct nf_nat_range2 range;
+
+	memset(&range, 0, sizeof(range));
+	if (priv->sreg_proto_min) {
+		range.min_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		range.max_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+
+	range.flags |= priv->flags;
+
+	regs->verdict.code =
+		nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt));
+}
+
+static void
+nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
+}
+
+static struct nft_expr_type nft_redir_ipv6_type;
+static const struct nft_expr_ops nft_redir_ipv6_ops = {
+	.type		= &nft_redir_ipv6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+	.eval		= nft_redir_ipv6_eval,
+	.init		= nft_redir_init,
+	.destroy	= nft_redir_ipv6_destroy,
+	.dump		= nft_redir_dump,
+	.validate	= nft_redir_validate,
+};
+
+static struct nft_expr_type nft_redir_ipv6_type __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.name		= "redir",
+	.ops		= &nft_redir_ipv6_ops,
+	.policy		= nft_redir_policy,
+	.maxattr	= NFTA_REDIR_MAX,
+	.owner		= THIS_MODULE,
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_INET
+static void nft_redir_inet_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	switch (nft_pf(pkt)) {
+	case NFPROTO_IPV4:
+		return nft_redir_ipv4_eval(expr, regs, pkt);
+	case NFPROTO_IPV6:
+		return nft_redir_ipv6_eval(expr, regs, pkt);
+	}
+
+	WARN_ON_ONCE(1);
+}
+
+static void
+nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	nf_ct_netns_put(ctx->net, NFPROTO_INET);
+}
+
+static struct nft_expr_type nft_redir_inet_type;
+static const struct nft_expr_ops nft_redir_inet_ops = {
+	.type		= &nft_redir_inet_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+	.eval		= nft_redir_inet_eval,
+	.init		= nft_redir_init,
+	.destroy	= nft_redir_inet_destroy,
+	.dump		= nft_redir_dump,
+	.validate	= nft_redir_validate,
+};
+
+static struct nft_expr_type nft_redir_inet_type __read_mostly = {
+	.family		= NFPROTO_INET,
+	.name		= "redir",
+	.ops		= &nft_redir_inet_ops,
+	.policy		= nft_redir_policy,
+	.maxattr	= NFTA_REDIR_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_redir_module_init_inet(void)
+{
+	return nft_register_expr(&nft_redir_inet_type);
+}
+#else
+static inline int nft_redir_module_init_inet(void) { return 0; }
+#endif
+
+static int __init nft_redir_module_init(void)
+{
+	int ret = nft_register_expr(&nft_redir_ipv4_type);
+
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_NF_TABLES_IPV6
+	ret = nft_register_expr(&nft_redir_ipv6_type);
+	if (ret) {
+		nft_unregister_expr(&nft_redir_ipv4_type);
+		return ret;
+	}
+#endif
+
+	ret = nft_redir_module_init_inet();
+	if (ret < 0) {
+		nft_unregister_expr(&nft_redir_ipv4_type);
+#ifdef CONFIG_NF_TABLES_IPV6
+		nft_unregister_expr(&nft_redir_ipv6_type);
+#endif
+		return ret;
+	}
+
+	return ret;
+}
+
+static void __exit nft_redir_module_exit(void)
+{
+	nft_unregister_expr(&nft_redir_ipv4_type);
+#ifdef CONFIG_NF_TABLES_IPV6
+	nft_unregister_expr(&nft_redir_ipv6_type);
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+	nft_unregister_expr(&nft_redir_inet_type);
+#endif
+}
+
+module_init(nft_redir_module_init);
+module_exit(nft_redir_module_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
+MODULE_ALIAS_NFT_EXPR("redir");
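
nft_redir_ipv4_eval() above loads the two proto registers as 16-bit values and marks the range as protocol-specified before handing it to nf_nat_redirect_ipv4(). A simplified standalone model of that register-to-range step; the types and flag are stand-ins for the kernel's nf_nat structures.

#include <stdint.h>
#include <stdio.h>

#define RANGE_PROTO_SPECIFIED	(1 << 0)	/* stand-in for NF_NAT_RANGE_PROTO_SPECIFIED */

struct port_range {
	uint16_t min_port, max_port;
	unsigned int flags;
};

static uint16_t reg_load16(const uint32_t *reg)
{
	return (uint16_t)*reg;		/* low 16 bits, like nft_reg_load16() */
}

int main(void)
{
	uint32_t regs[4] = { 0, 8080, 8088, 0 };	/* sreg_proto_min = 1, max = 2 */
	struct port_range range = { 0 };

	range.min_port = reg_load16(&regs[1]);
	range.max_port = reg_load16(&regs[2]);
	range.flags |= RANGE_PROTO_SPECIFIED;

	printf("redirect to ports %u-%u (flags 0x%x)\n",
	       range.min_port, range.max_port, range.flags);
	return 0;
}
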
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 29f5bd2..00f865f 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  * Copyright (c) 2013 Eric Leblond <eric@regit.org>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -94,7 +91,8 @@
 
 int nft_reject_icmp_code(u8 code)
 {
-	BUG_ON(code > NFT_REJECT_ICMPX_MAX);
+	if (WARN_ON_ONCE(code > NFT_REJECT_ICMPX_MAX))
+		return ICMP_NET_UNREACH;
 
 	return icmp_code_v4[code];
 }
@@ -111,7 +109,8 @@
 
 int nft_reject_icmpv6_code(u8 code)
 {
-	BUG_ON(code > NFT_REJECT_ICMPX_MAX);
+	if (WARN_ON_ONCE(code > NFT_REJECT_ICMPX_MAX))
+		return ICMPV6_NOROUTE;
 
 	return icmp_code_v6[code];
 }
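
Replacing BUG_ON() with WARN_ON_ONCE() above turns an out-of-range reject code from a kernel crash into a one-time warning plus a safe fallback value. A standalone sketch of that defensive pattern; the table is illustrative, not the kernel's icmp_code_v4[].

#include <stdio.h>

#define NCODES 4
static const int code_table[NCODES] = { 0, 1, 3, 13 };

static int lookup_code(unsigned int code)
{
	if (code >= NCODES) {			/* WARN_ON_ONCE() analogue */
		fprintf(stderr, "warning: code %u out of range\n", code);
		return code_table[0];		/* safe default instead of a crash */
	}
	return code_table[code];
}

int main(void)
{
	printf("%d %d\n", lookup_code(2), lookup_code(9));
	return 0;
}
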
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 5a7fb5f..f41f414 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2014 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 76dba9f..7cfcb0e 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2016 Anders K. Pedersen <akp@cohaesio.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -53,9 +50,9 @@
 	return mtu - minlen;
 }
 
-static void nft_rt_get_eval(const struct nft_expr *expr,
-			    struct nft_regs *regs,
-			    const struct nft_pktinfo *pkt)
+void nft_rt_get_eval(const struct nft_expr *expr,
+		     struct nft_regs *regs,
+		     const struct nft_pktinfo *pkt)
 {
 	const struct nft_rt *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
@@ -90,6 +87,11 @@
 	case NFT_RT_TCPMSS:
 		nft_reg_store16(dest, get_tcpmss(pkt, dst));
 		break;
+#ifdef CONFIG_XFRM
+	case NFT_RT_XFRM:
+		nft_reg_store8(dest, !!dst->xfrm);
+		break;
+#endif
 	default:
 		WARN_ON(1);
 		goto err;
@@ -130,6 +132,11 @@
 	case NFT_RT_TCPMSS:
 		len = sizeof(u16);
 		break;
+#ifdef CONFIG_XFRM
+	case NFT_RT_XFRM:
+		len = sizeof(u8);
+		break;
+#endif
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -164,6 +171,7 @@
 	case NFT_RT_NEXTHOP4:
 	case NFT_RT_NEXTHOP6:
 	case NFT_RT_CLASSID:
+	case NFT_RT_XFRM:
 		return 0;
 	case NFT_RT_TCPMSS:
 		hooks = (1 << NF_INET_FORWARD) |
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index f866bd4..087a056 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2017 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -13,7 +10,7 @@
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
 
 struct nft_bitmap_elem {
 	struct list_head	head;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 015124e..b331a3c 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -19,7 +16,7 @@
 #include <linux/rhashtable.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
 
 /* We target a hash table size of 4, element hint is 75% of final size */
 #define NFT_RHASH_ELEMENT_HINT 3
@@ -88,7 +85,7 @@
 		.key	 = key,
 	};
 
-	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+	he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
 	if (he != NULL)
 		*ext = &he->ext;
 
@@ -106,7 +103,7 @@
 		.key	 = elem->key.val.data,
 	};
 
-	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+	he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
 	if (he != NULL)
 		return he;
 
@@ -129,7 +126,7 @@
 		.key	 = key,
 	};
 
-	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+	he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
 	if (he != NULL)
 		goto out;
 
@@ -217,7 +214,7 @@
 	};
 
 	rcu_read_lock();
-	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+	he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
 	if (he != NULL &&
 	    !nft_rhash_flush(net, set, he))
 		he = NULL;
@@ -237,6 +234,24 @@
 	rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
 }
 
+static bool nft_rhash_delete(const struct nft_set *set,
+			     const u32 *key)
+{
+	struct nft_rhash *priv = nft_set_priv(set);
+	struct nft_rhash_cmp_arg arg = {
+		.genmask = NFT_GENMASK_ANY,
+		.set = set,
+		.key = key,
+	};
+	struct nft_rhash_elem *he;
+
+	he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
+	if (he == NULL)
+		return false;
+
+	return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+}
+
 static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 			   struct nft_set_iter *iter)
 {
@@ -244,21 +259,15 @@
 	struct nft_rhash_elem *he;
 	struct rhashtable_iter hti;
 	struct nft_set_elem elem;
-	int err;
 
-	err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC);
-	iter->err = err;
-	if (err)
-		return;
-
+	rhashtable_walk_enter(&priv->ht, &hti);
 	rhashtable_walk_start(&hti);
 
 	while ((he = rhashtable_walk_next(&hti))) {
 		if (IS_ERR(he)) {
-			err = PTR_ERR(he);
-			if (err != -EAGAIN) {
-				iter->err = err;
-				goto out;
+			if (PTR_ERR(he) != -EAGAIN) {
+				iter->err = PTR_ERR(he);
+				break;
 			}
 
 			continue;
@@ -275,13 +284,11 @@
 
 		iter->err = iter->fn(ctx, set, iter, &elem);
 		if (iter->err < 0)
-			goto out;
+			break;
 
 cont:
 		iter->count++;
 	}
-
-out:
 	rhashtable_walk_stop(&hti);
 	rhashtable_walk_exit(&hti);
 }
@@ -293,21 +300,17 @@
 	struct nft_rhash *priv;
 	struct nft_set_gc_batch *gcb = NULL;
 	struct rhashtable_iter hti;
-	int err;
 
 	priv = container_of(work, struct nft_rhash, gc_work.work);
 	set  = nft_set_container_of(priv);
 
-	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
-	if (err)
-		goto schedule;
-
+	rhashtable_walk_enter(&priv->ht, &hti);
 	rhashtable_walk_start(&hti);
 
 	while ((he = rhashtable_walk_next(&hti))) {
 		if (IS_ERR(he)) {
 			if (PTR_ERR(he) != -EAGAIN)
-				goto out;
+				break;
 			continue;
 		}
 
@@ -326,17 +329,15 @@
 
 		gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
 		if (gcb == NULL)
-			goto out;
+			break;
 		rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
 		atomic_dec(&set->nelems);
 		nft_set_gc_batch_add(gcb, he);
 	}
-out:
 	rhashtable_walk_stop(&hti);
 	rhashtable_walk_exit(&hti);
 
 	nft_set_gc_batch_complete(gcb);
-schedule:
 	queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
 			   nft_set_gc_interval(set));
 }
@@ -456,15 +457,6 @@
 	return ERR_PTR(-ENOENT);
 }
 
-/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */
-static inline u32 nft_hash_key(const u32 *key, u32 klen)
-{
-	if (klen == 4)
-		return *key;
-
-	return *(u16 *)key;
-}
-
 static bool nft_hash_lookup_fast(const struct net *net,
 				 const struct nft_set *set,
 				 const u32 *key, const struct nft_set_ext **ext)
@@ -474,11 +466,11 @@
 	const struct nft_hash_elem *he;
 	u32 hash, k1, k2;
 
-	k1 = nft_hash_key(key, set->klen);
+	k1 = *key;
 	hash = jhash_1word(k1, priv->seed);
 	hash = reciprocal_scale(hash, priv->buckets);
 	hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
-		k2 = nft_hash_key(nft_set_ext_key(&he->ext)->data, set->klen);
+		k2 = *(u32 *)nft_set_ext_key(&he->ext)->data;
 		if (k1 == k2 &&
 		    nft_set_elem_active(&he->ext, genmask)) {
 			*ext = &he->ext;
@@ -488,6 +480,23 @@
 	return false;
 }
 
+static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,
+		     const struct nft_set_ext *ext)
+{
+	const struct nft_data *key = nft_set_ext_key(ext);
+	u32 hash, k1;
+
+	if (set->klen == 4) {
+		k1 = *(u32 *)key;
+		hash = jhash_1word(k1, priv->seed);
+	} else {
+		hash = jhash(key, set->klen, priv->seed);
+	}
+	hash = reciprocal_scale(hash, priv->buckets);
+
+	return hash;
+}
+
 static int nft_hash_insert(const struct net *net, const struct nft_set *set,
 			   const struct nft_set_elem *elem,
 			   struct nft_set_ext **ext)
@@ -497,8 +506,7 @@
 	u8 genmask = nft_genmask_next(net);
 	u32 hash;
 
-	hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed);
-	hash = reciprocal_scale(hash, priv->buckets);
+	hash = nft_jhash(set, priv, &this->ext);
 	hlist_for_each_entry(he, &priv->table[hash], node) {
 		if (!memcmp(nft_set_ext_key(&this->ext),
 			    nft_set_ext_key(&he->ext), set->klen) &&
@@ -537,10 +545,9 @@
 	u8 genmask = nft_genmask_next(net);
 	u32 hash;
 
-	hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed);
-	hash = reciprocal_scale(hash, priv->buckets);
+	hash = nft_jhash(set, priv, &this->ext);
 	hlist_for_each_entry(he, &priv->table[hash], node) {
-		if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val,
+		if (!memcmp(nft_set_ext_key(&he->ext), &elem->key.val,
 			    set->klen) &&
 		    nft_set_elem_active(&he->ext, genmask)) {
 			nft_set_elem_change_active(net, set, &he->ext);
@@ -673,6 +680,7 @@
 		.remove		= nft_rhash_remove,
 		.lookup		= nft_rhash_lookup,
 		.update		= nft_rhash_update,
+		.delete		= nft_rhash_delete,
 		.walk		= nft_rhash_walk,
 		.get		= nft_rhash_get,
 	},
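
nft_jhash() above hashes 4-byte keys with jhash_1word() and anything longer with the general jhash(), then maps the 32-bit result onto a bucket with reciprocal_scale(), i.e. ((u64)hash * buckets) >> 32, which avoids a modulo on the hot path. A standalone sketch of that bucket mapping, with djb2 standing in for jhash:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t hash_bytes(const void *key, size_t len)	/* djb2; jhash stand-in */
{
	const unsigned char *p = key;
	uint32_t h = 5381;

	while (len--)
		h = h * 33 + *p++;
	return h;
}

static uint32_t reciprocal_scale(uint32_t hash, uint32_t buckets)
{
	return (uint32_t)(((uint64_t)hash * buckets) >> 32);
}

int main(void)
{
	uint32_t key4 = 0xc0a80001;		/* 4-byte key: hash the word directly */
	char key16[16] = "example-key";		/* longer key: hash the buffer */
	uint32_t buckets = 64;

	printf("bucket(u32 key) = %u\n",
	       reciprocal_scale(hash_bytes(&key4, sizeof(key4)), buckets));
	printf("bucket(16B key) = %u\n",
	       reciprocal_scale(hash_bytes(key16, sizeof(key16)), buckets));
	return 0;
}
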
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 0e5ec12..5712325 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
@@ -16,7 +13,7 @@
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
 
 struct nft_rbtree {
 	struct rb_root		root;
@@ -135,9 +132,12 @@
 		d = memcmp(this, key, set->klen);
 		if (d < 0) {
 			parent = rcu_dereference_raw(parent->rb_left);
-			interval = rbe;
+			if (!(flags & NFT_SET_ELEM_INTERVAL_END))
+				interval = rbe;
 		} else if (d > 0) {
 			parent = rcu_dereference_raw(parent->rb_right);
+			if (flags & NFT_SET_ELEM_INTERVAL_END)
+				interval = rbe;
 		} else {
 			if (!nft_set_elem_active(&rbe->ext, genmask))
 				parent = rcu_dereference_raw(parent->rb_left);
@@ -154,7 +154,10 @@
 
 	if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
 	    nft_set_elem_active(&interval->ext, genmask) &&
-	    !nft_rbtree_interval_end(interval)) {
+	    ((!nft_rbtree_interval_end(interval) &&
+	      !(flags & NFT_SET_ELEM_INTERVAL_END)) ||
+	     (nft_rbtree_interval_end(interval) &&
+	      (flags & NFT_SET_ELEM_INTERVAL_END)))) {
 		*elem = interval;
 		return true;
 	}
@@ -302,10 +305,6 @@
 		else if (d > 0)
 			parent = parent->rb_right;
 		else {
-			if (!nft_set_elem_active(&rbe->ext, genmask)) {
-				parent = parent->rb_left;
-				continue;
-			}
 			if (nft_rbtree_interval_end(rbe) &&
 			    !nft_rbtree_interval_end(this)) {
 				parent = parent->rb_left;
@@ -314,6 +313,9 @@
 				   nft_rbtree_interval_end(this)) {
 				parent = parent->rb_right;
 				continue;
+			} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
+				parent = parent->rb_left;
+				continue;
 			}
 			nft_rbtree_flush(net, set, rbe);
 			return rbe;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index d7f3776..637ce3e 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -47,9 +47,6 @@
 		return;
 	}
 
-	/* So that subsequent socket matching not to require other lookups. */
-	skb->sk = sk;
-
 	switch(priv->key) {
 	case NFT_SOCKET_TRANSPARENT:
 		nft_reg_store8(dest, inet_sk_transparent(sk));
@@ -66,6 +63,9 @@
 		WARN_ON(1);
 		regs->verdict.code = NFT_BREAK;
 	}
+
+	if (sk != skb->sk)
+		sock_gen_put(sk);
 }
 
 static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = {
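
The change above stops caching the looked-up socket in skb->sk; instead, when the lookup produced a reference of its own (sk != skb->sk), it is released with sock_gen_put() once the match has read what it needs. A toy refcount model of that rule, with plain ints standing in for kernel socket refcounts:

#include <stdio.h>

struct sock_stub { int refcnt; };

static struct sock_stub *lookup(struct sock_stub *cached)
{
	static struct sock_stub table_entry = { .refcnt = 1 };
	struct sock_stub *sk = cached ? cached : &table_entry;

	sk->refcnt++;			/* a lookup takes its own reference */
	return sk;
}

static void put(struct sock_stub *sk)
{
	sk->refcnt--;
}

int main(void)
{
	struct sock_stub *skb_sk = NULL;	/* packet owns no socket yet */
	struct sock_stub *sk = lookup(skb_sk);

	/* ... read sk fields for the match ... */

	if (sk != skb_sk)
		put(sk);		/* drop the lookup reference, as the fix does */

	printf("refcnt back to %d\n", sk->refcnt);
	return 0;
}
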
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
new file mode 100644
index 0000000..e2c1fc6
--- /dev/null
+++ b/net/netfilter/nft_synproxy.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/netlink.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_synproxy.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter/nf_synproxy.h>
+
+struct nft_synproxy {
+	struct nf_synproxy_info	info;
+};
+
+static const struct nla_policy nft_synproxy_policy[NFTA_SYNPROXY_MAX + 1] = {
+	[NFTA_SYNPROXY_MSS]		= { .type = NLA_U16 },
+	[NFTA_SYNPROXY_WSCALE]		= { .type = NLA_U8 },
+	[NFTA_SYNPROXY_FLAGS]		= { .type = NLA_U32 },
+};
+
+static void nft_synproxy_tcp_options(struct synproxy_options *opts,
+				     const struct tcphdr *tcp,
+				     struct synproxy_net *snet,
+				     struct nf_synproxy_info *info,
+				     const struct nft_synproxy *priv)
+{
+	this_cpu_inc(snet->stats->syn_received);
+	if (tcp->ece && tcp->cwr)
+		opts->options |= NF_SYNPROXY_OPT_ECN;
+
+	opts->options &= priv->info.options;
+	opts->mss_encode = opts->mss_option;
+	opts->mss_option = info->mss;
+	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
+		synproxy_init_timestamp_cookie(info, opts);
+	else
+		opts->options &= ~(NF_SYNPROXY_OPT_WSCALE |
+				   NF_SYNPROXY_OPT_SACK_PERM |
+				   NF_SYNPROXY_OPT_ECN);
+}
+
+static void nft_synproxy_eval_v4(const struct nft_synproxy *priv,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt,
+				 const struct tcphdr *tcp,
+				 struct tcphdr *_tcph,
+				 struct synproxy_options *opts)
+{
+	struct nf_synproxy_info info = priv->info;
+	struct net *net = nft_net(pkt);
+	struct synproxy_net *snet = synproxy_pernet(net);
+	struct sk_buff *skb = pkt->skb;
+
+	if (tcp->syn) {
+		/* Initial SYN from client */
+		nft_synproxy_tcp_options(opts, tcp, snet, &info, priv);
+		synproxy_send_client_synack(net, skb, tcp, opts);
+		consume_skb(skb);
+		regs->verdict.code = NF_STOLEN;
+	} else if (tcp->ack) {
+		/* ACK from client */
+		if (synproxy_recv_client_ack(net, skb, tcp, opts,
+					     ntohl(tcp->seq))) {
+			consume_skb(skb);
+			regs->verdict.code = NF_STOLEN;
+		} else {
+			regs->verdict.code = NF_DROP;
+		}
+	}
+}
+
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+static void nft_synproxy_eval_v6(const struct nft_synproxy *priv,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt,
+				 const struct tcphdr *tcp,
+				 struct tcphdr *_tcph,
+				 struct synproxy_options *opts)
+{
+	struct nf_synproxy_info info = priv->info;
+	struct net *net = nft_net(pkt);
+	struct synproxy_net *snet = synproxy_pernet(net);
+	struct sk_buff *skb = pkt->skb;
+
+	if (tcp->syn) {
+		/* Initial SYN from client */
+		nft_synproxy_tcp_options(opts, tcp, snet, &info, priv);
+		synproxy_send_client_synack_ipv6(net, skb, tcp, opts);
+		consume_skb(skb);
+		regs->verdict.code = NF_STOLEN;
+	} else if (tcp->ack) {
+		/* ACK from client */
+		if (synproxy_recv_client_ack_ipv6(net, skb, tcp, opts,
+						  ntohl(tcp->seq))) {
+			consume_skb(skb);
+			regs->verdict.code = NF_STOLEN;
+		} else {
+			regs->verdict.code = NF_DROP;
+		}
+	}
+}
+#endif /* CONFIG_NF_TABLES_IPV6 */
+
+static void nft_synproxy_do_eval(const struct nft_synproxy *priv,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
+{
+	struct synproxy_options opts = {};
+	struct sk_buff *skb = pkt->skb;
+	int thoff = pkt->xt.thoff;
+	const struct tcphdr *tcp;
+	struct tcphdr _tcph;
+
+	if (pkt->tprot != IPPROTO_TCP) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	if (nf_ip_checksum(skb, nft_hook(pkt), thoff, IPPROTO_TCP)) {
+		regs->verdict.code = NF_DROP;
+		return;
+	}
+
+	tcp = skb_header_pointer(skb, pkt->xt.thoff,
+				 sizeof(struct tcphdr),
+				 &_tcph);
+	if (!tcp) {
+		regs->verdict.code = NF_DROP;
+		return;
+	}
+
+	if (!synproxy_parse_options(skb, thoff, tcp, &opts)) {
+		regs->verdict.code = NF_DROP;
+		return;
+	}
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		nft_synproxy_eval_v4(priv, regs, pkt, tcp, &_tcph, &opts);
+		return;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	case htons(ETH_P_IPV6):
+		nft_synproxy_eval_v6(priv, regs, pkt, tcp, &_tcph, &opts);
+		return;
+#endif
+	}
+	regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_synproxy_do_init(const struct nft_ctx *ctx,
+				const struct nlattr * const tb[],
+				struct nft_synproxy *priv)
+{
+	struct synproxy_net *snet = synproxy_pernet(ctx->net);
+	u32 flags;
+	int err;
+
+	if (tb[NFTA_SYNPROXY_MSS])
+		priv->info.mss = ntohs(nla_get_be16(tb[NFTA_SYNPROXY_MSS]));
+	if (tb[NFTA_SYNPROXY_WSCALE])
+		priv->info.wscale = nla_get_u8(tb[NFTA_SYNPROXY_WSCALE]);
+	if (tb[NFTA_SYNPROXY_FLAGS]) {
+		flags = ntohl(nla_get_be32(tb[NFTA_SYNPROXY_FLAGS]));
+		if (flags & ~NF_SYNPROXY_OPT_MASK)
+			return -EOPNOTSUPP;
+		priv->info.options = flags;
+	}
+
+	err = nf_ct_netns_get(ctx->net, ctx->family);
+	if (err)
+		return err;
+
+	switch (ctx->family) {
+	case NFPROTO_IPV4:
+		err = nf_synproxy_ipv4_init(snet, ctx->net);
+		if (err)
+			goto nf_ct_failure;
+		break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	case NFPROTO_IPV6:
+		err = nf_synproxy_ipv6_init(snet, ctx->net);
+		if (err)
+			goto nf_ct_failure;
+		break;
+#endif
+	case NFPROTO_INET:
+	case NFPROTO_BRIDGE:
+		err = nf_synproxy_ipv4_init(snet, ctx->net);
+		if (err)
+			goto nf_ct_failure;
+		err = nf_synproxy_ipv6_init(snet, ctx->net);
+		if (err)
+			goto nf_ct_failure;
+		break;
+	}
+
+	return 0;
+
+nf_ct_failure:
+	nf_ct_netns_put(ctx->net, ctx->family);
+	return err;
+}
+
+static void nft_synproxy_do_destroy(const struct nft_ctx *ctx)
+{
+	struct synproxy_net *snet = synproxy_pernet(ctx->net);
+
+	switch (ctx->family) {
+	case NFPROTO_IPV4:
+		nf_synproxy_ipv4_fini(snet, ctx->net);
+		break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	case NFPROTO_IPV6:
+		nf_synproxy_ipv6_fini(snet, ctx->net);
+		break;
+#endif
+	case NFPROTO_INET:
+	case NFPROTO_BRIDGE:
+		nf_synproxy_ipv4_fini(snet, ctx->net);
+		nf_synproxy_ipv6_fini(snet, ctx->net);
+		break;
+	}
+	nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static int nft_synproxy_do_dump(struct sk_buff *skb, struct nft_synproxy *priv)
+{
+	if (nla_put_be16(skb, NFTA_SYNPROXY_MSS, htons(priv->info.mss)) ||
+	    nla_put_u8(skb, NFTA_SYNPROXY_WSCALE, priv->info.wscale) ||
+	    nla_put_be32(skb, NFTA_SYNPROXY_FLAGS, htonl(priv->info.options)))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static void nft_synproxy_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	const struct nft_synproxy *priv = nft_expr_priv(expr);
+
+	nft_synproxy_do_eval(priv, regs, pkt);
+}
+
+static int nft_synproxy_validate(const struct nft_ctx *ctx,
+				 const struct nft_expr *expr,
+				 const struct nft_data **data)
+{
+	return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
+						    (1 << NF_INET_FORWARD));
+}
+
+static int nft_synproxy_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct nft_synproxy *priv = nft_expr_priv(expr);
+
+	return nft_synproxy_do_init(ctx, tb, priv);
+}
+
+static void nft_synproxy_destroy(const struct nft_ctx *ctx,
+				 const struct nft_expr *expr)
+{
+	nft_synproxy_do_destroy(ctx);
+}
+
+static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_synproxy *priv = nft_expr_priv(expr);
+
+	return nft_synproxy_do_dump(skb, priv);
+}
+
+static struct nft_expr_type nft_synproxy_type;
+static const struct nft_expr_ops nft_synproxy_ops = {
+	.eval		= nft_synproxy_eval,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_synproxy)),
+	.init		= nft_synproxy_init,
+	.destroy	= nft_synproxy_destroy,
+	.dump		= nft_synproxy_dump,
+	.type		= &nft_synproxy_type,
+	.validate	= nft_synproxy_validate,
+};
+
+static struct nft_expr_type nft_synproxy_type __read_mostly = {
+	.ops		= &nft_synproxy_ops,
+	.name		= "synproxy",
+	.owner		= THIS_MODULE,
+	.policy		= nft_synproxy_policy,
+	.maxattr	= NFTA_SYNPROXY_MAX,
+};
+
+static int nft_synproxy_obj_init(const struct nft_ctx *ctx,
+				 const struct nlattr * const tb[],
+				 struct nft_object *obj)
+{
+	struct nft_synproxy *priv = nft_obj_data(obj);
+
+	return nft_synproxy_do_init(ctx, tb, priv);
+}
+
+static void nft_synproxy_obj_destroy(const struct nft_ctx *ctx,
+				     struct nft_object *obj)
+{
+	nft_synproxy_do_destroy(ctx);
+}
+
+static int nft_synproxy_obj_dump(struct sk_buff *skb,
+				 struct nft_object *obj, bool reset)
+{
+	struct nft_synproxy *priv = nft_obj_data(obj);
+
+	return nft_synproxy_do_dump(skb, priv);
+}
+
+static void nft_synproxy_obj_eval(struct nft_object *obj,
+				  struct nft_regs *regs,
+				  const struct nft_pktinfo *pkt)
+{
+	const struct nft_synproxy *priv = nft_obj_data(obj);
+
+	nft_synproxy_do_eval(priv, regs, pkt);
+}
+
+static void nft_synproxy_obj_update(struct nft_object *obj,
+				    struct nft_object *newobj)
+{
+	struct nft_synproxy *newpriv = nft_obj_data(newobj);
+	struct nft_synproxy *priv = nft_obj_data(obj);
+
+	priv->info = newpriv->info;
+}
+
+static struct nft_object_type nft_synproxy_obj_type;
+static const struct nft_object_ops nft_synproxy_obj_ops = {
+	.type		= &nft_synproxy_obj_type,
+	.size		= sizeof(struct nft_synproxy),
+	.init		= nft_synproxy_obj_init,
+	.destroy	= nft_synproxy_obj_destroy,
+	.dump		= nft_synproxy_obj_dump,
+	.eval		= nft_synproxy_obj_eval,
+	.update		= nft_synproxy_obj_update,
+};
+
+static struct nft_object_type nft_synproxy_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_SYNPROXY,
+	.ops		= &nft_synproxy_obj_ops,
+	.maxattr	= NFTA_SYNPROXY_MAX,
+	.policy		= nft_synproxy_policy,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_synproxy_module_init(void)
+{
+	int err;
+
+	err = nft_register_obj(&nft_synproxy_obj_type);
+	if (err < 0)
+		return err;
+
+	err = nft_register_expr(&nft_synproxy_type);
+	if (err < 0)
+		goto err;
+
+	return 0;
+
+err:
+	nft_unregister_obj(&nft_synproxy_obj_type);
+	return err;
+}
+
+static void __exit nft_synproxy_module_exit(void)
+{
+	nft_unregister_expr(&nft_synproxy_type);
+	nft_unregister_obj(&nft_synproxy_obj_type);
+}
+
+module_init(nft_synproxy_module_init);
+module_exit(nft_synproxy_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
+MODULE_ALIAS_NFT_EXPR("synproxy");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY);
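
nft_synproxy_eval_v4() above answers a client SYN with a cookie SYNACK and steals the original packet, and lets a client ACK through only when the cookie check passes; anything else falls through to other rules. A standalone sketch of that verdict decision; check_cookie() is a toy validator, not the kernel's cookie code.

#include <stdbool.h>
#include <stdio.h>

enum verdict { STOLEN, DROP, BREAK_ };	/* NF_STOLEN / NF_DROP / NFT_BREAK analogues */

static bool check_cookie(unsigned int seq)
{
	return (seq & 1) == 0;		/* toy check only */
}

static enum verdict synproxy(bool syn, bool ack, unsigned int seq)
{
	if (syn)
		return STOLEN;		/* reply with cookie SYNACK, consume the SYN */
	if (ack)
		return check_cookie(seq) ? STOLEN : DROP;
	return BREAK_;			/* not ours; let other rules decide */
}

int main(void)
{
	printf("SYN        -> %d\n", synproxy(true, false, 0));
	printf("ACK (good) -> %d\n", synproxy(false, true, 42));
	printf("ACK (bad)  -> %d\n", synproxy(false, true, 43));
	return 0;
}
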
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 3a15f21..3d4c2ae 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -15,6 +15,7 @@
 struct nft_tunnel {
 	enum nft_tunnel_keys	key:8;
 	enum nft_registers	dreg:8;
+	enum nft_tunnel_mode	mode:8;
 };
 
 static void nft_tunnel_get_eval(const struct nft_expr *expr,
@@ -29,14 +30,32 @@
 
 	switch (priv->key) {
 	case NFT_TUNNEL_PATH:
-		nft_reg_store8(dest, !!tun_info);
+		if (!tun_info) {
+			nft_reg_store8(dest, false);
+			return;
+		}
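+		/* NONE matches either direction; RX/TX must agree with the metadata */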
+		if (priv->mode == NFT_TUNNEL_MODE_NONE ||
+		    (priv->mode == NFT_TUNNEL_MODE_RX &&
+		     !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
+		    (priv->mode == NFT_TUNNEL_MODE_TX &&
+		     (tun_info->mode & IP_TUNNEL_INFO_TX)))
+			nft_reg_store8(dest, true);
+		else
+			nft_reg_store8(dest, false);
 		break;
 	case NFT_TUNNEL_ID:
 		if (!tun_info) {
 			regs->verdict.code = NFT_BREAK;
 			return;
 		}
-		*dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
+		if (priv->mode == NFT_TUNNEL_MODE_NONE ||
+		    (priv->mode == NFT_TUNNEL_MODE_RX &&
+		     !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
+		    (priv->mode == NFT_TUNNEL_MODE_TX &&
+		     (tun_info->mode & IP_TUNNEL_INFO_TX)))
+			*dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
+		else
+			regs->verdict.code = NFT_BREAK;
 		break;
 	default:
 		WARN_ON(1);
@@ -47,6 +66,7 @@
 static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = {
 	[NFTA_TUNNEL_KEY]	= { .type = NLA_U32 },
 	[NFTA_TUNNEL_DREG]	= { .type = NLA_U32 },
+	[NFTA_TUNNEL_MODE]	= { .type = NLA_U32 },
 };
 
 static int nft_tunnel_get_init(const struct nft_ctx *ctx,
@@ -74,6 +94,14 @@
 
 	priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]);
 
+	if (tb[NFTA_TUNNEL_MODE]) {
+		priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE]));
+		if (priv->mode > NFT_TUNNEL_MODE_MAX)
+			return -EOPNOTSUPP;
+	} else {
+		priv->mode = NFT_TUNNEL_MODE_NONE;
+	}
+
 	return nft_validate_register_store(ctx, priv->dreg, NULL,
 					   NFT_DATA_VALUE, len);
 }
@@ -87,6 +115,8 @@
 		goto nla_put_failure;
 	if (nft_dump_register(skb, NFTA_TUNNEL_DREG, priv->dreg))
 		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_TUNNEL_MODE, htonl(priv->mode)))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
@@ -136,8 +166,8 @@
 	struct nlattr *tb[NFTA_TUNNEL_KEY_IP_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_IP_MAX, attr,
-			       nft_tunnel_ip_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_IP_MAX, attr,
+					  nft_tunnel_ip_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -165,8 +195,8 @@
 	struct nlattr *tb[NFTA_TUNNEL_KEY_IP6_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_IP6_MAX, attr,
-			       nft_tunnel_ip6_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_IP6_MAX, attr,
+					  nft_tunnel_ip6_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -201,8 +231,8 @@
 	struct nlattr *tb[NFTA_TUNNEL_KEY_VXLAN_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_VXLAN_MAX, attr,
-			       nft_tunnel_opts_vxlan_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_VXLAN_MAX, attr,
+					  nft_tunnel_opts_vxlan_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -230,8 +260,9 @@
 	uint8_t hwid, dir;
 	int err, version;
 
-	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_ERSPAN_MAX, attr,
-			       nft_tunnel_opts_erspan_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_ERSPAN_MAX,
+					  attr, nft_tunnel_opts_erspan_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -279,8 +310,8 @@
 	struct nlattr *tb[NFTA_TUNNEL_KEY_OPTS_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_OPTS_MAX, attr,
-			       nft_tunnel_opts_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_OPTS_MAX, attr,
+					  nft_tunnel_opts_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -376,6 +407,13 @@
 		return -ENOMEM;
 
 	memcpy(&md->u.tun_info, &info, sizeof(info));
+#ifdef CONFIG_DST_CACHE
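+	/* the copied tun_info has no dst_cache storage yet; allocate it */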
+	err = dst_cache_init(&md->u.tun_info.dst_cache, GFP_KERNEL);
+	if (err < 0) {
+		metadata_dst_free(md);
+		return err;
+	}
+#endif
 	ip_tunnel_info_opts_set(&md->u.tun_info, &priv->opts.u, priv->opts.len,
 				priv->opts.flags);
 	priv->md = md;
@@ -400,7 +438,7 @@
 	struct nlattr *nest;
 
 	if (info->mode & IP_TUNNEL_INFO_IPV6) {
-		nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_IP6);
+		nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_IP6);
 		if (!nest)
 			return -1;
 
@@ -411,7 +449,7 @@
 
 		nla_nest_end(skb, nest);
 	} else {
-		nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_IP);
+		nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_IP);
 		if (!nest)
 			return -1;
 
@@ -431,7 +469,7 @@
 	struct nft_tunnel_opts *opts = &priv->opts;
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_OPTS);
+	nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS);
 	if (!nest)
 		return -1;
 
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
new file mode 100644
index 0000000..06d5cab
--- /dev/null
+++ b/net/netfilter/nft_xfrm.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic part shared by ipv4 and ipv6 backends.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <linux/in.h>
+#include <net/xfrm.h>
+
+static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = {
+	[NFTA_XFRM_KEY]		= { .type = NLA_U32 },
+	[NFTA_XFRM_DIR]		= { .type = NLA_U8 },
+	[NFTA_XFRM_SPNUM]	= { .type = NLA_U32 },
+	[NFTA_XFRM_DREG]	= { .type = NLA_U32 },
+};
+
+struct nft_xfrm {
+	enum nft_xfrm_keys	key:8;
+	enum nft_registers	dreg:8;
+	u8			dir;
+	u8			spnum;
+};
+
+static int nft_xfrm_get_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct nft_xfrm *priv = nft_expr_priv(expr);
+	unsigned int len = 0;
+	u32 spnum = 0;
+	u8 dir;
+
+	if (!tb[NFTA_XFRM_KEY] || !tb[NFTA_XFRM_DIR] || !tb[NFTA_XFRM_DREG])
+		return -EINVAL;
+
+	switch (ctx->family) {
+	case NFPROTO_IPV4:
+	case NFPROTO_IPV6:
+	case NFPROTO_INET:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	priv->key = ntohl(nla_get_u32(tb[NFTA_XFRM_KEY]));
+	switch (priv->key) {
+	case NFT_XFRM_KEY_REQID:
+	case NFT_XFRM_KEY_SPI:
+		len = sizeof(u32);
+		break;
+	case NFT_XFRM_KEY_DADDR_IP4:
+	case NFT_XFRM_KEY_SADDR_IP4:
+		len = sizeof(struct in_addr);
+		break;
+	case NFT_XFRM_KEY_DADDR_IP6:
+	case NFT_XFRM_KEY_SADDR_IP6:
+		len = sizeof(struct in6_addr);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	dir = nla_get_u8(tb[NFTA_XFRM_DIR]);
+	switch (dir) {
+	case XFRM_POLICY_IN:
+	case XFRM_POLICY_OUT:
+		priv->dir = dir;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (tb[NFTA_XFRM_SPNUM])
+		spnum = ntohl(nla_get_be32(tb[NFTA_XFRM_SPNUM]));
+
+	if (spnum >= XFRM_MAX_DEPTH)
+		return -ERANGE;
+
+	priv->spnum = spnum;
+
+	priv->dreg = nft_parse_register(tb[NFTA_XFRM_DREG]);
+	return nft_validate_register_store(ctx, priv->dreg, NULL,
+					   NFT_DATA_VALUE, len);
+}
+
+/* Return true if the key asks for a daddr/saddr and the current
+ * state carries a valid address (BEET or TUNNEL mode).
+ */
+static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
+{
+	switch (k) {
+	case NFT_XFRM_KEY_DADDR_IP4:
+	case NFT_XFRM_KEY_SADDR_IP4:
+		if (family == NFPROTO_IPV4)
+			break;
+		return false;
+	case NFT_XFRM_KEY_DADDR_IP6:
+	case NFT_XFRM_KEY_SADDR_IP6:
+		if (family == NFPROTO_IPV6)
+			break;
+		return false;
+	default:
+		return true;
+	}
+
+	return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL;
+}
+
+static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
+				   struct nft_regs *regs,
+				   const struct xfrm_state *state)
+{
+	u32 *dest = &regs->data[priv->dreg];
+
+	if (!xfrm_state_addr_ok(priv->key,
+				state->props.family,
+				state->props.mode)) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	switch (priv->key) {
+	case NFT_XFRM_KEY_UNSPEC:
+	case __NFT_XFRM_KEY_MAX:
+		WARN_ON_ONCE(1);
+		break;
+	case NFT_XFRM_KEY_DADDR_IP4:
+		*dest = state->id.daddr.a4;
+		return;
+	case NFT_XFRM_KEY_DADDR_IP6:
+		memcpy(dest, &state->id.daddr.in6, sizeof(struct in6_addr));
+		return;
+	case NFT_XFRM_KEY_SADDR_IP4:
+		*dest = state->props.saddr.a4;
+		return;
+	case NFT_XFRM_KEY_SADDR_IP6:
+		memcpy(dest, &state->props.saddr.in6, sizeof(struct in6_addr));
+		return;
+	case NFT_XFRM_KEY_REQID:
+		*dest = state->props.reqid;
+		return;
+	case NFT_XFRM_KEY_SPI:
+		*dest = state->id.spi;
+		return;
+	}
+
+	regs->verdict.code = NFT_BREAK;
+}
+
+static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
+{
+	const struct sec_path *sp = skb_sec_path(pkt->skb);
+	const struct xfrm_state *state;
+
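+	/* inbound: the secpath records the transforms already applied to this skb */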
+	if (sp == NULL || sp->len <= priv->spnum) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	state = sp->xvec[priv->spnum];
+	nft_xfrm_state_get_key(priv, regs, state);
+}
+
+static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv,
+				  struct nft_regs *regs,
+				  const struct nft_pktinfo *pkt)
+{
+	const struct dst_entry *dst = skb_dst(pkt->skb);
+	int i;
+
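+	/* outbound: walk the xfrm bundle and match on the spnum'th transform */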
+	for (i = 0; dst && dst->xfrm;
+	     dst = ((const struct xfrm_dst *)dst)->child, i++) {
+		if (i < priv->spnum)
+			continue;
+
+		nft_xfrm_state_get_key(priv, regs, dst->xfrm);
+		return;
+	}
+
+	regs->verdict.code = NFT_BREAK;
+}
+
+static void nft_xfrm_get_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	const struct nft_xfrm *priv = nft_expr_priv(expr);
+
+	switch (priv->dir) {
+	case XFRM_POLICY_IN:
+		nft_xfrm_get_eval_in(priv, regs, pkt);
+		break;
+	case XFRM_POLICY_OUT:
+		nft_xfrm_get_eval_out(priv, regs, pkt);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		regs->verdict.code = NFT_BREAK;
+		break;
+	}
+}
+
+static int nft_xfrm_get_dump(struct sk_buff *skb,
+			     const struct nft_expr *expr)
+{
+	const struct nft_xfrm *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_XFRM_DREG, priv->dreg))
+		return -1;
+
+	if (nla_put_be32(skb, NFTA_XFRM_KEY, htonl(priv->key)))
+		return -1;
+	if (nla_put_u8(skb, NFTA_XFRM_DIR, priv->dir))
+		return -1;
+	if (nla_put_be32(skb, NFTA_XFRM_SPNUM, htonl(priv->spnum)))
+		return -1;
+
+	return 0;
+}
+
+static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			     const struct nft_data **data)
+{
+	const struct nft_xfrm *priv = nft_expr_priv(expr);
+	unsigned int hooks;
+
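+	/* only allow hooks where a state for the configured direction can exist */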
+	switch (priv->dir) {
+	case XFRM_POLICY_IN:
+		hooks = (1 << NF_INET_FORWARD) |
+			(1 << NF_INET_LOCAL_IN) |
+			(1 << NF_INET_PRE_ROUTING);
+		break;
+	case XFRM_POLICY_OUT:
+		hooks = (1 << NF_INET_FORWARD) |
+			(1 << NF_INET_LOCAL_OUT) |
+			(1 << NF_INET_POST_ROUTING);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+
+	return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
+static struct nft_expr_type nft_xfrm_type;
+static const struct nft_expr_ops nft_xfrm_get_ops = {
+	.type		= &nft_xfrm_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_xfrm)),
+	.eval		= nft_xfrm_get_eval,
+	.init		= nft_xfrm_get_init,
+	.dump		= nft_xfrm_get_dump,
+	.validate	= nft_xfrm_validate,
+};
+
+static struct nft_expr_type nft_xfrm_type __read_mostly = {
+	.name		= "xfrm",
+	.ops		= &nft_xfrm_get_ops,
+	.policy		= nft_xfrm_policy,
+	.maxattr	= NFTA_XFRM_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_xfrm_module_init(void)
+{
+	return nft_register_expr(&nft_xfrm_type);
+}
+
+static void __exit nft_xfrm_module_exit(void)
+{
+	nft_unregister_expr(&nft_xfrm_type);
+}
+
+module_init(nft_xfrm_module_init);
+module_exit(nft_xfrm_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("nf_tables: xfrm/IPSec matching");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_AUTHOR("Máté Eckl <ecklm94@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("xfrm");
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index e8da9a9..51b454d 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -17,7 +17,8 @@
 	case CHECKSUM_COMPLETE:
 		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
 			break;
-		if ((protocol == 0 && !csum_fold(skb->csum)) ||
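+		/* other protocols have no pseudo-header; fold the complete checksum */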
+		if ((protocol != IPPROTO_TCP && protocol != IPPROTO_UDP &&
+		    !csum_fold(skb->csum)) ||
 		    !csum_tcpudp_magic(iph->saddr, iph->daddr,
 				       skb->len - dataoff, protocol,
 				       skb->csum)) {
@@ -26,7 +27,7 @@
 		}
 		/* fall through */
 	case CHECKSUM_NONE:
-		if (protocol == 0)
+		if (protocol != IPPROTO_TCP && protocol != IPPROTO_UDP)
 			skb->csum = 0;
 		else
 			skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
@@ -162,7 +163,7 @@
 int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 	     bool strict, unsigned short family)
 {
-	const struct nf_ipv6_ops *v6ops;
+	const struct nf_ipv6_ops *v6ops __maybe_unused;
 	int ret = 0;
 
 	switch (family) {
@@ -170,9 +171,7 @@
 		ret = nf_ip_route(net, dst, fl, strict);
 		break;
 	case AF_INET6:
-		v6ops = rcu_dereference(nf_ipv6_ops);
-		if (v6ops)
-			ret = v6ops->route(net, dst, fl, strict);
+		ret = nf_ip6_route(net, dst, fl, strict);
 		break;
 	}
 
@@ -180,6 +179,25 @@
 }
 EXPORT_SYMBOL_GPL(nf_route);
 
+static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
+{
+#ifdef CONFIG_INET
+	const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
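+	/* re-route a queued LOCAL_OUT packet if its routing keys were rewritten */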
+	if (entry->state.hook == NF_INET_LOCAL_OUT) {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		if (!(iph->tos == rt_info->tos &&
+		      skb->mark == rt_info->mark &&
+		      iph->daddr == rt_info->daddr &&
+		      iph->saddr == rt_info->saddr))
+			return ip_route_me_harder(entry->state.net, skb,
+						  RTN_UNSPEC);
+	}
+#endif
+	return 0;
+}
+
 int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
 {
 	const struct nf_ipv6_ops *v6ops;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index aecadd4..ce70c25 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * x_tables core - Backend for {ip,ip6,arp}_tables
  *
@@ -7,11 +8,6 @@
  * Based on existing ip_tables code which is
  *   Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  *   Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kernel.h>
@@ -227,7 +223,7 @@
 EXPORT_SYMBOL_GPL(xt_request_find_match);
 
 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
-struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
+static struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
 {
 	struct xt_target *t;
 	int err = -ENOENT;
@@ -255,7 +251,6 @@
 
 	return ERR_PTR(err);
 }
-EXPORT_SYMBOL(xt_find_target);
 
 struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
 {
@@ -461,7 +456,7 @@
 EXPORT_SYMBOL(xt_check_proc_name);
 
 int xt_check_match(struct xt_mtchk_param *par,
-		   unsigned int size, u_int8_t proto, bool inv_proto)
+		   unsigned int size, u16 proto, bool inv_proto)
 {
 	int ret;
 
@@ -984,7 +979,7 @@
 EXPORT_SYMBOL(xt_find_jump_offset);
 
 int xt_check_target(struct xt_tgchk_param *par,
-		    unsigned int size, u_int8_t proto, bool inv_proto)
+		    unsigned int size, u16 proto, bool inv_proto)
 {
 	int ret;
 
@@ -1899,7 +1894,7 @@
 		seqcount_init(&per_cpu(xt_recseq, i));
 	}
 
-	xt = kmalloc_array(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
+	xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
 	if (!xt)
 		return -ENOMEM;
 
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index af883f1..9cdc16b 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Creates audit record for dropped/accepted packets
  *
  * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
  * (C) 2010-2011 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
 */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c
index 6c7aa6a..c8a639f 100644
--- a/net/netfilter/xt_CHECKSUM.c
+++ b/net/netfilter/xt_CHECKSUM.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* iptables module for the packet checksum mangling
  *
  * (C) 2002 by Harald Welte <laforge@netfilter.org>
  * (C) 2010 Red Hat, Inc.
  *
  * Author: Michael S. Tsirkin <mst@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
 */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index af9c4da..0accac9 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is a module which is used for setting the skb->priority field
  * of an skb for qdisc classification.
  */
 
 /* (C) 2001-2002 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index f3f1caa..a5c8b65 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This module is used to copy security markings from packets
  * to connections, and restore security markings from connections
@@ -9,11 +10,6 @@
  *    by Henrik Nordstrom <hno@marasystems.com>
  *
  * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 89457ef..d4deee3 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2010 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -103,85 +100,24 @@
 	return 0;
 }
 
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-static void __xt_ct_tg_timeout_put(struct nf_ct_timeout *timeout)
-{
-	typeof(nf_ct_timeout_put_hook) timeout_put;
-
-	timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
-	if (timeout_put)
-		timeout_put(timeout);
-}
-#endif
-
 static int
 xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
 		  const char *timeout_name)
 {
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-	typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
 	const struct nf_conntrack_l4proto *l4proto;
-	struct nf_ct_timeout *timeout;
-	struct nf_conn_timeout *timeout_ext;
-	const char *errmsg = NULL;
-	int ret = 0;
 	u8 proto;
 
-	rcu_read_lock();
-	timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
-	if (timeout_find_get == NULL) {
-		ret = -ENOENT;
-		errmsg = "Timeout policy base is empty";
-		goto out;
-	}
-
 	proto = xt_ct_find_proto(par);
 	if (!proto) {
-		ret = -EINVAL;
-		errmsg = "You must specify a L4 protocol and not use inversions on it";
-		goto out;
+		pr_info_ratelimited("You must specify an L4 protocol and not use inversions on it\n");
+		return -EINVAL;
 	}
+	l4proto = nf_ct_l4proto_find(proto);
+	return nf_ct_set_timeout(par->net, ct, par->family, l4proto->l4proto,
+				 timeout_name);
 
-	timeout = timeout_find_get(par->net, timeout_name);
-	if (timeout == NULL) {
-		ret = -ENOENT;
-		pr_info_ratelimited("No such timeout policy \"%s\"\n",
-				    timeout_name);
-		goto out;
-	}
-
-	if (timeout->l3num != par->family) {
-		ret = -EINVAL;
-		pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
-				    timeout_name, 3, timeout->l3num);
-		goto err_put_timeout;
-	}
-	/* Make sure the timeout policy matches any existing protocol tracker,
-	 * otherwise default to generic.
-	 */
-	l4proto = __nf_ct_l4proto_find(par->family, proto);
-	if (timeout->l4proto->l4proto != l4proto->l4proto) {
-		ret = -EINVAL;
-		pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
-				    timeout_name, 4, timeout->l4proto->l4proto);
-		goto err_put_timeout;
-	}
-	timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
-	if (!timeout_ext) {
-		ret = -ENOMEM;
-		goto err_put_timeout;
-	}
-
-	rcu_read_unlock();
-	return ret;
-
-err_put_timeout:
-	__xt_ct_tg_timeout_put(timeout);
-out:
-	rcu_read_unlock();
-	if (errmsg)
-		pr_info_ratelimited("%s\n", errmsg);
-	return ret;
 #else
 	return -EOPNOTSUPP;
 #endif
@@ -328,26 +264,6 @@
 	return xt_ct_tg_check(par, par->targinfo);
 }
 
-static void xt_ct_destroy_timeout(struct nf_conn *ct)
-{
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-	struct nf_conn_timeout *timeout_ext;
-	typeof(nf_ct_timeout_put_hook) timeout_put;
-
-	rcu_read_lock();
-	timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
-
-	if (timeout_put) {
-		timeout_ext = nf_ct_timeout_find(ct);
-		if (timeout_ext) {
-			timeout_put(timeout_ext->timeout);
-			RCU_INIT_POINTER(timeout_ext->timeout, NULL);
-		}
-	}
-	rcu_read_unlock();
-#endif
-}
-
 static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
 			     struct xt_ct_target_info_v1 *info)
 {
@@ -361,7 +277,7 @@
 
 		nf_ct_netns_put(par->net, par->family);
 
-		xt_ct_destroy_timeout(ct);
+		nf_ct_destroy_timeout(ct);
 		nf_ct_put(info->ct);
 	}
 }
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 098ed85..eababc3 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8
  *
  * (C) 2002 by Harald Welte <laforge@netfilter.org>
  * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * See RFC2474 for a description of the DSCP field within the IP Header.
 */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -34,7 +31,7 @@
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
-		if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		if (skb_ensure_writable(skb, sizeof(struct iphdr)))
 			return NF_DROP;
 
 		ipv4_change_dsfield(ip_hdr(skb),
@@ -52,7 +49,7 @@
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
-		if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+		if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
 			return NF_DROP;
 
 		ipv6_change_dsfield(ipv6_hdr(skb),
@@ -82,7 +79,7 @@
 	nv   = (orig & ~info->tos_mask) ^ info->tos_value;
 
 	if (orig != nv) {
-		if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		if (skb_ensure_writable(skb, sizeof(struct iphdr)))
 			return NF_DROP;
 		iph = ip_hdr(skb);
 		ipv4_change_dsfield(iph, 0, nv);
@@ -102,7 +99,7 @@
 	nv   = (orig & ~info->tos_mask) ^ info->tos_value;
 
 	if (orig != nv) {
-		if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		if (skb_ensure_writable(skb, sizeof(struct iphdr)))
 			return NF_DROP;
 		iph = ipv6_hdr(skb);
 		ipv6_change_dsfield(iph, 0, nv);
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 4653b07..7873b83 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * TTL modification target for IP tables
  * (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
  *
  * Hop Limit modification target for ip6tables
  * Maciej Soltysiak <solt@dns.toxicfilms.tv>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -32,7 +29,7 @@
 	const struct ipt_TTL_info *info = par->targinfo;
 	int new_ttl;
 
-	if (!skb_make_writable(skb, skb->len))
+	if (skb_ensure_writable(skb, sizeof(*iph)))
 		return NF_DROP;
 
 	iph = ip_hdr(skb);
@@ -72,7 +69,7 @@
 	const struct ip6t_HL_info *info = par->targinfo;
 	int new_hl;
 
-	if (!skb_make_writable(skb, skb->len))
+	if (skb_ensure_writable(skb, sizeof(*ip6h)))
 		return NF_DROP;
 
 	ip6h = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 9c75f41..be7798a 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * xt_HMARK - Netfilter module to set mark by means of hashing
  *
  * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
  * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 25453a1..f56d3ed 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/netfilter/xt_IDLETIMER.c
  *
@@ -11,20 +12,6 @@
  * by Luciano Coelho <luciano.coelho@nokia.com>
  *
  * Contact: Luciano Coelho <luciano.coelho@nokia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -41,19 +28,13 @@
 #include <linux/workqueue.h>
 #include <linux/sysfs.h>
 
-struct idletimer_tg_attr {
-	struct attribute attr;
-	ssize_t	(*show)(struct kobject *kobj,
-			struct attribute *attr, char *buf);
-};
-
 struct idletimer_tg {
 	struct list_head entry;
 	struct timer_list timer;
 	struct work_struct work;
 
 	struct kobject *kobj;
-	struct idletimer_tg_attr attr;
+	struct device_attribute attr;
 
 	unsigned int refcnt;
 };
@@ -68,8 +49,6 @@
 {
 	struct idletimer_tg *entry;
 
-	BUG_ON(!label);
-
 	list_for_each_entry(entry, &idletimer_tg_list, entry) {
 		if (!strcmp(label, entry->attr.attr.name))
 			return entry;
@@ -78,15 +57,15 @@
 	return NULL;
 }
 
-static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
-				 char *buf)
+static ssize_t idletimer_tg_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
 {
 	struct idletimer_tg *timer;
 	unsigned long expires = 0;
 
 	mutex_lock(&list_mutex);
 
-	timer =	__idletimer_tg_find_by_label(attr->name);
+	timer =	__idletimer_tg_find_by_label(attr->attr.name);
 	if (timer)
 		expires = timer->timer.expires;
 
@@ -192,8 +171,6 @@
 	pr_debug("resetting timer %s, timeout period %u\n",
 		 info->label, info->timeout);
 
-	BUG_ON(!info->timer);
-
 	mod_timer(&info->timer->timer,
 		  msecs_to_jiffies(info->timeout * 1000) + jiffies);
 
@@ -306,7 +283,7 @@
 
 	idletimer_tg_kobj = &idletimer_tg_device->kobj;
 
-	err =  xt_register_target(&idletimer_tg);
+	err = xt_register_target(&idletimer_tg);
 	if (err < 0) {
 		pr_debug("couldn't register xt target\n");
 		goto out_dev;
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 1984644..0371c38 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -1,22 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * xt_LED.c - netfilter target to make LEDs blink upon packet matches
  *
  * Copyright (C) 2008 Adam Nielsen <a.nielsen@shikadi.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301 USA.
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index c3b2017..a1e79b5 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is a module which is used for logging packets.
  */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c
new file mode 100644
index 0000000..eae05c1
--- /dev/null
+++ b/net/netfilter/xt_MASQUERADE.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Masquerade.  Simple mapping which alters range to a local IP address
+   (depending on route). */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_masquerade.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
+
+/* FIXME: Multiple targets. --RR */
+static int masquerade_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+
+	if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
+		pr_debug("bad MAP_IPS.\n");
+		return -EINVAL;
+	}
+	if (mr->rangesize != 1) {
+		pr_debug("bad rangesize %u\n", mr->rangesize);
+		return -EINVAL;
+	}
+	return nf_ct_netns_get(par->net, par->family);
+}
+
+static unsigned int
+masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	struct nf_nat_range2 range;
+	const struct nf_nat_ipv4_multi_range_compat *mr;
+
+	mr = par->targinfo;
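+	/* only flags and the proto range carry over; the address comes from the route */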
+	range.flags = mr->range[0].flags;
+	range.min_proto = mr->range[0].min;
+	range.max_proto = mr->range[0].max;
+
+	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
+				      xt_out(par));
+}
+
+static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_netns_put(par->net, par->family);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static unsigned int
+masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
+}
+
+static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS)
+		return -EINVAL;
+
+	return nf_ct_netns_get(par->net, par->family);
+}
+#endif
+
+static struct xt_target masquerade_tg_reg[] __read_mostly = {
+	{
+#if IS_ENABLED(CONFIG_IPV6)
+		.name		= "MASQUERADE",
+		.family		= NFPROTO_IPV6,
+		.target		= masquerade_tg6,
+		.targetsize	= sizeof(struct nf_nat_range),
+		.table		= "nat",
+		.hooks		= 1 << NF_INET_POST_ROUTING,
+		.checkentry	= masquerade_tg6_checkentry,
+		.destroy	= masquerade_tg_destroy,
+		.me		= THIS_MODULE,
+	}, {
+#endif
+		.name		= "MASQUERADE",
+		.family		= NFPROTO_IPV4,
+		.target		= masquerade_tg,
+		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
+		.table		= "nat",
+		.hooks		= 1 << NF_INET_POST_ROUTING,
+		.checkentry	= masquerade_tg_check,
+		.destroy	= masquerade_tg_destroy,
+		.me		= THIS_MODULE,
+	}
+};
+
+static int __init masquerade_tg_init(void)
+{
+	int ret;
+
+	ret = xt_register_targets(masquerade_tg_reg,
+				  ARRAY_SIZE(masquerade_tg_reg));
+	if (ret)
+		return ret;
+
+	ret = nf_nat_masquerade_inet_register_notifiers();
+	if (ret) {
+		xt_unregister_targets(masquerade_tg_reg,
+				      ARRAY_SIZE(masquerade_tg_reg));
+		return ret;
+	}
+
+	return ret;
+}
+
+static void __exit masquerade_tg_exit(void)
+{
+	xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg));
+	nf_nat_masquerade_inet_unregister_notifiers();
+}
+
+module_init(masquerade_tg_init);
+module_exit(masquerade_tg_exit);
+#if IS_ENABLED(CONFIG_IPV6)
+MODULE_ALIAS("ip6t_MASQUERADE");
+#endif
+MODULE_ALIAS("ipt_MASQUERADE");
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index 1d43787..cb2ee80 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
  * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/ip.h>
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 1ed0cac..6e83ce3 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index a9aca80..466da23 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* iptables module for using new netfilter netlink queue
  *
  * (C) 2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 9e05c86..2236455 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 2007 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
index 5ce9461..353ca78 100644
--- a/net/netfilter/xt_REDIRECT.c
+++ b/net/netfilter/xt_REDIRECT.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6
  * NAT funded by Astaro.
  */
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 4ad5fe2..2317721 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Module for modifying the secmark field of the skb, for use by
  * security subsystems.
@@ -6,11 +7,6 @@
  * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
  *
  * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -35,8 +31,6 @@
 	u32 secmark = 0;
 	const struct xt_secmark_target_info *info = par->targinfo;
 
-	BUG_ON(info->mode != mode);
-
 	switch (mode) {
 	case SECMARK_MODE_SEL:
 		secmark = info->secid;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 98efb20..122db9f 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is a module which is used for setting the MSS option in TCP packets.
  *
  * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
  * Copyright (C) 2007 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -89,7 +86,7 @@
 	if (par->fragoff != 0)
 		return 0;
 
-	if (!skb_make_writable(skb, skb->len))
+	if (skb_ensure_writable(skb, skb->len))
 		return -1;
 
 	len = skb->len - tcphoff;
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index eb92bff..30e9946 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * A module for stripping a specific TCP option from TCP packets.
  *
  * Copyright (C) 2007 Sven Schnelle <svens@bitebene.org>
  * Copyright © CC Computer Consultants GmbH, 2007
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -31,33 +28,33 @@
 static unsigned int
 tcpoptstrip_mangle_packet(struct sk_buff *skb,
 			  const struct xt_action_param *par,
-			  unsigned int tcphoff, unsigned int minlen)
+			  unsigned int tcphoff)
 {
 	const struct xt_tcpoptstrip_target_info *info = par->targinfo;
+	struct tcphdr *tcph, _th;
 	unsigned int optl, i, j;
-	struct tcphdr *tcph;
 	u_int16_t n, o;
 	u_int8_t *opt;
-	int len, tcp_hdrlen;
+	int tcp_hdrlen;
 
 	/* This is a fragment, no TCP header is available */
 	if (par->fragoff != 0)
 		return XT_CONTINUE;
 
-	if (!skb_make_writable(skb, skb->len))
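+	/* peek at the header first; make the skb writable only up to the real header length */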
+	tcph = skb_header_pointer(skb, tcphoff, sizeof(_th), &_th);
+	if (!tcph)
 		return NF_DROP;
 
-	len = skb->len - tcphoff;
-	if (len < (int)sizeof(struct tcphdr))
-		return NF_DROP;
-
-	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
 	tcp_hdrlen = tcph->doff * 4;
-
-	if (len < tcp_hdrlen)
+	if (tcp_hdrlen < sizeof(struct tcphdr))
 		return NF_DROP;
 
-	opt  = (u_int8_t *)tcph;
+	if (skb_ensure_writable(skb, tcphoff + tcp_hdrlen))
+		return NF_DROP;
+
+	/* must reload tcph, might have been moved */
+	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+	opt  = (u8 *)tcph;
 
 	/*
 	 * Walk through all TCP options - if we find some option to remove,
@@ -91,8 +88,7 @@
 static unsigned int
 tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb),
-	       sizeof(struct iphdr) + sizeof(struct tcphdr));
+	return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb));
 }
 
 #if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
@@ -109,8 +105,7 @@
 	if (tcphoff < 0)
 		return NF_DROP;
 
-	return tcpoptstrip_mangle_packet(skb, par, tcphoff,
-	       sizeof(*ipv6h) + sizeof(struct tcphdr));
+	return tcpoptstrip_mangle_packet(skb, par, tcphoff);
 }
 #endif
 
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 0d0d68c..a5ebd56 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	"TEE" target extension for Xtables
  *	Copyright © Sebastian Claßen, 2007
@@ -5,15 +6,13 @@
  *
  *	based on ipt_ROUTE.c from Cédric de Launois
  *	<delaunois@info.ucl.be>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	version 2 or later, as published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/route.h>
 #include <linux/netfilter/x_tables.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/route.h>
 #include <net/netfilter/ipv4/nf_dup_ipv4.h>
 #include <net/netfilter/ipv6/nf_dup_ipv6.h>
@@ -25,8 +24,15 @@
 	int			oif;
 };
 
+static unsigned int tee_net_id __read_mostly;
 static const union nf_inet_addr tee_zero_address;
 
+struct tee_net {
+	struct list_head priv_list;
+	/* lock protects the priv_list */
+	struct mutex lock;
+};
+
 static unsigned int
 tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -51,17 +57,16 @@
 }
 #endif
 
-static DEFINE_MUTEX(priv_list_mutex);
-static LIST_HEAD(priv_list);
-
 static int tee_netdev_event(struct notifier_block *this, unsigned long event,
 			    void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+	struct tee_net *tn = net_generic(net, tee_net_id);
 	struct xt_tee_priv *priv;
 
-	mutex_lock(&priv_list_mutex);
-	list_for_each_entry(priv, &priv_list, list) {
+	mutex_lock(&tn->lock);
+	list_for_each_entry(priv, &tn->priv_list, list) {
 		switch (event) {
 		case NETDEV_REGISTER:
 			if (!strcmp(dev->name, priv->tginfo->oif))
@@ -79,13 +84,14 @@
 			break;
 		}
 	}
-	mutex_unlock(&priv_list_mutex);
+	mutex_unlock(&tn->lock);
 
 	return NOTIFY_DONE;
 }
 
 static int tee_tg_check(const struct xt_tgchk_param *par)
 {
+	struct tee_net *tn = net_generic(par->net, tee_net_id);
 	struct xt_tee_tginfo *info = par->targinfo;
 	struct xt_tee_priv *priv;
 
@@ -95,6 +101,8 @@
 		return -EINVAL;
 
 	if (info->oif[0]) {
+		struct net_device *dev;
+
 		if (info->oif[sizeof(info->oif)-1] != '\0')
 			return -EINVAL;
 
@@ -106,9 +114,14 @@
 		priv->oif     = -1;
 		info->priv    = priv;
 
-		mutex_lock(&priv_list_mutex);
-		list_add(&priv->list, &priv_list);
-		mutex_unlock(&priv_list_mutex);
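+		/* resolve the ifindex up front in case the device already exists */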
+		dev = dev_get_by_name(par->net, info->oif);
+		if (dev) {
+			priv->oif = dev->ifindex;
+			dev_put(dev);
+		}
+		mutex_lock(&tn->lock);
+		list_add(&priv->list, &tn->priv_list);
+		mutex_unlock(&tn->lock);
 	} else
 		info->priv = NULL;
 
@@ -118,12 +131,13 @@
 
 static void tee_tg_destroy(const struct xt_tgdtor_param *par)
 {
+	struct tee_net *tn = net_generic(par->net, tee_net_id);
 	struct xt_tee_tginfo *info = par->targinfo;
 
 	if (info->priv) {
-		mutex_lock(&priv_list_mutex);
+		mutex_lock(&tn->lock);
 		list_del(&info->priv->list);
-		mutex_unlock(&priv_list_mutex);
+		mutex_unlock(&tn->lock);
 		kfree(info->priv);
 	}
 	static_key_slow_dec(&xt_tee_enabled);
@@ -156,6 +170,21 @@
 #endif
 };
 
+static int __net_init tee_net_init(struct net *net)
+{
+	struct tee_net *tn = net_generic(net, tee_net_id);
+
+	INIT_LIST_HEAD(&tn->priv_list);
+	mutex_init(&tn->lock);
+	return 0;
+}
+
+static struct pernet_operations tee_net_ops = {
+	.init = tee_net_init,
+	.id   = &tee_net_id,
+	.size = sizeof(struct tee_net),
+};
+
 static struct notifier_block tee_netdev_notifier = {
 	.notifier_call = tee_netdev_event,
 };
@@ -164,22 +193,32 @@
 {
 	int ret;
 
+	ret = register_pernet_subsys(&tee_net_ops);
+	if (ret < 0)
+		return ret;
+
 	ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
-	if (ret)
-		return ret;
+	if (ret < 0)
+		goto cleanup_subsys;
+
 	ret = register_netdevice_notifier(&tee_netdev_notifier);
-	if (ret) {
-		xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
-		return ret;
-	}
+	if (ret < 0)
+		goto unregister_targets;
 
 	return 0;
+
+unregister_targets:
+	xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+cleanup_subsys:
+	unregister_pernet_subsys(&tee_net_ops);
+	return ret;
 }
 
 static void __exit tee_tg_exit(void)
 {
 	unregister_netdevice_notifier(&tee_netdev_notifier);
 	xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+	unregister_pernet_subsys(&tee_net_ops);
 }
 
 module_init(tee_tg_init);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index ad7420c..194dc03 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Transparent proxy support for Linux/iptables
  *
  * Copyright (c) 2006-2010 BalaBit IT Ltd.
  * Author: Balazs Scheidler, Krisztian Kovacs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 858d189..349ab56 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* This is a module which is used to mark packets for tracing.
  */
 #include <linux/module.h>
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 89e281b..e9b2181 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  iptables module to match inet_addr_type() of an ip.
  *
  *  Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
  *  (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kernel.h>
@@ -36,7 +33,6 @@
 static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
 			    const struct in6_addr *addr, u16 mask)
 {
-	const struct nf_ipv6_ops *v6ops;
 	struct flowi6 flow;
 	struct rt6_info *rt;
 	u32 ret = 0;
@@ -47,18 +43,13 @@
 	if (dev)
 		flow.flowi6_oif = dev->ifindex;
 
-	v6ops = nf_get_ipv6_ops();
-	if (v6ops) {
-		if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
-			if (v6ops->chk_addr(net, addr, dev, true))
-				ret = XT_ADDRTYPE_LOCAL;
-		}
-		route_err = v6ops->route(net, (struct dst_entry **)&rt,
-					 flowi6_to_flowi(&flow), false);
-	} else {
-		route_err = 1;
+	if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
+		if (nf_ipv6_chk_addr(net, addr, dev, true))
+			ret = XT_ADDRTYPE_LOCAL;
 	}
 
+	route_err = nf_ip6_route(net, (struct dst_entry **)&rt,
+				 flowi6_to_flowi(&flow), false);
 	if (route_err)
 		return XT_ADDRTYPE_UNREACHABLE;
 
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index a2cf8a6..13cf3f9 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Xtables module to match packets using a BPF filter.
  * Copyright 2013 Google Inc.
  * Written by Willem de Bruijn <willemb@google.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 5d92e17..c0f5e9a 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Xtables module to match the process control group.
  *
@@ -6,10 +7,6 @@
  * Matching is based upon processes tagged to net_cls' classid marker.
  *
  * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -68,6 +65,38 @@
 	return 0;
 }
 
+static int cgroup_mt_check_v2(const struct xt_mtchk_param *par)
+{
+	struct xt_cgroup_info_v2 *info = par->matchinfo;
+	struct cgroup *cgrp;
+
+	if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+		return -EINVAL;
+
+	if (!info->has_path && !info->has_classid) {
+		pr_info_ratelimited("no path or classid specified\n");
+		return -EINVAL;
+	}
+
+	if (info->has_path && info->has_classid) {
+		pr_info_ratelimited("path and classid specified\n");
+		return -EINVAL;
+	}
+
+	info->priv = NULL;
+	if (info->has_path) {
+		cgrp = cgroup_get_from_path(info->path);
+		if (IS_ERR(cgrp)) {
+			pr_info_ratelimited("invalid path, errno=%ld\n",
+					    PTR_ERR(cgrp));
+			return -EINVAL;
+		}
+		info->priv = cgrp;
+	}
+
+	return 0;
+}
+
 static bool
 cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
@@ -99,6 +128,24 @@
 			info->invert_classid;
 }
 
+static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_cgroup_info_v2 *info = par->matchinfo;
+	struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
+	struct cgroup *ancestor = info->priv;
+	struct sock *sk = skb->sk;
+
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
+		return false;
+
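+	/* a path match tests cgroup2 ancestry; otherwise compare the net_cls classid */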
+	if (ancestor)
+		return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+			info->invert_path;
+	else
+		return (info->classid == sock_cgroup_classid(skcd)) ^
+			info->invert_classid;
+}
+
 static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
 {
 	struct xt_cgroup_info_v1 *info = par->matchinfo;
@@ -107,6 +154,14 @@
 		cgroup_put(info->priv);
 }
 
+static void cgroup_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+	struct xt_cgroup_info_v2 *info = par->matchinfo;
+
+	if (info->priv)
+		cgroup_put(info->priv);
+}
+
 static struct xt_match cgroup_mt_reg[] __read_mostly = {
 	{
 		.name		= "cgroup",
@@ -134,6 +189,20 @@
 				  (1 << NF_INET_POST_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
 	},
+	{
+		.name		= "cgroup",
+		.revision	= 2,
+		.family		= NFPROTO_UNSPEC,
+		.checkentry	= cgroup_mt_check_v2,
+		.match		= cgroup_mt_v2,
+		.matchsize	= sizeof(struct xt_cgroup_info_v2),
+		.usersize	= offsetof(struct xt_cgroup_info_v2, priv),
+		.destroy	= cgroup_mt_destroy_v2,
+		.me		= THIS_MODULE,
+		.hooks		= (1 << NF_INET_LOCAL_OUT) |
+				  (1 << NF_INET_POST_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN),
+	},
 };
 
 static int __init cgroup_mt_init(void)
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 51d0c25..a047a54 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 5c861d2..f095557 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Implements a dummy match to allow attaching comments to rules
  *
diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c
index 4fa4efd..87505cd 100644
--- a/net/netfilter/xt_connlabel.c
+++ b/net/netfilter/xt_connlabel.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 2013 Astaro GmbH & Co KG
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -15,7 +12,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
-MODULE_DESCRIPTION("Xtables: add/match connection trackling labels");
+MODULE_DESCRIPTION("Xtables: add/match connection tracking labels");
 MODULE_ALIAS("ipt_connlabel");
 MODULE_ALIAS("ip6t_connlabel");
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index bc6c8ab..46fcac7 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -13,6 +13,8 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 29c38aa..eec2f3a 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	xt_connmark - Netfilter module to operate on connection marks
  *
@@ -5,19 +6,6 @@
  *	by Henrik Nordstrom <hno@marasystems.com>
  *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
  *	Jan Engelhardt <jengelh@medozas.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index df80fe7..ea299da 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	xt_conntrack - Netfilter module to match connection tracking
  *	information. (Superset of Rusty's minimalistic state match.)
@@ -5,10 +6,6 @@
  *	(C) 2001  Marc Boucher (marc@mbsi.ca).
  *	(C) 2006-2012 Patrick McHardy <kaber@trash.net>
  *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
- *
- *	This program is free software; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
index c7a2e54..3bdc302 100644
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -1,17 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match running CPU */
 
 /*
  * Might be used to distribute connections on several daemons, if
 * RPS (Receive Packet Steering) is enabled or NIC is multiqueue capable,
  * each RX queue IRQ affined to one CPU (1:1 mapping)
- *
  */
 
 /* (C) 2010 Eric Dumazet
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index b63d2a3..e5a13ec 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * iptables module for DCCP protocol header matching
  *
  * (C) 2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c
index 96ebe1c..9520dd0 100644
--- a/net/netfilter/xt_devgroup.c
+++ b/net/netfilter/xt_devgroup.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index a4c2b86..fb0169a 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* IP tables module for matching the value of the IPv4/IPv6 DSCP field
  *
  * (C) 2002 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c
index c7ad4af..b96e820 100644
--- a/net/netfilter/xt_ecn.c
+++ b/net/netfilter/xt_ecn.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Xtables module for matching the value of the IPv4/IPv6 and TCP ECN bits
  *
  * (C) 2002 by Harald Welte <laforge@gnumonks.org>
  * (C) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 171ba82..2a1c0ad 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match ESP parameters. */
 
 /* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 1ad4017..ced3fc8 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	xt_hashlimit - Netfilter module to limit the number of packets per time
  *	separately for each hashbucket (sourceip/sourceport/dstip/dstport)
@@ -33,9 +34,14 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter/xt_hashlimit.h>
 #include <linux/mutex.h>
 #include <linux/kernel.h>
+#include <uapi/linux/netfilter/xt_hashlimit.h>
+
+#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
+			  XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \
+			  XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\
+			  XT_HASHLIMIT_RATE_MATCH)
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
@@ -260,7 +266,7 @@
 dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
 {
 	hlist_del_rcu(&ent->node);
-	call_rcu_bh(&ent->rcu, dsthash_free_rcu);
+	call_rcu(&ent->rcu, dsthash_free_rcu);
 	ht->count--;
 }
 static void htable_gc(struct work_struct *work);
@@ -274,21 +280,21 @@
 	struct xt_hashlimit_htable *hinfo;
 	const struct seq_operations *ops;
 	unsigned int size, i;
+	unsigned long nr_pages = totalram_pages();
 	int ret;
 
 	if (cfg->size) {
 		size = cfg->size;
 	} else {
-		size = (totalram_pages << PAGE_SHIFT) / 16384 /
+		size = (nr_pages << PAGE_SHIFT) / 16384 /
 		       sizeof(struct hlist_head);
-		if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
+		if (nr_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
 			size = 8192;
 		if (size < 16)
 			size = 16;
 	}
 	/* FIXME: don't use vmalloc() here or anywhere else -HW */
-	hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
-	                sizeof(struct hlist_head) * size);
+	hinfo = vmalloc(struct_size(hinfo, hash, size));
 	if (hinfo == NULL)
 		return -ENOMEM;
 	*out_hinfo = hinfo;
@@ -1326,7 +1332,7 @@
 	xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
 	unregister_pernet_subsys(&hashlimit_net_ops);
 
-	rcu_barrier_bh();
+	rcu_barrier();
 	kmem_cache_destroy(hashlimit_cachep);
 }
 
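Editor's note: the xt_hashlimit hunks above bundle three mechanical 5.x conversions: totalram_pages becomes the totalram_pages() accessor, the _bh RCU flavor folds into plain call_rcu()/rcu_barrier(), and the open-coded `sizeof(*hinfo) + sizeof(head) * size` becomes struct_size(). The last one matters most: the kernel helper in <linux/overflow.h> saturates instead of wrapping when the element count is user-influenced, so an oversized count cannot turn into a short allocation. A minimal userspace sketch of that idiom, with illustrative type and function names:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct hlist_head {
	void *first;
};

struct htable {
	unsigned int cfg_size;
	struct hlist_head hash[];	/* flexible trailing array */
};

/* Stand-in for the kernel's struct_size(): saturate to SIZE_MAX on
 * overflow so the allocation fails cleanly instead of coming up short. */
static size_t struct_size_htable(size_t n)
{
	if (n > (SIZE_MAX - sizeof(struct htable)) / sizeof(struct hlist_head))
		return SIZE_MAX;
	return sizeof(struct htable) + n * sizeof(struct hlist_head);
}

int main(void)
{
	size_t n = 8192;
	struct htable *ht = malloc(struct_size_htable(n));

	if (!ht)
		return 1;
	ht->cfg_size = (unsigned int)n;
	printf("%zu buckets in %zu bytes\n", n, struct_size_htable(n));
	free(ht);
	return 0;
}
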
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index fd077ae..a5a167f 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* iptables module to match on related connections */
 /*
  * (C) 2001 Martin Josefsson <gandalf@wlug.westbo.se>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c
index 0039511..c1a70f8 100644
--- a/net/netfilter/xt_hl.c
+++ b/net/netfilter/xt_hl.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IP tables module for matching the value of the TTL
  * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
  *
  * Hop Limit matching module
  * (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/ip.h>
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
index 57f1df5..472da63 100644
--- a/net/netfilter/xt_ipcomp.c
+++ b/net/netfilter/xt_ipcomp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*  Kernel module to match IPComp parameters for IPv4 and IPv6
  *
  *  Copyright (C) 2013 WindRiver
@@ -7,11 +8,6 @@
  *
  *  Based on:
  *  net/netfilter/xt_esp.c
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index b46626c..0c9e014 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	xt_iprange - Netfilter module to match IP address ranges
  *
- *	(C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *	(C) 2003 Jozsef Kadlecsik <kadlec@netfilter.org>
  *	(C) CC Computer Consultants GmbH, 2008
- *
- *	This program is free software; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -133,7 +130,7 @@
 module_init(iprange_mt_init);
 module_exit(iprange_mt_exit);
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
 MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
 MODULE_ALIAS("ipt_iprange");
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 1d950a6..253c71c 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	xt_ipvs - kernel module to match IPVS connection properties
  *
diff --git a/net/netfilter/xt_l2tp.c b/net/netfilter/xt_l2tp.c
index c43482b..a61eb81 100644
--- a/net/netfilter/xt_l2tp.c
+++ b/net/netfilter/xt_l2tp.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match L2TP header parameters. */
 
 /* (C) 2013      James Chapman <jchapman@katalix.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 176e557..1873da3 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match packet length. */
 /* (C) 1999-2001 James Morris <jmorros@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 9f098ec..bd1dea9 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
  * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index d5b4fd4..81649da 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match MAC address parameters. */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index ebd41dc..1ad74b5 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	xt_mark - Netfilter module to match NFMARK value
  *
  *	(C) 1999-2001 Marc Boucher <marc@mbsi.ca>
  *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
  *	Jan Engelhardt <jengelh@medozas.de>
- *
- *	This program is free software; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 1cde0e4..44a00f5 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match one of a list of TCP/UDP(-Lite)/SCTP/DCCP ports:
    ports are in the same place so we can treat them as equal. */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index ac91170..a8e5f6c 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -14,7 +11,7 @@
 #include <linux/skbuff.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat.h>
 
 static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
 {
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 6b56f41..5aab6df 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
  * (C) 2011 Intra2net AG <http://www.intra2net.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 (or any
- * later at your option) as published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
@@ -57,25 +54,39 @@
 	nfnl_acct_put(info->nfacct);
 }
 
-static struct xt_match nfacct_mt_reg __read_mostly = {
-	.name       = "nfacct",
-	.family     = NFPROTO_UNSPEC,
-	.checkentry = nfacct_mt_checkentry,
-	.match      = nfacct_mt,
-	.destroy    = nfacct_mt_destroy,
-	.matchsize  = sizeof(struct xt_nfacct_match_info),
-	.usersize   = offsetof(struct xt_nfacct_match_info, nfacct),
-	.me         = THIS_MODULE,
+static struct xt_match nfacct_mt_reg[] __read_mostly = {
+	{
+		.name       = "nfacct",
+		.revision   = 0,
+		.family     = NFPROTO_UNSPEC,
+		.checkentry = nfacct_mt_checkentry,
+		.match      = nfacct_mt,
+		.destroy    = nfacct_mt_destroy,
+		.matchsize  = sizeof(struct xt_nfacct_match_info),
+		.usersize   = offsetof(struct xt_nfacct_match_info, nfacct),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "nfacct",
+		.revision   = 1,
+		.family     = NFPROTO_UNSPEC,
+		.checkentry = nfacct_mt_checkentry,
+		.match      = nfacct_mt,
+		.destroy    = nfacct_mt_destroy,
+		.matchsize  = sizeof(struct xt_nfacct_match_info_v1),
+		.usersize   = offsetof(struct xt_nfacct_match_info_v1, nfacct),
+		.me         = THIS_MODULE,
+	},
 };
 
 static int __init nfacct_mt_init(void)
 {
-	return xt_register_match(&nfacct_mt_reg);
+	return xt_register_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg));
 }
 
 static void __exit nfacct_mt_exit(void)
 {
-	xt_unregister_match(&nfacct_mt_reg);
+	xt_unregister_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg));
 }
 
 module_init(nfacct_mt_init);
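
Editor's note: the xt_nfacct change above is the standard pattern for growing a match: keep revision 0 for existing userspace, add revision 1 with the larger matchinfo, and hand the whole table to xt_register_matches(). The batch helper registers entries in order and unwinds on the first failure. A userspace sketch of that register-or-roll-back shape (the stub handlers and the deliberate revision-1 failure are illustrative):

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct match {
	const char *name;
	int revision;
};

static struct match nfacct_mt_reg[] = {
	{ .name = "nfacct", .revision = 0 },
	{ .name = "nfacct", .revision = 1 },
};

static int register_one(const struct match *m)
{
	/* Fail revision 1 on purpose to exercise the unwind path. */
	if (m->revision == 1)
		return -1;
	printf("registered %s rev %d\n", m->name, m->revision);
	return 0;
}

static void unregister_one(const struct match *m)
{
	printf("unregistered %s rev %d\n", m->name, m->revision);
}

static int register_matches(const struct match *m, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (register_one(&m[i]) < 0) {
			while (i--)	/* roll back what succeeded */
				unregister_one(&m[i]);
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	return register_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg)) ? 1 : 0;
}
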
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index bf7bba8..e1990ba 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2003+ Evgeniy Polyakov <zbr@ioremap.net>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
@@ -40,14 +27,8 @@
 static bool
 xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 {
-	const struct xt_osf_info *info = p->matchinfo;
-	struct net *net = xt_net(p);
-
-	if (!info)
-		return false;
-
 	return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p),
-			    xt_out(p), info, net, nf_osf_fingers);
+			    xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers);
 }
 
 static struct xt_match xt_osf_match = {
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 46686fb..e85ce69 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Kernel module to match various things tied to sockets associated with
  * locally generated outgoing packets.
@@ -5,10 +6,6 @@
  * (C) 2000 Marc Boucher <marc@mbsi.ca>
  *
  * Copyright © CC Computer Consultants GmbH, 2007 - 2008
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -25,6 +22,9 @@
 	struct xt_owner_match_info *info = par->matchinfo;
 	struct net *net = par->net;
 
+	if (info->match & ~XT_OWNER_MASK)
+		return -EINVAL;
+
 	/* Only allow the common case where the userns of the writer
 	 * matches the userns of the network namespace.
 	 */
@@ -91,11 +91,28 @@
 	}
 
 	if (info->match & XT_OWNER_GID) {
+		unsigned int i, match = false;
 		kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
 		kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
-		if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
-		     gid_lte(filp->f_cred->fsgid, gid_max)) ^
-		    !(info->invert & XT_OWNER_GID))
+		struct group_info *gi = filp->f_cred->group_info;
+
+		if (gid_gte(filp->f_cred->fsgid, gid_min) &&
+		    gid_lte(filp->f_cred->fsgid, gid_max))
+			match = true;
+
+		if (!match && (info->match & XT_OWNER_SUPPL_GROUPS) && gi) {
+			for (i = 0; i < gi->ngroups; ++i) {
+				kgid_t group = gi->gid[i];
+
+				if (gid_gte(group, gid_min) &&
+				    gid_lte(group, gid_max)) {
+					match = true;
+					break;
+				}
+			}
+		}
+
+		if (match ^ !(info->invert & XT_OWNER_GID))
 			return false;
 	}
 
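Editor's note: the xt_owner hunk replaces the one-shot fsgid range test with "fsgid, or any supplementary group, in [gid_min, gid_max]" (gated by the new XT_OWNER_SUPPL_GROUPS flag), while keeping the original `match ^ !invert` decision. A self-contained sketch of that logic with hypothetical inputs; the flag gate is elided for brevity:

#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>

static bool owner_gid_match(gid_t fsgid, const gid_t *groups, size_t ngroups,
			    gid_t gid_min, gid_t gid_max, bool invert)
{
	bool match = (fsgid >= gid_min && fsgid <= gid_max);
	size_t i;

	for (i = 0; !match && i < ngroups; i++)
		match = (groups[i] >= gid_min && groups[i] <= gid_max);

	/* The rule rejects the packet when (match ^ !invert) is true,
	 * i.e. it accepts when match == !invert. */
	return !(match ^ !invert);
}

int main(void)
{
	gid_t supp[] = { 4, 20, 1000 };

	printf("%d\n", owner_gid_match(33, supp, 3, 999, 1005, false)); /* 1 */
	printf("%d\n", owner_gid_match(33, supp, 3, 999, 1005, true));  /* 0 */
	return 0;
}
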
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 9d6d67b..ec6ed6f 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -1,19 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match the bridge port in and
  * out device for IP packets coming into contact with a bridge. */
 
 /* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter_bridge.h>
-#include <linux/netfilter/xt_physdev.h>
 #include <linux/netfilter/x_tables.h>
-#include <net/netfilter/br_netfilter.h>
+#include <uapi/linux/netfilter/xt_physdev.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
@@ -33,7 +31,7 @@
 	/* Not a bridged IP packet or no info available yet:
 	 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
 	 * the destination device will be a bridge. */
-	if (!skb->nf_bridge) {
+	if (!nf_bridge_info_exists(skb)) {
 		/* Return MATCH if the invert flags of the used options are on */
 		if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
 		    !(info->invert & XT_PHYSDEV_OP_BRIDGED))
@@ -96,8 +94,7 @@
 static int physdev_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_physdev_info *info = par->matchinfo;
-
-	br_netfilter_enable();
+	static bool brnf_probed __read_mostly;
 
 	if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
 	    info->bitmask & ~XT_PHYSDEV_OP_MASK)
@@ -105,12 +102,16 @@
 	if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) &&
 	    (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
-	    par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
-	    (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
+	    par->hook_mask & (1 << NF_INET_LOCAL_OUT)) {
 		pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n");
-		if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
-			return -EINVAL;
+		return -EINVAL;
 	}
+
+	if (!brnf_probed) {
+		brnf_probed = true;
+		request_module("br_netfilter");
+	}
+
 	return 0;
 }
 
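Editor's note: the xt_physdev rework drops the br_netfilter_enable() call (and the header that declared it) in favor of probing the br_netfilter module once, the first time a physdev rule is checked, via a function-local static flag. The same probe-once shape in plain C; request_module() is a stub, and note that in the kernel the flag is effectively serialized by the ruleset mutex, since a bare bool is not thread-safe on its own:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the kernel's module autoloader. */
static void request_module(const char *name)
{
	printf("modprobe %s\n", name);
}

static int physdev_mt_check(void)
{
	static bool brnf_probed;

	if (!brnf_probed) {
		brnf_probed = true;
		request_module("br_netfilter");
	}
	return 0;
}

int main(void)
{
	physdev_mt_check();
	physdev_mt_check();	/* no second probe */
	return 0;
}
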
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 1ef9915..f48946a 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* (C) 1999-2001 Michal Ludvig <michal@logix.cz>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 13f8ccf..cb6e827 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* IP tables module for matching IPsec policy
  *
  * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kernel.h>
@@ -56,7 +53,7 @@
 		unsigned short family)
 {
 	const struct xt_policy_elem *e;
-	const struct sec_path *sp = skb->sp;
+	const struct sec_path *sp = skb_sec_path(skb);
 	int strict = info->flags & XT_POLICY_MATCH_STRICT;
 	int i, pos;
 
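Editor's note: the one-line xt_policy change swaps the direct `skb->sp` dereference for the skb_sec_path() accessor; in 5.x the sec_path moved out of struct sk_buff into skb extensions, and only the accessor survived. A toy illustration of why call sites want an accessor rather than the raw field (the CONFIG_XFRM gating below is a simplification for the sketch, not the 5.4 implementation):

#include <stddef.h>
#include <stdio.h>

#define CONFIG_XFRM 1		/* undefine to see the accessor compile out */

struct sec_path {
	int len;
};

struct sk_buff {
#ifdef CONFIG_XFRM
	struct sec_path *sp;
#endif
	unsigned int len;
};

static inline struct sec_path *skb_sec_path(const struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
	return skb->sp;
#else
	return NULL;
#endif
}

int main(void)
{
	struct sec_path sp = { .len = 2 };
	struct sk_buff skb = { .sp = &sp, .len = 100 };
	const struct sec_path *p = skb_sec_path(&skb);

	printf("xfrm states on skb: %d\n", p ? p->len : 0);
	return 0;
}
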
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 10d61a6..4452cc9 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * netfilter module to enforce network quotas
  *
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index bf77326..72324bd 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * (C) 2007 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 459a7b2..6df485f 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* IP tables module for matching the routing realm
  *
  * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index f44de4b..781e0b4 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
  * Copyright © CC Computer Consultants GmbH, 2007 - 2008
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * This is a replacement of the old ipt_recent module, which carried the
  * following copyright notice:
  *
@@ -337,7 +334,6 @@
 	unsigned int nstamp_mask;
 	unsigned int i;
 	int ret = -EINVAL;
-	size_t sz;
 
 	net_get_random_once(&hash_rnd, sizeof(hash_rnd));
 
@@ -387,8 +383,7 @@
 		goto out;
 	}
 
-	sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size;
-	t = kvzalloc(sz, GFP_KERNEL);
+	t = kvzalloc(struct_size(t, iphash, ip_list_hash_size), GFP_KERNEL);
 	if (t == NULL) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 2d2fa1d..680015b 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index bf2890b..731bc2c 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
  *                         Patrick Schaaf <bof@bof.de>
  *                         Martin Josefsson <gandalf@wlug.westbo.se>
- * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
  */
 
 /* Kernel module which implements the set match and SET target
@@ -17,11 +14,10 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <uapi/linux/netfilter/xt_set.h>
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: IP set match and target module");
 MODULE_ALIAS("xt_SET");
 MODULE_ALIAS("ipt_set");
@@ -439,6 +435,7 @@
 {
 	const struct xt_set_info_target_v3 *info = par->targinfo;
 	ip_set_id_t index;
+	int ret = 0;
 
 	if (info->add_set.index != IPSET_INVALID_ID) {
 		index = ip_set_nfnl_get_byindex(par->net,
@@ -456,17 +453,16 @@
 		if (index == IPSET_INVALID_ID) {
 			pr_info_ratelimited("Cannot find del_set index %u as target\n",
 					    info->del_set.index);
-			if (info->add_set.index != IPSET_INVALID_ID)
-				ip_set_nfnl_put(par->net,
-						info->add_set.index);
-			return -ENOENT;
+			ret = -ENOENT;
+			goto cleanup_add;
 		}
 	}
 
 	if (info->map_set.index != IPSET_INVALID_ID) {
 		if (strncmp(par->table, "mangle", 7)) {
 			pr_info_ratelimited("--map-set only usable from mangle table\n");
-			return -EINVAL;
+			ret = -EINVAL;
+			goto cleanup_del;
 		}
 		if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
 		     (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
@@ -474,20 +470,16 @@
 					 1 << NF_INET_LOCAL_OUT |
 					 1 << NF_INET_POST_ROUTING))) {
 			pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
-			return -EINVAL;
+			ret = -EINVAL;
+			goto cleanup_del;
 		}
 		index = ip_set_nfnl_get_byindex(par->net,
 						info->map_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_info_ratelimited("Cannot find map_set index %u as target\n",
 					    info->map_set.index);
-			if (info->add_set.index != IPSET_INVALID_ID)
-				ip_set_nfnl_put(par->net,
-						info->add_set.index);
-			if (info->del_set.index != IPSET_INVALID_ID)
-				ip_set_nfnl_put(par->net,
-						info->del_set.index);
-			return -ENOENT;
+			ret = -ENOENT;
+			goto cleanup_del;
 		}
 	}
 
@@ -495,16 +487,21 @@
 	    info->del_set.dim > IPSET_DIM_MAX ||
 	    info->map_set.dim > IPSET_DIM_MAX) {
 		pr_info_ratelimited("SET target dimension over the limit!\n");
-		if (info->add_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(par->net, info->add_set.index);
-		if (info->del_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(par->net, info->del_set.index);
-		if (info->map_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(par->net, info->map_set.index);
-		return -ERANGE;
+		ret = -ERANGE;
+		goto cleanup_mark;
 	}
 
 	return 0;
+cleanup_mark:
+	if (info->map_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(par->net, info->map_set.index);
+cleanup_del:
+	if (info->del_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(par->net, info->del_set.index);
+cleanup_add:
+	if (info->add_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(par->net, info->add_set.index);
+	return ret;
 }
 
 static void
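
Editor's note: the set_target_v3_checkentry rewrite above replaces three copies of if-this-put-that error handling with a single goto ladder: acquire add_set, del_set, and map_set in order, and on any failure jump to the label that releases everything acquired so far, in reverse order. The shape reduced to standard C, with malloc/free standing in for ip_set_nfnl_get_byindex/ip_set_nfnl_put (on success the resources are intentionally kept, as the kernel keeps them for the rule's lifetime):

#include <stdio.h>
#include <stdlib.h>

static int checkentry(int fail_late)
{
	void *add, *del, *map;
	int ret = 0;

	add = malloc(16);
	if (!add)
		return -1;

	del = malloc(16);
	if (!del) {
		ret = -1;
		goto cleanup_add;
	}

	map = malloc(16);
	if (!map) {
		ret = -1;
		goto cleanup_del;
	}

	if (fail_late) {		/* e.g. the dimension check */
		ret = -2;
		goto cleanup_map;
	}
	return 0;			/* success: caller owns all three */

cleanup_map:
	free(map);
cleanup_del:
	free(del);
cleanup_add:
	free(add);
	return ret;
}

int main(void)
{
	printf("ok path:   %d\n", checkentry(0));
	printf("fail path: %d\n", checkentry(1));
	return 0;
}
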
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index ada144e..5f97398 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Transparent proxy support for Linux/iptables
  *
  * Copyright (C) 2007-2008 BalaBit IT Ltd.
  * Author: Krisztian Kovacs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 0b41c0b..bbe07b1 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match connection tracking information. */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 8710fdb..203e24a 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Based on ipt_random and ipt_nth by Fabrice MARIE <fabrice@netfilter.org>.
  */
 
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index be1fedd..8ce25bc 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* String matching match for iptables
  *
  * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/gfp.h>
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index c53d4d1..37704ab 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Kernel module to match TCP MSS values. */
 
 /* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
  * Portions (C) 2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index ade024c..11ec2ab 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/types.h>
 #include <linux/module.h>
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index c13bcd0..8dbb4d4 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -163,19 +163,24 @@
 	s64 stamp;
 
 	/*
-	 * We cannot use get_seconds() instead of __net_timestamp() here.
+	 * We need real time here, but we can neither use skb->tstamp
+	 * nor __net_timestamp().
+	 *
+	 * skb->tstamp and skb->skb_mstamp_ns overlap, however, they
+	 * use different clock types (real vs monotonic).
+	 *
 	 * Suppose you have two rules:
-	 * 	1. match before 13:00
-	 * 	2. match after 13:00
+	 *	1. match before 13:00
+	 *	2. match after 13:00
+	 *
 	 * If you match against processing time (get_seconds) it
 	 * may happen that the same packet matches both rules if
-	 * it arrived at the right moment before 13:00.
+	 * it arrived at the right moment before 13:00, so it would be
+	 * better to check skb->tstamp and set it via __net_timestamp()
+	 * if needed.  This however breaks outgoing packets' tx timestamps,
+	 * and causes them to get delayed forever by the fq packet scheduler.
 	 */
-	if (skb->tstamp == 0)
-		__net_timestamp((struct sk_buff *)skb);
-
-	stamp = ktime_to_ns(skb->tstamp);
-	stamp = div_s64(stamp, NSEC_PER_SEC);
+	stamp = get_seconds();
 
 	if (info->flags & XT_TIME_LOCAL_TZ)
 		/* Adjust for local timezone */
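
Editor's note: the xt_time change above is about clock domains: skb->tstamp now shares storage with TCP's skb_mstamp_ns, which is monotonic, so a time-of-day match cannot trust it, and stamping it via __net_timestamp() breaks fq's pacing of outgoing packets. Hence the fallback to reading the wall clock directly. The distinction in two clock_gettime() calls:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec real, mono;

	clock_gettime(CLOCK_REALTIME, &real);
	clock_gettime(CLOCK_MONOTONIC, &mono);

	/* Seconds since the epoch: meaningful for "match before 13:00". */
	printf("realtime:  %lld s\n", (long long)real.tv_sec);
	/* Seconds since boot (roughly): useless for time-of-day rules. */
	printf("monotonic: %lld s\n", (long long)mono.tv_sec);
	return 0;
}
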
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index a95b503..177b40d 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *	xt_u32 - kernel module to match u32 packet content
  *
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
index d9eaa30..64280a1 100644
--- a/net/netlabel/Kconfig
+++ b/net/netlabel/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # NetLabel configuration
 #
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index d0a3acf..3282acf 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NetLabel Network Address Lists
  *
@@ -7,25 +8,10 @@
  * as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/types.h>
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index ac709f0..a01cf49 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * NetLabel Network Address Lists
  *
@@ -7,25 +8,10 @@
  * as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #ifndef _NETLABEL_ADDRLIST_H
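
Editor's note: a pattern worth noting across all of these SPDX conversions: .c files take the C99 `//` comment form while headers take `/* */`, matching the kernel's license-rules documentation (headers are kept to the classic comment style, reportedly because they can be pulled into contexts that do not accept `//` comments). The two first-line forms side by side:

/* first line of a C source (.c) file */
// SPDX-License-Identifier: GPL-2.0-or-later

/* first line of a header (.h) file */
/* SPDX-License-Identifier: GPL-2.0-or-later */
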
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index 4d74897..249da67 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NetLabel CALIPSO/IPv6 Support
  *
@@ -7,25 +8,10 @@
  *
  * Authors: Paul Moore <paul@paul-moore.com>
  *          Huw Davies <huw@codeweavers.com>
- *
  */
 
 /* (c) Copyright Hewlett-Packard Development Company, L.P., 2006
  * (c) Copyright Huw Davies <huw@codeweavers.com>, 2015
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/types.h>
@@ -321,29 +307,29 @@
 static const struct genl_ops netlbl_calipso_ops[] = {
 	{
 	.cmd = NLBL_CALIPSO_C_ADD,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = calipso_genl_policy,
 	.doit = netlbl_calipso_add,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_CALIPSO_C_REMOVE,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = calipso_genl_policy,
 	.doit = netlbl_calipso_remove,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_CALIPSO_C_LIST,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = calipso_genl_policy,
 	.doit = netlbl_calipso_list,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_CALIPSO_C_LISTALL,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = calipso_genl_policy,
 	.doit = NULL,
 	.dumpit = netlbl_calipso_listall,
 	},
@@ -354,6 +340,7 @@
 	.name = NETLBL_NLTYPE_CALIPSO_NAME,
 	.version = NETLBL_PROTO_VERSION,
 	.maxattr = NLBL_CALIPSO_A_MAX,
+	.policy = calipso_genl_policy,
 	.module = THIS_MODULE,
 	.ops = netlbl_calipso_ops,
 	.n_ops = ARRAY_SIZE(netlbl_calipso_ops),
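
Editor's note: this netlbl_calipso block shows the 5.2-era genetlink refactor that repeats through the remaining netlabel diffs: the per-op `.policy` pointer moves up into struct genl_family, and pre-existing commands opt out of the newly strict validation with GENL_DONT_VALIDATE_STRICT/_DUMP. Structurally it is a move from per-handler configuration to one family-level table. A userspace skeleton of that shape, all names illustrative:

#include <stdio.h>

enum { GENL_DONT_VALIDATE_STRICT = 1, GENL_DONT_VALIDATE_DUMP = 2 };

struct op {
	const char *cmd;
	unsigned int validate;	/* per-op opt-outs only */
	int (*doit)(void);
};

struct family {
	const char *name;
	const char *policy;	/* one shared policy for every op */
	const struct op *ops;
	unsigned int n_ops;
};

static int add_cmd(void) { puts("add"); return 0; }

static const struct op ops[] = {
	{ "ADD", GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, add_cmd },
};

static const struct family fam = {
	.name = "NLBL_CALIPSO", .policy = "calipso_genl_policy",
	.ops = ops, .n_ops = 1,
};

int main(void)
{
	unsigned int i;

	for (i = 0; i < fam.n_ops; i++) {
		printf("%s: policy=%s strict=%s\n", fam.ops[i].cmd, fam.policy,
		       (fam.ops[i].validate & GENL_DONT_VALIDATE_STRICT) ?
		       "off" : "on");
		fam.ops[i].doit();
	}
	return 0;
}
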
diff --git a/net/netlabel/netlabel_calipso.h b/net/netlabel/netlabel_calipso.h
index 9fd291c..ef3e9a7 100644
--- a/net/netlabel/netlabel_calipso.h
+++ b/net/netlabel/netlabel_calipso.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * NetLabel CALIPSO Support
  *
@@ -7,25 +8,10 @@
  *
  * Authors: Paul Moore <paul@paul-moore.com>
  *          Huw Davies <huw@codeweavers.com>
- *
  */
 
 /* (c) Copyright Hewlett-Packard Development Company, L.P., 2006
  * (c) Copyright Huw Davies <huw@codeweavers.com>, 2015
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #ifndef _NETLABEL_CALIPSO
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 9aacf2d..0f16080 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NetLabel CIPSO/IPv4 Support
  *
@@ -6,25 +7,10 @@
  * protocols such as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/types.h>
@@ -99,9 +85,10 @@
 
 	doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
 
-	if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST],
-				NLBL_CIPSOV4_A_MAX,
-				netlbl_cipsov4_genl_policy, NULL) != 0)
+	if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_TAGLST],
+					   NLBL_CIPSOV4_A_MAX,
+					   netlbl_cipsov4_genl_policy,
+					   NULL) != 0)
 		return -EINVAL;
 
 	nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem)
@@ -146,9 +133,10 @@
 	    !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST])
 		return -EINVAL;
 
-	if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
-				NLBL_CIPSOV4_A_MAX,
-				netlbl_cipsov4_genl_policy, NULL) != 0)
+	if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
+					   NLBL_CIPSOV4_A_MAX,
+					   netlbl_cipsov4_genl_policy,
+					   NULL) != 0)
 		return -EINVAL;
 
 	doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
@@ -170,9 +158,10 @@
 			    info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
 			    nla_a_rem)
 		if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) {
-			if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX,
-						netlbl_cipsov4_genl_policy,
-						NULL) != 0)
+			if (nla_validate_nested_deprecated(nla_a,
+							   NLBL_CIPSOV4_A_MAX,
+							   netlbl_cipsov4_genl_policy,
+							   NULL) != 0)
 				goto add_std_failure;
 			nla_for_each_nested(nla_b, nla_a, nla_b_rem)
 				switch (nla_type(nla_b)) {
@@ -234,19 +223,20 @@
 		}
 
 	if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) {
-		if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
-					NLBL_CIPSOV4_A_MAX,
-					netlbl_cipsov4_genl_policy, NULL) != 0)
+		if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
+						   NLBL_CIPSOV4_A_MAX,
+						   netlbl_cipsov4_genl_policy,
+						   NULL) != 0)
 			goto add_std_failure;
 
 		nla_for_each_nested(nla_a,
 				    info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
 				    nla_a_rem)
 			if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) {
-				if (nla_validate_nested(nla_a,
-							NLBL_CIPSOV4_A_MAX,
-							netlbl_cipsov4_genl_policy,
-							NULL) != 0)
+				if (nla_validate_nested_deprecated(nla_a,
+								   NLBL_CIPSOV4_A_MAX,
+								   netlbl_cipsov4_genl_policy,
+								   NULL) != 0)
 					goto add_std_failure;
 				nla_for_each_nested(nla_b, nla_a, nla_b_rem)
 					switch (nla_type(nla_b)) {
@@ -498,7 +488,7 @@
 	if (ret_val != 0)
 		goto list_failure_lock;
 
-	nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_TAGLST);
+	nla_a = nla_nest_start_noflag(ans_skb, NLBL_CIPSOV4_A_TAGLST);
 	if (nla_a == NULL) {
 		ret_val = -ENOMEM;
 		goto list_failure_lock;
@@ -517,7 +507,8 @@
 
 	switch (doi_def->type) {
 	case CIPSO_V4_MAP_TRANS:
-		nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST);
+		nla_a = nla_nest_start_noflag(ans_skb,
+					      NLBL_CIPSOV4_A_MLSLVLLST);
 		if (nla_a == NULL) {
 			ret_val = -ENOMEM;
 			goto list_failure_lock;
@@ -529,7 +520,8 @@
 			    CIPSO_V4_INV_LVL)
 				continue;
 
-			nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVL);
+			nla_b = nla_nest_start_noflag(ans_skb,
+						      NLBL_CIPSOV4_A_MLSLVL);
 			if (nla_b == NULL) {
 				ret_val = -ENOMEM;
 				goto list_retry;
@@ -548,7 +540,8 @@
 		}
 		nla_nest_end(ans_skb, nla_a);
 
-		nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCATLST);
+		nla_a = nla_nest_start_noflag(ans_skb,
+					      NLBL_CIPSOV4_A_MLSCATLST);
 		if (nla_a == NULL) {
 			ret_val = -ENOMEM;
 			goto list_retry;
@@ -560,7 +553,8 @@
 			    CIPSO_V4_INV_CAT)
 				continue;
 
-			nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCAT);
+			nla_b = nla_nest_start_noflag(ans_skb,
+						      NLBL_CIPSOV4_A_MLSCAT);
 			if (nla_b == NULL) {
 				ret_val = -ENOMEM;
 				goto list_retry;
@@ -733,29 +727,29 @@
 static const struct genl_ops netlbl_cipsov4_ops[] = {
 	{
 	.cmd = NLBL_CIPSOV4_C_ADD,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_cipsov4_genl_policy,
 	.doit = netlbl_cipsov4_add,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_CIPSOV4_C_REMOVE,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_cipsov4_genl_policy,
 	.doit = netlbl_cipsov4_remove,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_CIPSOV4_C_LIST,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = netlbl_cipsov4_genl_policy,
 	.doit = netlbl_cipsov4_list,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_CIPSOV4_C_LISTALL,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = netlbl_cipsov4_genl_policy,
 	.doit = NULL,
 	.dumpit = netlbl_cipsov4_listall,
 	},
@@ -766,6 +760,7 @@
 	.name = NETLBL_NLTYPE_CIPSOV4_NAME,
 	.version = NETLBL_PROTO_VERSION,
 	.maxattr = NLBL_CIPSOV4_A_MAX,
+	.policy = netlbl_cipsov4_genl_policy,
 	.module = THIS_MODULE,
 	.ops = netlbl_cipsov4_ops,
 	.n_ops = ARRAY_SIZE(netlbl_cipsov4_ops),
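
Editor's note: the nla_nest_start() → nla_nest_start_noflag() renames in this file (and in netlabel_mgmt.c below) preserve the wire format: upstream repurposed the short name to set NLA_F_NESTED on the container attribute, so callers that must stay bit-compatible with existing userspace take the _noflag variant. A toy TLV builder showing what the flag bit does; the buffer handling is illustrative, though the 0x8000 value matches netlink's NLA_F_NESTED:

#include <stdint.h>
#include <stdio.h>

#define NLA_F_NESTED 0x8000

struct nla { uint16_t len; uint16_t type; };

static struct nla *nest_start(unsigned char *buf, size_t *off,
			      uint16_t type, int flag)
{
	struct nla *container = (struct nla *)(buf + *off);

	container->type = flag ? (type | NLA_F_NESTED) : type;
	*off += sizeof(*container);
	return container;
}

static void nest_end(unsigned char *buf, size_t off, struct nla *container)
{
	/* Patch the container length now that its payload is known. */
	container->len = (uint16_t)(buf + off - (unsigned char *)container);
}

int main(void)
{
	unsigned char buf[64];
	size_t off = 0;
	struct nla *plain = nest_start(buf, &off, 1, 0);	/* _noflag */

	off += 8;		/* pretend two small attributes were added */
	nest_end(buf, off, plain);

	struct nla *nested = nest_start(buf, &off, 1, 1);	/* flagged */
	nest_end(buf, off, nested);

	printf("noflag type=%#x len=%u\n", plain->type, plain->len);
	printf("nested type=%#x len=%u\n", nested->type, nested->len);
	return 0;
}
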
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index 8758268..85d7ecb 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * NetLabel CIPSO/IPv4 Support
  *
@@ -6,25 +7,10 @@
  * protocols such as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #ifndef _NETLABEL_CIPSO_V4
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 41d0e95..f5d34da 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NetLabel Domain Hash Table
  *
@@ -7,25 +8,10 @@
  * as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/types.h>
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 1f92477..9f80972 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * NetLabel Domain Hash Table
  *
@@ -7,25 +8,10 @@
  * as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #ifndef _NETLABEL_DOMAINHASH_H
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index ea7c670..409a3ae 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NetLabel Kernel API
  *
@@ -6,25 +7,10 @@
  * as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/init.h>
@@ -621,7 +607,7 @@
  */
 int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset)
 {
-	struct netlbl_lsm_catmap *iter = catmap;
+	struct netlbl_lsm_catmap *iter;
 	u32 idx;
 	u32 bit;
 	NETLBL_CATMAP_MAPTYPE bitmap;
@@ -903,7 +889,8 @@
 		    (state == 0 && (byte & bitmask) == 0))
 			return bit_spot;
 
-		bit_spot++;
+		if (++bit_spot >= bitmap_len)
+			return -1;
 		bitmask >>= 1;
 		if (bitmask == 0) {
 			byte = bitmap[++byte_offset];
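
Editor's note: the two netlabel_kapi.c fixes above are an unused-initializer cleanup and a real bounds bug: the catmap bitmap walker could run past the end of the bitmap when searching for a bit state that never occurs, so the increment now checks against the bitmap length (in bits) and bails out with -1. The loop shape, lifted into standalone C:

#include <stdio.h>

static int bitmap_walk(const unsigned char *bitmap, unsigned int bitmap_len,
		       unsigned int state)
{
	unsigned int byte_offset = 0, bit_spot = 0;
	unsigned char bitmask = 0x80, byte;

	if (bitmap_len == 0)
		return -1;
	byte = bitmap[0];

	for (;;) {
		if ((state && (byte & bitmask)) ||
		    (!state && !(byte & bitmask)))
			return (int)bit_spot;

		if (++bit_spot >= bitmap_len)
			return -1;	/* the added guard */
		bitmask >>= 1;
		if (bitmask == 0) {
			byte = bitmap[++byte_offset];
			bitmask = 0x80;
		}
	}
}

int main(void)
{
	unsigned char all_set[] = { 0xff, 0xff };

	printf("first clear bit: %d\n", bitmap_walk(all_set, 16, 0)); /* -1 */
	printf("first set bit:   %d\n", bitmap_walk(all_set, 16, 1)); /*  0 */
	return 0;
}
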
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 21e0095..e7a25fb 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NetLabel Management Support
  *
@@ -6,25 +7,10 @@
  * protocols such as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/types.h>
@@ -315,7 +301,7 @@
 
 	switch (entry->def.type) {
 	case NETLBL_NLTYPE_ADDRSELECT:
-		nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST);
+		nla_a = nla_nest_start_noflag(skb, NLBL_MGMT_A_SELECTORLIST);
 		if (nla_a == NULL)
 			return -ENOMEM;
 
@@ -323,7 +309,8 @@
 			struct netlbl_domaddr4_map *map4;
 			struct in_addr addr_struct;
 
-			nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
+			nla_b = nla_nest_start_noflag(skb,
+						      NLBL_MGMT_A_ADDRSELECTOR);
 			if (nla_b == NULL)
 				return -ENOMEM;
 
@@ -357,7 +344,8 @@
 		netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) {
 			struct netlbl_domaddr6_map *map6;
 
-			nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
+			nla_b = nla_nest_start_noflag(skb,
+						      NLBL_MGMT_A_ADDRSELECTOR);
 			if (nla_b == NULL)
 				return -ENOMEM;
 
@@ -772,57 +760,57 @@
 static const struct genl_ops netlbl_mgmt_genl_ops[] = {
 	{
 	.cmd = NLBL_MGMT_C_ADD,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_mgmt_genl_policy,
 	.doit = netlbl_mgmt_add,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_MGMT_C_REMOVE,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_mgmt_genl_policy,
 	.doit = netlbl_mgmt_remove,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_MGMT_C_LISTALL,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = netlbl_mgmt_genl_policy,
 	.doit = NULL,
 	.dumpit = netlbl_mgmt_listall,
 	},
 	{
 	.cmd = NLBL_MGMT_C_ADDDEF,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_mgmt_genl_policy,
 	.doit = netlbl_mgmt_adddef,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_MGMT_C_REMOVEDEF,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_mgmt_genl_policy,
 	.doit = netlbl_mgmt_removedef,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_MGMT_C_LISTDEF,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = netlbl_mgmt_genl_policy,
 	.doit = netlbl_mgmt_listdef,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_MGMT_C_PROTOCOLS,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = netlbl_mgmt_genl_policy,
 	.doit = NULL,
 	.dumpit = netlbl_mgmt_protocols,
 	},
 	{
 	.cmd = NLBL_MGMT_C_VERSION,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = netlbl_mgmt_genl_policy,
 	.doit = netlbl_mgmt_version,
 	.dumpit = NULL,
 	},
@@ -833,6 +821,7 @@
 	.name = NETLBL_NLTYPE_MGMT_NAME,
 	.version = NETLBL_PROTO_VERSION,
 	.maxattr = NLBL_MGMT_A_MAX,
+	.policy = netlbl_mgmt_genl_policy,
 	.module = THIS_MODULE,
 	.ops = netlbl_mgmt_genl_ops,
 	.n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops),
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index ea01e42..db20dfb 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * NetLabel Management Support
  *
@@ -6,25 +7,10 @@
  * protocols such as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #ifndef _NETLABEL_MGMT_H
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index c92894c..d2e4ab8 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NetLabel Unlabeled Support
  *
@@ -6,25 +7,10 @@
  * mappings for network protocols such as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/types.h>
@@ -1317,57 +1303,57 @@
 static const struct genl_ops netlbl_unlabel_genl_ops[] = {
 	{
 	.cmd = NLBL_UNLABEL_C_STATICADD,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_unlabel_genl_policy,
 	.doit = netlbl_unlabel_staticadd,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_UNLABEL_C_STATICREMOVE,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_unlabel_genl_policy,
 	.doit = netlbl_unlabel_staticremove,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_UNLABEL_C_STATICLIST,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = netlbl_unlabel_genl_policy,
 	.doit = NULL,
 	.dumpit = netlbl_unlabel_staticlist,
 	},
 	{
 	.cmd = NLBL_UNLABEL_C_STATICADDDEF,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_unlabel_genl_policy,
 	.doit = netlbl_unlabel_staticadddef,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_UNLABEL_C_STATICREMOVEDEF,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_unlabel_genl_policy,
 	.doit = netlbl_unlabel_staticremovedef,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_UNLABEL_C_STATICLISTDEF,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = netlbl_unlabel_genl_policy,
 	.doit = NULL,
 	.dumpit = netlbl_unlabel_staticlistdef,
 	},
 	{
 	.cmd = NLBL_UNLABEL_C_ACCEPT,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = GENL_ADMIN_PERM,
-	.policy = netlbl_unlabel_genl_policy,
 	.doit = netlbl_unlabel_accept,
 	.dumpit = NULL,
 	},
 	{
 	.cmd = NLBL_UNLABEL_C_LIST,
+	.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	.flags = 0,
-	.policy = netlbl_unlabel_genl_policy,
 	.doit = netlbl_unlabel_list,
 	.dumpit = NULL,
 	},
@@ -1378,6 +1364,7 @@
 	.name = NETLBL_NLTYPE_UNLABELED_NAME,
 	.version = NETLBL_PROTO_VERSION,
 	.maxattr = NLBL_UNLABEL_A_MAX,
+	.policy = netlbl_unlabel_genl_policy,
 	.module = THIS_MODULE,
 	.ops = netlbl_unlabel_genl_ops,
 	.n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops),
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
index 3a9e5dc..058e3a2 100644
--- a/net/netlabel/netlabel_unlabeled.h
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * NetLabel Unlabeled Support
  *
@@ -6,25 +7,10 @@
  * mappings for network protocols such as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #ifndef _NETLABEL_UNLABELED_H
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 4676f5b..3ed4fea 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NetLabel NETLINK Interface
  *
@@ -6,25 +7,10 @@
  * protocols such as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/init.h>
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index 4a397cd..3c67afc 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * NetLabel NETLINK Interface
  *
@@ -6,25 +7,10 @@
  * protocols such as CIPSO and RIPSO.
  *
  * Author: Paul Moore <paul@paul-moore.com>
- *
  */
 
 /*
  * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
- *
- * This program is free software;  you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY;  without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program;  if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #ifndef _NETLABEL_USER_H
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
index 5d6e8c0..20f9679 100644
--- a/net/netlink/Kconfig
+++ b/net/netlink/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Netlink Sockets
 #
diff --git a/net/netlink/Makefile b/net/netlink/Makefile
index e837917..de42df7 100644
--- a/net/netlink/Makefile
+++ b/net/netlink/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the netlink driver.
 #
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 930d17f..90b2ab9 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NETLINK      Kernel-user communication protocol.
  *
@@ -5,11 +6,6 @@
  * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  * 				Patrick McHardy <kaber@trash.net>
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
  *                               added netlink_proto_exit
  * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
@@ -245,13 +241,8 @@
 	return 0;
 }
 
-static void __net_exit netlink_tap_exit_net(struct net *net)
-{
-}
-
 static struct pernet_operations netlink_tap_net_ops = {
 	.init = netlink_tap_init_net,
-	.exit = netlink_tap_exit_net,
 	.id   = &netlink_tap_net_id,
 	.size = sizeof(struct netlink_tap_net),
 };
@@ -574,11 +565,6 @@
 	if (nlk_sk(sk)->bound)
 		goto err;
 
-	err = -ENOMEM;
-	if (BITS_PER_LONG > 32 &&
-	    unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
-		goto err;
-
 	nlk_sk(sk)->portid = portid;
 	sock_hold(sk);
 
@@ -993,7 +979,7 @@
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
 	int err = 0;
-	long unsigned int groups = nladdr->nl_groups;
+	unsigned long groups;
 	bool bound;
 
 	if (addr_len < sizeof(struct sockaddr_nl))
@@ -1001,6 +987,7 @@
 
 	if (nladdr->nl_family != AF_NETLINK)
 		return -EINVAL;
+	groups = nladdr->nl_groups;
 
 	/* Only superuser is allowed to listen multicasts */
 	if (groups) {
@@ -1011,9 +998,7 @@
 			return err;
 	}
 
-	if (nlk->ngroups == 0)
-		groups = 0;
-	else if (nlk->ngroups < 8*sizeof(groups))
+	if (nlk->ngroups < BITS_PER_LONG)
 		groups &= (1UL << nlk->ngroups) - 1;
 
 	bound = nlk->bound;
@@ -1378,6 +1363,14 @@
 }
 EXPORT_SYMBOL_GPL(netlink_has_listeners);
 
+bool netlink_strict_get_check(struct sk_buff *skb)
+{
+	const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk);
+
+	return nlk->flags & NETLINK_F_STRICT_CHK;
+}
+EXPORT_SYMBOL_GPL(netlink_strict_get_check);
+
 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
@@ -1713,6 +1706,13 @@
 			nlk->flags &= ~NETLINK_F_EXT_ACK;
 		err = 0;
 		break;
+	case NETLINK_GET_STRICT_CHK:
+		if (val)
+			nlk->flags |= NETLINK_F_STRICT_CHK;
+		else
+			nlk->flags &= ~NETLINK_F_STRICT_CHK;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -1806,6 +1806,15 @@
 			return -EFAULT;
 		err = 0;
 		break;
+	case NETLINK_GET_STRICT_CHK:
+		if (len < sizeof(int))
+			return -EINVAL;
+		len = sizeof(int);
+		val = nlk->flags & NETLINK_F_STRICT_CHK ? 1 : 0;
+		if (put_user(len, optlen) || put_user(val, optval))
+			return -EFAULT;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
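
The two hunks above expose the new NETLINK_F_STRICT_CHK flag as a per-socket option: userspace opts in with setsockopt(), and families that honour the flag then reject GET/DUMP requests carrying unknown attributes or trailing data instead of silently ignoring them (the cb->strict_check plumbing further down is what dump handlers key off). A userspace sketch, assuming a libc that defines SOL_NETLINK:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/netlink.h>

int main(void)
{
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	int one = 1;

	if (fd < 0)
		return 1;
	/* Kernels without this option return ENOPROTOOPT; treat that as
	 * "strict checking unavailable", not as a fatal error. */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
		       &one, sizeof(one)) < 0)
		perror("NETLINK_GET_STRICT_CHK");
	return 0;
}
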
@@ -2178,6 +2187,7 @@
 static int netlink_dump(struct sock *sk)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ext_ack extack = {};
 	struct netlink_callback *cb;
 	struct sk_buff *skb = NULL;
 	struct nlmsghdr *nlh;
@@ -2229,8 +2239,11 @@
 	skb_reserve(skb, skb_tailroom(skb) - alloc_size);
 	netlink_skb_set_owner_r(skb, sk);
 
-	if (nlk->dump_done_errno > 0)
+	if (nlk->dump_done_errno > 0) {
+		cb->extack = &extack;
 		nlk->dump_done_errno = cb->dump(skb, cb);
+		cb->extack = NULL;
+	}
 
 	if (nlk->dump_done_errno > 0 ||
 	    skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
@@ -2244,7 +2257,8 @@
 	}
 
 	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
-			       sizeof(nlk->dump_done_errno), NLM_F_MULTI);
+			       sizeof(nlk->dump_done_errno),
+			       NLM_F_MULTI | cb->answer_flags);
 	if (WARN_ON(!nlh))
 		goto errout_skb;
 
@@ -2253,6 +2267,12 @@
 	memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
 	       sizeof(nlk->dump_done_errno));
 
+	if (extack._msg && nlk->flags & NETLINK_F_EXT_ACK) {
+		nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
+		if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack._msg))
+			nlmsg_end(skb, nlh);
+	}
+
 	if (sk_filter(sk, skb))
 		kfree_skb(skb);
 	else
@@ -2279,9 +2299,9 @@
 			 const struct nlmsghdr *nlh,
 			 struct netlink_dump_control *control)
 {
+	struct netlink_sock *nlk, *nlk2;
 	struct netlink_callback *cb;
 	struct sock *sk;
-	struct netlink_sock *nlk;
 	int ret;
 
 	refcount_inc(&skb->users);
@@ -2315,6 +2335,9 @@
 	cb->min_dump_alloc = control->min_dump_alloc;
 	cb->skb = skb;
 
+	nlk2 = nlk_sk(NETLINK_CB(skb).sk);
+	cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK);
+
 	if (control->start) {
 		ret = control->start(cb);
 		if (ret)
@@ -2516,20 +2539,10 @@
 	int link;
 };
 
-static int netlink_walk_start(struct nl_seq_iter *iter)
+static void netlink_walk_start(struct nl_seq_iter *iter)
 {
-	int err;
-
-	err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti,
-				   GFP_KERNEL);
-	if (err) {
-		iter->link = MAX_LINKS;
-		return err;
-	}
-
+	rhashtable_walk_enter(&nl_table[iter->link].hash, &iter->hti);
 	rhashtable_walk_start(&iter->hti);
-
-	return 0;
 }
 
 static void netlink_walk_stop(struct nl_seq_iter *iter)
@@ -2545,8 +2558,6 @@
 
 	do {
 		for (;;) {
-			int err;
-
 			nlk = rhashtable_walk_next(&iter->hti);
 
 			if (IS_ERR(nlk)) {
@@ -2563,9 +2574,7 @@
 			if (++iter->link >= MAX_LINKS)
 				return NULL;
 
-			err = netlink_walk_start(iter);
-			if (err)
-				return ERR_PTR(err);
+			netlink_walk_start(iter);
 		}
 	} while (sock_net(&nlk->sk) != seq_file_net(seq));
 
@@ -2577,13 +2586,10 @@
 	struct nl_seq_iter *iter = seq->private;
 	void *obj = SEQ_START_TOKEN;
 	loff_t pos;
-	int err;
 
 	iter->link = 0;
 
-	err = netlink_walk_start(iter);
-	if (err)
-		return ERR_PTR(err);
+	netlink_walk_start(iter);
 
 	for (pos = *posp; pos && obj && !IS_ERR(obj); pos--)
 		obj = __netlink_seq_next(seq);
@@ -2618,7 +2624,7 @@
 		struct sock *s = v;
 		struct netlink_sock *nlk = nlk_sk(s);
 
-		seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8d %-8lu\n",
+		seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8u %-8lu\n",
 			   s,
 			   s->sk_protocol,
 			   nlk->portid,
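
netlink_dump() now hands each ->dump() call a scratch extack and, when the requesting socket enabled extended ACKs, attaches the message to the closing NLMSG_DONE as an NLMSGERR_ATTR_MSG TLV, flagging the header with NLM_F_ACK_TLVS; cb->answer_flags likewise lets a handler set flags such as NLM_F_DUMP_INTR on the DONE record itself. A sketch of what a dump callback can do with this (example_dumpit is a hypothetical handler, not code from this patch):

static int example_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/* A message set here rides on the final NLMSG_DONE record. */
	if (cb->extack)
		NL_SET_ERR_MSG(cb->extack, "dump ended early: device vanished");
	return 0;	/* <= 0 terminates the dump */
}
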
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 962de7b..5f454c8 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -15,6 +15,7 @@
 #define NETLINK_F_LISTEN_ALL_NSID	0x10
 #define NETLINK_F_CAP_ACK		0x20
 #define NETLINK_F_EXT_ACK		0x40
+#define NETLINK_F_STRICT_CHK		0x80
 
 #define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
 #define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 7dda33b..c6255ea 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/module.h>
 
 #include <net/sock.h>
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 25eeb6d..efccd1a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -362,11 +362,11 @@
 	} else
 		family->attrbuf = NULL;
 
-	family->id = idr_alloc(&genl_fam_idr, family,
-			       start, end + 1, GFP_KERNEL);
+	family->id = idr_alloc_cyclic(&genl_fam_idr, family,
+				      start, end + 1, GFP_KERNEL);
 	if (family->id < 0) {
 		err = family->id;
-		goto errout_locked;
+		goto errout_free;
 	}
 
 	err = genl_validate_assign_mc_groups(family);
@@ -385,6 +385,7 @@
 
 errout_remove:
 	idr_remove(&genl_fam_idr, family->id);
+errout_free:
 	kfree(family->attrbuf);
 errout_locked:
 	genl_unlock_all();
@@ -535,6 +536,28 @@
 		if (ops->dumpit == NULL)
 			return -EOPNOTSUPP;
 
+		if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) {
+			int hdrlen = GENL_HDRLEN + family->hdrsize;
+
+			if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+				return -EINVAL;
+
+			if (family->maxattr) {
+				unsigned int validate = NL_VALIDATE_STRICT;
+
+				if (ops->validate &
+				    GENL_DONT_VALIDATE_DUMP_STRICT)
+					validate = NL_VALIDATE_LIBERAL;
+				rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen),
+						    nlmsg_attrlen(nlh, hdrlen),
+						    family->maxattr,
+						    family->policy,
+						    validate, extack);
+				if (rc)
+					return rc;
+			}
+		}
+
 		if (!family->parallel_ops) {
 			struct netlink_dump_control c = {
 				.module = family->module,
@@ -576,8 +599,13 @@
 		attrbuf = family->attrbuf;
 
 	if (attrbuf) {
-		err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
-				  ops->policy, extack);
+		enum netlink_validation validate = NL_VALIDATE_STRICT;
+
+		if (ops->validate & GENL_DONT_VALIDATE_STRICT)
+			validate = NL_VALIDATE_LIBERAL;
+
+		err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
+				    family->policy, validate, extack);
 		if (err < 0)
 			goto out;
 	}
@@ -664,7 +692,7 @@
 		struct nlattr *nla_ops;
 		int i;
 
-		nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS);
+		nla_ops = nla_nest_start_noflag(skb, CTRL_ATTR_OPS);
 		if (nla_ops == NULL)
 			goto nla_put_failure;
 
@@ -677,10 +705,10 @@
 				op_flags |= GENL_CMD_CAP_DUMP;
 			if (ops->doit)
 				op_flags |= GENL_CMD_CAP_DO;
-			if (ops->policy)
+			if (family->policy)
 				op_flags |= GENL_CMD_CAP_HASPOL;
 
-			nest = nla_nest_start(skb, i + 1);
+			nest = nla_nest_start_noflag(skb, i + 1);
 			if (nest == NULL)
 				goto nla_put_failure;
 
@@ -698,7 +726,7 @@
 		struct nlattr *nla_grps;
 		int i;
 
-		nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS);
+		nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS);
 		if (nla_grps == NULL)
 			goto nla_put_failure;
 
@@ -708,7 +736,7 @@
 
 			grp = &family->mcgrps[i];
 
-			nest = nla_nest_start(skb, i + 1);
+			nest = nla_nest_start_noflag(skb, i + 1);
 			if (nest == NULL)
 				goto nla_put_failure;
 
@@ -748,11 +776,11 @@
 	    nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id))
 		goto nla_put_failure;
 
-	nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS);
+	nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS);
 	if (nla_grps == NULL)
 		goto nla_put_failure;
 
-	nest = nla_nest_start(skb, 1);
+	nest = nla_nest_start_noflag(skb, 1);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -937,9 +965,9 @@
 static const struct genl_ops genl_ctrl_ops[] = {
 	{
 		.cmd		= CTRL_CMD_GETFAMILY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit		= ctrl_getfamily,
 		.dumpit		= ctrl_dumpfamily,
-		.policy		= ctrl_policy,
 	},
 };
 
@@ -957,6 +985,7 @@
 	.name = "nlctrl",
 	.version = 0x2,
 	.maxattr = CTRL_ATTR_MAX,
+	.policy = ctrl_policy,
 	.netnsok = true,
 };
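
Three genetlink details above are worth spelling out. Family ids now come from idr_alloc_cyclic(), so a freshly unregistered id is not handed straight back out, and the new errout_free label plugs an attrbuf leak on id-allocation failure. Dump requests are validated too: unless an op sets GENL_DONT_VALIDATE_DUMP, their attributes are strictly checked against the family policy before the dump starts. Finally, the nla_nest_start() calls become nla_nest_start_noflag() because nla_nest_start() itself now sets NLA_F_NESTED on the container attribute, and the nlctrl messages predate that flag. A sketch of the new-style helper (MY_ATTR_* are placeholder attribute types):

static int example_fill(struct sk_buff *skb)
{
	struct nlattr *nest;

	/* Container attribute gets NLA_F_NESTED ORed into its type;
	 * nla_nest_start_noflag() would emit the bare type instead. */
	nest = nla_nest_start(skb, MY_ATTR_NESTED);
	if (!nest)
		return -EMSGSIZE;
	if (nla_put_u32(skb, MY_ATTR_VALUE, 1)) {
		nla_nest_cancel(skb, nest);
		return -EMSGSIZE;
	}
	nla_nest_end(skb, nest);
	return 0;
}
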
 
diff --git a/net/netrom/Makefile b/net/netrom/Makefile
index 2660f5a..603e36c 100644
--- a/net/netrom/Makefile
+++ b/net/netrom/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux NET/ROM layer.
 #
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 03f37c4..58d5373 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
@@ -67,28 +64,6 @@
 static const struct proto_ops nr_proto_ops;
 
 /*
- * NETROM network devices are virtual network devices encapsulating NETROM
- * frames into AX.25 which will be sent through an AX.25 device, so form a
- * special "super class" of normal net devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key nr_netdev_xmit_lock_key;
-static struct lock_class_key nr_netdev_addr_lock_key;
-
-static void nr_set_lockdep_one(struct net_device *dev,
-			       struct netdev_queue *txq,
-			       void *_unused)
-{
-	lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key);
-}
-
-static void nr_set_lockdep_key(struct net_device *dev)
-{
-	lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key);
-	netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL);
-}
-
-/*
  *	Socket removal during an interrupt is now safe.
  */
 static void nr_remove_socket(struct sock *sk)
@@ -153,7 +128,7 @@
 	sk_for_each(s, &nr_list)
 		if (!ax25cmp(&nr_sk(s)->source_addr, addr) &&
 		    s->sk_state == TCP_LISTEN) {
-			bh_lock_sock(s);
+			sock_hold(s);
 			goto found;
 		}
 	s = NULL;
@@ -174,7 +149,7 @@
 		struct nr_sock *nr = nr_sk(s);
 
 		if (nr->my_index == index && nr->my_id == id) {
-			bh_lock_sock(s);
+			sock_hold(s);
 			goto found;
 		}
 	}
@@ -198,7 +173,7 @@
 
 		if (nr->your_index == index && nr->your_id == id &&
 		    !ax25cmp(&nr->dest_addr, dest)) {
-			bh_lock_sock(s);
+			sock_hold(s);
 			goto found;
 		}
 	}
@@ -224,7 +199,7 @@
 		if (i != 0 && j != 0) {
 			if ((sk=nr_find_socket(i, j)) == NULL)
 				break;
-			bh_unlock_sock(sk);
+			sock_put(sk);
 		}
 
 		id++;
@@ -872,7 +847,7 @@
 	unsigned short frametype, flags, window, timeout;
 	int ret;
 
-	skb->sk = NULL;		/* Initially we don't know who it's for */
+	skb_orphan(skb);
 
 	/*
 	 *	skb->data points to the netrom frame start
@@ -920,6 +895,7 @@
 	}
 
 	if (sk != NULL) {
+		bh_lock_sock(sk);
 		skb_reset_transport_header(skb);
 
 		if (frametype == NR_CONNACK && skb->len == 22)
@@ -929,6 +905,7 @@
 
 		ret = nr_process_rx_frame(sk, skb);
 		bh_unlock_sock(sk);
+		sock_put(sk);
 		return ret;
 	}
 
@@ -960,13 +937,17 @@
 	    (make = nr_make_new(sk)) == NULL) {
 		nr_transmit_refusal(skb, 0);
 		if (sk)
-			bh_unlock_sock(sk);
+			sock_put(sk);
 		return 0;
 	}
 
+	bh_lock_sock(sk);
+
 	window = skb->data[20];
 
+	sock_hold(make);
 	skb->sk             = make;
+	skb->destructor     = sock_efree;
 	make->sk_state	    = TCP_ESTABLISHED;
 
 	/* Fill in his circuit details */
@@ -1016,6 +997,7 @@
 		sk->sk_data_ready(sk);
 
 	bh_unlock_sock(sk);
+	sock_put(sk);
 
 	nr_insert_socket(make);
 
@@ -1194,7 +1176,6 @@
 {
 	struct sock *sk = sock->sk;
 	void __user *argp = (void __user *)arg;
-	int ret;
 
 	switch (cmd) {
 	case TIOCOUTQ: {
@@ -1220,18 +1201,6 @@
 		return put_user(amount, (int __user *)argp);
 	}
 
-	case SIOCGSTAMP:
-		lock_sock(sk);
-		ret = sock_get_timestamp(sk, argp);
-		release_sock(sk);
-		return ret;
-
-	case SIOCGSTAMPNS:
-		lock_sock(sk);
-		ret = sock_get_timestampns(sk, argp);
-		release_sock(sk);
-		return ret;
-
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 	case SIOCGIFDSTADDR:
@@ -1357,6 +1326,7 @@
 	.getname	=	nr_getname,
 	.poll		=	datagram_poll,
 	.ioctl		=	nr_ioctl,
+	.gettstamp	=	sock_gettstamp,
 	.listen		=	nr_listen,
 	.shutdown	=	sock_no_shutdown,
 	.setsockopt	=	nr_setsockopt,
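
The SIOCGSTAMP/SIOCGSTAMPNS cases removed from nr_ioctl() are not lost functionality: as part of the y2038 timestamp rework these ioctls are handled generically through the new proto_ops ->gettstamp hook, which is why the ops table above simply gains .gettstamp = sock_gettstamp.
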
@@ -1387,18 +1357,22 @@
 	int i;
 	int rc = proto_register(&nr_proto, 0);
 
-	if (rc != 0)
-		goto out;
+	if (rc)
+		return rc;
 
 	if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) {
-		printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n");
-		return -1;
+		pr_err("NET/ROM: %s - nr_ndevs parameter too large\n",
+		       __func__);
+		rc = -EINVAL;
+		goto unregister_proto;
 	}
 
 	dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL);
-	if (dev_nr == NULL) {
-		printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n");
-		return -1;
+	if (!dev_nr) {
+		pr_err("NET/ROM: %s - unable to allocate device array\n",
+		       __func__);
+		rc = -ENOMEM;
+		goto unregister_proto;
 	}
 
 	for (i = 0; i < nr_ndevs; i++) {
@@ -1408,50 +1382,77 @@
 		sprintf(name, "nr%d", i);
 		dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup);
 		if (!dev) {
-			printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n");
+			rc = -ENOMEM;
 			goto fail;
 		}
 
 		dev->base_addr = i;
-		if (register_netdev(dev)) {
-			printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n");
+		rc = register_netdev(dev);
+		if (rc) {
 			free_netdev(dev);
 			goto fail;
 		}
-		nr_set_lockdep_key(dev);
 		dev_nr[i] = dev;
 	}
 
-	if (sock_register(&nr_family_ops)) {
-		printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n");
+	rc = sock_register(&nr_family_ops);
+	if (rc)
 		goto fail;
-	}
 
-	register_netdevice_notifier(&nr_dev_notifier);
+	rc = register_netdevice_notifier(&nr_dev_notifier);
+	if (rc)
+		goto out_sock;
 
 	ax25_register_pid(&nr_pid);
 	ax25_linkfail_register(&nr_linkfail_notifier);
 
 #ifdef CONFIG_SYSCTL
-	nr_register_sysctl();
+	rc = nr_register_sysctl();
+	if (rc)
+		goto out_sysctl;
 #endif
 
 	nr_loopback_init();
 
-	proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops);
-	proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops);
-	proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops);
-out:
-	return rc;
+	rc = -ENOMEM;
+	if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops))
+		goto proc_remove1;
+	if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net,
+			     &nr_neigh_seqops))
+		goto proc_remove2;
+	if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net,
+			     &nr_node_seqops))
+		goto proc_remove3;
+
+	return 0;
+
+proc_remove3:
+	remove_proc_entry("nr_neigh", init_net.proc_net);
+proc_remove2:
+	remove_proc_entry("nr", init_net.proc_net);
+proc_remove1:
+
+	nr_loopback_clear();
+	nr_rt_free();
+
+#ifdef CONFIG_SYSCTL
+	nr_unregister_sysctl();
+out_sysctl:
+#endif
+	ax25_linkfail_release(&nr_linkfail_notifier);
+	ax25_protocol_release(AX25_P_NETROM);
+	unregister_netdevice_notifier(&nr_dev_notifier);
+out_sock:
+	sock_unregister(PF_NETROM);
 fail:
 	while (--i >= 0) {
 		unregister_netdev(dev_nr[i]);
 		free_netdev(dev_nr[i]);
 	}
 	kfree(dev_nr);
+unregister_proto:
 	proto_unregister(&nr_proto);
-	rc = -1;
-	goto out;
+	return rc;
 }
 
 module_init(nr_proto_init);
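
The bh_lock_sock() to sock_hold() conversion in the lookup helpers fixes a use-after-free: returning with only the bottom-half lock held does not pin the socket's lifetime, so nothing stopped it from being freed under nr_rx_frame(). The lookups now return a held reference, the caller locks the socket only while it actually processes the frame, and skb->destructor = sock_efree pairs the extra hold taken for the accepted skb. The resulting pattern, condensed from the hunks above:

	sk = nr_find_socket(index, id);	/* returns with sock_hold() taken */
	if (sk) {
		bh_lock_sock(sk);
		ret = nr_process_rx_frame(sk, skb);
		bh_unlock_sock(sk);
		sock_put(sk);		/* drop the lookup reference */
	}
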
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 988f542..29e418c 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index fbfdae4..2bef377 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk)
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index 215ad22..a880dd3 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright Tomi Manninen OH2BNS (oh2bns@sral.fi)
  */
@@ -70,7 +67,7 @@
 	}
 }
 
-void __exit nr_loopback_clear(void)
+void nr_loopback_clear(void)
 {
 	del_timer_sync(&loopback_timer);
 	skb_queue_purge(&loopback_queue);
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 00fbf14..4492965 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk)
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 6485f59..d41335b 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
@@ -953,7 +950,7 @@
 /*
  *	Free all memory associated with the nodes and routes lists.
  */
-void __exit nr_rt_free(void)
+void nr_rt_free(void)
 {
 	struct nr_neigh *s = NULL;
 	struct nr_node  *t = NULL;
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index 029c8bb..3f99b43 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index cbd51ed..9115f8a 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org)
@@ -52,21 +49,21 @@
 {
 	struct nr_sock *nr = nr_sk(sk);
 
-	mod_timer(&nr->t1timer, jiffies + nr->t1);
+	sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1);
 }
 
 void nr_start_t2timer(struct sock *sk)
 {
 	struct nr_sock *nr = nr_sk(sk);
 
-	mod_timer(&nr->t2timer, jiffies + nr->t2);
+	sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2);
 }
 
 void nr_start_t4timer(struct sock *sk)
 {
 	struct nr_sock *nr = nr_sk(sk);
 
-	mod_timer(&nr->t4timer, jiffies + nr->t4);
+	sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4);
 }
 
 void nr_start_idletimer(struct sock *sk)
@@ -74,37 +71,37 @@
 	struct nr_sock *nr = nr_sk(sk);
 
 	if (nr->idle > 0)
-		mod_timer(&nr->idletimer, jiffies + nr->idle);
+		sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle);
 }
 
 void nr_start_heartbeat(struct sock *sk)
 {
-	mod_timer(&sk->sk_timer, jiffies + 5 * HZ);
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ);
 }
 
 void nr_stop_t1timer(struct sock *sk)
 {
-	del_timer(&nr_sk(sk)->t1timer);
+	sk_stop_timer(sk, &nr_sk(sk)->t1timer);
 }
 
 void nr_stop_t2timer(struct sock *sk)
 {
-	del_timer(&nr_sk(sk)->t2timer);
+	sk_stop_timer(sk, &nr_sk(sk)->t2timer);
 }
 
 void nr_stop_t4timer(struct sock *sk)
 {
-	del_timer(&nr_sk(sk)->t4timer);
+	sk_stop_timer(sk, &nr_sk(sk)->t4timer);
 }
 
 void nr_stop_idletimer(struct sock *sk)
 {
-	del_timer(&nr_sk(sk)->idletimer);
+	sk_stop_timer(sk, &nr_sk(sk)->idletimer);
 }
 
 void nr_stop_heartbeat(struct sock *sk)
 {
-	del_timer(&sk->sk_timer);
+	sk_stop_timer(sk, &sk->sk_timer);
 }
 
 int nr_t1timer_running(struct sock *sk)
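
The mod_timer()/del_timer() to sk_reset_timer()/sk_stop_timer() switch is another lifetime fix: the sk_* helpers take a reference on the socket while a timer is pending and drop it on cancellation or expiry, so a NET/ROM socket can no longer be freed while one of its timers is still queued. Their core in net/core/sock.c is essentially:

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);	/* timer was idle: pin the socket */
}

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (del_timer(timer))
		__sock_put(sk);	/* timer was pending: drop the pin */
}
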
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index ba1c368..79fb2d3 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com)
  */
@@ -146,9 +143,12 @@
 	{ }
 };
 
-void __init nr_register_sysctl(void)
+int __init nr_register_sysctl(void)
 {
 	nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table);
+	if (!nr_table_header)
+		return -ENOMEM;
+	return 0;
 }
 
 void nr_unregister_sysctl(void)
diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig
index 6e0fa0c..9b27599 100644
--- a/net/nfc/Kconfig
+++ b/net/nfc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # NFC sybsystem configuration
 #
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index d3e594e..4a9e720 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2011 Instituto Nokia de Tecnologia
  *
  * Authors:
  *    Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
  *    Lauro Ramos Venancio <lauro.venancio@openbossa.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/nfc.h>
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 947a470..c5f9c3e 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2011 Instituto Nokia de Tecnologia
  *
  * Authors:
  *    Lauro Ramos Venancio <lauro.venancio@openbossa.org>
  *    Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
diff --git a/net/nfc/digital.h b/net/nfc/digital.h
index 3c39c72..c33b2f7 100644
--- a/net/nfc/digital.h
+++ b/net/nfc/digital.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * NFC Digital Protocol stack
  * Copyright (c) 2013, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  */
 
 #ifndef __DIGITAL_H
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index ec0a899..e3599ed 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * NFC Digital Protocol stack
  * Copyright (c) 2013, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  */
 
 #define pr_fmt(fmt) "digital: %s: " fmt, __func__
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index 4f9a973..65aaa9d 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * NFC Digital Protocol stack
  * Copyright (c) 2013, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  */
 
 #define pr_fmt(fmt) "digital: %s: " fmt, __func__
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 2021d1d..84d2345 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * NFC Digital Protocol stack
  * Copyright (c) 2013, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  */
 
 #define pr_fmt(fmt) "digital: %s: " fmt, __func__
diff --git a/net/nfc/hci/Kconfig b/net/nfc/hci/Kconfig
index fd67f51..97bd3a2 100644
--- a/net/nfc/hci/Kconfig
+++ b/net/nfc/hci/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config NFC_HCI
        depends on NFC
        tristate "NFC HCI implementation"
diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile
index c5dbb68..5a0aaae 100644
--- a/net/nfc/hci/Makefile
+++ b/net/nfc/hci/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux NFC HCI layer.
 #
diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c
index 844673c..e02b9be 100644
--- a/net/nfc/hci/command.c
+++ b/net/nfc/hci/command.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2012  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) "hci: %s: " fmt, __func__
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 19cb2e4..6f1b096 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2012  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) "hci: %s: " fmt, __func__
diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h
index ab4c8e8..a59c96f 100644
--- a/net/nfc/hci/hci.h
+++ b/net/nfc/hci/hci.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (C) 2012  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __LOCAL_HCI_H
diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c
index 1fe725d..05c6098 100644
--- a/net/nfc/hci/hcp.c
+++ b/net/nfc/hci/hcp.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2012  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) "hci: %s: " fmt, __func__
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index 3d699cb..6ab40ea 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Link Layer Control manager
  *
  * Copyright (C) 2012  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <net/nfc/llc.h>
diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h
index 5dad4c5..823ddb6 100644
--- a/net/nfc/hci/llc.h
+++ b/net/nfc/hci/llc.h
@@ -1,19 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Link Layer Control manager
  *
  * Copyright (C) 2012  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __LOCAL_LLC_H_
diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c
index d0435d5..a42852f 100644
--- a/net/nfc/hci/llc_nop.c
+++ b/net/nfc/hci/llc_nop.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * nop (passthrough) Link Layer Control
  *
  * Copyright (C) 2012  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/types.h>
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index fe98893..0eb4ddc 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * shdlc Link Layer Control
  *
  * Copyright (C) 2012  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) "shdlc: %s: " fmt, __func__
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index 1f68724..97853c9 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (C) 2011  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 enum llcp_state {
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 6a196e4..475061c 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2011  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) "llcp: %s: " fmt, __func__
@@ -419,6 +407,10 @@
 						      sock->service_name,
 						      sock->service_name_len,
 						      &service_name_tlv_length);
+		if (!service_name_tlv) {
+			err = -ENOMEM;
+			goto error_tlv;
+		}
 		size += service_name_tlv_length;
 	}
 
@@ -429,9 +421,17 @@
 
 	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
 				      &miux_tlv_length);
+	if (!miux_tlv) {
+		err = -ENOMEM;
+		goto error_tlv;
+	}
 	size += miux_tlv_length;
 
 	rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
+	if (!rw_tlv) {
+		err = -ENOMEM;
+		goto error_tlv;
+	}
 	size += rw_tlv_length;
 
 	pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len);
@@ -484,9 +484,17 @@
 
 	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
 				      &miux_tlv_length);
+	if (!miux_tlv) {
+		err = -ENOMEM;
+		goto error_tlv;
+	}
 	size += miux_tlv_length;
 
 	rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
+	if (!rw_tlv) {
+		err = -ENOMEM;
+		goto error_tlv;
+	}
 	size += rw_tlv_length;
 
 	skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index ef4026a..cc99751 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2011  Intel Corporation. All rights reserved.
  * Copyright (C) 2014 Marvell International Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) "llcp: %s: " fmt, __func__
@@ -532,10 +520,10 @@
 
 static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
 {
-	u8 *gb_cur, *version_tlv, version, version_length;
-	u8 *lto_tlv, lto_length;
-	u8 *wks_tlv, wks_length;
-	u8 *miux_tlv, miux_length;
+	u8 *gb_cur, version, version_length;
+	u8 lto_length, wks_length, miux_length;
+	u8 *version_tlv = NULL, *lto_tlv = NULL,
+	   *wks_tlv = NULL, *miux_tlv = NULL;
 	__be16 wks = cpu_to_be16(local->local_wks);
 	u8 gb_len = 0;
 	int ret = 0;
@@ -543,17 +531,33 @@
 	version = LLCP_VERSION_11;
 	version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version,
 					 1, &version_length);
+	if (!version_tlv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	gb_len += version_length;
 
 	lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, &lto_length);
+	if (!lto_tlv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	gb_len += lto_length;
 
 	pr_debug("Local wks 0x%lx\n", local->local_wks);
 	wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length);
+	if (!wks_tlv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	gb_len += wks_length;
 
 	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0,
 				      &miux_length);
+	if (!miux_tlv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	gb_len += miux_length;
 
 	gb_len += ARRAY_SIZE(llcp_magic);
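
Every new branch in these two files guards the same failure mode: nfc_llcp_build_tlv() can return NULL, either because its type/length arguments are invalid or because its allocation failed. The old callers ignored that and would later dereference the NULL pointer when copying the TLV into the outgoing PDU or general-bytes buffer; each call site now bails out with -ENOMEM instead.
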
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ae29627..2860441 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2011  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) "llcp: %s: " fmt, __func__
@@ -119,9 +107,14 @@
 	llcp_sock->service_name = kmemdup(llcp_addr.service_name,
 					  llcp_sock->service_name_len,
 					  GFP_KERNEL);
-
+	if (!llcp_sock->service_name) {
+		ret = -ENOMEM;
+		goto put_dev;
+	}
 	llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
 	if (llcp_sock->ssap == LLCP_SAP_MAX) {
+		kfree(llcp_sock->service_name);
+		llcp_sock->service_name = NULL;
 		ret = -EADDRINUSE;
 		goto put_dev;
 	}
@@ -561,11 +554,11 @@
 	if (sk->sk_state == LLCP_LISTEN)
 		return llcp_accept_poll(sk);
 
-	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
 		mask |= EPOLLERR |
 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	if (sk->sk_state == LLCP_CLOSED)
@@ -726,6 +719,10 @@
 	llcp_sock->service_name = kmemdup(addr->service_name,
 					  llcp_sock->service_name_len,
 					  GFP_KERNEL);
+	if (!llcp_sock->service_name) {
+		ret = -ENOMEM;
+		goto sock_llcp_release;
+	}
 
 	nfc_llcp_sock_link(&local->connecting_sockets, sk);
 
@@ -745,10 +742,11 @@
 	return ret;
 
 sock_unlink:
-	nfc_llcp_put_ssap(local, llcp_sock->ssap);
-
 	nfc_llcp_sock_unlink(&local->connecting_sockets, sk);
 
+sock_llcp_release:
+	nfc_llcp_put_ssap(local, llcp_sock->ssap);
+
 put_dev:
 	nfc_put_device(dev);
 
@@ -1011,10 +1009,13 @@
 	    sock->type != SOCK_RAW)
 		return -ESOCKTNOSUPPORT;
 
-	if (sock->type == SOCK_RAW)
+	if (sock->type == SOCK_RAW) {
+		if (!capable(CAP_NET_RAW))
+			return -EPERM;
 		sock->ops = &llcp_rawsock_ops;
-	else
+	} else {
 		sock->ops = &llcp_sock_ops;
+	}
 
 	sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern);
 	if (sk == NULL)
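
The CAP_NET_RAW check brings LLCP into line with other socket families: a raw LLCP socket observes every frame on the adapter, so creating one is now a privileged operation (the reshuffled connect() error labels above likewise make sure the reserved ssap is released when the service-name copy fails). The userspace-visible effect, sketched on the assumption that the libc exposes AF_NFC:

#include <sys/socket.h>
#include <linux/nfc.h>

static int open_raw_llcp(void)
{
	/* Without CAP_NET_RAW this now fails with errno == EPERM. */
	return socket(AF_NFC, SOCK_RAW, NFC_SOCKPROTO_LLCP);
}
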
diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig
index 85d4819..ff1f295 100644
--- a/net/nfc/nci/Kconfig
+++ b/net/nfc/nci/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config NFC_NCI
 	depends on NFC
 	tristate "NCI protocol support"
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 0749601..7cd5248 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  The NFC Controller Interface is the communication protocol between an
  *  NFC Controller (NFCC) and a Device Host (DH).
@@ -10,19 +11,6 @@
  *  Acknowledgements:
  *  This file is based on hci_core.c, which was written
  *  by Maxim Krasnyansky.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 908f25e..ce3382b 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  The NFC Controller Interface is the communication protocol between an
  *  NFC Controller (NFCC) and a Device Host (DH).
@@ -6,19 +7,6 @@
  *  Copyright (C) 2014 Marvell International Ltd.
  *
  *  Written by Ilan Elias <ilane@ti.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
@@ -119,7 +107,7 @@
 	conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
 	if (!conn_info) {
 		rc = -EPROTO;
-		goto free_exit;
+		goto exit;
 	}
 
 	__skb_queue_head_init(&frags_q);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index ddfc52a..c18e76d 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  The NFC Controller Interface is the communication protocol between an
  *  NFC Controller (NFCC) and a Device Host (DH).
@@ -5,19 +6,6 @@
  *  section of the NCI 1.1 specification.
  *
  *  Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/skbuff.h>
@@ -312,6 +300,10 @@
 		create_info = (struct nci_hci_create_pipe_resp *)skb->data;
 		dest_gate = create_info->dest_gate;
 		new_pipe = create_info->pipe;
+		if (new_pipe >= NCI_HCI_MAX_PIPES) {
+			status = NCI_HCI_ANY_E_NOK;
+			goto exit;
+		}
 
 		/* Save the newly created pipe and bind it with the local gate,
 		 * where skb->data[3] is the destination gate id
@@ -336,6 +328,10 @@
 			goto exit;
 		}
 		delete_info = (struct nci_hci_delete_pipe_noti *)skb->data;
+		if (delete_info->pipe >= NCI_HCI_MAX_PIPES) {
+			status = NCI_HCI_ANY_E_NOK;
+			goto exit;
+		}
 
 		ndev->hci_dev->pipes[delete_info->pipe].gate =
 						NCI_HCI_INVALID_GATE;
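
Both checks above apply the same hardening pattern: a pipe identifier
arriving from the NFC controller is untrusted input and must be
range-checked before it indexes the fixed-size pipes[] table, otherwise a
malicious or buggy NFCC could reach past the end of ndev->hci_dev->pipes.
A minimal sketch of the idiom, with names mirroring the hunks above and
the surrounding handler elided:

	u8 pipe = create_info->pipe;		/* controller-supplied */

	if (pipe >= NCI_HCI_MAX_PIPES) {
		status = NCI_HCI_ANY_E_NOK;	/* reject before any use */
		goto exit;
	}
	ndev->hci_dev->pipes[pipe].gate = dest_gate;	/* now in bounds */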
diff --git a/net/nfc/nci/lib.c b/net/nfc/nci/lib.c
index ed774a2..473323f 100644
--- a/net/nfc/nci/lib.c
+++ b/net/nfc/nci/lib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  The NFC Controller Interface is the communication protocol between an
  *  NFC Controller (NFCC) and a Device Host (DH).
@@ -9,19 +10,6 @@
  *  Acknowledgements:
  *  This file is based on lib.c, which was written
  *  by Maxim Krasnyansky.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #include <linux/module.h>
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 1e8c1a1..33e1170 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  The NFC Controller Interface is the communication protocol between an
  *  NFC Controller (NFCC) and a Device Host (DH).
@@ -10,19 +11,6 @@
  *  Acknowledgements:
  *  This file is based on hci_event.c, which was written
  *  by Maxim Krasnyansky.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index e3bbf19..a48297b 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  The NFC Controller Interface is the communication protocol between an
  *  NFC Controller (NFCC) and a Device Host (DH).
@@ -9,19 +10,6 @@
  *  Acknowledgements:
  *  This file is based on hci_event.c, which was written
  *  by Maxim Krasnyansky.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index 452f4c1..9dd8a10 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
  */
 
 #define pr_fmt(fmt) "nci_spi: %s: " fmt, __func__
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index a66f102..78fe622 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -192,10 +192,8 @@
 	if (!nu)
 		return;
 
-	if (nu->tx_skb)
-		kfree_skb(nu->tx_skb);
-	if (nu->rx_skb)
-		kfree_skb(nu->rx_skb);
+	kfree_skb(nu->tx_skb);
+	kfree_skb(nu->rx_skb);
 
 	skb_queue_purge(&nu->tx_q);
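
The removed NULL tests were redundant: kfree_skb(), like kfree(), is
defined to be a no-op when passed NULL, so possibly-unset skb pointers
can be freed unconditionally:

	struct sk_buff *skb = NULL;

	kfree_skb(skb);		/* kfree_skb(NULL) does nothing */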
 
@@ -465,6 +463,7 @@
 	.receive_buf	= nci_uart_tty_receive,
 	.write_wakeup	= nci_uart_tty_wakeup,
 	.ioctl		= nci_uart_tty_ioctl,
+	.compat_ioctl	= nci_uart_tty_ioctl,
 };
 
 static int __init nci_uart_init(void)
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 376181c..afde0d7 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2011 Instituto Nokia de Tecnologia
  *
@@ -10,19 +11,6 @@
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
@@ -119,9 +107,10 @@
 	int rc;
 	u32 idx;
 
-	rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize,
-			 attrbuf, nfc_genl_family.maxattr, nfc_genl_policy,
-			 NULL);
+	rc = nlmsg_parse_deprecated(cb->nlh,
+				    GENL_HDRLEN + nfc_genl_family.hdrsize,
+				    attrbuf, nfc_genl_family.maxattr,
+				    nfc_genl_policy, NULL);
 	if (rc < 0)
 		return ERR_PTR(rc);
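
Since v5.2 the netlink core validates messages strictly by default, and
the old permissive behaviour lives on in *_deprecated() variants. The
conversion above keeps this family's historical tolerance for unknown or
trailing attributes in dump requests. A sketch of the two call shapes,
where my_family and my_policy are illustrative stand-ins:

	/* legacy, liberal parsing - tolerates unknown attributes: */
	rc = nlmsg_parse_deprecated(cb->nlh,
				    GENL_HDRLEN + my_family.hdrsize,
				    attrbuf, my_family.maxattr,
				    my_policy, NULL);

	/* strict parsing, preferred for new interfaces: */
	rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + my_family.hdrsize,
			 attrbuf, my_family.maxattr, my_policy, extack);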
 
@@ -392,7 +381,7 @@
 	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
 		goto nla_put_failure;
 
-	sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP);
+	sdp_attr = nla_nest_start_noflag(msg, NFC_ATTR_LLC_SDP);
 	if (sdp_attr == NULL) {
 		rc = -ENOMEM;
 		goto nla_put_failure;
@@ -402,7 +391,7 @@
 	hlist_for_each_entry_safe(sdres, n, sdres_list, node) {
 		pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap);
 
-		uri_attr = nla_nest_start(msg, i++);
+		uri_attr = nla_nest_start_noflag(msg, i++);
 		if (uri_attr == NULL) {
 			rc = -ENOMEM;
 			goto nla_put_failure;
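
Relatedly, nla_nest_start() was redefined in v5.2 to set NLA_F_NESTED in
the attribute type; existing callers were converted to
nla_nest_start_noflag(), which preserves the old wire format bit for bit
so current userspace keeps working. The usual shape, with illustrative
attribute names:

	struct nlattr *nest;

	nest = nla_nest_start_noflag(msg, MY_ATTR_NEST);
	if (!nest)
		goto nla_put_failure;
	if (nla_put_u32(msg, MY_ATTR_VALUE, val))
		goto nla_put_failure;
	nla_nest_end(msg, nest);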
@@ -922,7 +911,8 @@
 	u32 device_idx, target_idx;
 	int rc;
 
-	if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+	    !info->attrs[NFC_ATTR_TARGET_INDEX])
 		return -EINVAL;
 
 	device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
@@ -980,7 +970,8 @@
 	int rc;
 	u32 idx;
 
-	if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+	    !info->attrs[NFC_ATTR_TARGET_INDEX])
 		return -EINVAL;
 
 	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
@@ -1028,7 +1019,8 @@
 	struct sk_buff *msg = NULL;
 	u32 idx;
 
-	if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+	    !info->attrs[NFC_ATTR_FIRMWARE_NAME])
 		return -EINVAL;
 
 	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
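
The three hunks above plug the same hole: each handler read an attribute
it never verified was present, and since nla_get_u32() simply
dereferences its argument, a request missing NFC_ATTR_TARGET_INDEX or
NFC_ATTR_FIRMWARE_NAME dereferenced a NULL struct nlattr. The rule is
that a doit() handler must test every attribute it consumes:

	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
	    !info->attrs[NFC_ATTR_TARGET_INDEX])
		return -EINVAL;	/* reject before any nla_get_*() */

	device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
	target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]);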
@@ -1107,7 +1099,6 @@
 
 	local = nfc_llcp_find_local(dev);
 	if (!local) {
-		nfc_put_device(dev);
 		rc = -ENODEV;
 		goto exit;
 	}
@@ -1167,7 +1158,6 @@
 
 	local = nfc_llcp_find_local(dev);
 	if (!local) {
-		nfc_put_device(dev);
 		rc = -ENODEV;
 		goto exit;
 	}
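
These two deletions fix a double put: both handlers jump to a common exit
label that already calls nfc_put_device(dev), so dropping the reference
here as well could free the device while it was still in use. The
balanced pattern, sketched:

	dev = nfc_get_device(idx);
	if (!dev)
		return -ENODEV;

	local = nfc_llcp_find_local(dev);
	if (!local) {
		rc = -ENODEV;
		goto exit;	/* no nfc_put_device() on this path */
	}
	/* ... use dev ... */
exit:
	nfc_put_device(dev);	/* the single owner of the reference */
	return rc;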
@@ -1177,8 +1167,9 @@
 	tlvs_len = 0;
 
 	nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) {
-		rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr,
-				      nfc_sdp_genl_policy, info->extack);
+		rc = nla_parse_nested_deprecated(sdp_attrs, NFC_SDP_ATTR_MAX,
+						 attr, nfc_sdp_genl_policy,
+						 info->extack);
 
 		if (rc != 0) {
 			rc = -EINVAL;
@@ -1667,102 +1658,102 @@
 static const struct genl_ops nfc_genl_ops[] = {
 	{
 		.cmd = NFC_CMD_GET_DEVICE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_get_device,
 		.dumpit = nfc_genl_dump_devices,
 		.done = nfc_genl_dump_devices_done,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_DEV_UP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_dev_up,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_DEV_DOWN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_dev_down,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_START_POLL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_start_poll,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_STOP_POLL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_stop_poll,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_DEP_LINK_UP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_dep_link_up,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_DEP_LINK_DOWN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_dep_link_down,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_GET_TARGET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit = nfc_genl_dump_targets,
 		.done = nfc_genl_dump_targets_done,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_LLC_GET_PARAMS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_llc_get_params,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_LLC_SET_PARAMS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_llc_set_params,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_LLC_SDREQ,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_llc_sdreq,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_FW_DOWNLOAD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_fw_download,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_ENABLE_SE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_enable_se,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_DISABLE_SE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_disable_se,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_GET_SE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit = nfc_genl_dump_ses,
 		.done = nfc_genl_dump_ses_done,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_SE_IO,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_se_io,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_ACTIVATE_TARGET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_activate_target,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_VENDOR,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_vendor_cmd,
-		.policy = nfc_genl_policy,
 	},
 	{
 		.cmd = NFC_CMD_DEACTIVATE_TARGET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nfc_genl_deactivate_target,
-		.policy = nfc_genl_policy,
 	},
 };
 
@@ -1771,6 +1762,7 @@
 	.name = NFC_GENL_NAME,
 	.version = NFC_GENL_VERSION,
 	.maxattr = NFC_ATTR_MAX,
+	.policy = nfc_genl_policy,
 	.module = THIS_MODULE,
 	.ops = nfc_genl_ops,
 	.n_ops = ARRAY_SIZE(nfc_genl_ops),
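
The mechanical change running through nfc_genl_ops[] is part of the same
v5.2 generic-netlink rework: the policy moves from each operation to the
family, the core performs attribute validation itself, and commands that
still depend on the permissive pre-strict behaviour opt out per op via
.validate flags. The resulting shape, for a hypothetical family:

	static const struct genl_ops my_genl_ops[] = {
		{
			.cmd	  = MY_CMD_GET,
			.validate = GENL_DONT_VALIDATE_STRICT |
				    GENL_DONT_VALIDATE_DUMP,
			.doit	  = my_cmd_get,	/* no per-op .policy */
		},
	};

	static struct genl_family my_genl_family __ro_after_init = {
		.name	 = "my_family",
		.maxattr = MY_ATTR_MAX,
		.policy	 = my_genl_policy,	/* moved up from the ops */
		.module	 = THIS_MODULE,
		.ops	 = my_genl_ops,
		.n_ops	 = ARRAY_SIZE(my_genl_ops),
	};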
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 6c6f76b..889fefd 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (C) 2011 Instituto Nokia de Tecnologia
  *
  * Authors:
  *    Lauro Ramos Venancio <lauro.venancio@openbossa.org>
  *    Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __LOCAL_NFC_H
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index e2188de..ba5ffd3 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2011 Instituto Nokia de Tecnologia
  *
  * Authors:
  *    Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
  *    Lauro Ramos Venancio <lauro.venancio@openbossa.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
diff --git a/net/nsh/Kconfig b/net/nsh/Kconfig
index bafc3dd..19af948 100644
--- a/net/nsh/Kconfig
+++ b/net/nsh/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 menuconfig NET_NSH
 	tristate "Network Service Header (NSH) protocol"
 	default n
diff --git a/net/nsh/Makefile b/net/nsh/Makefile
index c93c787..de34a61 100644
--- a/net/nsh/Makefile
+++ b/net/nsh/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_NET_NSH) += nsh.o
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 1a30e16..e9ca007 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Network Service Header
  *
  * Copyright (c) 2017 Red Hat, Inc. -- Jiri Benc <jbenc@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 89da951..22d7d56 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Open vSwitch
 #
@@ -8,8 +9,6 @@
 	depends on !NF_CONNTRACK || \
 		   (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
 				     (!NF_NAT || NF_NAT) && \
-				     (!NF_NAT_IPV4 || NF_NAT_IPV4) && \
-				     (!NF_NAT_IPV6 || NF_NAT_IPV6) && \
 				     (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT)))
 	select LIBCRC32C
 	select MPLS
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 85ae53d..1c77f52 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -169,50 +156,19 @@
 			 const struct nlattr *actions, int len,
 			 bool last, bool clone_flow_key);
 
-static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
-			     __be16 ethertype)
-{
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		__be16 diff[] = { ~(hdr->h_proto), ethertype };
-
-		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
-					~skb->csum);
-	}
-
-	hdr->h_proto = ethertype;
-}
+static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
+			      struct sw_flow_key *key,
+			      const struct nlattr *attr, int len);
 
 static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 		     const struct ovs_action_push_mpls *mpls)
 {
-	struct mpls_shim_hdr *new_mpls_lse;
+	int err;
 
-	/* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
-	if (skb->encapsulation)
-		return -ENOTSUPP;
-
-	if (skb_cow_head(skb, MPLS_HLEN) < 0)
-		return -ENOMEM;
-
-	if (!skb->inner_protocol) {
-		skb_set_inner_network_header(skb, skb->mac_len);
-		skb_set_inner_protocol(skb, skb->protocol);
-	}
-
-	skb_push(skb, MPLS_HLEN);
-	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
-		skb->mac_len);
-	skb_reset_mac_header(skb);
-	skb_set_network_header(skb, skb->mac_len);
-
-	new_mpls_lse = mpls_hdr(skb);
-	new_mpls_lse->label_stack_entry = mpls->mpls_lse;
-
-	skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
-
-	if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
-		update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
-	skb->protocol = mpls->mpls_ethertype;
+	err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype,
+			    skb->mac_len);
+	if (err)
+		return err;
 
 	invalidate_flow_key(key);
 	return 0;
@@ -223,31 +179,10 @@
 {
 	int err;
 
-	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
-	if (unlikely(err))
+	err = skb_mpls_pop(skb, ethertype, skb->mac_len);
+	if (err)
 		return err;
 
-	skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
-
-	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
-		skb->mac_len);
-
-	__skb_pull(skb, MPLS_HLEN);
-	skb_reset_mac_header(skb);
-	skb_set_network_header(skb, skb->mac_len);
-
-	if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
-		struct ethhdr *hdr;
-
-		/* mpls_hdr() is used to locate the ethertype field correctly in the
-		 * presence of VLAN tags.
-		 */
-		hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
-		update_ethertype(skb, hdr, ethertype);
-	}
-	if (eth_p_mpls(skb->protocol))
-		skb->protocol = ethertype;
-
 	invalidate_flow_key(key);
 	return 0;
 }
@@ -259,20 +194,12 @@
 	__be32 lse;
 	int err;
 
-	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
-	if (unlikely(err))
-		return err;
-
 	stack = mpls_hdr(skb);
 	lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		__be32 diff[] = { ~(stack->label_stack_entry), lse };
+	err = skb_mpls_update_lse(skb, lse);
+	if (err)
+		return err;
 
-		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
-					  ~skb->csum);
-	}
-
-	stack->label_stack_entry = lse;
 	flow_key->mpls.top_lse = lse;
 	return 0;
 }
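
The three hunks above delete Open vSwitch's private MPLS manipulation in
favour of the skb_mpls_push()/skb_mpls_pop()/skb_mpls_update_lse()
helpers added for TC in v5.3. The helpers take over headroom growth,
header movement, checksum fix-up and, for Ethernet frames, the h_proto
rewrite that the removed update_ethertype() performed. As called in
these hunks (a sketch, with the label values as the only inputs):

	/* push one label stack entry below the L2 header */
	err = skb_mpls_push(skb, lse, htons(ETH_P_MPLS_UC), skb->mac_len);

	/* pop it again, restoring the inner ethertype */
	err = skb_mpls_pop(skb, inner_proto, skb->mac_len);

	/* rewrite the topmost LSE in place, keeping the csum consistent */
	err = skb_mpls_update_lse(skb, new_lse);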
@@ -301,7 +228,7 @@
 		key->eth.vlan.tpid = vlan->vlan_tpid;
 	}
 	return skb_vlan_push(skb, vlan->vlan_tpid,
-			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+			     ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
 }
 
 /* 'src' is already properly masked. */
@@ -822,8 +749,10 @@
 	__skb_dst_copy(skb, data->dst);
 	*OVS_CB(skb) = data->cb;
 	skb->inner_protocol = data->inner_protocol;
-	skb->vlan_tci = data->vlan_tci;
-	skb->vlan_proto = data->vlan_proto;
+	if (data->vlan_tci & VLAN_CFI_MASK)
+		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci & ~VLAN_CFI_MASK);
+	else
+		__vlan_hwaccel_clear_tag(skb);
 
 	/* Reconstruct the MAC header.  */
 	skb_push(skb, data->l2_len);
@@ -867,7 +796,10 @@
 	data->cb = *OVS_CB(skb);
 	data->inner_protocol = skb->inner_protocol;
 	data->network_offset = orig_network_offset;
-	data->vlan_tci = skb->vlan_tci;
+	if (skb_vlan_tag_present(skb))
+		data->vlan_tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK;
+	else
+		data->vlan_tci = 0;
 	data->vlan_proto = skb->vlan_proto;
 	data->mac_proto = mac_proto;
 	data->l2_len = hlen;
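
Since v4.20 the kernel tracks "a VLAN tag is present" in a separate
skb->vlan_present flag rather than a bit inside vlan_tci, so copying
skb->vlan_tci wholesale no longer captures presence. The deferred-action
code above re-encodes presence in the CFI bit of its private copy (free
for this purpose) and restores the tag through the hwaccel helpers:

	/* stash: remember the offloaded tag plus a presence marker */
	u16 tci = 0;

	if (skb_vlan_tag_present(skb))
		tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK;

	/* restore: rebuild or clear the offloaded tag */
	if (tci & VLAN_CFI_MASK)
		__vlan_hwaccel_put_tag(skb, proto, tci & ~VLAN_CFI_MASK);
	else
		__vlan_hwaccel_clear_tag(skb);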
@@ -1208,6 +1140,40 @@
 	return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
 }
 
+static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
+				 struct sw_flow_key *key,
+				 const struct nlattr *attr, bool last)
+{
+	const struct nlattr *actions, *cpl_arg;
+	const struct check_pkt_len_arg *arg;
+	int rem = nla_len(attr);
+	bool clone_flow_key;
+
+	/* The first netlink attribute in 'attr' is always
+	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
+	 */
+	cpl_arg = nla_data(attr);
+	arg = nla_data(cpl_arg);
+
+	if (skb->len <= arg->pkt_len) {
+		/* Second netlink attribute in 'attr' is always
+		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
+		 */
+		actions = nla_next(cpl_arg, &rem);
+		clone_flow_key = !arg->exec_for_lesser_equal;
+	} else {
+		/* Third netlink attribute in 'attr' is always
+		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'.
+		 */
+		actions = nla_next(cpl_arg, &rem);
+		actions = nla_next(actions, &rem);
+		clone_flow_key = !arg->exec_for_greater;
+	}
+
+	return clone_execute(dp, skb, key, 0, nla_data(actions),
+			     nla_len(actions), last, clone_flow_key);
+}
+
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			      struct sw_flow_key *key,
@@ -1369,6 +1335,16 @@
 
 			break;
 		}
+
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
+			bool last = nla_is_last(a, rem);
+
+			err = execute_check_pkt_len(dp, skb, key, a, last);
+			if (last)
+				return err;
+
+			break;
+		}
 		}
 
 		if (unlikely(err)) {
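
The new OVS_ACTION_ATTR_CHECK_PKT_LEN action relies on its three nested
attributes arriving in a fixed order (the argument struct, then the
"less or equal" action list, then the "greater" action list), an
ordering enforced earlier when the flow's actions are validated, so
execute_check_pkt_len() walks them positionally with nla_next() instead
of matching on attribute type. The traversal idiom:

	const struct nlattr *first, *second, *third;
	int rem = nla_len(attr);

	first = nla_data(attr);			/* ARG */
	second = nla_next(first, &rem);		/* <= action list */
	third = nla_next(second, &rem);		/* >  action list */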
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 35ae64c..05249eb 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 
 #include <linux/module.h>
@@ -24,14 +16,13 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_labels.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #include <net/ipv6_frag.h>
 
-#ifdef CONFIG_NF_NAT_NEEDED
-#include <linux/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_nat.h>
 #endif
 
 #include "datapath.h"
@@ -75,7 +66,9 @@
 	u32 eventmask;              /* Mask of 1 << IPCT_*. */
 	struct md_mark mark;
 	struct md_labels labels;
-#ifdef CONFIG_NF_NAT_NEEDED
+	char timeout[CTNL_TIMEOUT_NAME_MAX];
+	struct nf_ct_timeout *nf_ct_timeout;
+#if IS_ENABLED(CONFIG_NF_NAT)
 	struct nf_nat_range2 range;  /* Only present for SRC NAT and DST NAT. */
 #endif
 };
@@ -532,6 +525,11 @@
 		return -EPFNOSUPPORT;
 	}
 
+	/* The key extracted from the fragment that completed this datagram
+	 * likely didn't have an L4 header, so regenerate it.
+	 */
+	ovs_flow_key_update_l3l4(skb, key);
+
 	key->ip.frag = OVS_FRAG_TYPE_NONE;
 	skb_clear_hash(skb);
 	skb->ignore_df = 1;
@@ -622,7 +620,7 @@
 	if (natted) {
 		struct nf_conntrack_tuple inverse;
 
-		if (!nf_ct_invert_tuplepr(&inverse, &tuple)) {
+		if (!nf_ct_invert_tuple(&inverse, &tuple)) {
 			pr_debug("ovs_ct_find_existing: Inversion failed!\n");
 			return NULL;
 		}
@@ -705,6 +703,14 @@
 		if (help && rcu_access_pointer(help->helper) != info->helper)
 			return false;
 	}
+	if (info->nf_ct_timeout) {
+		struct nf_conn_timeout *timeout_ext;
+
+		timeout_ext = nf_ct_timeout_find(ct);
+		if (!timeout_ext || info->nf_ct_timeout !=
+		    rcu_dereference(timeout_ext->timeout))
+			return false;
+	}
 	/* Force conntrack entry direction to the current packet? */
 	if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
 		/* Delete the conntrack entry if confirmed, else just release
@@ -721,7 +727,7 @@
 	return ct_executed;
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 /* Modelled after nf_nat_ipv[46]_fn().
  * range is only used for new, uninitialized NAT state.
  * Returns either NF_ACCEPT or NF_DROP.
@@ -745,14 +751,14 @@
 	switch (ctinfo) {
 	case IP_CT_RELATED:
 	case IP_CT_RELATED_REPLY:
-		if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
+		if (IS_ENABLED(CONFIG_NF_NAT) &&
 		    skb->protocol == htons(ETH_P_IP) &&
 		    ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
 							   hooknum))
 				err = NF_DROP;
 			goto push;
-		} else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
+		} else if (IS_ENABLED(CONFIG_IPV6) &&
 			   skb->protocol == htons(ETH_P_IPV6)) {
 			__be16 frag_off;
 			u8 nexthdr = ipv6_hdr(skb)->nexthdr;
@@ -903,7 +909,7 @@
 
 	return err;
 }
-#else /* !CONFIG_NF_NAT_NEEDED */
+#else /* !CONFIG_NF_NAT */
 static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
 		      const struct ovs_conntrack_info *info,
 		      struct sk_buff *skb, struct nf_conn *ct,
@@ -933,6 +939,11 @@
 	struct nf_conn *ct;
 
 	if (!cached) {
+		struct nf_hook_state state = {
+			.hook = NF_INET_PRE_ROUTING,
+			.pf = info->family,
+			.net = net,
+		};
 		struct nf_conn *tmpl = info->ct;
 		int err;
 
@@ -944,8 +955,7 @@
 			nf_ct_set(skb, tmpl, IP_CT_NEW);
 		}
 
-		err = nf_conntrack_in(net, info->family,
-				      NF_INET_PRE_ROUTING, skb);
+		err = nf_conntrack_in(skb, &state);
 		if (err != NF_ACCEPT)
 			return -ENOENT;
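
nf_conntrack_in() changed signature in v4.20: instead of (net, pf,
hooknum) it now takes the same struct nf_hook_state that netfilter hooks
receive. Callers outside the hook path, like OVS here, synthesize a
minimal state on the stack:

	struct nf_hook_state state = {
		.hook = NF_INET_PRE_ROUTING,
		.pf   = NFPROTO_IPV4,	/* or the flow's actual family */
		.net  = net,
	};

	if (nf_conntrack_in(skb, &state) != NF_ACCEPT)
		return -ENOENT;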
 
@@ -988,6 +998,12 @@
 							    GFP_ATOMIC);
 			if (err)
 				return err;
+
+			/* helper installed, add seqadj if NAT is required */
+			if (info->nat && !nfct_seqadj(ct)) {
+				if (!nfct_seqadj_ext_add(ct))
+					return -EINVAL;
+			}
 		}
 
 		/* Call the helper only if:
@@ -1162,7 +1178,7 @@
 				&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 			if (err) {
 				net_warn_ratelimited("openvswitch: zone: %u "
-					"execeeds conntrack limit\n",
+					"exceeds conntrack limit\n",
 					info->zone.id);
 				return err;
 			}
@@ -1199,7 +1215,8 @@
 					 &info->labels.mask);
 		if (err)
 			return err;
-	} else if (labels_nonzero(&info->labels.mask)) {
+	} else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+		   labels_nonzero(&info->labels.mask)) {
 		err = ovs_ct_set_labels(ct, key, &info->labels.value,
 					&info->labels.mask);
 		if (err)
@@ -1296,6 +1313,7 @@
 {
 	struct nf_conntrack_helper *helper;
 	struct nf_conn_help *help;
+	int ret = 0;
 
 	helper = nf_conntrack_helper_try_module_get(name, info->family,
 						    key->ip.proto);
@@ -1310,16 +1328,24 @@
 		return -ENOMEM;
 	}
 
+#if IS_ENABLED(CONFIG_NF_NAT)
+	if (info->nat) {
+		ret = nf_nat_helper_try_module_get(name, info->family,
+						   key->ip.proto);
+		if (ret) {
+			nf_conntrack_helper_put(helper);
+			OVS_NLERR(log, "Failed to load \"%s\" NAT helper, error: %d",
+				  name, ret);
+			return ret;
+		}
+	}
+#endif
 	rcu_assign_pointer(help->helper, helper);
 	info->helper = helper;
-
-	if (info->nat)
-		request_module("ip_nat_%s", name);
-
-	return 0;
+	return ret;
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 static int parse_nat(const struct nlattr *attr,
 		     struct ovs_conntrack_info *info, bool log)
 {
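
Previously the NAT side of a helper was only request_module()'d, which
loads the module but takes no reference, so it could be unloaded while
flows still depended on it. nf_nat_helper_try_module_get() pins it for
the lifetime of the ct action; the matching nf_nat_helper_put() is added
to __ovs_ct_free_action() further down. The pairing, in outline:

	/* action setup */
	ret = nf_nat_helper_try_module_get(name, family, proto);
	if (ret)
		return ret;	/* NAT helper missing or unloadable */

	/* action teardown */
	nf_nat_helper_put(helper);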
@@ -1456,12 +1482,14 @@
 				    .maxlen = sizeof(struct md_labels) },
 	[OVS_CT_ATTR_HELPER]	= { .minlen = 1,
 				    .maxlen = NF_CT_HELPER_NAME_LEN },
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 	/* NAT length is checked when parsing the nested attributes. */
 	[OVS_CT_ATTR_NAT]	= { .minlen = 0, .maxlen = INT_MAX },
 #endif
 	[OVS_CT_ATTR_EVENTMASK]	= { .minlen = sizeof(u32),
 				    .maxlen = sizeof(u32) },
+	[OVS_CT_ATTR_TIMEOUT] = { .minlen = 1,
+				  .maxlen = CTNL_TIMEOUT_NAME_MAX },
 };
 
 static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
@@ -1534,7 +1562,7 @@
 				return -EINVAL;
 			}
 			break;
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 		case OVS_CT_ATTR_NAT: {
 			int err = parse_nat(a, info, log);
 
@@ -1547,6 +1575,15 @@
 			info->have_eventmask = true;
 			info->eventmask = nla_get_u32(a);
 			break;
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+		case OVS_CT_ATTR_TIMEOUT:
+			memcpy(info->timeout, nla_data(a), nla_len(a));
+			if (!memchr(info->timeout, '\0', nla_len(a))) {
+				OVS_NLERR(log, "Invalid conntrack timeout");
+				return -EINVAL;
+			}
+			break;
+#endif
 
 		default:
 			OVS_NLERR(log, "Unknown conntrack attr (%d)",
@@ -1628,6 +1665,18 @@
 		OVS_NLERR(log, "Failed to allocate conntrack template");
 		return -ENOMEM;
 	}
+
+	if (ct_info.timeout[0]) {
+		if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
+				      ct_info.timeout))
+			pr_info_ratelimited("Failed to associate timeout "
+					    "policy `%s'\n", ct_info.timeout);
+		else
+			ct_info.nf_ct_timeout = rcu_dereference(
+				nf_ct_timeout_find(ct_info.ct)->timeout);
+
+	}
+
 	if (helper) {
 		err = ovs_ct_add_helper(&ct_info, helper, key, log);
 		if (err)
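
This implements OVS_CT_ATTR_TIMEOUT: a ct action can name an
nfnetlink_cttimeout policy, which is resolved once against the conntrack
template and cached in ct_info.nf_ct_timeout so skb_nfct_cached() can
later verify that packets still use the same policy. A lookup failure is
deliberately non-fatal; the action still installs, only with default
timeouts. Attach/detach in outline (the policy name is illustrative):

	if (nf_ct_set_timeout(net, tmpl, l3num, l4proto, "my-policy"))
		pr_info_ratelimited("cttimeout policy not found\n");
	/* ... */
	nf_ct_destroy_timeout(tmpl);	/* in the action's free path */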
@@ -1647,13 +1696,13 @@
 	return err;
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
 			       struct sk_buff *skb)
 {
 	struct nlattr *start;
 
-	start = nla_nest_start(skb, OVS_CT_ATTR_NAT);
+	start = nla_nest_start_noflag(skb, OVS_CT_ATTR_NAT);
 	if (!start)
 		return false;
 
@@ -1668,7 +1717,7 @@
 	}
 
 	if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
-		if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
+		if (IS_ENABLED(CONFIG_NF_NAT) &&
 		    info->family == NFPROTO_IPV4) {
 			if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN,
 					    info->range.min_addr.ip) ||
@@ -1677,7 +1726,7 @@
 			     (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX,
 					      info->range.max_addr.ip))))
 				return false;
-		} else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
+		} else if (IS_ENABLED(CONFIG_IPV6) &&
 			   info->family == NFPROTO_IPV6) {
 			if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN,
 					     &info->range.min_addr.in6) ||
@@ -1720,7 +1769,7 @@
 {
 	struct nlattr *start;
 
-	start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
+	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CT);
 	if (!start)
 		return -EMSGSIZE;
 
@@ -1748,8 +1797,12 @@
 	if (ct_info->have_eventmask &&
 	    nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask))
 		return -EMSGSIZE;
+	if (ct_info->timeout[0]) {
+		if (nla_put_string(skb, OVS_CT_ATTR_TIMEOUT, ct_info->timeout))
+			return -EMSGSIZE;
+	}
 
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
 	if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
 		return -EMSGSIZE;
 #endif
@@ -1767,10 +1820,18 @@
 
 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
 {
-	if (ct_info->helper)
+	if (ct_info->helper) {
+#if IS_ENABLED(CONFIG_NF_NAT)
+		if (ct_info->nat)
+			nf_nat_helper_put(ct_info->helper);
+#endif
 		nf_conntrack_helper_put(ct_info->helper);
-	if (ct_info->ct)
+	}
+	if (ct_info->ct) {
+		if (ct_info->timeout[0])
+			nf_ct_destroy_timeout(ct_info->ct);
 		nf_ct_tmpl_free(ct_info->ct);
+	}
 }
 
 #if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
@@ -2123,7 +2184,11 @@
 	if (IS_ERR(reply))
 		return PTR_ERR(reply);
 
-	nla_reply = nla_nest_start(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
+	nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
+	if (!nla_reply) {
+		err = -EMSGSIZE;
+		goto exit_err;
+	}
 
 	if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
 		err = ovs_ct_limit_get_zone_limit(
@@ -2149,20 +2214,20 @@
 
 static struct genl_ops ct_limit_genl_ops[] = {
 	{ .cmd = OVS_CT_LIMIT_CMD_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
 					   * privilege. */
-		.policy = ct_limit_policy,
 		.doit = ovs_ct_limit_cmd_set,
 	},
 	{ .cmd = OVS_CT_LIMIT_CMD_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
 					   * privilege. */
-		.policy = ct_limit_policy,
 		.doit = ovs_ct_limit_cmd_del,
 	},
 	{ .cmd = OVS_CT_LIMIT_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = 0,		  /* OK for unprivileged users. */
-		.policy = ct_limit_policy,
 		.doit = ovs_ct_limit_cmd_get,
 	},
 };
@@ -2176,6 +2241,7 @@
 	.name = OVS_CT_LIMIT_FAMILY,
 	.version = OVS_CT_LIMIT_VERSION,
 	.maxattr = OVS_CT_LIMIT_ATTR_MAX,
+	.policy = ct_limit_policy,
 	.netnsok = true,
 	.parallel_ops = true,
 	.ops = ct_limit_genl_ops,
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index 900dadd..59dc327 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 
 #ifndef OVS_CONNTRACK_H
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 0f5ce77..23f67b8 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -235,6 +222,7 @@
 	struct dp_stats_percpu *stats;
 	u64 *stats_counter;
 	u32 n_mask_hit;
+	int error;
 
 	stats = this_cpu_ptr(dp->stats_percpu);
 
@@ -242,7 +230,6 @@
 	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
 	if (unlikely(!flow)) {
 		struct dp_upcall_info upcall;
-		int error;
 
 		memset(&upcall, 0, sizeof(upcall));
 		upcall.cmd = OVS_PACKET_CMD_MISS;
@@ -259,7 +246,10 @@
 
 	ovs_flow_stats_update(flow, key->tp.flags, skb);
 	sf_acts = rcu_dereference(flow->sf_acts);
-	ovs_execute_actions(dp, skb, sf_acts, key);
+	error = ovs_execute_actions(dp, skb, sf_acts, key);
+	if (unlikely(error))
+		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
+							ovs_dp_name(dp), error);
 
 	stats_counter = &stats->n_hit;
 
@@ -448,10 +438,15 @@
 
 	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
 			     0, upcall_info->cmd);
+	if (!upcall) {
+		err = -EINVAL;
+		goto out;
+	}
 	upcall->dp_ifindex = dp_ifindex;
 
 	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
-	BUG_ON(err);
+	if (err)
+		goto out;
 
 	if (upcall_info->userdata)
 		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
@@ -459,15 +454,26 @@
 			  nla_data(upcall_info->userdata));
 
 	if (upcall_info->egress_tun_info) {
-		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+		nla = nla_nest_start_noflag(user_skb,
+					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+		if (!nla) {
+			err = -EMSGSIZE;
+			goto out;
+		}
 		err = ovs_nla_put_tunnel_info(user_skb,
 					      upcall_info->egress_tun_info);
-		BUG_ON(err);
+		if (err)
+			goto out;
+
 		nla_nest_end(user_skb, nla);
 	}
 
 	if (upcall_info->actions_len) {
-		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
+		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
+		if (!nla) {
+			err = -EMSGSIZE;
+			goto out;
+		}
 		err = ovs_nla_put_actions(upcall_info->actions,
 					  upcall_info->actions_len,
 					  user_skb);
@@ -626,8 +632,8 @@
 
 static const struct genl_ops dp_packet_genl_ops[] = {
 	{ .cmd = OVS_PACKET_CMD_EXECUTE,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = packet_policy,
 	  .doit = ovs_packet_cmd_execute
 	}
 };
@@ -637,6 +643,7 @@
 	.name = OVS_PACKET_FAMILY,
 	.version = OVS_PACKET_VERSION,
 	.maxattr = OVS_PACKET_ATTR_MAX,
+	.policy = packet_policy,
 	.netnsok = true,
 	.parallel_ops = true,
 	.ops = dp_packet_genl_ops,
@@ -697,9 +704,13 @@
 {
 	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
 
-	/* OVS_FLOW_ATTR_UFID */
+	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
+	 * see ovs_nla_put_identifier()
+	 */
 	if (sfid && ovs_identifier_is_ufid(sfid))
 		len += nla_total_size(sfid->ufid_len);
+	else
+		len += nla_total_size(ovs_key_attr_size());
 
 	/* OVS_FLOW_ATTR_KEY */
 	if (!sfid || should_fill_key(sfid, ufid_flags))
@@ -764,7 +775,7 @@
 	 * This can only fail for dump operations because the skb is always
 	 * properly sized for single flows.
 	 */
-	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
+	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
 	if (start) {
 		const struct sw_flow_actions *sf_acts;
 
@@ -875,7 +886,10 @@
 	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
 					info->snd_portid, info->snd_seq, 0,
 					cmd, ufid_flags);
-	BUG_ON(retval < 0);
+	if (WARN_ON_ONCE(retval < 0)) {
+		kfree_skb(skb);
+		skb = ERR_PTR(retval);
+	}
 	return skb;
 }
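
Another BUG_ON() demotion: the reply skb was sized for exactly this
flow, so the fill "cannot" fail, but halting the machine if the size
accounting is ever wrong is disproportionate. The conversion warns once,
frees the buffer and hands the error back as an ERR_PTR for the caller
to unwind (fill_reply below stands in for the real fill helper):

	retval = fill_reply(flow, skb);
	if (WARN_ON_ONCE(retval < 0)) {
		kfree_skb(skb);		/* don't leak the reply */
		skb = ERR_PTR(retval);	/* let the caller bail out */
	}
	return skb;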
 
@@ -1043,7 +1057,7 @@
 }
 
 /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
-static struct sw_flow_actions *get_flow_actions(struct net *net,
+static noinline_for_stack struct sw_flow_actions *get_flow_actions(struct net *net,
 						const struct nlattr *a,
 						const struct sw_flow_key *key,
 						const struct sw_flow_mask *mask,
@@ -1077,12 +1091,13 @@
 * we should not return a match object with a dangling reference
 * to mask.
 */
-static int ovs_nla_init_match_and_action(struct net *net,
-					 struct sw_flow_match *match,
-					 struct sw_flow_key *key,
-					 struct nlattr **a,
-					 struct sw_flow_actions **acts,
-					 bool log)
+static noinline_for_stack int
+ovs_nla_init_match_and_action(struct net *net,
+			      struct sw_flow_match *match,
+			      struct sw_flow_key *key,
+			      struct nlattr **a,
+			      struct sw_flow_actions **acts,
+			      bool log)
 {
 	struct sw_flow_mask mask;
 	int error = 0;
@@ -1182,14 +1197,14 @@
 						       ovs_header->dp_ifindex,
 						       reply, info->snd_portid,
 						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW,
+						       OVS_FLOW_CMD_SET,
 						       ufid_flags);
 			BUG_ON(error < 0);
 		}
 	} else {
 		/* Could not alloc without acts before locking. */
 		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
-						info, OVS_FLOW_CMD_NEW, false,
+						info, OVS_FLOW_CMD_SET, false,
 						ufid_flags);
 
 		if (IS_ERR(reply)) {
@@ -1265,7 +1280,7 @@
 	}
 
 	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
-					OVS_FLOW_CMD_NEW, true, ufid_flags);
+					OVS_FLOW_CMD_GET, true, ufid_flags);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
 		goto unlock;
@@ -1330,7 +1345,7 @@
 	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
 					&flow->id, info, false, ufid_flags);
 	if (likely(reply)) {
-		if (likely(!IS_ERR(reply))) {
+		if (!IS_ERR(reply)) {
 			rcu_read_lock();	/*To keep RCU checker happy. */
 			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
 						     reply, info->snd_portid,
@@ -1338,7 +1353,10 @@
 						     OVS_FLOW_CMD_DEL,
 						     ufid_flags);
 			rcu_read_unlock();
-			BUG_ON(err < 0);
+			if (WARN_ON_ONCE(err < 0)) {
+				kfree_skb(reply);
+				goto out_free;
+			}
 
 			ovs_notify(&dp_flow_genl_family, reply, info);
 		} else {
@@ -1346,6 +1364,7 @@
 		}
 	}
 
+out_free:
 	ovs_flow_free(flow, true);
 	return 0;
 unlock:
@@ -1362,8 +1381,8 @@
 	u32 ufid_flags;
 	int err;
 
-	err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
-			    OVS_FLOW_ATTR_MAX, flow_policy, NULL);
+	err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
+				       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
 	if (err)
 		return err;
 	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
@@ -1389,7 +1408,7 @@
 		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					   OVS_FLOW_CMD_NEW, ufid_flags) < 0)
+					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
 			break;
 
 		cb->args[0] = bucket;
@@ -1411,24 +1430,24 @@
 
 static const struct genl_ops dp_flow_genl_ops[] = {
 	{ .cmd = OVS_FLOW_CMD_NEW,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = flow_policy,
 	  .doit = ovs_flow_cmd_new
 	},
 	{ .cmd = OVS_FLOW_CMD_DEL,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = flow_policy,
 	  .doit = ovs_flow_cmd_del
 	},
 	{ .cmd = OVS_FLOW_CMD_GET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = 0,		    /* OK for unprivileged users. */
-	  .policy = flow_policy,
 	  .doit = ovs_flow_cmd_get,
 	  .dumpit = ovs_flow_cmd_dump
 	},
 	{ .cmd = OVS_FLOW_CMD_SET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = flow_policy,
 	  .doit = ovs_flow_cmd_set,
 	},
 };
@@ -1438,6 +1457,7 @@
 	.name = OVS_FLOW_FAMILY,
 	.version = OVS_FLOW_VERSION,
 	.maxattr = OVS_FLOW_ATTR_MAX,
+	.policy = flow_policy,
 	.netnsok = true,
 	.parallel_ops = true,
 	.ops = dp_flow_genl_ops,
@@ -1536,10 +1556,34 @@
 	dp->user_features = 0;
 }
 
-static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
+static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 {
-	if (a[OVS_DP_ATTR_USER_FEATURES])
-		dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+	u32 user_features = 0;
+
+	if (a[OVS_DP_ATTR_USER_FEATURES]) {
+		user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+
+		if (user_features & ~(OVS_DP_F_VPORT_PIDS |
+				      OVS_DP_F_UNALIGNED |
+				      OVS_DP_F_TC_RECIRC_SHARING))
+			return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+		if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
+			return -EOPNOTSUPP;
+#endif
+	}
+
+	dp->user_features = user_features;
+
+	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+		static_branch_enable(&tc_recirc_sharing_support);
+	else
+		static_branch_disable(&tc_recirc_sharing_support);
+
+	return 0;
 }
 
 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
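
ovs_dp_change() now rejects unknown feature bits (previously they were
silently accepted) and mirrors OVS_DP_F_TC_RECIRC_SHARING into a static
key, so the per-packet test costs a patched-in no-op instead of a load
and branch while the feature is off. The reader side of such a key looks
like:

	if (static_branch_unlikely(&tc_recirc_sharing_support)) {
		/* rare path: honour the tc skb extension's recirc id */
	}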
@@ -1601,7 +1645,9 @@
 	parms.port_no = OVSP_LOCAL;
 	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
 
-	ovs_dp_change(dp, a);
+	err = ovs_dp_change(dp, a);
+	if (err)
+		goto err_destroy_meters;
 
 	/* So far only local changes have been made, now need the lock. */
 	ovs_lock();
@@ -1727,10 +1773,12 @@
 	if (IS_ERR(dp))
 		goto err_unlock_free;
 
-	ovs_dp_change(dp, info->attrs);
+	err = ovs_dp_change(dp, info->attrs);
+	if (err)
+		goto err_unlock_free;
 
 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
-				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+				   info->snd_seq, 0, OVS_DP_CMD_SET);
 	BUG_ON(err < 0);
 
 	ovs_unlock();
@@ -1761,7 +1809,7 @@
 		goto err_unlock_free;
 	}
 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
-				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+				   info->snd_seq, 0, OVS_DP_CMD_GET);
 	BUG_ON(err < 0);
 	ovs_unlock();
 
@@ -1785,7 +1833,7 @@
 		if (i >= skip &&
 		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					 OVS_DP_CMD_NEW) < 0)
+					 OVS_DP_CMD_GET) < 0)
 			break;
 		i++;
 	}
@@ -1804,24 +1852,24 @@
 
 static const struct genl_ops dp_datapath_genl_ops[] = {
 	{ .cmd = OVS_DP_CMD_NEW,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = datapath_policy,
 	  .doit = ovs_dp_cmd_new
 	},
 	{ .cmd = OVS_DP_CMD_DEL,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = datapath_policy,
 	  .doit = ovs_dp_cmd_del
 	},
 	{ .cmd = OVS_DP_CMD_GET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = 0,		    /* OK for unprivileged users. */
-	  .policy = datapath_policy,
 	  .doit = ovs_dp_cmd_get,
 	  .dumpit = ovs_dp_cmd_dump
 	},
 	{ .cmd = OVS_DP_CMD_SET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = datapath_policy,
 	  .doit = ovs_dp_cmd_set,
 	},
 };
@@ -1831,6 +1879,7 @@
 	.name = OVS_DATAPATH_FAMILY,
 	.version = OVS_DATAPATH_VERSION,
 	.maxattr = OVS_DP_ATTR_MAX,
+	.policy = datapath_policy,
 	.netnsok = true,
 	.parallel_ops = true,
 	.ops = dp_datapath_genl_ops,
@@ -1843,7 +1892,7 @@
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 				   struct net *net, u32 portid, u32 seq,
-				   u32 flags, u8 cmd)
+				   u32 flags, u8 cmd, gfp_t gfp)
 {
 	struct ovs_header *ovs_header;
 	struct ovs_vport_stats vport_stats;
@@ -1864,7 +1913,7 @@
 		goto nla_put_failure;
 
 	if (!net_eq(net, dev_net(vport->dev))) {
-		int id = peernet2id_alloc(net, dev_net(vport->dev));
+		int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
 
 		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
 			goto nla_put_failure;
@@ -1905,11 +1954,12 @@
 	struct sk_buff *skb;
 	int retval;
 
-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
+	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
+					 GFP_KERNEL);
 	BUG_ON(retval < 0);
 
 	return skb;
@@ -1952,10 +2002,9 @@
 
 }
 
-/* Called with ovs_mutex */
-static void update_headroom(struct datapath *dp)
+static unsigned int ovs_get_max_headroom(struct datapath *dp)
 {
-	unsigned dev_headroom, max_headroom = 0;
+	unsigned int dev_headroom, max_headroom = 0;
 	struct net_device *dev;
 	struct vport *vport;
 	int i;
@@ -1969,10 +2018,19 @@
 		}
 	}
 
-	dp->max_headroom = max_headroom;
+	return max_headroom;
+}
+
+/* Called with ovs_mutex */
+static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
+{
+	struct vport *vport;
+	int i;
+
+	dp->max_headroom = new_headroom;
 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
 		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
-			netdev_set_rx_headroom(vport->dev, max_headroom);
+			netdev_set_rx_headroom(vport->dev, new_headroom);
 }
 
 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1983,6 +2041,7 @@
 	struct sk_buff *reply;
 	struct vport *vport;
 	struct datapath *dp;
+	unsigned int new_headroom;
 	u32 port_no;
 	int err;
 
@@ -2042,10 +2101,12 @@
 
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
-				      OVS_VPORT_CMD_NEW);
+				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
 
-	if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
-		update_headroom(dp);
+	new_headroom = netdev_get_fwd_headroom(vport->dev);
+
+	if (new_headroom > dp->max_headroom)
+		ovs_update_headroom(dp, new_headroom);
 	else
 		netdev_set_rx_headroom(vport->dev, dp->max_headroom);
 
@@ -2101,7 +2162,7 @@
 
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
-				      OVS_VPORT_CMD_NEW);
+				      OVS_VPORT_CMD_SET, GFP_KERNEL);
 	BUG_ON(err < 0);
 
 	ovs_unlock();
@@ -2116,11 +2177,12 @@
 
 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 {
-	bool must_update_headroom = false;
+	bool update_headroom = false;
 	struct nlattr **a = info->attrs;
 	struct sk_buff *reply;
 	struct datapath *dp;
 	struct vport *vport;
+	unsigned int new_headroom;
 	int err;
 
 	reply = ovs_vport_cmd_alloc_info();
@@ -2140,18 +2202,23 @@
 
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
-				      OVS_VPORT_CMD_DEL);
+				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
 	BUG_ON(err < 0);
 
 	/* the vport deletion may trigger dp headroom update */
 	dp = vport->dp;
 	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
-		must_update_headroom = true;
+		update_headroom = true;
+
 	netdev_reset_rx_headroom(vport->dev);
 	ovs_dp_detach_port(vport);
 
-	if (must_update_headroom)
-		update_headroom(dp);
+	if (update_headroom) {
+		new_headroom = ovs_get_max_headroom(dp);
+
+		if (new_headroom < dp->max_headroom)
+			ovs_update_headroom(dp, new_headroom);
+	}
 	ovs_unlock();
 
 	ovs_notify(&dp_vport_genl_family, reply, info);
@@ -2182,7 +2249,7 @@
 		goto exit_unlock_free;
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
-				      OVS_VPORT_CMD_NEW);
+				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
 	BUG_ON(err < 0);
 	rcu_read_unlock();
 
@@ -2218,7 +2285,8 @@
 						    NETLINK_CB(cb->skb).portid,
 						    cb->nlh->nlmsg_seq,
 						    NLM_F_MULTI,
-						    OVS_VPORT_CMD_NEW) < 0)
+						    OVS_VPORT_CMD_GET,
+						    GFP_ATOMIC) < 0)
 				goto out;
 
 			j++;
@@ -2239,7 +2307,7 @@
 	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
 	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
 	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
-	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
 	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
 	[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
 	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
@@ -2247,24 +2315,24 @@
 
 static const struct genl_ops dp_vport_genl_ops[] = {
 	{ .cmd = OVS_VPORT_CMD_NEW,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = vport_policy,
 	  .doit = ovs_vport_cmd_new
 	},
 	{ .cmd = OVS_VPORT_CMD_DEL,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = vport_policy,
 	  .doit = ovs_vport_cmd_del
 	},
 	{ .cmd = OVS_VPORT_CMD_GET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = 0,		    /* OK for unprivileged users. */
-	  .policy = vport_policy,
 	  .doit = ovs_vport_cmd_get,
 	  .dumpit = ovs_vport_cmd_dump
 	},
 	{ .cmd = OVS_VPORT_CMD_SET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = vport_policy,
 	  .doit = ovs_vport_cmd_set,
 	},
 };
@@ -2274,6 +2342,7 @@
 	.name = OVS_VPORT_FAMILY,
 	.version = OVS_VPORT_VERSION,
 	.maxattr = OVS_VPORT_ATTR_MAX,
+	.policy = vport_policy,
 	.netnsok = true,
 	.parallel_ops = true,
 	.ops = dp_vport_genl_ops,
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index c9eb267..81e85dd 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2007-2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #ifndef DATAPATH_H
@@ -231,6 +218,8 @@
 extern struct notifier_block ovs_dp_device_notifier;
 extern struct genl_family dp_vport_genl_family;
 
+DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
 void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index f3ee2f2..7af0cde 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #include <linux/netdevice.h>
@@ -61,7 +48,7 @@
 				if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)
 					continue;
 
-				if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH))
+				if (!(netif_is_ovs_port(vport->dev)))
 					dp_detach_port_notify(vport);
 			}
 		}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 56b8e71..38147e6 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #include <linux/uaccess.h>
@@ -72,7 +59,7 @@
 void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 			   const struct sk_buff *skb)
 {
-	struct flow_stats *stats;
+	struct sw_flow_stats *stats;
 	unsigned int cpu = smp_processor_id();
 	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
@@ -100,7 +87,7 @@
 			if (likely(flow->stats_last_writer != -1) &&
 			    likely(!rcu_access_pointer(flow->stats[cpu]))) {
 				/* Try to allocate CPU-specific stats. */
-				struct flow_stats *new_stats;
+				struct sw_flow_stats *new_stats;
 
 				new_stats =
 					kmem_cache_alloc_node(flow_stats_cache,
@@ -147,7 +134,7 @@
 
 	/* We open code this to make sure cpu 0 is always considered */
 	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
-		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
+		struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
 
 		if (stats) {
 			/* Local CPU may write on non-local stats, so we must
@@ -171,7 +158,7 @@
 
 	/* We open code this to make sure cpu 0 is always considered */
 	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
-		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
+		struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
 
 		if (stats) {
 			spin_lock_bh(&stats->lock);
@@ -254,21 +241,18 @@
 
 static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 {
+	unsigned short frag_off;
+	unsigned int payload_ofs = 0;
 	unsigned int nh_ofs = skb_network_offset(skb);
 	unsigned int nh_len;
-	int payload_ofs;
 	struct ipv6hdr *nh;
-	uint8_t nexthdr;
-	__be16 frag_off;
-	int err;
+	int err, nexthdr, flags = 0;
 
 	err = check_header(skb, nh_ofs + sizeof(*nh));
 	if (unlikely(err))
 		return err;
 
 	nh = ipv6_hdr(skb);
-	nexthdr = nh->nexthdr;
-	payload_ofs = (u8 *)(nh + 1) - skb->data;
 
 	key->ip.proto = NEXTHDR_NONE;
 	key->ip.tos = ipv6_get_dsfield(nh);
@@ -277,22 +261,23 @@
 	key->ipv6.addr.src = nh->saddr;
 	key->ipv6.addr.dst = nh->daddr;
 
-	payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
-
-	if (frag_off) {
-		if (frag_off & htons(~0x7))
+	nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
+	if (flags & IP6_FH_F_FRAG) {
+		if (frag_off) {
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
-		else
-			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+			key->ip.proto = nexthdr;
+			return 0;
+		}
+		key->ip.frag = OVS_FRAG_TYPE_FIRST;
 	} else {
 		key->ip.frag = OVS_FRAG_TYPE_NONE;
 	}
 
-	/* Delayed handling of error in ipv6_skip_exthdr() as it
-	 * always sets frag_off to a valid value which may be
+	/* Delayed handling of error in ipv6_find_hdr() as it
+	 * always sets flags and frag_off to valid values which may be
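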
 	 * used to set key->ip.frag above.
 	 */
-	if (unlikely(payload_ofs < 0))
+	if (unlikely(nexthdr < 0))
 		return -EPROTO;
 
 	nh_len = payload_ofs - nh_ofs;
@@ -329,7 +314,7 @@
 		return -ENOMEM;
 
 	vh = (struct vlan_head *)skb->data;
-	key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT);
+	key_vh->tci = vh->tci | htons(VLAN_CFI_MASK);
 	key_vh->tpid = vh->tpid;
 
 	if (unlikely(untag_vlan)) {
@@ -362,7 +347,7 @@
 	int res;
 
 	if (skb_vlan_tag_present(skb)) {
-		key->eth.vlan.tci = htons(skb->vlan_tci);
+		key->eth.vlan.tci = htons(skb->vlan_tci) | htons(VLAN_CFI_MASK);
 		key->eth.vlan.tpid = skb->vlan_proto;
 	} else {
 		/* Parse outer vlan tag in the non-accelerated case. */
@@ -538,78 +523,15 @@
 }
 
 /**
- * key_extract - extracts a flow key from an Ethernet frame.
+ * key_extract_l3l4 - extracts L3/L4 header information.
  * @skb: sk_buff that contains the frame, with skb->data pointing to the
- * Ethernet header
+ *       L3 header
  * @key: output flow key
  *
- * The caller must ensure that skb->len >= ETH_HLEN.
- *
- * Returns 0 if successful, otherwise a negative errno value.
- *
- * Initializes @skb header fields as follows:
- *
- *    - skb->mac_header: the L2 header.
- *
- *    - skb->network_header: just past the L2 header, or just past the
- *      VLAN header, to the first byte of the L2 payload.
- *
- *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
- *      on output, then just past the IP header, if one is present and
- *      of a correct length, otherwise the same as skb->network_header.
- *      For other key->eth.type values it is left untouched.
- *
- *    - skb->protocol: the type of the data starting at skb->network_header.
- *      Equals to key->eth.type.
  */
-static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
+static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
 {
 	int error;
-	struct ethhdr *eth;
-
-	/* Flags are always used as part of stats */
-	key->tp.flags = 0;
-
-	skb_reset_mac_header(skb);
-
-	/* Link layer. */
-	clear_vlan(key);
-	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
-		if (unlikely(eth_type_vlan(skb->protocol)))
-			return -EINVAL;
-
-		skb_reset_network_header(skb);
-		key->eth.type = skb->protocol;
-	} else {
-		eth = eth_hdr(skb);
-		ether_addr_copy(key->eth.src, eth->h_source);
-		ether_addr_copy(key->eth.dst, eth->h_dest);
-
-		__skb_pull(skb, 2 * ETH_ALEN);
-		/* We are going to push all headers that we pull, so no need to
-		* update skb->csum here.
-		*/
-
-		if (unlikely(parse_vlan(skb, key)))
-			return -ENOMEM;
-
-		key->eth.type = parse_ethertype(skb);
-		if (unlikely(key->eth.type == htons(0)))
-			return -ENOMEM;
-
-		/* Multiple tagged packets need to retain TPID to satisfy
-		 * skb_vlan_pop(), which will later shift the ethertype into
-		 * skb->protocol.
-		 */
-		if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
-			skb->protocol = key->eth.cvlan.tpid;
-		else
-			skb->protocol = key->eth.type;
-
-		skb_reset_network_header(skb);
-		__skb_push(skb, skb->data - skb_mac_header(skb));
-	}
-	skb_reset_mac_len(skb);
 
 	/* Network layer. */
 	if (key->eth.type == htons(ETH_P_IP)) {
@@ -638,6 +560,7 @@
 		offset = nh->frag_off & htons(IP_OFFSET);
 		if (offset) {
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
+			memset(&key->tp, 0, sizeof(key->tp));
 			return 0;
 		}
 		if (nh->frag_off & htons(IP_MF) ||
@@ -755,8 +678,10 @@
 			return error;
 		}
 
-		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
+		if (key->ip.frag == OVS_FRAG_TYPE_LATER) {
+			memset(&key->tp, 0, sizeof(key->tp));
 			return 0;
+		}
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
 
@@ -803,6 +728,92 @@
 	return 0;
 }
 
+/**
+ * key_extract - extracts a flow key from an Ethernet frame.
+ * @skb: sk_buff that contains the frame, with skb->data pointing to the
+ * Ethernet header
+ * @key: output flow key
+ *
+ * The caller must ensure that skb->len >= ETH_HLEN.
+ *
+ * Returns 0 if successful, otherwise a negative errno value.
+ *
+ * Initializes @skb header fields as follows:
+ *
+ *    - skb->mac_header: the L2 header.
+ *
+ *    - skb->network_header: just past the L2 header, or just past the
+ *      VLAN header, to the first byte of the L2 payload.
+ *
+ *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
+ *      on output, then just past the IP header, if one is present and
+ *      of a correct length, otherwise the same as skb->network_header.
+ *      For other key->eth.type values it is left untouched.
+ *
+ *    - skb->protocol: the type of the data starting at skb->network_header.
+ *      Equals to key->eth.type.
+ */
+static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	struct ethhdr *eth;
+
+	/* Flags are always used as part of stats */
+	key->tp.flags = 0;
+
+	skb_reset_mac_header(skb);
+
+	/* Link layer. */
+	clear_vlan(key);
+	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
+		if (unlikely(eth_type_vlan(skb->protocol)))
+			return -EINVAL;
+
+		skb_reset_network_header(skb);
+		key->eth.type = skb->protocol;
+	} else {
+		eth = eth_hdr(skb);
+		ether_addr_copy(key->eth.src, eth->h_source);
+		ether_addr_copy(key->eth.dst, eth->h_dest);
+
+		__skb_pull(skb, 2 * ETH_ALEN);
+		/* We are going to push all headers that we pull, so no need to
+		 * update skb->csum here.
+		 */
+
+		if (unlikely(parse_vlan(skb, key)))
+			return -ENOMEM;
+
+		key->eth.type = parse_ethertype(skb);
+		if (unlikely(key->eth.type == htons(0)))
+			return -ENOMEM;
+
+		/* Multiple tagged packets need to retain TPID to satisfy
+		 * skb_vlan_pop(), which will later shift the ethertype into
+		 * skb->protocol.
+		 */
+		if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK))
+			skb->protocol = key->eth.cvlan.tpid;
+		else
+			skb->protocol = key->eth.type;
+
+		skb_reset_network_header(skb);
+		__skb_push(skb, skb->data - skb_mac_header(skb));
+	}
+
+	skb_reset_mac_len(skb);
+
+	/* Fill out L3/L4 key info, if any */
+	return key_extract_l3l4(skb, key);
+}
+
+/* Conntrack fragment handling hands the flow code packets that start
+ * at the L3 header, so expose a helper that refreshes only L3/L4 info.
+ */
+int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	return key_extract_l3l4(skb, key);
+}
+
 int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
 {
 	int res;
@@ -831,6 +842,9 @@
 int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 			 struct sk_buff *skb, struct sw_flow_key *key)
 {
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	struct tc_skb_ext *tc_ext;
+#endif
 	int res, err;
 
 	/* Extract metadata from packet. */
@@ -863,7 +877,17 @@
 	if (res < 0)
 		return res;
 	key->mac_proto = res;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	if (static_branch_unlikely(&tc_recirc_sharing_support)) {
+		tc_ext = skb_ext_find(skb, TC_SKB_EXT);
+		key->recirc_id = tc_ext ? tc_ext->chain : 0;
+	} else {
+		key->recirc_id = 0;
+	}
+#else
 	key->recirc_id = 0;
+#endif
 
 	err = key_extract(skb, key);
 	if (!err)
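
The reworked parse_ipv6hdr() above leans on ipv6_find_hdr() to walk the
extension-header chain and report fragment state through IP6_FH_F_FRAG
and frag_off, instead of hand-rolling ipv6_skip_exthdr() bookkeeping.
A standalone userspace model of the resulting classification (the
FRAG_* names mirror the kernel's OVS_FRAG_TYPE_* enum and are assumed
here):

/* cc -o fragdemo fragdemo.c */
#include <stdio.h>

enum frag_type { FRAG_NONE, FRAG_FIRST, FRAG_LATER };

static enum frag_type classify(int has_frag_hdr, unsigned short frag_off)
{
	if (!has_frag_hdr)
		return FRAG_NONE;	/* no fragment header at all */
	return frag_off ? FRAG_LATER	/* nonzero offset: later fragment */
			: FRAG_FIRST;	/* offset 0: first fragment */
}

int main(void)
{
	printf("%d %d %d\n", classify(0, 0), classify(1, 0), classify(1, 8));
	return 0;
}
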
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index c670dd2..b830d5f 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2007-2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #ifndef FLOW_H
@@ -30,7 +17,6 @@
 #include <linux/in6.h>
 #include <linux/jiffies.h>
 #include <linux/time.h>
-#include <linux/flex_array.h>
 #include <linux/cpumask.h>
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
@@ -60,7 +46,7 @@
 
 struct vlan_head {
 	__be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/
-	__be16 tci;  /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+	__be16 tci;  /* 0 if no VLAN, VLAN_CFI_MASK set otherwise. */
 };
 
 #define OVS_SW_FLOW_KEY_METADATA_SIZE			\
@@ -208,7 +194,7 @@
 	struct nlattr actions[];
 };
 
-struct flow_stats {
+struct sw_flow_stats {
 	u64 packet_count;		/* Number of packets matched. */
 	u64 byte_count;			/* Number of bytes matched. */
 	unsigned long used;		/* Last used time (in jiffies). */
@@ -230,7 +216,7 @@
 	struct cpumask cpu_used_mask;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
-	struct flow_stats __rcu *stats[]; /* One for each CPU.  First one
+	struct sw_flow_stats __rcu *stats[]; /* One for each CPU.  First one
 					   * is allocated at flow creation time,
 					   * the rest are allocated on demand
 					   * while holding the 'stats[0].lock'.
@@ -284,6 +270,7 @@
 u64 ovs_flow_used_time(unsigned long flow_jiffies);
 
 int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key);
 int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 			 struct sk_buff *skb,
 			 struct sw_flow_key *key);
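
The VLAN_TAG_PRESENT -> VLAN_CFI_MASK rename running through these
files is mechanical: both names denote TCI bit 12 (0x1000), which OVS
keeps using as its "a tag is present" marker inside the flow key. A
standalone demo of the TCI bit layout (mask values as in
include/uapi/linux/if_vlan.h):

/* cc -o tci tci.c */
#include <stdio.h>
#include <stdint.h>

#define VLAN_PRIO_MASK	0xe000	/* PCP, bits 13..15 */
#define VLAN_CFI_MASK	0x1000	/* CFI/DEI, bit 12 */
#define VLAN_VID_MASK	0x0fff	/* VLAN ID, bits 0..11 */

int main(void)
{
	uint16_t tci = 5 | (3 << 13) | VLAN_CFI_MASK;	/* vid 5, prio 3 */

	printf("vid=%d prio=%d present=%d\n",
	       tci & VLAN_VID_MASK,
	       (tci & VLAN_PRIO_MASK) >> 13,
	       !!(tci & VLAN_CFI_MASK));
	return 0;
}
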
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 865ecef..d7559c6 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -91,6 +78,7 @@
 		case OVS_ACTION_ATTR_SET:
 		case OVS_ACTION_ATTR_SET_MASKED:
 		case OVS_ACTION_ATTR_METER:
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
 		default:
 			return true;
 		}
@@ -403,6 +391,7 @@
 	[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
 	[OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
 	[OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
+	[OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE]   = { .len = 0 },
 };
 
 static const struct ovs_len_tbl
@@ -500,7 +489,7 @@
 			return -EINVAL;
 		}
 
-		if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
+		if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
 			attrs |= 1 << type;
 			a[type] = nla;
 		}
@@ -666,6 +655,7 @@
 			      bool log)
 {
 	bool ttl = false, ipv4 = false, ipv6 = false;
+	bool info_bridge_mode = false;
 	__be16 tun_flags = 0;
 	int opts_type = 0;
 	struct nlattr *a;
@@ -782,6 +772,10 @@
 			tun_flags |= TUNNEL_ERSPAN_OPT;
 			opts_type = type;
 			break;
+		case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE:
+			info_bridge_mode = true;
+			ipv4 = true;
+			break;
 		default:
 			OVS_NLERR(log, "Unknown IP tunnel attribute %d",
 				  type);
@@ -812,16 +806,29 @@
 			OVS_NLERR(log, "IP tunnel dst address not specified");
 			return -EINVAL;
 		}
-		if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
-			OVS_NLERR(log, "IPv4 tunnel dst address is zero");
-			return -EINVAL;
+		if (ipv4) {
+			if (info_bridge_mode) {
+				if (match->key->tun_key.u.ipv4.src ||
+				    match->key->tun_key.u.ipv4.dst ||
+				    match->key->tun_key.tp_src ||
+				    match->key->tun_key.tp_dst ||
+				    match->key->tun_key.ttl ||
+				    match->key->tun_key.tos ||
+				    tun_flags & ~TUNNEL_KEY) {
+					OVS_NLERR(log, "IPv4 tun info is not correct");
+					return -EINVAL;
+				}
+			} else if (!match->key->tun_key.u.ipv4.dst) {
+				OVS_NLERR(log, "IPv4 tunnel dst address is zero");
+				return -EINVAL;
+			}
 		}
 		if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
 			OVS_NLERR(log, "IPv6 tunnel dst address is zero");
 			return -EINVAL;
 		}
 
-		if (!ttl) {
+		if (!ttl && !info_bridge_mode) {
 			OVS_NLERR(log, "IP tunnel TTL not specified.");
 			return -EINVAL;
 		}
@@ -836,7 +843,7 @@
 	const struct vxlan_metadata *opts = tun_opts;
 	struct nlattr *nla;
 
-	nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
+	nla = nla_nest_start_noflag(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
 	if (!nla)
 		return -EMSGSIZE;
 
@@ -850,12 +857,17 @@
 static int __ip_tun_to_nlattr(struct sk_buff *skb,
 			      const struct ip_tunnel_key *output,
 			      const void *tun_opts, int swkey_tun_opts_len,
-			      unsigned short tun_proto)
+			      unsigned short tun_proto, u8 mode)
 {
 	if (output->tun_flags & TUNNEL_KEY &&
 	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
 			 OVS_TUNNEL_KEY_ATTR_PAD))
 		return -EMSGSIZE;
+
+	if (mode & IP_TUNNEL_INFO_BRIDGE)
+		return nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE)
+		       ? -EMSGSIZE : 0;
+
 	switch (tun_proto) {
 	case AF_INET:
 		if (output->u.ipv4.src &&
@@ -918,17 +930,17 @@
 static int ip_tun_to_nlattr(struct sk_buff *skb,
 			    const struct ip_tunnel_key *output,
 			    const void *tun_opts, int swkey_tun_opts_len,
-			    unsigned short tun_proto)
+			    unsigned short tun_proto, u8 mode)
 {
 	struct nlattr *nla;
 	int err;
 
-	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
+	nla = nla_nest_start_noflag(skb, OVS_KEY_ATTR_TUNNEL);
 	if (!nla)
 		return -EMSGSIZE;
 
 	err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
-				 tun_proto);
+				 tun_proto, mode);
 	if (err)
 		return err;
 
@@ -942,7 +954,7 @@
 	return __ip_tun_to_nlattr(skb, &tun_info->key,
 				  ip_tunnel_info_opts(tun_info),
 				  tun_info->options_len,
-				  ip_tunnel_info_af(tun_info));
+				  ip_tunnel_info_af(tun_info), tun_info->mode);
 }
 
 static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
@@ -990,9 +1002,9 @@
 	if (a[OVS_KEY_ATTR_VLAN])
 		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
 
-	if (!(tci & htons(VLAN_TAG_PRESENT))) {
+	if (!(tci & htons(VLAN_CFI_MASK))) {
 		if (tci) {
-			OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
+			OVS_NLERR(log, "%s TCI does not have VLAN_CFI_MASK bit set.",
 				  (inner) ? "C-VLAN" : "VLAN");
 			return -EINVAL;
 		} else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
@@ -1013,9 +1025,9 @@
 	__be16 tci = 0;
 	__be16 tpid = 0;
 	bool encap_valid = !!(match->key->eth.vlan.tci &
-			      htons(VLAN_TAG_PRESENT));
+			      htons(VLAN_CFI_MASK));
 	bool i_encap_valid = !!(match->key->eth.cvlan.tci &
-				htons(VLAN_TAG_PRESENT));
+				htons(VLAN_CFI_MASK));
 
 	if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
 		/* Not a VLAN. */
@@ -1039,8 +1051,8 @@
 			  (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
 		return -EINVAL;
 	}
-	if (!(tci & htons(VLAN_TAG_PRESENT))) {
-		OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
+	if (!(tci & htons(VLAN_CFI_MASK))) {
+		OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_CFI_MASK bit.",
 			  (inner) ? "C-VLAN" : "VLAN");
 		return -EINVAL;
 	}
@@ -1095,7 +1107,7 @@
 	if (err)
 		return err;
 
-	encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT));
+	encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_CFI_MASK));
 	if (encap_valid) {
 		err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
 						is_mask, log);
@@ -1932,7 +1944,7 @@
 {
 	struct nlattr *start;
 
-	start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
+	start = nla_nest_start_noflag(skb, OVS_KEY_ATTR_NSH);
 	if (!start)
 		return -EMSGSIZE;
 
@@ -1980,7 +1992,7 @@
 			opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
 
 		if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
-				     swkey->tun_opts_len, swkey->tun_proto))
+				     swkey->tun_opts_len, swkey->tun_proto, 0))
 			goto nla_put_failure;
 	}
 
@@ -2015,14 +2027,15 @@
 		if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
 			if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
 				goto nla_put_failure;
-			encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+			encap = nla_nest_start_noflag(skb, OVS_KEY_ATTR_ENCAP);
 			if (!swkey->eth.vlan.tci)
 				goto unencap;
 
 			if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
 				if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
 					goto nla_put_failure;
-				in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+				in_encap = nla_nest_start_noflag(skb,
+								 OVS_KEY_ATTR_ENCAP);
 				if (!swkey->eth.cvlan.tci)
 					goto unencap;
 			}
@@ -2201,7 +2214,7 @@
 	int err;
 	struct nlattr *nla;
 
-	nla = nla_nest_start(skb, attr);
+	nla = nla_nest_start_noflag(skb, attr);
 	if (!nla)
 		return -EMSGSIZE;
 	err = __ovs_nla_put_key(swkey, output, is_mask, skb);
@@ -2306,14 +2319,14 @@
 
 	struct sw_flow_actions *acts;
 	int new_acts_size;
-	int req_size = NLA_ALIGN(attr_len);
+	size_t req_size = NLA_ALIGN(attr_len);
 	int next_offset = offsetof(struct sw_flow_actions, actions) +
 					(*sfa)->actions_len;
 
 	if (req_size <= (ksize(*sfa) - next_offset))
 		goto out;
 
-	new_acts_size = ksize(*sfa) * 2;
+	new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
 
 	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
 		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
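
The max() above fixes a genuine resize bug: doubling ksize(*sfa) is not
enough when a single attribute is larger than the whole current buffer,
so the new size must also cover next_offset + req_size. A standalone
model of the corrected growth policy:

/* cc -o grow grow.c */
#include <stdio.h>
#include <stddef.h>

static size_t grow(size_t old_size, size_t next_offset, size_t req_size)
{
	size_t need = next_offset + req_size;
	size_t doubled = old_size * 2;

	return need > doubled ? need : doubled;	/* max(), as in the patch */
}

int main(void)
{
	/* Pure doubling would give 64, still short of 24 + 100 = 124. */
	printf("%zu\n", grow(32, 24, 100));
	return 0;
}
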
@@ -2605,6 +2618,8 @@
 	tun_info->mode = IP_TUNNEL_INFO_TX;
 	if (key.tun_proto == AF_INET6)
 		tun_info->mode |= IP_TUNNEL_INFO_IPV6;
+	else if (key.tun_proto == AF_INET && key.tun_key.u.ipv4.dst == 0)
+		tun_info->mode |= IP_TUNNEL_INFO_BRIDGE;
 	tun_info->key = key.tun_key;
 
 	/* We need to store the options in the action itself since
@@ -2826,8 +2841,8 @@
 	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
 	int error;
 
-	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
-				 userspace_policy, NULL);
+	error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr,
+					    userspace_policy, NULL);
 	if (error)
 		return error;
 
@@ -2838,6 +2853,88 @@
 	return 0;
 }
 
+static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = {
+	[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = { .type = NLA_U16 },
+	[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = { .type = NLA_NESTED },
+	[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = { .type = NLA_NESTED },
+};
+
+static int validate_and_copy_check_pkt_len(struct net *net,
+					   const struct nlattr *attr,
+					   const struct sw_flow_key *key,
+					   struct sw_flow_actions **sfa,
+					   __be16 eth_type, __be16 vlan_tci,
+					   bool log, bool last)
+{
+	const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
+	struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
+	struct check_pkt_len_arg arg;
+	int nested_acts_start;
+	int start, err;
+
+	err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX,
+					  nla_data(attr), nla_len(attr),
+					  cpl_policy, NULL);
+	if (err)
+		return err;
+
+	if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] ||
+	    !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]))
+		return -EINVAL;
+
+	acts_if_lesser_eq = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL];
+	acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER];
+
+	/* Both nested action lists must be present. */
+	if (!acts_if_greater || !acts_if_lesser_eq)
+		return -EINVAL;
+
+	/* validation done, copy the nested actions. */
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN,
+					log);
+	if (start < 0)
+		return start;
+
+	arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]);
+	arg.exec_for_lesser_equal =
+		last || !actions_may_change_flow(acts_if_lesser_eq);
+	arg.exec_for_greater =
+		last || !actions_may_change_flow(acts_if_greater);
+
+	err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg,
+				 sizeof(arg), log);
+	if (err)
+		return err;
+
+	nested_acts_start = add_nested_action_start(sfa,
+		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log);
+	if (nested_acts_start < 0)
+		return nested_acts_start;
+
+	err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
+				     eth_type, vlan_tci, log);
+
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, nested_acts_start);
+
+	nested_acts_start = add_nested_action_start(sfa,
+		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log);
+	if (nested_acts_start < 0)
+		return nested_acts_start;
+
+	err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
+				     eth_type, vlan_tci, log);
+
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, nested_acts_start);
+	add_nested_action_end(*sfa, start);
+	return 0;
+}
+
 static int copy_action(const struct nlattr *from,
 		       struct sw_flow_actions **sfa, bool log)
 {
@@ -2884,6 +2981,7 @@
 			[OVS_ACTION_ATTR_POP_NSH] = 0,
 			[OVS_ACTION_ATTR_METER] = sizeof(u32),
 			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
+			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -2943,7 +3041,7 @@
 			vlan = nla_data(a);
 			if (!eth_type_vlan(vlan->vlan_tpid))
 				return -EINVAL;
-			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
+			if (!(vlan->vlan_tci & htons(VLAN_CFI_MASK)))
 				return -EINVAL;
 			vlan_tci = vlan->vlan_tci;
 			break;
@@ -2959,7 +3057,7 @@
 			/* Prohibit push MPLS other than to a white list
 			 * for packets that have a known tag order.
 			 */
-			if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
+			if (vlan_tci & htons(VLAN_CFI_MASK) ||
 			    (eth_type != htons(ETH_P_IP) &&
 			     eth_type != htons(ETH_P_IPV6) &&
 			     eth_type != htons(ETH_P_ARP) &&
@@ -2971,7 +3069,7 @@
 		}
 
 		case OVS_ACTION_ATTR_POP_MPLS:
-			if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
+			if (vlan_tci & htons(VLAN_CFI_MASK) ||
 			    !eth_p_mpls(eth_type))
 				return -EINVAL;
 
@@ -3036,7 +3134,7 @@
 		case OVS_ACTION_ATTR_POP_ETH:
 			if (mac_proto != MAC_PROTO_ETHERNET)
 				return -EINVAL;
-			if (vlan_tci & htons(VLAN_TAG_PRESENT))
+			if (vlan_tci & htons(VLAN_CFI_MASK))
 				return -EINVAL;
 			mac_proto = MAC_PROTO_NONE;
 			break;
@@ -3085,6 +3183,19 @@
 			break;
 		}
 
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
+			bool last = nla_is_last(a, rem);
+
+			err = validate_and_copy_check_pkt_len(net, a, key, sfa,
+							      eth_type,
+							      vlan_tci, log,
+							      last);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+		}
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
@@ -3130,7 +3241,7 @@
 	const struct sample_arg *arg;
 	struct nlattr *actions;
 
-	start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
+	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
 	if (!start)
 		return -EMSGSIZE;
 
@@ -3143,7 +3254,7 @@
 		goto out;
 	}
 
-	ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
+	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
 	if (!ac_start) {
 		err = -EMSGSIZE;
 		goto out;
@@ -3169,7 +3280,7 @@
 	struct nlattr *start;
 	int err = 0, rem = nla_len(attr);
 
-	start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE);
+	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CLONE);
 	if (!start)
 		return -EMSGSIZE;
 
@@ -3183,6 +3294,75 @@
 	return err;
 }
 
+static int check_pkt_len_action_to_attr(const struct nlattr *attr,
+					struct sk_buff *skb)
+{
+	struct nlattr *start, *ac_start = NULL;
+	const struct check_pkt_len_arg *arg;
+	const struct nlattr *a, *cpl_arg;
+	int err = 0, rem = nla_len(attr);
+
+	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN);
+	if (!start)
+		return -EMSGSIZE;
+
+	/* The first nested attribute in 'attr' is always
+	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
+	 */
+	cpl_arg = nla_data(attr);
+	arg = nla_data(cpl_arg);
+
+	if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	/* Second nested attribute in 'attr' is always
+	 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
+	 */
+	a = nla_next(cpl_arg, &rem);
+	ac_start = nla_nest_start_noflag(skb,
+					 OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL);
+	if (!ac_start) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+	if (err) {
+		nla_nest_cancel(skb, ac_start);
+		goto out;
+	} else {
+		nla_nest_end(skb, ac_start);
+	}
+
+	/* Third nested attribute in 'attr' is always
+	 * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER.
+	 */
+	a = nla_next(a, &rem);
+	ac_start = nla_nest_start_noflag(skb,
+					 OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER);
+	if (!ac_start) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+	if (err) {
+		nla_nest_cancel(skb, ac_start);
+		goto out;
+	} else {
+		nla_nest_end(skb, ac_start);
+	}
+
+	nla_nest_end(skb, start);
+	return 0;
+
+out:
+	nla_nest_cancel(skb, start);
+	return err;
+}
+
 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 {
 	const struct nlattr *ovs_key = nla_data(a);
@@ -3195,14 +3375,14 @@
 		struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
 		struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
 
-		start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
+		start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET);
 		if (!start)
 			return -EMSGSIZE;
 
 		err =  ip_tun_to_nlattr(skb, &tun_info->key,
 					ip_tunnel_info_opts(tun_info),
 					tun_info->options_len,
-					ip_tunnel_info_af(tun_info));
+					ip_tunnel_info_af(tun_info), tun_info->mode);
 		if (err)
 			return err;
 		nla_nest_end(skb, start);
@@ -3227,7 +3407,7 @@
 	/* Revert the conversion we did from a non-masked set action to
 	 * masked set action.
 	 */
-	nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
+	nla = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET);
 	if (!nla)
 		return -EMSGSIZE;
 
@@ -3277,6 +3457,12 @@
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+			err = check_pkt_len_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		default:
 			if (nla_put(skb, type, nla_len(a), nla_data(a)))
 				return -EMSGSIZE;
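
For the new OVS_ACTION_ATTR_CHECK_PKT_LEN action, the validation code
above precomputes, per nested action list, whether it may run directly
on the original packet: that is safe when check_pkt_len is the last
action, or when the nested list cannot modify the flow key; otherwise
the datapath must clone first. A userspace model of that decision:

/* cc -o cpl cpl.c */
#include <stdio.h>
#include <stdbool.h>

static bool exec_in_place(bool is_last_action, bool may_change_flow)
{
	return is_last_action || !may_change_flow;
}

int main(void)
{
	printf("%d %d %d\n",
	       exec_in_place(true, true),	/* 1: last action        */
	       exec_in_place(false, false),	/* 1: read-only actions  */
	       exec_in_place(false, true));	/* 0: must clone         */
	return 0;
}
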
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 6657606..fe7f77f 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2007-2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 
@@ -30,7 +17,6 @@
 #include <linux/in6.h>
 #include <linux/jiffies.h>
 #include <linux/time.h>
-#include <linux/flex_array.h>
 
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 80ea2a7..cf3582c 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #include "flow.h"
@@ -79,7 +66,7 @@
 struct sw_flow *ovs_flow_alloc(void)
 {
 	struct sw_flow *flow;
-	struct flow_stats *stats;
+	struct sw_flow_stats *stats;
 
 	flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
 	if (!flow)
@@ -111,29 +98,6 @@
 	return table->count;
 }
 
-static struct flex_array *alloc_buckets(unsigned int n_buckets)
-{
-	struct flex_array *buckets;
-	int i, err;
-
-	buckets = flex_array_alloc(sizeof(struct hlist_head),
-				   n_buckets, GFP_KERNEL);
-	if (!buckets)
-		return NULL;
-
-	err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
-	if (err) {
-		flex_array_free(buckets);
-		return NULL;
-	}
-
-	for (i = 0; i < n_buckets; i++)
-		INIT_HLIST_HEAD((struct hlist_head *)
-					flex_array_get(buckets, i));
-
-	return buckets;
-}
-
 static void flow_free(struct sw_flow *flow)
 {
 	int cpu;
@@ -146,7 +110,7 @@
 	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
 		if (flow->stats[cpu])
 			kmem_cache_free(flow_stats_cache,
-					(struct flow_stats __force *)flow->stats[cpu]);
+					(struct sw_flow_stats __force *)flow->stats[cpu]);
 	kmem_cache_free(flow_cache, flow);
 }
 
@@ -168,31 +132,30 @@
 		flow_free(flow);
 }
 
-static void free_buckets(struct flex_array *buckets)
-{
-	flex_array_free(buckets);
-}
-
-
 static void __table_instance_destroy(struct table_instance *ti)
 {
-	free_buckets(ti->buckets);
+	kvfree(ti->buckets);
 	kfree(ti);
 }
 
 static struct table_instance *table_instance_alloc(int new_size)
 {
 	struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+	int i;
 
 	if (!ti)
 		return NULL;
 
-	ti->buckets = alloc_buckets(new_size);
-
+	ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head),
+				     GFP_KERNEL);
 	if (!ti->buckets) {
 		kfree(ti);
 		return NULL;
 	}
+
+	for (i = 0; i < new_size; i++)
+		INIT_HLIST_HEAD(&ti->buckets[i]);
+
 	ti->n_buckets = new_size;
 	ti->node_ver = 0;
 	ti->keep_flows = false;
@@ -249,7 +212,7 @@
 
 	for (i = 0; i < ti->n_buckets; i++) {
 		struct sw_flow *flow;
-		struct hlist_head *head = flex_array_get(ti->buckets, i);
+		struct hlist_head *head = &ti->buckets[i];
 		struct hlist_node *n;
 		int ver = ti->node_ver;
 		int ufid_ver = ufid_ti->node_ver;
@@ -294,7 +257,7 @@
 	ver = ti->node_ver;
 	while (*bucket < ti->n_buckets) {
 		i = 0;
-		head = flex_array_get(ti->buckets, *bucket);
+		head = &ti->buckets[*bucket];
 		hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
 			if (i < *last) {
 				i++;
@@ -313,8 +276,7 @@
 static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
 {
 	hash = jhash_1word(hash, ti->hash_seed);
-	return flex_array_get(ti->buckets,
-				(hash & (ti->n_buckets - 1)));
+	return &ti->buckets[hash & (ti->n_buckets - 1)];
 }
 
 static void table_instance_insert(struct table_instance *ti,
@@ -347,9 +309,7 @@
 	/* Insert in new table. */
 	for (i = 0; i < old->n_buckets; i++) {
 		struct sw_flow *flow;
-		struct hlist_head *head;
-
-		head = flex_array_get(old->buckets, i);
+		struct hlist_head *head = &old->buckets[i];
 
 		if (ufid)
 			hlist_for_each_entry(flow, head,
@@ -752,13 +712,13 @@
 
 	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
 				       + (nr_cpu_ids
-					  * sizeof(struct flow_stats *)),
+					  * sizeof(struct sw_flow_stats *)),
 				       0, 0, NULL);
 	if (flow_cache == NULL)
 		return -ENOMEM;
 
 	flow_stats_cache
-		= kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+		= kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
 				    0, SLAB_HWCACHE_ALIGN, NULL);
 	if (flow_stats_cache == NULL) {
 		kmem_cache_destroy(flow_cache);
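
With flex_array gone, the bucket table above is a plain array obtained
from kvmalloc_array(), which attempts kmalloc first and falls back to
vmalloc for large allocations. Lookup then reduces to masking the hash,
which only works because n_buckets stays a power of two. A standalone
demo of that indexing:

/* cc -o bucket bucket.c */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int n_buckets = 1024;		/* must be a power of two */
	uint32_t hash = 0xdeadbeef;

	printf("bucket %u of %u\n", hash & (n_buckets - 1), n_buckets);
	return 0;
}
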
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 2dd9900..bc52045 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2007-2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #ifndef FLOW_TABLE_H
@@ -29,7 +16,6 @@
 #include <linux/in6.h>
 #include <linux/jiffies.h>
 #include <linux/time.h>
-#include <linux/flex_array.h>
 
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
@@ -37,7 +23,7 @@
 #include "flow.h"
 
 struct table_instance {
-	struct flex_array *buckets;
+	struct hlist_head *buckets;
 	unsigned int n_buckets;
 	struct rcu_head rcu;
 	int node_ver;
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index c038e02..3323b79 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -127,7 +124,7 @@
 			      OVS_METER_ATTR_PAD))
 		goto error;
 
-	nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+	nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
 	if (!nla)
 		goto error;
 
@@ -136,7 +133,7 @@
 	for (i = 0; i < meter->n_bands; ++i, ++band) {
 		struct nlattr *band_nla;
 
-		band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+		band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
 		if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
 					 sizeof(struct ovs_flow_stats),
 					 &band->stats))
@@ -166,11 +163,11 @@
 	    nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
 		goto nla_put_failure;
 
-	nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+	nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
 	if (!nla)
 		goto nla_put_failure;
 
-	band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+	band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
 	if (!band_nla)
 		goto nla_put_failure;
 	/* Currently only DROP band type is supported. */
@@ -206,8 +203,7 @@
 			return ERR_PTR(-EINVAL);
 
 	/* Allocate and set up the meter before locking anything. */
-	meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
-			sizeof(*meter), GFP_KERNEL);
+	meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL);
 	if (!meter)
 		return ERR_PTR(-ENOMEM);
 
@@ -228,9 +224,9 @@
 		struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
 		u32 band_max_delta_t;
 
-		err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
-				nla_data(nla), nla_len(nla), band_policy,
-				NULL);
+		err = nla_parse_deprecated((struct nlattr **)&attr,
+					   OVS_BAND_ATTR_MAX, nla_data(nla),
+					   nla_len(nla), band_policy, NULL);
 		if (err)
 			goto exit_free_meter;
 
@@ -527,27 +523,27 @@
 
 static struct genl_ops dp_meter_genl_ops[] = {
 	{ .cmd = OVS_METER_CMD_FEATURES,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = 0,		  /* OK for unprivileged users. */
-		.policy = meter_policy,
 		.doit = ovs_meter_cmd_features
 	},
 	{ .cmd = OVS_METER_CMD_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
 					   *  privilege.
 					   */
-		.policy = meter_policy,
 		.doit = ovs_meter_cmd_set,
 	},
 	{ .cmd = OVS_METER_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = 0,		  /* OK for unprivileged users. */
-		.policy = meter_policy,
 		.doit = ovs_meter_cmd_get,
 	},
 	{ .cmd = OVS_METER_CMD_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
 					   *  privilege.
 					   */
-		.policy = meter_policy,
 		.doit = ovs_meter_cmd_del
 	},
 };
@@ -561,6 +557,7 @@
 	.name = OVS_METER_FAMILY,
 	.version = OVS_METER_VERSION,
 	.maxattr = OVS_METER_ATTR_MAX,
+	.policy = meter_policy,
 	.netnsok = true,
 	.parallel_ops = true,
 	.ops = dp_meter_genl_ops,
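
The kzalloc() conversion above replaces open-coded flexible-array
sizing with struct_size(), which computes the same total with overflow
checking. A userspace model of what it evaluates to (the struct names
here are illustrative, not the kernel's):

/* cc -o metersize metersize.c */
#include <stdio.h>
#include <stddef.h>

struct band { unsigned int rate, burst; };

struct meter {
	unsigned int n_bands;
	struct band bands[];	/* flexible array member */
};

int main(void)
{
	size_t n = 4;
	/* struct_size(m, bands, n) ~= sizeof(*m) + n * sizeof(m->bands[0]) */
	size_t sz = sizeof(struct meter) + n * sizeof(struct band);

	printf("allocation size: %zu bytes\n", sz);
	return 0;
}
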
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index 964ace2..f645913 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 
 #ifndef METER_H
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 5aaf3ba..89a8e15 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -93,7 +89,7 @@
 		return ERR_CAST(dev);
 	}
 
-	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
 	if (err < 0) {
 		rtnl_delete_link(dev);
 		rtnl_unlock();
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 0e72d95..e6b5e76 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -68,7 +55,7 @@
 		return ERR_CAST(dev);
 	}
 
-	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
 	if (err < 0) {
 		rtnl_delete_link(dev);
 		rtnl_unlock();
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index bb95c43..58a7b83 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #include <linux/if_vlan.h>
@@ -43,7 +30,8 @@
 }
 
 /* Called with rcu_read_lock_bh. */
-static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t
+internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	int len, err;
 
@@ -62,7 +50,7 @@
 	} else {
 		netdev->stats.tx_errors++;
 	}
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static int internal_dev_open(struct net_device *netdev)
@@ -149,7 +137,7 @@
 	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
 			      IFF_NO_QUEUE;
 	netdev->needs_free_netdev = true;
-	netdev->priv_destructor = internal_dev_destructor;
+	netdev->priv_destructor = NULL;
 	netdev->ethtool_ops = &internal_dev_ethtool_ops;
 	netdev->rtnl_link_ops = &internal_dev_link_ops;
 
@@ -169,6 +157,7 @@
 {
 	struct vport *vport;
 	struct internal_dev *internal_dev;
+	struct net_device *dev;
 	int err;
 
 	vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms);
@@ -177,8 +166,9 @@
 		goto error;
 	}
 
-	vport->dev = alloc_netdev(sizeof(struct internal_dev),
-				  parms->name, NET_NAME_USER, do_setup);
+	dev = alloc_netdev(sizeof(struct internal_dev),
+			   parms->name, NET_NAME_USER, do_setup);
+	vport->dev = dev;
 	if (!vport->dev) {
 		err = -ENOMEM;
 		goto error_free_vport;
@@ -201,6 +191,7 @@
 	err = register_netdevice(vport->dev);
 	if (err)
 		goto error_unlock;
+	vport->dev->priv_destructor = internal_dev_destructor;
 
 	dev_set_promiscuity(vport->dev, 1);
 	rtnl_unlock();
@@ -210,9 +201,9 @@
 
 error_unlock:
 	rtnl_unlock();
-	free_percpu(vport->dev->tstats);
+	free_percpu(dev->tstats);
 error_free_netdev:
-	free_netdev(vport->dev);
+	free_netdev(dev);
 error_free_vport:
 	ovs_vport_free(vport);
 error:
@@ -243,7 +234,7 @@
 	}
 
 	skb_dst_drop(skb);
-	nf_reset(skb);
+	nf_reset_ct(skb);
 	secpath_reset(skb);
 
 	skb->pkt_type = PACKET_HOST;
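
The internal-dev changes above close a double-free window: the
priv_destructor is installed only once register_netdevice() has
succeeded, so a registration failure no longer triggers both the
destructor and the manual error path, and the local 'dev' pointer keeps
the error path independent of vport teardown. A hedged sketch of the
resulting pattern (kernel APIs, fragment for illustration):

	err = register_netdevice(dev);
	if (err) {
		/* Destructor not set yet: this path must free. */
		free_percpu(dev->tstats);
		free_netdev(dev);
		return err;
	}
	/* From here on, unregistration owns the cleanup. */
	dev->priv_destructor = internal_dev_destructor;
	return 0;
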
diff --git a/net/openvswitch/vport-internal_dev.h b/net/openvswitch/vport-internal_dev.h
index 1b179a1..0112d1b 100644
--- a/net/openvswitch/vport-internal_dev.h
+++ b/net/openvswitch/vport-internal_dev.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2007-2011 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #ifndef VPORT_INTERNAL_DEV_H
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 2e5e7a4..57d6436 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -84,7 +71,6 @@
 	struct vport *local;
 
 	local = ovs_vport_ovsl(dp, OVSP_LOCAL);
-	BUG_ON(!local);
 	return local->dev;
 }
 
@@ -170,7 +156,7 @@
 static void netdev_destroy(struct vport *vport)
 {
 	rtnl_lock();
-	if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
+	if (netif_is_ovs_port(vport->dev))
 		ovs_netdev_detach_dev(vport);
 	rtnl_unlock();
 
@@ -180,7 +166,7 @@
 void ovs_netdev_tunnel_destroy(struct vport *vport)
 {
 	rtnl_lock();
-	if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
+	if (netif_is_ovs_port(vport->dev))
 		ovs_netdev_detach_dev(vport);
 
 	/* We can be invoked by both explicit vport deletion and
@@ -200,7 +186,7 @@
 /* Returns null if this device is not attached to a datapath. */
 struct vport *ovs_netdev_get_vport(struct net_device *dev)
 {
-	if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
+	if (likely(netif_is_ovs_port(dev)))
 		return (struct vport *)
 			rcu_dereference_rtnl(dev->rx_handler_data);
 	else
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index 19e29c1..c5d83a4 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2007-2011 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #ifndef VPORT_NETDEV_H
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 7e6301b..188e9c1 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2014 Nicira, Inc.
  * Copyright (c) 2013 Cisco Systems, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #include <linux/kernel.h>
@@ -43,7 +30,7 @@
 	if (vxlan->cfg.flags & VXLAN_F_GBP) {
 		struct nlattr *exts;
 
-		exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
+		exts = nla_nest_start_noflag(skb, OVS_TUNNEL_ATTR_EXTENSION);
 		if (!exts)
 			return -EMSGSIZE;
 
@@ -70,8 +57,8 @@
 	if (nla_len(attr) < sizeof(struct nlattr))
 		return -EINVAL;
 
-	err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(exts, OVS_VXLAN_EXT_MAX, attr,
+					  exts_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -131,7 +118,7 @@
 		return ERR_CAST(dev);
 	}
 
-	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
 	if (err < 0) {
 		rtnl_delete_link(dev);
 		rtnl_unlock();
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 19f6765..3fc38d1 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #include <linux/etherdevice.h>
@@ -261,8 +248,6 @@
  */
 void ovs_vport_del(struct vport *vport)
 {
-	ASSERT_OVSL();
-
 	hlist_del_rcu(&vport->hash_node);
 	module_put(vport->ops->owner);
 	vport->ops->destroy(vport);
@@ -319,7 +304,7 @@
 	if (!vport->ops->get_options)
 		return 0;
 
-	nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
+	nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_OPTIONS);
 	if (!nla)
 		return -EMSGSIZE;
 
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index cda66c2..1eb7495 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #ifndef VPORT_H
diff --git a/net/packet/Kconfig b/net/packet/Kconfig
index cc55b35..b4abad1 100644
--- a/net/packet/Kconfig
+++ b/net/packet/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Packet configuration
 #
diff --git a/net/packet/Makefile b/net/packet/Makefile
index 9df6134..97d502e 100644
--- a/net/packet/Makefile
+++ b/net/packet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the packet AF.
 #
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6477b13..82a50e8 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -43,13 +44,6 @@
  *		Chetan Loke	:	Implemented TPACKET_V3 block abstraction
  *					layer.
  *					Copyright (C) 2011, <lokec@ccs.neu.edu>
- *
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  */
 
 #include <linux/types.h>
@@ -275,24 +269,22 @@
 	return po->xmit == packet_direct_xmit;
 }
 
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
-				  struct net_device *sb_dev)
-{
-	return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
-}
-
 static u16 packet_pick_tx_queue(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
+	int cpu = raw_smp_processor_id();
 	u16 queue_index;
 
+#ifdef CONFIG_XPS
+	skb->sender_cpu = cpu + 1;
+#endif
+	skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
 	if (ops->ndo_select_queue) {
-		queue_index = ops->ndo_select_queue(dev, skb, NULL,
-						    __packet_pick_tx_queue);
+		queue_index = ops->ndo_select_queue(dev, skb, NULL);
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	} else {
-		queue_index = __packet_pick_tx_queue(dev, skb, NULL);
+		queue_index = netdev_pick_tx(dev, skb, NULL);
 	}
 
 	return queue_index;
@@ -392,7 +384,7 @@
 	smp_wmb();
 }
 
-static int __packet_get_status(struct packet_sock *po, void *frame)
+static int __packet_get_status(const struct packet_sock *po, void *frame)
 {
 	union tpacket_uhdr h;
 
@@ -468,10 +460,10 @@
 	return ts_status;
 }
 
-static void *packet_lookup_frame(struct packet_sock *po,
-		struct packet_ring_buffer *rb,
-		unsigned int position,
-		int status)
+static void *packet_lookup_frame(const struct packet_sock *po,
+				 const struct packet_ring_buffer *rb,
+				 unsigned int position,
+				 int status)
 {
 	unsigned int pg_vec_pos, frame_offset;
 	union tpacket_uhdr h;
@@ -766,7 +758,7 @@
 	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
 	struct sock *sk = &po->sk;
 
-	if (po->stats.stats3.tp_drops)
+	if (atomic_read(&po->tp_drops))
 		status |= TP_STATUS_LOSING;
 
 	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
@@ -1011,7 +1003,6 @@
 /* Assumes caller has the sk->rx_queue.lock */
 static void *__packet_lookup_frame_in_block(struct packet_sock *po,
 					    struct sk_buff *skb,
-						int status,
 					    unsigned int len
 					    )
 {
@@ -1083,7 +1074,7 @@
 					po->rx_ring.head, status);
 		return curr;
 	case TPACKET_V3:
-		return __packet_lookup_frame_in_block(po, skb, status, len);
+		return __packet_lookup_frame_in_block(po, skb, len);
 	default:
 		WARN(1, "TPACKET version not supported\n");
 		BUG();
@@ -1091,10 +1082,10 @@
 	}
 }
 
-static void *prb_lookup_block(struct packet_sock *po,
-				     struct packet_ring_buffer *rb,
-				     unsigned int idx,
-				     int status)
+static void *prb_lookup_block(const struct packet_sock *po,
+			      const struct packet_ring_buffer *rb,
+			      unsigned int idx,
+			      int status)
 {
 	struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
 	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
@@ -1207,12 +1198,12 @@
 #define ROOM_LOW	0x1
 #define ROOM_NORMAL	0x2
 
-static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
+static bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
 {
 	int idx, len;
 
-	len = po->rx_ring.frame_max + 1;
-	idx = po->rx_ring.head;
+	len = READ_ONCE(po->rx_ring.frame_max) + 1;
+	idx = READ_ONCE(po->rx_ring.head);
 	if (pow_off)
 		idx += len >> pow_off;
 	if (idx >= len)
@@ -1220,12 +1211,12 @@
 	return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
 }
 
-static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
+static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
 {
 	int idx, len;
 
-	len = po->rx_ring.prb_bdqc.knum_blocks;
-	idx = po->rx_ring.prb_bdqc.kactive_blk_num;
+	len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
+	idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
 	if (pow_off)
 		idx += len >> pow_off;
 	if (idx >= len)
@@ -1233,15 +1224,18 @@
 	return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
 }
 
-static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+static int __packet_rcv_has_room(const struct packet_sock *po,
+				 const struct sk_buff *skb)
 {
-	struct sock *sk = &po->sk;
+	const struct sock *sk = &po->sk;
 	int ret = ROOM_NONE;
 
 	if (po->prot_hook.func != tpacket_rcv) {
-		int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
-					  - (skb ? skb->truesize : 0);
-		if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
+		int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+		int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
+				   - (skb ? skb->truesize : 0);
+
+		if (avail > (rcvbuf >> ROOM_POW_OFF))
 			return ROOM_NORMAL;
 		else if (avail > 0)
 			return ROOM_LOW;
@@ -1266,19 +1260,24 @@
 
 static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
 {
-	int ret;
-	bool has_room;
+	int pressure, ret;
 
-	spin_lock_bh(&po->sk.sk_receive_queue.lock);
 	ret = __packet_rcv_has_room(po, skb);
-	has_room = ret == ROOM_NORMAL;
-	if (po->pressure == has_room)
-		po->pressure = !has_room;
-	spin_unlock_bh(&po->sk.sk_receive_queue.lock);
+	pressure = ret != ROOM_NORMAL;
+
+	if (READ_ONCE(po->pressure) != pressure)
+		WRITE_ONCE(po->pressure, pressure);
 
 	return ret;
 }
 
+static void packet_rcv_try_clear_pressure(struct packet_sock *po)
+{
+	if (READ_ONCE(po->pressure) &&
+	    __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
+		WRITE_ONCE(po->pressure,  0);
+}
+
 static void packet_sock_destruct(struct sock *sk)
 {
 	skb_queue_purge(&sk->sk_error_queue);
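
Note on this hunk: po->pressure loses its spinlock. The writer and all readers now pair READ_ONCE()/WRITE_ONCE(), and the test-before-write keeps uncontended callers from dirtying the cache line. A minimal sketch of the pattern (hypothetical flag, not the kernel code):

#include <linux/compiler.h>	/* READ_ONCE / WRITE_ONCE */

static int pressure;	/* hypothetical lockless status flag */

static void update_pressure(int new_val)
{
	/* Write only on change, so callers that agree on the value
	 * leave the cache line clean. */
	if (READ_ONCE(pressure) != new_val)
		WRITE_ONCE(pressure, new_val);
}
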
@@ -1359,7 +1358,7 @@
 	i = j = min_t(int, po->rollover->sock, num - 1);
 	do {
 		po_next = pkt_sk(f->arr[i]);
-		if (po_next != po_skip && !po_next->pressure &&
+		if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
 		    packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
 			if (i != j)
 				po->rollover->sock = i;
@@ -1822,7 +1821,7 @@
 	skb_dst_drop(skb);
 
 	/* drop conntrack reference */
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	spkt = &PACKET_SKB_CB(skb)->sa.pkt;
 
@@ -1850,6 +1849,16 @@
 	return 0;
 }
 
+static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
+{
+	if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
+	    sock->type == SOCK_RAW) {
+		skb_reset_mac_header(skb);
+		skb->protocol = dev_parse_header_protocol(skb);
+	}
+
+	skb_probe_transport_header(skb);
+}
 
 /*
  *	Output a raw packet to a device layer. This bypasses all the other
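
The new packet_parse_headers() replaces the Ethernet-only tpacket_set_protocol() deleted further down: for SOCK_RAW sockets it asks the device's header_ops to recover the protocol instead of assuming an Ethernet header. For reference, the generic helper it leans on reads roughly as follows in v5.4's include/linux/netdevice.h:

static inline __be16 dev_parse_header_protocol(const struct sk_buff *skb)
{
	const struct net_device *dev = skb->dev;

	if (!dev->header_ops || !dev->header_ops->parse_protocol)
		return 0;
	return dev->header_ops->parse_protocol(skb);
}
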
@@ -1965,12 +1974,12 @@
 	skb->mark = sk->sk_mark;
 	skb->tstamp = sockc.transmit_time;
 
-	sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
+	skb_setup_tx_timestamp(skb, sockc.tsflags);
 
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;
 
-	skb_probe_transport_header(skb, 0);
+	packet_parse_headers(skb, sock);
 
 	dev_queue_xmit(skb);
 	rcu_read_unlock();
@@ -2112,7 +2121,7 @@
 	skb_dst_drop(skb);
 
 	/* drop conntrack reference */
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.stats1.tp_packets++;
@@ -2124,10 +2133,8 @@
 
 drop_n_acct:
 	is_drop_n_account = true;
-	spin_lock(&sk->sk_receive_queue.lock);
-	po->stats.stats1.tp_drops++;
+	atomic_inc(&po->tp_drops);
 	atomic_inc(&sk->sk_drops);
-	spin_unlock(&sk->sk_receive_queue.lock);
 
 drop_n_restore:
 	if (skb_head != skb->data && skb_shared(skb)) {
@@ -2191,6 +2198,12 @@
 	if (!res)
 		goto drop_n_restore;
 
+	/* If we are flooded, just give up */
+	if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
+		atomic_inc(&po->tp_drops);
+		goto drop_n_restore;
+	}
+
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		status |= TP_STATUS_CSUMNOTREADY;
 	else if (skb->pkt_type != PACKET_OUTGOING &&
@@ -2261,7 +2274,7 @@
 	 * Anyways, moving it for V1/V2 only as V3 doesn't need this
 	 * at packet level.
 	 */
-		if (po->stats.stats1.tp_drops)
+		if (atomic_read(&po->tp_drops))
 			status |= TP_STATUS_LOSING;
 	}
 
@@ -2377,9 +2390,9 @@
 	return 0;
 
 drop_n_account:
-	is_drop_n_account = true;
-	po->stats.stats1.tp_drops++;
 	spin_unlock(&sk->sk_receive_queue.lock);
+	atomic_inc(&po->tp_drops);
+	is_drop_n_account = true;
 
 	sk->sk_data_ready(sk);
 	kfree_skb(copy_skb);
@@ -2399,20 +2412,14 @@
 
 		ts = __packet_set_timestamp(po, ph, skb);
 		__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
+
+		if (!packet_read_pending(&po->tx_ring))
+			complete(&po->skb_completion);
 	}
 
 	sock_wfree(skb);
 }
 
-static void tpacket_set_protocol(const struct net_device *dev,
-				 struct sk_buff *skb)
-{
-	if (dev->type == ARPHRD_ETHER) {
-		skb_reset_mac_header(skb);
-		skb->protocol = eth_hdr(skb)->h_proto;
-	}
-}
-
 static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
 {
 	if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
@@ -2460,7 +2467,7 @@
 	skb->priority = po->sk.sk_priority;
 	skb->mark = po->sk.sk_mark;
 	skb->tstamp = sockc->transmit_time;
-	sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
+	skb_setup_tx_timestamp(skb, sockc->tsflags);
 	skb_zcopy_set_nouarg(skb, ph.raw);
 
 	skb_reserve(skb, hlen);
@@ -2483,8 +2490,6 @@
 			return err;
 		if (!dev_validate_header(dev, skb->data, hdrlen))
 			return -EINVAL;
-		if (!skb->protocol)
-			tpacket_set_protocol(dev, skb);
 
 		data += hdrlen;
 		to_write -= hdrlen;
@@ -2519,7 +2524,7 @@
 		len = ((to_write > len_max) ? len_max : to_write);
 	}
 
-	skb_probe_transport_header(skb, 0);
+	packet_parse_headers(skb, sock);
 
 	return tp_len;
 }
@@ -2594,7 +2599,7 @@
 
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 {
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	struct net_device *dev;
 	struct virtio_net_hdr *vnet_hdr = NULL;
 	struct sockcm_cookie sockc;
@@ -2603,19 +2608,26 @@
 	void *ph;
 	DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
 	bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
+	unsigned char *addr = NULL;
 	int tp_len, size_max;
-	unsigned char *addr;
 	void *data;
 	int len_sum = 0;
 	int status = TP_STATUS_AVAILABLE;
 	int hlen, tlen, copylen = 0;
+	long timeo = 0;
 
 	mutex_lock(&po->pg_vec_lock);
 
+	/* packet_sendmsg() check on tx_ring.pg_vec was lockless,
+	 * we need to confirm it under protection of pg_vec_lock.
+	 */
+	if (unlikely(!po->tx_ring.pg_vec)) {
+		err = -EBUSY;
+		goto out;
+	}
 	if (likely(saddr == NULL)) {
 		dev	= packet_cached_dev_get(po);
 		proto	= po->num;
-		addr	= NULL;
 	} else {
 		err = -EINVAL;
 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2625,8 +2637,13 @@
 						sll_addr)))
 			goto out;
 		proto	= saddr->sll_protocol;
-		addr	= saddr->sll_addr;
 		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
+		if (po->sk.sk_socket->type == SOCK_DGRAM) {
+			if (dev && msg->msg_namelen < dev->addr_len +
+				   offsetof(struct sockaddr_ll, sll_addr))
+				goto out_put;
+			addr = saddr->sll_addr;
+		}
 	}
 
 	err = -ENXIO;
@@ -2651,12 +2668,21 @@
 	if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
 		size_max = dev->mtu + reserve + VLAN_HLEN;
 
+	reinit_completion(&po->skb_completion);
+
 	do {
 		ph = packet_current_frame(po, &po->tx_ring,
 					  TP_STATUS_SEND_REQUEST);
 		if (unlikely(ph == NULL)) {
-			if (need_wait && need_resched())
-				schedule();
+			if (need_wait && skb) {
+				timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
+				timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
+				if (timeo <= 0) {
+					err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
+					goto out_put;
+				}
+			}
+			/* check for additional frames */
 			continue;
 		}
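
This hunk pairs with the tpacket_destruct_skb() change above: instead of busy-looping around schedule() while every ring frame is still owned by the kernel, the sender parks on po->skb_completion, which the destructor signals once the last pending frame drains. The handshake, condensed from the two hunks:

/* sender (tpacket_snd), serialized by pg_vec_lock */
reinit_completion(&po->skb_completion);
timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
/* timeo == 0 -> -ETIMEDOUT, timeo < 0 -> -ERESTARTSYS */

/* skb destructor (tpacket_destruct_skb) */
if (!packet_read_pending(&po->tx_ring))
	complete(&po->skb_completion);
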
 
@@ -2798,7 +2824,7 @@
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	unsigned char *addr;
+	unsigned char *addr = NULL;
 	int err, reserve = 0;
 	struct sockcm_cookie sockc;
 	struct virtio_net_hdr vnet_hdr = { 0 };
@@ -2815,7 +2841,6 @@
 	if (likely(saddr == NULL)) {
 		dev	= packet_cached_dev_get(po);
 		proto	= po->num;
-		addr	= NULL;
 	} else {
 		err = -EINVAL;
 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2823,8 +2848,13 @@
 		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
 			goto out;
 		proto	= saddr->sll_protocol;
-		addr	= saddr->sll_addr;
 		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
+		if (sock->type == SOCK_DGRAM) {
+			if (dev && msg->msg_namelen < dev->addr_len +
+				   offsetof(struct sockaddr_ll, sll_addr))
+				goto out_unlock;
+			addr = saddr->sll_addr;
+		}
 	}
 
 	err = -ENXIO;
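
This check (here and in the tpacket_snd hunk above) closes an out-of-bounds read: sll_addr used to be dereferenced for dev->addr_len bytes even when the caller supplied a shorter address. A userspace sketch that satisfies the new requirement on a SOCK_DGRAM packet socket (fd, ifindex and dst_mac are assumed to come from the caller):

#include <string.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

static ssize_t send_dgram(int fd, int ifindex,
			  const unsigned char dst_mac[ETH_ALEN],
			  const void *buf, size_t len)
{
	struct sockaddr_ll sll;

	memset(&sll, 0, sizeof(sll));
	sll.sll_family   = AF_PACKET;
	sll.sll_protocol = htons(ETH_P_IP);
	sll.sll_ifindex  = ifindex;
	sll.sll_halen    = ETH_ALEN;
	memcpy(sll.sll_addr, dst_mac, ETH_ALEN);

	/* sizeof(sll) covers offsetof(sll_addr) + addr_len for Ethernet,
	 * so the new kernel-side length check is satisfied. */
	return sendto(fd, buf, len, 0, (struct sockaddr *)&sll, sizeof(sll));
}
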
@@ -2883,7 +2913,8 @@
 			goto out_free;
 	} else if (reserve) {
 		skb_reserve(skb, -reserve);
-		if (len < reserve)
+		if (len < reserve + sizeof(struct ipv6hdr) &&
+		    dev->min_header_len != dev->hard_header_len)
 			skb_reset_network_header(skb);
 	}
 
@@ -2898,7 +2929,7 @@
 		goto out_free;
 	}
 
-	sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
+	skb_setup_tx_timestamp(skb, sockc.tsflags);
 
 	if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
 	    !packet_extra_vlan_len_allowed(dev, skb)) {
@@ -2920,7 +2951,7 @@
 		virtio_net_hdr_set_proto(skb, &vnet_hdr);
 	}
 
-	skb_probe_transport_header(skb, reserve);
+	packet_parse_headers(skb, sock);
 
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;
@@ -3008,8 +3039,8 @@
 
 	synchronize_net();
 
+	kfree(po->rollover);
 	if (f) {
-		kfree(po->rollover);
 		fanout_release_data(f);
 		kfree(f);
 	}
@@ -3207,6 +3238,7 @@
 	sock_init_data(sock, sk);
 
 	po = pkt_sk(sk);
+	init_completion(&po->skb_completion);
 	sk->sk_family = PF_PACKET;
 	po->num = proto;
 	po->xmit = dev_queue_xmit;
@@ -3240,7 +3272,7 @@
 	}
 
 	mutex_lock(&net->packet.sklist_lock);
-	sk_add_node_rcu(sk, &net->packet.sklist);
+	sk_add_node_tail_rcu(sk, &net->packet.sklist);
 	mutex_unlock(&net->packet.sklist_lock);
 
 	preempt_disable();
@@ -3304,8 +3336,7 @@
 	if (skb == NULL)
 		goto out;
 
-	if (pkt_sk(sk)->pressure)
-		packet_rcv_has_room(pkt_sk(sk), NULL);
+	packet_rcv_try_clear_pressure(pkt_sk(sk));
 
 	if (pkt_sk(sk)->has_vnet_hdr) {
 		err = packet_rcv_vnet(msg, skb, &len);
@@ -3340,20 +3371,29 @@
 	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
+		int copy_len;
+
 		/* If the address length field is there to be filled
 		 * in, we fill it in now.
 		 */
 		if (sock->type == SOCK_PACKET) {
 			__sockaddr_check_size(sizeof(struct sockaddr_pkt));
 			msg->msg_namelen = sizeof(struct sockaddr_pkt);
+			copy_len = msg->msg_namelen;
 		} else {
 			struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
 
 			msg->msg_namelen = sll->sll_halen +
 				offsetof(struct sockaddr_ll, sll_addr);
+			copy_len = msg->msg_namelen;
+			if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
+				memset(msg->msg_name +
+				       offsetof(struct sockaddr_ll, sll_addr),
+				       0, sizeof(sll->sll_addr));
+				msg->msg_namelen = sizeof(struct sockaddr_ll);
+			}
 		}
-		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
-		       msg->msg_namelen);
+		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
 	}
 
 	if (pkt_sk(sk)->auxdata) {
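
The msg_name hunk guarantees that recvmsg() reports at least sizeof(struct sockaddr_ll): only the sll_halen valid bytes of the hardware address are copied out, the unused tail of sll_addr is zeroed first so no uninitialized kernel bytes reach userspace, and msg_namelen is then rounded up to the full structure size. Condensed:

copy_len = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
if (copy_len < sizeof(struct sockaddr_ll)) {
	/* clear the tail of sll_addr before reporting the full size */
	memset(msg->msg_name + offsetof(struct sockaddr_ll, sll_addr),
	       0, sizeof(sll->sll_addr));
	msg->msg_namelen = sizeof(struct sockaddr_ll);
}
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
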
@@ -3808,6 +3848,20 @@
 
 		return fanout_set_data(po, optval, optlen);
 	}
+	case PACKET_IGNORE_OUTGOING:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+		if (val < 0 || val > 1)
+			return -EINVAL;
+
+		po->prot_hook.ignore_outgoing = !!val;
+		return 0;
+	}
 	case PACKET_TX_HAS_OFF:
 	{
 		unsigned int val;
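
PACKET_IGNORE_OUTGOING is a new boolean option: when set, the socket's protocol hook skips frames with pkt_type PACKET_OUTGOING, i.e. transmitted packets looped back to taps. Minimal userspace usage (fd is an existing AF_PACKET socket):

#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

int one = 1;

if (setsockopt(fd, SOL_PACKET, PACKET_IGNORE_OUTGOING,
	       &one, sizeof(one)) < 0)
	perror("PACKET_IGNORE_OUTGOING");
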
@@ -3854,6 +3908,7 @@
 	void *data = &val;
 	union tpacket_stats_u st;
 	struct tpacket_rollover_stats rstats;
+	int drops;
 
 	if (level != SOL_PACKET)
 		return -ENOPROTOOPT;
@@ -3870,14 +3925,17 @@
 		memcpy(&st, &po->stats, sizeof(st));
 		memset(&po->stats, 0, sizeof(po->stats));
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		drops = atomic_xchg(&po->tp_drops, 0);
 
 		if (po->tp_version == TPACKET_V3) {
 			lv = sizeof(struct tpacket_stats_v3);
-			st.stats3.tp_packets += st.stats3.tp_drops;
+			st.stats3.tp_drops = drops;
+			st.stats3.tp_packets += drops;
 			data = &st.stats3;
 		} else {
 			lv = sizeof(struct tpacket_stats);
-			st.stats1.tp_packets += st.stats1.tp_drops;
+			st.stats1.tp_drops = drops;
+			st.stats1.tp_packets += drops;
 			data = &st.stats1;
 		}
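
With tp_drops now a standalone atomic_t (see the net/packet/internal.h hunk at the end of this file), drop accounting no longer needs sk_receive_queue.lock, and the statistics path snapshots and clears the counter in one step:

/* receive hot paths: */
atomic_inc(&po->tp_drops);

/* PACKET_STATISTICS getsockopt: read and reset atomically, so any
 * concurrent increment lands either in this snapshot or in the next
 * one, never lost and never counted twice. */
drops = atomic_xchg(&po->tp_drops, 0);
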
 
@@ -3931,6 +3989,9 @@
 			((u32)po->fanout->flags << 24)) :
 		       0);
 		break;
+	case PACKET_IGNORE_OUTGOING:
+		val = po->prot_hook.ignore_outgoing;
+		break;
 	case PACKET_ROLLOVER_STATS:
 		if (!po->rollover)
 			return -EINVAL;
@@ -4056,11 +4117,6 @@
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		return put_user(amount, (int __user *)arg);
 	}
-	case SIOCGSTAMP:
-		return sock_get_timestamp(sk, (struct timeval __user *)arg);
-	case SIOCGSTAMPNS:
-		return sock_get_timestampns(sk, (struct timespec __user *)arg);
-
 #ifdef CONFIG_INET
 	case SIOCADDRT:
 	case SIOCDELRT:
@@ -4098,8 +4154,7 @@
 			TP_STATUS_KERNEL))
 			mask |= EPOLLIN | EPOLLRDNORM;
 	}
-	if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
-		po->pressure = 0;
+	packet_rcv_try_clear_pressure(po);
 	spin_unlock_bh(&sk->sk_receive_queue.lock);
 	spin_lock_bh(&sk->sk_write_queue.lock);
 	if (po->tx_ring.pg_vec) {
@@ -4189,7 +4244,7 @@
 	struct pgv *pg_vec;
 	int i;
 
-	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
+	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
 	if (unlikely(!pg_vec))
 		goto out;
 
@@ -4270,7 +4325,7 @@
 		rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
 		if (unlikely(rb->frames_per_block == 0))
 			goto out;
-		if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
+		if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
 			goto out;
 		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
 					req->tp_frame_nr))
@@ -4293,7 +4348,7 @@
 				    req3->tp_sizeof_priv ||
 				    req3->tp_feature_req_word) {
 					err = -EINVAL;
-					goto out;
+					goto out_free_pg_vec;
 				}
 			}
 			break;
@@ -4357,6 +4412,7 @@
 			prb_shutdown_retire_blk_timer(po, rb_queue);
 	}
 
+out_free_pg_vec:
 	if (pg_vec)
 		free_pg_vec(pg_vec, order, req->tp_block_nr);
 out:
@@ -4436,6 +4492,7 @@
 	.getname =	packet_getname_spkt,
 	.poll =		datagram_poll,
 	.ioctl =	packet_ioctl,
+	.gettstamp =	sock_gettstamp,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
 	.setsockopt =	sock_no_setsockopt,
@@ -4457,6 +4514,7 @@
 	.getname =	packet_getname,
 	.poll =		packet_poll,
 	.ioctl =	packet_ioctl,
+	.gettstamp =	sock_gettstamp,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
 	.setsockopt =	packet_setsockopt,
@@ -4569,14 +4627,29 @@
 
 static int __init packet_init(void)
 {
-	int rc = proto_register(&packet_proto, 0);
+	int rc;
 
-	if (rc != 0)
+	rc = proto_register(&packet_proto, 0);
+	if (rc)
 		goto out;
+	rc = sock_register(&packet_family_ops);
+	if (rc)
+		goto out_proto;
+	rc = register_pernet_subsys(&packet_net_ops);
+	if (rc)
+		goto out_sock;
+	rc = register_netdevice_notifier(&packet_netdev_notifier);
+	if (rc)
+		goto out_pernet;
 
-	sock_register(&packet_family_ops);
-	register_pernet_subsys(&packet_net_ops);
-	register_netdevice_notifier(&packet_netdev_notifier);
+	return 0;
+
+out_pernet:
+	unregister_pernet_subsys(&packet_net_ops);
+out_sock:
+	sock_unregister(PF_PACKET);
+out_proto:
+	proto_unregister(&packet_proto);
 out:
 	return rc;
 }
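
packet_init() previously ignored the return values of sock_register(), register_pernet_subsys() and register_netdevice_notifier(), so a late failure left the earlier registrations dangling. The fix is the standard unwind ladder; its shape as a generic sketch with hypothetical helpers:

static int __init demo_init(void)
{
	int rc;

	rc = register_a();
	if (rc)
		return rc;
	rc = register_b();
	if (rc)
		goto out_a;
	rc = register_c();
	if (rc)
		goto out_b;
	return 0;

out_b:			/* undo in reverse order of setup */
	unregister_b();
out_a:
	unregister_a();
	return rc;
}
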
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 7ef1c88..07812ae 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/module.h>
 #include <linux/sock_diag.h>
 #include <linux/net.h>
@@ -39,7 +40,7 @@
 	struct nlattr *mca;
 	struct packet_mclist *ml;
 
-	mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST);
+	mca = nla_nest_start_noflag(nlskb, PACKET_DIAG_MCLIST);
 	if (!mca)
 		return -EMSGSIZE;
 
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 3bb7c5f..82fb2b1 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -128,9 +128,11 @@
 	unsigned int		tp_hdrlen;
 	unsigned int		tp_reserve;
 	unsigned int		tp_tstamp;
+	struct completion	skb_completion;
 	struct net_device __rcu	*cached_dev;
 	int			(*xmit)(struct sk_buff *skb);
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
+	atomic_t		tp_drops ____cacheline_aligned_in_smp;
 };
 
 static struct packet_sock *pkt_sk(struct sock *sk)
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
index 6ec7d55..07f2c21 100644
--- a/net/phonet/Kconfig
+++ b/net/phonet/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Phonet protocol
 #
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 3b0ef69..ca6ae4c 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * File: af_phonet.c
  *
@@ -7,20 +8,6 @@
  *
  * Authors: Sakari Ailus <sakari.ailus@nokia.com>
  *          Rémi Denis-Courmont
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #include <linux/kernel.h>
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index b44fb90..393e6aa 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * File: datagram.c
  *
@@ -7,20 +8,6 @@
  *
  * Authors: Sakari Ailus <sakari.ailus@nokia.com>
  *          Rémi Denis-Courmont
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #include <linux/kernel.h>
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index 2c93379..1f5df04 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * File: pep-gprs.c
  *
@@ -6,20 +7,6 @@
  * Copyright (C) 2008 Nokia Corporation.
  *
  * Author: Rémi Denis-Courmont
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #include <linux/kernel.h>
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 9fc76b1..4577e43 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * File: pep.c
  *
@@ -6,20 +7,6 @@
  * Copyright (C) 2008 Nokia Corporation.
  *
  * Author: Rémi Denis-Courmont
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #include <linux/kernel.h>
@@ -132,7 +119,7 @@
 	ph->utid = 0;
 	ph->message_id = id;
 	ph->pipe_handle = pn->pipe_handle;
-	ph->data[0] = code;
+	ph->error_code = code;
 	return pn_skb_send(sk, skb, NULL);
 }
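
Background for the index changes throughout this file: struct pnpipehdr in include/net/phonet/pep.h was reworked so that the first message-specific byte is a named union member instead of data[0], and the flexible data[] array now starts one byte later. That is why every index in the hunks below shifts down by one (data[1] becomes data[0], data[4] becomes data[3]). The reworked header, roughly:

struct pnpipehdr {
	u8 utid;
	u8 message_id;
	u8 pipe_handle;
	union {
		u8 state_after_connect;	/* connect request */
		u8 state_after_reset;	/* reset request */
		u8 error_code;		/* any response */
		u8 pep_type;		/* status indication */
		u8 data0;		/* anything else */
	};
	u8 data[];
};
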
 
@@ -153,7 +140,7 @@
 	ph->utid = id; /* whatever */
 	ph->message_id = id;
 	ph->pipe_handle = pn->pipe_handle;
-	ph->data[0] = code;
+	ph->error_code = code;
 	return pn_skb_send(sk, skb, NULL);
 }
 
@@ -208,7 +195,7 @@
 	struct pnpipehdr *ph;
 	struct sockaddr_pn dst;
 	u8 data[4] = {
-		oph->data[0], /* PEP type */
+		oph->pep_type, /* PEP type */
 		code, /* error code, at an unusual offset */
 		PAD, PAD,
 	};
@@ -221,7 +208,7 @@
 	ph->utid = oph->utid;
 	ph->message_id = PNS_PEP_CTRL_RESP;
 	ph->pipe_handle = oph->pipe_handle;
-	ph->data[0] = oph->data[1]; /* CTRL id */
+	ph->data0 = oph->data[0]; /* CTRL id */
 
 	pn_skb_get_src_sockaddr(oskb, &dst);
 	return pn_skb_send(sk, skb, &dst);
@@ -272,17 +259,17 @@
 		return -EINVAL;
 
 	hdr = pnp_hdr(skb);
-	if (hdr->data[0] != PN_PEP_TYPE_COMMON) {
+	if (hdr->pep_type != PN_PEP_TYPE_COMMON) {
 		net_dbg_ratelimited("Phonet unknown PEP type: %u\n",
-				    (unsigned int)hdr->data[0]);
+				    (unsigned int)hdr->pep_type);
 		return -EOPNOTSUPP;
 	}
 
-	switch (hdr->data[1]) {
+	switch (hdr->data[0]) {
 	case PN_PEP_IND_FLOW_CONTROL:
 		switch (pn->tx_fc) {
 		case PN_LEGACY_FLOW_CONTROL:
-			switch (hdr->data[4]) {
+			switch (hdr->data[3]) {
 			case PEP_IND_BUSY:
 				atomic_set(&pn->tx_credits, 0);
 				break;
@@ -292,7 +279,7 @@
 			}
 			break;
 		case PN_ONE_CREDIT_FLOW_CONTROL:
-			if (hdr->data[4] == PEP_IND_READY)
+			if (hdr->data[3] == PEP_IND_READY)
 				atomic_set(&pn->tx_credits, wake = 1);
 			break;
 		}
@@ -301,12 +288,12 @@
 	case PN_PEP_IND_ID_MCFC_GRANT_CREDITS:
 		if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL)
 			break;
-		atomic_add(wake = hdr->data[4], &pn->tx_credits);
+		atomic_add(wake = hdr->data[3], &pn->tx_credits);
 		break;
 
 	default:
 		net_dbg_ratelimited("Phonet unknown PEP indication: %u\n",
-				    (unsigned int)hdr->data[1]);
+				    (unsigned int)hdr->data[0]);
 		return -EOPNOTSUPP;
 	}
 	if (wake)
@@ -318,7 +305,7 @@
 {
 	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *hdr = pnp_hdr(skb);
-	u8 n_sb = hdr->data[0];
+	u8 n_sb = hdr->data0;
 
 	pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
 	__skb_pull(skb, sizeof(*hdr));
@@ -506,7 +493,7 @@
 		return -ECONNREFUSED;
 
 	/* Parse sub-blocks */
-	n_sb = hdr->data[4];
+	n_sb = hdr->data[3];
 	while (n_sb > 0) {
 		u8 type, buf[6], len = sizeof(buf);
 		const u8 *data = pep_get_sb(skb, &type, &len, buf);
@@ -739,7 +726,7 @@
 	ph->utid = 0;
 	ph->message_id = PNS_PIPE_REMOVE_REQ;
 	ph->pipe_handle = pn->pipe_handle;
-	ph->data[0] = PAD;
+	ph->data0 = PAD;
 	return pn_skb_send(sk, skb, NULL);
 }
 
@@ -817,7 +804,7 @@
 	peer_type = hdr->other_pep_type << 8;
 
 	/* Parse sub-blocks (options) */
-	n_sb = hdr->data[4];
+	n_sb = hdr->data[3];
 	while (n_sb > 0) {
 		u8 type, buf[1], len = sizeof(buf);
 		const u8 *data = pep_get_sb(skb, &type, &len, buf);
@@ -1109,7 +1096,7 @@
 	ph->utid = 0;
 	if (pn->aligned) {
 		ph->message_id = PNS_PIPE_ALIGNED_DATA;
-		ph->data[0] = 0; /* padding */
+		ph->data0 = 0; /* padding */
 	} else
 		ph->message_id = PNS_PIPE_DATA;
 	ph->pipe_handle = pn->pipe_handle;
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 6cb4f60..49bd76a 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * File: pn_dev.c
  *
@@ -7,20 +8,6 @@
  *
  * Authors: Sakari Ailus <sakari.ailus@nokia.com>
  *          Rémi Denis-Courmont
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #include <linux/kernel.h>
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 871eaf2..59aebe2 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * File: pn_netlink.c
  *
@@ -7,20 +8,6 @@
  *
  * Authors: Sakari Ailus <sakari.ailus@nokia.com>
  *          Remi Denis-Courmont
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #include <linux/kernel.h>
@@ -79,8 +66,8 @@
 
 	ASSERT_RTNL();
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+				     ifa_phonet_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -246,8 +233,8 @@
 
 	ASSERT_RTNL();
 
-	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+				     rtm_phonet_policy, extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 3018799..76d499f 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * File: socket.c
  *
@@ -7,20 +8,6 @@
  *
  * Authors: Sakari Ailus <sakari.ailus@nokia.com>
  *          Rémi Denis-Courmont
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #include <linux/gfp.h>
@@ -351,9 +338,9 @@
 
 	if (sk->sk_state == TCP_CLOSE)
 		return EPOLLERR;
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
-	if (!skb_queue_empty(&pn->ctrlreq_queue))
+	if (!skb_queue_empty_lockless(&pn->ctrlreq_queue))
 		mask |= EPOLLPRI;
 	if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
 		return EPOLLHUP;
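
Both queue checks in the poll handler move to the lockless helper: poll() only needs a readiness hint, so a possibly stale answer is acceptable and the queue lock can stay untaken. The helper introduced for this in v5.4's include/linux/skbuff.h:

static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
{
	return READ_ONCE(list->next) == (const struct sk_buff *)list;
}
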
@@ -607,7 +594,7 @@
 		struct pn_sock *pn = pn_sk(sk);
 
 		seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu "
-			"%d %pK %d",
+			"%d %pK %u",
 			sk->sk_protocol, pn->sobject, pn->dobject,
 			pn->resource, sk->sk_state,
 			sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c
index c02a8c4..251e750 100644
--- a/net/phonet/sysctl.c
+++ b/net/phonet/sysctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * File: sysctl.c
  *
@@ -6,20 +7,6 @@
  * Copyright (C) 2008 Nokia Corporation.
  *
  * Author: Rémi Denis-Courmont
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #include <linux/seqlock.h>
diff --git a/net/psample/Kconfig b/net/psample/Kconfig
index d850246..028f514 100644
--- a/net/psample/Kconfig
+++ b/net/psample/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # psample packet sampling configuration
 #
diff --git a/net/psample/Makefile b/net/psample/Makefile
index 609b0a7..a04367b 100644
--- a/net/psample/Makefile
+++ b/net/psample/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the psample netlink channel
 #
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 64f9562..6f2fbc6 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/psample/psample.c - Netlink channel for packet sampling
  * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -76,7 +73,7 @@
 	int idx = 0;
 	int err;
 
-	spin_lock(&psample_groups_lock);
+	spin_lock_bh(&psample_groups_lock);
 	list_for_each_entry(group, &psample_groups_list, list) {
 		if (!net_eq(group->net, sock_net(msg->sk)))
 			continue;
@@ -92,7 +89,7 @@
 		idx++;
 	}
 
-	spin_unlock(&psample_groups_lock);
+	spin_unlock_bh(&psample_groups_lock);
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -100,6 +97,7 @@
 static const struct genl_ops psample_nl_ops[] = {
 	{
 		.cmd = PSAMPLE_CMD_GET_GROUP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit = psample_nl_cmd_get_group_dumpit,
 		/* can be retrieved by unprivileged users */
 	}
@@ -156,7 +154,7 @@
 {
 	psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP);
 	list_del(&group->list);
-	kfree(group);
+	kfree_rcu(group, rcu);
 }
 
 static struct psample_group *
@@ -174,7 +172,7 @@
 {
 	struct psample_group *group;
 
-	spin_lock(&psample_groups_lock);
+	spin_lock_bh(&psample_groups_lock);
 
 	group = psample_group_lookup(net, group_num);
 	if (!group) {
@@ -185,19 +183,27 @@
 	group->refcount++;
 
 out:
-	spin_unlock(&psample_groups_lock);
+	spin_unlock_bh(&psample_groups_lock);
 	return group;
 }
 EXPORT_SYMBOL_GPL(psample_group_get);
 
+void psample_group_take(struct psample_group *group)
+{
+	spin_lock_bh(&psample_groups_lock);
+	group->refcount++;
+	spin_unlock_bh(&psample_groups_lock);
+}
+EXPORT_SYMBOL_GPL(psample_group_take);
+
 void psample_group_put(struct psample_group *group)
 {
-	spin_lock(&psample_groups_lock);
+	spin_lock_bh(&psample_groups_lock);
 
 	if (--group->refcount == 0)
 		psample_group_destroy(group);
 
-	spin_unlock(&psample_groups_lock);
+	spin_unlock_bh(&psample_groups_lock);
 }
 EXPORT_SYMBOL_GPL(psample_group_put);
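
Two lifetime fixes meet in these psample hunks: the group lock moves to the _bh variants because group get/put can now be reached from softirq context (the tc sample action on the datapath), and kfree() becomes kfree_rcu() so an RCU reader that has just looked a group up cannot touch freed memory. The _bh variant matters because of the classic self-deadlock:

/*
 *   CPU0, process context               CPU0, softirq
 *   spin_lock(&psample_groups_lock);
 *       <-- softirq preempts -->
 *                                       spin_lock(&psample_groups_lock);
 *                                       spins forever: the holder cannot
 *                                       resume until the softirq returns.
 *
 * spin_lock_bh() disables bottom halves across the critical section,
 * so a softirq can never interrupt the lock holder on the same CPU.
 */
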
 
@@ -223,7 +229,7 @@
 		data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN
 			    - NLA_ALIGNTO;
 
-	nl_skb = genlmsg_new(meta_len + data_len, GFP_ATOMIC);
+	nl_skb = genlmsg_new(meta_len + nla_total_size(data_len), GFP_ATOMIC);
 	if (unlikely(!nl_skb))
 		return;
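
The genlmsg_new() sizing fix accounts for the netlink attribute overhead of the sampled packet, not just its payload: nla_total_size(payload) expands to NLA_ALIGN(NLA_HDRLEN + payload), so the old meta_len + data_len allocation came up short by the attribute header plus alignment padding. For example:

/* data_len = 117:
 * nla_total_size(117) = NLA_ALIGN(4 + 117) = 124 bytes,
 * i.e. seven bytes more than the bare payload. */
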
 
diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
index 1944834..63f89cc 100644
--- a/net/qrtr/Kconfig
+++ b/net/qrtr/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 # Qualcomm IPC Router configuration
 #
 
diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
index be012bf..1c6d6c1 100644
--- a/net/qrtr/Makefile
+++ b/net/qrtr/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_QRTR) := qrtr.o
 
 obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 86e1e37..88f98f2 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1,20 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015, Sony Mobile Communications Inc.
  * Copyright (c) 2013, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #include <linux/module.h>
 #include <linux/netlink.h>
 #include <linux/qrtr.h>
 #include <linux/termios.h>	/* For TIOCINQ/OUTQ */
+#include <linux/numa.h>
 
 #include <net/sock.h>
 
@@ -101,7 +94,7 @@
 	return container_of(sk, struct qrtr_sock, sk);
 }
 
-static unsigned int qrtr_local_nid = -1;
+static unsigned int qrtr_local_nid = NUMA_NO_NODE;
 
 /* for node ids */
 static RADIX_TREE(qrtr_nodes, GFP_KERNEL);
@@ -157,6 +150,7 @@
 	list_del(&node->item);
 	mutex_unlock(&qrtr_node_lock);
 
+	cancel_work_sync(&node->work);
 	skb_queue_purge(&node->rx_queue);
 	kfree(node);
 }
@@ -727,12 +721,13 @@
 	DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
 	int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
 			  struct sockaddr_qrtr *, struct sockaddr_qrtr *);
+	__le32 qrtr_type = cpu_to_le32(QRTR_TYPE_DATA);
 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
 	struct sock *sk = sock->sk;
 	struct qrtr_node *node;
 	struct sk_buff *skb;
 	size_t plen;
-	u32 type = QRTR_TYPE_DATA;
+	u32 type;
 	int rc;
 
 	if (msg->msg_flags & ~(MSG_DONTWAIT))
@@ -806,10 +801,10 @@
 		}
 
 		/* control messages already require the type as 'command' */
-		skb_copy_bits(skb, 0, &type, 4);
-		type = le32_to_cpu(type);
+		skb_copy_bits(skb, 0, &qrtr_type, 4);
 	}
 
+	type = le32_to_cpu(qrtr_type);
 	rc = enqueue_fn(node, skb, type, &ipc->us, addr);
 	if (rc >= 0)
 		rc = len;
@@ -967,9 +962,6 @@
 			break;
 		}
 		break;
-	case SIOCGSTAMP:
-		rc = sock_get_timestamp(sk, argp);
-		break;
 	case SIOCADDRT:
 	case SIOCDELRT:
 	case SIOCSIFADDR:
@@ -1032,6 +1024,7 @@
 	.recvmsg	= qrtr_recvmsg,
 	.getname	= qrtr_getname,
 	.ioctl		= qrtr_ioctl,
+	.gettstamp	= sock_gettstamp,
 	.poll		= datagram_poll,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
@@ -1092,7 +1085,8 @@
 
 	ASSERT_RTNL();
 
-	rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy, extack);
+	rc = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+				    qrtr_policy, extack);
 	if (rc < 0)
 		return rc;
 
diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c
index 9cf089b..c91bf03 100644
--- a/net/qrtr/smd.c
+++ b/net/qrtr/smd.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015, Sony Mobile Communications Inc.
  * Copyright (c) 2013, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c
index ccff1e5..e35869e 100644
--- a/net/qrtr/tun.c
+++ b/net/qrtr/tun.c
@@ -84,11 +84,14 @@
 	if (!kbuf)
 		return -ENOMEM;
 
-	if (!copy_from_iter_full(kbuf, len, from))
+	if (!copy_from_iter_full(kbuf, len, from)) {
+		kfree(kbuf);
 		return -EFAULT;
+	}
 
 	ret = qrtr_endpoint_post(&tun->ep, kbuf, len);
 
+	kfree(kbuf);
 	return ret < 0 ? ret : len;
 }
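
This hunk plugs a leak of the bounce buffer: kbuf was freed neither when copy_from_iter_full() failed nor after qrtr_endpoint_post() (which copies the data rather than taking ownership). The same fix expressed in the kernel's single-exit idiom, as a sketch rather than the patch itself:

	if (!copy_from_iter_full(kbuf, len, from)) {
		ret = -EFAULT;
		goto out;
	}
	ret = qrtr_endpoint_post(&tun->ep, kbuf, len);
	if (ret >= 0)
		ret = len;
out:
	kfree(kbuf);	/* freed on every path */
	return ret;
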
 
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index b909211..c64e154 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 
 config RDS
 	tristate "The Reliable Datagram Sockets Protocol"
@@ -22,6 +23,6 @@
 	  This transport does not support RDMA operations.
 
 config RDS_DEBUG
-        bool "RDS debugging messages"
+	bool "RDS debugging messages"
 	depends on RDS
-        default n
+	default n
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 65387e1..1a5bf3f 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -254,7 +254,40 @@
 
 static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
-	return -ENOIOCTLCMD;
+	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
+	rds_tos_t utos, tos = 0;
+
+	switch (cmd) {
+	case SIOCRDSSETTOS:
+		if (get_user(utos, (rds_tos_t __user *)arg))
+			return -EFAULT;
+
+		if (rs->rs_transport &&
+		    rs->rs_transport->get_tos_map)
+			tos = rs->rs_transport->get_tos_map(utos);
+		else
+			return -ENOIOCTLCMD;
+
+		spin_lock_bh(&rds_sock_lock);
+		if (rs->rs_tos || rs->rs_conn) {
+			spin_unlock_bh(&rds_sock_lock);
+			return -EINVAL;
+		}
+		rs->rs_tos = tos;
+		spin_unlock_bh(&rds_sock_lock);
+		break;
+	case SIOCRDSGETTOS:
+		spin_lock_bh(&rds_sock_lock);
+		tos = rs->rs_tos;
+		spin_unlock_bh(&rds_sock_lock);
+		if (put_user(tos, (rds_tos_t __user *)arg))
+			return -EFAULT;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return 0;
 }
 
 static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
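
rds_ioctl() gains the first RDS-specific ioctls: SIOCRDSSETTOS installs a type-of-service value, translated through the transport's new get_tos_map hook, and is only accepted before the socket's first connection (afterwards it fails with -EINVAL). Userspace usage, assuming the matching uapi definitions of SIOCRDSSETTOS and rds_tos_t in linux/rds.h (rds_fd is an AF_RDS socket):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/rds.h>

rds_tos_t tos = 4;	/* example value; the IB transport maps it 1:1 */

if (ioctl(rds_fd, SIOCRDSSETTOS, &tos) < 0)
	perror("SIOCRDSSETTOS");
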
@@ -348,7 +381,7 @@
 }
 
 static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
-				 int optlen)
+				 int optlen, int optname)
 {
 	int val, valbool;
 
@@ -360,6 +393,9 @@
 
 	valbool = val ? 1 : 0;
 
+	if (optname == SO_TIMESTAMP_NEW)
+		sock_set_flag(sk, SOCK_TSTAMP_NEW);
+
 	if (valbool)
 		sock_set_flag(sk, SOCK_RCVTSTAMP);
 	else
@@ -430,9 +466,10 @@
 		ret = rds_set_transport(rs, optval, optlen);
 		release_sock(sock->sk);
 		break;
-	case SO_TIMESTAMP:
+	case SO_TIMESTAMP_OLD:
+	case SO_TIMESTAMP_NEW:
 		lock_sock(sock->sk);
-		ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
+		ret = rds_enable_recvtstamp(sock->sk, optval, optlen, optname);
 		release_sock(sock->sk);
 		break;
 	case SO_RDS_MSG_RXPATH_LATENCY:
@@ -506,6 +543,9 @@
 	struct rds_sock *rs = rds_sk_to_rs(sk);
 	int ret = 0;
 
+	if (addr_len < offsetofend(struct sockaddr, sa_family))
+		return -EINVAL;
+
 	lock_sock(sk);
 
 	switch (uaddr->sa_family) {
@@ -519,7 +559,7 @@
 			ret = -EDESTADDRREQ;
 			break;
 		}
-		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) ||
+		if (ipv4_is_multicast(sin->sin_addr.s_addr) ||
 		    sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) {
 			ret = -EINVAL;
 			break;
@@ -553,7 +593,7 @@
 			addr4 = sin6->sin6_addr.s6_addr32[3];
 			if (addr4 == htonl(INADDR_ANY) ||
 			    addr4 == htonl(INADDR_BROADCAST) ||
-			    IN_MULTICAST(ntohl(addr4))) {
+			    ipv4_is_multicast(addr4)) {
 				ret = -EPROTOTYPE;
 				break;
 			}
@@ -646,6 +686,8 @@
 	spin_lock_init(&rs->rs_rdma_lock);
 	rs->rs_rdma_keys = RB_ROOT;
 	rs->rs_rx_traces = 0;
+	rs->rs_tos = 0;
+	rs->rs_conn = NULL;
 
 	spin_lock_bh(&rds_sock_lock);
 	list_add_tail(&rs->rs_item, &rds_sock_list);
@@ -663,7 +705,7 @@
 	if (sock->type != SOCK_SEQPACKET || protocol)
 		return -ESOCKTNOSUPPORT;
 
-	sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern);
+	sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern);
 	if (!sk)
 		return -ENOMEM;
 
@@ -699,6 +741,10 @@
 	spin_lock_bh(&rds_sock_lock);
 
 	list_for_each_entry(rs, &rds_sock_list, rs_item) {
+		/* This option only supports IPv4 sockets. */
+		if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
+			continue;
+
 		read_lock(&rs->rs_recv_lock);
 
 		/* XXX too lazy to maintain counts.. */
@@ -720,21 +766,60 @@
 	lens->each = sizeof(struct rds_info_message);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static void rds6_sock_inc_info(struct socket *sock, unsigned int len,
+			       struct rds_info_iterator *iter,
+			       struct rds_info_lengths *lens)
+{
+	struct rds_incoming *inc;
+	unsigned int total = 0;
+	struct rds_sock *rs;
+
+	len /= sizeof(struct rds6_info_message);
+
+	spin_lock_bh(&rds_sock_lock);
+
+	list_for_each_entry(rs, &rds_sock_list, rs_item) {
+		read_lock(&rs->rs_recv_lock);
+
+		list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
+			total++;
+			if (total <= len)
+				rds6_inc_info_copy(inc, iter, &inc->i_saddr,
+						   &rs->rs_bound_addr, 1);
+		}
+
+		read_unlock(&rs->rs_recv_lock);
+	}
+
+	spin_unlock_bh(&rds_sock_lock);
+
+	lens->nr = total;
+	lens->each = sizeof(struct rds6_info_message);
+}
+#endif
+
 static void rds_sock_info(struct socket *sock, unsigned int len,
 			  struct rds_info_iterator *iter,
 			  struct rds_info_lengths *lens)
 {
 	struct rds_info_socket sinfo;
+	unsigned int cnt = 0;
 	struct rds_sock *rs;
 
 	len /= sizeof(struct rds_info_socket);
 
 	spin_lock_bh(&rds_sock_lock);
 
-	if (len < rds_sock_count)
+	if (len < rds_sock_count) {
+		cnt = rds_sock_count;
 		goto out;
+	}
 
 	list_for_each_entry(rs, &rds_sock_list, rs_item) {
+		/* This option only supports IPv4 sockets. */
+		if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
+			continue;
 		sinfo.sndbuf = rds_sk_sndbuf(rs);
 		sinfo.rcvbuf = rds_sk_rcvbuf(rs);
 		sinfo.bound_addr = rs->rs_bound_addr_v4;
@@ -744,15 +829,51 @@
 		sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
 
 		rds_info_copy(iter, &sinfo, sizeof(sinfo));
+		cnt++;
 	}
 
 out:
-	lens->nr = rds_sock_count;
+	lens->nr = cnt;
 	lens->each = sizeof(struct rds_info_socket);
 
 	spin_unlock_bh(&rds_sock_lock);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static void rds6_sock_info(struct socket *sock, unsigned int len,
+			   struct rds_info_iterator *iter,
+			   struct rds_info_lengths *lens)
+{
+	struct rds6_info_socket sinfo6;
+	struct rds_sock *rs;
+
+	len /= sizeof(struct rds6_info_socket);
+
+	spin_lock_bh(&rds_sock_lock);
+
+	if (len < rds_sock_count)
+		goto out;
+
+	list_for_each_entry(rs, &rds_sock_list, rs_item) {
+		sinfo6.sndbuf = rds_sk_sndbuf(rs);
+		sinfo6.rcvbuf = rds_sk_rcvbuf(rs);
+		sinfo6.bound_addr = rs->rs_bound_addr;
+		sinfo6.connected_addr = rs->rs_conn_addr;
+		sinfo6.bound_port = rs->rs_bound_port;
+		sinfo6.connected_port = rs->rs_conn_port;
+		sinfo6.inum = sock_i_ino(rds_rs_to_sk(rs));
+
+		rds_info_copy(iter, &sinfo6, sizeof(sinfo6));
+	}
+
+ out:
+	lens->nr = rds_sock_count;
+	lens->each = sizeof(struct rds6_info_socket);
+
+	spin_unlock_bh(&rds_sock_lock);
+}
+#endif
+
 static void rds_exit(void)
 {
 	sock_unregister(rds_family_ops.family);
@@ -766,6 +887,10 @@
 	rds_bind_lock_destroy();
 	rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
 	rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
+#if IS_ENABLED(CONFIG_IPV6)
+	rds_info_deregister_func(RDS6_INFO_SOCKETS, rds6_sock_info);
+	rds_info_deregister_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info);
+#endif
 }
 module_exit(rds_exit);
 
@@ -803,6 +928,10 @@
 
 	rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info);
 	rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
+#if IS_ENABLED(CONFIG_IPV6)
+	rds_info_register_func(RDS6_INFO_SOCKETS, rds6_sock_info);
+	rds_info_register_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info);
+#endif
 
 	goto out;
 
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 762d2c6..5b5fb4c 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -78,10 +78,10 @@
 	__rds_create_bind_key(key, addr, port, scope_id);
 	rcu_read_lock();
 	rs = rhashtable_lookup(&bind_hash_table, key, ht_parms);
-	if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
-		rds_sock_addref(rs);
-	else
+	if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) ||
+		   !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt)))
 		rs = NULL;
+
 	rcu_read_unlock();
 
 	rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr,
@@ -173,13 +173,15 @@
 	/* We allow an RDS socket to be bound to either IPv4 or IPv6
 	 * address.
 	 */
+	if (addr_len < offsetofend(struct sockaddr, sa_family))
+		return -EINVAL;
 	if (uaddr->sa_family == AF_INET) {
 		struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
 
 		if (addr_len < sizeof(struct sockaddr_in) ||
 		    sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
 		    sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
-		    IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+		    ipv4_is_multicast(sin->sin_addr.s_addr))
 			return -EINVAL;
 		ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
 		binding_addr = &v6addr;
@@ -204,7 +206,7 @@
 			addr4 = sin6->sin6_addr.s6_addr32[3];
 			if (addr4 == htonl(INADDR_ANY) ||
 			    addr4 == htonl(INADDR_BROADCAST) ||
-			    IN_MULTICAST(ntohl(addr4)))
+			    ipv4_is_multicast(addr4))
 				return -EINVAL;
 		}
 		/* The scope ID must be specified for link local address. */
@@ -237,34 +239,33 @@
 		goto out;
 	}
 
+	/* The transport can be set using SO_RDS_TRANSPORT option before the
+	 * socket is bound.
+	 */
+	if (rs->rs_transport) {
+		trans = rs->rs_transport;
+		if (!trans->laddr_check ||
+		    trans->laddr_check(sock_net(sock->sk),
+				       binding_addr, scope_id) != 0) {
+			ret = -ENOPROTOOPT;
+			goto out;
+		}
+	} else {
+		trans = rds_trans_get_preferred(sock_net(sock->sk),
+						binding_addr, scope_id);
+		if (!trans) {
+			ret = -EADDRNOTAVAIL;
+			pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n",
+					    __func__, binding_addr);
+			goto out;
+		}
+		rs->rs_transport = trans;
+	}
+
 	sock_set_flag(sk, SOCK_RCU_FREE);
 	ret = rds_add_bound(rs, binding_addr, &port, scope_id);
 	if (ret)
-		goto out;
-
-	if (rs->rs_transport) { /* previously bound */
-		trans = rs->rs_transport;
-		if (trans->laddr_check(sock_net(sock->sk),
-				       binding_addr, scope_id) != 0) {
-			ret = -ENOPROTOOPT;
-			rds_remove_bound(rs);
-		} else {
-			ret = 0;
-		}
-		goto out;
-	}
-	trans = rds_trans_get_preferred(sock_net(sock->sk), binding_addr,
-					scope_id);
-	if (!trans) {
-		ret = -EADDRNOTAVAIL;
-		rds_remove_bound(rs);
-		pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n",
-				    __func__, binding_addr);
-		goto out;
-	}
-
-	rs->rs_transport = trans;
-	ret = 0;
+		rs->rs_transport = NULL;
 
 out:
 	release_sock(sk);
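
The rds_bind() rework is an ordering fix: the transport is now resolved (or validated, when SO_RDS_TRANSPORT pre-selected one) before rds_add_bound() publishes the socket in the bind hash table, so a concurrent incoming-packet lookup can no longer find a bound socket whose rs_transport is still NULL. In outline:

/*
 * before: rds_add_bound() -> pick transport -> unbind again on failure
 *         (socket briefly visible with rs_transport == NULL)
 *
 * after:  pick/validate transport first; only a socket with a valid
 *         transport is ever inserted, and a failed insert clears
 *         rs_transport so the socket stays consistent.
 */
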
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 3bd2f4a..ed7f213 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -84,7 +84,7 @@
 					      const struct in6_addr *laddr,
 					      const struct in6_addr *faddr,
 					      struct rds_transport *trans,
-					      int dev_if)
+					      u8 tos, int dev_if)
 {
 	struct rds_connection *conn, *ret = NULL;
 
@@ -92,6 +92,7 @@
 		if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
 		    ipv6_addr_equal(&conn->c_laddr, laddr) &&
 		    conn->c_trans == trans &&
+		    conn->c_tos == tos &&
 		    net == rds_conn_net(conn) &&
 		    conn->c_dev_if == dev_if) {
 			ret = conn;
@@ -139,6 +140,7 @@
 	atomic_set(&cp->cp_state, RDS_CONN_DOWN);
 	cp->cp_send_gen = 0;
 	cp->cp_reconnect_jiffies = 0;
+	cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION;
 	INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker);
 	INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker);
 	INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker);
@@ -159,7 +161,7 @@
 						const struct in6_addr *laddr,
 						const struct in6_addr *faddr,
 						struct rds_transport *trans,
-						gfp_t gfp,
+						gfp_t gfp, u8 tos,
 						int is_outgoing,
 						int dev_if)
 {
@@ -171,7 +173,7 @@
 	int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
 
 	rcu_read_lock();
-	conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if);
+	conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if);
 	if (conn &&
 	    conn->c_loopback &&
 	    conn->c_trans != &rds_loop_transport &&
@@ -205,6 +207,7 @@
 	conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
 	conn->c_faddr = *faddr;
 	conn->c_dev_if = dev_if;
+	conn->c_tos = tos;
 
 #if IS_ENABLED(CONFIG_IPV6)
 	/* If the local address is link local, set c_bound_if to be the
@@ -297,7 +300,7 @@
 		struct rds_connection *found;
 
 		found = rds_conn_lookup(net, head, laddr, faddr, trans,
-					dev_if);
+					tos, dev_if);
 		if (found) {
 			struct rds_conn_path *cp;
 			int i;
@@ -332,10 +335,10 @@
 struct rds_connection *rds_conn_create(struct net *net,
 				       const struct in6_addr *laddr,
 				       const struct in6_addr *faddr,
-				       struct rds_transport *trans, gfp_t gfp,
-				       int dev_if)
+				       struct rds_transport *trans, u8 tos,
+				       gfp_t gfp, int dev_if)
 {
-	return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if);
+	return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create);
 
@@ -343,9 +346,9 @@
 						const struct in6_addr *laddr,
 						const struct in6_addr *faddr,
 						struct rds_transport *trans,
-						gfp_t gfp, int dev_if)
+						u8 tos, gfp_t gfp, int dev_if)
 {
-	return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if);
+	return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
 
@@ -733,6 +736,7 @@
 	cinfo->next_rx_seq = cp->cp_next_rx_seq;
 	cinfo->laddr = conn->c_laddr.s6_addr32[3];
 	cinfo->faddr = conn->c_faddr.s6_addr32[3];
+	cinfo->tos = conn->c_tos;
 	strncpy(cinfo->transport, conn->c_trans->t_name,
 		sizeof(cinfo->transport));
 	cinfo->flags = 0;
diff --git a/net/rds/ib.c b/net/rds/ib.c
index eba75c1..9de2ae2 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -87,7 +87,7 @@
 
 	spin_lock_irqsave(&rds_ibdev->spinlock, flags);
 	list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node)
-		rds_conn_drop(ic->conn);
+		rds_conn_path_drop(&ic->conn->c_path[0], true);
 	spin_unlock_irqrestore(&rds_ibdev->spinlock, flags);
 }
 
@@ -143,13 +143,16 @@
 	refcount_set(&rds_ibdev->refcount, 1);
 	INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
 
+	INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
+	INIT_LIST_HEAD(&rds_ibdev->conn_list);
+
 	rds_ibdev->max_wrs = device->attrs.max_qp_wr;
 	rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);
 
 	has_fr = (device->attrs.device_cap_flags &
 		  IB_DEVICE_MEM_MGT_EXTENSIONS);
-	has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
-		   device->map_phys_fmr && device->unmap_fmr);
+	has_fmr = (device->ops.alloc_fmr && device->ops.dealloc_fmr &&
+		   device->ops.map_phys_fmr && device->ops.unmap_fmr);
 	rds_ibdev->use_fastreg = (has_fr && !has_fmr);
 
 	rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
@@ -203,9 +206,6 @@
 		device->name,
 		rds_ibdev->use_fastreg ? "FRMR" : "FMR");
 
-	INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
-	INIT_LIST_HEAD(&rds_ibdev->conn_list);
-
 	down_write(&rds_ib_devices_lock);
 	list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
 	up_write(&rds_ib_devices_lock);
@@ -291,7 +291,7 @@
 				    void *buffer)
 {
 	struct rds_info_rdma_connection *iinfo = buffer;
-	struct rds_ib_connection *ic;
+	struct rds_ib_connection *ic = conn->c_transport_data;
 
 	/* We will only ever look at IB transports */
 	if (conn->c_trans != &rds_ib_transport)
@@ -301,14 +301,16 @@
 
 	iinfo->src_addr = conn->c_laddr.s6_addr32[3];
 	iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
+	if (ic) {
+		iinfo->tos = conn->c_tos;
+		iinfo->sl = ic->i_sl;
+	}
 
 	memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
 	memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
 
-		ic = conn->c_transport_data;
-
 		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
 			       (union ib_gid *)&iinfo->dst_gid);
 
@@ -317,6 +319,7 @@
 		iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
 		iinfo->max_send_sge = rds_ibdev->max_sge;
 		rds_ib_get_mr_info(rds_ibdev, iinfo);
+		iinfo->cache_allocs = atomic_read(&ic->i_cache_allocs);
 	}
 	return 1;
 }
@@ -327,7 +330,7 @@
 				     void *buffer)
 {
 	struct rds6_info_rdma_connection *iinfo6 = buffer;
-	struct rds_ib_connection *ic;
+	struct rds_ib_connection *ic = conn->c_transport_data;
 
 	/* We will only ever look at IB transports */
 	if (conn->c_trans != &rds_ib_transport)
@@ -335,6 +338,10 @@
 
 	iinfo6->src_addr = conn->c_laddr;
 	iinfo6->dst_addr = conn->c_faddr;
+	if (ic) {
+		iinfo6->tos = conn->c_tos;
+		iinfo6->sl = ic->i_sl;
+	}
 
 	memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid));
 	memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid));
@@ -342,7 +349,6 @@
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
 
-		ic = conn->c_transport_data;
 		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid,
 			       (union ib_gid *)&iinfo6->dst_gid);
 		rds_ibdev = ic->rds_ibdev;
@@ -350,6 +356,7 @@
 		iinfo6->max_recv_wr = ic->i_recv_ring.w_nr;
 		iinfo6->max_send_sge = rds_ibdev->max_sge;
 		rds6_ib_get_mr_info(rds_ibdev, iinfo6);
+		iinfo6->cache_allocs = atomic_read(&ic->i_cache_allocs);
 	}
 	return 1;
 }
@@ -514,6 +521,15 @@
 	rds_ib_mr_exit();
 }
 
+static u8 rds_ib_get_tos_map(u8 tos)
+{
+	/* 1:1 user to transport map for RDMA transport.
+	 * In future, if custom map is desired, hook can export
+	 * user configurable map.
+	 */
+	return tos;
+}
+
 struct rds_transport rds_ib_transport = {
 	.laddr_check		= rds_ib_laddr_check,
 	.xmit_path_complete	= rds_ib_xmit_path_complete,
@@ -536,6 +552,7 @@
 	.sync_mr		= rds_ib_sync_mr,
 	.free_mr		= rds_ib_free_mr,
 	.flush_mrs		= rds_ib_flush_mrs,
+	.get_tos_map		= rds_ib_get_tos_map,
 	.t_owner		= THIS_MODULE,
 	.t_name			= "infiniband",
 	.t_unloading		= rds_ib_is_unloading,
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 71ff356..f2b558e 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -15,8 +15,7 @@
 
 #define RDS_IB_DEFAULT_RECV_WR		1024
 #define RDS_IB_DEFAULT_SEND_WR		256
-#define RDS_IB_DEFAULT_FR_WR		256
-#define RDS_IB_DEFAULT_FR_INV_WR	256
+#define RDS_IB_DEFAULT_FR_WR		512
 
 #define RDS_IB_DEFAULT_RETRY_COUNT	1
 
@@ -67,7 +66,9 @@
 	u8			ricpc_protocol_major;
 	u8			ricpc_protocol_minor;
 	__be16			ricpc_protocol_minor_mask;	/* bitmask */
-	__be32			ricpc_reserved1;
+	u8			ricpc_dp_toss;
+	u8			ripc_reserved1;
+	__be16			ripc_reserved2;
 	__be64			ricpc_ack_seq;
 	__be32			ricpc_credit;	/* non-zero enables flow ctl */
 };
@@ -155,7 +156,7 @@
 
 	/* To control the number of wrs from fastreg */
 	atomic_t		i_fastreg_wrs;
-	atomic_t		i_fastunreg_wrs;
+	atomic_t		i_fastreg_inuse_count;
 
 	/* interrupt handling */
 	struct tasklet_struct	i_send_tasklet;
@@ -219,6 +220,7 @@
 	/* Send/Recv vectors */
 	int			i_scq_vector;
 	int			i_rcq_vector;
+	u8			i_sl;
 };
 
 /* This assumes that atomic_t is at least 32 bits */
@@ -331,10 +333,8 @@
 	unsigned int i;
 
 	for_each_sg(sglist, sg, sg_dma_len, i) {
-		ib_dma_sync_single_for_cpu(dev,
-				ib_sg_dma_address(dev, sg),
-				ib_sg_dma_len(dev, sg),
-				direction);
+		ib_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+					   sg_dma_len(sg), direction);
 	}
 }
 #define ib_dma_sync_sg_for_cpu	rds_ib_dma_sync_sg_for_cpu
@@ -348,10 +348,8 @@
 	unsigned int i;
 
 	for_each_sg(sglist, sg, sg_dma_len, i) {
-		ib_dma_sync_single_for_device(dev,
-				ib_sg_dma_address(dev, sg),
-				ib_sg_dma_len(dev, sg),
-				direction);
+		ib_dma_sync_single_for_device(dev, sg_dma_address(sg),
+					      sg_dma_len(sg), direction);
 	}
 }
 #define ib_dma_sync_sg_for_device	rds_ib_dma_sync_sg_for_device
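
These hunks (and the matching ones in ib_fmr.c, ib_frmr.c, ib_recv.c and ib_send.c below) replace the ib_sg_dma_address()/ib_sg_dma_len() wrappers with the generic sg_dma_address()/sg_dma_len() scatterlist accessors; the RDMA core no longer offers per-device overrides, so the wrappers were pure indirection. A minimal userspace model, with a simplified stand-in for struct scatterlist:

#include <stdint.h>
#include <stdio.h>

/* After DMA mapping, sg_dma_address()/sg_dma_len() are plain field
 * accessors on the scatterlist entry. */
struct sg_entry {
	uint64_t dma_address;
	uint32_t dma_length;
};

#define sg_dma_address(sg)	((sg)->dma_address)
#define sg_dma_len(sg)		((sg)->dma_length)

int main(void)
{
	struct sg_entry sg = { .dma_address = 0x1000, .dma_length = 4096 };

	printf("addr=0x%llx len=%u\n",
	       (unsigned long long)sg_dma_address(&sg), sg_dma_len(&sg));
	return 0;
}
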
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index bfbb31f..18c6fac 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -40,6 +40,7 @@
 #include "rds_single_path.h"
 #include "rds.h"
 #include "ib.h"
+#include "ib_mr.h"
 
 /*
  * Set the selected protocol version
@@ -133,23 +134,27 @@
 		rds_ib_set_flow_control(conn, be32_to_cpu(credit));
 	}
 
-	if (conn->c_version < RDS_PROTOCOL(3, 1)) {
-		pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
-			  &conn->c_laddr, &conn->c_faddr,
-			  RDS_PROTOCOL_MAJOR(conn->c_version),
-			  RDS_PROTOCOL_MINOR(conn->c_version));
-		set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags);
-		rds_conn_destroy(conn);
-		return;
-	} else {
-		pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n",
-			  ic->i_active_side ? "Active" : "Passive",
-			  &conn->c_laddr, &conn->c_faddr,
-			  RDS_PROTOCOL_MAJOR(conn->c_version),
-			  RDS_PROTOCOL_MINOR(conn->c_version),
-			  ic->i_flowctl ? ", flow control" : "");
+	if (conn->c_version < RDS_PROTOCOL_VERSION) {
+		if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) {
+			pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
+				  &conn->c_laddr, &conn->c_faddr,
+				  RDS_PROTOCOL_MAJOR(conn->c_version),
+				  RDS_PROTOCOL_MINOR(conn->c_version));
+			rds_conn_destroy(conn);
+			return;
+		}
 	}
 
+	pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n",
+		  ic->i_active_side ? "Active" : "Passive",
+		  &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+		  RDS_PROTOCOL_MAJOR(conn->c_version),
+		  RDS_PROTOCOL_MINOR(conn->c_version),
+		  ic->i_flowctl ? ", flow control" : "");
+
+	/* receive sl from the peer */
+	ic->i_sl = ic->i_cm_id->route.path_rec->sl;
+
 	atomic_set(&ic->i_cq_quiesce, 0);
 
 	/* Init rings and fill recv. this needs to wait until protocol
@@ -184,6 +189,7 @@
 					    NULL);
 	}
 
+	conn->c_proposed_version = conn->c_version;
 	rds_connect_complete(conn);
 }
 
@@ -220,6 +226,7 @@
 			    cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
 			dp->ricp_v6.dp_ack_seq =
 			    cpu_to_be64(rds_ib_piggyb_ack(ic));
+			dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos;
 
 			conn_param->private_data = &dp->ricp_v6;
 			conn_param->private_data_len = sizeof(dp->ricp_v6);
@@ -234,6 +241,7 @@
 			    cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
 			dp->ricp_v4.dp_ack_seq =
 			    cpu_to_be64(rds_ib_piggyb_ack(ic));
+			dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos;
 
 			conn_param->private_data = &dp->ricp_v4;
 			conn_param->private_data_len = sizeof(dp->ricp_v4);
@@ -389,10 +397,9 @@
 		rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
 		break;
 	default:
-		rdsdebug("Fatal QP Event %u (%s) "
-			"- connection %pI6c->%pI6c, reconnecting\n",
-			event->event, ib_event_msg(event->event),
-			&conn->c_laddr, &conn->c_faddr);
+		rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n",
+			 event->event, ib_event_msg(event->event),
+			 &conn->c_laddr, &conn->c_faddr);
 		rds_conn_drop(conn);
 		break;
 	}
@@ -443,6 +450,7 @@
 	struct ib_qp_init_attr attr;
 	struct ib_cq_init_attr cq_attr = {};
 	struct rds_ib_device *rds_ibdev;
+	unsigned long max_wrs;
 	int ret, fr_queue_space;
 
 	/*
@@ -457,18 +465,20 @@
 	 * completion queue and send queue. This extra space is used for FRMR
 	 * registration and invalidation work requests
 	 */
-	fr_queue_space = rds_ibdev->use_fastreg ?
-			 (RDS_IB_DEFAULT_FR_WR + 1) +
-			 (RDS_IB_DEFAULT_FR_INV_WR + 1)
-			 : 0;
+	fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0);
 
 	/* add the conn now so that connection establishment has the dev */
 	rds_ib_add_conn(rds_ibdev, conn);
 
-	if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
-		rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
-	if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1)
-		rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1);
+	max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ?
+		rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr;
+	if (ic->i_send_ring.w_nr != max_wrs)
+		rds_ib_ring_resize(&ic->i_send_ring, max_wrs);
+
+	max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ?
+		rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr;
+	if (ic->i_recv_ring.w_nr != max_wrs)
+		rds_ib_ring_resize(&ic->i_recv_ring, max_wrs);
 
 	/* Protection domain and memory range */
 	ic->i_pd = rds_ibdev->pd;
@@ -526,8 +536,6 @@
 	attr.qp_type = IB_QPT_RC;
 	attr.send_cq = ic->i_send_cq;
 	attr.recv_cq = ic->i_recv_cq;
-	atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
-	atomic_set(&ic->i_fastunreg_wrs, RDS_IB_DEFAULT_FR_INV_WR);
 
 	/*
 	 * XXX this can fail if max_*_wr is too large?  Are we supposed
@@ -608,11 +616,11 @@
 qp_out:
 	rdma_destroy_qp(ic->i_cm_id);
 recv_cq_out:
-	if (!ib_destroy_cq(ic->i_recv_cq))
-		ic->i_recv_cq = NULL;
+	ib_destroy_cq(ic->i_recv_cq);
+	ic->i_recv_cq = NULL;
 send_cq_out:
-	if (!ib_destroy_cq(ic->i_send_cq))
-		ic->i_send_cq = NULL;
+	ib_destroy_cq(ic->i_send_cq);
+	ic->i_send_cq = NULL;
 rds_ibdev_out:
 	rds_ib_remove_conn(rds_ibdev, conn);
 out:
@@ -660,13 +668,16 @@
 
 	/* Even if len is crap *now* I still want to check it. -ASG */
 	if (event->param.conn.private_data_len < data_len || major == 0)
-		return RDS_PROTOCOL_3_0;
+		return RDS_PROTOCOL_4_0;
 
 	common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
-	if (major == 3 && common) {
-		version = RDS_PROTOCOL_3_0;
+	if (major == 4 && common) {
+		version = RDS_PROTOCOL_4_0;
 		while ((common >>= 1) != 0)
 			version++;
+	} else if (RDS_PROTOCOL_COMPAT_VERSION ==
+		   RDS_PROTOCOL(major, minor)) {
+		version = RDS_PROTOCOL_COMPAT_VERSION;
 	} else {
 		if (isv6)
 			printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n",
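
The negotiation logic above treats the minor-version mask as a bitmap (bit N advertises minor N) and walks it to pick the highest minor both sides support, falling back to the 3.1 compat version for older peers. A runnable sketch of the bit-walk; the mask value in main() is illustrative, not the real RDS_IB_SUPPORTED_PROTOCOLS:

#include <stdint.h>
#include <stdio.h>

#define RDS_PROTOCOL(maj, min)	(((maj) << 8) | (min))
#define RDS_PROTOCOL_4_0	RDS_PROTOCOL(4, 0)

/* Shift the common mask until it is empty; each shift with a
 * non-zero remainder bumps the minor version by one. */
static uint16_t negotiate(uint16_t common)
{
	uint16_t version = RDS_PROTOCOL_4_0;

	while ((common >>= 1) != 0)
		version++;
	return version;
}

int main(void)
{
	uint16_t v = negotiate(0x0003); /* minors 0 and 1 in common */

	printf("negotiated %u.%u\n", v >> 8, v & 255); /* prints 4.1 */
	return 0;
}
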
@@ -729,8 +740,10 @@
 
 	/* Check whether the remote protocol version matches ours. */
 	version = rds_ib_protocol_compatible(event, isv6);
-	if (!version)
+	if (!version) {
+		err = RDS_RDMA_REJ_INCOMPAT;
 		goto out;
+	}
 
 	dp = event->param.conn.private_data;
 	if (isv6) {
@@ -771,15 +784,16 @@
 		daddr6 = &d_mapped_addr;
 	}
 
-	rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid "
-		 "0x%llx\n", saddr6, daddr6,
-		 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
+	rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n",
+		 saddr6, daddr6, RDS_PROTOCOL_MAJOR(version),
+		 RDS_PROTOCOL_MINOR(version),
 		 (unsigned long long)be64_to_cpu(lguid),
-		 (unsigned long long)be64_to_cpu(fguid));
+		 (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss);
 
 	/* RDS/IB is not currently netns aware, thus init_net */
 	conn = rds_conn_create(&init_net, daddr6, saddr6,
-			       &rds_ib_transport, GFP_KERNEL, ifindex);
+			       &rds_ib_transport, dp_cmn->ricpc_dp_toss,
+			       GFP_KERNEL, ifindex);
 	if (IS_ERR(conn)) {
 		rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
 		conn = NULL;
@@ -846,7 +860,7 @@
 	if (conn)
 		mutex_unlock(&conn->c_cm_lock);
 	if (err)
-		rdma_reject(cm_id, NULL, 0);
+		rdma_reject(cm_id, &err, sizeof(int));
 	return destroy;
 }
 
@@ -861,7 +875,7 @@
 
 	/* If the peer doesn't do protocol negotiation, we must
 	 * default to RDSv3.0 */
-	rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0);
+	rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1);
 	ic->i_flowctl = rds_ib_sysctl_flow_control;	/* advertise flow control */
 
 	ret = rds_ib_setup_qp(conn);
@@ -870,7 +884,8 @@
 		goto out;
 	}
 
-	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
+	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
+				  conn->c_proposed_version,
 				  UINT_MAX, UINT_MAX, isv6);
 	ret = rdma_connect(cm_id, &conn_param);
 	if (ret)
@@ -987,6 +1002,11 @@
 				ic->i_cm_id, err);
 		}
 
+		/* kick off "flush_worker" for all pools in order to reap
+		 * all FRMR registrations that are still marked "FRMR_IS_INUSE"
+		 */
+		rds_ib_flush_mrs();
+
 		/*
 		 * We want to wait for tx and rx completion to finish
 		 * before we tear down the connection, but we have to be
@@ -999,8 +1019,8 @@
 		wait_event(rds_ib_ring_empty_wait,
 			   rds_ib_ring_empty(&ic->i_recv_ring) &&
 			   (atomic_read(&ic->i_signaled_sends) == 0) &&
-			   (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR) &&
-			   (atomic_read(&ic->i_fastunreg_wrs) == RDS_IB_DEFAULT_FR_INV_WR));
+			   (atomic_read(&ic->i_fastreg_inuse_count) == 0) &&
+			   (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
 		tasklet_kill(&ic->i_send_tasklet);
 		tasklet_kill(&ic->i_recv_tasklet);
 
@@ -1085,8 +1105,9 @@
 	ic->i_flowctl = 0;
 	atomic_set(&ic->i_credits, 0);
 
-	rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
-	rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
+	/* Re-init rings, but retain sizes. */
+	rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr);
+	rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr);
 
 	if (ic->i_ibinc) {
 		rds_inc_put(&ic->i_ibinc->ii_inc);
@@ -1127,13 +1148,14 @@
 	spin_lock_init(&ic->i_ack_lock);
 #endif
 	atomic_set(&ic->i_signaled_sends, 0);
+	atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
 
 	/*
 	 * rds_ib_conn_shutdown() waits for these to be emptied so they
 	 * must be initialized before it can be called.
 	 */
-	rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
-	rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
+	rds_ib_ring_init(&ic->i_send_ring, 0);
+	rds_ib_ring_init(&ic->i_recv_ring, 0);
 
 	ic->conn = conn;
 	conn->c_transport_data = ic;
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index e0f70c4..93c0437 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -44,6 +44,17 @@
 	else
 		pool = rds_ibdev->mr_1m_pool;
 
+	if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
+		queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
+
+	/* Switch pools if one of the pool is reaching upper limit */
+	if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
+		if (pool->pool_type == RDS_IB_MR_8K_POOL)
+			pool = rds_ibdev->mr_1m_pool;
+		else
+			pool = rds_ibdev->mr_8k_pool;
+	}
+
 	ibmr = rds_ib_try_reuse_ibmr(pool);
 	if (ibmr)
 		return ibmr;
@@ -108,8 +119,8 @@
 	page_cnt = 0;
 
 	for (i = 0; i < sg_dma_len; ++i) {
-		unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
-		u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
+		unsigned int dma_len = sg_dma_len(&scat[i]);
+		u64 dma_addr = sg_dma_address(&scat[i]);
 
 		if (dma_addr & ~PAGE_MASK) {
 			if (i > 0) {
@@ -148,8 +159,8 @@
 
 	page_cnt = 0;
 	for (i = 0; i < sg_dma_len; ++i) {
-		unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
-		u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
+		unsigned int dma_len = sg_dma_len(&scat[i]);
+		u64 dma_addr = sg_dma_address(&scat[i]);
 
 		for (j = 0; j < dma_len; j += PAGE_SIZE)
 			dma_pages[page_cnt++] =
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 6431a02..06ecf9d 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -32,6 +32,24 @@
 
 #include "ib_mr.h"
 
+static inline void
+rds_transition_frwr_state(struct rds_ib_mr *ibmr,
+			  enum rds_ib_fr_state old_state,
+			  enum rds_ib_fr_state new_state)
+{
+	if (cmpxchg(&ibmr->u.frmr.fr_state,
+		    old_state, new_state) == old_state &&
+	    old_state == FRMR_IS_INUSE) {
+		/* enforce order of ibmr->u.frmr.fr_state update
+		 * before decrementing i_fastreg_inuse_count
+		 */
+		smp_mb__before_atomic();
+		atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
+		if (waitqueue_active(&rds_ib_ring_empty_wait))
+			wake_up(&rds_ib_ring_empty_wait);
+	}
+}
+
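
rds_transition_frwr_state() uses cmpxchg() so the INUSE-to-FREE/STALE transition happens exactly once even when the completion handler races with an error path, and only the winning caller decrements i_fastreg_inuse_count and wakes the shutdown waiter. A userspace model of the same pattern with C11 atomics; seq_cst ordering stands in for the kernel's smp_mb__before_atomic():

#include <stdatomic.h>
#include <stdio.h>

enum fr_state { FRMR_IS_FREE, FRMR_IS_INUSE, FRMR_IS_STALE };

static _Atomic int fr_state = FRMR_IS_INUSE;
static atomic_int inuse_count = 1;

/* Only the caller whose compare-and-swap wins does the INUSE
 * bookkeeping; a racing caller sees the CAS fail and does nothing. */
static void transition(int old_state, int new_state)
{
	int expected = old_state;

	if (atomic_compare_exchange_strong(&fr_state, &expected, new_state) &&
	    old_state == FRMR_IS_INUSE)
		atomic_fetch_sub(&inuse_count, 1);
}

int main(void)
{
	transition(FRMR_IS_INUSE, FRMR_IS_FREE);  /* wins: decrements */
	transition(FRMR_IS_INUSE, FRMR_IS_STALE); /* loses: no effect */
	printf("state=%d inuse=%d\n", (int)fr_state, (int)inuse_count);
	return 0;
}
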
 static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
 					   int npages)
 {
@@ -75,6 +93,8 @@
 		pool->max_items_soft = pool->max_items;
 
 	frmr->fr_state = FRMR_IS_FREE;
+	init_waitqueue_head(&frmr->fr_inv_done);
+	init_waitqueue_head(&frmr->fr_reg_done);
 	return ibmr;
 
 out_no_cigar:
@@ -116,13 +136,19 @@
 	if (unlikely(ret != ibmr->sg_len))
 		return ret < 0 ? ret : -EINVAL;
 
+	if (cmpxchg(&frmr->fr_state,
+		    FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
+		return -EBUSY;
+
+	atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
+
 	/* Perform a WR for the fast_reg_mr. Each individual page
 	 * in the sg list is added to the fast reg page list and placed
 	 * inside the fast_reg_mr WR.  The key used is a rolling 8bit
 	 * counter, which should guarantee uniqueness.
 	 */
 	ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
-	frmr->fr_state = FRMR_IS_INUSE;
+	frmr->fr_reg = true;
 
 	memset(&reg_wr, 0, sizeof(reg_wr));
 	reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
@@ -138,12 +164,23 @@
 	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
 	if (unlikely(ret)) {
 		/* Failure here can be because of -ENOMEM as well */
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
+
 		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		if (printk_ratelimit())
 			pr_warn("RDS/IB: %s returned error(%d)\n",
 				__func__, ret);
+		goto out;
 	}
+
+	/* Wait for the registration to complete in order to prevent an invalid
+	 * access error caused by the memory region being accessed while its
+	 * registration is still pending.
+	 */
+	wait_event(frmr->fr_reg_done, !frmr->fr_reg);
+
+out:
+
 	return ret;
 }
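
The wait_event()/wake_up() pair added here (fr_reg_done, together with fr_inv_done below) is a plain completion handshake: the posting thread sleeps until the CQ handler clears fr_reg. A userspace model with a condition variable (compile with -pthread); the timing and names are illustrative:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t fr_reg_done = PTHREAD_COND_INITIALIZER;
static bool fr_reg = true; /* registration WR posted, not yet complete */

static void *completion_handler(void *arg)
{
	(void)arg;
	usleep(1000); /* the hardware completes the registration */
	pthread_mutex_lock(&lock);
	fr_reg = false;
	pthread_cond_broadcast(&fr_reg_done); /* wake_up(&frmr->fr_reg_done) */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, completion_handler, NULL);
	pthread_mutex_lock(&lock);
	while (fr_reg) /* wait_event(frmr->fr_reg_done, !frmr->fr_reg) */
		pthread_cond_wait(&fr_reg_done, &lock);
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	printf("registration complete, MR safe to use\n");
	return 0;
}
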
 
@@ -181,8 +218,8 @@
 
 	ret = -EINVAL;
 	for (i = 0; i < ibmr->sg_dma_len; ++i) {
-		unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
-		u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);
+		unsigned int dma_len = sg_dma_len(&ibmr->sg[i]);
+		u64 dma_addr = sg_dma_address(&ibmr->sg[i]);
 
 		frmr->sg_byte_len += dma_len;
 		if (dma_addr & ~PAGE_MASK) {
@@ -239,8 +276,8 @@
 	if (frmr->fr_state != FRMR_IS_INUSE)
 		goto out;
 
-	while (atomic_dec_return(&ibmr->ic->i_fastunreg_wrs) <= 0) {
-		atomic_inc(&ibmr->ic->i_fastunreg_wrs);
+	while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
+		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		cpu_relax();
 	}
 
@@ -255,12 +292,29 @@
 
 	ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
 	if (unlikely(ret)) {
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
 		frmr->fr_inv = false;
-		atomic_inc(&ibmr->ic->i_fastunreg_wrs);
+		/* enforce order of frmr->fr_inv update
+		 * before incrementing i_fastreg_wrs
+		 */
+		smp_mb__before_atomic();
+		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
 		goto out;
 	}
+
+	/* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
+	 * 1) avoid a silly bouncing between "clean_list" and "drop_list"
+	 *    triggered by function "rds_ib_reg_frmr", as it releases frmr
+	 *    regions whose state is not "FRMR_IS_FREE" right away.
+	 * 2) prevent an invalid access error in a race
+	 *    from a pending "IB_WR_LOCAL_INV" operation
+	 *    with a teardown ("dma_unmap_sg", "put_page")
+	 *    and de-registration ("ib_dereg_mr") of the corresponding
+	 *    memory region.
+	 */
+	wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
+
 out:
 	return ret;
 }
@@ -271,7 +325,7 @@
 	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
 
 	if (wc->status != IB_WC_SUCCESS) {
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
 		if (rds_conn_up(ic->conn))
 			rds_ib_conn_error(ic->conn,
 					  "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
@@ -283,12 +337,21 @@
 	}
 
 	if (frmr->fr_inv) {
-		frmr->fr_state = FRMR_IS_FREE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
 		frmr->fr_inv = false;
-		atomic_inc(&ic->i_fastreg_wrs);
-	} else {
-		atomic_inc(&ic->i_fastunreg_wrs);
+		wake_up(&frmr->fr_inv_done);
 	}
+
+	if (frmr->fr_reg) {
+		frmr->fr_reg = false;
+		wake_up(&frmr->fr_reg_done);
+	}
+
+	/* enforce order of frmr->{fr_reg,fr_inv} update
+	 * before incrementing i_fastreg_wrs
+	 */
+	smp_mb__before_atomic();
+	atomic_inc(&ic->i_fastreg_wrs);
 }
 
 void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
@@ -296,14 +359,18 @@
 {
 	struct rds_ib_mr *ibmr, *next;
 	struct rds_ib_frmr *frmr;
-	int ret = 0;
+	int ret = 0, ret2;
 	unsigned int freed = *nfreed;
 
 	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
 	list_for_each_entry(ibmr, list, unmap_list) {
-		if (ibmr->sg_dma_len)
-			ret |= rds_ib_post_inv(ibmr);
+		if (ibmr->sg_dma_len) {
+			ret2 = rds_ib_post_inv(ibmr);
+			if (ret2 && !ret)
+				ret = ret2;
+		}
 	}
+
 	if (ret)
 		pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
 
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 5da12c2..9045a8c 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -57,6 +57,9 @@
 	struct ib_mr		*mr;
 	enum rds_ib_fr_state	fr_state;
 	bool			fr_inv;
+	wait_queue_head_t	fr_inv_done;
+	bool			fr_reg;
+	wait_queue_head_t	fr_reg_done;
 	struct ib_send_wr	fr_wr;
 	unsigned int		dma_npages;
 	unsigned int		sg_byte_len;
@@ -97,6 +100,7 @@
 	struct llist_head	free_list;	/* unused MRs */
 	struct llist_head	clean_list;	/* unused & unmapped MRs */
 	wait_queue_head_t	flush_wait;
+	spinlock_t		clean_lock;	/* "clean_list" concurrency */
 
 	atomic_t		free_pinned;	/* memory pinned by free MRs */
 	unsigned long		max_items;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 63c8d10..c8c1e3a 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -40,9 +40,6 @@
 
 struct workqueue_struct *rds_ib_mr_wq;
 
-static DEFINE_PER_CPU(unsigned long, clean_list_grace);
-#define CLEAN_LIST_BUSY_BIT 0
-
 static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
 {
 	struct rds_ib_device *rds_ibdev;
@@ -195,12 +192,11 @@
 {
 	struct rds_ib_mr *ibmr = NULL;
 	struct llist_node *ret;
-	unsigned long *flag;
+	unsigned long flags;
 
-	preempt_disable();
-	flag = this_cpu_ptr(&clean_list_grace);
-	set_bit(CLEAN_LIST_BUSY_BIT, flag);
+	spin_lock_irqsave(&pool->clean_lock, flags);
 	ret = llist_del_first(&pool->clean_list);
+	spin_unlock_irqrestore(&pool->clean_lock, flags);
 	if (ret) {
 		ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
 		if (pool->pool_type == RDS_IB_MR_8K_POOL)
@@ -209,23 +205,9 @@
 			rds_ib_stats_inc(s_ib_rdma_mr_1m_reused);
 	}
 
-	clear_bit(CLEAN_LIST_BUSY_BIT, flag);
-	preempt_enable();
 	return ibmr;
 }
 
-static inline void wait_clean_list_grace(void)
-{
-	int cpu;
-	unsigned long *flag;
-
-	for_each_online_cpu(cpu) {
-		flag = &per_cpu(clean_list_grace, cpu);
-		while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
-			cpu_relax();
-	}
-}
-
 void rds_ib_sync_mr(void *trans_private, int direction)
 {
 	struct rds_ib_mr *ibmr = trans_private;
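
llist_del_first() is only safe with a single concurrent deleter, which the old code guaranteed with per-CPU busy bits and a grace-period spin; this patch replaces that with a plain spinlock (clean_lock) around every pop and batch splice. A userspace model of the locked pop (compile with -pthread), using a simplified singly-linked list:

#include <pthread.h>
#include <stdio.h>

struct node {
	struct node *next;
};

static struct node *clean_list;
static pthread_spinlock_t clean_lock;

/* The lock serializes deleters, so a node can never be re-inserted
 * while another thread still spins on it inside a lockless pop. */
static struct node *pop_clean(void)
{
	struct node *n;

	pthread_spin_lock(&clean_lock);
	n = clean_list;
	if (n)
		clean_list = n->next;
	pthread_spin_unlock(&clean_lock);
	return n;
}

int main(void)
{
	struct node a = { NULL }, b = { &a };

	pthread_spin_init(&clean_lock, PTHREAD_PROCESS_PRIVATE);
	clean_list = &b;
	printf("popped %p then %p\n", (void *)pop_clean(), (void *)pop_clean());
	pthread_spin_destroy(&clean_lock);
	return 0;
}
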
@@ -324,8 +306,7 @@
  * of clusters.  Each cluster has linked llist nodes of
  * MR_CLUSTER_SIZE mrs that are ready for reuse.
  */
-static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
-				struct list_head *list,
+static void list_to_llist_nodes(struct list_head *list,
 				struct llist_node **nodes_head,
 				struct llist_node **nodes_tail)
 {
@@ -402,8 +383,13 @@
 	 */
 	dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list);
 	dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list);
-	if (free_all)
+	if (free_all) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&pool->clean_lock, flags);
 		llist_append_to_list(&pool->clean_list, &unmap_list);
+		spin_unlock_irqrestore(&pool->clean_lock, flags);
+	}
 
 	free_goal = rds_ib_flush_goal(pool, free_all);
 
@@ -416,25 +402,20 @@
 		rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal);
 
 	if (!list_empty(&unmap_list)) {
-		/* we have to make sure that none of the things we're about
-		 * to put on the clean list would race with other cpus trying
-		 * to pull items off.  The llist would explode if we managed to
-		 * remove something from the clean list and then add it back again
-		 * while another CPU was spinning on that same item in llist_del_first.
-		 *
-		 * This is pretty unlikely, but just in case  wait for an llist grace period
-		 * here before adding anything back into the clean list.
-		 */
-		wait_clean_list_grace();
+		unsigned long flags;
 
-		list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail);
-		if (ibmr_ret)
+		list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail);
+		if (ibmr_ret) {
 			*ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
-
+			clean_nodes = clean_nodes->next;
+		}
 		/* more than one entry in llist nodes */
-		if (clean_nodes->next)
-			llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list);
-
+		if (clean_nodes) {
+			spin_lock_irqsave(&pool->clean_lock, flags);
+			llist_add_batch(clean_nodes, clean_tail,
+					&pool->clean_list);
+			spin_unlock_irqrestore(&pool->clean_lock, flags);
+		}
 	}
 
 	atomic_sub(unpinned, &pool->free_pinned);
@@ -454,9 +435,6 @@
 	struct rds_ib_mr *ibmr = NULL;
 	int iter = 0;
 
-	if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10)
-		queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
-
 	while (1) {
 		ibmr = rds_ib_reuse_mr(pool);
 		if (ibmr)
@@ -472,7 +450,7 @@
 				rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
 			else
 				rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
-			return ERR_PTR(-EAGAIN);
+			break;
 		}
 
 		/* We do have some empty MRs. Flush them out. */
@@ -486,7 +464,7 @@
 			return ibmr;
 	}
 
-	return ibmr;
+	return NULL;
 }
 
 static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
@@ -611,6 +589,7 @@
 	init_llist_head(&pool->free_list);
 	init_llist_head(&pool->drop_list);
 	init_llist_head(&pool->clean_list);
+	spin_lock_init(&pool->clean_lock);
 	mutex_init(&pool->flush_lock);
 	init_waitqueue_head(&pool->flush_wait);
 	INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 2f16146..a0f99bb 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -168,6 +168,7 @@
 		list_del(&inc->ii_cache_entry);
 		WARN_ON(!list_empty(&inc->ii_frags));
 		kmem_cache_free(rds_ib_incoming_slab, inc);
+		atomic_dec(&rds_ib_allocation);
 	}
 
 	rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
@@ -346,8 +347,8 @@
 	sge->length = sizeof(struct rds_header);
 
 	sge = &recv->r_sge[1];
-	sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
-	sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);
+	sge->addr = sg_dma_address(&recv->r_frag->f_sg);
+	sge->length = sg_dma_len(&recv->r_frag->f_sg);
 
 	ret = 0;
 out:
@@ -384,6 +385,7 @@
 	unsigned int posted = 0;
 	int ret = 0;
 	bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
+	bool must_wake = false;
 	u32 pos;
 
 	/* the goal here is to just make sure that someone, somewhere
@@ -404,14 +406,13 @@
 		recv = &ic->i_recvs[pos];
 		ret = rds_ib_recv_refill_one(conn, recv, gfp);
 		if (ret) {
+			must_wake = true;
 			break;
 		}
 
 		rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
 			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
-			 (long) ib_sg_dma_address(
-				ic->i_cm_id->device,
-				&recv->r_frag->f_sg));
+			 (long)sg_dma_address(&recv->r_frag->f_sg));
 
 		/* XXX when can this fail? */
 		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
@@ -424,6 +425,11 @@
 		}
 
 		posted++;
+
+		if ((posted > 128 && need_resched()) || posted > 8192) {
+			must_wake = true;
+			break;
+		}
 	}
 
 	/* We're doing flow control - update the window. */
@@ -446,10 +452,13 @@
 	 * if we should requeue.
 	 */
 	if (rds_conn_up(conn) &&
-	    ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
+	    (must_wake ||
+	    (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
 	    rds_ib_ring_empty(&ic->i_recv_ring))) {
 		queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
 	}
+	if (can_wait)
+		cond_resched();
 }
 
 /*
@@ -774,7 +783,7 @@
 	unsigned long frag_off;
 	unsigned long to_copy;
 	unsigned long copied;
-	uint64_t uncongested = 0;
+	__le64 uncongested = 0;
 	void *addr;
 
 	/* catch completely corrupt packets */
@@ -791,7 +800,7 @@
 	copied = 0;
 
 	while (copied < RDS_CONG_MAP_BYTES) {
-		uint64_t *src, *dst;
+		__le64 *src, *dst;
 		unsigned int k;
 
 		to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
@@ -826,9 +835,7 @@
 	}
 
 	/* the congestion map is in little endian order */
-	uncongested = le64_to_cpu(uncongested);
-
-	rds_cong_map_updated(map, uncongested);
+	rds_cong_map_updated(map, le64_to_cpu(uncongested));
 }
 
 static void rds_ib_process_recv(struct rds_connection *conn,
@@ -986,9 +993,9 @@
 	} else {
 		/* We expect errors as the qp is drained during shutdown */
 		if (rds_conn_up(conn) || rds_conn_connecting(conn))
-			rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
+			rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n",
 					  &conn->c_laddr, &conn->c_faddr,
-					  wc->status,
+					  conn->c_tos, wc->status,
 					  ib_wc_status_msg(wc->status));
 	}
 
@@ -1041,9 +1048,14 @@
 	si_meminfo(&si);
 	rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
 
-	rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
-					sizeof(struct rds_ib_incoming),
-					0, SLAB_HWCACHE_ALIGN, NULL);
+	rds_ib_incoming_slab =
+		kmem_cache_create_usercopy("rds_ib_incoming",
+					   sizeof(struct rds_ib_incoming),
+					   0, SLAB_HWCACHE_ALIGN,
+					   offsetof(struct rds_ib_incoming,
+						    ii_inc.i_usercopy),
+					   sizeof(struct rds_inc_usercopy),
+					   NULL);
 	if (!rds_ib_incoming_slab)
 		goto out;
 
@@ -1061,6 +1073,8 @@
 
 void rds_ib_recv_exit(void)
 {
+	WARN_ON(atomic_read(&rds_ib_allocation));
+
 	kmem_cache_destroy(rds_ib_incoming_slab);
 	kmem_cache_destroy(rds_ib_frag_slab);
 }
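
kmem_cache_create_usercopy() above whitelists only the rds_inc_usercopy window of each slab object for copying to user space, so hardened usercopy can reject copies that stray outside it. A runnable sketch of how the offset/size pair is computed; the types are simplified stand-ins for the kernel structs:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t rds_rdma_cookie_t;
typedef int64_t ktime_t;

struct rds_inc_usercopy {
	rds_rdma_cookie_t rdma_cookie;
	ktime_t rx_tstamp;
};

struct rds_incoming {
	int i_refcount;
	struct rds_inc_usercopy i_usercopy;
};

struct rds_ib_incoming {
	long ii_frags;
	struct rds_incoming ii_inc;
};

int main(void)
{
	/* The (offset, size) window passed to the slab constructor. */
	printf("usercopy offset=%zu size=%zu\n",
	       offsetof(struct rds_ib_incoming, ii_inc.i_usercopy),
	       sizeof(struct rds_inc_usercopy));
	return 0;
}
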
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 2dcb555..dfe6237 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -69,6 +69,16 @@
 	complete(rm, notify_status);
 }
 
+static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
+				   struct rm_data_op *op,
+				   int wc_status)
+{
+	if (op->op_nents)
+		ib_dma_unmap_sg(ic->i_cm_id->device,
+				op->op_sg, op->op_nents,
+				DMA_TO_DEVICE);
+}
+
 static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
 				   struct rm_rdma_op *op,
 				   int wc_status)
@@ -129,21 +139,6 @@
 		rds_ib_stats_inc(s_ib_atomic_fadd);
 }
 
-static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
-				   struct rm_data_op *op,
-				   int wc_status)
-{
-	struct rds_message *rm = container_of(op, struct rds_message, data);
-
-	if (op->op_nents)
-		ib_dma_unmap_sg(ic->i_cm_id->device,
-				op->op_sg, op->op_nents,
-				DMA_TO_DEVICE);
-
-	if (rm->rdma.op_active && rm->data.op_notify)
-		rds_ib_send_unmap_rdma(ic, &rm->rdma, wc_status);
-}
-
 /*
  * Unmap the resources associated with a struct send_work.
  *
@@ -305,8 +300,9 @@
 
 	/* We expect errors as the qp is drained during shutdown */
 	if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
-		rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
-				  &conn->c_laddr, &conn->c_faddr, wc->status,
+		rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n",
+				  &conn->c_laddr, &conn->c_faddr,
+				  conn->c_tos, wc->status,
 				  ib_wc_status_msg(wc->status));
 	}
 }
@@ -522,7 +518,7 @@
 	if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
 		i = 1;
 	else
-		i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
+		i = DIV_ROUND_UP(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
 
 	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
 	if (work_alloc == 0) {
@@ -645,16 +641,16 @@
 		if (i < work_alloc
 		    && scat != &rm->data.op_sg[rm->data.op_count]) {
 			len = min(RDS_FRAG_SIZE,
-				ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
+				  sg_dma_len(scat) - rm->data.op_dmaoff);
 			send->s_wr.num_sge = 2;
 
-			send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
+			send->s_sge[1].addr = sg_dma_address(scat);
 			send->s_sge[1].addr += rm->data.op_dmaoff;
 			send->s_sge[1].length = len;
 
 			bytes_sent += len;
 			rm->data.op_dmaoff += len;
-			if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
+			if (rm->data.op_dmaoff == sg_dma_len(scat)) {
 				scat++;
 				rm->data.op_dmasg++;
 				rm->data.op_dmaoff = 0;
@@ -808,8 +804,8 @@
 	}
 
 	/* Convert our struct scatterlist to struct ib_sge */
-	send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
-	send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
+	send->s_sge[0].addr = sg_dma_address(op->op_sg);
+	send->s_sge[0].length = sg_dma_len(op->op_sg);
 	send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
 
 	rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
@@ -879,7 +875,7 @@
 	 * Instead of knowing how to return a partial rdma read/write we insist that there
 	 * be enough work requests to send the entire message.
 	 */
-	i = ceil(op->op_count, max_sge);
+	i = DIV_ROUND_UP(op->op_count, max_sge);
 
 	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
 	if (work_alloc != i) {
@@ -901,7 +897,9 @@
 		send->s_queued = jiffies;
 		send->s_op = NULL;
 
-		nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
+		if (!op->op_notify)
+			nr_sig += rds_ib_set_wr_signal_state(ic, send,
+							     op->op_notify);
 
 		send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
 		send->s_rdma_wr.remote_addr = remote_addr;
@@ -921,9 +919,8 @@
 
 		for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
 		     scat != &op->op_sg[op->op_count]; j++) {
-			len = ib_sg_dma_len(ic->i_cm_id->device, scat);
-			send->s_sge[j].addr =
-				 ib_sg_dma_address(ic->i_cm_id->device, scat);
+			len = sg_dma_len(scat);
+			send->s_sge[j].addr = sg_dma_address(scat);
 			send->s_sge[j].length = len;
 			send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;
 
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index 9252ad1..ac46d89 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -42,7 +42,7 @@
 static const char *const rds_ib_stat_names[] = {
 	"ib_connect_raced",
 	"ib_listen_closed_stale",
-	"s_ib_evt_handler_call",
+	"ib_evt_handler_call",
 	"ib_tasklet_call",
 	"ib_tx_cq_event",
 	"ib_tx_ring_full",
diff --git a/net/rds/info.c b/net/rds/info.c
index e367a97..03f6fd5 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -193,7 +193,7 @@
 		ret = -ENOMEM;
 		goto out;
 	}
-	ret = get_user_pages_fast(start, nr_pages, 1, pages);
+	ret = get_user_pages_fast(start, nr_pages, FOLL_WRITE, pages);
 	if (ret != nr_pages) {
 		if (ret > 0)
 			nr_pages = ret;
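
Both this call site and the one in rds_pin_pages() (net/rds/rdma.c, below) follow the v5.4 get_user_pages_fast() signature, whose third argument is a FOLL_* flags bitmask rather than a write boolean. The call-site pattern, shown as a kernel-context fragment rather than standalone code:

	/* Kernel-context sketch, not standalone: select FOLL_WRITE only
	 * when the pinned pages will be written to. */
	unsigned int gup_flags = write ? FOLL_WRITE : 0;
	int ret = get_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
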
diff --git a/net/rds/message.c b/net/rds/message.c
index 4b00b11..50f13f1 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -308,17 +308,28 @@
 /*
  * RDS ops use this to grab SG entries from the rm's sg pool.
  */
-struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
+struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents,
+					  int *ret)
 {
 	struct scatterlist *sg_first = (struct scatterlist *) &rm[1];
 	struct scatterlist *sg_ret;
 
-	WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
-	WARN_ON(!nents);
-
-	if (rm->m_used_sgs + nents > rm->m_total_sgs)
+	if (WARN_ON(!ret))
 		return NULL;
 
+	if (nents <= 0) {
+		pr_warn("rds: alloc sgs failed! nents <= 0\n");
+		*ret = -EINVAL;
+		return NULL;
+	}
+
+	if (rm->m_used_sgs + nents > rm->m_total_sgs) {
+		pr_warn("rds: alloc sgs failed! total %d used %d nents %d\n",
+			rm->m_total_sgs, rm->m_used_sgs, nents);
+		*ret = -ENOMEM;
+		return NULL;
+	}
+
 	sg_ret = &sg_first[rm->m_used_sgs];
 	sg_init_table(sg_ret, nents);
 	rm->m_used_sgs += nents;
@@ -330,8 +341,9 @@
 {
 	struct rds_message *rm;
 	unsigned int i;
-	int num_sgs = ceil(total_len, PAGE_SIZE);
+	int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE);
 	int extra_bytes = num_sgs * sizeof(struct scatterlist);
+	int ret;
 
 	rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
 	if (!rm)
@@ -339,11 +351,11 @@
 
 	set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
-	rm->data.op_nents = ceil(total_len, PAGE_SIZE);
-	rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
+	rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE);
+	rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret);
 	if (!rm->data.op_sg) {
 		rds_message_put(rm);
-		return ERR_PTR(-ENOMEM);
+		return ERR_PTR(ret);
 	}
 
 	for (i = 0; i < rm->data.op_nents; ++i) {
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 98237fe..916f5ec 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -158,7 +158,8 @@
 {
 	int ret;
 
-	ret = get_user_pages_fast(user_addr, nr_pages, write, pages);
+	ret = get_user_pages_fast(user_addr, nr_pages, write ? FOLL_WRITE : 0,
+				  pages);
 
 	if (ret >= 0 && ret < nr_pages) {
 		while (ret--)
@@ -517,9 +518,10 @@
 	return tot_pages;
 }
 
-int rds_rdma_extra_size(struct rds_rdma_args *args)
+int rds_rdma_extra_size(struct rds_rdma_args *args,
+			struct rds_iov_vector *iov)
 {
-	struct rds_iovec vec;
+	struct rds_iovec *vec;
 	struct rds_iovec __user *local_vec;
 	int tot_pages = 0;
 	unsigned int nr_pages;
@@ -530,13 +532,23 @@
 	if (args->nr_local == 0)
 		return -EINVAL;
 
-	/* figure out the number of pages in the vector */
-	for (i = 0; i < args->nr_local; i++) {
-		if (copy_from_user(&vec, &local_vec[i],
-				   sizeof(struct rds_iovec)))
-			return -EFAULT;
+	iov->iov = kcalloc(args->nr_local,
+			   sizeof(struct rds_iovec),
+			   GFP_KERNEL);
+	if (!iov->iov)
+		return -ENOMEM;
 
-		nr_pages = rds_pages_in_vec(&vec);
+	vec = &iov->iov[0];
+
+	if (copy_from_user(vec, local_vec, args->nr_local *
+			   sizeof(struct rds_iovec)))
+		return -EFAULT;
+	iov->len = args->nr_local;
+
+	/* figure out the number of pages in the vector */
+	for (i = 0; i < args->nr_local; i++, vec++) {
+
+		nr_pages = rds_pages_in_vec(vec);
 		if (nr_pages == 0)
 			return -EINVAL;
 
@@ -558,15 +570,15 @@
  * Extract all arguments and set up the rdma_op
  */
 int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
-			  struct cmsghdr *cmsg)
+		       struct cmsghdr *cmsg,
+		       struct rds_iov_vector *vec)
 {
 	struct rds_rdma_args *args;
 	struct rm_rdma_op *op = &rm->rdma;
 	int nr_pages;
 	unsigned int nr_bytes;
 	struct page **pages = NULL;
-	struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
-	int iov_size;
+	struct rds_iovec *iovs;
 	unsigned int i, j;
 	int ret = 0;
 
@@ -586,31 +598,23 @@
 		goto out_ret;
 	}
 
-	/* Check whether to allocate the iovec area */
-	iov_size = args->nr_local * sizeof(struct rds_iovec);
-	if (args->nr_local > UIO_FASTIOV) {
-		iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL);
-		if (!iovs) {
-			ret = -ENOMEM;
-			goto out_ret;
-		}
+	if (vec->len != args->nr_local) {
+		ret = -EINVAL;
+		goto out_ret;
 	}
 
-	if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) {
-		ret = -EFAULT;
-		goto out;
-	}
+	iovs = vec->iov;
 
 	nr_pages = rds_rdma_pages(iovs, args->nr_local);
 	if (nr_pages < 0) {
 		ret = -EINVAL;
-		goto out;
+		goto out_ret;
 	}
 
 	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
 	if (!pages) {
 		ret = -ENOMEM;
-		goto out;
+		goto out_ret;
 	}
 
 	op->op_write = !!(args->flags & RDS_RDMA_READWRITE);
@@ -620,11 +624,9 @@
 	op->op_active = 1;
 	op->op_recverr = rs->rs_recverr;
 	WARN_ON(!nr_pages);
-	op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
-	if (!op->op_sg) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	op->op_sg = rds_message_alloc_sgs(rm, nr_pages, &ret);
+	if (!op->op_sg)
+		goto out_pages;
 
 	if (op->op_notify || op->op_recverr) {
 		/* We allocate an uninitialized notifier here, because
@@ -635,20 +637,10 @@
 		op->op_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
 		if (!op->op_notifier) {
 			ret = -ENOMEM;
-			goto out;
+			goto out_pages;
 		}
 		op->op_notifier->n_user_token = args->user_token;
 		op->op_notifier->n_status = RDS_RDMA_SUCCESS;
-
-		/* Enable rmda notification on data operation for composite
-		 * rds messages and make sure notification is enabled only
-		 * for the data operation which follows it so that application
-		 * gets notified only after full message gets delivered.
-		 */
-		if (rm->data.op_sg) {
-			rm->rdma.op_notify = 0;
-			rm->data.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
-		}
 	}
 
 	/* The cookie contains the R_Key of the remote memory region, and
@@ -681,7 +673,7 @@
 		 */
 		ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
 		if (ret < 0)
-			goto out;
+			goto out_pages;
 		else
 			ret = 0;
 
@@ -714,13 +706,11 @@
 				nr_bytes,
 				(unsigned int) args->remote_vec.bytes);
 		ret = -EINVAL;
-		goto out;
+		goto out_pages;
 	}
 	op->op_bytes = nr_bytes;
 
-out:
-	if (iovs != iovstack)
-		sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size);
+out_pages:
 	kfree(pages);
 out_ret:
 	if (ret)
@@ -838,11 +828,9 @@
 	rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT);
 	rm->atomic.op_active = 1;
 	rm->atomic.op_recverr = rs->rs_recverr;
-	rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
-	if (!rm->atomic.op_sg) {
-		ret = -ENOMEM;
+	rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1, &ret);
+	if (!rm->atomic.op_sg)
 		goto err;
-	}
 
 	/* verify 8 byte-aligned */
 	if (args->local_addr & 0x7) {
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 6b0f57c..5f741e5 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -43,6 +43,9 @@
 static struct rdma_cm_id *rds6_rdma_listen_id;
 #endif
 
+/* Per IB specification 7.7.3, service level is a 4-bit field. */
+#define TOS_TO_SL(tos)		((tos) & 0xF)
+
 static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 					 struct rdma_cm_event *event,
 					 bool isv6)
@@ -51,6 +54,8 @@
 	struct rds_connection *conn = cm_id->context;
 	struct rds_transport *trans;
 	int ret = 0;
+	int *err;
+	u8 len;
 
 	rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
 		 event->event, rdma_event_msg(event->event));
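
Since the InfiniBand service level field is only 4 bits wide (IB spec 7.7.3), TOS_TO_SL() simply keeps the low nibble of the user TOS. A runnable illustration:

#include <stdint.h>
#include <stdio.h>

#define TOS_TO_SL(tos)	((tos) & 0xF)

int main(void)
{
	uint8_t tos = 0x2A;

	/* tos 0x2A -> sl 0xA: only the low 4 bits survive. */
	printf("tos 0x%02X -> sl 0x%X\n", tos, TOS_TO_SL(tos));
	return 0;
}
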
@@ -81,6 +86,7 @@
 		break;
 
 	case RDMA_CM_EVENT_ADDR_RESOLVED:
+		rdma_set_service_type(cm_id, conn->c_tos);
 		/* XXX do we need to clean up if this fails? */
 		ret = rdma_resolve_route(cm_id,
 					 RDS_RDMA_RESOLVE_TIMEOUT_MS);
@@ -94,21 +100,39 @@
 			struct rds_ib_connection *ibic;
 
 			ibic = conn->c_transport_data;
-			if (ibic && ibic->i_cm_id == cm_id)
+			if (ibic && ibic->i_cm_id == cm_id) {
+				cm_id->route.path_rec[0].sl =
+					TOS_TO_SL(conn->c_tos);
 				ret = trans->cm_initiate_connect(cm_id, isv6);
-			else
+			} else {
 				rds_conn_drop(conn);
+			}
 		}
 		break;
 
 	case RDMA_CM_EVENT_ESTABLISHED:
-		trans->cm_connect_complete(conn, event);
+		if (conn)
+			trans->cm_connect_complete(conn, event);
 		break;
 
 	case RDMA_CM_EVENT_REJECTED:
+		if (!conn)
+			break;
+		err = (int *)rdma_consumer_reject_data(cm_id, event, &len);
+		if (!err ||
+		    (err && len >= sizeof(*err) &&
+		     ((*err) <= RDS_RDMA_REJ_INCOMPAT))) {
+			pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
+				&conn->c_laddr, &conn->c_faddr);
+
+			if (!conn->c_tos)
+				conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
+
+			rds_conn_drop(conn);
+		}
 		rdsdebug("Connection rejected: %s\n",
 			 rdma_reject_msg(cm_id, event->status));
-		/* FALLTHROUGH */
+		break;
 	case RDMA_CM_EVENT_ADDR_ERROR:
 	case RDMA_CM_EVENT_ROUTE_ERROR:
 	case RDMA_CM_EVENT_CONNECT_ERROR:
@@ -120,6 +144,8 @@
 		break;
 
 	case RDMA_CM_EVENT_DISCONNECTED:
+		if (!conn)
+			break;
 		rdsdebug("DISCONNECT event - dropping connection "
 			 "%pI6c->%pI6c\n", &conn->c_laddr,
 			 &conn->c_faddr);
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index 200d313..bfafd4a 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -11,6 +11,12 @@
 
 #define RDS_RDMA_RESOLVE_TIMEOUT_MS     5000
 
+/* The reject reason below covers a legacy interoperability issue with
+ * non-Linux RDS endpoints, where an older-version incompatibility is
+ * conveyed via the value 1. For future version(s), a properly encoded
+ * reject reason should be used.
+ */
+#define RDS_RDMA_REJ_INCOMPAT		1
+
 int rds_rdma_conn_connect(struct rds_connection *conn);
 int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 			      struct rdma_cm_event *event);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c4dcf65..53e8691 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -19,10 +19,13 @@
  */
 #define RDS_PROTOCOL_3_0	0x0300
 #define RDS_PROTOCOL_3_1	0x0301
+#define RDS_PROTOCOL_4_0	0x0400
+#define RDS_PROTOCOL_4_1	0x0401
 #define RDS_PROTOCOL_VERSION	RDS_PROTOCOL_3_1
 #define RDS_PROTOCOL_MAJOR(v)	((v) >> 8)
 #define RDS_PROTOCOL_MINOR(v)	((v) & 255)
 #define RDS_PROTOCOL(maj, min)	(((maj) << 8) | min)
+#define RDS_PROTOCOL_COMPAT_VERSION	RDS_PROTOCOL_3_1
 
 /* The following ports, 16385, 18634, 18635, are registered with IANA as
  * the ports to be used for RDS over TCP and UDP.  Currently, only RDS over
@@ -48,10 +51,6 @@
 }
 #endif
 
-/* XXX is there one of these somewhere? */
-#define ceil(x, y) \
-	({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; })
-
 #define RDS_FRAG_SHIFT	12
 #define RDS_FRAG_SIZE	((unsigned int)(1 << RDS_FRAG_SHIFT))
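
The private ceil() macro removed here duplicated the kernel's DIV_ROUND_UP(), and all callers in message.c, ib_send.c and send.c are switched over in this patch. A runnable check of the ceiling division as used for fragment counting:

#include <stdio.h>

/* DIV_ROUND_UP as defined in the kernel; the removed ceil() computed
 * the same ceiling division. */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define RDS_FRAG_SIZE		4096u

int main(void)
{
	/* A 9000-byte payload needs three 4 KiB fragments. */
	printf("%u fragments\n", DIV_ROUND_UP(9000u, RDS_FRAG_SIZE));
	return 0;
}
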
 
@@ -155,9 +154,13 @@
 	struct rds_cong_map	*c_fcong;
 
 	/* Protocol version */
+	unsigned int		c_proposed_version;
 	unsigned int		c_version;
 	possible_net_t		c_net;
 
+	/* TOS */
+	u8			c_tos;
+
 	struct list_head	c_map_item;
 	unsigned long		c_map_queued;
 
@@ -268,6 +271,12 @@
 #define	RDS_MSG_RX_END		2
 #define	RDS_MSG_RX_CMSG		3
 
+/* The following values are whitelisted for usercopy */
+struct rds_inc_usercopy {
+	rds_rdma_cookie_t	rdma_cookie;
+	ktime_t			rx_tstamp;
+};
+
 struct rds_incoming {
 	refcount_t		i_refcount;
 	struct list_head	i_item;
@@ -277,8 +286,7 @@
 	unsigned long		i_rx_jiffies;
 	struct in6_addr		i_saddr;
 
-	rds_rdma_cookie_t	i_rdma_cookie;
-	struct timeval		i_rx_tstamp;
+	struct rds_inc_usercopy i_usercopy;
 	u64			i_rx_lat_trace[RDS_RX_MAX_TRACES];
 };
 
@@ -386,6 +394,18 @@
 	INIT_LIST_HEAD(&q->zcookie_head);
 }
 
+struct rds_iov_vector {
+	struct rds_iovec *iov;
+	int               len;
+};
+
+struct rds_iov_vector_arr {
+	struct rds_iov_vector *vec;
+	int                    len;
+	int                    indx;
+	int                    incr;
+};
+
 struct rds_message {
 	refcount_t		m_refcount;
 	struct list_head	m_sock_item;
@@ -461,7 +481,6 @@
 		} rdma;
 		struct rm_data_op {
 			unsigned int		op_active:1;
-			unsigned int		op_notify:1;
 			unsigned int		op_nents;
 			unsigned int		op_count;
 			unsigned int		op_dmasg;
@@ -559,6 +578,7 @@
 	void (*free_mr)(void *trans_private, int invalidate);
 	void (*flush_mrs)(void);
 	bool (*t_unloading)(struct rds_connection *conn);
+	u8 (*get_tos_map)(u8 tos);
 };
 
 /* Bind hash table key length.  It is the sum of the size of a struct
@@ -640,6 +660,7 @@
 	u8			rs_rx_traces;
 	u8			rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
 	struct rds_msg_zcopy_queue rs_zcookie_queue;
+	u8			rs_tos;
 };
 
 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
@@ -701,7 +722,7 @@
 	uint64_t	s_cong_send_blocked;
 	uint64_t	s_recv_bytes_added_to_socket;
 	uint64_t	s_recv_bytes_removed_from_socket;
-
+	uint64_t	s_send_stuck_rm;
 };
 
 /* af_rds.c */
@@ -748,13 +769,14 @@
 struct rds_connection *rds_conn_create(struct net *net,
 				       const struct in6_addr *laddr,
 				       const struct in6_addr *faddr,
-				       struct rds_transport *trans, gfp_t gfp,
+				       struct rds_transport *trans,
+				       u8 tos, gfp_t gfp,
 				       int dev_if);
 struct rds_connection *rds_conn_create_outgoing(struct net *net,
 						const struct in6_addr *laddr,
 						const struct in6_addr *faddr,
 						struct rds_transport *trans,
-						gfp_t gfp, int dev_if);
+						u8 tos, gfp_t gfp, int dev_if);
 void rds_conn_shutdown(struct rds_conn_path *cpath);
 void rds_conn_destroy(struct rds_connection *conn);
 void rds_conn_drop(struct rds_connection *conn);
@@ -827,7 +849,8 @@
 
 /* message.c */
 struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
-struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
+struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents,
+					  int *ret);
 int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
 			       bool zcopy);
 struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
@@ -904,13 +927,13 @@
 int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
 int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
 void rds_rdma_drop_keys(struct rds_sock *rs);
-int rds_rdma_extra_size(struct rds_rdma_args *args);
-int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
-			  struct cmsghdr *cmsg);
+int rds_rdma_extra_size(struct rds_rdma_args *args,
+			struct rds_iov_vector *iov);
 int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
 			  struct cmsghdr *cmsg);
 int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
-			  struct cmsghdr *cmsg);
+			  struct cmsghdr *cmsg,
+			  struct rds_iov_vector *vec);
 int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
 			  struct cmsghdr *cmsg);
 void rds_rdma_free_op(struct rm_rdma_op *ro);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 504cd6b..c840497 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -43,18 +43,14 @@
 void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
 		 struct in6_addr *saddr)
 {
-	int i;
-
 	refcount_set(&inc->i_refcount, 1);
 	INIT_LIST_HEAD(&inc->i_item);
 	inc->i_conn = conn;
 	inc->i_saddr = *saddr;
-	inc->i_rdma_cookie = 0;
-	inc->i_rx_tstamp.tv_sec = 0;
-	inc->i_rx_tstamp.tv_usec = 0;
+	inc->i_usercopy.rdma_cookie = 0;
+	inc->i_usercopy.rx_tstamp = ktime_set(0, 0);
 
-	for (i = 0; i < RDS_RX_MAX_TRACES; i++)
-		inc->i_rx_lat_trace[i] = 0;
+	memset(inc->i_rx_lat_trace, 0, sizeof(inc->i_rx_lat_trace));
 }
 EXPORT_SYMBOL_GPL(rds_inc_init);
 
@@ -66,9 +62,8 @@
 	inc->i_conn = cp->cp_conn;
 	inc->i_conn_path = cp;
 	inc->i_saddr = *saddr;
-	inc->i_rdma_cookie = 0;
-	inc->i_rx_tstamp.tv_sec = 0;
-	inc->i_rx_tstamp.tv_usec = 0;
+	inc->i_usercopy.rdma_cookie = 0;
+	inc->i_usercopy.rx_tstamp = ktime_set(0, 0);
 }
 EXPORT_SYMBOL_GPL(rds_inc_path_init);
 
@@ -191,7 +186,7 @@
 		case RDS_EXTHDR_RDMA_DEST:
 			/* We ignore the size for now. We could stash it
 			 * somewhere and use it for error checking. */
-			inc->i_rdma_cookie = rds_rdma_make_cookie(
+			inc->i_usercopy.rdma_cookie = rds_rdma_make_cookie(
 					be32_to_cpu(buffer.rdma_dest.h_rdma_rkey),
 					be32_to_cpu(buffer.rdma_dest.h_rdma_offset));
 
@@ -385,7 +380,7 @@
 				      be32_to_cpu(inc->i_hdr.h_len),
 				      inc->i_hdr.h_dport);
 		if (sock_flag(sk, SOCK_RCVTSTAMP))
-			do_gettimeofday(&inc->i_rx_tstamp);
+			inc->i_usercopy.rx_tstamp = ktime_get_real();
 		rds_inc_addref(inc);
 		inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
 		list_add_tail(&inc->i_item, &rs->rs_recv_queue);
@@ -545,18 +540,32 @@
 {
 	int ret = 0;
 
-	if (inc->i_rdma_cookie) {
+	if (inc->i_usercopy.rdma_cookie) {
 		ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
-				sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
+				sizeof(inc->i_usercopy.rdma_cookie),
+				&inc->i_usercopy.rdma_cookie);
 		if (ret)
 			goto out;
 	}
 
-	if ((inc->i_rx_tstamp.tv_sec != 0) &&
+	if ((inc->i_usercopy.rx_tstamp != 0) &&
 	    sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) {
-		ret = put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
-			       sizeof(struct timeval),
-			       &inc->i_rx_tstamp);
+		struct __kernel_old_timeval tv =
+			ns_to_kernel_old_timeval(inc->i_usercopy.rx_tstamp);
+
+		if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) {
+			ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
+				       sizeof(tv), &tv);
+		} else {
+			struct __kernel_sock_timeval sk_tv;
+
+			sk_tv.tv_sec = tv.tv_sec;
+			sk_tv.tv_usec = tv.tv_usec;
+
+			ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
+				       sizeof(sk_tv), &sk_tv);
+		}
+
 		if (ret)
 			goto out;
 	}
@@ -775,6 +784,7 @@
 
 	minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence);
 	minfo.len = be32_to_cpu(inc->i_hdr.h_len);
+	minfo.tos = inc->i_conn->c_tos;
 
 	if (flip) {
 		minfo.laddr = daddr;
@@ -803,6 +813,7 @@
 
 	minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence);
 	minfo6.len = be32_to_cpu(inc->i_hdr.h_len);
+	minfo6.tos = inc->i_conn->c_tos;
 
 	if (flip) {
 		minfo6.laddr = *daddr;
@@ -816,6 +827,8 @@
 		minfo6.fport = inc->i_hdr.h_dport;
 	}
 
+	minfo6.flags = 0;
+
 	rds_info_copy(iter, &minfo6, sizeof(minfo6));
 }
 #endif
diff --git a/net/rds/send.c b/net/rds/send.c
index fe785ee..82dcd8b 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -145,6 +145,7 @@
 	LIST_HEAD(to_be_dropped);
 	int batch_count;
 	unsigned long send_gen = 0;
+	int same_rm = 0;
 
 restart:
 	batch_count = 0;
@@ -200,6 +201,17 @@
 
 		rm = cp->cp_xmit_rm;
 
+		if (!rm) {
+			same_rm = 0;
+		} else {
+			same_rm++;
+			if (same_rm >= 4096) {
+				rds_stats_inc(s_send_stuck_rm);
+				ret = -EAGAIN;
+				break;
+			}
+		}
+
 		/*
 		 * If between sending messages, we can send a pending congestion
 		 * map update.
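
The same_rm counter added above is a watchdog on the send loop: if 4096 consecutive iterations see the same cp_xmit_rm, the message is assumed stuck, the new s_send_stuck_rm statistic is bumped, and the worker backs off with -EAGAIN instead of spinning. A compressed userspace model of the guard:

#include <errno.h>
#include <stdio.h>

/* Bail out with -EAGAIN once the same message has been seen 4096
 * times in a row. */
static int send_loop(const void *stuck_rm)
{
	const void *rm;
	int same_rm = 0;

	for (;;) {
		rm = stuck_rm;	/* models cp->cp_xmit_rm never advancing */
		if (!rm) {
			same_rm = 0;
		} else if (++same_rm >= 4096) {
			/* rds_stats_inc(s_send_stuck_rm) in the kernel */
			return -EAGAIN;
		}
	}
}

int main(void)
{
	int dummy;

	printf("ret=%d\n", send_loop(&dummy)); /* -EAGAIN after 4096 passes */
	return 0;
}
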
@@ -491,14 +503,12 @@
 	struct rm_rdma_op *ro;
 	struct rds_notifier *notifier;
 	unsigned long flags;
-	unsigned int notify = 0;
 
 	spin_lock_irqsave(&rm->m_rs_lock, flags);
 
-	notify =  rm->rdma.op_notify | rm->data.op_notify;
 	ro = &rm->rdma;
 	if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
-	    ro->op_active && notify && ro->op_notifier) {
+	    ro->op_active && ro->op_notify && ro->op_notifier) {
 		notifier = ro->op_notifier;
 		rs = rm->m_rs;
 		sock_hold(rds_rs_to_sk(rs));
@@ -876,13 +886,18 @@
  * rds_message is getting to be quite complicated, and we'd like to allocate
  * it all in one go. This figures out how big it needs to be up front.
  */
-static int rds_rm_size(struct msghdr *msg, int num_sgs)
+static int rds_rm_size(struct msghdr *msg, int num_sgs,
+		       struct rds_iov_vector_arr *vct)
 {
 	struct cmsghdr *cmsg;
 	int size = 0;
 	int cmsg_groups = 0;
 	int retval;
 	bool zcopy_cookie = false;
+	struct rds_iov_vector *iov, *tmp_iov;
+
+	if (num_sgs < 0)
+		return -EINVAL;
 
 	for_each_cmsghdr(cmsg, msg) {
 		if (!CMSG_OK(msg, cmsg))
@@ -893,8 +908,24 @@
 
 		switch (cmsg->cmsg_type) {
 		case RDS_CMSG_RDMA_ARGS:
+			if (vct->indx >= vct->len) {
+				vct->len += vct->incr;
+				tmp_iov =
+					krealloc(vct->vec,
+						 vct->len *
+						 sizeof(struct rds_iov_vector),
+						 GFP_KERNEL);
+				if (!tmp_iov) {
+					vct->len -= vct->incr;
+					return -ENOMEM;
+				}
+				vct->vec = tmp_iov;
+			}
+			iov = &vct->vec[vct->indx];
+			memset(iov, 0, sizeof(struct rds_iov_vector));
+			vct->indx++;
 			cmsg_groups |= 1;
-			retval = rds_rdma_extra_size(CMSG_DATA(cmsg));
+			retval = rds_rdma_extra_size(CMSG_DATA(cmsg), iov);
 			if (retval < 0)
 				return retval;
 			size += retval;
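
rds_rm_size() now collects one rds_iov_vector per RDS_CMSG_RDMA_ARGS cmsg, growing the backing array by vct->incr slots with krealloc() on demand and restoring the old length if allocation fails. A userspace model of the growth pattern (here the length is bumped only after a successful realloc, which is equivalent):

#include <stdio.h>
#include <stdlib.h>

struct iov_vector_arr {
	void **vec;
	int len, indx, incr;
};

/* Grow the array on demand and hand out the next free slot. */
static void **get_slot(struct iov_vector_arr *vct)
{
	if (vct->indx >= vct->len) {
		void **tmp = realloc(vct->vec,
				     (vct->len + vct->incr) * sizeof(*tmp));
		if (!tmp)
			return NULL;	/* vct->len unchanged on failure */
		vct->vec = tmp;
		vct->len += vct->incr;
	}
	return &vct->vec[vct->indx++];
}

int main(void)
{
	struct iov_vector_arr vct = { NULL, 0, 0, 1 };
	int i;

	for (i = 0; i < 3; i++)
		printf("slot %d at %p\n", i, (void *)get_slot(&vct));
	free(vct.vec);
	return 0;
}
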
@@ -951,10 +982,11 @@
 }
 
 static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
-			 struct msghdr *msg, int *allocated_mr)
+			 struct msghdr *msg, int *allocated_mr,
+			 struct rds_iov_vector_arr *vct)
 {
 	struct cmsghdr *cmsg;
-	int ret = 0;
+	int ret = 0, ind = 0;
 
 	for_each_cmsghdr(cmsg, msg) {
 		if (!CMSG_OK(msg, cmsg))
@@ -968,7 +1000,10 @@
 		 */
 		switch (cmsg->cmsg_type) {
 		case RDS_CMSG_RDMA_ARGS:
-			ret = rds_cmsg_rdma_args(rs, rm, cmsg);
+			if (ind >= vct->indx)
+				return -ENOMEM;
+			ret = rds_cmsg_rdma_args(rs, rm, cmsg, &vct->vec[ind]);
+			ind++;
 			break;
 
 		case RDS_CMSG_RDMA_DEST:
@@ -1082,8 +1117,15 @@
 	size_t total_payload_len = payload_len, rdma_payload_len = 0;
 	bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
 		      sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
-	int num_sgs = ceil(payload_len, PAGE_SIZE);
+	int num_sgs = DIV_ROUND_UP(payload_len, PAGE_SIZE);
 	int namelen;
+	struct rds_iov_vector_arr vct;
+	int ind;
+
+	memset(&vct, 0, sizeof(vct));
+
+	/* Expect one RDMA cmsg per rds_sendmsg(); the vector can still
+	 * grow if more are needed.
+	 */
+	vct.incr = 1;
 
 	/* Mirror Linux UDP mirror of BSD error message compatibility */
 	/* XXX: Perhaps MSG_MORE someday */
@@ -1102,7 +1144,7 @@
 		case AF_INET:
 			if (usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
 			    usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
-			    IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) {
+			    ipv4_is_multicast(usin->sin_addr.s_addr)) {
 				ret = -EINVAL;
 				goto out;
 			}
@@ -1133,7 +1175,7 @@
 				addr4 = sin6->sin6_addr.s6_addr32[3];
 				if (addr4 == htonl(INADDR_ANY) ||
 				    addr4 == htonl(INADDR_BROADCAST) ||
-				    IN_MULTICAST(ntohl(addr4))) {
+				    ipv4_is_multicast(addr4)) {
 					ret = -EINVAL;
 					goto out;
 				}
@@ -1220,7 +1262,7 @@
 		num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
 	}
 	/* size of rm including all sgs */
-	ret = rds_rm_size(msg, num_sgs);
+	ret = rds_rm_size(msg, num_sgs, &vct);
 	if (ret < 0)
 		goto out;
 
@@ -1232,11 +1274,9 @@
 
 	/* Attach data to the rm */
 	if (payload_len) {
-		rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
-		if (!rm->data.op_sg) {
-			ret = -ENOMEM;
+		rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret);
+		if (!rm->data.op_sg)
 			goto out;
-		}
 		ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
 		if (ret)
 			goto out;
@@ -1247,12 +1287,13 @@
 
 	/* rds_conn_create has a spinlock that runs with IRQ off.
 	 * Caching the conn in the socket helps a lot. */
-	if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
+	if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) &&
+	    rs->rs_tos == rs->rs_conn->c_tos) {
 		conn = rs->rs_conn;
-	else {
+	} else {
 		conn = rds_conn_create_outgoing(sock_net(sock->sk),
 						&rs->rs_bound_addr, &daddr,
-						rs->rs_transport,
+						rs->rs_transport, rs->rs_tos,
 						sock->sk->sk_allocation,
 						scope_id);
 		if (IS_ERR(conn)) {
@@ -1270,7 +1311,7 @@
 	rm->m_conn_path = cpath;
 
 	/* Parse any control messages the user may have included. */
-	ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
+	ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct);
 	if (ret) {
 		/* Trigger connection so that its ready for the next retry */
 		if (ret ==  -EAGAIN)
@@ -1348,9 +1389,18 @@
 	if (ret)
 		goto out;
 	rds_message_put(rm);
+
+	for (ind = 0; ind < vct.indx; ind++)
+		kfree(vct.vec[ind].iov);
+	kfree(vct.vec);
+
 	return payload_len;
 
 out:
+	for (ind = 0; ind < vct.indx; ind++)
+		kfree(vct.vec[ind].iov);
+	kfree(vct.vec);
+
 	/* If the user included a RDMA_MAP cmsg, we allocated a MR on the fly.
 	 * If the sendmsg goes through, we keep the MR. If it fails with EAGAIN
 	 * or in any other way, we need to destroy the MR again */
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 73be187..9e87da4 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -76,6 +76,9 @@
 	"cong_update_received",
 	"cong_send_error",
 	"cong_send_blocked",
+	"recv_bytes_added_to_sock",
+	"recv_bytes_freed_fromsock",
+	"send_stuck_rm",
 };
 
 void rds_stats_info_copy(struct rds_info_iterator *iter,
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index b9bbcf3..66121bc 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -267,6 +267,7 @@
 		tsinfo.last_sent_nxt = tc->t_last_sent_nxt;
 		tsinfo.last_expected_una = tc->t_last_expected_una;
 		tsinfo.last_seen_una = tc->t_last_seen_una;
+		tsinfo.tos = tc->t_cpath->cp_conn->c_tos;
 
 		rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
 	}
@@ -452,6 +453,12 @@
 
 static void rds_tcp_exit(void);
 
+static u8 rds_tcp_get_tos_map(u8 tos)
+{
+	/* All user TOS values are mapped to the default of 0 for the TCP transport. */
+	return 0;
+}
+
 struct rds_transport rds_tcp_transport = {
 	.laddr_check		= rds_tcp_laddr_check,
 	.xmit_path_prepare	= rds_tcp_xmit_path_prepare,
@@ -466,6 +473,7 @@
 	.inc_free		= rds_tcp_inc_free,
 	.stats_info_copy	= rds_tcp_stats_info_copy,
 	.exit			= rds_tcp_exit,
+	.get_tos_map		= rds_tcp_get_tos_map,
 	.t_owner		= THIS_MODULE,
 	.t_name			= "tcp",
 	.t_type			= RDS_TRANS_TCP,
@@ -543,7 +551,7 @@
 		tbl = kmemdup(rds_tcp_sysctl_table,
 			      sizeof(rds_tcp_sysctl_table), GFP_KERNEL);
 		if (!tbl) {
-			pr_warn("could not set allocate syctl table\n");
+			pr_warn("could not allocate sysctl table\n");
 			return -ENOMEM;
 		}
 		rtn->ctl_table = tbl;
@@ -600,7 +608,7 @@
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
 		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
-		if (net != c_net || !tc->t_sock)
+		if (net != c_net)
 			continue;
 		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
 			list_move_tail(&tc->t_tcp_node, &tmp_list);
@@ -623,7 +631,7 @@
 	if (rtn->rds_tcp_sysctl)
 		unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
 
-	if (net != &init_net && rtn->ctl_table)
+	if (net != &init_net)
 		kfree(rtn->ctl_table);
 }
 
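
The new .get_tos_map hook above lets each RDS transport decide how a user-requested TOS value maps onto the wire; TCP collapses every value to 0. A transport that supports QoS could install a pass-through mapping instead; a sketch under that assumption, with a hypothetical rds_mytrans_get_tos_map() name:

#include <linux/types.h>

/* Hypothetical mapping for a transport that honours the user's TOS. */
static u8 rds_mytrans_get_tos_map(u8 tos)
{
	return tos;	/* pass the requested TOS through unchanged */
}
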
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index c12203f..810a3a4 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -200,7 +200,7 @@
 
 	conn = rds_conn_create(sock_net(sock->sk),
 			       my_addr, peer_addr,
-			       &rds_tcp_transport, GFP_KERNEL, dev_if);
+			       &rds_tcp_transport, 0, GFP_KERNEL, dev_if);
 
 	if (IS_ERR(conn)) {
 		ret = PTR_ERR(conn);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index e64f9e4..32dc50f 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -93,6 +93,7 @@
 		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
 	}
 	rcu_read_unlock();
+	cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION;
 }
 EXPORT_SYMBOL_GPL(rds_connect_path_complete);
 
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 060600b..83a7af8 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # RF switch subsystem configuration
 #
diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile
index 87a80ad..dc47b61 100644
--- a/net/rfkill/Makefile
+++ b/net/rfkill/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the RF switch subsystem.
 #
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 1355f5c..f9b08a6 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2006 - 2007 Ivo van Doorn
  * Copyright (C) 2007 Dmitry Torokhov
  * Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/kernel.h>
@@ -510,8 +498,8 @@
 /**
  * rfkill_is_epo_lock_active - returns true if EPO is active
  *
- * Returns 0 (false) if there is NOT an active EPO contidion,
- * and 1 (true) if there is an active EPO contition, which
+ * Returns 0 (false) if there is NOT an active EPO condition,
+ * and 1 (true) if there is an active EPO condition, which
  * locks all radios in one of the BLOCKED states.
  *
  * Can be called in atomic context.
@@ -1143,7 +1131,7 @@
 
 	file->private_data = data;
 
-	return nonseekable_open(inode, file);
+	return stream_open(inode, file);
 
  free:
 	mutex_unlock(&data->mtx);
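
The nonseekable_open() to stream_open() conversion above marks /dev/rfkill as a pure stream: reads and writes carry no file position, so the VFS can skip f_pos serialization. The same pattern for any stream-like character device, as a minimal sketch with illustrative mydev_* names:

#include <linux/fs.h>
#include <linux/module.h>

static int mydev_open(struct inode *inode, struct file *file)
{
	/* Declare stream semantics: no seeking, ppos is ignored. */
	return stream_open(inode, file);
}

static const struct file_operations mydev_fops = {
	.owner	= THIS_MODULE,
	.open	= mydev_open,
	/* .read/.write omitted in this sketch */
};
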
diff --git a/net/rfkill/input.c b/net/rfkill/input.c
index b85107b..4b01bae 100644
--- a/net/rfkill/input.c
+++ b/net/rfkill/input.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Input layer to RF Kill interface connector
  *
  * Copyright (c) 2007 Dmitry Torokhov
  * Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
  * If you ever run into a situation in which you have a SW_ type rfkill
  * input device, then you can revive code that was removed in the patch
  * "rfkill-input: remove unused code".
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 0f84658..f5afc9b 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -1,22 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2011, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
-#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
diff --git a/net/rfkill/rfkill.h b/net/rfkill/rfkill.h
index d1117cb..001c40c 100644
--- a/net/rfkill/rfkill.h
+++ b/net/rfkill/rfkill.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2007 Ivo van Doorn
  * Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
  */
 
-/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
 
 #ifndef __RFKILL_INPUT_H
 #define __RFKILL_INPUT_H
diff --git a/net/rose/Makefile b/net/rose/Makefile
index fa24811..3e6638f 100644
--- a/net/rose/Makefile
+++ b/net/rose/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Linux Rose (X.25 PLP) layer.
 #
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index d00a0ef..6a0df7c 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
@@ -68,28 +65,6 @@
 ax25_address rose_callsign;
 
 /*
- * ROSE network devices are virtual network devices encapsulating ROSE
- * frames into AX.25 which will be sent through an AX.25 device, so form a
- * special "super class" of normal net devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key rose_netdev_xmit_lock_key;
-static struct lock_class_key rose_netdev_addr_lock_key;
-
-static void rose_set_lockdep_one(struct net_device *dev,
-				 struct netdev_queue *txq,
-				 void *_unused)
-{
-	lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key);
-}
-
-static void rose_set_lockdep_key(struct net_device *dev)
-{
-	lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key);
-	netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL);
-}
-
-/*
  *	Convert a ROSE address into text.
  */
 char *rose2asc(char *buf, const rose_address *addr)
@@ -689,8 +664,10 @@
 		rose->source_call = user->call;
 		ax25_uid_put(user);
 	} else {
-		if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE))
+		if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
+			dev_put(dev);
 			return -EACCES;
+		}
 		rose->source_call   = *source;
 	}
 
@@ -1299,12 +1276,6 @@
 		return put_user(amount, (unsigned int __user *) argp);
 	}
 
-	case SIOCGSTAMP:
-		return sock_get_timestamp(sk, (struct timeval __user *) argp);
-
-	case SIOCGSTAMPNS:
-		return sock_get_timestampns(sk, (struct timespec __user *) argp);
-
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 	case SIOCGIFDSTADDR:
@@ -1472,6 +1443,7 @@
 	.getname	=	rose_getname,
 	.poll		=	datagram_poll,
 	.ioctl		=	rose_ioctl,
+	.gettstamp	=	sock_gettstamp,
 	.listen		=	rose_listen,
 	.shutdown	=	sock_no_shutdown,
 	.setsockopt	=	rose_setsockopt,
@@ -1539,7 +1511,6 @@
 			free_netdev(dev);
 			goto fail;
 		}
-		rose_set_lockdep_key(dev);
 		dev_rose[i] = dev;
 	}
 
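
Two substantive fixes land in af_rose.c above: the EACCES path in the bind code now drops the device reference it acquired, closing a leak, and the per-protocol SIOCGSTAMP/SIOCGSTAMPNS ioctl cases are replaced by the generic .gettstamp hook. A protocol converting the same way simply wires the common helper into its proto_ops; a sketch with the other members elided:

#include <linux/net.h>
#include <net/sock.h>

/* Sketch: the protocol's remaining proto_ops members are omitted here. */
static const struct proto_ops myproto_ops = {
	.family		= PF_ROSE,
	.gettstamp	= sock_gettstamp,	/* replaces the SIOCGSTAMP{,NS} ioctl cases */
};
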
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index 369ca81..051804f 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 9bbbfe3..0d4fab2 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  *
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index 62055d3..f6102e6 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 7af4f99..7b09427 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
@@ -16,6 +13,7 @@
 #include <linux/init.h>
 
 static struct sk_buff_head loopback_queue;
+#define ROSE_LOOPBACK_LIMIT 1000
 static struct timer_list loopback_timer;
 
 static void rose_set_loopback_timer(void);
@@ -35,29 +33,27 @@
 
 int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
 {
-	struct sk_buff *skbn;
+	struct sk_buff *skbn = NULL;
 
-	skbn = skb_clone(skb, GFP_ATOMIC);
+	if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT)
+		skbn = skb_clone(skb, GFP_ATOMIC);
 
-	kfree_skb(skb);
-
-	if (skbn != NULL) {
+	if (skbn) {
+		consume_skb(skb);
 		skb_queue_tail(&loopback_queue, skbn);
 
 		if (!rose_loopback_running())
 			rose_set_loopback_timer();
+	} else {
+		kfree_skb(skb);
 	}
 
 	return 1;
 }
 
-
 static void rose_set_loopback_timer(void)
 {
-	del_timer(&loopback_timer);
-
-	loopback_timer.expires  = jiffies + 10;
-	add_timer(&loopback_timer);
+	mod_timer(&loopback_timer, jiffies + 10);
 }
 
 static void rose_loopback_timer(struct timer_list *unused)
@@ -68,8 +64,12 @@
 	struct sock *sk;
 	unsigned short frametype;
 	unsigned int lci_i, lci_o;
+	int count;
 
-	while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+	for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) {
+		skb = skb_dequeue(&loopback_queue);
+		if (!skb)
+			return;
 		if (skb->len < ROSE_MIN_LEN) {
 			kfree_skb(skb);
 			continue;
@@ -106,6 +106,8 @@
 			kfree_skb(skb);
 		}
 	}
+	if (!skb_queue_empty(&loopback_queue))
+		mod_timer(&loopback_timer, jiffies + 1);
 }
 
 void __exit rose_loopback_clear(void)
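
The loopback rework above follows a common bounded-queue pattern: producers drop frames once the queue holds ROSE_LOOPBACK_LIMIT entries, and the timer handler drains at most that many per run, re-arming itself one jiffy out if a backlog remains so it never monopolizes the CPU. The skeleton of that pattern, as a generic sketch rather than the ROSE code itself (the timer is assumed to be initialized elsewhere with timer_setup()):

#include <linux/skbuff.h>
#include <linux/timer.h>

#define MY_QUEUE_LIMIT 1000

static struct sk_buff_head my_queue;
static struct timer_list my_timer;

static int my_enqueue(struct sk_buff *skb)
{
	if (skb_queue_len(&my_queue) >= MY_QUEUE_LIMIT) {
		kfree_skb(skb);			/* bounded: drop when full */
		return 0;
	}
	skb_queue_tail(&my_queue, skb);
	mod_timer(&my_timer, jiffies + 10);	/* starts or re-arms in one call */
	return 1;
}

static void my_timer_fn(struct timer_list *unused)
{
	struct sk_buff *skb;
	int count;

	/* Drain a bounded batch so a single run cannot hog the CPU. */
	for (count = 0; count < MY_QUEUE_LIMIT; count++) {
		skb = skb_dequeue(&my_queue);
		if (!skb)
			return;
		/* ... process skb ... */
		kfree_skb(skb);
	}
	/* Backlog remains: come back soon instead of looping forever. */
	if (!skb_queue_empty(&my_queue))
		mod_timer(&my_timer, jiffies + 1);
}
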
diff --git a/net/rose/rose_out.c b/net/rose/rose_out.c
index 9ad98b5..9050e33 100644
--- a/net/rose/rose_out.c
+++ b/net/rose/rose_out.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 77e9f85..c533076 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net)
@@ -850,6 +847,7 @@
 
 /*
  *	Route a frame to an appropriate AX.25 connection.
+ *	A NULL ax25_cb indicates an internally generated frame.
  */
 int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
 {
@@ -867,6 +865,10 @@
 
 	if (skb->len < ROSE_MIN_LEN)
 		return res;
+
+	if (!ax25)
+		return rose_loopback_queue(skb, NULL);
+
 	frametype = skb->data[2];
 	lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
 	if (frametype == ROSE_CALL_REQUEST &&
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 7ca5774..4dbc437 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
@@ -105,16 +102,17 @@
 	struct sk_buff *skb;
 	unsigned char  *dptr;
 	unsigned char  lci1, lci2;
-	char buffer[100];
-	int len, faclen = 0;
+	int maxfaclen = 0;
+	int len, faclen;
+	int reserve;
 
-	len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1;
+	reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1;
+	len = ROSE_MIN_LEN;
 
 	switch (frametype) {
 	case ROSE_CALL_REQUEST:
 		len   += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN;
-		faclen = rose_create_facilities(buffer, rose);
-		len   += faclen;
+		maxfaclen = 256;
 		break;
 	case ROSE_CALL_ACCEPTED:
 	case ROSE_CLEAR_REQUEST:
@@ -123,15 +121,16 @@
 		break;
 	}
 
-	if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+	skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC);
+	if (!skb)
 		return;
 
 	/*
 	 *	Space for AX.25 header and PID.
 	 */
-	skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1);
+	skb_reserve(skb, reserve);
 
-	dptr = skb_put(skb, skb_tailroom(skb));
+	dptr = skb_put(skb, len);
 
 	lci1 = (rose->lci >> 8) & 0x0F;
 	lci2 = (rose->lci >> 0) & 0xFF;
@@ -146,7 +145,8 @@
 		dptr   += ROSE_ADDR_LEN;
 		memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
 		dptr   += ROSE_ADDR_LEN;
-		memcpy(dptr, buffer, faclen);
+		faclen = rose_create_facilities(dptr, rose);
+		skb_put(skb, faclen);
 		dptr   += faclen;
 		break;
 
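
The rose_subr.c change above retires a fixed 100-byte stack buffer that the facilities encoder could overrun: worst-case space for the variable part is now budgeted in the skb itself, headroom is reserved for the AX.25 header, and skb_put() grows the data area only by what was actually written. The same allocation discipline reduced to a generic sketch:

#include <linux/skbuff.h>

/* Sketch: size an skb as headroom + fixed part + worst-case variable part. */
static struct sk_buff *build_frame(unsigned int hdr_reserve,
				   unsigned int fixed_len,
				   unsigned int max_var)
{
	struct sk_buff *skb;
	unsigned char *dptr;

	skb = alloc_skb(hdr_reserve + fixed_len + max_var, GFP_ATOMIC);
	if (!skb)
		return NULL;

	skb_reserve(skb, hdr_reserve);	/* headroom for lower-layer headers */
	dptr = skb_put(skb, fixed_len);	/* fixed part is always present */
	memset(dptr, 0, fixed_len);	/* illustrative fill */

	/* The variable-length encoder writes directly into the tailroom,
	 * then skb_put() accounts for exactly what it wrote.
	 */
	dptr[fixed_len] = 0x42;		/* illustrative one-byte variable field */
	skb_put(skb, 1);

	return skb;
}
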
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index 74555fb..b3138fc 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org)
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
index 89a9278..d391d77 100644
--- a/net/rose/sysctl_net_rose.c
+++ b/net/rose/sysctl_net_rose.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  *
  * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com)
  */
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 86f8853..05610c3 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # RxRPC session sockets
 #
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 3c39b88..d72ddb6 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AF_RXRPC implementation
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -97,7 +93,8 @@
 	    srx->transport_len > len)
 		return -EINVAL;
 
-	if (srx->transport.family != rx->family)
+	if (srx->transport.family != rx->family &&
+	    srx->transport.family == AF_INET && rx->family != AF_INET6)
 		return -EAFNOSUPPORT;
 
 	switch (srx->transport.family) {
@@ -134,7 +131,7 @@
 	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
 	struct rxrpc_local *local;
 	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
-	u16 service_id = srx->srx_service;
+	u16 service_id;
 	int ret;
 
 	_enter("%p,%p,%d", rx, saddr, len);
@@ -142,6 +139,7 @@
 	ret = rxrpc_validate_address(rx, srx, len);
 	if (ret < 0)
 		goto error;
+	service_id = srx->srx_service;
 
 	lock_sock(&rx->sk);
 
@@ -195,7 +193,7 @@
 
 service_in_use:
 	write_unlock(&local->services_lock);
-	rxrpc_put_local(local);
+	rxrpc_unuse_local(local);
 	ret = -EADDRINUSE;
 error_unlock:
 	release_sock(&rx->sk);
@@ -268,6 +266,7 @@
  * @gfp: The allocation constraints
  * @notify_rx: Where to send notifications instead of socket queue
  * @upgrade: Request service upgrade for call
+ * @intr: The call is interruptible
  * @debug_id: The debug ID for tracing to be assigned to the call
  *
  * Allow a kernel service to begin a call on the nominated socket.  This just
@@ -285,6 +284,7 @@
 					   gfp_t gfp,
 					   rxrpc_notify_rx_t notify_rx,
 					   bool upgrade,
+					   bool intr,
 					   unsigned int debug_id)
 {
 	struct rxrpc_conn_parameters cp;
@@ -309,6 +309,7 @@
 	memset(&p, 0, sizeof(p));
 	p.user_call_ID = user_call_ID;
 	p.tx_total_len = tx_total_len;
+	p.intr = intr;
 
 	memset(&cp, 0, sizeof(cp));
 	cp.local		= rx->local;
@@ -369,90 +370,57 @@
  * rxrpc_kernel_check_life - Check to see whether a call is still alive
  * @sock: The socket the call is on
  * @call: The call to check
+ * @_life: Where to store the life value
  *
  * Allow a kernel service to find out whether a call is still alive - ie. we're
- * getting ACKs from the server.  Returns a number representing the life state
- * which can be compared to that returned by a previous call.
+ * getting ACKs from the server.  Passes back in *_life a number representing
+ * the life state, which can be compared with the value returned by a previous
+ * call.  Returns true if the call is still alive.
  *
- * If this is a client call, ping ACKs will be sent to the server to find out
- * whether it's still responsive and whether the call is still alive on the
- * server.
+ * If the life state stalls, rxrpc_kernel_probe_life() should be called, and
+ * then roughly two RTT periods should be allowed to elapse before rechecking.
  */
-u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+bool rxrpc_kernel_check_life(const struct socket *sock,
+			     const struct rxrpc_call *call,
+			     u32 *_life)
 {
-	return call->acks_latest;
+	*_life = call->acks_latest;
+	return call->state != RXRPC_CALL_COMPLETE;
 }
 EXPORT_SYMBOL(rxrpc_kernel_check_life);
 
 /**
- * rxrpc_kernel_check_call - Check a call's state
+ * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive
  * @sock: The socket the call is on
  * @call: The call to check
- * @_compl: Where to store the completion state
- * @_abort_code: Where to store any abort code
  *
- * Allow a kernel service to query the state of a call and find out the manner
- * of its termination if it has completed.  Returns -EINPROGRESS if the call is
- * still going, 0 if the call finished successfully, -ECONNABORTED if the call
- * was aborted and an appropriate error if the call failed in some other way.
+ * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to
+ * find out whether a call is still alive by pinging it.  This should cause the
+ * life state to be bumped in about 2*RTT.
+ *
+ * This must be called in TASK_RUNNING state, on pain of might_sleep() objecting.
  */
-int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call,
-			    enum rxrpc_call_completion *_compl, u32 *_abort_code)
+void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
 {
-	if (call->state != RXRPC_CALL_COMPLETE)
-		return -EINPROGRESS;
-	smp_rmb();
-	*_compl = call->completion;
-	*_abort_code = call->abort_code;
-	return call->error;
+	rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
+			  rxrpc_propose_ack_ping_for_check_life);
+	rxrpc_send_ack_packet(call, true, NULL);
 }
-EXPORT_SYMBOL(rxrpc_kernel_check_call);
+EXPORT_SYMBOL(rxrpc_kernel_probe_life);
 
 /**
- * rxrpc_kernel_retry_call - Allow a kernel service to retry a call
+ * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
  * @sock: The socket the call is on
- * @call: The call to retry
- * @srx: The address of the peer to contact
- * @key: The security context to use (defaults to socket setting)
+ * @call: The call to query
  *
- * Allow a kernel service to try resending a client call that failed due to a
- * network error to a new address.  The Tx queue is maintained intact, thereby
- * relieving the need to re-encrypt any request data that has already been
- * buffered.
+ * Allow a kernel service to retrieve the epoch value from a service call to
+ * see if the client at the other end rebooted.
  */
-int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call,
-			    struct sockaddr_rxrpc *srx, struct key *key)
+u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call)
 {
-	struct rxrpc_conn_parameters cp;
-	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
-	int ret;
-
-	_enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
-
-	if (!key)
-		key = rx->key;
-	if (key && !key->payload.data[0])
-		key = NULL; /* a no-security key */
-
-	memset(&cp, 0, sizeof(cp));
-	cp.local		= rx->local;
-	cp.key			= key;
-	cp.security_level	= 0;
-	cp.exclusive		= false;
-	cp.service_id		= srx->srx_service;
-
-	mutex_lock(&call->user_mutex);
-
-	ret = rxrpc_prepare_call_for_retry(rx, call);
-	if (ret == 0)
-		ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL);
-
-	mutex_unlock(&call->user_mutex);
-	rxrpc_put_peer(cp.peer);
-	_leave(" = %d", ret);
-	return ret;
+	return call->conn->proto.epoch;
 }
-EXPORT_SYMBOL(rxrpc_kernel_retry_call);
+EXPORT_SYMBOL(rxrpc_kernel_get_epoch);
 
 /**
  * rxrpc_kernel_new_call_notification - Get notifications of new calls
@@ -474,6 +442,31 @@
 }
 EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
 
+/**
+ * rxrpc_kernel_set_max_life - Set maximum lifespan on a call
+ * @sock: The socket the call is on
+ * @call: The call to configure
+ * @hard_timeout: The maximum lifespan of the call in jiffies
+ *
+ * Set the maximum lifespan of a call.  The call will end with ETIME or
+ * ETIMEDOUT if it takes longer than this.
+ */
+void rxrpc_kernel_set_max_life(struct socket *sock, struct rxrpc_call *call,
+			       unsigned long hard_timeout)
+{
+	unsigned long now;
+
+	mutex_lock(&call->user_mutex);
+
+	now = jiffies;
+	hard_timeout += now;
+	WRITE_ONCE(call->expect_term_by, hard_timeout);
+	rxrpc_reduce_call_timer(call, hard_timeout, now, rxrpc_timer_set_for_hard);
+
+	mutex_unlock(&call->user_mutex);
+}
+EXPORT_SYMBOL(rxrpc_kernel_set_max_life);
+
 /*
  * connect an RxRPC socket
  * - this just targets it at a specific destination; no actual connection
@@ -552,6 +545,7 @@
 
 	switch (rx->sk.sk_state) {
 	case RXRPC_UNBOUND:
+	case RXRPC_CLIENT_UNBOUND:
 		rx->srx.srx_family = AF_RXRPC;
 		rx->srx.srx_service = 0;
 		rx->srx.transport_type = SOCK_DGRAM;
@@ -576,10 +570,9 @@
 		}
 
 		rx->local = local;
-		rx->sk.sk_state = RXRPC_CLIENT_UNBOUND;
+		rx->sk.sk_state = RXRPC_CLIENT_BOUND;
 		/* Fall through */
 
-	case RXRPC_CLIENT_UNBOUND:
 	case RXRPC_CLIENT_BOUND:
 		if (!m->msg_name &&
 		    test_bit(RXRPC_SOCK_CONNECTED, &rx->flags)) {
@@ -869,7 +862,6 @@
 static int rxrpc_release_sock(struct sock *sk)
 {
 	struct rxrpc_sock *rx = rxrpc_sk(sk);
-	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
 
 	_enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
 
@@ -905,10 +897,8 @@
 	rxrpc_release_calls_on_socket(rx);
 	flush_workqueue(rxrpc_workqueue);
 	rxrpc_purge_queue(&sk->sk_receive_queue);
-	rxrpc_queue_work(&rxnet->service_conn_reaper);
-	rxrpc_queue_work(&rxnet->client_conn_reaper);
 
-	rxrpc_put_local(rx->local);
+	rxrpc_unuse_local(rx->local);
 	rx->local = NULL;
 	key_put(rx->key);
 	rx->key = NULL;
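
The reworked af_rxrpc.c kernel API splits liveness checking in two: rxrpc_kernel_check_life() is a cheap state read that passes the life counter back through *_life, and rxrpc_kernel_probe_life() actively pings the peer when that counter stalls. A caller-side sketch of the intended polling loop; only the two exported calls are taken from the patch, the rest is illustrative:

#include <net/af_rxrpc.h>

static bool my_call_still_alive(struct socket *sock, struct rxrpc_call *call,
				u32 *last_life)
{
	u32 life;

	if (!rxrpc_kernel_check_life(sock, call, &life))
		return false;		/* the call has completed */

	if (life == *last_life) {
		/* Counter stalled: actively ping, then allow ~2*RTT to pass
		 * before polling again (caller must be in TASK_RUNNING).
		 */
		rxrpc_kernel_probe_life(sock, call);
	}
	*last_life = life;
	return true;
}
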
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 03e0fc8..7c7d10f 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* AF_RXRPC internal definitions
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/atomic.h>
@@ -189,11 +185,17 @@
  * - max 48 bytes (struct sk_buff::cb)
  */
 struct rxrpc_skb_priv {
-	union {
-		u8		nr_jumbo;	/* Number of jumbo subpackets */
-	};
+	atomic_t	nr_ring_pins;		/* Number of rxtx ring pins */
+	u8		nr_subpackets;		/* Number of subpackets */
+	u8		rx_flags;		/* Received packet flags */
+#define RXRPC_SKB_INCL_LAST	0x01		/* - Includes last packet */
+#define RXRPC_SKB_TX_BUFFER	0x02		/* - Is transmit buffer */
 	union {
 		int		remain;		/* amount of space remaining for next write */
+
+		/* List of requested ACKs on subpackets */
+		unsigned long	rx_req_ack[(RXRPC_MAX_NR_JUMBO + BITS_PER_LONG - 1) /
+					   BITS_PER_LONG];
 	};
 
 	struct rxrpc_host_header hdr;		/* RxRPC packet header from this packet */
@@ -230,6 +232,9 @@
 	int (*verify_packet)(struct rxrpc_call *, struct sk_buff *,
 			     unsigned int, unsigned int, rxrpc_seq_t, u16);
 
+	/* Free crypto request on a call */
+	void (*free_call_crypto)(struct rxrpc_call *);
+
 	/* Locate the data in a received packet that has been verified. */
 	void (*locate_data)(struct rxrpc_call *, struct sk_buff *,
 			    unsigned int *, unsigned int *);
@@ -258,7 +263,8 @@
  */
 struct rxrpc_local {
 	struct rcu_head		rcu;
-	atomic_t		usage;
+	atomic_t		active_users;	/* Number of users of the local endpoint */
+	atomic_t		usage;		/* Number of references to the structure */
 	struct rxrpc_net	*rxnet;		/* The network ns in which this resides */
 	struct list_head	link;
 	struct socket		*socket;	/* my UDP socket */
@@ -435,7 +441,7 @@
 	struct sk_buff_head	rx_queue;	/* received conn-level packets */
 	const struct rxrpc_security *security;	/* applied security module */
 	struct key		*server_key;	/* security for this service */
-	struct crypto_skcipher	*cipher;	/* encryption handle */
+	struct crypto_sync_skcipher *cipher;	/* encryption handle */
 	struct rxrpc_crypt	csum_iv;	/* packet checksum base */
 	unsigned long		flags;
 	unsigned long		events;
@@ -476,13 +482,13 @@
 	RXRPC_CALL_EXPOSED,		/* The call was exposed to the world */
 	RXRPC_CALL_RX_LAST,		/* Received the last packet (at rxtx_top) */
 	RXRPC_CALL_TX_LAST,		/* Last packet in Tx buffer (at rxtx_top) */
-	RXRPC_CALL_TX_LASTQ,		/* Last packet has been queued */
 	RXRPC_CALL_SEND_PING,		/* A ping will need to be sent */
 	RXRPC_CALL_PINGING,		/* Ping in process */
 	RXRPC_CALL_RETRANS_TIMEOUT,	/* Retransmission due to timeout occurred */
 	RXRPC_CALL_BEGAN_RX_TIMER,	/* We began the expect_rx_by timer */
 	RXRPC_CALL_RX_HEARD,		/* The peer responded at least once to this call */
 	RXRPC_CALL_RX_UNDERRUN,		/* Got data underrun */
+	RXRPC_CALL_IS_INTR,		/* The call is interruptible */
 };
 
 /*
@@ -518,6 +524,18 @@
 };
 
 /*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+	RXRPC_CALL_SUCCEEDED,		/* - Normal termination */
+	RXRPC_CALL_REMOTELY_ABORTED,	/* - call aborted by peer */
+	RXRPC_CALL_LOCALLY_ABORTED,	/* - call aborted locally on error or close */
+	RXRPC_CALL_LOCAL_ERROR,		/* - call failed due to local error */
+	RXRPC_CALL_NETWORK_ERROR,	/* - call terminated by network error */
+	NR__RXRPC_CALL_COMPLETIONS
+};
+
+/*
  * Call Tx congestion management modes.
  */
 enum rxrpc_congest_mode {
@@ -538,6 +556,7 @@
 	struct rxrpc_peer	*peer;		/* Peer record for remote address */
 	struct rxrpc_sock __rcu	*socket;	/* socket responsible */
 	struct rxrpc_net	*rxnet;		/* Network namespace to which call belongs */
+	const struct rxrpc_security *security;	/* applied security module */
 	struct mutex		user_mutex;	/* User access mutex */
 	unsigned long		ack_at;		/* When deferred ACK needs to happen */
 	unsigned long		ack_lost_at;	/* When ACK is figured as lost */
@@ -549,6 +568,7 @@
 	unsigned long		expect_term_by;	/* When we expect call termination by */
 	u32			next_rx_timo;	/* Timeout for next Rx packet (jif) */
 	u32			next_req_timo;	/* Timeout for next Rx request packet (jif) */
+	struct skcipher_request	*cipher_req;	/* Packet cipher request buffer */
 	struct timer_list	timer;		/* Combined event timer */
 	struct work_struct	processor;	/* Event processor */
 	rxrpc_notify_rx_t	notify_rx;	/* kernel service Rx notification function */
@@ -581,6 +601,7 @@
 	int			debug_id;	/* debug ID for printks */
 	unsigned short		rx_pkt_offset;	/* Current recvmsg packet offset */
 	unsigned short		rx_pkt_len;	/* Current recvmsg packet len */
+	bool			rx_pkt_last;	/* Current recvmsg packet is last */
 
 	/* Rx/Tx circular buffer, depending on phase.
 	 *
@@ -604,8 +625,7 @@
 #define RXRPC_TX_ANNO_LAST	0x04
 #define RXRPC_TX_ANNO_RESENT	0x08
 
-#define RXRPC_RX_ANNO_JUMBO	0x3f		/* Jumbo subpacket number + 1 if not zero */
-#define RXRPC_RX_ANNO_JLAST	0x40		/* Set if last element of a jumbo packet */
+#define RXRPC_RX_ANNO_SUBPACKET	0x3f		/* Subpacket number in jumbogram */
 #define RXRPC_RX_ANNO_VERIFIED	0x80		/* Set if verified and decrypted */
 	rxrpc_seq_t		tx_hard_ack;	/* Dead slot in buffer; the first transmitted but
 						 * not hard-ACK'd packet follows this.
@@ -641,8 +661,8 @@
 
 	/* receive-phase ACK management */
 	u8			ackr_reason;	/* reason to ACK */
-	u16			ackr_skew;	/* skew on packet being ACK'd */
 	rxrpc_serial_t		ackr_serial;	/* serial of packet being ACK'd */
+	rxrpc_serial_t		ackr_first_seq;	/* first sequence number received */
 	rxrpc_seq_t		ackr_prev_seq;	/* previous sequence number received */
 	rxrpc_seq_t		ackr_consumed;	/* Highest packet shown consumed */
 	rxrpc_seq_t		ackr_seen;	/* Highest packet shown seen */
@@ -699,6 +719,7 @@
 		u32		normal;		/* Max time since last call packet (msec) */
 	} timeouts;
 	u8			nr_timeouts;	/* Number of timeouts specified */
+	bool			intr;		/* The call is interruptible */
 };
 
 struct rxrpc_send_params {
@@ -733,7 +754,7 @@
 /*
  * call_event.c
  */
-void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
+void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool,
 		       enum rxrpc_propose_ack_trace);
 void rxrpc_process_call(struct work_struct *);
 
@@ -761,15 +782,9 @@
 					 struct sockaddr_rxrpc *,
 					 struct rxrpc_call_params *, gfp_t,
 					 unsigned int);
-int rxrpc_retry_client_call(struct rxrpc_sock *,
-			    struct rxrpc_call *,
-			    struct rxrpc_conn_parameters *,
-			    struct sockaddr_rxrpc *,
-			    gfp_t);
 void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
 			 struct sk_buff *);
 void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
-int rxrpc_prepare_call_for_retry(struct rxrpc_sock *, struct rxrpc_call *);
 void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
 bool __rxrpc_queue_call(struct rxrpc_call *);
 bool rxrpc_queue_call(struct rxrpc_call *);
@@ -901,6 +916,7 @@
 void rxrpc_put_client_conn(struct rxrpc_connection *);
 void rxrpc_discard_expired_client_conns(struct work_struct *);
 void rxrpc_destroy_all_client_connections(struct rxrpc_net *);
+void rxrpc_clean_up_local_conns(struct rxrpc_local *);
 
 /*
  * conn_event.c
@@ -998,6 +1014,8 @@
 struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *);
 struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *);
 void rxrpc_put_local(struct rxrpc_local *);
+struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *);
+void rxrpc_unuse_local(struct rxrpc_local *);
 void rxrpc_queue_local(struct rxrpc_local *);
 void rxrpc_destroy_all_locals(struct rxrpc_net *);
 
@@ -1057,12 +1075,14 @@
 struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *);
 struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *);
 void rxrpc_put_peer(struct rxrpc_peer *);
+void rxrpc_put_peer_locked(struct rxrpc_peer *);
 
 /*
  * proc.c
  */
 extern const struct seq_operations rxrpc_call_seq_ops;
 extern const struct seq_operations rxrpc_connection_seq_ops;
+extern const struct seq_operations rxrpc_peer_seq_ops;
 
 /*
  * recvmsg.c
@@ -1097,9 +1117,9 @@
 void rxrpc_packet_destructor(struct sk_buff *);
 void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace);
 void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace);
 void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace);
 void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace);
-void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace);
 void rxrpc_purge_queue(struct sk_buff_head *);
 
 /*
@@ -1116,8 +1136,7 @@
 /*
  * utils.c
  */
-int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *,
-				struct sk_buff *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
 
 static inline bool before(u32 seq1, u32 seq2)
 {
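
A small detail in the rxrpc_skb_priv change above: the rx_req_ack array is sized with the usual bits-to-longs rounding, which is exactly what DECLARE_BITMAP() expands to. The equivalence as a sketch, using a stand-in constant for the rxrpc-internal RXRPC_MAX_NR_JUMBO:

#include <linux/bitops.h>
#include <linux/types.h>

#define MAX_BITS 8	/* stand-in for RXRPC_MAX_NR_JUMBO */

/* Both declarations reserve enough unsigned longs to hold MAX_BITS bits. */
static unsigned long by_hand[(MAX_BITS + BITS_PER_LONG - 1) / BITS_PER_LONG];
static DECLARE_BITMAP(declared, MAX_BITS);
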
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 8079aac..135bf5c 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* incoming call handling
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -88,7 +84,7 @@
 		smp_store_release(&b->conn_backlog_head,
 				  (head + 1) & (size - 1));
 
-		trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+		trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
 				 atomic_read(&conn->usage), here);
 	}
 
@@ -101,7 +97,7 @@
 	call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
 	call->state = RXRPC_CALL_SERVER_PREALLOC;
 
-	trace_rxrpc_call(call, rxrpc_call_new_service,
+	trace_rxrpc_call(call->debug_id, rxrpc_call_new_service,
 			 atomic_read(&call->usage),
 			 here, (const void *)user_call_ID);
 
@@ -280,7 +276,7 @@
 			peer = NULL;
 		if (!peer) {
 			peer = b->peer_backlog[peer_tail];
-			if (rxrpc_extract_addr_from_skb(local, &peer->srx, skb) < 0)
+			if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0)
 				return NULL;
 			b->peer_backlog[peer_tail] = NULL;
 			smp_store_release(&b->peer_backlog_tail,
@@ -311,6 +307,7 @@
 
 	rxrpc_see_call(call);
 	call->conn = conn;
+	call->security = conn->security;
 	call->peer = rxrpc_get_peer(conn->params.peer);
 	call->cong_cwnd = call->peer->cong_cwnd;
 	return call;
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 468efc3..cedbbb3 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Management of Tx window, Tx resend, ACKs and out-of-sequence reception
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -47,8 +43,7 @@
  * propose an ACK be sent
  */
 static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
-				u16 skew, u32 serial, bool immediate,
-				bool background,
+				u32 serial, bool immediate, bool background,
 				enum rxrpc_propose_ack_trace why)
 {
 	enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
@@ -73,14 +68,12 @@
 		if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) {
 			outcome = rxrpc_propose_ack_update;
 			call->ackr_serial = serial;
-			call->ackr_skew = skew;
 		}
 		if (!immediate)
 			goto trace;
 	} else if (prior > rxrpc_ack_priority[call->ackr_reason]) {
 		call->ackr_reason = ack_reason;
 		call->ackr_serial = serial;
-		call->ackr_skew = skew;
 	} else {
 		outcome = rxrpc_propose_ack_subsume;
 	}
@@ -141,11 +134,11 @@
  * propose an ACK be sent, locking the call structure
  */
 void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
-		       u16 skew, u32 serial, bool immediate, bool background,
+		       u32 serial, bool immediate, bool background,
 		       enum rxrpc_propose_ack_trace why)
 {
 	spin_lock_bh(&call->lock);
-	__rxrpc_propose_ACK(call, ack_reason, skew, serial,
+	__rxrpc_propose_ACK(call, ack_reason, serial,
 			    immediate, background, why);
 	spin_unlock_bh(&call->lock);
 }
@@ -206,7 +199,7 @@
 			continue;
 
 		skb = call->rxtx_buffer[ix];
-		rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
+		rxrpc_see_skb(skb, rxrpc_skb_seen);
 
 		if (anno_type == RXRPC_TX_ANNO_UNACK) {
 			if (ktime_after(skb->tstamp, max_age)) {
@@ -243,7 +236,7 @@
 		ack_ts = ktime_sub(now, call->acks_latest_ts);
 		if (ktime_to_ns(ack_ts) < call->peer->rtt)
 			goto out;
-		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
 				  rxrpc_propose_ack_ping_for_lost_ack);
 		rxrpc_send_ack_packet(call, true, NULL);
 		goto out;
@@ -262,18 +255,18 @@
 			continue;
 
 		skb = call->rxtx_buffer[ix];
-		rxrpc_get_skb(skb, rxrpc_skb_tx_got);
+		rxrpc_get_skb(skb, rxrpc_skb_got);
 		spin_unlock_bh(&call->lock);
 
 		if (rxrpc_send_data_packet(call, skb, true) < 0) {
-			rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+			rxrpc_free_skb(skb, rxrpc_skb_freed);
 			return;
 		}
 
 		if (rxrpc_is_client_call(call))
 			rxrpc_expose_client_call(call);
 
-		rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 		spin_lock_bh(&call->lock);
 
 		/* We need to clear the retransmit state, but there are two
@@ -376,7 +369,7 @@
 	if (time_after_eq(now, t)) {
 		trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
 		cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
-		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true,
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, true,
 				  rxrpc_propose_ack_ping_for_keepalive);
 		set_bit(RXRPC_CALL_EV_PING, &call->events);
 	}
@@ -411,7 +404,7 @@
 	send_ack = NULL;
 	if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) {
 		call->acks_lost_top = call->tx_top;
-		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
 				  rxrpc_propose_ack_ping_for_lost_ack);
 		send_ack = &call->acks_lost_ping;
 	}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 8f1a8f8..a31c18c 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* RxRPC individual remote procedure call handling
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -241,8 +237,11 @@
 		return call;
 	}
 
+	if (p->intr)
+		__set_bit(RXRPC_CALL_IS_INTR, &call->flags);
 	call->tx_total_len = p->tx_total_len;
-	trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
+	trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
+			 atomic_read(&call->usage),
 			 here, (const void *)p->user_call_ID);
 
 	/* We need to protect a partially set up call against the user as we
@@ -292,8 +291,8 @@
 	if (ret < 0)
 		goto error;
 
-	trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
-			 here, NULL);
+	trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
+			 atomic_read(&call->usage), here, NULL);
 
 	rxrpc_start_call_timer(call);
 
@@ -315,8 +314,8 @@
 error:
 	__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
 				    RX_CALL_DEAD, ret);
-	trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
-			 here, ERR_PTR(ret));
+	trace_rxrpc_call(call->debug_id, rxrpc_call_error,
+			 atomic_read(&call->usage), here, ERR_PTR(ret));
 	rxrpc_release_call(rx, call);
 	mutex_unlock(&call->user_mutex);
 	rxrpc_put_call(call, rxrpc_call_put);
@@ -325,48 +324,6 @@
 }
 
 /*
- * Retry a call to a new address.  It is expected that the Tx queue of the call
- * will contain data previously packaged for an old call.
- */
-int rxrpc_retry_client_call(struct rxrpc_sock *rx,
-			    struct rxrpc_call *call,
-			    struct rxrpc_conn_parameters *cp,
-			    struct sockaddr_rxrpc *srx,
-			    gfp_t gfp)
-{
-	const void *here = __builtin_return_address(0);
-	int ret;
-
-	/* Set up or get a connection record and set the protocol parameters,
-	 * including channel number and call ID.
-	 */
-	ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
-	if (ret < 0)
-		goto error;
-
-	trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
-			 here, NULL);
-
-	rxrpc_start_call_timer(call);
-
-	_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
-
-	if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
-		rxrpc_queue_call(call);
-
-	_leave(" = 0");
-	return 0;
-
-error:
-	rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
-				  RX_CALL_DEAD, ret);
-	trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
-			 here, ERR_PTR(ret));
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
  * Set up an incoming call.  call->conn points to the connection.
  * This is called in BH context and isn't allowed to fail.
  */
@@ -420,7 +377,8 @@
 	if (n == 0)
 		return false;
 	if (rxrpc_queue_work(&call->processor))
-		trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL);
+		trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1,
+				 here, NULL);
 	else
 		rxrpc_put_call(call, rxrpc_call_put_noqueue);
 	return true;
@@ -435,7 +393,8 @@
 	int n = atomic_read(&call->usage);
 	ASSERTCMP(n, >=, 1);
 	if (rxrpc_queue_work(&call->processor))
-		trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL);
+		trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n,
+				 here, NULL);
 	else
 		rxrpc_put_call(call, rxrpc_call_put_noqueue);
 	return true;
@@ -450,7 +409,8 @@
 	if (call) {
 		int n = atomic_read(&call->usage);
 
-		trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL);
+		trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n,
+				 here, NULL);
 	}
 }
 
@@ -462,7 +422,20 @@
 	const void *here = __builtin_return_address(0);
 	int n = atomic_inc_return(&call->usage);
 
-	trace_rxrpc_call(call, op, n, here, NULL);
+	trace_rxrpc_call(call->debug_id, op, n, here, NULL);
+}
+
+/*
+ * Clean up the RxTx skb ring.
+ */
+static void rxrpc_cleanup_ring(struct rxrpc_call *call)
+{
+	int i;
+
+	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
+		rxrpc_free_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned);
+		call->rxtx_buffer[i] = NULL;
+	}
 }
 
 /*
@@ -473,11 +446,11 @@
 	const void *here = __builtin_return_address(0);
 	struct rxrpc_connection *conn = call->conn;
 	bool put = false;
-	int i;
 
 	_enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
 
-	trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
+	trace_rxrpc_call(call->debug_id, rxrpc_call_release,
+			 atomic_read(&call->usage),
 			 here, (const void *)call->flags);
 
 	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
@@ -522,73 +495,14 @@
 
 	if (conn)
 		rxrpc_disconnect_call(call);
+	if (call->security)
+		call->security->free_call_crypto(call);
 
-	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
-		rxrpc_free_skb(call->rxtx_buffer[i],
-			       (call->tx_phase ? rxrpc_skb_tx_cleaned :
-				rxrpc_skb_rx_cleaned));
-		call->rxtx_buffer[i] = NULL;
-	}
-
+	rxrpc_cleanup_ring(call);
 	_leave("");
 }
 
 /*
- * Prepare a kernel service call for retry.
- */
-int rxrpc_prepare_call_for_retry(struct rxrpc_sock *rx, struct rxrpc_call *call)
-{
-	const void *here = __builtin_return_address(0);
-	int i;
-	u8 last = 0;
-
-	_enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
-
-	trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
-			 here, (const void *)call->flags);
-
-	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
-	ASSERTCMP(call->completion, !=, RXRPC_CALL_REMOTELY_ABORTED);
-	ASSERTCMP(call->completion, !=, RXRPC_CALL_LOCALLY_ABORTED);
-	ASSERT(list_empty(&call->recvmsg_link));
-
-	del_timer_sync(&call->timer);
-
-	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, call->conn);
-
-	if (call->conn)
-		rxrpc_disconnect_call(call);
-
-	if (rxrpc_is_service_call(call) ||
-	    !call->tx_phase ||
-	    call->tx_hard_ack != 0 ||
-	    call->rx_hard_ack != 0 ||
-	    call->rx_top != 0)
-		return -EINVAL;
-
-	call->state = RXRPC_CALL_UNINITIALISED;
-	call->completion = RXRPC_CALL_SUCCEEDED;
-	call->call_id = 0;
-	call->cid = 0;
-	call->cong_cwnd = 0;
-	call->cong_extra = 0;
-	call->cong_ssthresh = 0;
-	call->cong_mode = 0;
-	call->cong_dup_acks = 0;
-	call->cong_cumul_acks = 0;
-	call->acks_lowest_nak = 0;
-
-	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
-		last |= call->rxtx_annotations[i];
-		call->rxtx_annotations[i] &= RXRPC_TX_ANNO_LAST;
-		call->rxtx_annotations[i] |= RXRPC_TX_ANNO_RETRANS;
-	}
-
-	_leave(" = 0");
-	return 0;
-}
-
-/*
  * release all the calls associated with a socket
  */
 void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
@@ -625,12 +539,13 @@
 {
 	struct rxrpc_net *rxnet = call->rxnet;
 	const void *here = __builtin_return_address(0);
+	unsigned int debug_id = call->debug_id;
 	int n;
 
 	ASSERT(call != NULL);
 
 	n = atomic_dec_return(&call->usage);
-	trace_rxrpc_call(call, op, n, here, NULL);
+	trace_rxrpc_call(debug_id, op, n, here, NULL);
 	ASSERTCMP(n, >=, 0);
 	if (n == 0) {
 		_debug("call %d dead", call->debug_id);
@@ -667,8 +582,6 @@
  */
 void rxrpc_cleanup_call(struct rxrpc_call *call)
 {
-	int i;
-
 	_net("DESTROY CALL %d", call->debug_id);
 
 	memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
@@ -679,13 +592,8 @@
 	ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
 	ASSERTCMP(call->conn, ==, NULL);
 
-	/* Clean up the Rx/Tx buffer */
-	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++)
-		rxrpc_free_skb(call->rxtx_buffer[i],
-			       (call->tx_phase ? rxrpc_skb_tx_cleaned :
-				rxrpc_skb_rx_cleaned));
-
-	rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned);
+	rxrpc_cleanup_ring(call);
+	rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned);
 
 	call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
 }
@@ -701,30 +609,30 @@
 
 	_enter("");
 
-	if (list_empty(&rxnet->calls))
-		return;
+	if (!list_empty(&rxnet->calls)) {
+		write_lock(&rxnet->call_lock);
 
-	write_lock(&rxnet->call_lock);
+		while (!list_empty(&rxnet->calls)) {
+			call = list_entry(rxnet->calls.next,
+					  struct rxrpc_call, link);
+			_debug("Zapping call %p", call);
 
-	while (!list_empty(&rxnet->calls)) {
-		call = list_entry(rxnet->calls.next, struct rxrpc_call, link);
-		_debug("Zapping call %p", call);
+			rxrpc_see_call(call);
+			list_del_init(&call->link);
 
-		rxrpc_see_call(call);
-		list_del_init(&call->link);
+			pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
+			       call, atomic_read(&call->usage),
+			       rxrpc_call_states[call->state],
+			       call->flags, call->events);
 
-		pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
-		       call, atomic_read(&call->usage),
-		       rxrpc_call_states[call->state],
-		       call->flags, call->events);
+			write_unlock(&rxnet->call_lock);
+			cond_resched();
+			write_lock(&rxnet->call_lock);
+		}
 
 		write_unlock(&rxnet->call_lock);
-		cond_resched();
-		write_lock(&rxnet->call_lock);
 	}
 
-	write_unlock(&rxnet->call_lock);
-
 	atomic_dec(&rxnet->nr_calls);
 	wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls));
 }
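
Several trace_rxrpc_call() and trace_rxrpc_conn() sites above switch from passing the object pointer to passing a pre-captured debug_id, and rxrpc_put_call() reads the ID before atomic_dec_return() so the tracepoint never touches an object whose last reference may just have gone. The defensive pattern in isolation, with illustrative my_obj names:

#include <linux/atomic.h>
#include <linux/printk.h>
#include <linux/slab.h>

struct my_obj {
	atomic_t	usage;
	unsigned int	debug_id;
};

/* Stand-in for a tracepoint: takes only plain values, never the pointer. */
static void trace_my_obj_put(unsigned int debug_id, int usage)
{
	pr_debug("put %u -> %d\n", debug_id, usage);
}

static void my_put(struct my_obj *obj)
{
	unsigned int debug_id = obj->debug_id;	/* read while our ref is held */
	int n = atomic_dec_return(&obj->usage);

	trace_my_obj_put(debug_id, n);		/* safe even if obj is now gone */
	if (n == 0)
		kfree(obj);
}
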
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 521189f..376370c 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Client connection-specific management code.
  *
  * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- *
- *
  * Client connections need to be cached for a little while after they've made a
  * call so as to handle retransmitted DATA packets in case the server didn't
  * receive the final ACK or terminating ABORT we sent it.
@@ -217,7 +212,8 @@
 	rxrpc_get_local(conn->params.local);
 	key_get(conn->params.key);
 
-	trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage),
+	trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client,
+			 atomic_read(&conn->usage),
 			 __builtin_return_address(0));
 	trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
 	_leave(" = %p", conn);
@@ -353,10 +349,11 @@
 	 * normally have to take channel_lock but we do this before anyone else
 	 * can see the connection.
 	 */
-	list_add_tail(&call->chan_wait_link, &candidate->waiting_calls);
+	list_add(&call->chan_wait_link, &candidate->waiting_calls);
 
 	if (cp->exclusive) {
 		call->conn = candidate;
+		call->security = candidate->security;
 		call->security_ix = candidate->security_ix;
 		call->service_id = candidate->service_id;
 		_leave(" = 0 [exclusive %d]", candidate->debug_id);
@@ -408,6 +405,7 @@
 candidate_published:
 	set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
 	call->conn = candidate;
+	call->security = candidate->security;
 	call->security_ix = candidate->security_ix;
 	call->service_id = candidate->service_id;
 	spin_unlock(&local->client_conns_lock);
@@ -430,9 +428,10 @@
 
 	spin_lock(&conn->channel_lock);
 	call->conn = conn;
+	call->security = conn->security;
 	call->security_ix = conn->security_ix;
 	call->service_id = conn->service_id;
-	list_add(&call->chan_wait_link, &conn->waiting_calls);
+	list_add_tail(&call->chan_wait_link, &conn->waiting_calls);
 	spin_unlock(&conn->channel_lock);
 	_leave(" = 0 [extant %d]", conn->debug_id);
 	return 0;
@@ -562,10 +561,7 @@
 	clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
 
 	write_lock_bh(&call->state_lock);
-	if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags))
-		call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
-	else
-		call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+	call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
 	write_unlock_bh(&call->state_lock);
 
 	rxrpc_see_call(call);
@@ -659,10 +655,14 @@
 
 		add_wait_queue_exclusive(&call->waitq, &myself);
 		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
+			if (test_bit(RXRPC_CALL_IS_INTR, &call->flags))
+				set_current_state(TASK_INTERRUPTIBLE);
+			else
+				set_current_state(TASK_UNINTERRUPTIBLE);
 			if (call->call_id)
 				break;
-			if (signal_pending(current)) {
+			if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) &&
+			    signal_pending(current)) {
 				ret = -ERESTARTSYS;
 				break;
 			}
@@ -707,6 +707,7 @@
 
 	ret = rxrpc_wait_for_channel(call, gfp);
 	if (ret < 0) {
+		trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
 		rxrpc_disconnect_client_call(call);
 		goto out;
 	}
@@ -777,16 +778,22 @@
  */
 void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 {
-	unsigned int channel = call->cid & RXRPC_CHANNELMASK;
 	struct rxrpc_connection *conn = call->conn;
-	struct rxrpc_channel *chan = &conn->channels[channel];
+	struct rxrpc_channel *chan = NULL;
 	struct rxrpc_net *rxnet = conn->params.local->rxnet;
-
-	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
-	call->conn = NULL;
+	unsigned int channel = -1;
+	u32 cid;
 
 	spin_lock(&conn->channel_lock);
 
+	cid = call->cid;
+	if (cid) {
+		channel = cid & RXRPC_CHANNELMASK;
+		chan = &conn->channels[channel];
+	}
+	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
+	call->conn = NULL;
+
 	/* Calls that have never actually been assigned a channel can simply be
 	 * discarded.  If the conn didn't get used either, it will follow
 	 * immediately unless someone else grabs it in the meantime.
@@ -810,7 +817,10 @@
 		goto out;
 	}
 
-	ASSERTCMP(rcu_access_pointer(chan->call), ==, call);
+	if (rcu_access_pointer(chan->call) != call) {
+		spin_unlock(&conn->channel_lock);
+		BUG();
+	}
 
 	/* If a client call was exposed to the world, we save the result for
 	 * retransmission.
@@ -979,11 +989,12 @@
 void rxrpc_put_client_conn(struct rxrpc_connection *conn)
 {
 	const void *here = __builtin_return_address(0);
+	unsigned int debug_id = conn->debug_id;
 	int n;
 
 	do {
 		n = atomic_dec_return(&conn->usage);
-		trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here);
+		trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here);
 		if (n > 0)
 			return;
 		ASSERTCMP(n, >=, 0);
@@ -1156,3 +1167,47 @@
 
 	_leave("");
 }
+
+/*
+ * Clean up the client connections on a local endpoint.
+ */
+void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
+{
+	struct rxrpc_connection *conn, *tmp;
+	struct rxrpc_net *rxnet = local->rxnet;
+	unsigned int nr_active;
+	LIST_HEAD(graveyard);
+
+	_enter("");
+
+	spin_lock(&rxnet->client_conn_cache_lock);
+	nr_active = rxnet->nr_active_client_conns;
+
+	list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns,
+				 cache_link) {
+		if (conn->params.local == local) {
+			ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_IDLE);
+
+			trace_rxrpc_client(conn, -1, rxrpc_client_discard);
+			if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags))
+				BUG();
+			conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
+			list_move(&conn->cache_link, &graveyard);
+			nr_active--;
+		}
+	}
+
+	rxnet->nr_active_client_conns = nr_active;
+	spin_unlock(&rxnet->client_conn_cache_lock);
+	ASSERTCMP(nr_active, >=, 0);
+
+	while (!list_empty(&graveyard)) {
+		conn = list_entry(graveyard.next,
+				  struct rxrpc_connection, cache_link);
+		list_del_init(&conn->cache_link);
+
+		rxrpc_put_connection(conn);
+	}
+
+	_leave(" [culled]");
+}
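
rxrpc_clean_up_local_conns() above follows a common teardown idiom: unlink the
matching entries onto a private graveyard list while holding the cache lock,
then drop the references with the lock released, so the heavier per-object
teardown in rxrpc_put_connection() runs outside the spinlock.  A userspace
sketch of the idiom (pthread-based; names illustrative, not kernel API):

    #include <pthread.h>
    #include <stdlib.h>

    struct conn {
        struct conn *next;
        int local_id;
    };

    static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct conn *idle_conns;

    static void clean_up_local_conns(int local_id)
    {
        struct conn *graveyard = NULL, **pp, *c;

        /* Phase 1: unlink matching conns under the lock. */
        pthread_mutex_lock(&cache_lock);
        for (pp = &idle_conns; (c = *pp) != NULL;) {
            if (c->local_id == local_id) {
                *pp = c->next;
                c->next = graveyard;
                graveyard = c;
            } else {
                pp = &c->next;
            }
        }
        pthread_mutex_unlock(&cache_lock);

        /* Phase 2: release them with the lock dropped. */
        while ((c = graveyard)) {
            graveyard = c->next;
            free(c); /* stands in for rxrpc_put_connection() */
        }
    }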
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index b6fca8e..a1ceef4 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* connection-level event handling
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -153,7 +149,8 @@
  * pass a connection-level abort onto all calls on that connection
  */
 static void rxrpc_abort_calls(struct rxrpc_connection *conn,
-			      enum rxrpc_call_completion compl)
+			      enum rxrpc_call_completion compl,
+			      rxrpc_serial_t serial)
 {
 	struct rxrpc_call *call;
 	int i;
@@ -173,6 +170,9 @@
 						  call->call_id, 0,
 						  conn->abort_code,
 						  conn->error);
+			else
+				trace_rxrpc_rx_abort(call, serial,
+						     conn->abort_code);
 			if (rxrpc_set_call_completion(call, compl,
 						      conn->abort_code,
 						      conn->error))
@@ -213,8 +213,6 @@
 	conn->state = RXRPC_CONN_LOCALLY_ABORTED;
 	spin_unlock_bh(&conn->state_lock);
 
-	rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED);
-
 	msg.msg_name	= &conn->params.peer->srx.transport;
 	msg.msg_namelen	= conn->params.peer->srx.transport_len;
 	msg.msg_control	= NULL;
@@ -242,6 +240,7 @@
 	len = iov[0].iov_len + iov[1].iov_len;
 
 	serial = atomic_inc_return(&conn->serial);
+	rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial);
 	whdr.serial = htonl(serial);
 	_proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code);
 
@@ -321,7 +320,7 @@
 		conn->error = -ECONNABORTED;
 		conn->abort_code = abort_code;
 		conn->state = RXRPC_CONN_REMOTELY_ABORTED;
-		rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED);
+		rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
 		return -ECONNABORTED;
 
 	case RXRPC_PACKET_TYPE_CHALLENGE:
@@ -473,7 +472,7 @@
 	/* go through the conn-level event packets, releasing the ref on this
 	 * connection that each one has when we've finished with it */
 	while ((skb = skb_dequeue(&conn->rx_queue))) {
-		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+		rxrpc_see_skb(skb, rxrpc_skb_seen);
 		ret = rxrpc_process_event(conn, skb, &abort_code);
 		switch (ret) {
 		case -EPROTO:
@@ -485,7 +484,7 @@
 			goto requeue_and_leave;
 		case -ECONNABORTED:
 		default:
-			rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+			rxrpc_free_skb(skb, rxrpc_skb_freed);
 			break;
 		}
 	}
@@ -502,6 +501,6 @@
 protocol_error:
 	if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
 		goto requeue_and_leave;
-	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 	goto out;
 }
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 885dae8..38d718e 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* RxRPC virtual connection handler, common bits.
  *
  * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -86,11 +82,12 @@
 
 	_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
 
-	if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
+	if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
 		goto not_found;
 
-	/* We may have to handle mixing IPv4 and IPv6 */
-	if (srx.transport.family != local->srx.transport.family) {
+	if (srx.transport.family != local->srx.transport.family &&
+	    (srx.transport.family == AF_INET &&
+	     local->srx.transport.family != AF_INET6)) {
 		pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
 				    srx.transport.family,
 				    local->srx.transport.family);
@@ -272,7 +269,7 @@
 	if (n == 0)
 		return false;
 	if (rxrpc_queue_work(&conn->processor))
-		trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here);
+		trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here);
 	else
 		rxrpc_put_connection(conn);
 	return true;
@@ -287,7 +284,7 @@
 	if (conn) {
 		int n = atomic_read(&conn->usage);
 
-		trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here);
+		trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here);
 	}
 }
 
@@ -299,7 +296,7 @@
 	const void *here = __builtin_return_address(0);
 	int n = atomic_inc_return(&conn->usage);
 
-	trace_rxrpc_conn(conn, rxrpc_conn_got, n, here);
+	trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here);
 }
 
 /*
@@ -313,7 +310,7 @@
 	if (conn) {
 		int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
 		if (n > 0)
-			trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here);
+			trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here);
 		else
 			conn = NULL;
 	}
@@ -336,10 +333,11 @@
 void rxrpc_put_service_conn(struct rxrpc_connection *conn)
 {
 	const void *here = __builtin_return_address(0);
+	unsigned int debug_id = conn->debug_id;
 	int n;
 
 	n = atomic_dec_return(&conn->usage);
-	trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
+	trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here);
 	ASSERTCMP(n, >=, 0);
 	if (n == 1)
 		rxrpc_set_service_reap_timer(conn->params.local->rxnet,
@@ -401,7 +399,7 @@
 		if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
 			continue;
 
-		if (rxnet->live) {
+		if (rxnet->live && !conn->params.local->dead) {
 			idle_timestamp = READ_ONCE(conn->idle_timestamp);
 			expire_at = idle_timestamp + rxrpc_connection_expiry * HZ;
 			if (conn->params.local->service_closed)
@@ -423,7 +421,7 @@
 		 */
 		if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
 			continue;
-		trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, NULL);
+		trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL);
 
 		if (rxrpc_conn_is_client(conn))
 			BUG();
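
The reaper above only destroys a connection it can atomically move from one
reference (the cache's own) to zero: if a concurrent getter bumped the count
first, the atomic_cmpxchg(&conn->usage, 1, 0) fails and the connection is
skipped.  C11 equivalent of that claim test (sketch only):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Claim the last reference for reaping: usage 1 -> 0.  Fails if
     * any other CPU holds or has just taken a reference. */
    static bool try_claim_for_reap(atomic_int *usage)
    {
        int expected = 1;

        return atomic_compare_exchange_strong(usage, &expected, 0);
    }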
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 80773a5..123d6ce 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Service connection management
  *
  * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
  */
 
 #include <linux/slab.h>
@@ -138,7 +134,7 @@
 		list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
 		write_unlock(&rxnet->conn_lock);
 
-		trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+		trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
 				 atomic_read(&conn->usage),
 				 __builtin_return_address(0));
 	}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 570b49d..157be1f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* RxRPC packet reception
  *
  * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -200,15 +196,14 @@
  * Ping the other end to fill our RTT cache and to retrieve the rwind
  * and MTU parameters.
  */
-static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb,
-			    int skew)
+static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	ktime_t now = skb->tstamp;
 
 	if (call->peer->rtt_usage < 3 ||
 	    ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
-		rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
 				  true, true,
 				  rxrpc_propose_ack_ping_for_params);
 }
@@ -238,7 +233,7 @@
 		ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK;
 		skb = call->rxtx_buffer[ix];
 		annotation = call->rxtx_annotations[ix];
-		rxrpc_see_skb(skb, rxrpc_skb_tx_rotated);
+		rxrpc_see_skb(skb, rxrpc_skb_rotated);
 		call->rxtx_buffer[ix] = NULL;
 		call->rxtx_annotations[ix] = 0;
 		skb->next = list;
@@ -262,8 +257,8 @@
 	while (list) {
 		skb = list;
 		list = skb->next;
-		skb->next = NULL;
-		rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+		skb_mark_not_on_list(skb);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 	}
 
 	return rot_last;
@@ -352,7 +347,7 @@
 }
 
 /*
- * Scan a jumbo packet to validate its structure and to work out how many
+ * Scan a data packet to validate its structure and to work out how many
  * subpackets it contains.
  *
  * A jumbo packet is a collection of consecutive packets glued together with
@@ -363,16 +358,21 @@
  * the last are RXRPC_JUMBO_DATALEN in size.  The last subpacket may be of any
  * size.
  */
-static bool rxrpc_validate_jumbo(struct sk_buff *skb)
+static bool rxrpc_validate_data(struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	unsigned int offset = sizeof(struct rxrpc_wire_header);
 	unsigned int len = skb->len;
-	int nr_jumbo = 1;
 	u8 flags = sp->hdr.flags;
 
-	do {
-		nr_jumbo++;
+	for (;;) {
+		if (flags & RXRPC_REQUEST_ACK)
+			__set_bit(sp->nr_subpackets, sp->rx_req_ack);
+		sp->nr_subpackets++;
+
+		if (!(flags & RXRPC_JUMBO_PACKET))
+			break;
+
 		if (len - offset < RXRPC_JUMBO_SUBPKTLEN)
 			goto protocol_error;
 		if (flags & RXRPC_LAST_PACKET)
@@ -381,9 +381,10 @@
 		if (skb_copy_bits(skb, offset, &flags, 1) < 0)
 			goto protocol_error;
 		offset += sizeof(struct rxrpc_jumbo_header);
-	} while (flags & RXRPC_JUMBO_PACKET);
+	}
 
-	sp->nr_jumbo = nr_jumbo;
+	if (flags & RXRPC_LAST_PACKET)
+		sp->rx_flags |= RXRPC_SKB_INCL_LAST;
 	return true;
 
 protocol_error:
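
For reference, the subpacket walk in rxrpc_validate_data() above steps through
a jumbo packet as 1412 bytes of data (RXRPC_JUMBO_DATALEN) followed by a 4-byte
jumbo header whose first byte carries the next subpacket's flags.  A standalone
sketch of the same walk over a flat buffer (simplified, not kernel code; the
flag bit values follow the Rx wire protocol):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    #define JUMBO_DATALEN   1412
    #define JUMBO_HDRLEN    4
    #define JUMBO_SUBPKTLEN (JUMBO_DATALEN + JUMBO_HDRLEN)
    #define F_LAST          0x04 /* RXRPC_LAST_PACKET */
    #define F_JUMBO         0x20 /* RXRPC_JUMBO_PACKET */

    static bool count_subpackets(const uint8_t *pkt, size_t len,
                                 uint8_t first_flags, unsigned int *nr)
    {
        size_t offset = 0;
        uint8_t flags = first_flags;

        *nr = 0;
        for (;;) {
            (*nr)++;
            if (!(flags & F_JUMBO))
                return true;   /* terminal subpacket reached */
            if (len - offset < JUMBO_SUBPKTLEN)
                return false;  /* truncated jumbo packet */
            if (flags & F_LAST)
                return false;  /* LAST set on a non-terminal subpacket */
            offset += JUMBO_DATALEN;
            flags = pkt[offset];   /* flags byte of the next jumbo header */
            offset += JUMBO_HDRLEN;
        }
    }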
@@ -404,10 +405,10 @@
  * (that information is encoded in the ACK packet).
  */
 static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq,
-				 u8 annotation, bool *_jumbo_bad)
+				 bool is_jumbo, bool *_jumbo_bad)
 {
 	/* Discard normal packets that are duplicates. */
-	if (annotation == 0)
+	if (is_jumbo)
 		return;
 
 	/* Skip jumbo subpackets that are duplicates.  When we've had three or
@@ -421,30 +422,30 @@
 }
 
 /*
- * Process a DATA packet, adding the packet to the Rx ring.
+ * Process a DATA packet, adding the packet to the Rx ring.  The caller's
+ * packet ref must be passed on or discarded.
  */
-static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
-			     u16 skew)
+static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	enum rxrpc_call_state state;
-	unsigned int offset = sizeof(struct rxrpc_wire_header);
-	unsigned int ix;
+	unsigned int j;
 	rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
-	rxrpc_seq_t seq = sp->hdr.seq, hard_ack;
-	bool immediate_ack = false, jumbo_bad = false, queued;
-	u16 len;
-	u8 ack = 0, flags, annotation = 0;
+	rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack;
+	bool immediate_ack = false, jumbo_bad = false;
+	u8 ack = 0;
 
 	_enter("{%u,%u},{%u,%u}",
-	       call->rx_hard_ack, call->rx_top, skb->len, seq);
+	       call->rx_hard_ack, call->rx_top, skb->len, seq0);
 
-	_proto("Rx DATA %%%u { #%u f=%02x }",
-	       sp->hdr.serial, seq, sp->hdr.flags);
+	_proto("Rx DATA %%%u { #%u f=%02x n=%u }",
+	       sp->hdr.serial, seq0, sp->hdr.flags, sp->nr_subpackets);
 
 	state = READ_ONCE(call->state);
-	if (state >= RXRPC_CALL_COMPLETE)
+	if (state >= RXRPC_CALL_COMPLETE) {
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 		return;
+	}
 
 	if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) {
 		unsigned long timo = READ_ONCE(call->next_req_timo);
@@ -469,156 +470,157 @@
 	    !rxrpc_receiving_reply(call))
 		goto unlock;
 
-	call->ackr_prev_seq = seq;
-
+	call->ackr_prev_seq = seq0;
 	hard_ack = READ_ONCE(call->rx_hard_ack);
-	if (after(seq, hard_ack + call->rx_winsize)) {
-		ack = RXRPC_ACK_EXCEEDS_WINDOW;
-		ack_serial = serial;
-		goto ack;
-	}
 
-	flags = sp->hdr.flags;
-	if (flags & RXRPC_JUMBO_PACKET) {
+	if (sp->nr_subpackets > 1) {
 		if (call->nr_jumbo_bad > 3) {
 			ack = RXRPC_ACK_NOSPACE;
 			ack_serial = serial;
 			goto ack;
 		}
-		annotation = 1;
 	}
 
-next_subpacket:
-	queued = false;
-	ix = seq & RXRPC_RXTX_BUFF_MASK;
-	len = skb->len;
-	if (flags & RXRPC_JUMBO_PACKET)
-		len = RXRPC_JUMBO_DATALEN;
+	for (j = 0; j < sp->nr_subpackets; j++) {
+		rxrpc_serial_t serial = sp->hdr.serial + j;
+		rxrpc_seq_t seq = seq0 + j;
+		unsigned int ix = seq & RXRPC_RXTX_BUFF_MASK;
+		bool terminal = (j == sp->nr_subpackets - 1);
+		bool last = terminal && (sp->rx_flags & RXRPC_SKB_INCL_LAST);
+		u8 flags, annotation = j;
 
-	if (flags & RXRPC_LAST_PACKET) {
-		if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
-		    seq != call->rx_top) {
-			rxrpc_proto_abort("LSN", call, seq);
-			goto unlock;
+		_proto("Rx DATA+%u %%%u { #%x t=%u l=%u }",
+		     j, serial, seq, terminal, last);
+
+		if (last) {
+			if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+			    seq != call->rx_top) {
+				rxrpc_proto_abort("LSN", call, seq);
+				goto unlock;
+			}
+		} else {
+			if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+			    after_eq(seq, call->rx_top)) {
+				rxrpc_proto_abort("LSA", call, seq);
+				goto unlock;
+			}
 		}
-	} else {
-		if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
-		    after_eq(seq, call->rx_top)) {
-			rxrpc_proto_abort("LSA", call, seq);
-			goto unlock;
-		}
-	}
 
-	trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation);
-	if (before_eq(seq, hard_ack)) {
-		ack = RXRPC_ACK_DUPLICATE;
-		ack_serial = serial;
-		goto skip;
-	}
+		flags = 0;
+		if (last)
+			flags |= RXRPC_LAST_PACKET;
+		if (!terminal)
+			flags |= RXRPC_JUMBO_PACKET;
+		if (test_bit(j, sp->rx_req_ack))
+			flags |= RXRPC_REQUEST_ACK;
+		trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation);
 
-	if (flags & RXRPC_REQUEST_ACK && !ack) {
-		ack = RXRPC_ACK_REQUESTED;
-		ack_serial = serial;
-	}
-
-	if (call->rxtx_buffer[ix]) {
-		rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad);
-		if (ack != RXRPC_ACK_DUPLICATE) {
+		if (before_eq(seq, hard_ack)) {
 			ack = RXRPC_ACK_DUPLICATE;
 			ack_serial = serial;
+			continue;
 		}
-		immediate_ack = true;
-		goto skip;
-	}
 
-	/* Queue the packet.  We use a couple of memory barriers here as need
-	 * to make sure that rx_top is perceived to be set after the buffer
-	 * pointer and that the buffer pointer is set after the annotation and
-	 * the skb data.
-	 *
-	 * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window()
-	 * and also rxrpc_fill_out_ack().
-	 */
-	rxrpc_get_skb(skb, rxrpc_skb_rx_got);
-	call->rxtx_annotations[ix] = annotation;
-	smp_wmb();
-	call->rxtx_buffer[ix] = skb;
-	if (after(seq, call->rx_top)) {
-		smp_store_release(&call->rx_top, seq);
-	} else if (before(seq, call->rx_top)) {
-		/* Send an immediate ACK if we fill in a hole */
-		if (!ack) {
-			ack = RXRPC_ACK_DELAY;
-			ack_serial = serial;
+		if (call->rxtx_buffer[ix]) {
+			rxrpc_input_dup_data(call, seq, sp->nr_subpackets > 1,
+					     &jumbo_bad);
+			if (ack != RXRPC_ACK_DUPLICATE) {
+				ack = RXRPC_ACK_DUPLICATE;
+				ack_serial = serial;
+			}
+			immediate_ack = true;
+			continue;
 		}
-		immediate_ack = true;
-	}
-	if (flags & RXRPC_LAST_PACKET) {
-		set_bit(RXRPC_CALL_RX_LAST, &call->flags);
-		trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq);
-	} else {
-		trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq);
-	}
-	queued = true;
 
-	if (after_eq(seq, call->rx_expect_next)) {
-		if (after(seq, call->rx_expect_next)) {
-			_net("OOS %u > %u", seq, call->rx_expect_next);
-			ack = RXRPC_ACK_OUT_OF_SEQUENCE;
-			ack_serial = serial;
-		}
-		call->rx_expect_next = seq + 1;
-	}
-
-skip:
-	offset += len;
-	if (flags & RXRPC_JUMBO_PACKET) {
-		if (skb_copy_bits(skb, offset, &flags, 1) < 0) {
-			rxrpc_proto_abort("XJF", call, seq);
-			goto unlock;
-		}
-		offset += sizeof(struct rxrpc_jumbo_header);
-		seq++;
-		serial++;
-		annotation++;
-		if (flags & RXRPC_JUMBO_PACKET)
-			annotation |= RXRPC_RX_ANNO_JLAST;
 		if (after(seq, hard_ack + call->rx_winsize)) {
 			ack = RXRPC_ACK_EXCEEDS_WINDOW;
 			ack_serial = serial;
-			if (!jumbo_bad) {
-				call->nr_jumbo_bad++;
-				jumbo_bad = true;
+			if (flags & RXRPC_JUMBO_PACKET) {
+				if (!jumbo_bad) {
+					call->nr_jumbo_bad++;
+					jumbo_bad = true;
+				}
 			}
+
 			goto ack;
 		}
 
-		_proto("Rx DATA Jumbo %%%u", serial);
-		goto next_subpacket;
-	}
+		if (flags & RXRPC_REQUEST_ACK && !ack) {
+			ack = RXRPC_ACK_REQUESTED;
+			ack_serial = serial;
+		}
 
-	if (queued && flags & RXRPC_LAST_PACKET && !ack) {
-		ack = RXRPC_ACK_DELAY;
-		ack_serial = serial;
+		/* Queue the packet.  We use a couple of memory barriers here as we need
+		 * to make sure that rx_top is perceived to be set after the buffer
+		 * pointer and that the buffer pointer is set after the annotation and
+		 * the skb data.
+		 *
+		 * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window()
+		 * and also rxrpc_fill_out_ack().
+		 */
+		if (!terminal)
+			rxrpc_get_skb(skb, rxrpc_skb_got);
+		call->rxtx_annotations[ix] = annotation;
+		smp_wmb();
+		call->rxtx_buffer[ix] = skb;
+		if (after(seq, call->rx_top)) {
+			smp_store_release(&call->rx_top, seq);
+		} else if (before(seq, call->rx_top)) {
+			/* Send an immediate ACK if we fill in a hole */
+			if (!ack) {
+				ack = RXRPC_ACK_DELAY;
+				ack_serial = serial;
+			}
+			immediate_ack = true;
+		}
+
+		if (terminal) {
+			/* From this point on, we're not allowed to touch the
+			 * packet any longer as its ref now belongs to the Rx
+			 * ring.
+			 */
+			skb = NULL;
+		}
+
+		if (last) {
+			set_bit(RXRPC_CALL_RX_LAST, &call->flags);
+			if (!ack) {
+				ack = RXRPC_ACK_DELAY;
+				ack_serial = serial;
+			}
+			trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq);
+		} else {
+			trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq);
+		}
+
+		if (after_eq(seq, call->rx_expect_next)) {
+			if (after(seq, call->rx_expect_next)) {
+				_net("OOS %u > %u", seq, call->rx_expect_next);
+				ack = RXRPC_ACK_OUT_OF_SEQUENCE;
+				ack_serial = serial;
+			}
+			call->rx_expect_next = seq + 1;
+		}
 	}
 
 ack:
 	if (ack)
-		rxrpc_propose_ACK(call, ack, skew, ack_serial,
+		rxrpc_propose_ACK(call, ack, ack_serial,
 				  immediate_ack, true,
 				  rxrpc_propose_ack_input_data);
 	else
-		rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial,
+		rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial,
 				  false, true,
 				  rxrpc_propose_ack_input_data);
 
-	if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) {
+	if (seq0 == READ_ONCE(call->rx_hard_ack) + 1) {
 		trace_rxrpc_notify_socket(call->debug_id, serial);
 		rxrpc_notify_socket(call);
 	}
 
 unlock:
 	spin_unlock(&call->input_lock);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 	_leave(" [queued]");
 }
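
The rewritten loop above installs a single skb into multiple ring slots, and
the reference discipline matters: every non-terminal subpacket's slot takes an
extra ref via rxrpc_get_skb(), while the terminal slot consumes the reference
the caller passed in, after which the local skb pointer is cleared so nothing
later in the function touches it.  A userspace analogue of that accounting
(illustrative names, not kernel API):

    #include <stdatomic.h>

    #define RING_SIZE 64

    struct buf {
        atomic_int refs; /* starts at 1: the caller's reference */
        /* ...payload... */
    };

    /* Install one buffer into 'n' consecutive ring slots.  On return
     * the caller's reference has been handed to the ring (terminal
     * slot), so the caller must not touch 'b' afterwards. */
    static void ring_install(struct buf *ring[RING_SIZE], unsigned int seq0,
                             struct buf *b, unsigned int n)
    {
        for (unsigned int j = 0; j < n; j++) {
            if (j < n - 1)
                atomic_fetch_add(&b->refs, 1); /* ref per non-terminal slot */
            ring[(seq0 + j) % RING_SIZE] = b;
        }
    }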
 
@@ -826,8 +828,7 @@
  * soft-ACK means that the packet may be discarded and retransmission
  * requested.  A phase is complete when all packets are hard-ACK'd.
  */
-static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
-			    u16 skew)
+static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
 {
 	struct rxrpc_ack_summary summary = { 0 };
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -837,7 +838,7 @@
 		u8 acks[RXRPC_MAXACKS];
 	} buf;
 	rxrpc_serial_t acked_serial;
-	rxrpc_seq_t first_soft_ack, hard_ack;
+	rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
 	int nr_acks, offset, ioffset;
 
 	_enter("");
@@ -851,13 +852,14 @@
 
 	acked_serial = ntohl(buf.ack.serial);
 	first_soft_ack = ntohl(buf.ack.firstPacket);
+	prev_pkt = ntohl(buf.ack.previousPacket);
 	hard_ack = first_soft_ack - 1;
 	nr_acks = buf.ack.nAcks;
 	summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ?
 			      buf.ack.reason : RXRPC_ACK__INVALID);
 
 	trace_rxrpc_rx_ack(call, sp->hdr.serial, acked_serial,
-			   first_soft_ack, ntohl(buf.ack.previousPacket),
+			   first_soft_ack, prev_pkt,
 			   summary.ack_reason, nr_acks);
 
 	if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE)
@@ -870,16 +872,17 @@
 	if (buf.ack.reason == RXRPC_ACK_PING) {
 		_proto("Rx ACK %%%u PING Request", sp->hdr.serial);
 		rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
-				  skew, sp->hdr.serial, true, true,
+				  sp->hdr.serial, true, true,
 				  rxrpc_propose_ack_respond_to_ping);
 	} else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
 		rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED,
-				  skew, sp->hdr.serial, true, true,
+				  sp->hdr.serial, true, true,
 				  rxrpc_propose_ack_respond_to_ack);
 	}
 
-	/* Discard any out-of-order or duplicate ACKs. */
-	if (before_eq(sp->hdr.serial, call->acks_latest))
+	/* Discard any out-of-order or duplicate ACKs (outside lock). */
+	if (before(first_soft_ack, call->ackr_first_seq) ||
+	    before(prev_pkt, call->ackr_prev_seq))
 		return;
 
 	buf.info.rxMTU = 0;
@@ -890,12 +893,16 @@
 
 	spin_lock(&call->input_lock);
 
-	/* Discard any out-of-order or duplicate ACKs. */
-	if (before_eq(sp->hdr.serial, call->acks_latest))
+	/* Discard any out-of-order or duplicate ACKs (inside lock). */
+	if (before(first_soft_ack, call->ackr_first_seq) ||
+	    before(prev_pkt, call->ackr_prev_seq))
 		goto out;
 	call->acks_latest_ts = skb->tstamp;
 	call->acks_latest = sp->hdr.serial;
 
+	call->ackr_first_seq = first_soft_ack;
+	call->ackr_prev_seq = prev_pkt;
+
 	/* Parse rwind and mtu sizes if provided. */
 	if (buf.info.rxMTU)
 		rxrpc_input_ackinfo(call, skb, &buf.info);
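
The duplicate-ACK discard above is deliberately performed twice: a cheap test
outside call->input_lock rejects obviously stale ACKs without contending the
lock, and the same test is repeated under the lock before ackr_first_seq and
ackr_prev_seq are advanced, since another CPU may have processed a newer ACK
in between.  The pattern, sketched in userspace C (illustrative names):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct call {
        pthread_mutex_t input_lock;
        uint32_t ackr_first_seq;
        uint32_t ackr_prev_seq;
    };

    /* Wrapping sequence-number comparison, as the kernel's before(). */
    static bool seq_before(uint32_t a, uint32_t b)
    {
        return (int32_t)(a - b) < 0;
    }

    static bool input_ack(struct call *call, uint32_t first, uint32_t prev)
    {
        /* Cheap unlocked discard of out-of-order/duplicate ACKs. */
        if (seq_before(first, call->ackr_first_seq) ||
            seq_before(prev, call->ackr_prev_seq))
            return false;

        pthread_mutex_lock(&call->input_lock);
        /* Recheck under the lock: the fields may have moved meanwhile. */
        if (seq_before(first, call->ackr_first_seq) ||
            seq_before(prev, call->ackr_prev_seq)) {
            pthread_mutex_unlock(&call->input_lock);
            return false;
        }
        call->ackr_first_seq = first;
        call->ackr_prev_seq = prev;
        /* ...parse and apply the rest of the ACK... */
        pthread_mutex_unlock(&call->input_lock);
        return true;
    }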
@@ -946,7 +953,7 @@
 	    RXRPC_TX_ANNO_LAST &&
 	    summary.nr_acks == call->tx_top - hard_ack &&
 	    rxrpc_is_client_call(call))
-		rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
 				  false, true,
 				  rxrpc_propose_ack_ping_for_lost_reply);
 
@@ -1002,7 +1009,7 @@
  * Process an incoming call packet.
  */
 static void rxrpc_input_call_packet(struct rxrpc_call *call,
-				    struct sk_buff *skb, u16 skew)
+				    struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	unsigned long timo;
@@ -1021,11 +1028,11 @@
 
 	switch (sp->hdr.type) {
 	case RXRPC_PACKET_TYPE_DATA:
-		rxrpc_input_data(call, skb, skew);
-		break;
+		rxrpc_input_data(call, skb);
+		goto no_free;
 
 	case RXRPC_PACKET_TYPE_ACK:
-		rxrpc_input_ack(call, skb, skew);
+		rxrpc_input_ack(call, skb);
 		break;
 
 	case RXRPC_PACKET_TYPE_BUSY:
@@ -1049,6 +1056,8 @@
 		break;
 	}
 
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
+no_free:
 	_leave("");
 }
 
@@ -1106,8 +1115,12 @@
 {
 	_enter("%p,%p", local, skb);
 
-	skb_queue_tail(&local->event_queue, skb);
-	rxrpc_queue_local(local);
+	if (rxrpc_get_local_maybe(local)) {
+		skb_queue_tail(&local->event_queue, skb);
+		rxrpc_queue_local(local);
+	} else {
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
+	}
 }
 
 /*
@@ -1117,8 +1130,12 @@
 {
 	CHECK_SLAB_OKAY(&local->usage);
 
-	skb_queue_tail(&local->reject_queue, skb);
-	rxrpc_queue_local(local);
+	if (rxrpc_get_local_maybe(local)) {
+		skb_queue_tail(&local->reject_queue, skb);
+		rxrpc_queue_local(local);
+	} else {
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
+	}
 }
 
 /*
@@ -1155,30 +1172,33 @@
  * handle data received on the local endpoint
  * - may be called in interrupt context
  *
- * The socket is locked by the caller and this prevents the socket from being
- * shut down and the local endpoint from going away, thus sk_user_data will not
- * be cleared until this function returns.
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP from
+ * being cleared in the middle of processing this function.
  *
  * Called with the RCU read lock held from the IP layer via UDP.
  */
 int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 {
+	struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
 	struct rxrpc_connection *conn;
 	struct rxrpc_channel *chan;
 	struct rxrpc_call *call = NULL;
 	struct rxrpc_skb_priv *sp;
-	struct rxrpc_local *local = udp_sk->sk_user_data;
 	struct rxrpc_peer *peer = NULL;
 	struct rxrpc_sock *rx = NULL;
 	unsigned int channel;
-	int skew = 0;
 
 	_enter("%p", udp_sk);
 
+	if (unlikely(!local)) {
+		kfree_skb(skb);
+		return 0;
+	}
 	if (skb->tstamp == 0)
 		skb->tstamp = ktime_get_real();
 
-	rxrpc_new_skb(skb, rxrpc_skb_rx_received);
+	rxrpc_new_skb(skb, rxrpc_skb_received);
 
 	skb_pull(skb, sizeof(struct udphdr));
 
@@ -1195,7 +1215,7 @@
 		static int lose;
 		if ((lose++ & 7) == 7) {
 			trace_rxrpc_rx_lose(sp);
-			rxrpc_free_skb(skb, rxrpc_skb_rx_lost);
+			rxrpc_free_skb(skb, rxrpc_skb_lost);
 			return 0;
 		}
 	}
@@ -1227,9 +1247,26 @@
 		if (sp->hdr.callNumber == 0 ||
 		    sp->hdr.seq == 0)
 			goto bad_message;
-		if (sp->hdr.flags & RXRPC_JUMBO_PACKET &&
-		    !rxrpc_validate_jumbo(skb))
+		if (!rxrpc_validate_data(skb))
 			goto bad_message;
+
+		/* Unshare the packet so that it can be modified for in-place
+		 * decryption.
+		 */
+		if (sp->hdr.securityIndex != 0) {
+			struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC);
+			if (!nskb) {
+				rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem);
+				goto out;
+			}
+
+			if (nskb != skb) {
+				rxrpc_eaten_skb(skb, rxrpc_skb_received);
+				skb = nskb;
+				rxrpc_new_skb(skb, rxrpc_skb_unshared);
+				sp = rxrpc_skb(skb);
+			}
+		}
 		break;
 
 	case RXRPC_PACKET_TYPE_CHALLENGE:
@@ -1295,15 +1332,8 @@
 			goto out;
 		}
 
-		/* Note the serial number skew here */
-		skew = (int)sp->hdr.serial - (int)conn->hi_serial;
-		if (skew >= 0) {
-			if (skew > 0)
-				conn->hi_serial = sp->hdr.serial;
-		} else {
-			skew = -skew;
-			skew = min(skew, 65535);
-		}
+		if ((int)sp->hdr.serial - (int)conn->hi_serial > 0)
+			conn->hi_serial = sp->hdr.serial;
 
 		/* Call-bound packets are routed by connection channel. */
 		channel = sp->hdr.cid & RXRPC_CHANNELMASK;
@@ -1366,15 +1396,18 @@
 		call = rxrpc_new_incoming_call(local, rx, skb);
 		if (!call)
 			goto reject_packet;
-		rxrpc_send_ping(call, skb, skew);
+		rxrpc_send_ping(call, skb);
 		mutex_unlock(&call->user_mutex);
 	}
 
-	rxrpc_input_call_packet(call, skb, skew);
-	goto discard;
+	/* Process a call packet; this either discards or passes on the ref
+	 * elsewhere.
+	 */
+	rxrpc_input_call_packet(call, skb);
+	goto out;
 
 discard:
-	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 out:
 	trace_rxrpc_rx_done(0, 0);
 	return 0;
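
The switch to rcu_dereference_sk_user_data() above matches the new warning in
the comment: encap_rcv runs without the socket lock, so the local endpoint
pointer can be cleared at any moment and a NULL result must simply drop the
packet.  A loose userspace analogue of the load-and-check, with a C11 acquire
load standing in for the RCU-protected dereference (not kernel code):

    #include <stdatomic.h>
    #include <stddef.h>

    struct local;

    static _Atomic(struct local *) sk_user_data;

    static int input_packet(void *skb)
    {
        struct local *local = atomic_load_explicit(&sk_user_data,
                                                   memory_order_acquire);
        if (!local) {
            /* Teardown already ran: drop the packet. */
            return 0;
        }
        /* ...process 'skb' against 'local'... */
        return 0;
    }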
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index af276f1..f6c59f5 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Null security operations.
  *
  * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
  */
 
 #include <net/af_rxrpc.h>
@@ -37,6 +33,10 @@
 	return 0;
 }
 
+static void none_free_call_crypto(struct rxrpc_call *call)
+{
+}
+
 static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
 			     unsigned int *_offset, unsigned int *_len)
 {
@@ -87,6 +87,7 @@
 	.exit				= none_exit,
 	.init_connection_security	= none_init_connection_security,
 	.prime_packet_security		= none_prime_packet_security,
+	.free_call_crypto		= none_free_call_crypto,
 	.secure_packet			= none_secure_packet,
 	.verify_packet			= none_verify_packet,
 	.locate_data			= none_locate_data,
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index e7f6b88..6c3f35f 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* RxRPC key management
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
  * RxRPC keys should have a description describing their purpose:
  *	"afs@CAMBRIDGE.REDHAT.COM"
  */
@@ -43,6 +39,7 @@
  */
 struct key_type key_type_rxrpc = {
 	.name		= "rxrpc",
+	.flags		= KEY_TYPE_NET_DOMAIN,
 	.preparse	= rxrpc_preparse,
 	.free_preparse	= rxrpc_free_preparse,
 	.instantiate	= generic_key_instantiate,
@@ -58,6 +55,7 @@
  */
 struct key_type key_type_rxrpc_s = {
 	.name		= "rxrpc_s",
+	.flags		= KEY_TYPE_NET_DOMAIN,
 	.vet_description = rxrpc_vet_description_s,
 	.preparse	= rxrpc_preparse_s,
 	.free_preparse	= rxrpc_free_preparse_s,
@@ -912,7 +910,7 @@
 	if (IS_ERR(description))
 		return PTR_ERR(description);
 
-	key = request_key(&key_type_rxrpc, description, NULL);
+	key = request_key_net(&key_type_rxrpc, description, sock_net(&rx->sk), NULL);
 	if (IS_ERR(key)) {
 		kfree(description);
 		_leave(" = %ld", PTR_ERR(key));
@@ -943,7 +941,7 @@
 	if (IS_ERR(description))
 		return PTR_ERR(description);
 
-	key = request_key(&key_type_keyring, description, NULL);
+	key = request_key_net(&key_type_keyring, description, sock_net(&rx->sk), NULL);
 	if (IS_ERR(key)) {
 		kfree(description);
 		_leave(" = %ld", PTR_ERR(key));
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 13bd8a4..3ce6d62 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AF_RXRPC local endpoint management
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -39,7 +35,7 @@
 
 	_enter("");
 
-	if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
+	if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
 		return;
 
 	msg.msg_name	= &srx.transport;
@@ -94,7 +90,7 @@
 	if (skb) {
 		struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
-		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+		rxrpc_see_skb(skb, rxrpc_skb_seen);
 		_debug("{%d},{%u}", local->debug_id, sp->hdr.type);
 
 		switch (sp->hdr.type) {
@@ -112,7 +108,7 @@
 			break;
 		}
 
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 	}
 
 	_leave("");
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 0906e51..3658726 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Local endpoint object management
  *
  * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -83,6 +79,7 @@
 	local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
 	if (local) {
 		atomic_set(&local->usage, 1);
+		atomic_set(&local->active_users, 1);
 		local->rxnet = rxnet;
 		INIT_LIST_HEAD(&local->link);
 		INIT_WORK(&local->processor, rxrpc_local_processor);
@@ -96,7 +93,7 @@
 		local->debug_id = atomic_inc_return(&rxrpc_debug_id);
 		memcpy(&local->srx, srx, sizeof(*srx));
 		local->srx.srx_service = 0;
-		trace_rxrpc_local(local, rxrpc_local_new, 1, NULL);
+		trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL);
 	}
 
 	_leave(" = %p", local);
@@ -180,7 +177,7 @@
 		/* Fall through and set IPv4 options too otherwise we don't get
 		 * errors from IPv4 packets sent through the IPv6 socket.
 		 */
-
+		/* Fall through */
 	case AF_INET:
 		/* we want to receive ICMP errors */
 		opt = 1;
@@ -202,7 +199,7 @@
 
 		/* We want receive timestamps. */
 		opt = 1;
-		ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS,
+		ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
 					(char *)&opt, sizeof(opt));
 		if (ret < 0) {
 			_debug("setsockopt failed");
@@ -270,11 +267,8 @@
 		 * bind the transport socket may still fail if we're attempting
 		 * to use a local address that the dying object is still using.
 		 */
-		if (!rxrpc_get_local_maybe(local)) {
-			cursor = cursor->next;
-			list_del_init(&local->link);
+		if (!rxrpc_use_local(local))
 			break;
-		}
 
 		age = "old";
 		goto found;
@@ -288,7 +282,10 @@
 	if (ret < 0)
 		goto sock_error;
 
-	list_add_tail(&local->link, cursor);
+	if (cursor != &rxnet->local_endpoints)
+		list_replace_init(cursor, &local->link);
+	else
+		list_add_tail(&local->link, cursor);
 	age = "new";
 
 found:
@@ -304,7 +301,8 @@
 	ret = -ENOMEM;
 sock_error:
 	mutex_unlock(&rxnet->local_mutex);
-	kfree(local);
+	if (local)
+		call_rcu(&local->rcu, rxrpc_local_rcu);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 
@@ -323,7 +321,7 @@
 	int n;
 
 	n = atomic_inc_return(&local->usage);
-	trace_rxrpc_local(local, rxrpc_local_got, n, here);
+	trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here);
 	return local;
 }
 
@@ -337,7 +335,8 @@
 	if (local) {
 		int n = atomic_fetch_add_unless(&local->usage, 1, 0);
 		if (n > 0)
-			trace_rxrpc_local(local, rxrpc_local_got, n + 1, here);
+			trace_rxrpc_local(local->debug_id, rxrpc_local_got,
+					  n + 1, here);
 		else
 			local = NULL;
 	}
@@ -345,24 +344,18 @@
 }
 
 /*
- * Queue a local endpoint.
+ * Queue a local endpoint and pass the caller's reference to the work item.
  */
 void rxrpc_queue_local(struct rxrpc_local *local)
 {
 	const void *here = __builtin_return_address(0);
+	unsigned int debug_id = local->debug_id;
+	int n = atomic_read(&local->usage);
 
 	if (rxrpc_queue_work(&local->processor))
-		trace_rxrpc_local(local, rxrpc_local_queued,
-				  atomic_read(&local->usage), here);
-}
-
-/*
- * A local endpoint reached its end of life.
- */
-static void __rxrpc_put_local(struct rxrpc_local *local)
-{
-	_enter("%d", local->debug_id);
-	rxrpc_queue_work(&local->processor);
+		trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here);
+	else
+		rxrpc_put_local(local);
 }
 
 /*
@@ -375,10 +368,47 @@
 
 	if (local) {
 		n = atomic_dec_return(&local->usage);
-		trace_rxrpc_local(local, rxrpc_local_put, n, here);
+		trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here);
 
 		if (n == 0)
-			__rxrpc_put_local(local);
+			call_rcu(&local->rcu, rxrpc_local_rcu);
+	}
+}
+
+/*
+ * Start using a local endpoint.
+ */
+struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
+{
+	unsigned int au;
+
+	local = rxrpc_get_local_maybe(local);
+	if (!local)
+		return NULL;
+
+	au = atomic_fetch_add_unless(&local->active_users, 1, 0);
+	if (au == 0) {
+		rxrpc_put_local(local);
+		return NULL;
+	}
+
+	return local;
+}
+
+/*
+ * Cease using a local endpoint.  Once the number of active users reaches 0, we
+ * start the closure of the transport in the work processor.
+ */
+void rxrpc_unuse_local(struct rxrpc_local *local)
+{
+	unsigned int au;
+
+	if (local) {
+		au = atomic_dec_return(&local->active_users);
+		if (au == 0)
+			rxrpc_queue_local(local);
+		else
+			rxrpc_put_local(local);
 	}
 }
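
The new active_users counter splits the endpoint's lifetime in two: usage
keeps the memory alive while active_users keeps the transport open.
rxrpc_use_local() succeeds only if it can raise both counters from non-zero,
and the endpoint is shut down once active_users falls to zero even though
references may remain.  A userspace sketch of the scheme, with
get_unless_zero() standing in for the kernel's atomic_fetch_add_unless()
(illustrative, not kernel API):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct local {
        atomic_int usage;        /* keeps the memory alive */
        atomic_int active_users; /* keeps the endpoint open */
    };

    static void put_local(struct local *l)
    {
        if (atomic_fetch_sub(&l->usage, 1) == 1)
            free(l); /* last reference gone */
    }

    /* Increment a counter only if it has not already reached zero. */
    static bool get_unless_zero(atomic_int *v)
    {
        int n = atomic_load(v);

        while (n != 0)
            if (atomic_compare_exchange_weak(v, &n, n + 1))
                return true;
        return false;
    }

    static struct local *use_local(struct local *l)
    {
        if (!get_unless_zero(&l->usage))
            return NULL;  /* object already dying */
        if (!get_unless_zero(&l->active_users)) {
            put_local(l); /* endpoint already shutting down */
            return NULL;
        }
        return l;
    }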
 
@@ -396,21 +426,14 @@
 
 	_enter("%d", local->debug_id);
 
-	/* We can get a race between an incoming call packet queueing the
-	 * processor again and the work processor starting the destruction
-	 * process which will shut down the UDP socket.
-	 */
-	if (local->dead) {
-		_leave(" [already dead]");
-		return;
-	}
 	local->dead = true;
 
 	mutex_lock(&rxnet->local_mutex);
 	list_del_init(&local->link);
 	mutex_unlock(&rxnet->local_mutex);
 
-	ASSERT(RB_EMPTY_ROOT(&local->client_conns));
+	rxrpc_clean_up_local_conns(local);
+	rxrpc_service_connection_reaper(&rxnet->service_conn_reaper);
 	ASSERT(!local->service);
 
 	if (socket) {
@@ -425,13 +448,11 @@
 	 */
 	rxrpc_purge_queue(&local->reject_queue);
 	rxrpc_purge_queue(&local->event_queue);
-
-	_debug("rcu local %d", local->debug_id);
-	call_rcu(&local->rcu, rxrpc_local_rcu);
 }
 
 /*
- * Process events on an endpoint
+ * Process events on an endpoint.  The work item carries a ref which
+ * we must release.
  */
 static void rxrpc_local_processor(struct work_struct *work)
 {
@@ -439,13 +460,15 @@
 		container_of(work, struct rxrpc_local, processor);
 	bool again;
 
-	trace_rxrpc_local(local, rxrpc_local_processing,
+	trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
 			  atomic_read(&local->usage), NULL);
 
 	do {
 		again = false;
-		if (atomic_read(&local->usage) == 0)
-			return rxrpc_local_destroyer(local);
+		if (atomic_read(&local->active_users) == 0) {
+			rxrpc_local_destroyer(local);
+			break;
+		}
 
 		if (!skb_queue_empty(&local->reject_queue)) {
 			rxrpc_reject_packets(local);
@@ -457,6 +480,8 @@
 			again = true;
 		}
 	} while (again);
+
+	rxrpc_put_local(local);
 }
 
 /*
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index c1d9e7f..214405f 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Miscellaneous bits
  *
  * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 417d808..b312aab 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* rxrpc network namespace handling.
  *
  * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
  */
 
 #include <linux/proc_fs.h>
@@ -102,6 +98,9 @@
 	proc_create_net("conns", 0444, rxnet->proc_net,
 			&rxrpc_connection_seq_ops,
 			sizeof(struct seq_net_private));
+	proc_create_net("peers", 0444, rxnet->proc_net,
+			&rxrpc_peer_seq_ops,
+			sizeof(struct seq_net_private));
 	return 0;
 
 err_proc:
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 345dc1c..935bb60 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* RxRPC packet transmission
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -91,7 +87,7 @@
 	*_top = top;
 
 	pkt->ack.bufferSpace	= htons(8);
-	pkt->ack.maxSkew	= htons(call->ackr_skew);
+	pkt->ack.maxSkew	= htons(0);
 	pkt->ack.firstPacket	= htonl(hard_ack + 1);
 	pkt->ack.previousPacket	= htonl(call->ackr_prev_seq);
 	pkt->ack.serial		= htonl(serial);
@@ -232,7 +228,6 @@
 			if (ping)
 				clear_bit(RXRPC_CALL_PINGING, &call->flags);
 			rxrpc_propose_ACK(call, pkt->ack.reason,
-					  ntohs(pkt->ack.maxSkew),
 					  ntohl(pkt->ack.serial),
 					  false, true,
 					  rxrpc_propose_ack_retry_tx);
@@ -335,7 +330,6 @@
 	struct kvec iov[2];
 	rxrpc_serial_t serial;
 	size_t len;
-	bool lost = false;
 	int ret, opt;
 
 	_enter(",{%d}", skb->len);
@@ -393,12 +387,14 @@
 		static int lose;
 		if ((lose++ & 7) == 7) {
 			ret = 0;
-			lost = true;
+			trace_rxrpc_tx_data(call, sp->hdr.seq, serial,
+					    whdr.flags, retrans, true);
 			goto done;
 		}
 	}
 
-	_proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);
+	trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans,
+			    false);
 
 	/* send the packet with the don't fragment bit set if we currently
 	 * think it's small enough */
@@ -432,8 +428,6 @@
 		goto send_fragmentable;
 
 done:
-	trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
-			    retrans, lost);
 	if (ret >= 0) {
 		if (whdr.flags & RXRPC_REQUEST_ACK) {
 			call->peer->rtt_last_req = skb->tstamp;
@@ -524,6 +518,9 @@
 		}
 		break;
 #endif
+
+	default:
+		BUG();
 	}
 
 	if (ret < 0)
@@ -568,7 +565,7 @@
 	memset(&whdr, 0, sizeof(whdr));
 
 	while ((skb = skb_dequeue(&local->reject_queue))) {
-		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+		rxrpc_see_skb(skb, rxrpc_skb_seen);
 		sp = rxrpc_skb(skb);
 
 		switch (skb->mark) {
@@ -584,11 +581,11 @@
 			ioc = 2;
 			break;
 		default:
-			rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+			rxrpc_free_skb(skb, rxrpc_skb_freed);
 			continue;
 		}
 
-		if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) {
+		if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
 			msg.msg_namelen = srx.transport_len;
 
 			whdr.epoch	= htonl(sp->hdr.epoch);
@@ -609,7 +606,7 @@
 						      rxrpc_tx_point_reject);
 		}
 
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 	}
 
 	_leave("");
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index bd2fa3b..48f67a9 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Peer event handling, typically ICMP messages.
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -47,6 +43,8 @@
 	 */
 	switch (srx->transport.family) {
 	case AF_INET:
+		srx->transport_len = sizeof(srx->transport.sin);
+		srx->transport.family = AF_INET;
 		srx->transport.sin.sin_port = serr->port;
 		switch (serr->ee.ee_origin) {
 		case SO_EE_ORIGIN_ICMP:
@@ -70,20 +68,20 @@
 
 #ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
-		srx->transport.sin6.sin6_port = serr->port;
 		switch (serr->ee.ee_origin) {
 		case SO_EE_ORIGIN_ICMP6:
 			_net("Rx ICMP6");
+			srx->transport.sin6.sin6_port = serr->port;
 			memcpy(&srx->transport.sin6.sin6_addr,
 			       skb_network_header(skb) + serr->addr_offset,
 			       sizeof(struct in6_addr));
 			break;
 		case SO_EE_ORIGIN_ICMP:
 			_net("Rx ICMP on v6 sock");
-			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
-			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
-			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-			memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12,
+			srx->transport_len = sizeof(srx->transport.sin);
+			srx->transport.family = AF_INET;
+			srx->transport.sin.sin_port = serr->port;
+			memcpy(&srx->transport.sin.sin_addr,
 			       skb_network_header(skb) + serr->addr_offset,
 			       sizeof(struct in_addr));
 			break;
@@ -149,32 +147,44 @@
 {
 	struct sock_exterr_skb *serr;
 	struct sockaddr_rxrpc srx;
-	struct rxrpc_local *local = sk->sk_user_data;
+	struct rxrpc_local *local;
 	struct rxrpc_peer *peer;
 	struct sk_buff *skb;
 
+	rcu_read_lock();
+	local = rcu_dereference_sk_user_data(sk);
+	if (unlikely(!local)) {
+		rcu_read_unlock();
+		return;
+	}
 	_enter("%p{%d}", sk, local->debug_id);
 
+	/* Clear the outstanding error value on the socket so that it doesn't
+	 * cause kernel_sendmsg() to return it later.
+	 */
+	sock_error(sk);
+
 	skb = sock_dequeue_err_skb(sk);
 	if (!skb) {
+		rcu_read_unlock();
 		_leave("UDP socket errqueue empty");
 		return;
 	}
-	rxrpc_new_skb(skb, rxrpc_skb_rx_received);
+	rxrpc_new_skb(skb, rxrpc_skb_received);
 	serr = SKB_EXT_ERR(skb);
 	if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
 		_leave("UDP empty message");
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rcu_read_unlock();
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 		return;
 	}
 
-	rcu_read_lock();
 	peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
 	if (peer && !rxrpc_get_peer_maybe(peer))
 		peer = NULL;
 	if (!peer) {
 		rcu_read_unlock();
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 		_leave(" [no peer]");
 		return;
 	}
@@ -186,7 +196,7 @@
 	     serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
 		rxrpc_adjust_mtu(peer, serr);
 		rcu_read_unlock();
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 		rxrpc_put_peer(peer);
 		_leave(" [MTU update]");
 		return;
@@ -194,7 +204,7 @@
 
 	rxrpc_store_error(peer, serr);
 	rcu_read_unlock();
-	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 	rxrpc_put_peer(peer);
 
 	_leave("");
@@ -375,7 +385,7 @@
 		spin_lock_bh(&rxnet->peer_hash_lock);
 		list_add_tail(&peer->keepalive_link,
 			      &rxnet->peer_keepalive[slot & mask]);
-		rxrpc_put_peer(peer);
+		rxrpc_put_peer_locked(peer);
 	}
 
 	spin_unlock_bh(&rxnet->peer_hash_lock);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 5691b7d..64830d8 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* RxRPC remote transport endpoint record management
  *
  * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -220,7 +216,7 @@
 	peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
 	if (peer) {
 		atomic_set(&peer->usage, 1);
-		peer->local = local;
+		peer->local = rxrpc_get_local(local);
 		INIT_HLIST_HEAD(&peer->error_targets);
 		peer->service_conns = RB_ROOT;
 		seqlock_init(&peer->service_conn_lock);
@@ -311,7 +307,6 @@
 	unsigned long hash_key;
 
 	hash_key = rxrpc_peer_hash_key(local, &peer->srx);
-	peer->local = local;
 	rxrpc_init_peer(rx, peer, hash_key);
 
 	spin_lock(&rxnet->peer_hash_lock);
@@ -386,7 +381,7 @@
 	int n;
 
 	n = atomic_inc_return(&peer->usage);
-	trace_rxrpc_peer(peer, rxrpc_peer_got, n, here);
+	trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here);
 	return peer;
 }
 
@@ -400,7 +395,7 @@
 	if (peer) {
 		int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
 		if (n > 0)
-			trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here);
+			trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here);
 		else
 			peer = NULL;
 	}
@@ -421,6 +416,7 @@
 	list_del_init(&peer->keepalive_link);
 	spin_unlock_bh(&rxnet->peer_hash_lock);
 
+	rxrpc_put_local(peer->local);
 	kfree_rcu(peer, rcu);
 }
 
@@ -430,17 +426,39 @@
 void rxrpc_put_peer(struct rxrpc_peer *peer)
 {
 	const void *here = __builtin_return_address(0);
+	unsigned int debug_id;
 	int n;
 
 	if (peer) {
+		debug_id = peer->debug_id;
 		n = atomic_dec_return(&peer->usage);
-		trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+		trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
 		if (n == 0)
 			__rxrpc_put_peer(peer);
 	}
 }
 
 /*
+ * Drop a ref on a peer record where the caller already holds the
+ * peer_hash_lock.
+ */
+void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
+{
+	const void *here = __builtin_return_address(0);
+	unsigned int debug_id = peer->debug_id;
+	int n;
+
+	n = atomic_dec_return(&peer->usage);
+	trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
+	if (n == 0) {
+		hash_del_rcu(&peer->hash_link);
+		list_del_init(&peer->keepalive_link);
+		rxrpc_put_local(peer->local);
+		kfree_rcu(peer, rcu);
+	}
+}
+
+/*
  * Make sure all peer records have been discarded.
  */
 void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
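
rxrpc_put_peer_locked() above exists for the keepalive worker, which drops
peer references while already holding peer_hash_lock: routing the final put
through the ordinary path would try to take the lock again, so the
unlink-and-free is done inline instead.  Userspace sketch of the two put
variants (toy hash-chain linkage; illustrative names, not kernel API):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    static pthread_mutex_t hash_lock = PTHREAD_MUTEX_INITIALIZER;

    struct peer {
        atomic_int usage;
        struct peer **pprev, *next; /* toy hash-chain linkage */
    };

    static void unlink_peer(struct peer *p)
    {
        if (p->pprev) {
            *p->pprev = p->next;
            p->pprev = NULL;
        }
    }

    /* Ordinary put: takes the hash lock for the final-ref unlink. */
    static void put_peer(struct peer *p)
    {
        if (atomic_fetch_sub(&p->usage, 1) == 1) {
            pthread_mutex_lock(&hash_lock);
            unlink_peer(p);
            pthread_mutex_unlock(&hash_lock);
            free(p);
        }
    }

    /* For callers already holding hash_lock: taking it again would
     * deadlock on a non-recursive mutex, so unlink inline. */
    static void put_peer_locked(struct peer *p)
    {
        if (atomic_fetch_sub(&p->usage, 1) == 1) {
            unlink_peer(p);
            free(p);
        }
    }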
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 9805e3b..b9d053e 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* /proc/net/ support for AF_RXRPC
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -212,3 +208,129 @@
 	.stop   = rxrpc_connection_seq_stop,
 	.show   = rxrpc_connection_seq_show,
 };
+
+/*
+ * generate a list of extant virtual peers in /proc/net/rxrpc/peers
+ */
+static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
+{
+	struct rxrpc_peer *peer;
+	time64_t now;
+	char lbuff[50], rbuff[50];
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq,
+			 "Proto Local                                          "
+			 " Remote                                         "
+			 " Use CW  MTU   LastUse          RTT Rc\n"
+			 );
+		return 0;
+	}
+
+	peer = list_entry(v, struct rxrpc_peer, hash_link);
+
+	sprintf(lbuff, "%pISpc", &peer->local->srx.transport);
+
+	sprintf(rbuff, "%pISpc", &peer->srx.transport);
+
+	now = ktime_get_seconds();
+	seq_printf(seq,
+		   "UDP   %-47.47s %-47.47s %3u"
+		   " %3u %5u %6llus %12llu %2u\n",
+		   lbuff,
+		   rbuff,
+		   atomic_read(&peer->usage),
+		   peer->cong_cwnd,
+		   peer->mtu,
+		   now - peer->last_tx_at,
+		   peer->rtt,
+		   peer->rtt_cursor);
+
+	return 0;
+}
+
+static void *rxrpc_peer_seq_start(struct seq_file *seq, loff_t *_pos)
+	__acquires(rcu)
+{
+	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+	unsigned int bucket, n;
+	unsigned int shift = 32 - HASH_BITS(rxnet->peer_hash);
+	void *p;
+
+	rcu_read_lock();
+
+	if (*_pos >= UINT_MAX)
+		return NULL;
+
+	n = *_pos & ((1U << shift) - 1);
+	bucket = *_pos >> shift;
+	for (;;) {
+		if (bucket >= HASH_SIZE(rxnet->peer_hash)) {
+			*_pos = UINT_MAX;
+			return NULL;
+		}
+		if (n == 0) {
+			if (bucket == 0)
+				return SEQ_START_TOKEN;
+			*_pos += 1;
+			n++;
+		}
+
+		p = seq_hlist_start_rcu(&rxnet->peer_hash[bucket], n - 1);
+		if (p)
+			return p;
+		bucket++;
+		n = 1;
+		*_pos = (bucket << shift) | n;
+	}
+}
+
+static void *rxrpc_peer_seq_next(struct seq_file *seq, void *v, loff_t *_pos)
+{
+	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+	unsigned int bucket, n;
+	unsigned int shift = 32 - HASH_BITS(rxnet->peer_hash);
+	void *p;
+
+	if (*_pos >= UINT_MAX)
+		return NULL;
+
+	bucket = *_pos >> shift;
+
+	p = seq_hlist_next_rcu(v, &rxnet->peer_hash[bucket], _pos);
+	if (p)
+		return p;
+
+	for (;;) {
+		bucket++;
+		n = 1;
+		*_pos = (bucket << shift) | n;
+
+		if (bucket >= HASH_SIZE(rxnet->peer_hash)) {
+			*_pos = UINT_MAX;
+			return NULL;
+		}
+		if (n == 0) {
+			*_pos += 1;
+			n++;
+		}
+
+		p = seq_hlist_start_rcu(&rxnet->peer_hash[bucket], n - 1);
+		if (p)
+			return p;
+	}
+}
+
+static void rxrpc_peer_seq_stop(struct seq_file *seq, void *v)
+	__releases(rcu)
+{
+	rcu_read_unlock();
+}
+
+const struct seq_operations rxrpc_peer_seq_ops = {
+	.start  = rxrpc_peer_seq_start,
+	.next   = rxrpc_peer_seq_next,
+	.stop   = rxrpc_peer_seq_stop,
+	.show   = rxrpc_peer_seq_show,
+};
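
The iterator above packs its resume position into the seq_file offset as
(bucket << shift) | n: the high bits select the hash bucket and the low bits
hold a 1-based index within it, with position 0 reserved for the header row.
A sketch of the encoding (the 2^10-bucket table size is an assumption for
illustration):

    #include <stdint.h>

    #define PEER_HASH_BITS 10 /* assumed: 2^10 hash buckets */
    #define POS_SHIFT      (32 - PEER_HASH_BITS)

    static uint32_t pos_encode(uint32_t bucket, uint32_t n)
    {
        return (bucket << POS_SHIFT) | n; /* n is 1-based within the bucket */
    }

    static void pos_decode(uint32_t pos, uint32_t *bucket, uint32_t *n)
    {
        *bucket = pos >> POS_SHIFT;
        *n = pos & ((1U << POS_SHIFT) - 1);
    }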
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
index f9cb83c..49bb972 100644
--- a/net/rxrpc/protocol.h
+++ b/net/rxrpc/protocol.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* packet.h: Rx packet layout and definitions
  *
  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #ifndef _LINUX_RXRPC_PACKET_H
@@ -93,6 +89,15 @@
 #define RXRPC_JUMBO_DATALEN	1412	/* non-terminal jumbo packet data length */
 #define RXRPC_JUMBO_SUBPKTLEN	(RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
 
+/*
+ * The maximum number of subpackets that can possibly fit in a UDP packet is:
+ *
+ *	((max_IP - IP_hdr - UDP_hdr) / RXRPC_JUMBO_SUBPKTLEN) + 1
+ *	= ((65535 - 20 - 8) / 1416) + 1
+ *	= 46 non-terminal packets and 1 terminal packet.
+ */
+#define RXRPC_MAX_NR_JUMBO	47
+
 /*****************************************************************************/
 /*
  * on-the-wire Rx ACK packet data payload
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 816b19a..8578c39 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* RxRPC recvmsg() implementation
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -145,7 +141,7 @@
 	ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
 
 	if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
-		rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, false, true,
+		rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, false, true,
 				  rxrpc_propose_ack_terminal_ack);
 		//rxrpc_send_ack_packet(call, false, NULL);
 	}
@@ -163,7 +159,7 @@
 		call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
 		call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
 		write_unlock_bh(&call->state_lock);
-		rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true,
+		rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false, true,
 				  rxrpc_propose_ack_processing_op);
 		break;
 	default:
@@ -181,7 +177,8 @@
 	struct sk_buff *skb;
 	rxrpc_serial_t serial;
 	rxrpc_seq_t hard_ack, top;
-	u8 flags;
+	bool last = false;
+	u8 subpacket;
 	int ix;
 
 	_enter("%d", call->debug_id);
@@ -193,30 +190,32 @@
 	hard_ack++;
 	ix = hard_ack & RXRPC_RXTX_BUFF_MASK;
 	skb = call->rxtx_buffer[ix];
-	rxrpc_see_skb(skb, rxrpc_skb_rx_rotated);
+	rxrpc_see_skb(skb, rxrpc_skb_rotated);
 	sp = rxrpc_skb(skb);
-	flags = sp->hdr.flags;
-	serial = sp->hdr.serial;
-	if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO)
-		serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1;
+
+	subpacket = call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET;
+	serial = sp->hdr.serial + subpacket;
+
+	if (subpacket == sp->nr_subpackets - 1 &&
+	    sp->rx_flags & RXRPC_SKB_INCL_LAST)
+		last = true;
 
 	call->rxtx_buffer[ix] = NULL;
 	call->rxtx_annotations[ix] = 0;
 	/* Barrier against rxrpc_input_data(). */
 	smp_store_release(&call->rx_hard_ack, hard_ack);
 
-	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 
-	_debug("%u,%u,%02x", hard_ack, top, flags);
 	trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack);
-	if (flags & RXRPC_LAST_PACKET) {
+	if (last) {
 		rxrpc_end_rx_phase(call, serial);
 	} else {
 		/* Check to see if there's an ACK that needs sending. */
 		if (after_eq(hard_ack, call->ackr_consumed + 2) ||
 		    after_eq(top, call->ackr_seen + 2) ||
 		    (hard_ack == top && after(hard_ack, call->ackr_consumed)))
-			rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial,
+			rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial,
 					  true, true,
 					  rxrpc_propose_ack_rotate_rx);
 		if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
@@ -237,22 +236,23 @@
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	rxrpc_seq_t seq = sp->hdr.seq;
 	u16 cksum = sp->hdr.cksum;
+	u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET;
 
 	_enter("");
 
 	/* For all but the head jumbo subpacket, the security checksum is in a
 	 * jumbo header immediately prior to the data.
 	 */
-	if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) {
+	if (subpacket > 0) {
 		__be16 tmp;
 		if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0)
 			BUG();
 		cksum = ntohs(tmp);
-		seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1;
+		seq += subpacket;
 	}
 
-	return call->conn->security->verify_packet(call, skb, offset, len,
-						   seq, cksum);
+	return call->security->verify_packet(call, skb, offset, len,
+					     seq, cksum);
 }
 
 /*
@@ -267,21 +267,24 @@
  */
 static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
 			     u8 *_annotation,
-			     unsigned int *_offset, unsigned int *_len)
+			     unsigned int *_offset, unsigned int *_len,
+			     bool *_last)
 {
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	unsigned int offset = sizeof(struct rxrpc_wire_header);
 	unsigned int len;
+	bool last = false;
 	int ret;
 	u8 annotation = *_annotation;
+	u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET;
 
 	/* Locate the subpacket */
+	offset += subpacket * RXRPC_JUMBO_SUBPKTLEN;
 	len = skb->len - offset;
-	if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) {
-		offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) *
-			   RXRPC_JUMBO_SUBPKTLEN);
-		len = (annotation & RXRPC_RX_ANNO_JLAST) ?
-			skb->len - offset : RXRPC_JUMBO_SUBPKTLEN;
-	}
+	if (subpacket < sp->nr_subpackets - 1)
+		len = RXRPC_JUMBO_DATALEN;
+	else if (sp->rx_flags & RXRPC_SKB_INCL_LAST)
+		last = true;
 
 	if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) {
 		ret = rxrpc_verify_packet(call, skb, annotation, offset, len);
@@ -292,7 +295,8 @@
 
 	*_offset = offset;
 	*_len = len;
-	call->conn->security->locate_data(call, skb, _offset, _len);
+	*_last = last;
+	call->security->locate_data(call, skb, _offset, _len);
 	return 0;
 }
 
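Note: the rewritten rxrpc_locate_data() above derives a subpacket's offset and length from its annotation index plus the skb-level nr_subpackets and RXRPC_SKB_INCL_LAST metadata, instead of decoding the old RXRPC_RX_ANNO_JUMBO field. A userspace model of the new rule (the types and names here are hypothetical; wire_hdr_len stands in for sizeof(struct rxrpc_wire_header)):

/* Userspace model of the subpacket location rule applied above. */
#include <stdbool.h>

#define JUMBO_DATALEN	1412			/* RXRPC_JUMBO_DATALEN */
#define JUMBO_SUBPKTLEN	(JUMBO_DATALEN + 4)	/* RXRPC_JUMBO_SUBPKTLEN */

struct subpkt_loc {
	unsigned int offset, len;
	bool last;
};

static struct subpkt_loc locate_subpacket(unsigned int skb_len,
					  unsigned int wire_hdr_len,
					  unsigned int nr_subpackets,
					  unsigned int subpacket,
					  bool skb_holds_last)
{
	struct subpkt_loc loc = {
		.offset = wire_hdr_len + subpacket * JUMBO_SUBPKTLEN,
	};

	loc.len = skb_len - loc.offset;
	if (subpacket < nr_subpackets - 1)
		loc.len = JUMBO_DATALEN;	/* non-terminal: fixed size */
	else
		loc.last = skb_holds_last;	/* RXRPC_SKB_INCL_LAST set? */
	return loc;
}
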
@@ -307,9 +311,10 @@
 {
 	struct rxrpc_skb_priv *sp;
 	struct sk_buff *skb;
+	rxrpc_serial_t serial;
 	rxrpc_seq_t hard_ack, top, seq;
 	size_t remain;
-	bool last;
+	bool rx_pkt_last;
 	unsigned int rx_pkt_offset, rx_pkt_len;
 	int ix, copy, ret = -EAGAIN, ret2;
 
@@ -319,6 +324,7 @@
 
 	rx_pkt_offset = call->rx_pkt_offset;
 	rx_pkt_len = call->rx_pkt_len;
+	rx_pkt_last = call->rx_pkt_last;
 
 	if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) {
 		seq = call->rx_hard_ack;
@@ -329,6 +335,7 @@
 	/* Barriers against rxrpc_input_data(). */
 	hard_ack = call->rx_hard_ack;
 	seq = hard_ack + 1;
+
 	while (top = smp_load_acquire(&call->rx_top),
 	       before_eq(seq, top)
 	       ) {
@@ -340,12 +347,15 @@
 			break;
 		}
 		smp_rmb();
-		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+		rxrpc_see_skb(skb, rxrpc_skb_seen);
 		sp = rxrpc_skb(skb);
 
-		if (!(flags & MSG_PEEK))
+		if (!(flags & MSG_PEEK)) {
+			serial = sp->hdr.serial;
+			serial += call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET;
 			trace_rxrpc_receive(call, rxrpc_receive_front,
-					    sp->hdr.serial, seq);
+					    serial, seq);
+		}
 
 		if (msg)
 			sock_recv_timestamp(msg, sock->sk, skb);
@@ -353,7 +363,8 @@
 		if (rx_pkt_offset == 0) {
 			ret2 = rxrpc_locate_data(call, skb,
 						 &call->rxtx_annotations[ix],
-						 &rx_pkt_offset, &rx_pkt_len);
+						 &rx_pkt_offset, &rx_pkt_len,
+						 &rx_pkt_last);
 			trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq,
 					    rx_pkt_offset, rx_pkt_len, ret2);
 			if (ret2 < 0) {
@@ -393,13 +404,12 @@
 		}
 
 		/* The whole packet has been transferred. */
-		last = sp->hdr.flags & RXRPC_LAST_PACKET;
 		if (!(flags & MSG_PEEK))
 			rxrpc_rotate_rx_window(call);
 		rx_pkt_offset = 0;
 		rx_pkt_len = 0;
 
-		if (last) {
+		if (rx_pkt_last) {
 			ASSERTCMP(seq, ==, READ_ONCE(call->rx_top));
 			ret = 1;
 			goto out;
@@ -412,6 +422,7 @@
 	if (!(flags & MSG_PEEK)) {
 		call->rx_pkt_offset = rx_pkt_offset;
 		call->rx_pkt_len = rx_pkt_len;
+		call->rx_pkt_last = rx_pkt_last;
 	}
 done:
 	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq,
@@ -596,6 +607,7 @@
 	}
 error_no_call:
 	release_sock(&rx->sk);
+error_trace:
 	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
 	return ret;
 
@@ -604,7 +616,7 @@
 wait_error:
 	finish_wait(sk_sleep(&rx->sk), &wait);
 	call = NULL;
-	goto error_no_call;
+	goto error_trace;
 }
 
 /**
@@ -715,3 +727,46 @@
 	goto out;
 }
 EXPORT_SYMBOL(rxrpc_kernel_recv_data);
+
+/**
+ * rxrpc_kernel_get_reply_time - Get timestamp on first reply packet
+ * @sock: The socket that the call exists on
+ * @call: The call to query
+ * @_ts: Where to put the timestamp
+ *
+ * Retrieve the timestamp from the first DATA packet of the reply if it is
+ * in the ring.  Returns true if successful, false if not.
+ */
+bool rxrpc_kernel_get_reply_time(struct socket *sock, struct rxrpc_call *call,
+				 ktime_t *_ts)
+{
+	struct sk_buff *skb;
+	rxrpc_seq_t hard_ack, top, seq;
+	bool success = false;
+
+	mutex_lock(&call->user_mutex);
+
+	if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_RECV_REPLY)
+		goto out;
+
+	hard_ack = call->rx_hard_ack;
+	if (hard_ack != 0)
+		goto out;
+
+	seq = hard_ack + 1;
+	top = smp_load_acquire(&call->rx_top);
+	if (after(seq, top))
+		goto out;
+
+	skb = call->rxtx_buffer[seq & RXRPC_RXTX_BUFF_MASK];
+	if (!skb)
+		goto out;
+
+	*_ts = skb_get_ktime(skb);
+	success = true;
+
+out:
+	mutex_unlock(&call->user_mutex);
+	return success;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_reply_time);
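
Note: rxrpc_kernel_get_reply_time() is a new export. A hypothetical in-kernel caller — apart from the export itself and the ktime helpers, every name below is illustrative:

/* Illustrative only: consuming the new API.  Does nothing if the first
 * reply DATA packet has already been rotated out of the Rx ring. */
static void example_note_reply_time(struct socket *sock,
				    struct rxrpc_call *call)
{
	ktime_t ts;

	if (rxrpc_kernel_get_reply_time(sock, call, &ts))
		pr_info("first reply DATA packet at %lld ns\n",
			ktime_to_ns(ts));
}
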
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index cea1683..8d8aa3c 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Kerberos-based RxRPC security
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -46,7 +42,8 @@
  * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
  * packets
  */
-static struct crypto_skcipher *rxkad_ci;
+static struct crypto_sync_skcipher *rxkad_ci;
+static struct skcipher_request *rxkad_ci_req;
 static DEFINE_MUTEX(rxkad_ci_mutex);
 
 /*
@@ -54,7 +51,7 @@
  */
 static int rxkad_init_connection_security(struct rxrpc_connection *conn)
 {
-	struct crypto_skcipher *ci;
+	struct crypto_sync_skcipher *ci;
 	struct rxrpc_key_token *token;
 	int ret;
 
@@ -63,14 +60,14 @@
 	token = conn->params.key->payload.data[0];
 	conn->security_ix = token->security_index;
 
-	ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+	ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
 	if (IS_ERR(ci)) {
 		_debug("no cipher");
 		ret = PTR_ERR(ci);
 		goto error;
 	}
 
-	if (crypto_skcipher_setkey(ci, token->kad->session_key,
+	if (crypto_sync_skcipher_setkey(ci, token->kad->session_key,
 				   sizeof(token->kad->session_key)) < 0)
 		BUG();
 
@@ -103,8 +100,8 @@
  */
 static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
 {
+	struct skcipher_request *req;
 	struct rxrpc_key_token *token;
-	SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
 	struct scatterlist sg;
 	struct rxrpc_crypt iv;
 	__be32 *tmpbuf;
@@ -119,6 +116,12 @@
 	if (!tmpbuf)
 		return -ENOMEM;
 
+	req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS);
+	if (!req) {
+		kfree(tmpbuf);
+		return -ENOMEM;
+	}
+
 	token = conn->params.key->payload.data[0];
 	memcpy(&iv, token->kad->session_key, sizeof(iv));
 
@@ -128,11 +131,11 @@
 	tmpbuf[3] = htonl(conn->security_ix);
 
 	sg_init_one(&sg, tmpbuf, tmpsize);
-	skcipher_request_set_tfm(req, conn->cipher);
+	skcipher_request_set_sync_tfm(req, conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
 	crypto_skcipher_encrypt(req);
-	skcipher_request_zero(req);
+	skcipher_request_free(req);
 
 	memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv));
 	kfree(tmpbuf);
@@ -141,6 +144,35 @@
 }
 
 /*
+ * Allocate and prepare the crypto request on a call.  For any particular call,
+ * this is called serially for the packets, so no lock should be necessary.
+ */
+static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call)
+{
+	struct crypto_skcipher *tfm = &call->conn->cipher->base;
+	struct skcipher_request	*cipher_req = call->cipher_req;
+
+	if (!cipher_req) {
+		cipher_req = skcipher_request_alloc(tfm, GFP_NOFS);
+		if (!cipher_req)
+			return NULL;
+		call->cipher_req = cipher_req;
+	}
+
+	return cipher_req;
+}
+
+/*
+ * Clean up the crypto on a call.
+ */
+static void rxkad_free_call_crypto(struct rxrpc_call *call)
+{
+	if (call->cipher_req)
+		skcipher_request_free(call->cipher_req);
+	call->cipher_req = NULL;
+}
+
+/*
  * partially encrypt a packet (level 1 security)
  */
 static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
@@ -167,7 +199,7 @@
 	memset(&iv, 0, sizeof(iv));
 
 	sg_init_one(&sg, sechdr, 8);
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
 	crypto_skcipher_encrypt(req);
@@ -191,10 +223,8 @@
 	struct rxrpc_skb_priv *sp;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg[16];
-	struct sk_buff *trailer;
 	unsigned int len;
 	u16 check;
-	int nsg;
 	int err;
 
 	sp = rxrpc_skb(skb);
@@ -212,21 +242,20 @@
 	memcpy(&iv, token->kad->session_key, sizeof(iv));
 
 	sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x);
 	crypto_skcipher_encrypt(req);
 
 	/* we want to encrypt the skbuff in-place */
-	nsg = skb_cow_data(skb, 0, &trailer);
-	err = -ENOMEM;
-	if (nsg < 0 || nsg > 16)
+	err = -EMSGSIZE;
+	if (skb_shinfo(skb)->nr_frags > 16)
 		goto out;
 
 	len = data_size + call->conn->size_align - 1;
 	len &= ~(call->conn->size_align - 1);
 
-	sg_init_table(sg, nsg);
+	sg_init_table(sg, ARRAY_SIZE(sg));
 	err = skb_to_sgvec(skb, sg, 0, len);
 	if (unlikely(err < 0))
 		goto out;
@@ -250,7 +279,7 @@
 			       void *sechdr)
 {
 	struct rxrpc_skb_priv *sp;
-	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+	struct skcipher_request	*req;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg;
 	u32 x, y;
@@ -269,6 +298,10 @@
 	if (ret < 0)
 		return ret;
 
+	req = rxkad_get_call_crypto(call);
+	if (!req)
+		return -ENOMEM;
+
 	/* continue encrypting from where we left off */
 	memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 
@@ -279,7 +312,7 @@
 	call->crypto_buf[1] = htonl(x);
 
 	sg_init_one(&sg, call->crypto_buf, 8);
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
 	crypto_skcipher_encrypt(req);
@@ -323,11 +356,10 @@
 	struct rxkad_level1_hdr sechdr;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg[16];
-	struct sk_buff *trailer;
 	bool aborted;
 	u32 data_size, buf;
 	u16 check;
-	int nsg, ret;
+	int ret;
 
 	_enter("");
 
@@ -340,11 +372,7 @@
 	/* Decrypt the skbuff in-place.  TODO: We really want to decrypt
 	 * directly into the target buffer.
 	 */
-	nsg = skb_cow_data(skb, 0, &trailer);
-	if (nsg < 0 || nsg > 16)
-		goto nomem;
-
-	sg_init_table(sg, nsg);
+	sg_init_table(sg, ARRAY_SIZE(sg));
 	ret = skb_to_sgvec(skb, sg, offset, 8);
 	if (unlikely(ret < 0))
 		return ret;
@@ -352,7 +380,7 @@
 	/* start the decryption afresh */
 	memset(&iv, 0, sizeof(iv));
 
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
 	crypto_skcipher_decrypt(req);
@@ -392,10 +420,6 @@
 	if (aborted)
 		rxrpc_send_abort_packet(call);
 	return -EPROTO;
-
-nomem:
-	_leave(" = -ENOMEM");
-	return -ENOMEM;
 }
 
 /*
@@ -410,7 +434,6 @@
 	struct rxkad_level2_hdr sechdr;
 	struct rxrpc_crypt iv;
 	struct scatterlist _sg[4], *sg;
-	struct sk_buff *trailer;
 	bool aborted;
 	u32 data_size, buf;
 	u16 check;
@@ -427,12 +450,11 @@
 	/* Decrypt the skbuff in-place.  TODO: We really want to decrypt
 	 * directly into the target buffer.
 	 */
-	nsg = skb_cow_data(skb, 0, &trailer);
-	if (nsg < 0)
-		goto nomem;
-
 	sg = _sg;
-	if (unlikely(nsg > 4)) {
+	nsg = skb_shinfo(skb)->nr_frags;
+	if (nsg <= 4) {
+		nsg = 4;
+	} else {
 		sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO);
 		if (!sg)
 			goto nomem;
@@ -450,7 +472,7 @@
 	token = call->conn->params.key->payload.data[0];
 	memcpy(&iv, token->kad->session_key, sizeof(iv));
 
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, len, iv.x);
 	crypto_skcipher_decrypt(req);
@@ -506,7 +528,7 @@
 			       unsigned int offset, unsigned int len,
 			       rxrpc_seq_t seq, u16 expected_cksum)
 {
-	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+	struct skcipher_request	*req;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg;
 	bool aborted;
@@ -519,6 +541,10 @@
 	if (!call->conn->cipher)
 		return 0;
 
+	req = rxkad_get_call_crypto(call);
+	if (!req)
+		return -ENOMEM;
+
 	/* continue encrypting from where we left off */
 	memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 
@@ -529,7 +555,7 @@
 	call->crypto_buf[1] = htonl(x);
 
 	sg_init_one(&sg, call->crypto_buf, 8);
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
 	crypto_skcipher_encrypt(req);
@@ -751,24 +777,29 @@
 /*
  * encrypt the response packet
  */
-static void rxkad_encrypt_response(struct rxrpc_connection *conn,
-				   struct rxkad_response *resp,
-				   const struct rxkad_key *s2)
+static int rxkad_encrypt_response(struct rxrpc_connection *conn,
+				  struct rxkad_response *resp,
+				  const struct rxkad_key *s2)
 {
-	SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
+	struct skcipher_request *req;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg[1];
 
+	req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS);
+	if (!req)
+		return -ENOMEM;
+
 	/* continue encrypting from where we left off */
 	memcpy(&iv, s2->session_key, sizeof(iv));
 
 	sg_init_table(sg, 1);
 	sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
-	skcipher_request_set_tfm(req, conn->cipher);
+	skcipher_request_set_sync_tfm(req, conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
 	crypto_skcipher_encrypt(req);
-	skcipher_request_zero(req);
+	skcipher_request_free(req);
+	return 0;
 }
 
 /*
@@ -843,8 +874,9 @@
 
 	/* calculate the response checksum and then do the encryption */
 	rxkad_calc_response_checksum(resp);
-	rxkad_encrypt_response(conn, resp, token->kad);
-	ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
+	ret = rxkad_encrypt_response(conn, resp, token->kad);
+	if (ret == 0)
+		ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
 	kfree(resp);
 	return ret;
 
@@ -1021,25 +1053,23 @@
 				   struct rxkad_response *resp,
 				   const struct rxrpc_crypt *session_key)
 {
-	SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
+	struct skcipher_request *req = rxkad_ci_req;
 	struct scatterlist sg[1];
 	struct rxrpc_crypt iv;
 
 	_enter(",,%08x%08x",
 	       ntohl(session_key->n[0]), ntohl(session_key->n[1]));
 
-	ASSERT(rxkad_ci != NULL);
-
 	mutex_lock(&rxkad_ci_mutex);
-	if (crypto_skcipher_setkey(rxkad_ci, session_key->x,
-				   sizeof(*session_key)) < 0)
+	if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x,
+					sizeof(*session_key)) < 0)
 		BUG();
 
 	memcpy(&iv, session_key, sizeof(iv));
 
 	sg_init_table(sg, 1);
 	sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
-	skcipher_request_set_tfm(req, rxkad_ci);
+	skcipher_request_set_sync_tfm(req, rxkad_ci);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
 	crypto_skcipher_decrypt(req);
@@ -1218,7 +1248,7 @@
 	_enter("");
 
 	if (conn->cipher)
-		crypto_free_skcipher(conn->cipher);
+		crypto_free_sync_skcipher(conn->cipher);
 }
 
 /*
@@ -1226,10 +1256,26 @@
  */
 static int rxkad_init(void)
 {
+	struct crypto_sync_skcipher *tfm;
+	struct skcipher_request *req;
+
 	/* pin the cipher we need so that the crypto layer doesn't invoke
 	 * keventd to go get it */
-	rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
-	return PTR_ERR_OR_ZERO(rxkad_ci);
+	tfm = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	req = skcipher_request_alloc(&tfm->base, GFP_KERNEL);
+	if (!req)
+		goto nomem_tfm;
+
+	rxkad_ci_req = req;
+	rxkad_ci = tfm;
+	return 0;
+
+nomem_tfm:
+	crypto_free_sync_skcipher(tfm);
+	return -ENOMEM;
 }
 
 /*
@@ -1237,8 +1283,8 @@
  */
 static void rxkad_exit(void)
 {
-	if (rxkad_ci)
-		crypto_free_skcipher(rxkad_ci);
+	crypto_free_sync_skcipher(rxkad_ci);
+	skcipher_request_free(rxkad_ci_req);
 }
 
 /*
@@ -1253,6 +1299,7 @@
 	.prime_packet_security		= rxkad_prime_packet_security,
 	.secure_packet			= rxkad_secure_packet,
 	.verify_packet			= rxkad_verify_packet,
+	.free_call_crypto		= rxkad_free_call_crypto,
 	.locate_data			= rxkad_locate_data,
 	.issue_challenge		= rxkad_issue_challenge,
 	.respond_to_challenge		= rxkad_respond_to_challenge,
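
Note: the rxkad hunks above retire SKCIPHER_REQUEST_ON_STACK in favour of requests allocated against a crypto_sync_skcipher — one-shot allocations for connection priming and the RESPONSE paths, and a request cached per call via rxkad_get_call_crypto() on the data path. A condensed, self-contained sketch of the one-shot life cycle (error paths trimmed; illustrative, not the patch itself):

/* Condensed sync-skcipher request life cycle as adopted by rxkad. */
#include <crypto/skcipher.h>

static int example_encrypt(struct scatterlist *sg, unsigned int len, u8 *iv)
{
	struct crypto_sync_skcipher *tfm;
	struct skcipher_request *req;
	int ret;

	tfm = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = skcipher_request_alloc(&tfm->base, GFP_KERNEL);
	if (!req) {
		crypto_free_sync_skcipher(tfm);
		return -ENOMEM;
	}

	skcipher_request_set_sync_tfm(req, tfm);
	skcipher_request_set_callback(req, 0, NULL, NULL);
	skcipher_request_set_crypt(req, sg, sg, len, iv);
	ret = crypto_skcipher_encrypt(req);

	skcipher_request_free(req);
	crypto_free_sync_skcipher(tfm);
	return ret;
}
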
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index c4479af..a4c47d2 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* RxRPC security handling
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -148,7 +144,7 @@
 
 	/* look through the service's keyring */
 	kref = keyring_search(make_key_ref(rx->securities, 1UL),
-			      &key_type_rxrpc_s, kdesc);
+			      &key_type_rxrpc_s, kdesc, true);
 	if (IS_ERR(kref)) {
 		read_unlock(&local->services_lock);
 		_leave(" = %ld [search]", PTR_ERR(kref));
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index be01f9c..813fd68 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AF_RXRPC sendmsg() implementation.
  *
  * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -80,7 +76,8 @@
 		if (call->state >= RXRPC_CALL_COMPLETE)
 			return call->error;
 
-		if (timeout == 0 &&
+		if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) &&
+		    timeout == 0 &&
 		    tx_win == tx_start && signal_pending(current))
 			return -EINTR;
 
@@ -152,12 +149,13 @@
 }
 
 /*
- * Queue a DATA packet for transmission, set the resend timeout and send the
- * packet immediately
+ * Queue a DATA packet for transmission, set the resend timeout and send
+ * the packet immediately.  Returns the error from rxrpc_send_data_packet()
+ * in case the caller wants to do something with it.
  */
-static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
-			       struct sk_buff *skb, bool last,
-			       rxrpc_notify_end_tx_t notify_end_tx)
+static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
+			      struct sk_buff *skb, bool last,
+			      rxrpc_notify_end_tx_t notify_end_tx)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	unsigned long now;
@@ -169,10 +167,8 @@
 
 	ASSERTCMP(seq, ==, call->tx_top + 1);
 
-	if (last) {
+	if (last)
 		annotation |= RXRPC_TX_ANNO_LAST;
-		set_bit(RXRPC_CALL_TX_LASTQ, &call->flags);
-	}
 
 	/* We have to set the timestamp before queueing as the retransmit
 	 * algorithm can see the packet as soon as we queue it.
@@ -180,7 +176,7 @@
 	skb->tstamp = ktime_get_real();
 
 	ix = seq & RXRPC_RXTX_BUFF_MASK;
-	rxrpc_get_skb(skb, rxrpc_skb_tx_got);
+	rxrpc_get_skb(skb, rxrpc_skb_got);
 	call->rxtx_annotations[ix] = annotation;
 	smp_wmb();
 	call->rxtx_buffer[ix] = skb;
@@ -230,6 +226,7 @@
 			rxrpc_set_call_completion(call,
 						  RXRPC_CALL_LOCAL_ERROR,
 						  0, ret);
+			rxrpc_notify_socket(call);
 			goto out;
 		}
 		_debug("need instant resend %d", ret);
@@ -251,8 +248,9 @@
 	}
 
 out:
-	rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
-	_leave("");
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
+	_leave(" = %d", ret);
+	return ret;
 }
 
 /*
@@ -291,7 +289,7 @@
 
 	skb = call->tx_pending;
 	call->tx_pending = NULL;
-	rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
+	rxrpc_see_skb(skb, rxrpc_skb_seen);
 
 	copied = 0;
 	do {
@@ -338,7 +336,9 @@
 			if (!skb)
 				goto maybe_error;
 
-			rxrpc_new_skb(skb, rxrpc_skb_tx_new);
+			sp = rxrpc_skb(skb);
+			sp->rx_flags |= RXRPC_SKB_TX_BUFFER;
+			rxrpc_new_skb(skb, rxrpc_skb_new);
 
 			_debug("ALLOC SEND %p", skb);
 
@@ -348,7 +348,6 @@
 			skb_reserve(skb, call->conn->security_size);
 			skb->len += call->conn->security_size;
 
-			sp = rxrpc_skb(skb);
 			sp->remain = chunk;
 			if (sp->remain > skb_tailroom(skb))
 				sp->remain = skb_tailroom(skb);
@@ -386,6 +385,11 @@
 				call->tx_total_len -= copy;
 		}
 
+		/* check for the far side aborting the call or a network error
+		 * occurring */
+		if (call->state == RXRPC_CALL_COMPLETE)
+			goto call_terminated;
+
 		/* add the packet to the send queue if it's now full */
 		if (sp->remain <= 0 ||
 		    (msg_data_left(msg) == 0 && !more)) {
@@ -415,26 +419,17 @@
 				 call->tx_winsize)
 				sp->hdr.flags |= RXRPC_MORE_PACKETS;
 
-			ret = conn->security->secure_packet(
+			ret = call->security->secure_packet(
 				call, skb, skb->mark, skb->head);
 			if (ret < 0)
 				goto out;
 
-			rxrpc_queue_packet(rx, call, skb,
-					   !msg_data_left(msg) && !more,
-					   notify_end_tx);
+			ret = rxrpc_queue_packet(rx, call, skb,
+						 !msg_data_left(msg) && !more,
+						 notify_end_tx);
+			/* Should check for failure here */
 			skb = NULL;
 		}
-
-		/* Check for the far side aborting the call or a network error
-		 * occurring.  If this happens, save any packet that was under
-		 * construction so that in the case of a network error, the
-		 * call can be retried or redirected.
-		 */
-		if (call->state == RXRPC_CALL_COMPLETE) {
-			ret = call->error;
-			goto out;
-		}
 	} while (msg_data_left(msg) > 0);
 
 success:
@@ -444,6 +439,11 @@
 	_leave(" = %d", ret);
 	return ret;
 
+call_terminated:
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
+	_leave(" = %d", call->error);
+	return call->error;
+
 maybe_error:
 	if (copied)
 		goto success;
@@ -619,6 +619,7 @@
 		.call.tx_total_len	= -1,
 		.call.user_call_ID	= 0,
 		.call.nr_timeouts	= 0,
+		.call.intr		= true,
 		.abort_code		= 0,
 		.command		= RXRPC_CMD_SEND_DATA,
 		.exclusive		= false,
@@ -660,6 +661,7 @@
 		case RXRPC_CALL_SERVER_PREALLOC:
 		case RXRPC_CALL_SERVER_SECURING:
 		case RXRPC_CALL_SERVER_ACCEPTING:
+			rxrpc_put_call(call, rxrpc_call_put);
 			ret = -EBUSY;
 			goto error_release_sock;
 		default:
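
Note: two behavioural points in the sendmsg hunks above: the Tx-window wait now returns -EINTR only when the call carries RXRPC_CALL_IS_INTR, and calls created through sendmsg default to .call.intr = true, preserving the old userspace behaviour while letting kernel users opt out. A small model of the wait gate (function and parameter names illustrative):

/* Model of the new wait rule: a pending signal aborts the wait for Tx
 * window space only when the call was created interruptible. */
static bool example_should_return_eintr(bool call_is_intr, long timeout,
					unsigned int tx_win,
					unsigned int tx_start,
					bool signal_is_pending)
{
	return call_is_intr && timeout == 0 &&
	       tx_win == tx_start && signal_is_pending;
}
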
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index b8985d0..0348d2b 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* ar-skbuff.c: socket buffer destruction handling
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -18,7 +14,8 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
-#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs)
+#define is_tx_skb(skb) (rxrpc_skb(skb)->rx_flags & RXRPC_SKB_TX_BUFFER)
+#define select_skb_count(skb) (is_tx_skb(skb) ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs)
 
 /*
  * Note the allocation or reception of a socket buffer.
@@ -26,8 +23,9 @@
 void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
-	int n = atomic_inc_return(select_skb_count(op));
-	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+	int n = atomic_inc_return(select_skb_count(skb));
+	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+			rxrpc_skb(skb)->rx_flags, here);
 }
 
 /*
@@ -37,8 +35,9 @@
 {
 	const void *here = __builtin_return_address(0);
 	if (skb) {
-		int n = atomic_read(select_skb_count(op));
-		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+		int n = atomic_read(select_skb_count(skb));
+		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+				rxrpc_skb(skb)->rx_flags, here);
 	}
 }
 
@@ -48,12 +47,23 @@
 void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
-	int n = atomic_inc_return(select_skb_count(op));
-	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+	int n = atomic_inc_return(select_skb_count(skb));
+	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+			rxrpc_skb(skb)->rx_flags, here);
 	skb_get(skb);
 }
 
 /*
+ * Note the dropping of a ref on a socket buffer by the core.
+ */
+void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+{
+	const void *here = __builtin_return_address(0);
+	int n = atomic_inc_return(&rxrpc_n_rx_skbs);
+	trace_rxrpc_skb(skb, op, 0, n, 0, here);
+}
+
+/*
  * Note the destruction of a socket buffer.
  */
 void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
@@ -62,23 +72,9 @@
 	if (skb) {
 		int n;
 		CHECK_SLAB_OKAY(&skb->users);
-		n = atomic_dec_return(select_skb_count(op));
-		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
-		kfree_skb(skb);
-	}
-}
-
-/*
- * Note the injected loss of a socket buffer.
- */
-void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
-{
-	const void *here = __builtin_return_address(0);
-	if (skb) {
-		int n;
-		CHECK_SLAB_OKAY(&skb->users);
-		n = atomic_dec_return(select_skb_count(op));
-		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+		n = atomic_dec_return(select_skb_count(skb));
+		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+				rxrpc_skb(skb)->rx_flags, here);
 		kfree_skb(skb);
 	}
 }
@@ -91,9 +87,10 @@
 	const void *here = __builtin_return_address(0);
 	struct sk_buff *skb;
 	while ((skb = skb_dequeue((list))) != NULL) {
-		int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged));
-		trace_rxrpc_skb(skb, rxrpc_skb_rx_purged,
-				refcount_read(&skb->users), n, here);
+		int n = atomic_dec_return(select_skb_count(skb));
+		trace_rxrpc_skb(skb, rxrpc_skb_purged,
+				refcount_read(&skb->users), n,
+				rxrpc_skb(skb)->rx_flags, here);
 		kfree_skb(skb);
 	}
 }
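
Note: select_skb_count() now keys off a flag stamped on the skb at allocation (RXRPC_SKB_TX_BUFFER) rather than off the trace op, so a buffer is charged to the correct Tx/Rx counter no matter which trace point sees it. A userspace model of the split (the flag value and struct are stand-ins for the kernel types):

/* Userspace model: the Tx/Rx accounting split follows the buffer's own
 * flag, not the trace operation. */
#include <assert.h>
#include <stdatomic.h>

#define SKB_TX_BUFFER	0x01	/* stands in for RXRPC_SKB_TX_BUFFER */

struct skb_model { unsigned char rx_flags; };

static atomic_int n_tx_skbs, n_rx_skbs;

static atomic_int *select_skb_count(const struct skb_model *skb)
{
	return (skb->rx_flags & SKB_TX_BUFFER) ? &n_tx_skbs : &n_rx_skbs;
}

int main(void)
{
	struct skb_model tx = { .rx_flags = SKB_TX_BUFFER };
	struct skb_model rx = { 0 };

	atomic_fetch_add(select_skb_count(&tx), 1);
	atomic_fetch_add(select_skb_count(&rx), 1);
	assert(n_tx_skbs == 1 && n_rx_skbs == 1);
	return 0;
}
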
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index d75bd15..2bbb381 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* sysctls for configuring RxRPC operating parameters
  *
  * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
  */
 
 #include <linux/sysctl.h>
@@ -15,7 +11,6 @@
 #include "ar-internal.h"
 
 static struct ctl_table_header *rxrpc_sysctl_reg_table;
-static const unsigned int one = 1;
 static const unsigned int four = 4;
 static const unsigned int thirtytwo = 32;
 static const unsigned int n_65535 = 65535;
@@ -101,7 +96,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *)&one,
+		.extra1		= (void *)SYSCTL_ONE,
 		.extra2		= (void *)&rxrpc_max_client_connections,
 	},
 	{
@@ -119,7 +114,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *)&one,
+		.extra1		= (void *)SYSCTL_ONE,
 		.extra2		= (void *)&n_max_acks,
 	},
 	{
@@ -128,7 +123,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *)&one,
+		.extra1		= (void *)SYSCTL_ONE,
 		.extra2		= (void *)&n_65535,
 	},
 	{
@@ -137,7 +132,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *)&one,
+		.extra1		= (void *)SYSCTL_ONE,
 		.extra2		= (void *)&four,
 	},
 
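Note: with the file-local "one" constant gone, a minimum of 1 now comes from the kernel-wide SYSCTL_ONE boilerplate. A sketch of a ctl_table entry in the new style (the tunable and its upper bound are illustrative):

/* Sketch of a proc_dointvec_minmax entry using the shared SYSCTL_ONE. */
static unsigned int example_value = 8;
static const unsigned int example_max = 65535;

static struct ctl_table example_table[] = {
	{
		.procname	= "example_tunable",
		.data		= &example_value,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= (void *)SYSCTL_ONE,	/* minimum of 1 */
		.extra2		= (void *)&example_max,
	},
	{ }
};
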
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index e801171..2e4b9d8 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Utility routines
  *
  * Copyright (C) 2015 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
  */
 
 #include <linux/ip.h>
@@ -17,28 +13,17 @@
 /*
  * Fill out a peer address from a socket buffer containing a packet.
  */
-int rxrpc_extract_addr_from_skb(struct rxrpc_local *local,
-				struct sockaddr_rxrpc *srx,
-				struct sk_buff *skb)
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
 {
 	memset(srx, 0, sizeof(*srx));
 
 	switch (ntohs(skb->protocol)) {
 	case ETH_P_IP:
-		if (local->srx.transport.family == AF_INET6) {
-			srx->transport_type = SOCK_DGRAM;
-			srx->transport_len = sizeof(srx->transport.sin6);
-			srx->transport.sin6.sin6_family = AF_INET6;
-			srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
-			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-			srx->transport.sin6.sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
-		} else {
-			srx->transport_type = SOCK_DGRAM;
-			srx->transport_len = sizeof(srx->transport.sin);
-			srx->transport.sin.sin_family = AF_INET;
-			srx->transport.sin.sin_port = udp_hdr(skb)->source;
-			srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
-		}
+		srx->transport_type = SOCK_DGRAM;
+		srx->transport_len = sizeof(srx->transport.sin);
+		srx->transport.sin.sin_family = AF_INET;
+		srx->transport.sin.sin_port = udp_hdr(skb)->source;
+		srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
 		return 0;
 
 #ifdef CONFIG_AF_RXRPC_IPV6
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e957413..2985509 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Traffic control configuration.
 #
@@ -194,6 +195,17 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_etf.
 
+config NET_SCH_TAPRIO
+	tristate "Time Aware Priority (taprio) Scheduler"
+	help
+	  Say Y here if you want to use the Time Aware Priority (taprio) packet
+	  scheduling algorithm.
+
+	  See the top of <file:net/sched/sch_taprio.c> for more details.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_taprio.
+
 config NET_SCH_GRED
 	tristate "Generic Random Early Detection (GRED)"
 	---help---
@@ -312,7 +324,7 @@
 	tristate "Common Applications Kept Enhanced (CAKE)"
 	help
 	  Say Y here if you want to use the Common Applications Kept Enhanced
-          (CAKE) queue management algorithm.
+	  (CAKE) queue management algorithm.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called sch_cake.
@@ -347,8 +359,7 @@
 	help
 	  Say Y here if you want to use the Proportional Integral controller
 	  Enhanced scheduler packet scheduling algorithm.
-	  For more information, please see
-	  http://tools.ietf.org/html/draft-pan-tsvwg-pie-00
+	  For more information, please see https://tools.ietf.org/html/rfc8033
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called sch_pie.
@@ -719,8 +730,8 @@
 
 config NET_ACT_POLICE
 	tristate "Traffic Policing"
-        depends on NET_CLS_ACT
-        ---help---
+	depends on NET_CLS_ACT
+	---help---
 	  Say Y here if you want to do traffic policing, i.e. strict
 	  bandwidth limiting. This action replaces the existing policing
 	  module.
@@ -729,9 +740,9 @@
 	  module will be called act_police.
 
 config NET_ACT_GACT
-        tristate "Generic actions"
-        depends on NET_CLS_ACT
-        ---help---
+	tristate "Generic actions"
+	depends on NET_CLS_ACT
+	---help---
 	  Say Y here to take generic actions such as dropping and
 	  accepting packets.
 
@@ -739,15 +750,15 @@
 	  module will be called act_gact.
 
 config GACT_PROB
-        bool "Probability support"
-        depends on NET_ACT_GACT
-        ---help---
+	bool "Probability support"
+	depends on NET_ACT_GACT
+	---help---
 	  Say Y here to use the generic action randomly or deterministically.
 
 config NET_ACT_MIRRED
-        tristate "Redirecting and Mirroring"
-        depends on NET_CLS_ACT
-        ---help---
+	tristate "Redirecting and Mirroring"
+	depends on NET_CLS_ACT
+	---help---
 	  Say Y here to allow packets to be mirrored or redirected to
 	  other devices.
 
@@ -755,10 +766,10 @@
 	  module will be called act_mirred.
 
 config NET_ACT_SAMPLE
-        tristate "Traffic Sampling"
-        depends on NET_CLS_ACT
-        select PSAMPLE
-        ---help---
+	tristate "Traffic Sampling"
+	depends on NET_CLS_ACT
+	select PSAMPLE
+	---help---
 	  Say Y here to allow packet sampling tc action. The packet sample
 	  action consists of statistically choosing packets and sampling
 	  them using the psample module.
@@ -767,9 +778,9 @@
 	  module will be called act_sample.
 
 config NET_ACT_IPT
-        tristate "IPtables targets"
-        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
-        ---help---
+	tristate "IPtables targets"
+	depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+	---help---
 	  Say Y here to be able to invoke iptables targets after successful
 	  classification.
 
@@ -777,9 +788,9 @@
 	  module will be called act_ipt.
 
 config NET_ACT_NAT
-        tristate "Stateless NAT"
-        depends on NET_CLS_ACT
-        ---help---
+	tristate "Stateless NAT"
+	depends on NET_CLS_ACT
+	---help---
 	  Say Y here to do stateless NAT on IPv4 packets.  You should use
 	  netfilter for NAT unless you know what you are doing.
 
@@ -787,18 +798,18 @@
 	  module will be called act_nat.
 
 config NET_ACT_PEDIT
-        tristate "Packet Editing"
-        depends on NET_CLS_ACT
-        ---help---
+	tristate "Packet Editing"
+	depends on NET_CLS_ACT
+	---help---
 	  Say Y here if you want to mangle the content of packets.
 
 	  To compile this code as a module, choose M here: the
 	  module will be called act_pedit.
 
 config NET_ACT_SIMP
-        tristate "Simple Example (Debug)"
-        depends on NET_CLS_ACT
-        ---help---
+	tristate "Simple Example (Debug)"
+	depends on NET_CLS_ACT
+	---help---
 	  Say Y here to add a simple action for demonstration purposes.
 	  It is meant as an example and for debugging purposes. It will
 	  print a configured policy string followed by the packet count
@@ -810,9 +821,9 @@
 	  module will be called act_simple.
 
 config NET_ACT_SKBEDIT
-        tristate "SKB Editing"
-        depends on NET_CLS_ACT
-        ---help---
+	tristate "SKB Editing"
+	depends on NET_CLS_ACT
+	---help---
 	  Say Y here to change skb priority or queue_mapping settings.
 
 	  If unsure, say N.
@@ -821,20 +832,31 @@
 	  module will be called act_skbedit.
 
 config NET_ACT_CSUM
-        tristate "Checksum Updating"
-        depends on NET_CLS_ACT && INET
-        select LIBCRC32C
-        ---help---
+	tristate "Checksum Updating"
+	depends on NET_CLS_ACT && INET
+	select LIBCRC32C
+	---help---
 	  Say Y here to update some common checksum after some direct
 	  packet alterations.
 
 	  To compile this code as a module, choose M here: the
 	  module will be called act_csum.
 
+config NET_ACT_MPLS
+	tristate "MPLS manipulation"
+	depends on NET_CLS_ACT
+	help
+	  Say Y here to push or pop MPLS headers.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_mpls.
+
 config NET_ACT_VLAN
-        tristate "Vlan manipulation"
-        depends on NET_CLS_ACT
-        ---help---
+	tristate "Vlan manipulation"
+	depends on NET_CLS_ACT
+	---help---
 	  Say Y here to push or pop vlan headers.
 
 	  If unsure, say N.
@@ -843,9 +865,9 @@
 	  module will be called act_vlan.
 
 config NET_ACT_BPF
-        tristate "BPF based action"
-        depends on NET_CLS_ACT
-        ---help---
+	tristate "BPF based action"
+	depends on NET_CLS_ACT
+	---help---
 	  Say Y here to execute BPF code on packets. The BPF code will decide
 	  if the packet should be dropped or not.
 
@@ -855,10 +877,10 @@
 	  module will be called act_bpf.
 
 config NET_ACT_CONNMARK
-        tristate "Netfilter Connection Mark Retriever"
-        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
-        depends on NF_CONNTRACK && NF_CONNTRACK_MARK
-        ---help---
+	tristate "Netfilter Connection Mark Retriever"
+	depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+	depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+	---help---
 	  Say Y here to allow retrieving of conn mark
 
 	  If unsure, say N.
@@ -866,22 +888,39 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called act_connmark.
 
+config NET_ACT_CTINFO
+	tristate "Netfilter Connection Mark Actions"
+	depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+	depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+	help
+	  Say Y here to allow transfer of connmark-stored information.
+	  The current actions transfer connmark-stored DSCP into
+	  ipv4/v6 diffserv and/or transfer the connmark to the packet
+	  mark.  Both are useful for restoring egress-based marks
+	  back onto ingress connections for qdisc priority mapping
+	  purposes.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_ctinfo.
+
 config NET_ACT_SKBMOD
-        tristate "skb data modification action"
-        depends on NET_CLS_ACT
-        ---help---
-         Say Y here to allow modification of skb data
+	tristate "skb data modification action"
+	depends on NET_CLS_ACT
+	---help---
+	 Say Y here to allow modification of skb data
 
-         If unsure, say N.
+	 If unsure, say N.
 
-         To compile this code as a module, choose M here: the
-         module will be called act_skbmod.
+	 To compile this code as a module, choose M here: the
+	 module will be called act_skbmod.
 
 config NET_ACT_IFE
-        tristate "Inter-FE action based on IETF ForCES InterFE LFB"
-        depends on NET_CLS_ACT
-        select NET_IFE
-        ---help---
+	tristate "Inter-FE action based on IETF ForCES InterFE LFB"
+	depends on NET_CLS_ACT
+	select NET_IFE
+	---help---
 	  Say Y here to allow for sourcing and terminating metadata
 	  For details refer to netdev01 paper:
 	  "Distributing Linux Traffic Control Classifier-Action Subsystem"
@@ -891,9 +930,9 @@
 	  module will be called act_ife.
 
 config NET_ACT_TUNNEL_KEY
-        tristate "IP tunnel metadata manipulation"
-        depends on NET_CLS_ACT
-        ---help---
+	tristate "IP tunnel metadata manipulation"
+	depends on NET_CLS_ACT
+	---help---
 	  Say Y here to set/release ip tunnel metadata.
 
 	  If unsure, say N.
@@ -901,25 +940,40 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called act_tunnel_key.
 
+config NET_ACT_CT
+	tristate "connection tracking tc action"
+	depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
+	help
+	  Say Y here to allow sending the packets to conntrack module.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_ct.
+
 config NET_IFE_SKBMARK
-        tristate "Support to encoding decoding skb mark on IFE action"
-        depends on NET_ACT_IFE
+	tristate "Support to encoding decoding skb mark on IFE action"
+	depends on NET_ACT_IFE
 
 config NET_IFE_SKBPRIO
-        tristate "Support to encoding decoding skb prio on IFE action"
-        depends on NET_ACT_IFE
+	tristate "Support to encoding decoding skb prio on IFE action"
+	depends on NET_ACT_IFE
 
 config NET_IFE_SKBTCINDEX
-        tristate "Support to encoding decoding skb tcindex on IFE action"
-        depends on NET_ACT_IFE
+	tristate "Support to encoding decoding skb tcindex on IFE action"
+	depends on NET_ACT_IFE
 
-config NET_CLS_IND
-	bool "Incoming device classification"
-	depends on NET_CLS_U32 || NET_CLS_FW
-	---help---
-	  Say Y here to extend the u32 and fw classifier to support
-	  classification based on the incoming device. This option is
-	  likely to disappear in favour of the metadata ematch.
+config NET_TC_SKB_EXT
+	bool "TC recirculation support"
+	depends on NET_CLS_ACT
+	select SKB_EXTENSIONS
+
+	help
+	  Say Y here to allow tc chain misses to continue in OvS datapath in
+	  the correct recirc_id, and hardware chain misses to continue in
+	  the correct chain in tc software datapath.
+
+	  Say N here if you won't be using tc<->ovs offload or tc chains offload.
 
 endif # NET_SCHED
 
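Note: NET_TC_SKB_EXT gates an skb extension (struct tc_skb_ext, extension id TC_SKB_EXT) that records the last executed tc chain, so a chain miss can resume in the right chain, e.g. in the OvS datapath. A sketch of the recording side (the wrapper function is illustrative; skb_ext_add() and tc_skb_ext are the gated kernel interfaces):

/* Illustrative: record the tc chain index on the skb as an extension. */
#include <linux/skbuff.h>

static int example_record_chain(struct sk_buff *skb, u32 chain_index)
{
	struct tc_skb_ext *ext;

	ext = skb_ext_add(skb, TC_SKB_EXT);
	if (!ext)
		return -ENOMEM;
	ext->chain = chain_index;
	return 0;
}
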
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f0403f4..415d1e1 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -18,15 +18,18 @@
 obj-$(CONFIG_NET_ACT_SIMP)	+= act_simple.o
 obj-$(CONFIG_NET_ACT_SKBEDIT)	+= act_skbedit.o
 obj-$(CONFIG_NET_ACT_CSUM)	+= act_csum.o
+obj-$(CONFIG_NET_ACT_MPLS)	+= act_mpls.o
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
 obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
+obj-$(CONFIG_NET_ACT_CTINFO)	+= act_ctinfo.o
 obj-$(CONFIG_NET_ACT_SKBMOD)	+= act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
 obj-$(CONFIG_NET_IFE_SKBMARK)	+= act_meta_mark.o
 obj-$(CONFIG_NET_IFE_SKBPRIO)	+= act_meta_skbprio.o
 obj-$(CONFIG_NET_IFE_SKBTCINDEX)	+= act_meta_skbtcindex.o
 obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
+obj-$(CONFIG_NET_ACT_CT)	+= act_ct.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
@@ -57,6 +60,7 @@
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
 obj-$(CONFIG_NET_SCH_CBS)	+= sch_cbs.o
 obj-$(CONFIG_NET_SCH_ETF)	+= sch_etf.o
+obj-$(CONFIG_NET_SCH_TAPRIO)	+= sch_taprio.o
 
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e12f8ef..69d4676 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1,14 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_api.c	Packet action API.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Author:	Jamal Hadi Salim
- *
- *
  */
 
 #include <linux/types.h>
@@ -21,8 +15,6 @@
 #include <linux/kmod.h>
 #include <linux/err.h>
 #include <linux/module.h>
-#include <linux/rhashtable.h>
-#include <linux/list.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/sch_generic.h>
@@ -30,27 +22,10 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
-static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
-{
-	u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
-
-	if (!tp)
-		return -EINVAL;
-	a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index);
-	if (!a->goto_chain)
-		return -ENOMEM;
-	return 0;
-}
-
-static void tcf_action_goto_chain_fini(struct tc_action *a)
-{
-	tcf_chain_put_by_act(a->goto_chain);
-}
-
 static void tcf_action_goto_chain_exec(const struct tc_action *a,
 				       struct tcf_result *res)
 {
-	const struct tcf_chain *chain = a->goto_chain;
+	const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain);
 
 	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
 }
@@ -73,6 +48,51 @@
 		call_rcu(&old->rcu, tcf_free_cookie_rcu);
 }
 
+int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
+			     struct tcf_chain **newchain,
+			     struct netlink_ext_ack *extack)
+{
+	int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL;
+	u32 chain_index;
+
+	if (!opcode)
+		ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0;
+	else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC)
+		ret = 0;
+	if (ret) {
+		NL_SET_ERR_MSG(extack, "invalid control action");
+		goto end;
+	}
+
+	if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) {
+		chain_index = action & TC_ACT_EXT_VAL_MASK;
+		if (!tp || !newchain) {
+			ret = -EINVAL;
+			NL_SET_ERR_MSG(extack,
+				       "can't goto NULL proto/chain");
+			goto end;
+		}
+		*newchain = tcf_chain_get_by_act(tp->chain->block, chain_index);
+		if (!*newchain) {
+			ret = -ENOMEM;
+			NL_SET_ERR_MSG(extack,
+				       "can't allocate goto_chain");
+		}
+	}
+end:
+	return ret;
+}
+EXPORT_SYMBOL(tcf_action_check_ctrlact);
+
+struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
+					 struct tcf_chain *goto_chain)
+{
+	a->tcfa_action = action;
+	rcu_swap_protected(a->goto_chain, goto_chain, 1);
+	return goto_chain;
+}
+EXPORT_SYMBOL(tcf_action_set_ctrlact);
+
 /* XXX: For standalone actions, we don't need a RCU grace period either, because
  * actions are always connected to filters and filters are already destroyed in
  * RCU callbacks, so after a RCU grace period actions are already disconnected
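
Note: tcf_action_check_ctrlact() and tcf_action_set_ctrlact() split goto_chain handling into an up-front validation plus chain lookup, and an RCU-protected pointer swap that hands back the displaced chain. A hypothetical action-init tail showing how the pair is meant to be used together (every name here is illustrative; the companion per-action patches follow this shape):

/* Hypothetical init tail using the two new control-action helpers. */
static int example_act_init_tail(struct tc_action *a, int tcf_action,
				 struct tcf_proto *tp,
				 struct netlink_ext_ack *extack)
{
	struct tcf_chain *goto_ch = NULL;
	int err;

	/* Validate the control action and, for goto_chain, take a
	 * reference on the target chain before publishing the action. */
	err = tcf_action_check_ctrlact(tcf_action, tp, &goto_ch, extack);
	if (err < 0)
		return err;

	spin_lock_bh(&a->tcfa_lock);
	goto_ch = tcf_action_set_ctrlact(a, tcf_action, goto_ch);
	spin_unlock_bh(&a->tcfa_lock);

	/* The helper returned the displaced chain, if any; drop its
	 * reference outside the action lock. */
	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
	return 0;
}
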
@@ -80,12 +100,15 @@
  */
 static void free_tcf(struct tc_action *p)
 {
+	struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1);
+
 	free_percpu(p->cpu_bstats);
+	free_percpu(p->cpu_bstats_hw);
 	free_percpu(p->cpu_qstats);
 
 	tcf_set_action_cookie(&p->act_cookie, NULL);
-	if (p->goto_chain)
-		tcf_action_goto_chain_fini(p);
+	if (chain)
+		tcf_chain_put_by_act(chain);
 
 	kfree(p);
 }
@@ -103,11 +126,11 @@
 {
 	struct tcf_idrinfo *idrinfo = p->idrinfo;
 
-	if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
+	if (refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
 		if (bind)
 			atomic_dec(&p->tcfa_bindcnt);
 		idr_remove(&idrinfo->action_idr, p->tcfa_index);
-		spin_unlock(&idrinfo->lock);
+		mutex_unlock(&idrinfo->lock);
 
 		tcf_action_cleanup(p);
 		return 1;
@@ -198,12 +221,13 @@
 	struct idr *idr = &idrinfo->action_idr;
 	struct tc_action *p;
 	unsigned long id = 1;
+	unsigned long tmp;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 
 	s_i = cb->args[0];
 
-	idr_for_each_entry_ul(idr, p, id) {
+	idr_for_each_entry_ul(idr, p, tmp, id) {
 		index++;
 		if (index < s_i)
 			continue;
@@ -213,7 +237,7 @@
 			       (unsigned long)p->tcfa_tm.lastuse))
 			continue;
 
-		nest = nla_nest_start(skb, n_i);
+		nest = nla_nest_start_noflag(skb, n_i);
 		if (!nest) {
 			index--;
 			goto nla_put_failure;
@@ -234,7 +258,7 @@
 	if (index >= 0)
 		cb->args[0] = index + 1;
 
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 	if (n_i) {
 		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
 			cb->args[1] = n_i;
@@ -246,6 +270,20 @@
 	goto done;
 }
 
+static int tcf_idr_release_unsafe(struct tc_action *p)
+{
+	if (atomic_read(&p->tcfa_bindcnt) > 0)
+		return -EPERM;
+
+	if (refcount_dec_and_test(&p->tcfa_refcnt)) {
+		idr_remove(&p->idrinfo->action_idr, p->tcfa_index);
+		tcf_action_cleanup(p);
+		return ACT_P_DELETED;
+	}
+
+	return 0;
+}
+
 static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 			  const struct tc_action_ops *ops)
 {
@@ -255,22 +293,27 @@
 	struct idr *idr = &idrinfo->action_idr;
 	struct tc_action *p;
 	unsigned long id = 1;
+	unsigned long tmp;
 
-	nest = nla_nest_start(skb, 0);
+	nest = nla_nest_start_noflag(skb, 0);
 	if (nest == NULL)
 		goto nla_put_failure;
 	if (nla_put_string(skb, TCA_KIND, ops->kind))
 		goto nla_put_failure;
 
-	idr_for_each_entry_ul(idr, p, id) {
-		ret = __tcf_idr_release(p, false, true);
+	mutex_lock(&idrinfo->lock);
+	idr_for_each_entry_ul(idr, p, tmp, id) {
+		ret = tcf_idr_release_unsafe(p);
 		if (ret == ACT_P_DELETED) {
 			module_put(ops->owner);
 			n_i++;
 		} else if (ret < 0) {
+			mutex_unlock(&idrinfo->lock);
 			goto nla_put_failure;
 		}
 	}
+	mutex_unlock(&idrinfo->lock);
+
 	if (nla_put_u32(skb, TCA_FCNT, n_i))
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
@@ -305,13 +348,13 @@
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 	struct tc_action *p;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	p = idr_find(&idrinfo->action_idr, index);
 	if (IS_ERR(p))
 		p = NULL;
 	else if (p)
 		refcount_inc(&p->tcfa_refcnt);
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 
 	if (p) {
 		*a = p;
@@ -326,10 +369,10 @@
 	struct tc_action *p;
 	int ret = 0;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	p = idr_find(&idrinfo->action_idr, index);
 	if (!p) {
-		spin_unlock(&idrinfo->lock);
+		mutex_unlock(&idrinfo->lock);
 		return -ENOENT;
 	}
 
@@ -339,7 +382,7 @@
 
 			WARN_ON(p != idr_remove(&idrinfo->action_idr,
 						p->tcfa_index));
-			spin_unlock(&idrinfo->lock);
+			mutex_unlock(&idrinfo->lock);
 
 			tcf_action_cleanup(p);
 			module_put(owner);
@@ -350,7 +393,7 @@
 		ret = -EPERM;
 	}
 
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 	return ret;
 }
 
@@ -372,9 +415,12 @@
 		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
 		if (!p->cpu_bstats)
 			goto err1;
+		p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+		if (!p->cpu_bstats_hw)
+			goto err2;
 		p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
 		if (!p->cpu_qstats)
-			goto err2;
+			goto err3;
 	}
 	spin_lock_init(&p->tcfa_lock);
 	p->tcfa_index = index;
@@ -386,15 +432,17 @@
 					&p->tcfa_rate_est,
 					&p->tcfa_lock, NULL, est);
 		if (err)
-			goto err3;
+			goto err4;
 	}
 
 	p->idrinfo = idrinfo;
 	p->ops = ops;
 	*a = p;
 	return 0;
-err3:
+err4:
 	free_percpu(p->cpu_qstats);
+err3:
+	free_percpu(p->cpu_bstats_hw);
 err2:
 	free_percpu(p->cpu_bstats);
 err1:
@@ -407,10 +455,10 @@
 {
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
 	WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 }
 EXPORT_SYMBOL(tcf_idr_insert);
 
@@ -420,10 +468,10 @@
 {
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	/* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
 	WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index)));
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 }
 EXPORT_SYMBOL(tcf_idr_cleanup);
 
@@ -441,14 +489,14 @@
 	int ret;
 
 again:
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	if (*index) {
 		p = idr_find(&idrinfo->action_idr, *index);
 		if (IS_ERR(p)) {
 			/* This means that another process allocated
 			 * index but did not assign the pointer yet.
 			 */
-			spin_unlock(&idrinfo->lock);
+			mutex_unlock(&idrinfo->lock);
 			goto again;
 		}
 
@@ -461,7 +509,7 @@
 		} else {
 			*a = NULL;
 			ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
-					    *index, GFP_ATOMIC);
+					    *index, GFP_KERNEL);
 			if (!ret)
 				idr_replace(&idrinfo->action_idr,
 					    ERR_PTR(-EBUSY), *index);
@@ -470,12 +518,12 @@
 		*index = 1;
 		*a = NULL;
 		ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
-				    UINT_MAX, GFP_ATOMIC);
+				    UINT_MAX, GFP_KERNEL);
 		if (!ret)
 			idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY),
 				    *index);
 	}
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 	return ret;
 }
 EXPORT_SYMBOL(tcf_idr_check_alloc);
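
Note: turning idrinfo->lock into a mutex is what allows the idr_alloc_u32() calls above to switch from GFP_ATOMIC to GFP_KERNEL — the allocator may now sleep while the action table is locked. Condensed into one illustrative helper:

/* Illustrative: sleeping allocation under the (now mutex) table lock. */
static int example_alloc_index(struct tcf_idrinfo *idrinfo, u32 *index)
{
	int ret;

	mutex_lock(&idrinfo->lock);
	ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, UINT_MAX,
			    GFP_KERNEL);
	mutex_unlock(&idrinfo->lock);
	return ret;
}
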
@@ -487,8 +535,9 @@
 	struct tc_action *p;
 	int ret;
 	unsigned long id = 1;
+	unsigned long tmp;
 
-	idr_for_each_entry_ul(idr, p, id) {
+	idr_for_each_entry_ul(idr, p, tmp, id) {
 		ret = __tcf_idr_release(p, false, true);
 		if (ret == ACT_P_DELETED)
 			module_put(ops->owner);
@@ -521,7 +570,7 @@
 
 	write_lock(&act_mod_lock);
 	list_for_each_entry(a, &act_base, head) {
-		if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
+		if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) {
 			write_unlock(&act_mod_lock);
 			unregister_pernet_subsys(ops);
 			return -EEXIST;
@@ -632,6 +681,10 @@
 					return TC_ACT_OK;
 			}
 		} else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
+			if (unlikely(!rcu_access_pointer(a->goto_chain))) {
+				net_warn_ratelimited("can't go to NULL chain!\n");
+				return TC_ACT_SHOT;
+			}
 			tcf_action_goto_chain_exec(a, res);
 		}
 
@@ -720,7 +773,7 @@
 	}
 	rcu_read_unlock();
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 	err = tcf_action_dump_old(skb, a, bind, ref);
@@ -744,7 +797,7 @@
 
 	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
 		a = actions[i];
-		nest = nla_nest_start(skb, a->order);
+		nest = nla_nest_start_noflag(skb, i + 1);
 		if (nest == NULL)
 			goto nla_put_failure;
 		err = tcf_action_dump_1(skb, a, bind, ref);
@@ -778,14 +831,13 @@
 	return c;
 }
 
-static bool tcf_action_valid(int action)
-{
-	int opcode = TC_ACT_EXT_OPCODE(action);
-
-	if (!opcode)
-		return action <= TC_ACT_VALUE_MAX;
-	return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
-}
+static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
+	[TCA_ACT_KIND]		= { .type = NLA_STRING },
+	[TCA_ACT_INDEX]		= { .type = NLA_U32 },
+	[TCA_ACT_COOKIE]	= { .type = NLA_BINARY,
+				    .len = TC_COOKIE_MAX_SIZE },
+	[TCA_ACT_OPTIONS]	= { .type = NLA_NESTED },
+};
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
@@ -802,7 +854,8 @@
 	int err;
 
 	if (name == NULL) {
-		err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
+		err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+						  tcf_action_policy, extack);
 		if (err < 0)
 			goto err_out;
 		err = -EINVAL;
@@ -816,13 +869,6 @@
 			goto err_out;
 		}
 		if (tb[TCA_ACT_COOKIE]) {
-			int cklen = nla_len(tb[TCA_ACT_COOKIE]);
-
-			if (cklen > TC_COOKIE_MAX_SIZE) {
-				NL_SET_ERR_MSG(extack, "TC cookie size above the maximum");
-				goto err_out;
-			}
-
 			cookie = nla_memdup_cookie(tb);
 			if (!cookie) {
 				NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
@@ -868,10 +914,10 @@
 	/* backward compatibility for policer */
 	if (name == NULL)
 		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
-				rtnl_held, extack);
+				rtnl_held, tp, extack);
 	else
 		err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
-				extack);
+				tp, extack);
 	if (err < 0)
 		goto err_mod;
 
@@ -885,18 +931,10 @@
 	if (err != ACT_P_CREATED)
 		module_put(a_o->owner);
 
-	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
-		err = tcf_action_goto_chain_init(a, tp);
-		if (err) {
-			tcf_action_destroy_1(a, bind);
-			NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
-			return ERR_PTR(err);
-		}
-	}
-
-	if (!tcf_action_valid(a->tcfa_action)) {
+	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
+	    !rcu_access_pointer(a->goto_chain)) {
 		tcf_action_destroy_1(a, bind);
-		NL_SET_ERR_MSG(extack, "Invalid control action value");
+		NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -925,7 +963,8 @@
 	int err;
 	int i;
 
-	err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
+	err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL,
+					  extack);
 	if (err < 0)
 		return err;
 
@@ -979,6 +1018,8 @@
 		goto errout;
 
 	if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
+	    gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
+				     &p->tcfa_bstats_hw) < 0 ||
 	    gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
 	    gnet_stats_copy_queue(&d, p->cpu_qstats,
 				  &p->tcfa_qstats,
@@ -1011,7 +1052,7 @@
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	nest = nla_nest_start(skb, TCA_ACT_TAB);
+	nest = nla_nest_start_noflag(skb, TCA_ACT_TAB);
 	if (!nest)
 		goto out_nlmsg_trim;
 
@@ -1058,7 +1099,8 @@
 	int index;
 	int err;
 
-	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
+	err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+					  tcf_action_policy, extack);
 	if (err < 0)
 		goto err_out;
 
@@ -1073,12 +1115,14 @@
 	err = -EINVAL;
 	ops = tc_lookup_action(tb[TCA_ACT_KIND]);
 	if (!ops) { /* could happen in batch of actions */
-		NL_SET_ERR_MSG(extack, "Specified TC action not found");
+		NL_SET_ERR_MSG(extack, "Specified TC action kind not found");
 		goto err_out;
 	}
 	err = -ENOENT;
-	if (ops->lookup(net, &a, index, extack) == 0)
+	if (ops->lookup(net, &a, index) == 0) {
+		NL_SET_ERR_MSG(extack, "TC action with specified index not found");
 		goto err_mod;
+	}
 
 	module_put(ops->owner);
 	return a;
@@ -1110,7 +1154,8 @@
 
 	b = skb_tail_pointer(skb);
 
-	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
+	err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+					  tcf_action_policy, extack);
 	if (err < 0)
 		goto err_out;
 
@@ -1133,7 +1178,7 @@
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	nest = nla_nest_start(skb, TCA_ACT_TAB);
+	nest = nla_nest_start_noflag(skb, TCA_ACT_TAB);
 	if (!nest) {
 		NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
 		goto out_module_put;
@@ -1239,7 +1284,8 @@
 	size_t attr_size = 0;
 	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
 
-	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
+	ret = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL,
+					  extack);
 	if (ret < 0)
 		return ret;
 
@@ -1257,7 +1303,6 @@
 			ret = PTR_ERR(act);
 			goto err;
 		}
-		act->order = i;
 		attr_size += tcf_action_fill_size(act);
 		actions[i - 1] = act;
 	}
@@ -1308,11 +1353,16 @@
 			  struct netlink_ext_ack *extack)
 {
 	size_t attr_size = 0;
-	int ret = 0;
+	int loop, ret;
 	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
 
-	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, actions,
-			      &attr_size, true, extack);
+	for (loop = 0; loop < 10; loop++) {
+		ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
+				      actions, &attr_size, true, extack);
+		if (ret != -EAGAIN)
+			break;
+	}
+
 	if (ret < 0)
 		return ret;
 	ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
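
Note: tcf_action_init() returns -EAGAIN after it drops the RTNL lock to
autoload an action module; the old caller (the replay: label removed below)
retried forever on that. Bounding the retries at 10 keeps a module that never
produces a usable action from looping indefinitely.
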
@@ -1341,8 +1391,8 @@
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
-			  extack);
+	ret = nlmsg_parse_deprecated(n, sizeof(struct tcamsg), tca,
+				     TCA_ROOT_MAX, NULL, extack);
 	if (ret < 0)
 		return ret;
 
@@ -1362,11 +1412,8 @@
 		 */
 		if (n->nlmsg_flags & NLM_F_REPLACE)
 			ovr = 1;
-replay:
 		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
 				     extack);
-		if (ret == -EAGAIN)
-			goto replay;
 		break;
 	case RTM_DELACTION:
 		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
@@ -1393,13 +1440,12 @@
 	if (tb1 == NULL)
 		return NULL;
 
-	if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
-		      NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0)
+	if (nla_parse_deprecated(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
+				 NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0)
 		return NULL;
 
 	if (tb[1] == NULL)
 		return NULL;
-	if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0)
+	if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1],
+					tcf_action_policy, NULL) < 0)
 		return NULL;
 	kind = tb2[TCA_ACT_KIND];
 
@@ -1423,8 +1469,8 @@
 	u32 msecs_since = 0;
 	u32 act_count = 0;
 
-	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
-			  tcaa_policy, NULL);
+	ret = nlmsg_parse_deprecated(cb->nlh, sizeof(struct tcamsg), tb,
+				     TCA_ROOT_MAX, tcaa_policy, cb->extack);
 	if (ret < 0)
 		return ret;
 
@@ -1465,7 +1511,7 @@
 	if (!count_attr)
 		goto out_module_put;
 
-	nest = nla_nest_start(skb, TCA_ACT_TAB);
+	nest = nla_nest_start_noflag(skb, TCA_ACT_TAB);
 	if (nest == NULL)
 		goto out_module_put;
 
@@ -1494,227 +1540,8 @@
 	return skb->len;
 }
 
-struct tcf_action_net {
-	struct rhashtable egdev_ht;
-};
-
-static unsigned int tcf_action_net_id;
-
-struct tcf_action_egdev_cb {
-	struct list_head list;
-	tc_setup_cb_t *cb;
-	void *cb_priv;
-};
-
-struct tcf_action_egdev {
-	struct rhash_head ht_node;
-	const struct net_device *dev;
-	unsigned int refcnt;
-	struct list_head cb_list;
-};
-
-static const struct rhashtable_params tcf_action_egdev_ht_params = {
-	.key_offset = offsetof(struct tcf_action_egdev, dev),
-	.head_offset = offsetof(struct tcf_action_egdev, ht_node),
-	.key_len = sizeof(const struct net_device *),
-};
-
-static struct tcf_action_egdev *
-tcf_action_egdev_lookup(const struct net_device *dev)
-{
-	struct net *net = dev_net(dev);
-	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
-
-	return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
-				      tcf_action_egdev_ht_params);
-}
-
-static struct tcf_action_egdev *
-tcf_action_egdev_get(const struct net_device *dev)
-{
-	struct tcf_action_egdev *egdev;
-	struct tcf_action_net *tan;
-
-	egdev = tcf_action_egdev_lookup(dev);
-	if (egdev)
-		goto inc_ref;
-
-	egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
-	if (!egdev)
-		return NULL;
-	INIT_LIST_HEAD(&egdev->cb_list);
-	egdev->dev = dev;
-	tan = net_generic(dev_net(dev), tcf_action_net_id);
-	rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
-			       tcf_action_egdev_ht_params);
-
-inc_ref:
-	egdev->refcnt++;
-	return egdev;
-}
-
-static void tcf_action_egdev_put(struct tcf_action_egdev *egdev)
-{
-	struct tcf_action_net *tan;
-
-	if (--egdev->refcnt)
-		return;
-	tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
-	rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
-			       tcf_action_egdev_ht_params);
-	kfree(egdev);
-}
-
-static struct tcf_action_egdev_cb *
-tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
-			   tc_setup_cb_t *cb, void *cb_priv)
-{
-	struct tcf_action_egdev_cb *egdev_cb;
-
-	list_for_each_entry(egdev_cb, &egdev->cb_list, list)
-		if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
-			return egdev_cb;
-	return NULL;
-}
-
-static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
-				    enum tc_setup_type type,
-				    void *type_data, bool err_stop)
-{
-	struct tcf_action_egdev_cb *egdev_cb;
-	int ok_count = 0;
-	int err;
-
-	list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
-		err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
-		if (err) {
-			if (err_stop)
-				return err;
-		} else {
-			ok_count++;
-		}
-	}
-	return ok_count;
-}
-
-static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
-				   tc_setup_cb_t *cb, void *cb_priv)
-{
-	struct tcf_action_egdev_cb *egdev_cb;
-
-	egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
-	if (WARN_ON(egdev_cb))
-		return -EEXIST;
-	egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
-	if (!egdev_cb)
-		return -ENOMEM;
-	egdev_cb->cb = cb;
-	egdev_cb->cb_priv = cb_priv;
-	list_add(&egdev_cb->list, &egdev->cb_list);
-	return 0;
-}
-
-static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
-				    tc_setup_cb_t *cb, void *cb_priv)
-{
-	struct tcf_action_egdev_cb *egdev_cb;
-
-	egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
-	if (WARN_ON(!egdev_cb))
-		return;
-	list_del(&egdev_cb->list);
-	kfree(egdev_cb);
-}
-
-static int __tc_setup_cb_egdev_register(const struct net_device *dev,
-					tc_setup_cb_t *cb, void *cb_priv)
-{
-	struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
-	int err;
-
-	if (!egdev)
-		return -ENOMEM;
-	err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
-	if (err)
-		goto err_cb_add;
-	return 0;
-
-err_cb_add:
-	tcf_action_egdev_put(egdev);
-	return err;
-}
-int tc_setup_cb_egdev_register(const struct net_device *dev,
-			       tc_setup_cb_t *cb, void *cb_priv)
-{
-	int err;
-
-	rtnl_lock();
-	err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
-	rtnl_unlock();
-	return err;
-}
-EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
-
-static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
-					   tc_setup_cb_t *cb, void *cb_priv)
-{
-	struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
-
-	if (WARN_ON(!egdev))
-		return;
-	tcf_action_egdev_cb_del(egdev, cb, cb_priv);
-	tcf_action_egdev_put(egdev);
-}
-void tc_setup_cb_egdev_unregister(const struct net_device *dev,
-				  tc_setup_cb_t *cb, void *cb_priv)
-{
-	rtnl_lock();
-	__tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
-	rtnl_unlock();
-}
-EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
-
-int tc_setup_cb_egdev_call(const struct net_device *dev,
-			   enum tc_setup_type type, void *type_data,
-			   bool err_stop)
-{
-	struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
-
-	if (!egdev)
-		return 0;
-	return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop);
-}
-EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
-
-static __net_init int tcf_action_net_init(struct net *net)
-{
-	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
-
-	return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
-}
-
-static void __net_exit tcf_action_net_exit(struct net *net)
-{
-	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
-
-	rhashtable_destroy(&tan->egdev_ht);
-}
-
-static struct pernet_operations tcf_action_net_ops = {
-	.init = tcf_action_net_init,
-	.exit = tcf_action_net_exit,
-	.id = &tcf_action_net_id,
-	.size = sizeof(struct tcf_action_net),
-};
-
 static int __init tc_action_init(void)
 {
-	int err;
-
-	err = register_pernet_subsys(&tcf_action_net_ops);
-	if (err)
-		return err;
-
 	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
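
Note: everything deleted above (the tcf_action_egdev_* registry, its per-netns
rhashtable and the pernet ops) was the old mechanism for routing offload
callbacks to an egress device; by this kernel its users have moved to the
indirect flow block callbacks in net/core, so act_api no longer needs pernet
state at all and tc_action_init() shrinks to the bare rtnl_register() calls.
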
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 0c68bc9..04b7bd4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -17,6 +13,7 @@
 
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 
 #include <linux/tc_act/tc_bpf.h>
 #include <net/tc_act/tc_bpf.h>
@@ -278,20 +275,23 @@
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act,
 			int replace, int bind, bool rtnl_held,
-			struct netlink_ext_ack *extack)
+			struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tcf_bpf_cfg cfg, old;
 	struct tc_act_bpf *parm;
 	struct tcf_bpf *prog;
 	bool is_bpf, is_ebpf;
 	int ret, res = 0;
+	u32 index;
 
 	if (!nla)
 		return -EINVAL;
 
-	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy, NULL);
+	ret = nla_parse_nested_deprecated(tb, TCA_ACT_BPF_MAX, nla,
+					  act_bpf_policy, NULL);
 	if (ret < 0)
 		return ret;
 
@@ -299,13 +299,13 @@
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
-
-	ret = tcf_idr_check_alloc(tn, &parm->index, act, bind);
+	index = parm->index;
+	ret = tcf_idr_check_alloc(tn, &index, act, bind);
 	if (!ret) {
-		ret = tcf_idr_create(tn, parm->index, est, act,
+		ret = tcf_idr_create(tn, index, est, act,
 				     &act_bpf_ops, bind, true);
 		if (ret < 0) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 
@@ -323,12 +323,16 @@
 		return ret;
 	}
 
+	ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (ret < 0)
+		goto release_idr;
+
 	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
 	is_ebpf = tb[TCA_ACT_BPF_FD];
 
 	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
 		ret = -EINVAL;
-		goto out;
+		goto put_chain;
 	}
 
 	memset(&cfg, 0, sizeof(cfg));
@@ -336,7 +340,7 @@
 	ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
 		       tcf_bpf_init_from_efd(tb, &cfg);
 	if (ret < 0)
-		goto out;
+		goto put_chain;
 
 	prog = to_bpf(*act);
 
@@ -350,10 +354,13 @@
 	if (cfg.bpf_num_ops)
 		prog->bpf_num_ops = cfg.bpf_num_ops;
 
-	prog->tcf_action = parm->action;
+	goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch);
 	rcu_assign_pointer(prog->filter, cfg.filter);
 	spin_unlock_bh(&prog->tcf_lock);
 
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+
 	if (res == ACT_P_CREATED) {
 		tcf_idr_insert(tn, *act);
 	} else {
@@ -363,9 +370,13 @@
 	}
 
 	return res;
-out:
-	tcf_idr_release(*act, bind);
 
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+
+release_idr:
+	tcf_idr_release(*act, bind);
 	return ret;
 }
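
Note: the validate/commit/put sequence above is the shape every action init
follows after this series; the helpers named below are the ones the patch
introduces. A condensed sketch:

	struct tcf_chain *goto_ch = NULL;
	int err;

	/* 1. Validate the control action; for TC_ACT_GOTO_CHAIN this
	 *    also looks up and takes a reference on the target chain.
	 */
	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
	if (err < 0)
		goto release_idr;

	/* 2. Publish the new control action under the action lock; the
	 *    setter returns whichever chain the old action referenced.
	 */
	spin_lock_bh(&prog->tcf_lock);
	goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch);
	spin_unlock_bh(&prog->tcf_lock);

	/* 3. Drop the old chain reference outside the lock. */
	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
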
 
@@ -387,8 +398,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
-			  struct netlink_ext_ack *extack)
+static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
@@ -397,7 +407,7 @@
 
 static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.kind		=	"bpf",
-	.type		=	TCA_ACT_BPF,
+	.id		=	TCA_ID_BPF,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_bpf_act,
 	.dump		=	tcf_bpf_dump,
@@ -412,7 +422,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-	return tc_action_net_init(tn, &act_bpf_ops);
+	return tc_action_net_init(net, tn, &act_bpf_ops);
 }
 
 static void __net_exit bpf_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 6f0f273..2b43cac 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_connmark.c  netfilter connmark retriever action
  * skb mark is over-written
  *
  * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
 */
 
 #include <linux/module.h>
@@ -21,6 +17,7 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/act_api.h>
+#include <net/pkt_cls.h>
 #include <uapi/linux/tc_act/tc_connmark.h>
 #include <net/tc_act/tc_connmark.h>
 
@@ -97,19 +94,22 @@
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 			     struct nlattr *est, struct tc_action **a,
 			     int ovr, int bind, bool rtnl_held,
+			     struct tcf_proto *tp,
 			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 	struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tcf_connmark_info *ci;
 	struct tc_connmark *parm;
-	int ret = 0;
+	int ret = 0, err;
+	u32 index;
 
 	if (!nla)
 		return -EINVAL;
 
-	ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy,
-			       NULL);
+	ret = nla_parse_nested_deprecated(tb, TCA_CONNMARK_MAX, nla,
+					  connmark_policy, NULL);
 	if (ret < 0)
 		return ret;
 
@@ -117,18 +117,22 @@
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_CONNMARK_PARMS]);
-
-	ret = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	ret = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (!ret) {
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_connmark_ops, bind, false);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 
 		ci = to_connmark(*a);
-		ci->tcf_action = parm->action;
+		err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+					       extack);
+		if (err < 0)
+			goto release_idr;
+		tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 		ci->net = net;
 		ci->zone = parm->zone;
 
@@ -142,13 +146,24 @@
 			tcf_idr_release(*a, bind);
 			return -EEXIST;
 		}
+		err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+					       extack);
+		if (err < 0)
+			goto release_idr;
 		/* replacing action and zone */
-		ci->tcf_action = parm->action;
+		spin_lock_bh(&ci->tcf_lock);
+		goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 		ci->zone = parm->zone;
+		spin_unlock_bh(&ci->tcf_lock);
+		if (goto_ch)
+			tcf_chain_put_by_act(goto_ch);
 		ret = 0;
 	}
 
 	return ret;
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
@@ -156,16 +171,16 @@
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_connmark_info *ci = to_connmark(a);
-
 	struct tc_connmark opt = {
 		.index   = ci->tcf_index,
 		.refcnt  = refcount_read(&ci->tcf_refcnt) - ref,
 		.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
-		.action  = ci->tcf_action,
-		.zone   = ci->zone,
 	};
 	struct tcf_t t;
 
+	spin_lock_bh(&ci->tcf_lock);
+	opt.action = ci->tcf_action;
+	opt.zone = ci->zone;
 	if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
@@ -173,9 +188,12 @@
 	if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t,
 			  TCA_CONNMARK_PAD))
 		goto nla_put_failure;
+	spin_unlock_bh(&ci->tcf_lock);
 
 	return skb->len;
+
 nla_put_failure:
+	spin_unlock_bh(&ci->tcf_lock);
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -190,8 +208,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
-			       struct netlink_ext_ack *extack)
+static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
@@ -200,7 +217,7 @@
 
 static struct tc_action_ops act_connmark_ops = {
 	.kind		=	"connmark",
-	.type		=	TCA_ACT_CONNMARK,
+	.id		=	TCA_ID_CONNMARK,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_connmark_act,
 	.dump		=	tcf_connmark_dump,
@@ -214,7 +231,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-	return tc_action_net_init(tn, &act_connmark_ops);
+	return tc_action_net_init(net, tn, &act_connmark_ops);
 }
 
 static void __net_exit connmark_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b8a67ae..d3cfad8 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Checksum updating actions
  *
  * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  */
 
 #include <linux/types.h>
@@ -33,6 +28,7 @@
 #include <net/sctp/checksum.h>
 
 #include <net/act_api.h>
+#include <net/pkt_cls.h>
 
 #include <linux/tc_act/tc_csum.h>
 #include <net/tc_act/tc_csum.h>
@@ -46,33 +42,36 @@
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a, int ovr,
-			 int bind, bool rtnl_held,
+			 int bind, bool rtnl_held, struct tcf_proto *tp,
 			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 	struct tcf_csum_params *params_new;
 	struct nlattr *tb[TCA_CSUM_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tc_csum *parm;
 	struct tcf_csum *p;
 	int ret = 0, err;
+	u32 index;
 
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_CSUM_MAX, nla, csum_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
 	if (tb[TCA_CSUM_PARMS] == NULL)
 		return -EINVAL;
 	parm = nla_data(tb[TCA_CSUM_PARMS]);
-
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (!err) {
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_csum_ops, bind, true);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 		ret = ACT_P_CREATED;
@@ -87,21 +86,27 @@
 		return err;
 	}
 
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
 	p = to_tcf_csum(*a);
 
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
-		tcf_idr_release(*a, bind);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto put_chain;
 	}
 	params_new->update_flags = parm->update_flags;
 
 	spin_lock_bh(&p->tcf_lock);
-	p->tcf_action = parm->action;
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 	rcu_swap_protected(p->params, params_new,
 			   lockdep_is_held(&p->tcf_lock));
 	spin_unlock_bh(&p->tcf_lock);
 
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
 	if (params_new)
 		kfree_rcu(params_new, rcu);
 
@@ -109,6 +114,12 @@
 		tcf_idr_insert(tn, *a);
 
 	return ret;
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 /**
@@ -559,8 +570,11 @@
 			struct tcf_result *res)
 {
 	struct tcf_csum *p = to_tcf_csum(a);
+	bool orig_vlan_tag_present = false;
+	unsigned int vlan_hdr_count = 0;
 	struct tcf_csum_params *params;
 	u32 update_flags;
+	__be16 protocol;
 	int action;
 
 	params = rcu_dereference_bh(p->params);
@@ -573,7 +587,9 @@
 		goto drop;
 
 	update_flags = params->update_flags;
-	switch (tc_skb_protocol(skb)) {
+	protocol = tc_skb_protocol(skb);
+again:
+	switch (protocol) {
 	case cpu_to_be16(ETH_P_IP):
 		if (!tcf_csum_ipv4(skb, update_flags))
 			goto drop;
@@ -582,13 +598,35 @@
 		if (!tcf_csum_ipv6(skb, update_flags))
 			goto drop;
 		break;
+	case cpu_to_be16(ETH_P_8021AD): /* fall through */
+	case cpu_to_be16(ETH_P_8021Q):
+		if (skb_vlan_tag_present(skb) && !orig_vlan_tag_present) {
+			protocol = skb->protocol;
+			orig_vlan_tag_present = true;
+		} else {
+			struct vlan_hdr *vlan = (struct vlan_hdr *)skb->data;
+
+			protocol = vlan->h_vlan_encapsulated_proto;
+			skb_pull(skb, VLAN_HLEN);
+			skb_reset_network_header(skb);
+			vlan_hdr_count++;
+		}
+		goto again;
+	}
+
+out:
+	/* Restore the skb for the pulled VLAN tags */
+	while (vlan_hdr_count--) {
+		skb_push(skb, VLAN_HLEN);
+		skb_reset_network_header(skb);
 	}
 
 	return action;
 
 drop:
 	qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
-	return TC_ACT_SHOT;
+	action = TC_ACT_SHOT;
+	goto out;
 }
 
 static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
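
Note: the again loop added above peels VLAN headers until it reaches an IP
payload, and the out path pushes them back before returning. A worked trace
for a double-tagged frame with no tag in the skb metadata (not code from the
patch):

	/* ETH | 802.1AD | 802.1Q | IPv4
	 * pass 1: protocol == ETH_P_8021AD -> skb_pull(skb, VLAN_HLEN),
	 *         vlan_hdr_count = 1
	 * pass 2: protocol == ETH_P_8021Q  -> skb_pull(skb, VLAN_HLEN),
	 *         vlan_hdr_count = 2
	 * pass 3: protocol == ETH_P_IP     -> tcf_csum_ipv4()
	 * out:    two skb_push(skb, VLAN_HLEN) calls restore the frame.
	 */
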
@@ -646,8 +684,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
-			   struct netlink_ext_ack *extack)
+static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
@@ -661,7 +698,7 @@
 
 static struct tc_action_ops act_csum_ops = {
 	.kind		= "csum",
-	.type		= TCA_ACT_CSUM,
+	.id		= TCA_ID_CSUM,
 	.owner		= THIS_MODULE,
 	.act		= tcf_csum_act,
 	.dump		= tcf_csum_dump,
@@ -677,7 +714,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-	return tc_action_net_init(tn, &act_csum_ops);
+	return tc_action_net_init(net, tn, &act_csum_ops);
 }
 
 static void __net_exit csum_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
new file mode 100644
index 0000000..fcc4602
--- /dev/null
+++ b/net/sched/act_ct.c
@@ -0,0 +1,985 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* -
+ * net/sched/act_ct.c  Connection Tracking action
+ *
+ * Authors:   Paul Blakey <paulb@mellanox.com>
+ *            Yossi Kuperman <yossiku@mellanox.com>
+ *            Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <net/act_api.h>
+#include <net/ip.h>
+#include <net/ipv6_frag.h>
+#include <uapi/linux/tc_act/tc_ct.h>
+#include <net/tc_act/tc_ct.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <uapi/linux/netfilter/nf_nat.h>
+
+static struct tc_action_ops act_ct_ops;
+static unsigned int ct_net_id;
+
+struct tc_ct_action_net {
+	struct tc_action_net tn; /* Must be first */
+	bool labels;
+};
+
+/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
+static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
+				   u16 zone_id, bool force)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return false;
+	if (!net_eq(net, read_pnet(&ct->ct_net)))
+		return false;
+	if (nf_ct_zone(ct)->id != zone_id)
+		return false;
+
+	/* Force conntrack entry direction. */
+	if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
+		if (nf_ct_is_confirmed(ct))
+			nf_ct_kill(ct);
+
+		nf_conntrack_put(&ct->ct_general);
+		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+
+		return false;
+	}
+
+	return true;
+}
+
+/* Trim the skb to the length specified by the IP/IPv6 header,
+ * removing any trailing lower-layer padding. This prepares the skb
+ * for higher-layer processing that assumes skb->len excludes padding
+ * (such as nf_ip_checksum). The caller needs to pull the skb to the
+ * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
+ */
+static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
+{
+	unsigned int len;
+	int err;
+
+	switch (family) {
+	case NFPROTO_IPV4:
+		len = ntohs(ip_hdr(skb)->tot_len);
+		break;
+	case NFPROTO_IPV6:
+		len = sizeof(struct ipv6hdr)
+			+ ntohs(ipv6_hdr(skb)->payload_len);
+		break;
+	default:
+		len = skb->len;
+	}
+
+	err = pskb_trim_rcsum(skb, len);
+
+	return err;
+}
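
Note: conntrack helpers such as nf_ip_checksum() assume skb->len carries no
link-layer padding, hence the trim. A concrete case (numbers are
illustrative):

	/* A 40-byte IPv4 datagram (20 B IP header + 20 B TCP header)
	 * arrives in a minimum-size Ethernet frame, so the L2 payload
	 * is padded to 46 bytes. ip_hdr(skb)->tot_len says 40, and
	 * pskb_trim_rcsum(skb, 40) drops the 6 bytes of trailing pad.
	 */
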
+
+static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
+{
+	u8 family = NFPROTO_UNSPEC;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		family = NFPROTO_IPV4;
+		break;
+	case htons(ETH_P_IPV6):
+		family = NFPROTO_IPV6;
+		break;
+	default:
+		break;
+	}
+
+	return family;
+}
+
+static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag)
+{
+	unsigned int len;
+
+	len =  skb_network_offset(skb) + sizeof(struct iphdr);
+	if (unlikely(skb->len < len))
+		return -EINVAL;
+	if (unlikely(!pskb_may_pull(skb, len)))
+		return -ENOMEM;
+
+	*frag = ip_is_fragment(ip_hdr(skb));
+	return 0;
+}
+
+static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag)
+{
+	unsigned int flags = 0, len, payload_ofs = 0;
+	unsigned short frag_off;
+	int nexthdr;
+
+	len =  skb_network_offset(skb) + sizeof(struct ipv6hdr);
+	if (unlikely(skb->len < len))
+		return -EINVAL;
+	if (unlikely(!pskb_may_pull(skb, len)))
+		return -ENOMEM;
+
+	nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
+	if (unlikely(nexthdr < 0))
+		return -EPROTO;
+
+	*frag = flags & IP6_FH_F_FRAG;
+	return 0;
+}
+
+static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
+				   u8 family, u16 zone)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	int err = 0;
+	bool frag;
+
+	/* Previously seen (loopback)? Ignore. */
+	ct = nf_ct_get(skb, &ctinfo);
+	if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
+		return 0;
+
+	if (family == NFPROTO_IPV4)
+		err = tcf_ct_ipv4_is_fragment(skb, &frag);
+	else
+		err = tcf_ct_ipv6_is_fragment(skb, &frag);
+	if (err || !frag)
+		return err;
+
+	skb_get(skb);
+
+	if (family == NFPROTO_IPV4) {
+		enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
+
+		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+		local_bh_disable();
+		err = ip_defrag(net, skb, user);
+		local_bh_enable();
+		if (err && err != -EINPROGRESS)
+			goto out_free;
+	} else { /* NFPROTO_IPV6 */
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+		enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
+
+		memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+		err = nf_ct_frag6_gather(net, skb, user);
+		if (err && err != -EINPROGRESS)
+			goto out_free;
+#else
+		err = -EOPNOTSUPP;
+		goto out_free;
+#endif
+	}
+
+	skb_clear_hash(skb);
+	skb->ignore_df = 1;
+	return err;
+
+out_free:
+	kfree_skb(skb);
+	return err;
+}
+
+static void tcf_ct_params_free(struct rcu_head *head)
+{
+	struct tcf_ct_params *params = container_of(head,
+						    struct tcf_ct_params, rcu);
+
+	if (params->tmpl)
+		nf_conntrack_put(&params->tmpl->ct_general);
+	kfree(params);
+}
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+/* Modelled after nf_nat_ipv[46]_fn().
+ * range is only used for new, uninitialized NAT state.
+ * Returns either NF_ACCEPT or NF_DROP.
+ */
+static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
+			  enum ip_conntrack_info ctinfo,
+			  const struct nf_nat_range2 *range,
+			  enum nf_nat_manip_type maniptype)
+{
+	int hooknum, err = NF_ACCEPT;
+
+	/* See HOOK2MANIP(). */
+	if (maniptype == NF_NAT_MANIP_SRC)
+		hooknum = NF_INET_LOCAL_IN; /* Source NAT */
+	else
+		hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED_REPLY:
+		if (skb->protocol == htons(ETH_P_IP) &&
+		    ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+							   hooknum))
+				err = NF_DROP;
+			goto out;
+		} else if (IS_ENABLED(CONFIG_IPV6) &&
+			   skb->protocol == htons(ETH_P_IPV6)) {
+			__be16 frag_off;
+			u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+			int hdrlen = ipv6_skip_exthdr(skb,
+						      sizeof(struct ipv6hdr),
+						      &nexthdr, &frag_off);
+
+			if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+				if (!nf_nat_icmpv6_reply_translation(skb, ct,
+								     ctinfo,
+								     hooknum,
+								     hdrlen))
+					err = NF_DROP;
+				goto out;
+			}
+		}
+		/* Non-ICMP, fall thru to initialize if needed. */
+		/* fall through */
+	case IP_CT_NEW:
+		/* Seen it before?  This can happen for loopback, retrans,
+		 * or local packets.
+		 */
+		if (!nf_nat_initialized(ct, maniptype)) {
+			/* Initialize according to the NAT action. */
+			err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
+				/* Action is set up to establish a new
+				 * mapping.
+				 */
+				? nf_nat_setup_info(ct, range, maniptype)
+				: nf_nat_alloc_null_binding(ct, hooknum);
+			if (err != NF_ACCEPT)
+				goto out;
+		}
+		break;
+
+	case IP_CT_ESTABLISHED:
+	case IP_CT_ESTABLISHED_REPLY:
+		break;
+
+	default:
+		err = NF_DROP;
+		goto out;
+	}
+
+	err = nf_nat_packet(ct, ctinfo, hooknum, skb);
+out:
+	return err;
+}
+#endif /* CONFIG_NF_NAT */
+
+static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+	u32 new_mark;
+
+	if (!mask)
+		return;
+
+	new_mark = mark | (ct->mark & ~(mask));
+	if (ct->mark != new_mark) {
+		ct->mark = new_mark;
+		if (nf_ct_is_confirmed(ct))
+			nf_conntrack_event_cache(IPCT_MARK, ct);
+	}
+#endif
+}
+
+static void tcf_ct_act_set_labels(struct nf_conn *ct,
+				  u32 *labels,
+				  u32 *labels_m)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
+	size_t labels_sz = FIELD_SIZEOF(struct tcf_ct_params, labels);
+
+	if (!memchr_inv(labels_m, 0, labels_sz))
+		return;
+
+	nf_connlabels_replace(ct, labels, labels_m, 4);
+#endif
+}
+
+static int tcf_ct_act_nat(struct sk_buff *skb,
+			  struct nf_conn *ct,
+			  enum ip_conntrack_info ctinfo,
+			  int ct_action,
+			  struct nf_nat_range2 *range,
+			  bool commit)
+{
+#if IS_ENABLED(CONFIG_NF_NAT)
+	enum nf_nat_manip_type maniptype;
+
+	if (!(ct_action & TCA_CT_ACT_NAT))
+		return NF_ACCEPT;
+
+	/* Add NAT extension if not confirmed yet. */
+	if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
+		return NF_DROP;   /* Can't NAT. */
+
+	if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
+	    (ctinfo != IP_CT_RELATED || commit)) {
+		/* NAT an established or related connection like before. */
+		if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
+			/* This is the REPLY direction for a connection
+			 * for which NAT was applied in the forward
+			 * direction.  Do the reverse NAT.
+			 */
+			maniptype = ct->status & IPS_SRC_NAT
+				? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
+		else
+			maniptype = ct->status & IPS_SRC_NAT
+				? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
+	} else if (ct_action & TCA_CT_ACT_NAT_SRC) {
+		maniptype = NF_NAT_MANIP_SRC;
+	} else if (ct_action & TCA_CT_ACT_NAT_DST) {
+		maniptype = NF_NAT_MANIP_DST;
+	} else {
+		return NF_ACCEPT;
+	}
+
+	return ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+#else
+	return NF_ACCEPT;
+#endif
+}
+
+static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
+		      struct tcf_result *res)
+{
+	struct net *net = dev_net(skb->dev);
+	bool cached, commit, clear, force;
+	enum ip_conntrack_info ctinfo;
+	struct tcf_ct *c = to_ct(a);
+	struct nf_conn *tmpl = NULL;
+	struct nf_hook_state state;
+	int nh_ofs, err, retval;
+	struct tcf_ct_params *p;
+	struct nf_conn *ct;
+	u8 family;
+
+	p = rcu_dereference_bh(c->params);
+
+	retval = READ_ONCE(c->tcf_action);
+	commit = p->ct_action & TCA_CT_ACT_COMMIT;
+	clear = p->ct_action & TCA_CT_ACT_CLEAR;
+	force = p->ct_action & TCA_CT_ACT_FORCE;
+	tmpl = p->tmpl;
+
+	if (clear) {
+		ct = nf_ct_get(skb, &ctinfo);
+		if (ct) {
+			nf_conntrack_put(&ct->ct_general);
+			nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+		}
+
+		goto out;
+	}
+
+	family = tcf_ct_skb_nf_family(skb);
+	if (family == NFPROTO_UNSPEC)
+		goto drop;
+
+	/* The conntrack module expects to be working at L3.
+	 * We also try to pull the IPv4/6 header to the linear area.
+	 */
+	nh_ofs = skb_network_offset(skb);
+	skb_pull_rcsum(skb, nh_ofs);
+	err = tcf_ct_handle_fragments(net, skb, family, p->zone);
+	if (err == -EINPROGRESS) {
+		retval = TC_ACT_STOLEN;
+		goto out;
+	}
+	if (err)
+		goto drop;
+
+	err = tcf_ct_skb_network_trim(skb, family);
+	if (err)
+		goto drop;
+
+	/* If we are recirculating packets to match on ct fields and
+	 * committing with a separate ct action, then we don't need to
+	 * actually run the packet through conntrack twice unless it's for a
+	 * different zone.
+	 */
+	cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
+	if (!cached) {
+		/* Associate skb with specified zone. */
+		if (tmpl) {
+			ct = nf_ct_get(skb, &ctinfo);
+			if (skb_nfct(skb))
+				nf_conntrack_put(skb_nfct(skb));
+			nf_conntrack_get(&tmpl->ct_general);
+			nf_ct_set(skb, tmpl, IP_CT_NEW);
+		}
+
+		state.hook = NF_INET_PRE_ROUTING;
+		state.net = net;
+		state.pf = family;
+		err = nf_conntrack_in(skb, &state);
+		if (err != NF_ACCEPT)
+			goto out_push;
+	}
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		goto out_push;
+	nf_ct_deliver_cached_events(ct);
+
+	err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit);
+	if (err != NF_ACCEPT)
+		goto drop;
+
+	if (commit) {
+		tcf_ct_act_set_mark(ct, p->mark, p->mark_mask);
+		tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);
+
+		/* This will take care of sending queued events
+		 * even if the connection is already confirmed.
+		 */
+		nf_conntrack_confirm(skb);
+	}
+
+out_push:
+	skb_push_rcsum(skb, nh_ofs);
+
+out:
+	bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+	return retval;
+
+drop:
+	qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
+	return TC_ACT_SHOT;
+}
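
Note: the packet path above is, in order: pull the skb to L3, reassemble
fragments (possibly returning TC_ACT_STOLEN while a fragment queue holds the
skb), trim padding, reuse a cached conntrack entry or run nf_conntrack_in(),
apply NAT, and confirm the connection when commit is set; only then does
skb_push_rcsum() restore the bytes pulled at entry.
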
+
+static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
+	[TCA_CT_UNSPEC] = { .strict_start_type = TCA_CT_UNSPEC + 1 },
+	[TCA_CT_ACTION] = { .type = NLA_U16 },
+	[TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) },
+	[TCA_CT_ZONE] = { .type = NLA_U16 },
+	[TCA_CT_MARK] = { .type = NLA_U32 },
+	[TCA_CT_MARK_MASK] = { .type = NLA_U32 },
+	[TCA_CT_LABELS] = { .type = NLA_BINARY,
+			    .len = 128 / BITS_PER_BYTE },
+	[TCA_CT_LABELS_MASK] = { .type = NLA_BINARY,
+				 .len = 128 / BITS_PER_BYTE },
+	[TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 },
+	[TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 },
+	[TCA_CT_NAT_IPV6_MIN] = { .type = NLA_EXACT_LEN,
+				  .len = sizeof(struct in6_addr) },
+	[TCA_CT_NAT_IPV6_MAX] = { .type = NLA_EXACT_LEN,
+				   .len = sizeof(struct in6_addr) },
+	[TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 },
+	[TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 },
+};
+
+static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
+				  struct tc_ct *parm,
+				  struct nlattr **tb,
+				  struct netlink_ext_ack *extack)
+{
+	struct nf_nat_range2 *range;
+
+	if (!(p->ct_action & TCA_CT_ACT_NAT))
+		return 0;
+
+	if (!IS_ENABLED(CONFIG_NF_NAT)) {
+		NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel");
+		return -EOPNOTSUPP;
+	}
+
+	if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
+		return 0;
+
+	if ((p->ct_action & TCA_CT_ACT_NAT_SRC) &&
+	    (p->ct_action & TCA_CT_ACT_NAT_DST)) {
+		NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time");
+		return -EOPNOTSUPP;
+	}
+
+	range = &p->range;
+	if (tb[TCA_CT_NAT_IPV4_MIN]) {
+		struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX];
+
+		p->ipv4_range = true;
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
+		range->min_addr.ip =
+			nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]);
+
+		range->max_addr.ip = max_attr ?
+				     nla_get_in_addr(max_attr) :
+				     range->min_addr.ip;
+	} else if (tb[TCA_CT_NAT_IPV6_MIN]) {
+		struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX];
+
+		p->ipv4_range = false;
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
+		range->min_addr.in6 =
+			nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]);
+
+		range->max_addr.in6 = max_attr ?
+				      nla_get_in6_addr(max_attr) :
+				      range->min_addr.in6;
+	}
+
+	if (tb[TCA_CT_NAT_PORT_MIN]) {
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+		range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]);
+
+		range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ?
+				       nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) :
+				       range->min_proto.all;
+	}
+
+	return 0;
+}
+
+static void tcf_ct_set_key_val(struct nlattr **tb,
+			       void *val, int val_type,
+			       void *mask, int mask_type,
+			       int len)
+{
+	if (!tb[val_type])
+		return;
+	nla_memcpy(val, tb[val_type], len);
+
+	if (!mask)
+		return;
+
+	if (mask_type == TCA_CT_UNSPEC || !tb[mask_type])
+		memset(mask, 0xff, len);
+	else
+		nla_memcpy(mask, tb[mask_type], len);
+}
+
+static int tcf_ct_fill_params(struct net *net,
+			      struct tcf_ct_params *p,
+			      struct tc_ct *parm,
+			      struct nlattr **tb,
+			      struct netlink_ext_ack *extack)
+{
+	struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
+	struct nf_conntrack_zone zone;
+	struct nf_conn *tmpl;
+	int err;
+
+	p->zone = NF_CT_DEFAULT_ZONE_ID;
+
+	tcf_ct_set_key_val(tb,
+			   &p->ct_action, TCA_CT_ACTION,
+			   NULL, TCA_CT_UNSPEC,
+			   sizeof(p->ct_action));
+
+	if (p->ct_action & TCA_CT_ACT_CLEAR)
+		return 0;
+
+	err = tcf_ct_fill_params_nat(p, parm, tb, extack);
+	if (err)
+		return err;
+
+	if (tb[TCA_CT_MARK]) {
+		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) {
+			NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled.");
+			return -EOPNOTSUPP;
+		}
+		tcf_ct_set_key_val(tb,
+				   &p->mark, TCA_CT_MARK,
+				   &p->mark_mask, TCA_CT_MARK_MASK,
+				   sizeof(p->mark));
+	}
+
+	if (tb[TCA_CT_LABELS]) {
+		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
+			NL_SET_ERR_MSG_MOD(extack, "Conntrack labels aren't enabled.");
+			return -EOPNOTSUPP;
+		}
+
+		if (!tn->labels) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length");
+			return -EOPNOTSUPP;
+		}
+		tcf_ct_set_key_val(tb,
+				   p->labels, TCA_CT_LABELS,
+				   p->labels_mask, TCA_CT_LABELS_MASK,
+				   sizeof(p->labels));
+	}
+
+	if (tb[TCA_CT_ZONE]) {
+		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
+			NL_SET_ERR_MSG_MOD(extack, "Conntrack zones aren't enabled.");
+			return -EOPNOTSUPP;
+		}
+
+		tcf_ct_set_key_val(tb,
+				   &p->zone, TCA_CT_ZONE,
+				   NULL, TCA_CT_UNSPEC,
+				   sizeof(p->zone));
+	}
+
+	if (p->zone == NF_CT_DEFAULT_ZONE_ID)
+		return 0;
+
+	nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0);
+	tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
+	if (!tmpl) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template");
+		return -ENOMEM;
+	}
+	__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+	nf_conntrack_get(&tmpl->ct_general);
+	p->tmpl = tmpl;
+
+	return 0;
+}
+
+static int tcf_ct_init(struct net *net, struct nlattr *nla,
+		       struct nlattr *est, struct tc_action **a,
+		       int replace, int bind, bool rtnl_held,
+		       struct tcf_proto *tp,
+		       struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, ct_net_id);
+	struct tcf_ct_params *params = NULL;
+	struct nlattr *tb[TCA_CT_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
+	struct tc_ct *parm;
+	struct tcf_ct *c;
+	int err, res = 0;
+	u32 index;
+
+	if (!nla) {
+		NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_CT_PARMS]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters");
+		return -EINVAL;
+	}
+	parm = nla_data(tb[TCA_CT_PARMS]);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
+	if (err < 0)
+		return err;
+
+	if (!err) {
+		err = tcf_idr_create(tn, index, est, a,
+				     &act_ct_ops, bind, true);
+		if (err) {
+			tcf_idr_cleanup(tn, index);
+			return err;
+		}
+		res = ACT_P_CREATED;
+	} else {
+		if (bind)
+			return 0;
+
+		if (!replace) {
+			tcf_idr_release(*a, bind);
+			return -EEXIST;
+		}
+	}
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto cleanup;
+
+	c = to_ct(*a);
+
+	params = kzalloc(sizeof(*params), GFP_KERNEL);
+	if (unlikely(!params)) {
+		err = -ENOMEM;
+		goto cleanup;
+	}
+
+	err = tcf_ct_fill_params(net, params, parm, tb, extack);
+	if (err)
+		goto cleanup;
+
+	spin_lock_bh(&c->tcf_lock);
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+	rcu_swap_protected(c->params, params, lockdep_is_held(&c->tcf_lock));
+	spin_unlock_bh(&c->tcf_lock);
+
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+	if (params)
+		kfree_rcu(params, rcu);
+	if (res == ACT_P_CREATED)
+		tcf_idr_insert(tn, *a);
+
+	return res;
+
+cleanup:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+	kfree(params);
+	tcf_idr_release(*a, bind);
+	return err;
+}
+
+static void tcf_ct_cleanup(struct tc_action *a)
+{
+	struct tcf_ct_params *params;
+	struct tcf_ct *c = to_ct(a);
+
+	params = rcu_dereference_protected(c->params, 1);
+	if (params)
+		call_rcu(&params->rcu, tcf_ct_params_free);
+}
+
+static int tcf_ct_dump_key_val(struct sk_buff *skb,
+			       void *val, int val_type,
+			       void *mask, int mask_type,
+			       int len)
+{
+	int err;
+
+	if (mask && !memchr_inv(mask, 0, len))
+		return 0;
+
+	err = nla_put(skb, val_type, len, val);
+	if (err)
+		return err;
+
+	if (mask_type != TCA_CT_UNSPEC) {
+		err = nla_put(skb, mask_type, len, mask);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
+{
+	struct nf_nat_range2 *range = &p->range;
+
+	if (!(p->ct_action & TCA_CT_ACT_NAT))
+		return 0;
+
+	if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
+		return 0;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
+		if (p->ipv4_range) {
+			if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN,
+					    range->min_addr.ip))
+				return -1;
+			if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX,
+					    range->max_addr.ip))
+				return -1;
+		} else {
+			if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN,
+					     &range->min_addr.in6))
+				return -1;
+			if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX,
+					     &range->max_addr.in6))
+				return -1;
+		}
+	}
+
+	if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+		if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN,
+				 range->min_proto.all))
+			return -1;
+		if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX,
+				 range->max_proto.all))
+			return -1;
+	}
+
+	return 0;
+}
+
+static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
+			      int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_ct *c = to_ct(a);
+	struct tcf_ct_params *p;
+
+	struct tc_ct opt = {
+		.index   = c->tcf_index,
+		.refcnt  = refcount_read(&c->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&c->tcf_bindcnt) - bind,
+	};
+	struct tcf_t t;
+
+	spin_lock_bh(&c->tcf_lock);
+	p = rcu_dereference_protected(c->params,
+				      lockdep_is_held(&c->tcf_lock));
+	opt.action = c->tcf_action;
+
+	if (tcf_ct_dump_key_val(skb,
+				&p->ct_action, TCA_CT_ACTION,
+				NULL, TCA_CT_UNSPEC,
+				sizeof(p->ct_action)))
+		goto nla_put_failure;
+
+	if (p->ct_action & TCA_CT_ACT_CLEAR)
+		goto skip_dump;
+
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+	    tcf_ct_dump_key_val(skb,
+				&p->mark, TCA_CT_MARK,
+				&p->mark_mask, TCA_CT_MARK_MASK,
+				sizeof(p->mark)))
+		goto nla_put_failure;
+
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+	    tcf_ct_dump_key_val(skb,
+				p->labels, TCA_CT_LABELS,
+				p->labels_mask, TCA_CT_LABELS_MASK,
+				sizeof(p->labels)))
+		goto nla_put_failure;
+
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
+	    tcf_ct_dump_key_val(skb,
+				&p->zone, TCA_CT_ZONE,
+				NULL, TCA_CT_UNSPEC,
+				sizeof(p->zone)))
+		goto nla_put_failure;
+
+	if (tcf_ct_dump_nat(skb, p))
+		goto nla_put_failure;
+
+skip_dump:
+	if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&t, &c->tcf_tm);
+	if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD))
+		goto nla_put_failure;
+	spin_unlock_bh(&c->tcf_lock);
+
+	return skb->len;
+nla_put_failure:
+	spin_unlock_bh(&c->tcf_lock);
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_ct_walker(struct net *net, struct sk_buff *skb,
+			 struct netlink_callback *cb, int type,
+			 const struct tc_action_ops *ops,
+			 struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, ct_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ct_net_id);
+
+	return tcf_idr_search(tn, a, index);
+}
+
+static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+			     u64 lastuse, bool hw)
+{
+	struct tcf_ct *c = to_ct(a);
+
+	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+
+	if (hw)
+		_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+				   bytes, packets);
+	c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
+}
+
+static struct tc_action_ops act_ct_ops = {
+	.kind		=	"ct",
+	.id		=	TCA_ID_CT,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_ct_act,
+	.dump		=	tcf_ct_dump,
+	.init		=	tcf_ct_init,
+	.cleanup	=	tcf_ct_cleanup,
+	.walk		=	tcf_ct_walker,
+	.lookup		=	tcf_ct_search,
+	.stats_update	=	tcf_stats_update,
+	.size		=	sizeof(struct tcf_ct),
+};
+
+static __net_init int ct_init_net(struct net *net)
+{
+	unsigned int n_bits = FIELD_SIZEOF(struct tcf_ct_params, labels) * 8;
+	struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
+
+	if (nf_connlabels_get(net, n_bits - 1)) {
+		tn->labels = false;
+		pr_err("act_ct: Failed to set connlabels length\n");
+	} else {
+		tn->labels = true;
+	}
+
+	return tc_action_net_init(net, &tn->tn, &act_ct_ops);
+}
+
+static void __net_exit ct_exit_net(struct list_head *net_list)
+{
+	struct net *net;
+
+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list) {
+		struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
+
+		if (tn->labels)
+			nf_connlabels_put(net);
+	}
+	rtnl_unlock();
+
+	tc_action_net_exit(net_list, ct_net_id);
+}
+
+static struct pernet_operations ct_net_ops = {
+	.init = ct_init_net,
+	.exit_batch = ct_exit_net,
+	.id   = &ct_net_id,
+	.size = sizeof(struct tc_ct_action_net),
+};
+
+static int __init ct_init_module(void)
+{
+	return tcf_register_action(&act_ct_ops, &ct_net_ops);
+}
+
+static void __exit ct_cleanup_module(void)
+{
+	tcf_unregister_action(&act_ct_ops, &ct_net_ops);
+}
+
+module_init(ct_init_module);
+module_exit(ct_cleanup_module);
+MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>");
+MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>");
+MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>");
+MODULE_DESCRIPTION("Connection tracking action");
+MODULE_LICENSE("GPL v2");
+
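Note: with an iproute2 recent enough to know this action, a rule such as
"tc filter add dev eth0 ingress protocol ip flower action ct commit zone 1
pipe" should exercise the commit path above (an illustration of the expected
usage, not part of the patch).
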
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
new file mode 100644
index 0000000..0dbcfd1
--- /dev/null
+++ b/net/sched/act_ctinfo.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* net/sched/act_ctinfo.c  netfilter ctinfo connmark actions
+ *
+ * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+#include <uapi/linux/tc_act/tc_ctinfo.h>
+#include <net/tc_act/tc_ctinfo.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+static struct tc_action_ops act_ctinfo_ops;
+static unsigned int ctinfo_net_id;
+
+static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
+				struct tcf_ctinfo_params *cp,
+				struct sk_buff *skb, int wlen, int proto)
+{
+	u8 dscp, newdscp;
+
+	newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
+		     ~INET_ECN_MASK;
+
+	switch (proto) {
+	case NFPROTO_IPV4:
+		dscp = ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK;
+		if (dscp != newdscp) {
+			if (likely(!skb_try_make_writable(skb, wlen))) {
+				ipv4_change_dsfield(ip_hdr(skb),
+						    INET_ECN_MASK,
+						    newdscp);
+				ca->stats_dscp_set++;
+			} else {
+				ca->stats_dscp_error++;
+			}
+		}
+		break;
+	case NFPROTO_IPV6:
+		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK;
+		if (dscp != newdscp) {
+			if (likely(!skb_try_make_writable(skb, wlen))) {
+				ipv6_change_dsfield(ipv6_hdr(skb),
+						    INET_ECN_MASK,
+						    newdscp);
+				ca->stats_dscp_set++;
+			} else {
+				ca->stats_dscp_error++;
+			}
+		}
+		break;
+	default:
+		break;
+	}
+}
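
Note: the helper assumes the DSCP value was stored in the conntrack mark under
dscpmask. A worked example with dscpmask 0xfc000000, so dscpmaskshift is 26
(not code from the patch):

	/* ct->mark = 0xb8000000
	 * (0xb8000000 & 0xfc000000) >> 26 = 46  (DSCP EF)
	 * 46 << 2 = 0xb8, written into the DS field while the two ECN
	 * bits (INET_ECN_MASK) are preserved.
	 */
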
+
+static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
+				  struct tcf_ctinfo_params *cp,
+				  struct sk_buff *skb)
+{
+	ca->stats_cpmark_set++;
+	skb->mark = ct->mark & cp->cpmarkmask;
+}
+
+static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
+			  struct tcf_result *res)
+{
+	const struct nf_conntrack_tuple_hash *thash = NULL;
+	struct tcf_ctinfo *ca = to_ctinfo(a);
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_zone zone;
+	enum ip_conntrack_info ctinfo;
+	struct tcf_ctinfo_params *cp;
+	struct nf_conn *ct;
+	int proto, wlen;
+	int action;
+
+	cp = rcu_dereference_bh(ca->params);
+
+	tcf_lastuse_update(&ca->tcf_tm);
+	bstats_update(&ca->tcf_bstats, skb);
+	action = READ_ONCE(ca->tcf_action);
+
+	wlen = skb_network_offset(skb);
+	if (tc_skb_protocol(skb) == htons(ETH_P_IP)) {
+		wlen += sizeof(struct iphdr);
+		if (!pskb_may_pull(skb, wlen))
+			goto out;
+
+		proto = NFPROTO_IPV4;
+	} else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) {
+		wlen += sizeof(struct ipv6hdr);
+		if (!pskb_may_pull(skb, wlen))
+			goto out;
+
+		proto = NFPROTO_IPV6;
+	} else {
+		goto out;
+	}
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct) { /* look harder, usually ingress */
+		if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				       proto, cp->net, &tuple))
+			goto out;
+		zone.id = cp->zone;
+		zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+		thash = nf_conntrack_find_get(cp->net, &zone, &tuple);
+		if (!thash)
+			goto out;
+
+		ct = nf_ct_tuplehash_to_ctrack(thash);
+	}
+
+	if (cp->mode & CTINFO_MODE_DSCP)
+		if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask))
+			tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto);
+
+	if (cp->mode & CTINFO_MODE_CPMARK)
+		tcf_ctinfo_cpmark_set(ct, ca, cp, skb);
+
+	if (thash)
+		nf_ct_put(ct);
+out:
+	return action;
+}
+
+static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
+	[TCA_CTINFO_ACT]		  = { .type = NLA_EXACT_LEN,
+					      .len = sizeof(struct tc_ctinfo) },
+	[TCA_CTINFO_ZONE]		  = { .type = NLA_U16 },
+	[TCA_CTINFO_PARMS_DSCP_MASK]	  = { .type = NLA_U32 },
+	[TCA_CTINFO_PARMS_DSCP_STATEMASK] = { .type = NLA_U32 },
+	[TCA_CTINFO_PARMS_CPMARK_MASK]	  = { .type = NLA_U32 },
+};
+
+static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind, bool rtnl_held,
+			   struct tcf_proto *tp,
+			   struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+	u32 dscpmask = 0, dscpstatemask, index;
+	struct nlattr *tb[TCA_CTINFO_MAX + 1];
+	struct tcf_ctinfo_params *cp_new;
+	struct tcf_chain *goto_ch = NULL;
+	struct tc_ctinfo *actparm;
+	struct tcf_ctinfo *ci;
+	u8 dscpmaskshift;
+	int ret = 0, err;
+
+	if (!nla) {
+		NL_SET_ERR_MSG_MOD(extack, "ctinfo requires attributes to be passed");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested(tb, TCA_CTINFO_MAX, nla, ctinfo_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_CTINFO_ACT]) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Missing required TCA_CTINFO_ACT attribute");
+		return -EINVAL;
+	}
+	actparm = nla_data(tb[TCA_CTINFO_ACT]);
+
+	/* Do some basic validation here before dynamically allocating
+	 * things that we would otherwise have to clean up.
+	 */
+	if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
+		dscpmask = nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_MASK]);
+		/* need contiguous 6 bit mask */
+		dscpmaskshift = dscpmask ? __ffs(dscpmask) : 0;
+		if ((~0 & (dscpmask >> dscpmaskshift)) != 0x3f) {
+			NL_SET_ERR_MSG_ATTR(extack,
+					    tb[TCA_CTINFO_PARMS_DSCP_MASK],
+					    "dscp mask must be 6 contiguous bits");
+			return -EINVAL;
+		}
+		dscpstatemask = tb[TCA_CTINFO_PARMS_DSCP_STATEMASK] ?
+			nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) : 0;
+		/* mask & statemask must not overlap */
+		if (dscpmask & dscpstatemask) {
+			NL_SET_ERR_MSG_ATTR(extack,
+					    tb[TCA_CTINFO_PARMS_DSCP_STATEMASK],
+					    "dscp statemask must not overlap dscp mask");
+			return -EINVAL;
+		}
+	}
+
+	/* done with the validation: now to the actual action allocation */
+	index = actparm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
+	if (!err) {
+		ret = tcf_idr_create(tn, index, est, a,
+				     &act_ctinfo_ops, bind, false);
+		if (ret) {
+			tcf_idr_cleanup(tn, index);
+			return ret;
+		}
+		ret = ACT_P_CREATED;
+	} else if (err > 0) {
+		if (bind) /* don't override defaults */
+			return 0;
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
+			return -EEXIST;
+		}
+	} else {
+		return err;
+	}
+
+	err = tcf_action_check_ctrlact(actparm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
+	ci = to_ctinfo(*a);
+
+	cp_new = kzalloc(sizeof(*cp_new), GFP_KERNEL);
+	if (unlikely(!cp_new)) {
+		err = -ENOMEM;
+		goto put_chain;
+	}
+
+	cp_new->net = net;
+	cp_new->zone = tb[TCA_CTINFO_ZONE] ?
+			nla_get_u16(tb[TCA_CTINFO_ZONE]) : 0;
+	if (dscpmask) {
+		cp_new->dscpmask = dscpmask;
+		cp_new->dscpmaskshift = dscpmaskshift;
+		cp_new->dscpstatemask = dscpstatemask;
+		cp_new->mode |= CTINFO_MODE_DSCP;
+	}
+
+	if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) {
+		cp_new->cpmarkmask =
+				nla_get_u32(tb[TCA_CTINFO_PARMS_CPMARK_MASK]);
+		cp_new->mode |= CTINFO_MODE_CPMARK;
+	}
+
+	spin_lock_bh(&ci->tcf_lock);
+	goto_ch = tcf_action_set_ctrlact(*a, actparm->action, goto_ch);
+	rcu_swap_protected(ci->params, cp_new,
+			   lockdep_is_held(&ci->tcf_lock));
+	spin_unlock_bh(&ci->tcf_lock);
+
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+	if (cp_new)
+		kfree_rcu(cp_new, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_idr_insert(tn, *a);
+
+	return ret;
+
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
+}
+
+static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	struct tcf_ctinfo *ci = to_ctinfo(a);
+	struct tc_ctinfo opt = {
+		.index   = ci->tcf_index,
+		.refcnt  = refcount_read(&ci->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
+	};
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_ctinfo_params *cp;
+	struct tcf_t t;
+
+	spin_lock_bh(&ci->tcf_lock);
+	cp = rcu_dereference_protected(ci->params,
+				       lockdep_is_held(&ci->tcf_lock));
+
+	tcf_tm_dump(&t, &ci->tcf_tm);
+	if (nla_put_64bit(skb, TCA_CTINFO_TM, sizeof(t), &t, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	opt.action = ci->tcf_action;
+	if (nla_put(skb, TCA_CTINFO_ACT, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	if (nla_put_u16(skb, TCA_CTINFO_ZONE, cp->zone))
+		goto nla_put_failure;
+
+	if (cp->mode & CTINFO_MODE_DSCP) {
+		if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_MASK,
+				cp->dscpmask))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_STATEMASK,
+				cp->dscpstatemask))
+			goto nla_put_failure;
+	}
+
+	if (cp->mode & CTINFO_MODE_CPMARK) {
+		if (nla_put_u32(skb, TCA_CTINFO_PARMS_CPMARK_MASK,
+				cp->cpmarkmask))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET,
+			      ci->stats_dscp_set, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR,
+			      ci->stats_dscp_error, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET,
+			      ci->stats_cpmark_set, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	spin_unlock_bh(&ci->tcf_lock);
+	return skb->len;
+
+nla_put_failure:
+	spin_unlock_bh(&ci->tcf_lock);
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_ctinfo_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+	return tcf_idr_search(tn, a, index);
+}
+
+static struct tc_action_ops act_ctinfo_ops = {
+	.kind	= "ctinfo",
+	.id	= TCA_ID_CTINFO,
+	.owner	= THIS_MODULE,
+	.act	= tcf_ctinfo_act,
+	.dump	= tcf_ctinfo_dump,
+	.init	= tcf_ctinfo_init,
+	.walk	= tcf_ctinfo_walker,
+	.lookup	= tcf_ctinfo_search,
+	.size	= sizeof(struct tcf_ctinfo),
+};
+
+static __net_init int ctinfo_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+	return tc_action_net_init(net, tn, &act_ctinfo_ops);
+}
+
+static void __net_exit ctinfo_exit_net(struct list_head *net_list)
+{
+	tc_action_net_exit(net_list, ctinfo_net_id);
+}
+
+static struct pernet_operations ctinfo_net_ops = {
+	.init		= ctinfo_init_net,
+	.exit_batch	= ctinfo_exit_net,
+	.id		= &ctinfo_net_id,
+	.size		= sizeof(struct tc_action_net),
+};
+
+static int __init ctinfo_init_module(void)
+{
+	return tcf_register_action(&act_ctinfo_ops, &ctinfo_net_ops);
+}
+
+static void __exit ctinfo_cleanup_module(void)
+{
+	tcf_unregister_action(&act_ctinfo_ops, &ctinfo_net_ops);
+}
+
+module_init(ctinfo_init_module);
+module_exit(ctinfo_cleanup_module);
+MODULE_AUTHOR("Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>");
+MODULE_DESCRIPTION("Connection tracking mark actions");
+MODULE_LICENSE("GPL");
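
The new act_ctinfo above conditionally restores a DSCP value stored in the conntrack mark, so tcf_ctinfo_init() insists that the configured DSCP mask is exactly six contiguous bits. A minimal userspace sketch of that check, using the GCC/Clang __builtin_ctz() in place of the kernel's __ffs() (illustrative only, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Valid iff the mask, shifted down to its lowest set bit, reads
     * exactly 0x3f - i.e. six contiguous bits, wherever they sit.
     */
    static int dscp_mask_valid(uint32_t mask)
    {
        /* __builtin_ctz(0) is undefined, hence the same guard as the
         * "dscpmask ? __ffs(dscpmask) : 0" expression above.
         */
        unsigned int shift = mask ? __builtin_ctz(mask) : 0;

        return (mask >> shift) == 0x3f;
    }

    int main(void)
    {
        printf("0xfc -> %d\n", dscp_mask_valid(0xfc)); /* 1: DSCP bits of TOS */
        printf("0xf8 -> %d\n", dscp_mask_valid(0xf8)); /* 0: only 5 bits */
        return 0;
    }
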
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index cd1d9bd..324f1d1 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_gact.c		Generic actions
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * copyright 	Jamal Hadi Salim (2002-4)
- *
  */
 
 #include <linux/types.h>
@@ -20,6 +15,7 @@
 #include <linux/init.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 #include <linux/tc_act/tc_gact.h>
 #include <net/tc_act/tc_gact.h>
 
@@ -57,13 +53,15 @@
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
 			 int ovr, int bind, bool rtnl_held,
-			 struct netlink_ext_ack *extack)
+			 struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 	struct nlattr *tb[TCA_GACT_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tc_gact *parm;
 	struct tcf_gact *gact;
 	int ret = 0;
+	u32 index;
 	int err;
 #ifdef CONFIG_GACT_PROB
 	struct tc_gact_p *p_parm = NULL;
@@ -72,13 +70,15 @@
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_GACT_MAX, nla, gact_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
 	if (tb[TCA_GACT_PARMS] == NULL)
 		return -EINVAL;
 	parm = nla_data(tb[TCA_GACT_PARMS]);
+	index = parm->index;
 
 #ifndef CONFIG_GACT_PROB
 	if (tb[TCA_GACT_PROB] != NULL)
@@ -88,15 +88,20 @@
 		p_parm = nla_data(tb[TCA_GACT_PROB]);
 		if (p_parm->ptype >= MAX_RAND)
 			return -EINVAL;
+		if (TC_ACT_EXT_CMP(p_parm->paction, TC_ACT_GOTO_CHAIN)) {
+			NL_SET_ERR_MSG(extack,
+				       "goto chain not allowed on fallback");
+			return -EINVAL;
+		}
 	}
 #endif
 
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (!err) {
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_gact_ops, bind, true);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 		ret = ACT_P_CREATED;
@@ -111,10 +116,13 @@
 		return err;
 	}
 
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
 	gact = to_gact(*a);
 
 	spin_lock_bh(&gact->tcf_lock);
-	gact->tcf_action = parm->action;
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 #ifdef CONFIG_GACT_PROB
 	if (p_parm) {
 		gact->tcfg_paction = p_parm->paction;
@@ -128,9 +136,15 @@
 #endif
 	spin_unlock_bh(&gact->tcf_lock);
 
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 	return ret;
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
@@ -157,7 +171,7 @@
 }
 
 static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
-				  u64 lastuse)
+				  u64 lastuse, bool hw)
 {
 	struct tcf_gact *gact = to_gact(a);
 	int action = READ_ONCE(gact->tcf_action);
@@ -168,6 +182,10 @@
 	if (action == TC_ACT_SHOT)
 		this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
 
+	if (hw)
+		_bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats_hw),
+				   bytes, packets);
+
 	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
@@ -222,8 +240,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
-			   struct netlink_ext_ack *extack)
+static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
@@ -245,7 +262,7 @@
 
 static struct tc_action_ops act_gact_ops = {
 	.kind		=	"gact",
-	.type		=	TCA_ACT_GACT,
+	.id		=	TCA_ID_GACT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_gact_act,
 	.stats_update	=	tcf_gact_stats_update,
@@ -261,7 +278,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-	return tc_action_net_init(tn, &act_gact_ops);
+	return tc_action_net_init(net, tn, &act_gact_ops);
 }
 
 static void __net_exit gact_exit_net(struct list_head *net_list)
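
Like every action converted in this series, gact now copies parm->index into a local before calling tcf_idr_check_alloc(); the helper writes an allocated index back through its pointer, and the old code let it scribble into the netlink message buffer. The return value then drives a three-way create/bind/replace branch. A hypothetical stand-in (check_alloc() is illustrative, not the kernel API):

    #include <stdio.h>

    /* Hypothetical stand-in for tcf_idr_check_alloc(): returns 0 after
     * reserving (and, for *index == 0, choosing) an index, 1 if an
     * action with this index already exists, or a negative errno.
     */
    static int check_alloc(unsigned int *index, unsigned int taken_upto)
    {
        if (*index == 0) {
            *index = taken_upto + 1;    /* helper picks the index */
            return 0;
        }
        return *index <= taken_upto ? 1 : 0;
    }

    int main(void)
    {
        unsigned int index = 0;         /* local copy, as in the patches */
        int err = check_alloc(&index, 3);

        if (!err)
            printf("create new action at index %u\n", index);
        else if (err > 0)
            printf("bind/replace existing action %u\n", index);
        else
            printf("error %d\n", err);
        return 0;
    }
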
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 06a3d48..3a31e24 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/ife.c	Inter-FE action based on ForCES WG InterFE LFB
  *
@@ -9,13 +10,7 @@
  *		Subsystem"
  *		Authors: Jamal Hadi Salim and Damascene M. Joachimpillai
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * copyright Jamal Hadi Salim (2015)
- *
 */
 
 #include <linux/types.h>
@@ -29,6 +24,7 @@
 #include <net/net_namespace.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 #include <uapi/linux/tc_act/tc_ife.h>
 #include <net/tc_act/tc_ife.h>
 #include <linux/etherdevice.h>
@@ -386,7 +382,7 @@
 	if (list_empty(&ife->metalist))
 		return 0;
 
-	nest = nla_nest_start(skb, TCA_IFE_METALST);
+	nest = nla_nest_start_noflag(skb, TCA_IFE_METALST);
 	if (!nest)
 		goto out_nlmsg_trim;
 
@@ -469,11 +465,12 @@
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a,
 			int ovr, int bind, bool rtnl_held,
-			struct netlink_ext_ack *extack)
+			struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 	struct nlattr *tb[TCA_IFE_MAX + 1];
 	struct nlattr *tb2[IFE_META_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tcf_ife_params *p;
 	struct tcf_ife_info *ife;
 	u16 ife_type = ETH_P_IFE;
@@ -482,9 +479,16 @@
 	u8 *saddr = NULL;
 	bool exists = false;
 	int ret = 0;
+	u32 index;
 	int err;
 
-	err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy, NULL);
+	if (!nla) {
+		NL_SET_ERR_MSG_MOD(extack, "IFE requires attributes to be passed");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested_deprecated(tb, TCA_IFE_MAX, nla, ife_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -504,7 +508,8 @@
 	if (!p)
 		return -ENOMEM;
 
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0) {
 		kfree(p);
 		return err;
@@ -516,10 +521,10 @@
 	}
 
 	if (!exists) {
-		ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
+		ret = tcf_idr_create(tn, index, est, a, &act_ife_ops,
 				     bind, true);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			kfree(p);
 			return ret;
 		}
@@ -531,6 +536,10 @@
 	}
 
 	ife = to_ife(*a);
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
 	p->flags = parm->flags;
 
 	if (parm->flags & IFE_ENCODE) {
@@ -561,15 +570,11 @@
 		INIT_LIST_HEAD(&ife->metalist);
 
 	if (tb[TCA_IFE_METALST]) {
-		err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST],
-				       NULL, NULL);
-		if (err) {
-metadata_parse_err:
-			tcf_idr_release(*a, bind);
-			kfree(p);
-			return err;
-		}
-
+		err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
+						  tb[TCA_IFE_METALST], NULL,
+						  NULL);
+		if (err)
+			goto metadata_parse_err;
 		err = populate_metalist(ife, tb2, exists, rtnl_held);
 		if (err)
 			goto metadata_parse_err;
@@ -581,21 +586,20 @@
 		 * going to bail out
 		 */
 		err = use_all_metadata(ife, exists);
-		if (err) {
-			tcf_idr_release(*a, bind);
-			kfree(p);
-			return err;
-		}
+		if (err)
+			goto metadata_parse_err;
 	}
 
 	if (exists)
 		spin_lock_bh(&ife->tcf_lock);
-	ife->tcf_action = parm->action;
 	/* protected by tcf_lock when modifying existing action */
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 	rcu_swap_protected(ife->params, p, 1);
 
 	if (exists)
 		spin_unlock_bh(&ife->tcf_lock);
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
 	if (p)
 		kfree_rcu(p, rcu);
 
@@ -603,6 +607,13 @@
 		tcf_idr_insert(tn, *a);
 
 	return ret;
+metadata_parse_err:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	kfree(p);
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -855,8 +866,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
-			  struct netlink_ext_ack *extack)
+static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
@@ -865,7 +875,7 @@
 
 static struct tc_action_ops act_ife_ops = {
 	.kind = "ife",
-	.type = TCA_ACT_IFE,
+	.id = TCA_ID_IFE,
 	.owner = THIS_MODULE,
 	.act = tcf_ife_act,
 	.dump = tcf_ife_dump,
@@ -880,7 +890,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-	return tc_action_net_init(tn, &act_ife_ops);
+	return tc_action_net_init(net, tn, &act_ife_ops);
 }
 
 static void __net_exit ife_exit_net(struct list_head *net_list)
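
The ife rework above replaces three duplicated cleanup sequences with the metadata_parse_err/release_idr labels, the layered-unwind idiom used throughout this series: take resources in order, release them in reverse by falling through labels, so each failure site jumps to exactly the cleanup it needs. A compact sketch of the shape (acquire, release, and configure are illustrative placeholders, not kernel APIs):

    struct res { int held; };

    static int acquire(struct res *r)  { r->held = 1; return 0; }
    static void release(struct res *r) { r->held = 0; }
    static int configure(void)         { return 0; }

    static int setup(void)
    {
        struct res idr = {0}, chain = {0};
        int err;

        err = acquire(&idr);      /* cf. tcf_idr_check_alloc() */
        if (err)
            return err;

        err = acquire(&chain);    /* cf. tcf_action_check_ctrlact() */
        if (err)
            goto release_idr;

        err = configure();        /* cf. populate_metalist() */
        if (err)
            goto put_chain;

        return 0;

    put_chain:
        release(&chain);
    release_idr:
        release(&idr);
        return err;
    }

    int main(void) { return setup(); }
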
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8525de8..214a03d 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_ipt.c		iptables target interface
  *
  * TODO: Add other tables. For now we only support the ipv4 table targets.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Copyright:	Jamal Hadi Salim (2002-13)
  */
 
@@ -65,12 +61,13 @@
 	return 0;
 }
 
-static void ipt_destroy_target(struct xt_entry_target *t)
+static void ipt_destroy_target(struct xt_entry_target *t, struct net *net)
 {
 	struct xt_tgdtor_param par = {
 		.target   = t->u.kernel.target,
 		.targinfo = t->data,
 		.family   = NFPROTO_IPV4,
+		.net      = net,
 	};
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
@@ -82,7 +79,7 @@
 	struct tcf_ipt *ipt = to_ipt(a);
 
 	if (ipt->tcfi_t) {
-		ipt_destroy_target(ipt->tcfi_t);
+		ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net);
 		kfree(ipt->tcfi_t);
 	}
 	kfree(ipt->tcfi_tname);
@@ -97,7 +94,8 @@
 
 static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
-			  const struct tc_action_ops *ops, int ovr, int bind)
+			  const struct tc_action_ops *ops, int ovr, int bind,
+			  struct tcf_proto *tp)
 {
 	struct tc_action_net *tn = net_generic(net, id);
 	struct nlattr *tb[TCA_IPT_MAX + 1];
@@ -112,7 +110,8 @@
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -182,7 +181,7 @@
 
 	spin_lock_bh(&ipt->tcf_lock);
 	if (ret != ACT_P_CREATED) {
-		ipt_destroy_target(ipt->tcfi_t);
+		ipt_destroy_target(ipt->tcfi_t, net);
 		kfree(ipt->tcfi_tname);
 		kfree(ipt->tcfi_t);
 	}
@@ -199,27 +198,26 @@
 err2:
 	kfree(tname);
 err1:
-	if (ret == ACT_P_CREATED)
-		tcf_idr_release(*a, bind);
+	tcf_idr_release(*a, bind);
 	return err;
 }
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a, int ovr,
-			int bind, bool rtnl_held,
+			int bind, bool rtnl_held, struct tcf_proto *tp,
 			struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
-			      bind);
+			      bind, tp);
 }
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
 		       struct nlattr *est, struct tc_action **a, int ovr,
-		       int bind, bool unlocked,
+		       int bind, bool unlocked, struct tcf_proto *tp,
 		       struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
-			      bind);
+			      bind, tp);
 }
 
 static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
@@ -329,8 +327,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
-			  struct netlink_ext_ack *extack)
+static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
@@ -339,7 +336,7 @@
 
 static struct tc_action_ops act_ipt_ops = {
 	.kind		=	"ipt",
-	.type		=	TCA_ACT_IPT,
+	.id		=	TCA_ID_IPT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_ipt_act,
 	.dump		=	tcf_ipt_dump,
@@ -354,7 +351,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-	return tc_action_net_init(tn, &act_ipt_ops);
+	return tc_action_net_init(net, tn, &act_ipt_ops);
 }
 
 static void __net_exit ipt_exit_net(struct list_head *net_list)
@@ -379,8 +376,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
-			 struct netlink_ext_ack *extack)
+static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
@@ -389,7 +385,7 @@
 
 static struct tc_action_ops act_xt_ops = {
 	.kind		=	"xt",
-	.type		=	TCA_ACT_XT,
+	.id		=	TCA_ID_XT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_ipt_act,
 	.dump		=	tcf_ipt_dump,
@@ -404,7 +400,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-	return tc_action_net_init(tn, &act_xt_ops);
+	return tc_action_net_init(net, tn, &act_xt_ops);
 }
 
 static void __net_exit xt_exit_net(struct list_head *net_list)
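
The act_ipt change above threads the network namespace into ipt_destroy_target(), so the xtables target's destroy hook sees the namespace it was set up in rather than an implicit global. The shape of the change, in a self-contained sketch (types and names here are illustrative, not the xtables API):

    #include <stdio.h>

    struct net { int id; };

    struct target {
        void (*destroy)(struct target *t, struct net *net);
    };

    static void demo_destroy(struct target *t, struct net *net)
    {
        (void)t;
        printf("tearing down target in netns %d\n", net->id);
    }

    int main(void)
    {
        struct net ns = { .id = 7 };
        struct target tgt = { .destroy = demo_destroy };

        if (tgt.destroy)                /* cf. par.target->destroy != NULL */
            tgt.destroy(&tgt, &ns);     /* cf. ipt_destroy_target(t, net) */
        return 0;
    }
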
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 6445184..ea0573c 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_meta_mark.c IFE skb->mark metadata module
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * copyright Jamal Hadi Salim (2015)
- *
 */
 
 #include <linux/types.h>
diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c
index 4033f9f..2df3133 100644
--- a/net/sched/act_meta_skbprio.c
+++ b/net/sched/act_meta_skbprio.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_meta_skbprio.c IFE skb->priority metadata module
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * copyright Jamal Hadi Salim (2015)
- *
 */
 
 #include <linux/types.h>
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 7221437..44547ca 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_meta_skbtcindex.c IFE skb->tc_index metadata module
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * copyright Jamal Hadi Salim (2016)
- *
 */
 
 #include <linux/types.h>
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8bf66d0..08923b2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_mirred.c	packet mirroring and redirect actions
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Jamal Hadi Salim (2002-4)
  *
  * TODO: Add ingress support (and socket redirect support)
- *
  */
 
 #include <linux/types.h>
@@ -32,6 +27,9 @@
 static LIST_HEAD(mirred_list);
 static DEFINE_SPINLOCK(mirred_list_lock);
 
+#define MIRRED_RECURSION_LIMIT    4
+static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
+
 static bool tcf_mirred_is_act_redirect(int action)
 {
 	return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
@@ -94,22 +92,26 @@
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
 			   int ovr, int bind, bool rtnl_held,
+			   struct tcf_proto *tp,
 			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 	struct nlattr *tb[TCA_MIRRED_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	bool mac_header_xmit = false;
 	struct tc_mirred *parm;
 	struct tcf_mirred *m;
 	struct net_device *dev;
 	bool exists = false;
 	int ret, err;
+	u32 index;
 
 	if (!nla) {
 		NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
 		return -EINVAL;
 	}
-	ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack);
+	ret = nla_parse_nested_deprecated(tb, TCA_MIRRED_MAX, nla,
+					  mirred_policy, extack);
 	if (ret < 0)
 		return ret;
 	if (!tb[TCA_MIRRED_PARMS]) {
@@ -117,8 +119,8 @@
 		return -EINVAL;
 	}
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
-
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0)
 		return err;
 	exists = err;
@@ -135,21 +137,21 @@
 		if (exists)
 			tcf_idr_release(*a, bind);
 		else
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 		NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		if (!parm->ifindex) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
 			return -EINVAL;
 		}
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_mirred_ops, bind, true);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 		ret = ACT_P_CREATED;
@@ -157,18 +159,23 @@
 		tcf_idr_release(*a, bind);
 		return -EEXIST;
 	}
+
 	m = to_mirred(*a);
+	if (ret == ACT_P_CREATED)
+		INIT_LIST_HEAD(&m->tcfm_list);
+
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
 
 	spin_lock_bh(&m->tcf_lock);
-	m->tcf_action = parm->action;
-	m->tcfm_eaction = parm->eaction;
 
 	if (parm->ifindex) {
 		dev = dev_get_by_index(net, parm->ifindex);
 		if (!dev) {
 			spin_unlock_bh(&m->tcf_lock);
-			tcf_idr_release(*a, bind);
-			return -ENODEV;
+			err = -ENODEV;
+			goto put_chain;
 		}
 		mac_header_xmit = dev_is_mac_header_xmit(dev);
 		rcu_swap_protected(m->tcfm_dev, dev,
@@ -177,7 +184,11 @@
 			dev_put(dev);
 		m->tcfm_mac_header_xmit = mac_header_xmit;
 	}
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+	m->tcfm_eaction = parm->eaction;
 	spin_unlock_bh(&m->tcf_lock);
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
 
 	if (ret == ACT_P_CREATED) {
 		spin_lock(&mirred_list_lock);
@@ -188,6 +199,12 @@
 	}
 
 	return ret;
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
@@ -197,6 +214,7 @@
 	struct sk_buff *skb2 = skb;
 	bool m_mac_header_xmit;
 	struct net_device *dev;
+	unsigned int rec_level;
 	int retval, err = 0;
 	bool use_reinsert;
 	bool want_ingress;
@@ -204,6 +222,14 @@
 	int m_eaction;
 	int mac_len;
 
+	rec_level = __this_cpu_inc_return(mirred_rec_level);
+	if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
+		net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
+				     netdev_name(skb->dev));
+		__this_cpu_dec(mirred_rec_level);
+		return TC_ACT_SHOT;
+	}
+
 	tcf_lastuse_update(&m->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
 
@@ -258,12 +284,15 @@
 	if (is_redirect) {
 		skb2->tc_redirected = 1;
 		skb2->tc_from_ingress = skb2->tc_at_ingress;
-
+		if (skb2->tc_from_ingress)
+			skb2->tstamp = 0;
 		/* let the caller reinsert the packet, if possible */
 		if (use_reinsert) {
 			res->ingress = want_ingress;
 			res->qstats = this_cpu_ptr(m->common.cpu_qstats);
-			return TC_ACT_REINSERT;
+			skb_tc_reinsert(skb, res);
+			__this_cpu_dec(mirred_rec_level);
+			return TC_ACT_CONSUMED;
 		}
 	}
 
@@ -278,17 +307,21 @@
 		if (tcf_mirred_is_act_redirect(m_eaction))
 			retval = TC_ACT_SHOT;
 	}
+	__this_cpu_dec(mirred_rec_level);
 
 	return retval;
 }
 
 static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
-			     u64 lastuse)
+			     u64 lastuse, bool hw)
 {
 	struct tcf_mirred *m = to_mirred(a);
 	struct tcf_t *tm = &m->tcf_tm;
 
 	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+	if (hw)
+		_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+				   bytes, packets);
 	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
@@ -338,8 +371,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index,
-			     struct netlink_ext_ack *extack)
+static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
@@ -376,28 +408,39 @@
 	.notifier_call = mirred_device_event,
 };
 
-static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
+static void tcf_mirred_dev_put(void *priv)
+{
+	struct net_device *dev = priv;
+
+	dev_put(dev);
+}
+
+static struct net_device *
+tcf_mirred_get_dev(const struct tc_action *a,
+		   tc_action_priv_destructor *destructor)
 {
 	struct tcf_mirred *m = to_mirred(a);
 	struct net_device *dev;
 
 	rcu_read_lock();
 	dev = rcu_dereference(m->tcfm_dev);
-	if (dev)
+	if (dev) {
 		dev_hold(dev);
+		*destructor = tcf_mirred_dev_put;
+	}
 	rcu_read_unlock();
 
 	return dev;
 }
 
-static void tcf_mirred_put_dev(struct net_device *dev)
+static size_t tcf_mirred_get_fill_size(const struct tc_action *act)
 {
-	dev_put(dev);
+	return nla_total_size(sizeof(struct tc_mirred));
 }
 
 static struct tc_action_ops act_mirred_ops = {
 	.kind		=	"mirred",
-	.type		=	TCA_ACT_MIRRED,
+	.id		=	TCA_ID_MIRRED,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_mirred_act,
 	.stats_update	=	tcf_stats_update,
@@ -406,16 +449,16 @@
 	.init		=	tcf_mirred_init,
 	.walk		=	tcf_mirred_walker,
 	.lookup		=	tcf_mirred_search,
+	.get_fill_size	=	tcf_mirred_get_fill_size,
 	.size		=	sizeof(struct tcf_mirred),
 	.get_dev	=	tcf_mirred_get_dev,
-	.put_dev	=	tcf_mirred_put_dev,
 };
 
 static __net_init int mirred_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-	return tc_action_net_init(tn, &act_mirred_ops);
+	return tc_action_net_init(net, tn, &act_mirred_ops);
 }
 
 static void __net_exit mirred_exit_net(struct list_head *net_list)
@@ -441,7 +484,11 @@
 		return err;
 
 	pr_info("Mirror/redirect action on\n");
-	return tcf_register_action(&act_mirred_ops, &mirred_net_ops);
+	err = tcf_register_action(&act_mirred_ops, &mirred_net_ops);
+	if (err)
+		unregister_netdevice_notifier(&mirred_device_notifier);
+
+	return err;
 }
 
 static void __exit mirred_cleanup_module(void)
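
act_mirred now guards itself against redirect loops with a per-CPU depth counter: every entry increments it, every exit path decrements it, and past MIRRED_RECURSION_LIMIT the packet is shot instead of looping forever through chained redirects. The same shape in a self-contained userspace sketch (thread-local storage standing in for the per-CPU variable):

    #include <stdio.h>

    #define RECURSION_LIMIT 4

    static _Thread_local unsigned int rec_level;

    static int redirect(int depth_wanted)
    {
        int ret;

        if (++rec_level > RECURSION_LIMIT) {
            rec_level--;
            return -1;              /* TC_ACT_SHOT analogue */
        }

        /* a redirect loop re-enters the action */
        ret = depth_wanted > 1 ? redirect(depth_wanted - 1) : 0;

        rec_level--;                /* decremented on every exit path */
        return ret;
    }

    int main(void)
    {
        printf("depth 3: %d\n", redirect(3));   /* 0: within limit */
        printf("depth 9: %d\n", redirect(9));   /* -1: guard trips */
        return 0;
    }
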
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
new file mode 100644
index 0000000..4cf6c55
--- /dev/null
+++ b/net/sched/act_mpls.c
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mpls.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/tc_act/tc_mpls.h>
+#include <net/mpls.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_mpls.h>
+
+static unsigned int mpls_net_id;
+static struct tc_action_ops act_mpls_ops;
+
+#define ACT_MPLS_TTL_DEFAULT	255
+
+static __be32 tcf_mpls_get_lse(struct mpls_shim_hdr *lse,
+			       struct tcf_mpls_params *p, bool set_bos)
+{
+	u32 new_lse = 0;
+
+	if (lse)
+		new_lse = be32_to_cpu(lse->label_stack_entry);
+
+	if (p->tcfm_label != ACT_MPLS_LABEL_NOT_SET) {
+		new_lse &= ~MPLS_LS_LABEL_MASK;
+		new_lse |= p->tcfm_label << MPLS_LS_LABEL_SHIFT;
+	}
+	if (p->tcfm_ttl) {
+		new_lse &= ~MPLS_LS_TTL_MASK;
+		new_lse |= p->tcfm_ttl << MPLS_LS_TTL_SHIFT;
+	}
+	if (p->tcfm_tc != ACT_MPLS_TC_NOT_SET) {
+		new_lse &= ~MPLS_LS_TC_MASK;
+		new_lse |= p->tcfm_tc << MPLS_LS_TC_SHIFT;
+	}
+	if (p->tcfm_bos != ACT_MPLS_BOS_NOT_SET) {
+		new_lse &= ~MPLS_LS_S_MASK;
+		new_lse |= p->tcfm_bos << MPLS_LS_S_SHIFT;
+	} else if (set_bos) {
+		new_lse |= 1 << MPLS_LS_S_SHIFT;
+	}
+
+	return cpu_to_be32(new_lse);
+}
+
+static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
+			struct tcf_result *res)
+{
+	struct tcf_mpls *m = to_mpls(a);
+	struct tcf_mpls_params *p;
+	__be32 new_lse;
+	int ret, mac_len;
+
+	tcf_lastuse_update(&m->tcf_tm);
+	bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
+
+	/* Ensure 'data' points at the mac_header prior to calling MPLS
+	 * manipulating functions.
+	 */
+	if (skb_at_tc_ingress(skb)) {
+		skb_push_rcsum(skb, skb->mac_len);
+		mac_len = skb->mac_len;
+	} else {
+		mac_len = skb_network_header(skb) - skb_mac_header(skb);
+	}
+
+	ret = READ_ONCE(m->tcf_action);
+
+	p = rcu_dereference_bh(m->mpls_p);
+
+	switch (p->tcfm_action) {
+	case TCA_MPLS_ACT_POP:
+		if (skb_mpls_pop(skb, p->tcfm_proto, mac_len))
+			goto drop;
+		break;
+	case TCA_MPLS_ACT_PUSH:
+		new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol));
+		if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len))
+			goto drop;
+		break;
+	case TCA_MPLS_ACT_MODIFY:
+		new_lse = tcf_mpls_get_lse(mpls_hdr(skb), p, false);
+		if (skb_mpls_update_lse(skb, new_lse))
+			goto drop;
+		break;
+	case TCA_MPLS_ACT_DEC_TTL:
+		if (skb_mpls_dec_ttl(skb))
+			goto drop;
+		break;
+	}
+
+	if (skb_at_tc_ingress(skb))
+		skb_pull_rcsum(skb, skb->mac_len);
+
+	return ret;
+
+drop:
+	qstats_drop_inc(this_cpu_ptr(m->common.cpu_qstats));
+	return TC_ACT_SHOT;
+}
+
+static int valid_label(const struct nlattr *attr,
+		       struct netlink_ext_ack *extack)
+{
+	const u32 *label = nla_data(attr);
+
+	if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) {
+		NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
+	[TCA_MPLS_UNSPEC]	= { .strict_start_type = TCA_MPLS_UNSPEC + 1 },
+	[TCA_MPLS_PARMS]	= NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)),
+	[TCA_MPLS_PROTO]	= { .type = NLA_U16 },
+	[TCA_MPLS_LABEL]	= NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label),
+	[TCA_MPLS_TC]		= NLA_POLICY_RANGE(NLA_U8, 0, 7),
+	[TCA_MPLS_TTL]		= NLA_POLICY_MIN(NLA_U8, 1),
+	[TCA_MPLS_BOS]		= NLA_POLICY_RANGE(NLA_U8, 0, 1),
+};
+
+static int tcf_mpls_init(struct net *net, struct nlattr *nla,
+			 struct nlattr *est, struct tc_action **a,
+			 int ovr, int bind, bool rtnl_held,
+			 struct tcf_proto *tp, struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, mpls_net_id);
+	struct nlattr *tb[TCA_MPLS_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
+	struct tcf_mpls_params *p;
+	struct tc_mpls *parm;
+	bool exists = false;
+	struct tcf_mpls *m;
+	int ret = 0, err;
+	u8 mpls_ttl = 0;
+	u32 index;
+
+	if (!nla) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing netlink attributes");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested(tb, TCA_MPLS_MAX, nla, mpls_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_MPLS_PARMS]) {
+		NL_SET_ERR_MSG_MOD(extack, "No MPLS params");
+		return -EINVAL;
+	}
+	parm = nla_data(tb[TCA_MPLS_PARMS]);
+	index = parm->index;
+
+	/* Verify parameters against action type. */
+	switch (parm->m_action) {
+	case TCA_MPLS_ACT_POP:
+		if (!tb[TCA_MPLS_PROTO]) {
+			NL_SET_ERR_MSG_MOD(extack, "Protocol must be set for MPLS pop");
+			return -EINVAL;
+		}
+		if (!eth_proto_is_802_3(nla_get_be16(tb[TCA_MPLS_PROTO]))) {
+			NL_SET_ERR_MSG_MOD(extack, "Invalid protocol type for MPLS pop");
+			return -EINVAL;
+		}
+		if (tb[TCA_MPLS_LABEL] || tb[TCA_MPLS_TTL] || tb[TCA_MPLS_TC] ||
+		    tb[TCA_MPLS_BOS]) {
+			NL_SET_ERR_MSG_MOD(extack, "Label, TTL, TC or BOS cannot be used with MPLS pop");
+			return -EINVAL;
+		}
+		break;
+	case TCA_MPLS_ACT_DEC_TTL:
+		if (tb[TCA_MPLS_PROTO] || tb[TCA_MPLS_LABEL] ||
+		    tb[TCA_MPLS_TTL] || tb[TCA_MPLS_TC] || tb[TCA_MPLS_BOS]) {
+			NL_SET_ERR_MSG_MOD(extack, "Label, TTL, TC, BOS or protocol cannot be used with MPLS dec_ttl");
+			return -EINVAL;
+		}
+		break;
+	case TCA_MPLS_ACT_PUSH:
+		if (!tb[TCA_MPLS_LABEL]) {
+			NL_SET_ERR_MSG_MOD(extack, "Label is required for MPLS push");
+			return -EINVAL;
+		}
+		if (tb[TCA_MPLS_PROTO] &&
+		    !eth_p_mpls(nla_get_be16(tb[TCA_MPLS_PROTO]))) {
+			NL_SET_ERR_MSG_MOD(extack, "Protocol must be an MPLS type for MPLS push");
+			return -EPROTONOSUPPORT;
+		}
+		/* Push needs a TTL - if not specified, set a default value. */
+		if (!tb[TCA_MPLS_TTL]) {
+#if IS_ENABLED(CONFIG_MPLS)
+			mpls_ttl = net->mpls.default_ttl ?
+				   net->mpls.default_ttl : ACT_MPLS_TTL_DEFAULT;
+#else
+			mpls_ttl = ACT_MPLS_TTL_DEFAULT;
+#endif
+		}
+		break;
+	case TCA_MPLS_ACT_MODIFY:
+		if (tb[TCA_MPLS_PROTO]) {
+			NL_SET_ERR_MSG_MOD(extack, "Protocol cannot be used with MPLS modify");
+			return -EINVAL;
+		}
+		break;
+	default:
+		NL_SET_ERR_MSG_MOD(extack, "Unknown MPLS action");
+		return -EINVAL;
+	}
+
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
+	if (exists && bind)
+		return 0;
+
+	if (!exists) {
+		ret = tcf_idr_create(tn, index, est, a,
+				     &act_mpls_ops, bind, true);
+		if (ret) {
+			tcf_idr_cleanup(tn, index);
+			return ret;
+		}
+
+		ret = ACT_P_CREATED;
+	} else if (!ovr) {
+		tcf_idr_release(*a, bind);
+		return -EEXIST;
+	}
+
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
+	m = to_mpls(*a);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		err = -ENOMEM;
+		goto put_chain;
+	}
+
+	p->tcfm_action = parm->m_action;
+	p->tcfm_label = tb[TCA_MPLS_LABEL] ? nla_get_u32(tb[TCA_MPLS_LABEL]) :
+					     ACT_MPLS_LABEL_NOT_SET;
+	p->tcfm_tc = tb[TCA_MPLS_TC] ? nla_get_u8(tb[TCA_MPLS_TC]) :
+				       ACT_MPLS_TC_NOT_SET;
+	p->tcfm_ttl = tb[TCA_MPLS_TTL] ? nla_get_u8(tb[TCA_MPLS_TTL]) :
+					 mpls_ttl;
+	p->tcfm_bos = tb[TCA_MPLS_BOS] ? nla_get_u8(tb[TCA_MPLS_BOS]) :
+					 ACT_MPLS_BOS_NOT_SET;
+	p->tcfm_proto = tb[TCA_MPLS_PROTO] ? nla_get_be16(tb[TCA_MPLS_PROTO]) :
+					     htons(ETH_P_MPLS_UC);
+
+	spin_lock_bh(&m->tcf_lock);
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+	rcu_swap_protected(m->mpls_p, p, lockdep_is_held(&m->tcf_lock));
+	spin_unlock_bh(&m->tcf_lock);
+
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+	if (p)
+		kfree_rcu(p, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_idr_insert(tn, *a);
+	return ret;
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
+}
+
+static void tcf_mpls_cleanup(struct tc_action *a)
+{
+	struct tcf_mpls *m = to_mpls(a);
+	struct tcf_mpls_params *p;
+
+	p = rcu_dereference_protected(m->mpls_p, 1);
+	if (p)
+		kfree_rcu(p, rcu);
+}
+
+static int tcf_mpls_dump(struct sk_buff *skb, struct tc_action *a,
+			 int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_mpls *m = to_mpls(a);
+	struct tcf_mpls_params *p;
+	struct tc_mpls opt = {
+		.index    = m->tcf_index,
+		.refcnt   = refcount_read(&m->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&m->tcf_bindcnt) - bind,
+	};
+	struct tcf_t t;
+
+	spin_lock_bh(&m->tcf_lock);
+	opt.action = m->tcf_action;
+	p = rcu_dereference_protected(m->mpls_p, lockdep_is_held(&m->tcf_lock));
+	opt.m_action = p->tcfm_action;
+
+	if (nla_put(skb, TCA_MPLS_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	if (p->tcfm_label != ACT_MPLS_LABEL_NOT_SET &&
+	    nla_put_u32(skb, TCA_MPLS_LABEL, p->tcfm_label))
+		goto nla_put_failure;
+
+	if (p->tcfm_tc != ACT_MPLS_TC_NOT_SET &&
+	    nla_put_u8(skb, TCA_MPLS_TC, p->tcfm_tc))
+		goto nla_put_failure;
+
+	if (p->tcfm_ttl && nla_put_u8(skb, TCA_MPLS_TTL, p->tcfm_ttl))
+		goto nla_put_failure;
+
+	if (p->tcfm_bos != ACT_MPLS_BOS_NOT_SET &&
+	    nla_put_u8(skb, TCA_MPLS_BOS, p->tcfm_bos))
+		goto nla_put_failure;
+
+	if (nla_put_be16(skb, TCA_MPLS_PROTO, p->tcfm_proto))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&t, &m->tcf_tm);
+
+	if (nla_put_64bit(skb, TCA_MPLS_TM, sizeof(t), &t, TCA_MPLS_PAD))
+		goto nla_put_failure;
+
+	spin_unlock_bh(&m->tcf_lock);
+
+	return skb->len;
+
+nla_put_failure:
+	spin_unlock_bh(&m->tcf_lock);
+	nlmsg_trim(skb, b);
+	return -EMSGSIZE;
+}
+
+static int tcf_mpls_walker(struct net *net, struct sk_buff *skb,
+			   struct netlink_callback *cb, int type,
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, mpls_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, mpls_net_id);
+
+	return tcf_idr_search(tn, a, index);
+}
+
+static struct tc_action_ops act_mpls_ops = {
+	.kind		=	"mpls",
+	.id		=	TCA_ID_MPLS,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_mpls_act,
+	.dump		=	tcf_mpls_dump,
+	.init		=	tcf_mpls_init,
+	.cleanup	=	tcf_mpls_cleanup,
+	.walk		=	tcf_mpls_walker,
+	.lookup		=	tcf_mpls_search,
+	.size		=	sizeof(struct tcf_mpls),
+};
+
+static __net_init int mpls_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, mpls_net_id);
+
+	return tc_action_net_init(net, tn, &act_mpls_ops);
+}
+
+static void __net_exit mpls_exit_net(struct list_head *net_list)
+{
+	tc_action_net_exit(net_list, mpls_net_id);
+}
+
+static struct pernet_operations mpls_net_ops = {
+	.init = mpls_init_net,
+	.exit_batch = mpls_exit_net,
+	.id   = &mpls_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+static int __init mpls_init_module(void)
+{
+	return tcf_register_action(&act_mpls_ops, &mpls_net_ops);
+}
+
+static void __exit mpls_cleanup_module(void)
+{
+	tcf_unregister_action(&act_mpls_ops, &mpls_net_ops);
+}
+
+module_init(mpls_init_module);
+module_exit(mpls_cleanup_module);
+
+MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MPLS manipulation actions");
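
tcf_mpls_get_lse() above edits a 32-bit MPLS label stack entry in place: a 20-bit label, 3-bit traffic class, one bottom-of-stack bit, and an 8-bit TTL packed into one big-endian word. A standalone sketch of the packing, with the shift constants restated locally (the kernel takes them from uapi/linux/mpls.h):

    #include <stdint.h>
    #include <stdio.h>

    #define LS_LABEL_SHIFT  12      /* bits 31..12: label */
    #define LS_TC_SHIFT     9       /* bits 11..9:  traffic class */
    #define LS_S_SHIFT      8       /* bit  8:      bottom of stack */
                                    /* bits 7..0:   TTL */

    static uint32_t build_lse(uint32_t label, uint8_t tc, int bos, uint8_t ttl)
    {
        return (label << LS_LABEL_SHIFT) |
               ((uint32_t)(tc & 0x7) << LS_TC_SHIFT) |
               ((uint32_t)(bos ? 1 : 0) << LS_S_SHIFT) |
               ttl;
    }

    int main(void)
    {
        /* Label 100, TC 0, bottom of stack, TTL 255 - the default TTL
         * act_mpls falls back to when none is configured.
         */
        printf("lse = 0x%08x\n", build_lse(100, 0, 1, 255));
        return 0;
    }
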
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4313aa1..ea4c535 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Stateless NAT actions
  *
  * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
  */
 
 #include <linux/errno.h>
@@ -21,6 +17,7 @@
 #include <linux/string.h>
 #include <linux/tc_act/tc_nat.h>
 #include <net/act_api.h>
+#include <net/pkt_cls.h>
 #include <net/icmp.h>
 #include <net/ip.h>
 #include <net/netlink.h>
@@ -38,31 +35,35 @@
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 			struct tc_action **a, int ovr, int bind,
-			bool rtnl_held, struct netlink_ext_ack *extack)
+			bool rtnl_held,	struct tcf_proto *tp,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 	struct nlattr *tb[TCA_NAT_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tc_nat *parm;
 	int ret = 0, err;
 	struct tcf_nat *p;
+	u32 index;
 
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_NAT_MAX, nla, nat_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
 	if (tb[TCA_NAT_PARMS] == NULL)
 		return -EINVAL;
 	parm = nla_data(tb[TCA_NAT_PARMS]);
-
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (!err) {
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_nat_ops, bind, false);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 		ret = ACT_P_CREATED;
@@ -76,6 +77,9 @@
 	} else {
 		return err;
 	}
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
 	p = to_tcf_nat(*a);
 
 	spin_lock_bh(&p->tcf_lock);
@@ -84,13 +88,18 @@
 	p->mask = parm->mask;
 	p->flags = parm->flags;
 
-	p->tcf_action = parm->action;
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 	spin_unlock_bh(&p->tcf_lock);
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
 
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 
 	return ret;
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a,
@@ -256,28 +265,31 @@
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_nat *p = to_tcf_nat(a);
 	struct tc_nat opt = {
-		.old_addr = p->old_addr,
-		.new_addr = p->new_addr,
-		.mask     = p->mask,
-		.flags    = p->flags,
-
 		.index    = p->tcf_index,
-		.action   = p->tcf_action,
 		.refcnt   = refcount_read(&p->tcf_refcnt) - ref,
 		.bindcnt  = atomic_read(&p->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
+	spin_lock_bh(&p->tcf_lock);
+	opt.old_addr = p->old_addr;
+	opt.new_addr = p->new_addr;
+	opt.mask = p->mask;
+	opt.flags = p->flags;
+	opt.action = p->tcf_action;
+
 	if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
 	tcf_tm_dump(&t, &p->tcf_tm);
 	if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
 		goto nla_put_failure;
+	spin_unlock_bh(&p->tcf_lock);
 
 	return skb->len;
 
 nla_put_failure:
+	spin_unlock_bh(&p->tcf_lock);
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -292,8 +304,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
-			  struct netlink_ext_ack *extack)
+static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
@@ -302,7 +313,7 @@
 
 static struct tc_action_ops act_nat_ops = {
 	.kind		=	"nat",
-	.type		=	TCA_ACT_NAT,
+	.id		=	TCA_ID_NAT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_nat_act,
 	.dump		=	tcf_nat_dump,
@@ -316,7 +327,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-	return tc_action_net_init(tn, &act_nat_ops);
+	return tc_action_net_init(net, tn, &act_nat_ops);
 }
 
 static void __net_exit nat_exit_net(struct list_head *net_list)
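
The tcf_nat_dump() change above is a torn-read fix: the parameters were being copied field by field with no lock held, so a concurrent tcf_nat_init() could leave the dump with a mix of old and new values. The rule the patch enforces - fields updated together under a lock must be read together under the same lock - in a userspace sketch (a pthread mutex standing in for the action's spinlock):

    #include <pthread.h>
    #include <stdio.h>

    struct nat_params {
        unsigned int old_addr, new_addr, mask, flags;
    };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static struct nat_params live = { 1, 2, 0xffffffff, 0 };

    static struct nat_params snapshot(void)
    {
        struct nat_params copy;

        pthread_mutex_lock(&lock);      /* analogue of spin_lock_bh() */
        copy = live;                    /* all fields from one update */
        pthread_mutex_unlock(&lock);
        return copy;
    }

    int main(void)
    {
        struct nat_params p = snapshot();

        printf("old=%u new=%u mask=%#x flags=%u\n",
               p.old_addr, p.new_addr, p.mask, p.flags);
        return 0;
    }
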
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index ca535a8..b5bc631 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_pedit.c	Generic packet editor
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Jamal Hadi Salim (2002-4)
  */
 
@@ -23,6 +19,7 @@
 #include <linux/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_pedit.h>
 #include <uapi/linux/tc_act/tc_pedit.h>
+#include <net/pkt_cls.h>
 
 static unsigned int pedit_net_id;
 static struct tc_action_ops act_pedit_ops;
@@ -46,7 +43,7 @@
 	int err = -EINVAL;
 	int rem;
 
-	if (!nla || !n)
+	if (!nla)
 		return NULL;
 
 	keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL);
@@ -69,8 +66,9 @@
 			goto err_out;
 		}
 
-		err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka,
-				       pedit_key_ex_policy, NULL);
+		err = nla_parse_nested_deprecated(tb, TCA_PEDIT_KEY_EX_MAX,
+						  ka, pedit_key_ex_policy,
+						  NULL);
 		if (err)
 			goto err_out;
 
@@ -107,14 +105,15 @@
 static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 				 struct tcf_pedit_key_ex *keys_ex, int n)
 {
-	struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX);
+	struct nlattr *keys_start = nla_nest_start_noflag(skb,
+							  TCA_PEDIT_KEYS_EX);
 
 	if (!keys_start)
 		goto nla_failure;
 	for (; n > 0; n--) {
 		struct nlattr *key_start;
 
-		key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX);
+		key_start = nla_nest_start_noflag(skb, TCA_PEDIT_KEY_EX);
 		if (!key_start)
 			goto nla_failure;
 
@@ -138,10 +137,11 @@
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
 			  int ovr, int bind, bool rtnl_held,
-			  struct netlink_ext_ack *extack)
+			  struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 	struct nlattr *tb[TCA_PEDIT_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tc_pedit_key *keys = NULL;
 	struct tcf_pedit_key_ex *keys_ex;
 	struct tc_pedit *parm;
@@ -149,13 +149,15 @@
 	struct tcf_pedit *p;
 	int ret = 0, err;
 	int ksize;
+	u32 index;
 
 	if (!nla) {
 		NL_SET_ERR_MSG_MOD(extack, "Pedit requires attributes to be passed");
 		return -EINVAL;
 	}
 
-	err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_PEDIT_MAX, nla,
+					  pedit_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -168,6 +170,10 @@
 	}
 
 	parm = nla_data(pattr);
+	if (!parm->nkeys) {
+		NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
+		return -EINVAL;
+	}
 	ksize = parm->nkeys * sizeof(struct tc_pedit_key);
 	if (nla_len(pattr) < sizeof(*parm) + ksize) {
 		NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid");
@@ -178,18 +184,13 @@
 	if (IS_ERR(keys_ex))
 		return PTR_ERR(keys_ex);
 
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (!err) {
-		if (!parm->nkeys) {
-			tcf_idr_cleanup(tn, parm->index);
-			NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
-			ret = -EINVAL;
-			goto out_free;
-		}
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_pedit_ops, bind, false);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			goto out_free;
 		}
 		ret = ACT_P_CREATED;
@@ -205,6 +206,11 @@
 		goto out_free;
 	}
 
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0) {
+		ret = err;
+		goto out_release;
+	}
 	p = to_pedit(*a);
 	spin_lock_bh(&p->tcf_lock);
 
@@ -214,7 +220,7 @@
 		if (!keys) {
 			spin_unlock_bh(&p->tcf_lock);
 			ret = -ENOMEM;
-			goto out_release;
+			goto put_chain;
 		}
 		kfree(p->tcfp_keys);
 		p->tcfp_keys = keys;
@@ -223,16 +229,21 @@
 	memcpy(p->tcfp_keys, parm->keys, ksize);
 
 	p->tcfp_flags = parm->flags;
-	p->tcf_action = parm->action;
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 
 	kfree(p->tcfp_keys_ex);
 	p->tcfp_keys_ex = keys_ex;
 
 	spin_unlock_bh(&p->tcf_lock);
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 	return ret;
 
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
 out_release:
 	tcf_idr_release(*a, bind);
 out_free:
@@ -406,7 +417,7 @@
 	struct tcf_t t;
 	int s;
 
-	s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key);
+	s = struct_size(opt, keys, p->tcfp_nkeys);
 
 	/* netlink spinlocks held above us - must use ATOMIC */
 	opt = kzalloc(s, GFP_ATOMIC);
@@ -461,8 +472,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
-			    struct netlink_ext_ack *extack)
+static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
@@ -471,7 +481,7 @@
 
 static struct tc_action_ops act_pedit_ops = {
 	.kind		=	"pedit",
-	.type		=	TCA_ACT_PEDIT,
+	.id		=	TCA_ID_PEDIT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_pedit_act,
 	.dump		=	tcf_pedit_dump,
@@ -486,7 +496,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-	return tc_action_net_init(tn, &act_pedit_ops);
+	return tc_action_net_init(net, tn, &act_pedit_ops);
 }
 
 static void __net_exit pedit_exit_net(struct list_head *net_list)
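
The pedit dump above swaps open-coded sizeof arithmetic for struct_size(), the kernel's overflow-checked helper for sizing a header plus a flexible array of n members. What it computes, sketched with plain offsetof arithmetic (the unchecked equivalent; the struct layout here is illustrative):

    #include <stddef.h>
    #include <stdio.h>

    struct key { unsigned int off, val; };

    struct pedit_sel {
        unsigned int index;
        unsigned int nkeys;
        struct key keys[];      /* flexible array member */
    };

    int main(void)
    {
        size_t n = 4;
        /* struct_size(opt, keys, n) == header size + n array members,
         * with the kernel version additionally saturating on overflow.
         */
        size_t s = offsetof(struct pedit_sel, keys) + n * sizeof(struct key);

        printf("header %zu + %zu keys -> %zu bytes\n",
               offsetof(struct pedit_sel, keys), n, s);
        return 0;
    }
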
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 5d8bfa8..89c04c5 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_police.c	Input police filter
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  * 		J Hadi Salim (action changes)
  */
@@ -21,35 +17,8 @@
 #include <linux/slab.h>
 #include <net/act_api.h>
 #include <net/netlink.h>
-
-struct tcf_police {
-	struct tc_action	common;
-	int			tcfp_result;
-	u32			tcfp_ewma_rate;
-	s64			tcfp_burst;
-	u32			tcfp_mtu;
-	s64			tcfp_toks;
-	s64			tcfp_ptoks;
-	s64			tcfp_mtu_ptoks;
-	s64			tcfp_t_c;
-	struct psched_ratecfg	rate;
-	bool			rate_present;
-	struct psched_ratecfg	peak;
-	bool			peak_present;
-};
-
-#define to_police(pc) ((struct tcf_police *)pc)
-
-/* old policer structure from before tc actions */
-struct tc_police_compat {
-	u32			index;
-	int			action;
-	u32			limit;
-	u32			burst;
-	u32			mtu;
-	struct tc_ratespec	rate;
-	struct tc_ratespec	peakrate;
-};
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_police.h>
 
 /* Each policer is serialized by its individual spinlock */
 
@@ -71,26 +40,33 @@
 	[TCA_POLICE_PEAKRATE]	= { .len = TC_RTAB_SIZE },
 	[TCA_POLICE_AVRATE]	= { .type = NLA_U32 },
 	[TCA_POLICE_RESULT]	= { .type = NLA_U32 },
+	[TCA_POLICE_RATE64]     = { .type = NLA_U64 },
+	[TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 },
 };
 
 static int tcf_police_init(struct net *net, struct nlattr *nla,
 			       struct nlattr *est, struct tc_action **a,
 			       int ovr, int bind, bool rtnl_held,
+			       struct tcf_proto *tp,
 			       struct netlink_ext_ack *extack)
 {
-	int ret = 0, err;
+	int ret = 0, tcfp_result = TC_ACT_OK, err, size;
 	struct nlattr *tb[TCA_POLICE_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tc_police *parm;
 	struct tcf_police *police;
 	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
 	struct tc_action_net *tn = net_generic(net, police_net_id);
+	struct tcf_police_params *new;
 	bool exists = false;
-	int size;
+	u32 index;
+	u64 rate64, prate64;
 
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_POLICE_MAX, nla,
+					  police_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -101,7 +77,8 @@
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_POLICE_TBF]);
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0)
 		return err;
 	exists = err;
@@ -109,17 +86,21 @@
 		return 0;
 
 	if (!exists) {
-		ret = tcf_idr_create(tn, parm->index, NULL, a,
-				     &act_police_ops, bind, false);
+		ret = tcf_idr_create(tn, index, NULL, a,
+				     &act_police_ops, bind, true);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 		ret = ACT_P_CREATED;
+		spin_lock_init(&(to_police(*a)->tcfp_lock));
 	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
 		return -EEXIST;
 	}
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
 
 	police = to_police(*a);
 	if (parm->rate.rate) {
@@ -137,7 +118,8 @@
 	}
 
 	if (est) {
-		err = gen_replace_estimator(&police->tcf_bstats, NULL,
+		err = gen_replace_estimator(&police->tcf_bstats,
+					    police->common.cpu_bstats,
 					    &police->tcf_rate_est,
 					    &police->tcf_lock,
 					    NULL, est);
@@ -150,55 +132,85 @@
 		goto failure;
 	}
 
-	spin_lock_bh(&police->tcf_lock);
+	if (tb[TCA_POLICE_RESULT]) {
+		tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
+		if (TC_ACT_EXT_CMP(tcfp_result, TC_ACT_GOTO_CHAIN)) {
+			NL_SET_ERR_MSG(extack,
+				       "goto chain not allowed on fallback");
+			err = -EINVAL;
+			goto failure;
+		}
+	}
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (unlikely(!new)) {
+		err = -ENOMEM;
+		goto failure;
+	}
+
 	/* No failure allowed after this point */
-	police->tcfp_mtu = parm->mtu;
-	if (police->tcfp_mtu == 0) {
-		police->tcfp_mtu = ~0;
+	new->tcfp_result = tcfp_result;
+	new->tcfp_mtu = parm->mtu;
+	if (!new->tcfp_mtu) {
+		new->tcfp_mtu = ~0;
 		if (R_tab)
-			police->tcfp_mtu = 255 << R_tab->rate.cell_log;
+			new->tcfp_mtu = 255 << R_tab->rate.cell_log;
 	}
 	if (R_tab) {
-		police->rate_present = true;
-		psched_ratecfg_precompute(&police->rate, &R_tab->rate, 0);
+		new->rate_present = true;
+		rate64 = tb[TCA_POLICE_RATE64] ?
+			 nla_get_u64(tb[TCA_POLICE_RATE64]) : 0;
+		psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64);
 		qdisc_put_rtab(R_tab);
 	} else {
-		police->rate_present = false;
+		new->rate_present = false;
 	}
 	if (P_tab) {
-		police->peak_present = true;
-		psched_ratecfg_precompute(&police->peak, &P_tab->rate, 0);
+		new->peak_present = true;
+		prate64 = tb[TCA_POLICE_PEAKRATE64] ?
+			  nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0;
+		psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64);
 		qdisc_put_rtab(P_tab);
 	} else {
-		police->peak_present = false;
+		new->peak_present = false;
 	}
 
-	if (tb[TCA_POLICE_RESULT])
-		police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
-	police->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
-	police->tcfp_toks = police->tcfp_burst;
-	if (police->peak_present) {
-		police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak,
-							     police->tcfp_mtu);
-		police->tcfp_ptoks = police->tcfp_mtu_ptoks;
-	}
-	police->tcf_action = parm->action;
+	new->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
+	if (new->peak_present)
+		new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak,
+							 new->tcfp_mtu);
 
 	if (tb[TCA_POLICE_AVRATE])
-		police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
+		new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
 
-	spin_unlock_bh(&police->tcf_lock);
-	if (ret != ACT_P_CREATED)
-		return ret;
-
+	spin_lock_bh(&police->tcf_lock);
+	spin_lock_bh(&police->tcfp_lock);
 	police->tcfp_t_c = ktime_get_ns();
-	tcf_idr_insert(tn, *a);
+	police->tcfp_toks = new->tcfp_burst;
+	if (new->peak_present)
+		police->tcfp_ptoks = new->tcfp_mtu_ptoks;
+	spin_unlock_bh(&police->tcfp_lock);
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+	rcu_swap_protected(police->params,
+			   new,
+			   lockdep_is_held(&police->tcf_lock));
+	spin_unlock_bh(&police->tcf_lock);
 
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+	if (new)
+		kfree_rcu(new, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_idr_insert(tn, *a);
 	return ret;
 
 failure:
 	qdisc_put_rtab(P_tab);
 	qdisc_put_rtab(R_tab);
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
 	tcf_idr_release(*a, bind);
 	return err;
 }
@@ -207,64 +219,86 @@
 			  struct tcf_result *res)
 {
 	struct tcf_police *police = to_police(a);
-	s64 now;
-	s64 toks;
-	s64 ptoks = 0;
+	struct tcf_police_params *p;
+	s64 now, toks, ptoks = 0;
+	int ret;
 
-	spin_lock(&police->tcf_lock);
-
-	bstats_update(&police->tcf_bstats, skb);
 	tcf_lastuse_update(&police->tcf_tm);
+	bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb);
 
-	if (police->tcfp_ewma_rate) {
+	ret = READ_ONCE(police->tcf_action);
+	p = rcu_dereference_bh(police->params);
+
+	if (p->tcfp_ewma_rate) {
 		struct gnet_stats_rate_est64 sample;
 
 		if (!gen_estimator_read(&police->tcf_rate_est, &sample) ||
-		    sample.bps >= police->tcfp_ewma_rate) {
-			police->tcf_qstats.overlimits++;
-			if (police->tcf_action == TC_ACT_SHOT)
-				police->tcf_qstats.drops++;
-			spin_unlock(&police->tcf_lock);
-			return police->tcf_action;
-		}
+		    sample.bps >= p->tcfp_ewma_rate)
+			goto inc_overlimits;
 	}
 
-	if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
-		if (!police->rate_present) {
-			spin_unlock(&police->tcf_lock);
-			return police->tcfp_result;
+	if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
+		if (!p->rate_present) {
+			ret = p->tcfp_result;
+			goto end;
 		}
 
 		now = ktime_get_ns();
-		toks = min_t(s64, now - police->tcfp_t_c,
-			     police->tcfp_burst);
-		if (police->peak_present) {
+		spin_lock_bh(&police->tcfp_lock);
+		toks = min_t(s64, now - police->tcfp_t_c, p->tcfp_burst);
+		if (p->peak_present) {
 			ptoks = toks + police->tcfp_ptoks;
-			if (ptoks > police->tcfp_mtu_ptoks)
-				ptoks = police->tcfp_mtu_ptoks;
-			ptoks -= (s64) psched_l2t_ns(&police->peak,
-						     qdisc_pkt_len(skb));
+			if (ptoks > p->tcfp_mtu_ptoks)
+				ptoks = p->tcfp_mtu_ptoks;
+			ptoks -= (s64)psched_l2t_ns(&p->peak,
+						    qdisc_pkt_len(skb));
 		}
 		toks += police->tcfp_toks;
-		if (toks > police->tcfp_burst)
-			toks = police->tcfp_burst;
-		toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb));
+		if (toks > p->tcfp_burst)
+			toks = p->tcfp_burst;
+		toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
 		if ((toks|ptoks) >= 0) {
 			police->tcfp_t_c = now;
 			police->tcfp_toks = toks;
 			police->tcfp_ptoks = ptoks;
-			if (police->tcfp_result == TC_ACT_SHOT)
-				police->tcf_qstats.drops++;
-			spin_unlock(&police->tcf_lock);
-			return police->tcfp_result;
+			spin_unlock_bh(&police->tcfp_lock);
+			ret = p->tcfp_result;
+			goto inc_drops;
 		}
+		spin_unlock_bh(&police->tcfp_lock);
 	}
 
-	police->tcf_qstats.overlimits++;
-	if (police->tcf_action == TC_ACT_SHOT)
-		police->tcf_qstats.drops++;
-	spin_unlock(&police->tcf_lock);
-	return police->tcf_action;
+inc_overlimits:
+	qstats_overlimit_inc(this_cpu_ptr(police->common.cpu_qstats));
+inc_drops:
+	if (ret == TC_ACT_SHOT)
+		qstats_drop_inc(this_cpu_ptr(police->common.cpu_qstats));
+end:
+	return ret;
+}
+
+static void tcf_police_cleanup(struct tc_action *a)
+{
+	struct tcf_police *police = to_police(a);
+	struct tcf_police_params *p;
+
+	p = rcu_dereference_protected(police->params, 1);
+	if (p)
+		kfree_rcu(p, rcu);
+}
+
+static void tcf_police_stats_update(struct tc_action *a,
+				    u64 bytes, u32 packets,
+				    u64 lastuse, bool hw)
+{
+	struct tcf_police *police = to_police(a);
+	struct tcf_t *tm = &police->tcf_tm;
+
+	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+	if (hw)
+		_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+				   bytes, packets);
+	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
 static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
@@ -272,6 +306,7 @@
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_police *police = to_police(a);
+	struct tcf_police_params *p;
 	struct tc_police opt = {
 		.index = police->tcf_index,
 		.refcnt = refcount_read(&police->tcf_refcnt) - ref,
@@ -281,19 +316,33 @@
 
 	spin_lock_bh(&police->tcf_lock);
 	opt.action = police->tcf_action;
-	opt.mtu = police->tcfp_mtu;
-	opt.burst = PSCHED_NS2TICKS(police->tcfp_burst);
-	if (police->rate_present)
-		psched_ratecfg_getrate(&opt.rate, &police->rate);
-	if (police->peak_present)
-		psched_ratecfg_getrate(&opt.peakrate, &police->peak);
+	p = rcu_dereference_protected(police->params,
+				      lockdep_is_held(&police->tcf_lock));
+	opt.mtu = p->tcfp_mtu;
+	opt.burst = PSCHED_NS2TICKS(p->tcfp_burst);
+	if (p->rate_present) {
+		psched_ratecfg_getrate(&opt.rate, &p->rate);
+		if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) &&
+		    nla_put_u64_64bit(skb, TCA_POLICE_RATE64,
+				      police->params->rate.rate_bytes_ps,
+				      TCA_POLICE_PAD))
+			goto nla_put_failure;
+	}
+	if (p->peak_present) {
+		psched_ratecfg_getrate(&opt.peakrate, &p->peak);
+		if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) &&
+		    nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64,
+				      police->params->peak.rate_bytes_ps,
+				      TCA_POLICE_PAD))
+			goto nla_put_failure;
+	}
 	if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
 		goto nla_put_failure;
-	if (police->tcfp_result &&
-	    nla_put_u32(skb, TCA_POLICE_RESULT, police->tcfp_result))
+	if (p->tcfp_result &&
+	    nla_put_u32(skb, TCA_POLICE_RESULT, p->tcfp_result))
 		goto nla_put_failure;
-	if (police->tcfp_ewma_rate &&
-	    nla_put_u32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate))
+	if (p->tcfp_ewma_rate &&
+	    nla_put_u32(skb, TCA_POLICE_AVRATE, p->tcfp_ewma_rate))
 		goto nla_put_failure;
 
 	t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install);
@@ -312,8 +361,7 @@
 	return -1;
 }
 
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
-			     struct netlink_ext_ack *extack)
+static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
@@ -326,13 +374,15 @@
 
 static struct tc_action_ops act_police_ops = {
 	.kind		=	"police",
-	.type		=	TCA_ID_POLICE,
+	.id		=	TCA_ID_POLICE,
 	.owner		=	THIS_MODULE,
+	.stats_update	=	tcf_police_stats_update,
 	.act		=	tcf_police_act,
 	.dump		=	tcf_police_dump,
 	.init		=	tcf_police_init,
 	.walk		=	tcf_police_walker,
 	.lookup		=	tcf_police_search,
+	.cleanup	=	tcf_police_cleanup,
 	.size		=	sizeof(struct tcf_police),
 };
 
@@ -340,7 +390,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
-	return tc_action_net_init(tn, &act_police_ops);
+	return tc_action_net_init(net, tn, &act_police_ops);
 }
 
 static void __net_exit police_exit_net(struct list_head *net_list)
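
Editor's note: the tcf_police_act() rewrite above keeps the classic token-bucket test but moves the mutable bucket state under the new tcfp_lock and the read-mostly configuration behind RCU (struct tcf_police_params). For reference, a standalone userspace model of the conform/exceed arithmetic — single-rate path only; the peak-rate ptoks bookkeeping is analogous. All names, the example rate, and main() are illustrative, not kernel API:

/* Standalone model of the act_police conform test (single-rate path). */
#include <stdint.h>
#include <stdio.h>

struct bucket {
	int64_t burst_ns;	/* bucket depth: credit is stored as ns */
	int64_t toks;		/* unused credit carried from last packet */
	int64_t t_c;		/* timestamp of the last conforming packet */
	int64_t ns_per_byte;	/* precomputed from the configured rate */
};

/* returns 1 if the packet conforms (tcfp_result path), 0 if over limit */
static int police_conforms(struct bucket *b, int64_t now, uint32_t pkt_len)
{
	/* credit earned while idle, capped at the burst ... */
	int64_t toks = now - b->t_c;

	if (toks > b->burst_ns)
		toks = b->burst_ns;
	/* ... plus leftover credit, capped again ... */
	toks += b->toks;
	if (toks > b->burst_ns)
		toks = b->burst_ns;
	/* ... minus the time this packet takes at the configured rate */
	toks -= (int64_t)pkt_len * b->ns_per_byte;
	if (toks < 0)
		return 0;	/* over limit: bucket state left untouched */
	b->t_c = now;		/* conform: commit the new bucket state */
	b->toks = toks;
	return 1;
}

int main(void)
{
	/* ~1 Mbyte/s => 1000 ns of credit per byte; 10 ms burst */
	struct bucket b = { 10000000, 10000000, 0, 1000 };
	int64_t now = 0;

	for (int i = 0; i < 10; i++) {
		now += 500000;	/* a 1500-byte packet every 0.5 ms */
		printf("pkt %d: %s\n", i,
		       police_conforms(&b, now, 1500) ? "conform" : "exceed");
	}
	return 0;
}

Sending 3 MB/s against a 1 MB/s rate, the burst absorbs the first packets and the bucket then runs dry, exactly the behaviour the in-kernel (toks|ptoks) >= 0 test produces.
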
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 6b67aa1..514456a 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/act_sample.c - Packet sampling tc action
  * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -22,6 +19,7 @@
 #include <linux/tc_act/tc_sample.h>
 #include <net/tc_act/tc_sample.h>
 #include <net/psample.h>
+#include <net/pkt_cls.h>
 
 #include <linux/if_arp.h>
 
@@ -37,21 +35,23 @@
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind, bool rtnl_held,
+			   int bind, bool rtnl_held, struct tcf_proto *tp,
 			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 	struct nlattr *tb[TCA_SAMPLE_MAX + 1];
 	struct psample_group *psample_group;
+	u32 psample_group_num, rate, index;
+	struct tcf_chain *goto_ch = NULL;
 	struct tc_sample *parm;
-	u32 psample_group_num;
 	struct tcf_sample *s;
 	bool exists = false;
 	int ret, err;
 
 	if (!nla)
 		return -EINVAL;
-	ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy, NULL);
+	ret = nla_parse_nested_deprecated(tb, TCA_SAMPLE_MAX, nla,
+					  sample_policy, NULL);
 	if (ret < 0)
 		return ret;
 	if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] ||
@@ -59,8 +59,8 @@
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_SAMPLE_PARMS]);
-
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0)
 		return err;
 	exists = err;
@@ -68,10 +68,10 @@
 		return 0;
 
 	if (!exists) {
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_sample_ops, bind, true);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 		ret = ACT_P_CREATED;
@@ -79,21 +79,31 @@
 		tcf_idr_release(*a, bind);
 		return -EEXIST;
 	}
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
 
+	rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
+	if (!rate) {
+		NL_SET_ERR_MSG(extack, "invalid sample rate");
+		err = -EINVAL;
+		goto put_chain;
+	}
 	psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
 	psample_group = psample_group_get(net, psample_group_num);
 	if (!psample_group) {
-		tcf_idr_release(*a, bind);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto put_chain;
 	}
 
 	s = to_sample(*a);
 
 	spin_lock_bh(&s->tcf_lock);
-	s->tcf_action = parm->action;
-	s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+	s->rate = rate;
 	s->psample_group_num = psample_group_num;
-	RCU_INIT_POINTER(s->psample_group, psample_group);
+	rcu_swap_protected(s->psample_group, psample_group,
+			   lockdep_is_held(&s->tcf_lock));
 
 	if (tb[TCA_SAMPLE_TRUNC_SIZE]) {
 		s->truncate = true;
@@ -101,9 +111,20 @@
 	}
 	spin_unlock_bh(&s->tcf_lock);
 
+	if (psample_group)
+		psample_group_put(psample_group);
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 	return ret;
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 static void tcf_sample_cleanup(struct tc_action *a)
@@ -125,6 +146,7 @@
 	case ARPHRD_TUNNEL6:
 	case ARPHRD_SIT:
 	case ARPHRD_IPGRE:
+	case ARPHRD_IP6GRE:
 	case ARPHRD_VOID:
 	case ARPHRD_NONE:
 		return false;
@@ -224,17 +246,42 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
-			     struct netlink_ext_ack *extack)
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
 	return tcf_idr_search(tn, a, index);
 }
 
+static void tcf_psample_group_put(void *priv)
+{
+	struct psample_group *group = priv;
+
+	psample_group_put(group);
+}
+
+static struct psample_group *
+tcf_sample_get_group(const struct tc_action *a,
+		     tc_action_priv_destructor *destructor)
+{
+	struct tcf_sample *s = to_sample(a);
+	struct psample_group *group;
+
+	spin_lock_bh(&s->tcf_lock);
+	group = rcu_dereference_protected(s->psample_group,
+					  lockdep_is_held(&s->tcf_lock));
+	if (group) {
+		psample_group_take(group);
+		*destructor = tcf_psample_group_put;
+	}
+	spin_unlock_bh(&s->tcf_lock);
+
+	return group;
+}
+
 static struct tc_action_ops act_sample_ops = {
 	.kind	  = "sample",
-	.type	  = TCA_ACT_SAMPLE,
+	.id	  = TCA_ID_SAMPLE,
 	.owner	  = THIS_MODULE,
 	.act	  = tcf_sample_act,
 	.dump	  = tcf_sample_dump,
@@ -242,6 +289,7 @@
 	.cleanup  = tcf_sample_cleanup,
 	.walk	  = tcf_sample_walker,
 	.lookup	  = tcf_sample_search,
+	.get_psample_group = tcf_sample_get_group,
 	.size	  = sizeof(struct tcf_sample),
 };
 
@@ -249,7 +297,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-	return tc_action_net_init(tn, &act_sample_ops);
+	return tc_action_net_init(net, tn, &act_sample_ops);
 }
 
 static void __net_exit sample_exit_net(struct list_head *net_list)
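
Editor's note: act_sample's init path now follows the unwind discipline shared by every action in this series — validate the goto_chain control action once the IDR slot is held, and on any later failure release the chain reference before the IDR reference. A minimal userspace sketch of that goto-ladder shape (resource names are hypothetical, not kernel API):

/* Each acquisition that can fail jumps to the label releasing exactly
 * what is already held, in reverse order of acquisition. */
#include <errno.h>
#include <stdio.h>

static int take(const char *what, int ok)
{
	printf("%s %s\n", ok ? "take" : "fail", what);
	return ok;
}

static void drop(const char *what)
{
	printf("drop %s\n", what);
}

static int demo_init(int fail_at)
{
	int err;

	if (!take("idr slot", fail_at != 0))
		return -ENOSPC;

	if (!take("goto chain", fail_at != 1)) {
		err = -EINVAL;
		goto release_idr;
	}

	if (!take("params", fail_at != 2)) {
		err = -ENOMEM;
		goto put_chain;
	}

	printf("installed\n");
	return 0;

put_chain:
	drop("goto chain");
release_idr:
	drop("idr slot");
	return err;
}

int main(void)
{
	demo_init(2);	/* params allocation fails: chain then idr unwound */
	return 0;
}
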
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 52400d4..6120e56 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_simple.c	Simple example of an action
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Jamal Hadi Salim (2005-8)
- *
  */
 
 #include <linux/module.h>
@@ -18,8 +13,7 @@
 #include <linux/rtnetlink.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
-
-#define TCA_ACT_SIMP 22
+#include <net/pkt_cls.h>
 
 #include <linux/tc_act/tc_defact.h>
 #include <net/tc_act/tc_defact.h>
@@ -62,14 +56,26 @@
 	return 0;
 }
 
-static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata,
-			 struct tc_defact *p)
+static int reset_policy(struct tc_action *a, const struct nlattr *defdata,
+			struct tc_defact *p, struct tcf_proto *tp,
+			struct netlink_ext_ack *extack)
 {
+	struct tcf_chain *goto_ch = NULL;
+	struct tcf_defact *d;
+	int err;
+
+	err = tcf_action_check_ctrlact(p->action, tp, &goto_ch, extack);
+	if (err < 0)
+		return err;
+	d = to_defact(a);
 	spin_lock_bh(&d->tcf_lock);
-	d->tcf_action = p->action;
+	goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch);
 	memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
 	nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
 	spin_unlock_bh(&d->tcf_lock);
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+	return 0;
 }
 
 static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
@@ -80,19 +86,22 @@
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
 			 int ovr, int bind, bool rtnl_held,
-			 struct netlink_ext_ack *extack)
+			 struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 	struct nlattr *tb[TCA_DEF_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tc_defact *parm;
 	struct tcf_defact *d;
 	bool exists = false;
 	int ret = 0, err;
+	u32 index;
 
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_DEF_MAX, nla, simple_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -100,7 +109,8 @@
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_DEF_PARMS]);
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0)
 		return err;
 	exists = err;
@@ -111,40 +121,50 @@
 		if (exists)
 			tcf_idr_release(*a, bind);
 		else
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_simp_ops, bind, false);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 
 		d = to_defact(*a);
-		ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
-		if (ret < 0) {
-			tcf_idr_release(*a, bind);
-			return ret;
-		}
-		d->tcf_action = parm->action;
+		err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+					       extack);
+		if (err < 0)
+			goto release_idr;
+
+		err = alloc_defdata(d, tb[TCA_DEF_DATA]);
+		if (err < 0)
+			goto put_chain;
+
+		tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 		ret = ACT_P_CREATED;
 	} else {
-		d = to_defact(*a);
-
 		if (!ovr) {
-			tcf_idr_release(*a, bind);
-			return -EEXIST;
+			err = -EEXIST;
+			goto release_idr;
 		}
 
-		reset_policy(d, tb[TCA_DEF_DATA], parm);
+		err = reset_policy(*a, tb[TCA_DEF_DATA], parm, tp, extack);
+		if (err)
+			goto release_idr;
 	}
 
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 	return ret;
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
@@ -188,8 +208,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
-			   struct netlink_ext_ack *extack)
+static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
@@ -198,7 +217,7 @@
 
 static struct tc_action_ops act_simp_ops = {
 	.kind		=	"simple",
-	.type		=	TCA_ACT_SIMP,
+	.id		=	TCA_ID_SIMP,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_simp_act,
 	.dump		=	tcf_simp_dump,
@@ -213,7 +232,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-	return tc_action_net_init(tn, &act_simp_ops);
+	return tc_action_net_init(net, tn, &act_simp_ops);
 }
 
 static void __net_exit simp_exit_net(struct list_head *net_list)
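
Editor's note: reset_policy() above shows the locking recipe these conversions apply everywhere: the step that can fail (tcf_action_check_ctrlact()) runs before the spinlock, the swap itself happens under the lock, and the displaced value is released only after unlock. A standalone pthread model of the same sequence (names illustrative; strdup() stands in for the fallible validation step):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static char *defdata;		/* stands in for d->tcfd_defdata */

static int reset_policy(const char *new_str)
{
	char *new, *old;

	new = strdup(new_str);	/* fallible work done before the lock */
	if (!new)
		return -1;

	pthread_mutex_lock(&lock);
	old = defdata;		/* swap under the lock ... */
	defdata = new;
	pthread_mutex_unlock(&lock);

	free(old);		/* ... release the old value outside it */
	return 0;
}

int main(void)
{
	reset_policy("first");
	reset_policy("second");
	printf("defdata = %s\n", defdata);
	free(defdata);
	return 0;
}

Keeping the failure-prone step outside the critical section is what lets the locked region stay tiny and free of error paths, mirroring the "No failure allowed after this point" style seen in act_police above.
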
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 73e44ce..6a8d333 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008, Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Author: Alexander Duyck <alexander.h.duyck@intel.com>
  */
 
@@ -26,6 +15,7 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/dsfield.h>
+#include <net/pkt_cls.h>
 
 #include <linux/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_skbedit.h>
@@ -96,22 +86,26 @@
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 			    struct nlattr *est, struct tc_action **a,
 			    int ovr, int bind, bool rtnl_held,
+			    struct tcf_proto *tp,
 			    struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
-	struct tcf_skbedit_params *params_old, *params_new;
+	struct tcf_skbedit_params *params_new;
 	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tc_skbedit *parm;
 	struct tcf_skbedit *d;
 	u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
 	u16 *queue_mapping = NULL, *ptype = NULL;
 	bool exists = false;
 	int ret = 0, err;
+	u32 index;
 
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_SKBEDIT_MAX, nla,
+					  skbedit_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -153,8 +147,8 @@
 	}
 
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
-
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0)
 		return err;
 	exists = err;
@@ -165,15 +159,15 @@
 		if (exists)
 			tcf_idr_release(*a, bind);
 		else
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_skbedit_ops, bind, true);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 
@@ -186,14 +180,14 @@
 			return -EEXIST;
 		}
 	}
-
-	ASSERT_RTNL();
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
 
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto put_chain;
 	}
 
 	params_new->flags = flags;
@@ -210,15 +204,25 @@
 	if (flags & SKBEDIT_F_MASK)
 		params_new->mask = *mask;
 
-	d->tcf_action = parm->action;
-	params_old = rtnl_dereference(d->params);
-	rcu_assign_pointer(d->params, params_new);
-	if (params_old)
-		kfree_rcu(params_old, rcu);
+	spin_lock_bh(&d->tcf_lock);
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+	rcu_swap_protected(d->params, params_new,
+			   lockdep_is_held(&d->tcf_lock));
+	spin_unlock_bh(&d->tcf_lock);
+	if (params_new)
+		kfree_rcu(params_new, rcu);
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
 
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 	return ret;
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
@@ -231,12 +235,14 @@
 		.index   = d->tcf_index,
 		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
 		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
-		.action  = d->tcf_action,
 	};
 	u64 pure_flags = 0;
 	struct tcf_t t;
 
-	params = rtnl_dereference(d->params);
+	spin_lock_bh(&d->tcf_lock);
+	params = rcu_dereference_protected(d->params,
+					   lockdep_is_held(&d->tcf_lock));
+	opt.action = d->tcf_action;
 
 	if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
@@ -264,9 +270,12 @@
 	tcf_tm_dump(&t, &d->tcf_tm);
 	if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
 		goto nla_put_failure;
+	spin_unlock_bh(&d->tcf_lock);
+
 	return skb->len;
 
 nla_put_failure:
+	spin_unlock_bh(&d->tcf_lock);
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -291,23 +300,34 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
-			      struct netlink_ext_ack *extack)
+static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
 	return tcf_idr_search(tn, a, index);
 }
 
+static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
+{
+	return nla_total_size(sizeof(struct tc_skbedit))
+		+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */
+		+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */
+		+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */
+		+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */
+		+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */
+		+ nla_total_size_64bit(sizeof(u64)); /* TCA_SKBEDIT_FLAGS */
+}
+
 static struct tc_action_ops act_skbedit_ops = {
 	.kind		=	"skbedit",
-	.type		=	TCA_ACT_SKBEDIT,
+	.id		=	TCA_ID_SKBEDIT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_skbedit_act,
 	.dump		=	tcf_skbedit_dump,
 	.init		=	tcf_skbedit_init,
 	.cleanup	=	tcf_skbedit_cleanup,
 	.walk		=	tcf_skbedit_walker,
+	.get_fill_size	=	tcf_skbedit_get_fill_size,
 	.lookup		=	tcf_skbedit_search,
 	.size		=	sizeof(struct tcf_skbedit),
 };
@@ -316,7 +336,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-	return tc_action_net_init(tn, &act_skbedit_ops);
+	return tc_action_net_init(net, tn, &act_skbedit_ops);
 }
 
 static void __net_exit skbedit_exit_net(struct list_head *net_list)
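
Editor's note: the new tcf_skbedit_get_fill_size() lets the dump path presize the netlink message instead of discovering EMSGSIZE by trial. A self-contained model of the size arithmetic, assuming the standard netlink alignment rules; the PARMS payload value here is a stand-in for sizeof(struct tc_skbedit):

#include <stdint.h>
#include <stdio.h>

#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN	((int)NLA_ALIGN(4))	/* struct nlattr is 4 bytes */

/* header plus payload, rounded up to the netlink alignment */
static int nla_total_size(int payload)
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
	int size = nla_total_size(20)			/* PARMS (stand-in) */
		 + nla_total_size(sizeof(uint32_t))	/* PRIORITY */
		 + nla_total_size(sizeof(uint16_t))	/* QUEUE_MAPPING */
		 + nla_total_size(sizeof(uint32_t))	/* MARK */
		 + nla_total_size(sizeof(uint16_t))	/* PTYPE */
		 + nla_total_size(sizeof(uint32_t))	/* MASK */
		 /* 64-bit attrs may carry one extra pad attribute */
		 + nla_total_size(sizeof(uint64_t)) + nla_total_size(0);

	printf("worst-case skbedit dump payload: %d bytes\n", size);
	return 0;
}
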
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 588077f..888437f 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/act_skbmod.c  skb data modifier
  *
  * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
 */
 
 #include <linux/module.h>
@@ -16,6 +12,7 @@
 #include <linux/rtnetlink.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 
 #include <linux/tc_act/tc_skbmod.h>
 #include <net/tc_act/tc_skbmod.h>
@@ -82,24 +79,27 @@
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
 			   int ovr, int bind, bool rtnl_held,
+			   struct tcf_proto *tp,
 			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
 	struct tcf_skbmod_params *p, *p_old;
+	struct tcf_chain *goto_ch = NULL;
 	struct tc_skbmod *parm;
+	u32 lflags = 0, index;
 	struct tcf_skbmod *d;
 	bool exists = false;
 	u8 *daddr = NULL;
 	u8 *saddr = NULL;
 	u16 eth_type = 0;
-	u32 lflags = 0;
 	int ret = 0, err;
 
 	if (!nla)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_SKBMOD_MAX, nla,
+					  skbmod_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -122,10 +122,11 @@
 	}
 
 	parm = nla_data(tb[TCA_SKBMOD_PARMS]);
+	index = parm->index;
 	if (parm->flags & SKBMOD_F_SWAPMAC)
 		lflags = SKBMOD_F_SWAPMAC;
 
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0)
 		return err;
 	exists = err;
@@ -136,15 +137,15 @@
 		if (exists)
 			tcf_idr_release(*a, bind);
 		else
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_skbmod_ops, bind, true);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 
@@ -153,21 +154,24 @@
 		tcf_idr_release(*a, bind);
 		return -EEXIST;
 	}
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
 
 	d = to_skbmod(*a);
 
 	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
 	if (unlikely(!p)) {
-		tcf_idr_release(*a, bind);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto put_chain;
 	}
 
 	p->flags = lflags;
-	d->tcf_action = parm->action;
 
 	if (ovr)
 		spin_lock_bh(&d->tcf_lock);
 	/* Protected by tcf_lock if overwriting existing action. */
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 	p_old = rcu_dereference_protected(d->skbmod_p, 1);
 
 	if (lflags & SKBMOD_F_DMAC)
@@ -183,10 +187,18 @@
 
 	if (p_old)
 		kfree_rcu(p_old, rcu);
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
 
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 	return ret;
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 static void tcf_skbmod_cleanup(struct tc_action *a)
@@ -251,8 +263,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
-			     struct netlink_ext_ack *extack)
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
@@ -261,7 +272,7 @@
 
 static struct tc_action_ops act_skbmod_ops = {
 	.kind		=	"skbmod",
-	.type		=	TCA_ACT_SKBMOD,
+	.id		=	TCA_ACT_SKBMOD,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_skbmod_act,
 	.dump		=	tcf_skbmod_dump,
@@ -276,7 +287,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-	return tc_action_net_init(tn, &act_skbmod_ops);
+	return tc_action_net_init(net, tn, &act_skbmod_ops);
 }
 
 static void __net_exit skbmod_exit_net(struct list_head *net_list)
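
Editor's note: the `index = parm->index` change, repeated in every action in this series, exists because tcf_idr_check_alloc() writes a freshly allocated index back through its pointer when the caller passed 0 — and parm points into the received netlink message, which must not be scribbled on. A standalone illustration (the allocator here is a stand-in, not the kernel function):

#include <stdint.h>
#include <stdio.h>

static uint32_t next_index = 1;

/* stand-in for tcf_idr_check_alloc(): allocates when *index == 0 */
static int idr_check_alloc(uint32_t *index)
{
	if (*index == 0)
		*index = next_index++;
	return 0;
}

int main(void)
{
	uint32_t msg_parm_index = 0;	/* pretend this lives in the skb */
	uint32_t index = msg_parm_index; /* local copy, as in the patch */

	idr_check_alloc(&index);
	printf("allocated index %u, message buffer still says %u\n",
	       index, msg_parm_index);
	return 0;
}
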
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 681f6f0..d55669e 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -17,6 +13,7 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
+#include <net/pkt_cls.h>
 
 #include <linux/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_tunnel_key.h>
@@ -75,8 +72,9 @@
 	int err, data_len, opt_len;
 	u8 *data;
 
-	err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX,
-			       nla, geneve_opt_policy, extack);
+	err = nla_parse_nested_deprecated(tb,
+					  TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX,
+					  nla, geneve_opt_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -124,8 +122,8 @@
 	int err, rem, opt_len, len = nla_len(nla), opts_len = 0;
 	const struct nlattr *attr, *head = nla_data(nla);
 
-	err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX,
-			   enc_opts_policy, extack);
+	err = nla_validate_deprecated(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX,
+				      enc_opts_policy, extack);
 	if (err)
 		return err;
 
@@ -137,6 +135,10 @@
 			if (opt_len < 0)
 				return opt_len;
 			opts_len += opt_len;
+			if (opts_len > IP_TUNNEL_OPTS_MAX) {
+				NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
+				return -EINVAL;
+			}
 			if (dst) {
 				dst_len -= opt_len;
 				dst += opt_len;
@@ -197,24 +199,37 @@
 	[TCA_TUNNEL_KEY_ENC_TTL]      = { .type = NLA_U8 },
 };
 
+static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
+{
+	if (!p)
+		return;
+	if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+		dst_release(&p->tcft_enc_metadata->dst);
+
+	kfree_rcu(p, rcu);
+}
+
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
 			   int ovr, int bind, bool rtnl_held,
+			   struct tcf_proto *tp,
 			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 	struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
 	struct tcf_tunnel_key_params *params_new;
 	struct metadata_dst *metadata = NULL;
+	struct tcf_chain *goto_ch = NULL;
 	struct tc_tunnel_key *parm;
 	struct tcf_tunnel_key *t;
 	bool exists = false;
 	__be16 dst_port = 0;
+	__be64 key_id = 0;
 	int opts_len = 0;
-	__be64 key_id;
-	__be16 flags;
+	__be16 flags = 0;
 	u8 tos, ttl;
 	int ret = 0;
+	u32 index;
 	int err;
 
 	if (!nla) {
@@ -222,8 +237,8 @@
 		return -EINVAL;
 	}
 
-	err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy,
-			       extack);
+	err = nla_parse_nested_deprecated(tb, TCA_TUNNEL_KEY_MAX, nla,
+					  tunnel_key_policy, extack);
 	if (err < 0) {
 		NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes");
 		return err;
@@ -235,7 +250,8 @@
 	}
 
 	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0)
 		return err;
 	exists = err;
@@ -246,15 +262,15 @@
 	case TCA_TUNNEL_KEY_ACT_RELEASE:
 		break;
 	case TCA_TUNNEL_KEY_ACT_SET:
-		if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
-			NL_SET_ERR_MSG(extack, "Missing tunnel key id");
-			ret = -EINVAL;
-			goto err_out;
+		if (tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
+			__be32 key32;
+
+			key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]);
+			key_id = key32_to_tunnel_id(key32);
+			flags = TUNNEL_KEY;
 		}
 
-		key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]));
-
-		flags = TUNNEL_KEY | TUNNEL_CSUM;
+		flags |= TUNNEL_CSUM;
 		if (tb[TCA_TUNNEL_KEY_NO_CSUM] &&
 		    nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM]))
 			flags &= ~TUNNEL_CSUM;
@@ -312,6 +328,12 @@
 			goto err_out;
 		}
 
+#ifdef CONFIG_DST_CACHE
+		ret = dst_cache_init(&metadata->u.tun_info.dst_cache, GFP_KERNEL);
+		if (ret)
+			goto release_tun_meta;
+#endif
+
 		if (opts_len) {
 			ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
 						  &metadata->u.tun_info,
@@ -329,7 +351,7 @@
 	}
 
 	if (!exists) {
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_tunnel_key_ops, bind, true);
 		if (ret) {
 			NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
@@ -343,6 +365,12 @@
 		goto release_tun_meta;
 	}
 
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0) {
+		ret = err;
+		exists = true;
+		goto release_tun_meta;
+	}
 	t = to_tunnel_key(*a);
 
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
@@ -350,32 +378,38 @@
 		NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
 		ret = -ENOMEM;
 		exists = true;
-		goto release_tun_meta;
+		goto put_chain;
 	}
 	params_new->tcft_action = parm->t_action;
 	params_new->tcft_enc_metadata = metadata;
 
 	spin_lock_bh(&t->tcf_lock);
-	t->tcf_action = parm->action;
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 	rcu_swap_protected(t->params, params_new,
 			   lockdep_is_held(&t->tcf_lock));
 	spin_unlock_bh(&t->tcf_lock);
-	if (params_new)
-		kfree_rcu(params_new, rcu);
+	tunnel_key_release_params(params_new);
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
 
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 
 	return ret;
 
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+
 release_tun_meta:
-	dst_release(&metadata->dst);
+	if (metadata)
+		dst_release(&metadata->dst);
 
 err_out:
 	if (exists)
 		tcf_idr_release(*a, bind);
 	else
-		tcf_idr_cleanup(tn, parm->index);
+		tcf_idr_cleanup(tn, index);
 	return ret;
 }
 
@@ -385,12 +419,7 @@
 	struct tcf_tunnel_key_params *params;
 
 	params = rcu_dereference_protected(t->params, 1);
-	if (params) {
-		if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
-			dst_release(&params->tcft_enc_metadata->dst);
-
-		kfree_rcu(params, rcu);
-	}
+	tunnel_key_release_params(params);
 }
 
 static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
@@ -400,7 +429,7 @@
 	u8 *src = (u8 *)(info + 1);
 	struct nlattr *start;
 
-	start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
+	start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
 	if (!start)
 		return -EMSGSIZE;
 
@@ -434,7 +463,7 @@
 	if (!info->options_len)
 		return 0;
 
-	start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS);
+	start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS);
 	if (!start)
 		return -EMSGSIZE;
 
@@ -508,10 +537,13 @@
 		struct ip_tunnel_key *key = &info->key;
 		__be32 key_id = tunnel_id_to_key32(key->tun_id);
 
-		if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
+		if (((key->tun_flags & TUNNEL_KEY) &&
+		     nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) ||
 		    tunnel_key_dump_addresses(skb,
 					      &params->tcft_enc_metadata->u.tun_info) ||
-		    nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst) ||
+		    (key->tp_dst &&
+		      nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT,
+				   key->tp_dst)) ||
 		    nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
 			       !(key->tun_flags & TUNNEL_CSUM)) ||
 		    tunnel_key_opts_dump(skb, info))
@@ -548,8 +580,7 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
-			     struct netlink_ext_ack *extack)
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
@@ -558,7 +589,7 @@
 
 static struct tc_action_ops act_tunnel_key_ops = {
 	.kind		=	"tunnel_key",
-	.type		=	TCA_ACT_TUNNEL_KEY,
+	.id		=	TCA_ID_TUNNEL_KEY,
 	.owner		=	THIS_MODULE,
 	.act		=	tunnel_key_act,
 	.dump		=	tunnel_key_dump,
@@ -573,7 +604,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-	return tc_action_net_init(tn, &act_tunnel_key_ops);
+	return tc_action_net_init(net, tn, &act_tunnel_key_ops);
 }
 
 static void __net_exit tunnel_key_exit_net(struct list_head *net_list)
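
Editor's note: the new IP_TUNNEL_OPTS_MAX check above validates the running total inside the parse loop, so a chain of tunnel options is rejected as soon as it exceeds what the destination field can hold, rather than after the whole chain is summed. A minimal model of the guard (255 mirrors IP_TUNNEL_OPTS_MAX for a u8 options_len; everything else is illustrative):

#include <stdio.h>

#define OPTS_MAX 255

static int total_opts_len(const int *opt_lens, int n)
{
	int opts_len = 0;

	for (int i = 0; i < n; i++) {
		opts_len += opt_lens[i];
		if (opts_len > OPTS_MAX)	/* reject on first excess */
			return -1;
	}
	return opts_len;
}

int main(void)
{
	int ok[]  = { 12, 20, 8 };
	int bad[] = { 120, 120, 120 };

	printf("ok:  %d\n", total_opts_len(ok, 3));
	printf("bad: %d\n", total_opts_len(bad, 3));
	return 0;
}
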
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 033d273..08aaf71 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -15,6 +11,7 @@
 #include <linux/if_vlan.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 
 #include <linux/tc_act/tc_vlan.h>
 #include <net/tc_act/tc_vlan.h>
@@ -63,7 +60,7 @@
 		/* extract existing tag (and guarantee no hw-accel tag) */
 		if (skb_vlan_tag_present(skb)) {
 			tci = skb_vlan_tag_get(skb);
-			skb->vlan_tci = 0;
+			__vlan_hwaccel_clear_tag(skb);
 		} else {
 			/* in-payload vlan tag, pop it */
 			err = __skb_vlan_pop(skb, &tci);
@@ -105,10 +102,11 @@
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
 			 int ovr, int bind, bool rtnl_held,
-			 struct netlink_ext_ack *extack)
+			 struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 	struct nlattr *tb[TCA_VLAN_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
 	struct tcf_vlan_params *p;
 	struct tc_vlan *parm;
 	struct tcf_vlan *v;
@@ -118,18 +116,21 @@
 	u8 push_prio = 0;
 	bool exists = false;
 	int ret = 0, err;
+	u32 index;
 
 	if (!nla)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_VLAN_MAX, nla, vlan_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
 	if (!tb[TCA_VLAN_PARMS])
 		return -EINVAL;
 	parm = nla_data(tb[TCA_VLAN_PARMS]);
-	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	index = parm->index;
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0)
 		return err;
 	exists = err;
@@ -145,7 +146,7 @@
 			if (exists)
 				tcf_idr_release(*a, bind);
 			else
-				tcf_idr_cleanup(tn, parm->index);
+				tcf_idr_cleanup(tn, index);
 			return -EINVAL;
 		}
 		push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
@@ -153,7 +154,7 @@
 			if (exists)
 				tcf_idr_release(*a, bind);
 			else
-				tcf_idr_cleanup(tn, parm->index);
+				tcf_idr_cleanup(tn, index);
 			return -ERANGE;
 		}
 
@@ -167,7 +168,7 @@
 				if (exists)
 					tcf_idr_release(*a, bind);
 				else
-					tcf_idr_cleanup(tn, parm->index);
+					tcf_idr_cleanup(tn, index);
 				return -EPROTONOSUPPORT;
 			}
 		} else {
@@ -181,16 +182,16 @@
 		if (exists)
 			tcf_idr_release(*a, bind);
 		else
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 		return -EINVAL;
 	}
 	action = parm->v_action;
 
 	if (!exists) {
-		ret = tcf_idr_create(tn, parm->index, est, a,
+		ret = tcf_idr_create(tn, index, est, a,
 				     &act_vlan_ops, bind, true);
 		if (ret) {
-			tcf_idr_cleanup(tn, parm->index);
+			tcf_idr_cleanup(tn, index);
 			return ret;
 		}
 
@@ -200,12 +201,16 @@
 		return -EEXIST;
 	}
 
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
 	v = to_vlan(*a);
 
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p) {
-		tcf_idr_release(*a, bind);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto put_chain;
 	}
 
 	p->tcfv_action = action;
@@ -214,16 +219,24 @@
 	p->tcfv_push_proto = push_proto;
 
 	spin_lock_bh(&v->tcf_lock);
-	v->tcf_action = parm->action;
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 	rcu_swap_protected(v->vlan_p, p, lockdep_is_held(&v->tcf_lock));
 	spin_unlock_bh(&v->tcf_lock);
 
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
 	if (p)
 		kfree_rcu(p, rcu);
 
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 	return ret;
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
 }
 
 static void tcf_vlan_cleanup(struct tc_action *a)
@@ -288,23 +301,45 @@
 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
-			   struct netlink_ext_ack *extack)
+static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+				  u64 lastuse, bool hw)
+{
+	struct tcf_vlan *v = to_vlan(a);
+	struct tcf_t *tm = &v->tcf_tm;
+
+	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+	if (hw)
+		_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+				   bytes, packets);
+	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+}
+
+static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
 	return tcf_idr_search(tn, a, index);
 }
 
+static size_t tcf_vlan_get_fill_size(const struct tc_action *act)
+{
+	return nla_total_size(sizeof(struct tc_vlan))
+		+ nla_total_size(sizeof(u16)) /* TCA_VLAN_PUSH_VLAN_ID */
+		+ nla_total_size(sizeof(u16)) /* TCA_VLAN_PUSH_VLAN_PROTOCOL */
+		+ nla_total_size(sizeof(u8)); /* TCA_VLAN_PUSH_VLAN_PRIORITY */
+}
+
 static struct tc_action_ops act_vlan_ops = {
 	.kind		=	"vlan",
-	.type		=	TCA_ACT_VLAN,
+	.id		=	TCA_ID_VLAN,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_vlan_act,
 	.dump		=	tcf_vlan_dump,
 	.init		=	tcf_vlan_init,
 	.cleanup	=	tcf_vlan_cleanup,
 	.walk		=	tcf_vlan_walker,
+	.stats_update	=	tcf_vlan_stats_update,
+	.get_fill_size	=	tcf_vlan_get_fill_size,
 	.lookup		=	tcf_vlan_search,
 	.size		=	sizeof(struct tcf_vlan),
 };
@@ -313,7 +348,7 @@
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-	return tc_action_net_init(tn, &act_vlan_ops);
+	return tc_action_net_init(net, tn, &act_vlan_ops);
 }
 
 static void __net_exit vlan_exit_net(struct list_head *net_list)
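
Editor's note: tcf_vlan_stats_update() is the hardware-offload hook: driver-reported deltas are folded into the software counters, duplicated into the hw counters when flagged, and lastuse only ever moves forward. A simplified single-threaded model — the kernel uses per-CPU bstats via _bstats_cpu_update(); the struct names here are illustrative:

#include <stdint.h>
#include <stdio.h>

struct stats { uint64_t bytes, packets; };

struct action_stats {
	struct stats sw;	/* stands in for a->cpu_bstats */
	struct stats hw;	/* stands in for a->cpu_bstats_hw */
	uint64_t lastuse;
};

static void stats_update(struct action_stats *s, uint64_t bytes,
			 uint32_t packets, uint64_t lastuse, int hw)
{
	s->sw.bytes += bytes;	/* software totals always take the delta */
	s->sw.packets += packets;
	if (hw) {		/* hw totals additionally, when flagged */
		s->hw.bytes += bytes;
		s->hw.packets += packets;
	}
	if (lastuse > s->lastuse)	/* newest timestamp wins */
		s->lastuse = lastuse;
}

int main(void)
{
	struct action_stats s = { {0, 0}, {0, 0}, 0 };

	stats_update(&s, 1500, 1, 1000, 1);	/* offloaded packet */
	stats_update(&s, 64, 1, 900, 0);	/* software packet */
	printf("sw %llu/%llu hw %llu/%llu lastuse %llu\n",
	       (unsigned long long)s.sw.bytes,
	       (unsigned long long)s.sw.packets,
	       (unsigned long long)s.hw.bytes,
	       (unsigned long long)s.hw.packets,
	       (unsigned long long)s.lastuse);
	return 0;
}
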
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 70f144a..20d60b8 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1,17 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_api.c	Packet classifier API.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  * Changes:
  *
  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
- *
  */
 
 #include <linux/module.h>
@@ -25,11 +20,25 @@
 #include <linux/kmod.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
+#include <linux/rhashtable.h>
+#include <linux/jhash.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
+#include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_police.h>
+#include <net/tc_act/tc_sample.h>
+#include <net/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_ct.h>
+#include <net/tc_act/tc_mpls.h>
+#include <net/flow_offload.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
 
@@ -39,6 +48,62 @@
 /* Protects list of registered TC modules. It is pure SMP lock. */
 static DEFINE_RWLOCK(cls_mod_lock);
 
+static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
+{
+	return jhash_3words(tp->chain->index, tp->prio,
+			    (__force __u32)tp->protocol, 0);
+}
+
+static void tcf_proto_signal_destroying(struct tcf_chain *chain,
+					struct tcf_proto *tp)
+{
+	struct tcf_block *block = chain->block;
+
+	mutex_lock(&block->proto_destroy_lock);
+	hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
+		     destroy_obj_hashfn(tp));
+	mutex_unlock(&block->proto_destroy_lock);
+}
+
+static bool tcf_proto_cmp(const struct tcf_proto *tp1,
+			  const struct tcf_proto *tp2)
+{
+	return tp1->chain->index == tp2->chain->index &&
+	       tp1->prio == tp2->prio &&
+	       tp1->protocol == tp2->protocol;
+}
+
+static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
+					struct tcf_proto *tp)
+{
+	u32 hash = destroy_obj_hashfn(tp);
+	struct tcf_proto *iter;
+	bool found = false;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
+				   destroy_ht_node, hash) {
+		if (tcf_proto_cmp(tp, iter)) {
+			found = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
+static void
+tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
+{
+	struct tcf_block *block = chain->block;
+
+	mutex_lock(&block->proto_destroy_lock);
+	if (hash_hashed(&tp->destroy_ht_node))
+		hash_del_rcu(&tp->destroy_ht_node);
+	mutex_unlock(&block->proto_destroy_lock);
+}
+
 /* Find classifier type by string name */
 
 static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
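
Editor's note: the proto_destroy bookkeeping added above lets the unlocked flush/replace paths detect a tcf_proto that is still being torn down, identifying it by the (chain index, prio, protocol) triple. A toy standalone model of that keying — the single-slot buckets and the hash mix are illustrative only; the kernel uses jhash_3words() plus hashtable chaining under RCU:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct proto_key { uint32_t chain_index, prio, protocol; };

/* toy mix, standing in for jhash_3words() */
static uint32_t key_hash(const struct proto_key *k)
{
	uint32_t h = k->chain_index * 0x9e3779b1u;

	h ^= k->prio * 0x85ebca6bu;
	h ^= k->protocol * 0xc2b2ae35u;
	return h;
}

static bool key_eq(const struct proto_key *a, const struct proto_key *b)
{
	return a->chain_index == b->chain_index &&
	       a->prio == b->prio &&
	       a->protocol == b->protocol;
}

#define TABLE_BITS 7
static const struct proto_key *table[1 << TABLE_BITS];

static void signal_destroying(const struct proto_key *k)
{
	/* toy: one entry per bucket, no chaining */
	table[key_hash(k) & ((1 << TABLE_BITS) - 1)] = k;
}

static bool exists_destroying(const struct proto_key *k)
{
	const struct proto_key *e =
		table[key_hash(k) & ((1 << TABLE_BITS) - 1)];

	return e && key_eq(e, k);
}

int main(void)
{
	struct proto_key k = { 0, 1, 0x0800 };

	signal_destroying(&k);
	printf("being destroyed: %s\n", exists_destroying(&k) ? "yes" : "no");
	return 0;
}
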
@@ -60,7 +125,8 @@
 }
 
 static const struct tcf_proto_ops *
-tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
+		     struct netlink_ext_ack *extack)
 {
 	const struct tcf_proto_ops *ops;
 
@@ -68,9 +134,11 @@
 	if (ops)
 		return ops;
 #ifdef CONFIG_MODULES
-	rtnl_unlock();
+	if (rtnl_held)
+		rtnl_unlock();
 	request_module("cls_%s", kind);
-	rtnl_lock();
+	if (rtnl_held)
+		rtnl_lock();
 	ops = __tcf_proto_lookup_ops(kind);
 	/* We dropped the RTNL semaphore in order to perform
 	 * the module load. So, even if we succeeded in loading
@@ -151,8 +219,37 @@
 	return TC_H_MAJ(first);
 }
 
+static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
+{
+	if (kind)
+		return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ;
+	memset(name, 0, IFNAMSIZ);
+	return false;
+}
+
+static bool tcf_proto_is_unlocked(const char *kind)
+{
+	const struct tcf_proto_ops *ops;
+	bool ret;
+
+	if (strlen(kind) == 0)
+		return false;
+
+	ops = tcf_proto_lookup_ops(kind, false, NULL);
+	/* On error return false to take rtnl lock. Proto lookup/create
+	 * functions will perform lookup again and properly handle errors.
+	 */
+	if (IS_ERR(ops))
+		return false;
+
+	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
+	module_put(ops->owner);
+	return ret;
+}
+
 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 					  u32 prio, struct tcf_chain *chain,
+					  bool rtnl_held,
 					  struct netlink_ext_ack *extack)
 {
 	struct tcf_proto *tp;
@@ -162,7 +259,7 @@
 	if (!tp)
 		return ERR_PTR(-ENOBUFS);
 
-	tp->ops = tcf_proto_lookup_ops(kind, extack);
+	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
 	if (IS_ERR(tp->ops)) {
 		err = PTR_ERR(tp->ops);
 		goto errout;
@@ -171,6 +268,8 @@
 	tp->protocol = protocol;
 	tp->prio = prio;
 	tp->chain = chain;
+	spin_lock_init(&tp->lock);
+	refcount_set(&tp->refcnt, 1);
 
 	err = tp->ops->init(tp);
 	if (err) {
@@ -184,14 +283,82 @@
 	return ERR_PTR(err);
 }
 
-static void tcf_proto_destroy(struct tcf_proto *tp,
-			      struct netlink_ext_ack *extack)
+static void tcf_proto_get(struct tcf_proto *tp)
 {
-	tp->ops->destroy(tp, extack);
+	refcount_inc(&tp->refcnt);
+}
+
+static void tcf_chain_put(struct tcf_chain *chain);
+
+static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
+			      bool sig_destroy, struct netlink_ext_ack *extack)
+{
+	tp->ops->destroy(tp, rtnl_held, extack);
+	if (sig_destroy)
+		tcf_proto_signal_destroyed(tp->chain, tp);
+	tcf_chain_put(tp->chain);
 	module_put(tp->ops->owner);
 	kfree_rcu(tp, rcu);
 }
 
+static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
+			  struct netlink_ext_ack *extack)
+{
+	if (refcount_dec_and_test(&tp->refcnt))
+		tcf_proto_destroy(tp, rtnl_held, true, extack);
+}
+
+static int walker_check_empty(struct tcf_proto *tp, void *fh,
+			      struct tcf_walker *arg)
+{
+	if (fh) {
+		arg->nonempty = true;
+		return -1;
+	}
+	return 0;
+}
+
+static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
+{
+	struct tcf_walker walker = { .fn = walker_check_empty, };
+
+	if (tp->ops->walk) {
+		tp->ops->walk(tp, &walker, rtnl_held);
+		return !walker.nonempty;
+	}
+	return true;
+}
+
+static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
+{
+	spin_lock(&tp->lock);
+	if (tcf_proto_is_empty(tp, rtnl_held))
+		tp->deleting = true;
+	spin_unlock(&tp->lock);
+	return tp->deleting;
+}
+
+static void tcf_proto_mark_delete(struct tcf_proto *tp)
+{
+	spin_lock(&tp->lock);
+	tp->deleting = true;
+	spin_unlock(&tp->lock);
+}
+
+static bool tcf_proto_is_deleting(struct tcf_proto *tp)
+{
+	bool deleting;
+
+	spin_lock(&tp->lock);
+	deleting = tp->deleting;
+	spin_unlock(&tp->lock);
+
+	return deleting;
+}
+
+#define ASSERT_BLOCK_LOCKED(block)					\
+	lockdep_assert_held(&(block)->lock)
+
 struct tcf_filter_chain_list_item {
 	struct list_head list;
 	tcf_chain_head_change_t *chain_head_change;
@@ -203,10 +370,13 @@
 {
 	struct tcf_chain *chain;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
 	if (!chain)
 		return NULL;
 	list_add_tail(&chain->list, &block->chain_list);
+	mutex_init(&chain->filter_chain_lock);
 	chain->block = block;
 	chain->index = chain_index;
 	chain->refcnt = 1;
@@ -230,29 +400,60 @@
 
 	if (chain->index)
 		return;
+
+	mutex_lock(&block->lock);
 	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
 		tcf_chain_head_change_item(item, tp_head);
+	mutex_unlock(&block->lock);
 }
 
-static void tcf_chain_destroy(struct tcf_chain *chain)
+/* Returns true if block can be safely freed. */
+
+static bool tcf_chain_detach(struct tcf_chain *chain)
 {
 	struct tcf_block *block = chain->block;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	list_del(&chain->list);
 	if (!chain->index)
 		block->chain0.chain = NULL;
-	kfree(chain);
-	if (list_empty(&block->chain_list) && block->refcnt == 0)
-		kfree(block);
+
+	if (list_empty(&block->chain_list) &&
+	    refcount_read(&block->refcnt) == 0)
+		return true;
+
+	return false;
+}
+
+static void tcf_block_destroy(struct tcf_block *block)
+{
+	mutex_destroy(&block->lock);
+	mutex_destroy(&block->proto_destroy_lock);
+	kfree_rcu(block, rcu);
+}
+
+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
+{
+	struct tcf_block *block = chain->block;
+
+	mutex_destroy(&chain->filter_chain_lock);
+	kfree_rcu(chain, rcu);
+	if (free_block)
+		tcf_block_destroy(block);
 }
 
 static void tcf_chain_hold(struct tcf_chain *chain)
 {
+	ASSERT_BLOCK_LOCKED(chain->block);
+
 	++chain->refcnt;
 }
 
 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
 {
+	ASSERT_BLOCK_LOCKED(chain->block);
+
 	/* In case all the references are action references, this
 	 * chain should not be shown to the user.
 	 */
@@ -264,6 +465,8 @@
 {
 	struct tcf_chain *chain;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	list_for_each_entry(chain, &block->chain_list, list) {
 		if (chain->index == chain_index)
 			return chain;
@@ -278,31 +481,40 @@
 					 u32 chain_index, bool create,
 					 bool by_act)
 {
-	struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+	struct tcf_chain *chain = NULL;
+	bool is_first_reference;
 
+	mutex_lock(&block->lock);
+	chain = tcf_chain_lookup(block, chain_index);
 	if (chain) {
 		tcf_chain_hold(chain);
 	} else {
 		if (!create)
-			return NULL;
+			goto errout;
 		chain = tcf_chain_create(block, chain_index);
 		if (!chain)
-			return NULL;
+			goto errout;
 	}
 
 	if (by_act)
 		++chain->action_refcnt;
+	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
+	mutex_unlock(&block->lock);
 
 	/* Send notification only in case we got the first
 	 * non-action reference. Until then, the chain acts only as
 	 * a placeholder for actions pointing to it and user ought
 	 * not know about them.
 	 */
-	if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+	if (is_first_reference && !by_act)
 		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
 				RTM_NEWCHAIN, false);
 
 	return chain;
+
+errout:
+	mutex_unlock(&block->lock);
+	return chain;
 }
 
 static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@ -317,72 +529,208 @@
 }
 EXPORT_SYMBOL(tcf_chain_get_by_act);
 
-static void tc_chain_tmplt_del(struct tcf_chain *chain);
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+			       void *tmplt_priv);
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+				  void *tmplt_priv, u32 chain_index,
+				  struct tcf_block *block, struct sk_buff *oskb,
+				  u32 seq, u16 flags, bool unicast);
 
-static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
+			    bool explicitly_created)
 {
+	struct tcf_block *block = chain->block;
+	const struct tcf_proto_ops *tmplt_ops;
+	bool free_block = false;
+	unsigned int refcnt;
+	void *tmplt_priv;
+
+	mutex_lock(&block->lock);
+	if (explicitly_created) {
+		if (!chain->explicitly_created) {
+			mutex_unlock(&block->lock);
+			return;
+		}
+		chain->explicitly_created = false;
+	}
+
 	if (by_act)
 		chain->action_refcnt--;
-	chain->refcnt--;
+
+	/* tc_chain_notify_delete can't be called while holding block lock.
+	 * However, when block is unlocked chain can be changed concurrently, so
+	 * save these to temporary variables.
+	 */
+	refcnt = --chain->refcnt;
+	tmplt_ops = chain->tmplt_ops;
+	tmplt_priv = chain->tmplt_priv;
 
 	/* The last dropped non-action reference will trigger notification. */
-	if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
-		tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
+	if (refcnt - chain->action_refcnt == 0 && !by_act) {
+		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
+				       block, NULL, 0, 0, false);
+		/* Last reference to chain, no need to lock. */
+		chain->flushing = false;
+	}
 
-	if (chain->refcnt == 0) {
-		tc_chain_tmplt_del(chain);
-		tcf_chain_destroy(chain);
+	if (refcnt == 0)
+		free_block = tcf_chain_detach(chain);
+	mutex_unlock(&block->lock);
+
+	if (refcnt == 0) {
+		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
+		tcf_chain_destroy(chain, free_block);
 	}
 }
 
 static void tcf_chain_put(struct tcf_chain *chain)
 {
-	__tcf_chain_put(chain, false);
+	__tcf_chain_put(chain, false, false);
 }
 
 void tcf_chain_put_by_act(struct tcf_chain *chain)
 {
-	__tcf_chain_put(chain, true);
+	__tcf_chain_put(chain, true, false);
 }
 EXPORT_SYMBOL(tcf_chain_put_by_act);
 
 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
 {
-	if (chain->explicitly_created)
-		tcf_chain_put(chain);
+	__tcf_chain_put(chain, false, true);
 }
 
-static void tcf_chain_flush(struct tcf_chain *chain)
+static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
 {
-	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+	struct tcf_proto *tp, *tp_next;
 
-	tcf_chain0_head_change(chain, NULL);
+	mutex_lock(&chain->filter_chain_lock);
+	tp = tcf_chain_dereference(chain->filter_chain, chain);
 	while (tp) {
-		RCU_INIT_POINTER(chain->filter_chain, tp->next);
-		tcf_proto_destroy(tp, NULL);
-		tp = rtnl_dereference(chain->filter_chain);
-		tcf_chain_put(chain);
+		tp_next = rcu_dereference_protected(tp->next, 1);
+		tcf_proto_signal_destroying(chain, tp);
+		tp = tp_next;
 	}
+	tp = tcf_chain_dereference(chain->filter_chain, chain);
+	RCU_INIT_POINTER(chain->filter_chain, NULL);
+	tcf_chain0_head_change(chain, NULL);
+	chain->flushing = true;
+	mutex_unlock(&chain->filter_chain_lock);
+
+	while (tp) {
+		tp_next = rcu_dereference_protected(tp->next, 1);
+		tcf_proto_put(tp, rtnl_held, NULL);
+		tp = tp_next;
+	}
+}
+
+static int tcf_block_setup(struct tcf_block *block,
+			   struct flow_block_offload *bo);
+
+static void tc_indr_block_ing_cmd(struct net_device *dev,
+				  struct tcf_block *block,
+				  flow_indr_block_bind_cb_t *cb,
+				  void *cb_priv,
+				  enum flow_block_command command)
+{
+	struct flow_block_offload bo = {
+		.command	= command,
+		.binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
+		.net		= dev_net(dev),
+		.block_shared	= tcf_block_non_null_shared(block),
+	};
+	INIT_LIST_HEAD(&bo.cb_list);
+
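+	/* The device may have no ingress block bound at all. */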
+	if (!block)
+		return;
+
+	bo.block = &block->flow_block;
+
+	down_write(&block->cb_lock);
+	cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
+
+	tcf_block_setup(block, &bo);
+	up_write(&block->cb_lock);
+}
+
+static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
+{
+	const struct Qdisc_class_ops *cops;
+	struct Qdisc *qdisc;
+
+	if (!dev_ingress_queue(dev))
+		return NULL;
+
+	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
+	if (!qdisc)
+		return NULL;
+
+	cops = qdisc->ops->cl_ops;
+	if (!cops)
+		return NULL;
+
+	if (!cops->tcf_block)
+		return NULL;
+
+	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
+}
+
+static void tc_indr_block_get_and_ing_cmd(struct net_device *dev,
+					  flow_indr_block_bind_cb_t *cb,
+					  void *cb_priv,
+					  enum flow_block_command command)
+{
+	struct tcf_block *block = tc_dev_ingress_block(dev);
+
+	tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command);
+}
+
+static void tc_indr_block_call(struct tcf_block *block,
+			       struct net_device *dev,
+			       struct tcf_block_ext_info *ei,
+			       enum flow_block_command command,
+			       struct netlink_ext_ack *extack)
+{
+	struct flow_block_offload bo = {
+		.command	= command,
+		.binder_type	= ei->binder_type,
+		.net		= dev_net(dev),
+		.block		= &block->flow_block,
+		.block_shared	= tcf_block_shared(block),
+		.extack		= extack,
+	};
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	flow_indr_block_call(dev, &bo, command);
+	tcf_block_setup(block, &bo);
 }
 
 static bool tcf_block_offload_in_use(struct tcf_block *block)
 {
-	return block->offloadcnt;
+	return atomic_read(&block->offloadcnt);
 }
 
 static int tcf_block_offload_cmd(struct tcf_block *block,
 				 struct net_device *dev,
 				 struct tcf_block_ext_info *ei,
-				 enum tc_block_command command,
+				 enum flow_block_command command,
 				 struct netlink_ext_ack *extack)
 {
-	struct tc_block_offload bo = {};
+	struct flow_block_offload bo = {};
+	int err;
 
+	bo.net = dev_net(dev);
 	bo.command = command;
 	bo.binder_type = ei->binder_type;
-	bo.block = block;
+	bo.block = &block->flow_block;
+	bo.block_shared = tcf_block_shared(block);
 	bo.extack = extack;
-	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	if (err < 0)
+		return err;
+
+	return tcf_block_setup(block, &bo);
 }
 
 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
@@ -392,6 +740,7 @@
 	struct net_device *dev = q->dev_queue->dev;
 	int err;
 
+	down_write(&block->cb_lock);
 	if (!dev->netdev_ops->ndo_setup_tc)
 		goto no_offload_dev_inc;
 
@@ -400,19 +749,31 @@
 	 */
 	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
 		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto err_unlock;
 	}
 
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
+	err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_inc;
-	return err;
+	if (err)
+		goto err_unlock;
+
+	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
+	up_write(&block->cb_lock);
+	return 0;
 
 no_offload_dev_inc:
-	if (tcf_block_offload_in_use(block))
-		return -EOPNOTSUPP;
+	if (tcf_block_offload_in_use(block)) {
+		err = -EOPNOTSUPP;
+		goto err_unlock;
+	}
+	err = 0;
 	block->nooffloaddevcnt++;
-	return 0;
+	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
+err_unlock:
+	up_write(&block->cb_lock);
+	return err;
 }
 
 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
@@ -421,15 +782,20 @@
 	struct net_device *dev = q->dev_queue->dev;
 	int err;
 
+	down_write(&block->cb_lock);
+	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
+
 	if (!dev->netdev_ops->ndo_setup_tc)
 		goto no_offload_dev_dec;
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
+	err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_dec;
+	up_write(&block->cb_lock);
 	return;
 
 no_offload_dev_dec:
 	WARN_ON(block->nooffloaddevcnt-- == 0);
+	up_write(&block->cb_lock);
 }
 
 static int
@@ -437,8 +803,8 @@
 			      struct tcf_block_ext_info *ei,
 			      struct netlink_ext_ack *extack)
 {
-	struct tcf_chain *chain0 = block->chain0.chain;
 	struct tcf_filter_chain_list_item *item;
+	struct tcf_chain *chain0;
 
 	item = kmalloc(sizeof(*item), GFP_KERNEL);
 	if (!item) {
@@ -447,9 +813,32 @@
 	}
 	item->chain_head_change = ei->chain_head_change;
 	item->chain_head_change_priv = ei->chain_head_change_priv;
-	if (chain0 && chain0->filter_chain)
-		tcf_chain_head_change_item(item, chain0->filter_chain);
-	list_add(&item->list, &block->chain0.filter_chain_list);
+
+	mutex_lock(&block->lock);
+	chain0 = block->chain0.chain;
+	if (chain0)
+		tcf_chain_hold(chain0);
+	else
+		list_add(&item->list, &block->chain0.filter_chain_list);
+	mutex_unlock(&block->lock);
+
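+	/* If chain0 already exists, read its current head under
+	 * filter_chain_lock before publishing the new item on the list.
+	 */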
+	if (chain0) {
+		struct tcf_proto *tp_head;
+
+		mutex_lock(&chain0->filter_chain_lock);
+
+		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
+		if (tp_head)
+			tcf_chain_head_change_item(item, tp_head);
+
+		mutex_lock(&block->lock);
+		list_add(&item->list, &block->chain0.filter_chain_list);
+		mutex_unlock(&block->lock);
+
+		mutex_unlock(&chain0->filter_chain_lock);
+		tcf_chain_put(chain0);
+	}
+
 	return 0;
 }
 
@@ -457,24 +846,28 @@
 tcf_chain0_head_change_cb_del(struct tcf_block *block,
 			      struct tcf_block_ext_info *ei)
 {
-	struct tcf_chain *chain0 = block->chain0.chain;
 	struct tcf_filter_chain_list_item *item;
 
+	mutex_lock(&block->lock);
 	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
 		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
 		    (item->chain_head_change == ei->chain_head_change &&
 		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
-			if (chain0)
+			if (block->chain0.chain)
 				tcf_chain_head_change_item(item, NULL);
 			list_del(&item->list);
+			mutex_unlock(&block->lock);
+
 			kfree(item);
 			return;
 		}
 	}
+	mutex_unlock(&block->lock);
 	WARN_ON(1);
 }
 
 struct tcf_net {
+	spinlock_t idr_lock; /* Protects idr */
 	struct idr idr;
 };
 
@@ -484,16 +877,25 @@
 			    struct netlink_ext_ack *extack)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);
+	int err;
 
-	return idr_alloc_u32(&tn->idr, block, &block->index, block->index,
-			     GFP_KERNEL);
+	idr_preload(GFP_KERNEL);
+	spin_lock(&tn->idr_lock);
+	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
+			    GFP_NOWAIT);
+	spin_unlock(&tn->idr_lock);
+	idr_preload_end();
+
+	return err;
 }
 
 static void tcf_block_remove(struct tcf_block *block, struct net *net)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);
 
+	spin_lock(&tn->idr_lock);
 	idr_remove(&tn->idr, block->index);
+	spin_unlock(&tn->idr_lock);
 }
 
 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
@@ -507,12 +909,15 @@
 		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
 		return ERR_PTR(-ENOMEM);
 	}
+	mutex_init(&block->lock);
+	mutex_init(&block->proto_destroy_lock);
+	init_rwsem(&block->cb_lock);
+	flow_block_init(&block->flow_block);
 	INIT_LIST_HEAD(&block->chain_list);
-	INIT_LIST_HEAD(&block->cb_list);
 	INIT_LIST_HEAD(&block->owner_list);
 	INIT_LIST_HEAD(&block->chain0.filter_chain_list);
 
-	block->refcnt = 1;
+	refcount_set(&block->refcnt, 1);
 	block->net = net;
 	block->index = block_index;
 
@@ -529,6 +934,301 @@
 	return idr_find(&tn->idr, block_index);
 }
 
+static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
+{
+	struct tcf_block *block;
+
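+	/* refcount_inc_not_zero() fails if the block is already being
+	 * destroyed, in which case the lookup result is discarded.
+	 */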
+	rcu_read_lock();
+	block = tcf_block_lookup(net, block_index);
+	if (block && !refcount_inc_not_zero(&block->refcnt))
+		block = NULL;
+	rcu_read_unlock();
+
+	return block;
+}
+
+static struct tcf_chain *
+__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
+{
+	mutex_lock(&block->lock);
+	if (chain)
+		chain = list_is_last(&chain->list, &block->chain_list) ?
+			NULL : list_next_entry(chain, list);
+	else
+		chain = list_first_entry_or_null(&block->chain_list,
+						 struct tcf_chain, list);
+
+	/* skip all action-only chains */
+	while (chain && tcf_chain_held_by_acts_only(chain))
+		chain = list_is_last(&chain->list, &block->chain_list) ?
+			NULL : list_next_entry(chain, list);
+
+	if (chain)
+		tcf_chain_hold(chain);
+	mutex_unlock(&block->lock);
+
+	return chain;
+}
+
+/* Function to be used by all clients that want to iterate over all chains on
+ * a block. It properly obtains block->lock and takes a reference to the chain
+ * before returning it. Users of this function must be tolerant to concurrent
+ * chain insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that netlink dump callbacks cannot guarantee a consistent
+ * dump because the rtnl lock is released each time the skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_chain *
+tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
+{
+	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
+
+	if (chain)
+		tcf_chain_put(chain);
+
+	return chain_next;
+}
+EXPORT_SYMBOL(tcf_get_next_chain);
+
+static struct tcf_proto *
+__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
+{
+	u32 prio = 0;
+
+	ASSERT_RTNL();
+	mutex_lock(&chain->filter_chain_lock);
+
+	if (!tp) {
+		tp = tcf_chain_dereference(chain->filter_chain, chain);
+	} else if (tcf_proto_is_deleting(tp)) {
+		/* The 'deleting' flag is set and chain->filter_chain_lock was
+		 * unlocked, which means the next pointer could be invalid.
+		 * Restart the search.
+		 */
+		prio = tp->prio + 1;
+		tp = tcf_chain_dereference(chain->filter_chain, chain);
+
+		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
+			if (!tp->deleting && tp->prio >= prio)
+				break;
+	} else {
+		tp = tcf_chain_dereference(tp->next, chain);
+	}
+
+	if (tp)
+		tcf_proto_get(tp);
+
+	mutex_unlock(&chain->filter_chain_lock);
+
+	return tp;
+}
+
+/* Function to be used by all clients that want to iterate over all tp's on
+ * a chain. Users of this function must be tolerant to concurrent tp
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that netlink dump callbacks cannot guarantee a consistent
+ * dump because the rtnl lock is released each time the skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_proto *
+tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
+		   bool rtnl_held)
+{
+	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
+
+	if (tp)
+		tcf_proto_put(tp, rtnl_held, NULL);
+
+	return tp_next;
+}
+EXPORT_SYMBOL(tcf_get_next_proto);
+
+static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
+{
+	struct tcf_chain *chain;
+
+	/* Last reference to block. At this point chains cannot be added or
+	 * removed concurrently.
+	 */
+	for (chain = tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain = tcf_get_next_chain(block, chain)) {
+		tcf_chain_put_explicitly_created(chain);
+		tcf_chain_flush(chain, rtnl_held);
+	}
+}
+
+/* Look up the Qdisc and increment its reference counter.
+ * Set parent, if necessary.
+ */
+
+static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
+			    u32 *parent, int ifindex, bool rtnl_held,
+			    struct netlink_ext_ack *extack)
+{
+	const struct Qdisc_class_ops *cops;
+	struct net_device *dev;
+	int err = 0;
+
+	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+		return 0;
+
+	rcu_read_lock();
+
+	/* Find link */
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (!dev) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+
+	/* Find qdisc */
+	if (!*parent) {
+		*q = dev->qdisc;
+		*parent = (*q)->handle;
+	} else {
+		*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
+		if (!*q) {
+			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+			err = -EINVAL;
+			goto errout_rcu;
+		}
+	}
+
+	*q = qdisc_refcount_inc_nz(*q);
+	if (!*q) {
+		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+		err = -EINVAL;
+		goto errout_rcu;
+	}
+
+	/* Is it classful? */
+	cops = (*q)->ops->cl_ops;
+	if (!cops) {
+		NL_SET_ERR_MSG(extack, "Qdisc not classful");
+		err = -EINVAL;
+		goto errout_qdisc;
+	}
+
+	if (!cops->tcf_block) {
+		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
+		err = -EOPNOTSUPP;
+		goto errout_qdisc;
+	}
+
+errout_rcu:
+	/* At this point we know that the qdisc is not noop_qdisc,
+	 * which means that the qdisc holds a reference to the net_device
+	 * and we hold a reference to the qdisc, so it is safe to release
+	 * the rcu read lock.
+	 */
+	rcu_read_unlock();
+	return err;
+
+errout_qdisc:
+	rcu_read_unlock();
+
+	if (rtnl_held)
+		qdisc_put(*q);
+	else
+		qdisc_put_unlocked(*q);
+	*q = NULL;
+
+	return err;
+}
+
+static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
+			       int ifindex, struct netlink_ext_ack *extack)
+{
+	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+		return 0;
+
+	/* Are we searching for a filter attached to a class? */
+	if (TC_H_MIN(parent)) {
+		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+		*cl = cops->find(q, parent);
+		if (*cl == 0) {
+			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
+			return -ENOENT;
+		}
+	}
+
+	return 0;
+}
+
+static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
+					  unsigned long cl, int ifindex,
+					  u32 block_index,
+					  struct netlink_ext_ack *extack)
+{
+	struct tcf_block *block;
+
+	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+		block = tcf_block_refcnt_get(net, block_index);
+		if (!block) {
+			NL_SET_ERR_MSG(extack, "Block of given index was not found");
+			return ERR_PTR(-EINVAL);
+		}
+	} else {
+		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+		block = cops->tcf_block(q, cl, extack);
+		if (!block)
+			return ERR_PTR(-EINVAL);
+
+		if (tcf_block_shared(block)) {
+			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+
+		/* Always take a reference to the block in order to support
+		 * execution of the cls API rules update path without the rtnl
+		 * lock. The caller must release the block when finished using
+		 * it. The 'if' branch of this conditional obtains its
+		 * reference by calling tcf_block_refcnt_get().
+		 */
+		refcount_inc(&block->refcnt);
+	}
+
+	return block;
+}
+
+static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
+			    struct tcf_block_ext_info *ei, bool rtnl_held)
+{
+	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
+		/* Flushing/putting all chains will cause the block to be
+		 * deallocated when the last chain is freed. However, if
+		 * chain_list is empty, the block has to be deallocated
+		 * manually. Once the block reference counter reaches 0, it is
+		 * no longer possible to increment it or add new chains to the
+		 * block.
+		 */
+		bool free_block = list_empty(&block->chain_list);
+
+		mutex_unlock(&block->lock);
+		if (tcf_block_shared(block))
+			tcf_block_remove(block, block->net);
+
+		if (q)
+			tcf_block_offload_unbind(block, q, ei);
+
+		if (free_block)
+			tcf_block_destroy(block);
+		else
+			tcf_block_flush_all_chains(block, rtnl_held);
+	} else if (q) {
+		tcf_block_offload_unbind(block, q, ei);
+	}
+}
+
+static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
+{
+	__tcf_block_put(block, NULL, NULL, rtnl_held);
+}
+
 /* Find tcf block.
  * Set q, parent, cl when appropriate.
  */
@@ -539,82 +1239,62 @@
 					struct netlink_ext_ack *extack)
 {
 	struct tcf_block *block;
+	int err = 0;
 
-	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
-		block = tcf_block_lookup(net, block_index);
-		if (!block) {
-			NL_SET_ERR_MSG(extack, "Block of given index was not found");
-			return ERR_PTR(-EINVAL);
-		}
-	} else {
-		const struct Qdisc_class_ops *cops;
-		struct net_device *dev;
+	ASSERT_RTNL();
 
-		/* Find link */
-		dev = __dev_get_by_index(net, ifindex);
-		if (!dev)
-			return ERR_PTR(-ENODEV);
+	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
+	if (err)
+		goto errout;
 
-		/* Find qdisc */
-		if (!*parent) {
-			*q = dev->qdisc;
-			*parent = (*q)->handle;
-		} else {
-			*q = qdisc_lookup(dev, TC_H_MAJ(*parent));
-			if (!*q) {
-				NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
-				return ERR_PTR(-EINVAL);
-			}
-		}
+	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
+	if (err)
+		goto errout_qdisc;
 
-		/* Is it classful? */
-		cops = (*q)->ops->cl_ops;
-		if (!cops) {
-			NL_SET_ERR_MSG(extack, "Qdisc not classful");
-			return ERR_PTR(-EINVAL);
-		}
-
-		if (!cops->tcf_block) {
-			NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
-			return ERR_PTR(-EOPNOTSUPP);
-		}
-
-		/* Do we search for filter, attached to class? */
-		if (TC_H_MIN(*parent)) {
-			*cl = cops->find(*q, *parent);
-			if (*cl == 0) {
-				NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
-				return ERR_PTR(-ENOENT);
-			}
-		}
-
-		/* And the last stroke */
-		block = cops->tcf_block(*q, *cl, extack);
-		if (!block)
-			return ERR_PTR(-EINVAL);
-		if (tcf_block_shared(block)) {
-			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
-			return ERR_PTR(-EOPNOTSUPP);
-		}
+	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
+	if (IS_ERR(block)) {
+		err = PTR_ERR(block);
+		goto errout_qdisc;
 	}
 
 	return block;
+
+errout_qdisc:
+	if (*q)
+		qdisc_put(*q);
+errout:
+	*q = NULL;
+	return ERR_PTR(err);
+}
+
+static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
+			      bool rtnl_held)
+{
+	if (!IS_ERR_OR_NULL(block))
+		tcf_block_refcnt_put(block, rtnl_held);
+
+	if (q) {
+		if (rtnl_held)
+			qdisc_put(q);
+		else
+			qdisc_put_unlocked(q);
+	}
 }
 
 struct tcf_block_owner_item {
 	struct list_head list;
 	struct Qdisc *q;
-	enum tcf_block_binder_type binder_type;
+	enum flow_block_binder_type binder_type;
 };
 
 static void
 tcf_block_owner_netif_keep_dst(struct tcf_block *block,
 			       struct Qdisc *q,
-			       enum tcf_block_binder_type binder_type)
+			       enum flow_block_binder_type binder_type)
 {
 	if (block->keep_dst &&
-	    binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
-	    binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
 		netif_keep_dst(qdisc_dev(q));
 }
 
@@ -631,7 +1311,7 @@
 
 static int tcf_block_owner_add(struct tcf_block *block,
 			       struct Qdisc *q,
-			       enum tcf_block_binder_type binder_type)
+			       enum flow_block_binder_type binder_type)
 {
 	struct tcf_block_owner_item *item;
 
@@ -646,7 +1326,7 @@
 
 static void tcf_block_owner_del(struct tcf_block *block,
 				struct Qdisc *q,
-				enum tcf_block_binder_type binder_type)
+				enum flow_block_binder_type binder_type)
 {
 	struct tcf_block_owner_item *item;
 
@@ -666,21 +1346,16 @@
 {
 	struct net *net = qdisc_net(q);
 	struct tcf_block *block = NULL;
-	bool created = false;
 	int err;
 
-	if (ei->block_index) {
+	if (ei->block_index)
 		/* block_index not 0 means the shared block is requested */
-		block = tcf_block_lookup(net, ei->block_index);
-		if (block)
-			block->refcnt++;
-	}
+		block = tcf_block_refcnt_get(net, ei->block_index);
 
 	if (!block) {
 		block = tcf_block_create(net, q, ei->block_index, extack);
 		if (IS_ERR(block))
 			return PTR_ERR(block);
-		created = true;
 		if (tcf_block_shared(block)) {
 			err = tcf_block_insert(block, net, extack);
 			if (err)
@@ -710,14 +1385,8 @@
 err_chain0_head_change_cb_add:
 	tcf_block_owner_del(block, q, ei->binder_type);
 err_block_owner_add:
-	if (created) {
-		if (tcf_block_shared(block))
-			tcf_block_remove(block, net);
 err_block_insert:
-		kfree(block);
-	} else {
-		block->refcnt--;
-	}
+	tcf_block_refcnt_put(block, true);
 	return err;
 }
 EXPORT_SYMBOL(tcf_block_get_ext);
@@ -749,42 +1418,12 @@
 void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 		       struct tcf_block_ext_info *ei)
 {
-	struct tcf_chain *chain, *tmp;
-
 	if (!block)
 		return;
 	tcf_chain0_head_change_cb_del(block, ei);
 	tcf_block_owner_del(block, q, ei->binder_type);
 
-	if (block->refcnt == 1) {
-		if (tcf_block_shared(block))
-			tcf_block_remove(block, block->net);
-
-		/* Hold a refcnt for all chains, so that they don't disappear
-		 * while we are iterating.
-		 */
-		list_for_each_entry(chain, &block->chain_list, list)
-			tcf_chain_hold(chain);
-
-		list_for_each_entry(chain, &block->chain_list, list)
-			tcf_chain_flush(chain);
-	}
-
-	tcf_block_offload_unbind(block, q, ei);
-
-	if (block->refcnt == 1) {
-		/* At this point, all the chains should have refcnt >= 1. */
-		list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
-			tcf_chain_put_explicitly_created(chain);
-			tcf_chain_put(chain);
-		}
-
-		block->refcnt--;
-		if (list_empty(&block->chain_list))
-			kfree(block);
-	} else {
-		block->refcnt--;
-	}
+	__tcf_block_put(block, q, ei, true);
 }
 EXPORT_SYMBOL(tcf_block_put_ext);
 
@@ -799,55 +1438,26 @@
 
 EXPORT_SYMBOL(tcf_block_put);
 
-struct tcf_block_cb {
-	struct list_head list;
-	tc_setup_cb_t *cb;
-	void *cb_ident;
-	void *cb_priv;
-	unsigned int refcnt;
-};
-
-void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
-{
-	return block_cb->cb_priv;
-}
-EXPORT_SYMBOL(tcf_block_cb_priv);
-
-struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
-					 tc_setup_cb_t *cb, void *cb_ident)
-{	struct tcf_block_cb *block_cb;
-
-	list_for_each_entry(block_cb, &block->cb_list, list)
-		if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
-			return block_cb;
-	return NULL;
-}
-EXPORT_SYMBOL(tcf_block_cb_lookup);
-
-void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
-{
-	block_cb->refcnt++;
-}
-EXPORT_SYMBOL(tcf_block_cb_incref);
-
-unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
-{
-	return --block_cb->refcnt;
-}
-EXPORT_SYMBOL(tcf_block_cb_decref);
-
 static int
-tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
 			    void *cb_priv, bool add, bool offload_in_use,
 			    struct netlink_ext_ack *extack)
 {
-	struct tcf_chain *chain;
-	struct tcf_proto *tp;
+	struct tcf_chain *chain, *chain_prev;
+	struct tcf_proto *tp, *tp_prev;
 	int err;
 
-	list_for_each_entry(chain, &block->chain_list, list) {
-		for (tp = rtnl_dereference(chain->filter_chain); tp;
-		     tp = rtnl_dereference(tp->next)) {
+	lockdep_assert_held(&block->cb_lock);
+
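+	/* Iterate with temporary references; the previous chain/tp is put as
+	 * the iteration advances.
+	 */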
+	for (chain = __tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain_prev = chain,
+		     chain = __tcf_get_next_chain(block, chain),
+		     tcf_chain_put(chain_prev)) {
+		for (tp = __tcf_get_next_proto(chain, NULL); tp;
+		     tp_prev = tp,
+			     tp = __tcf_get_next_proto(chain, tp),
+			     tcf_proto_put(tp_prev, true, NULL)) {
 			if (tp->ops->reoffload) {
 				err = tp->ops->reoffload(tp, add, cb, cb_priv,
 							 extack);
@@ -864,93 +1474,92 @@
 	return 0;
 
 err_playback_remove:
+	tcf_proto_put(tp, true, NULL);
+	tcf_chain_put(chain);
 	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
 				    extack);
 	return err;
 }
 
-struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
-					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv,
-					     struct netlink_ext_ack *extack)
+static int tcf_block_bind(struct tcf_block *block,
+			  struct flow_block_offload *bo)
 {
-	struct tcf_block_cb *block_cb;
-	int err;
+	struct flow_block_cb *block_cb, *next;
+	int err, i = 0;
 
-	/* Replay any already present rules */
-	err = tcf_block_playback_offloads(block, cb, cb_priv, true,
-					  tcf_block_offload_in_use(block),
-					  extack);
-	if (err)
-		return ERR_PTR(err);
+	lockdep_assert_held(&block->cb_lock);
 
-	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
-	if (!block_cb)
-		return ERR_PTR(-ENOMEM);
-	block_cb->cb = cb;
-	block_cb->cb_ident = cb_ident;
-	block_cb->cb_priv = cb_priv;
-	list_add(&block_cb->list, &block->cb_list);
-	return block_cb;
-}
-EXPORT_SYMBOL(__tcf_block_cb_register);
+	list_for_each_entry(block_cb, &bo->cb_list, list) {
+		err = tcf_block_playback_offloads(block, block_cb->cb,
+						  block_cb->cb_priv, true,
+						  tcf_block_offload_in_use(block),
+						  bo->extack);
+		if (err)
+			goto err_unroll;
+		if (!bo->unlocked_driver_cb)
+			block->lockeddevcnt++;
 
-int tcf_block_cb_register(struct tcf_block *block,
-			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv, struct netlink_ext_ack *extack)
-{
-	struct tcf_block_cb *block_cb;
-
-	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
-					   extack);
-	return PTR_ERR_OR_ZERO(block_cb);
-}
-EXPORT_SYMBOL(tcf_block_cb_register);
-
-void __tcf_block_cb_unregister(struct tcf_block *block,
-			       struct tcf_block_cb *block_cb)
-{
-	tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
-				    false, tcf_block_offload_in_use(block),
-				    NULL);
-	list_del(&block_cb->list);
-	kfree(block_cb);
-}
-EXPORT_SYMBOL(__tcf_block_cb_unregister);
-
-void tcf_block_cb_unregister(struct tcf_block *block,
-			     tc_setup_cb_t *cb, void *cb_ident)
-{
-	struct tcf_block_cb *block_cb;
-
-	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
-	if (!block_cb)
-		return;
-	__tcf_block_cb_unregister(block, block_cb);
-}
-EXPORT_SYMBOL(tcf_block_cb_unregister);
-
-static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
-			     void *type_data, bool err_stop)
-{
-	struct tcf_block_cb *block_cb;
-	int ok_count = 0;
-	int err;
-
-	/* Make sure all netdevs sharing this block are offload-capable. */
-	if (block->nooffloaddevcnt && err_stop)
-		return -EOPNOTSUPP;
-
-	list_for_each_entry(block_cb, &block->cb_list, list) {
-		err = block_cb->cb(type, type_data, block_cb->cb_priv);
-		if (err) {
-			if (err_stop)
-				return err;
-		} else {
-			ok_count++;
-		}
+		i++;
 	}
-	return ok_count;
+	list_splice(&bo->cb_list, &block->flow_block.cb_list);
+
+	return 0;
+
+err_unroll:
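+	/* Only the first i callbacks were bound successfully; replay the
+	 * removal for those before freeing every entry on the list.
+	 */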
+	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+		if (i-- > 0) {
+			list_del(&block_cb->list);
+			tcf_block_playback_offloads(block, block_cb->cb,
+						    block_cb->cb_priv, false,
+						    tcf_block_offload_in_use(block),
+						    NULL);
+			if (!bo->unlocked_driver_cb)
+				block->lockeddevcnt--;
+		}
+		flow_block_cb_free(block_cb);
+	}
+
+	return err;
+}
+
+static void tcf_block_unbind(struct tcf_block *block,
+			     struct flow_block_offload *bo)
+{
+	struct flow_block_cb *block_cb, *next;
+
+	lockdep_assert_held(&block->cb_lock);
+
+	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+		tcf_block_playback_offloads(block, block_cb->cb,
+					    block_cb->cb_priv, false,
+					    tcf_block_offload_in_use(block),
+					    NULL);
+		list_del(&block_cb->list);
+		flow_block_cb_free(block_cb);
+		if (!bo->unlocked_driver_cb)
+			block->lockeddevcnt--;
+	}
+}
+
+static int tcf_block_setup(struct tcf_block *block,
+			   struct flow_block_offload *bo)
+{
+	int err;
+
+	switch (bo->command) {
+	case FLOW_BLOCK_BIND:
+		err = tcf_block_bind(block, bo);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		err = 0;
+		tcf_block_unbind(block, bo);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
 }
 
 /* Main classifier routine: scans classifier chain attached
@@ -960,7 +1569,6 @@
 int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		 struct tcf_result *res, bool compat_mode)
 {
-	__be16 protocol = tc_skb_protocol(skb);
 #ifdef CONFIG_NET_CLS_ACT
 	const int max_reclassify_loop = 4;
 	const struct tcf_proto *orig_tp = tp;
@@ -970,6 +1578,7 @@
 reclassify:
 #endif
 	for (; tp; tp = rcu_dereference_bh(tp->next)) {
+		__be16 protocol = tc_skb_protocol(skb);
 		int err;
 
 		if (tp->protocol != protocol &&
@@ -983,6 +1592,18 @@
 			goto reset;
 		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
 			first_tp = res->goto_tp;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+			{
+				struct tc_skb_ext *ext;
+
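+				/* Record the goto-chain target in an skb
+				 * extension.
+				 */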
+				ext = skb_ext_add(skb, TC_SKB_EXT);
+				if (WARN_ON_ONCE(!ext))
+					return TC_ACT_SHOT;
+
+				ext->chain = err & TC_ACT_EXT_VAL_MASK;
+			}
+#endif
 			goto reset;
 		}
 #endif
@@ -1002,7 +1623,6 @@
 	}
 
 	tp = first_tp;
-	protocol = tc_skb_protocol(skb);
 	goto reclassify;
 #endif
 }
@@ -1013,32 +1633,123 @@
 	struct tcf_proto __rcu *next;
 };
 
-static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
+					   struct tcf_chain_info *chain_info)
 {
-	return rtnl_dereference(*chain_info->pprev);
+	return tcf_chain_dereference(*chain_info->pprev, chain);
 }
 
-static void tcf_chain_tp_insert(struct tcf_chain *chain,
-				struct tcf_chain_info *chain_info,
-				struct tcf_proto *tp)
+static int tcf_chain_tp_insert(struct tcf_chain *chain,
+			       struct tcf_chain_info *chain_info,
+			       struct tcf_proto *tp)
 {
+	if (chain->flushing)
+		return -EAGAIN;
+
 	if (*chain_info->pprev == chain->filter_chain)
 		tcf_chain0_head_change(chain, tp);
-	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+	tcf_proto_get(tp);
+	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
 	rcu_assign_pointer(*chain_info->pprev, tp);
-	tcf_chain_hold(chain);
+
+	return 0;
 }
 
 static void tcf_chain_tp_remove(struct tcf_chain *chain,
 				struct tcf_chain_info *chain_info,
 				struct tcf_proto *tp)
 {
-	struct tcf_proto *next = rtnl_dereference(chain_info->next);
+	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
 
+	tcf_proto_mark_delete(tp);
 	if (tp == chain->filter_chain)
 		tcf_chain0_head_change(chain, next);
 	RCU_INIT_POINTER(*chain_info->pprev, next);
-	tcf_chain_put(chain);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+					   struct tcf_chain_info *chain_info,
+					   u32 protocol, u32 prio,
+					   bool prio_allocate);
+
+/* Try to insert a new proto.
+ * If a proto with the specified priority already exists, free the new proto
+ * and return the existing one.
+ */
+
+static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
+						    struct tcf_proto *tp_new,
+						    u32 protocol, u32 prio,
+						    bool rtnl_held)
+{
+	struct tcf_chain_info chain_info;
+	struct tcf_proto *tp;
+	int err = 0;
+
+	mutex_lock(&chain->filter_chain_lock);
+
+	if (tcf_proto_exists_destroying(chain, tp_new)) {
+		mutex_unlock(&chain->filter_chain_lock);
+		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+		return ERR_PTR(-EAGAIN);
+	}
+
+	tp = tcf_chain_tp_find(chain, &chain_info,
+			       protocol, prio, false);
+	if (!tp)
+		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
+	mutex_unlock(&chain->filter_chain_lock);
+
+	if (tp) {
+		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+		tp_new = tp;
+	} else if (err) {
+		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+		tp_new = ERR_PTR(err);
+	}
+
+	return tp_new;
+}
+
+static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
+				      struct tcf_proto *tp, bool rtnl_held,
+				      struct netlink_ext_ack *extack)
+{
+	struct tcf_chain_info chain_info;
+	struct tcf_proto *tp_iter;
+	struct tcf_proto **pprev;
+	struct tcf_proto *next;
+
+	mutex_lock(&chain->filter_chain_lock);
+
+	/* Atomically find and remove tp from chain. */
+	for (pprev = &chain->filter_chain;
+	     (tp_iter = tcf_chain_dereference(*pprev, chain));
+	     pprev = &tp_iter->next) {
+		if (tp_iter == tp) {
+			chain_info.pprev = pprev;
+			chain_info.next = tp_iter->next;
+			WARN_ON(tp_iter->deleting);
+			break;
+		}
+	}
+	/* Verify that tp still exists and no new filters were inserted
+	 * concurrently.
+	 * Mark tp for deletion if it is empty.
+	 */
+	if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
+		mutex_unlock(&chain->filter_chain_lock);
+		return;
+	}
+
+	tcf_proto_signal_destroying(chain, tp);
+	next = tcf_chain_dereference(chain_info.next, chain);
+	if (tp == chain->filter_chain)
+		tcf_chain0_head_change(chain, next);
+	RCU_INIT_POINTER(*chain_info.pprev, next);
+	mutex_unlock(&chain->filter_chain_lock);
+
+	tcf_proto_put(tp, rtnl_held, extack);
 }
 
 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
@@ -1051,7 +1762,8 @@
 
 	/* Check the chain for existence of proto-tcf with this priority */
 	for (pprev = &chain->filter_chain;
-	     (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+	     (tp = tcf_chain_dereference(*pprev, chain));
+	     pprev = &tp->next) {
 		if (tp->prio >= prio) {
 			if (tp->prio == prio) {
 				if (prio_allocate ||
@@ -1064,14 +1776,20 @@
 		}
 	}
 	chain_info->pprev = pprev;
-	chain_info->next = tp ? tp->next : NULL;
+	if (tp) {
+		chain_info->next = tp->next;
+		tcf_proto_get(tp);
+	} else {
+		chain_info->next = NULL;
+	}
 	return tp;
 }
 
 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 			 struct tcf_proto *tp, struct tcf_block *block,
 			 struct Qdisc *q, u32 parent, void *fh,
-			 u32 portid, u32 seq, u16 flags, int event)
+			 u32 portid, u32 seq, u16 flags, int event,
+			 bool rtnl_held)
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
@@ -1099,7 +1817,8 @@
 	if (!fh) {
 		tcm->tcm_handle = 0;
 	} else {
-		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+		if (tp->ops->dump &&
+		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
 			goto nla_put_failure;
 	}
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -1114,33 +1833,40 @@
 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 			  struct nlmsghdr *n, struct tcf_proto *tp,
 			  struct tcf_block *block, struct Qdisc *q,
-			  u32 parent, void *fh, int event, bool unicast)
+			  u32 parent, void *fh, int event, bool unicast,
+			  bool rtnl_held)
 {
 	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	int err = 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
 
 	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
-			  n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
+			  n->nlmsg_seq, n->nlmsg_flags, event,
+			  rtnl_held) <= 0) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
 
 	if (unicast)
-		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+	else
+		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+				     n->nlmsg_flags & NLM_F_ECHO);
 
-	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-			      n->nlmsg_flags & NLM_F_ECHO);
+	if (err > 0)
+		err = 0;
+	return err;
 }
 
 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
 			      struct nlmsghdr *n, struct tcf_proto *tp,
 			      struct tcf_block *block, struct Qdisc *q,
 			      u32 parent, void *fh, bool unicast, bool *last,
-			      struct netlink_ext_ack *extack)
+			      bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1151,39 +1877,50 @@
 		return -ENOBUFS;
 
 	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
-			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
+			  rtnl_held) <= 0) {
 		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
 		kfree_skb(skb);
 		return -EINVAL;
 	}
 
-	err = tp->ops->delete(tp, fh, last, extack);
+	err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
 	if (err) {
 		kfree_skb(skb);
 		return err;
 	}
 
 	if (unicast)
-		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
-
-	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-			     n->nlmsg_flags & NLM_F_ECHO);
+		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+	else
+		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+				     n->nlmsg_flags & NLM_F_ECHO);
 	if (err < 0)
 		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
+
+	if (err > 0)
+		err = 0;
 	return err;
 }
 
 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
 				 struct tcf_block *block, struct Qdisc *q,
 				 u32 parent, struct nlmsghdr *n,
-				 struct tcf_chain *chain, int event)
+				 struct tcf_chain *chain, int event,
+				 bool rtnl_held)
 {
 	struct tcf_proto *tp;
 
-	for (tp = rtnl_dereference(chain->filter_chain);
-	     tp; tp = rtnl_dereference(tp->next))
+	for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
+	     tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
 		tfilter_notify(net, oskb, n, tp, block,
-			       q, parent, NULL, event, false);
+			       q, parent, NULL, event, false, rtnl_held);
+}
+
+static void tfilter_put(struct tcf_proto *tp, void *fh)
+{
+	if (tp->ops->put && fh)
+		tp->ops->put(tp, fh);
 }
 
 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1191,6 +1928,7 @@
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
+	char name[IFNAMSIZ];
 	struct tcmsg *t;
 	u32 protocol;
 	u32 prio;
@@ -1206,6 +1944,7 @@
 	void *fh;
 	int err;
 	int tp_created;
+	bool rtnl_held = false;
 
 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
@@ -1213,7 +1952,8 @@
 replay:
 	tp_created = 0;
 
-	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
+	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
+				     rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1222,7 +1962,9 @@
 	prio = TC_H_MAJ(t->tcm_info);
 	prio_allocate = false;
 	parent = t->tcm_parent;
+	tp = NULL;
 	cl = 0;
+	block = NULL;
 
 	if (prio == 0) {
 		/* If no priority is provided by the user,
@@ -1239,8 +1981,33 @@
 
 	/* Find head of filter chain. */
 
-	block = tcf_block_find(net, &q, &parent, &cl,
-			       t->tcm_ifindex, t->tcm_block_index, extack);
+	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+	if (err)
+		return err;
+
+	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+		err = -EINVAL;
+		goto errout;
+	}
+
+	/* Take the rtnl mutex if rtnl_held was set to true on a previous
+	 * iteration, the block is shared (no qdisc found), the qdisc is not
+	 * unlocked, the classifier type is not specified, or the classifier
+	 * is not unlocked.
+	 */
+	if (rtnl_held ||
+	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+	    !tcf_proto_is_unlocked(name)) {
+		rtnl_held = true;
+		rtnl_lock();
+	}
+
+	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+	if (err)
+		goto errout;
+
+	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+				 extack);
 	if (IS_ERR(block)) {
 		err = PTR_ERR(block);
 		goto errout;
@@ -1259,40 +2026,62 @@
 		goto errout;
 	}
 
+	mutex_lock(&chain->filter_chain_lock);
 	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
 			       prio, prio_allocate);
 	if (IS_ERR(tp)) {
 		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
 		err = PTR_ERR(tp);
-		goto errout;
+		goto errout_locked;
 	}
 
 	if (tp == NULL) {
+		struct tcf_proto *tp_new = NULL;
+
+		if (chain->flushing) {
+			err = -EAGAIN;
+			goto errout_locked;
+		}
+
 		/* Proto-tcf does not exist, create new one */
 
 		if (tca[TCA_KIND] == NULL || !protocol) {
 			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
 			err = -EINVAL;
-			goto errout;
+			goto errout_locked;
 		}
 
 		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
 			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
 			err = -ENOENT;
-			goto errout;
+			goto errout_locked;
 		}
 
 		if (prio_allocate)
-			prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
+			prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
+							       &chain_info));
 
-		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
-				      protocol, prio, chain, extack);
+		mutex_unlock(&chain->filter_chain_lock);
+		tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
+					  protocol, prio, chain, rtnl_held,
+					  extack);
+		if (IS_ERR(tp_new)) {
+			err = PTR_ERR(tp_new);
+			goto errout_tp;
+		}
+
+		tp_created = 1;
+		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
+						rtnl_held);
 		if (IS_ERR(tp)) {
 			err = PTR_ERR(tp);
-			goto errout;
+			goto errout_tp;
 		}
-		tp_created = 1;
-	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+	} else {
+		mutex_unlock(&chain->filter_chain_lock);
+	}
+
+	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
 		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
 		err = -EINVAL;
 		goto errout;
@@ -1307,6 +2096,7 @@
 			goto errout;
 		}
 	} else if (n->nlmsg_flags & NLM_F_EXCL) {
+		tfilter_put(tp, fh);
 		NL_SET_ERR_MSG(extack, "Filter already exists");
 		err = -EEXIST;
 		goto errout;
@@ -1320,24 +2110,44 @@
 
 	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
 			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
-			      extack);
+			      rtnl_held, extack);
 	if (err == 0) {
-		if (tp_created)
-			tcf_chain_tp_insert(chain, &chain_info, tp);
 		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
-			       RTM_NEWTFILTER, false);
-	} else {
-		if (tp_created)
-			tcf_proto_destroy(tp, NULL);
+			       RTM_NEWTFILTER, false, rtnl_held);
+		tfilter_put(tp, fh);
+		/* q pointer is NULL for shared blocks */
+		if (q)
+			q->flags &= ~TCQ_F_CAN_BYPASS;
 	}
 
 errout:
-	if (chain)
-		tcf_chain_put(chain);
-	if (err == -EAGAIN)
+	if (err && tp_created)
+		tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
+errout_tp:
+	if (chain) {
+		if (tp && !IS_ERR(tp))
+			tcf_proto_put(tp, rtnl_held, NULL);
+		if (!tp_created)
+			tcf_chain_put(chain);
+	}
+	tcf_block_release(q, block, rtnl_held);
+
+	if (rtnl_held)
+		rtnl_unlock();
+
+	if (err == -EAGAIN) {
+		/* Take the rtnl lock in case the EAGAIN is caused by a
+		 * concurrent flush of the target chain.
+		 */
+		rtnl_held = true;
 		/* Replay the request. */
 		goto replay;
+	}
 	return err;
+
+errout_locked:
+	mutex_unlock(&chain->filter_chain_lock);
+	goto errout;
 }
 
 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1345,6 +2155,7 @@
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
+	char name[IFNAMSIZ];
 	struct tcmsg *t;
 	u32 protocol;
 	u32 prio;
@@ -1353,16 +2164,18 @@
 	struct Qdisc *q = NULL;
 	struct tcf_chain_info chain_info;
 	struct tcf_chain *chain = NULL;
-	struct tcf_block *block;
+	struct tcf_block *block = NULL;
 	struct tcf_proto *tp = NULL;
 	unsigned long cl = 0;
 	void *fh = NULL;
 	int err;
+	bool rtnl_held = false;
 
 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
+	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
+				     rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1378,8 +2191,32 @@
 
 	/* Find head of filter chain. */
 
-	block = tcf_block_find(net, &q, &parent, &cl,
-			       t->tcm_ifindex, t->tcm_block_index, extack);
+	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+	if (err)
+		return err;
+
+	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+		err = -EINVAL;
+		goto errout;
+	}
+	/* Take the rtnl mutex if flushing the whole chain, the block is shared
+	 * (no qdisc found), the qdisc is not unlocked, the classifier type is
+	 * not specified, or the classifier is not unlocked.
+	 */
+	if (!prio ||
+	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+	    !tcf_proto_is_unlocked(name)) {
+		rtnl_held = true;
+		rtnl_lock();
+	}
+
+	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+	if (err)
+		goto errout;
+
+	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+				 extack);
 	if (IS_ERR(block)) {
 		err = PTR_ERR(block);
 		goto errout;
@@ -1407,55 +2244,70 @@
 
 	if (prio == 0) {
 		tfilter_notify_chain(net, skb, block, q, parent, n,
-				     chain, RTM_DELTFILTER);
-		tcf_chain_flush(chain);
+				     chain, RTM_DELTFILTER, rtnl_held);
+		tcf_chain_flush(chain, rtnl_held);
 		err = 0;
 		goto errout;
 	}
 
+	mutex_lock(&chain->filter_chain_lock);
 	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
 			       prio, false);
 	if (!tp || IS_ERR(tp)) {
 		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
 		err = tp ? PTR_ERR(tp) : -ENOENT;
-		goto errout;
+		goto errout_locked;
 	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
 		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
 		err = -EINVAL;
+		goto errout_locked;
+	} else if (t->tcm_handle == 0) {
+		tcf_proto_signal_destroying(chain, tp);
+		tcf_chain_tp_remove(chain, &chain_info, tp);
+		mutex_unlock(&chain->filter_chain_lock);
+
+		tcf_proto_put(tp, rtnl_held, NULL);
+		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
+			       RTM_DELTFILTER, false, rtnl_held);
+		err = 0;
 		goto errout;
 	}
+	mutex_unlock(&chain->filter_chain_lock);
 
 	fh = tp->ops->get(tp, t->tcm_handle);
 
 	if (!fh) {
-		if (t->tcm_handle == 0) {
-			tcf_chain_tp_remove(chain, &chain_info, tp);
-			tfilter_notify(net, skb, n, tp, block, q, parent, fh,
-				       RTM_DELTFILTER, false);
-			tcf_proto_destroy(tp, extack);
-			err = 0;
-		} else {
-			NL_SET_ERR_MSG(extack, "Specified filter handle not found");
-			err = -ENOENT;
-		}
+		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
+		err = -ENOENT;
 	} else {
 		bool last;
 
 		err = tfilter_del_notify(net, skb, n, tp, block,
 					 q, parent, fh, false, &last,
-					 extack);
+					 rtnl_held, extack);
+
 		if (err)
 			goto errout;
-		if (last) {
-			tcf_chain_tp_remove(chain, &chain_info, tp);
-			tcf_proto_destroy(tp, extack);
-		}
+		if (last)
+			tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
 	}
 
 errout:
-	if (chain)
+	if (chain) {
+		if (tp && !IS_ERR(tp))
+			tcf_proto_put(tp, rtnl_held, NULL);
 		tcf_chain_put(chain);
+	}
+	tcf_block_release(q, block, rtnl_held);
+
+	if (rtnl_held)
+		rtnl_unlock();
+
 	return err;
+
+errout_locked:
+	mutex_unlock(&chain->filter_chain_lock);
+	goto errout;
 }
 
 static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1463,6 +2315,7 @@
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
+	char name[IFNAMSIZ];
 	struct tcmsg *t;
 	u32 protocol;
 	u32 prio;
@@ -1471,13 +2324,15 @@
 	struct Qdisc *q = NULL;
 	struct tcf_chain_info chain_info;
 	struct tcf_chain *chain = NULL;
-	struct tcf_block *block;
+	struct tcf_block *block = NULL;
 	struct tcf_proto *tp = NULL;
 	unsigned long cl = 0;
 	void *fh = NULL;
 	int err;
+	bool rtnl_held = false;
 
-	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
+	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
+				     rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1493,8 +2348,31 @@
 
 	/* Find head of filter chain. */
 
-	block = tcf_block_find(net, &q, &parent, &cl,
-			       t->tcm_ifindex, t->tcm_block_index, extack);
+	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+	if (err)
+		return err;
+
+	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+		err = -EINVAL;
+		goto errout;
+	}
+	/* Take the rtnl mutex if the block is shared (no qdisc found), the
+	 * qdisc is not unlocked, the classifier type is not specified, or the
+	 * classifier is not unlocked.
+	 */
+	if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+	    !tcf_proto_is_unlocked(name)) {
+		rtnl_held = true;
+		rtnl_lock();
+	}
+
+	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+	if (err)
+		goto errout;
+
+	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+				 extack);
 	if (IS_ERR(block)) {
 		err = PTR_ERR(block);
 		goto errout;
@@ -1513,8 +2391,10 @@
 		goto errout;
 	}
 
+	mutex_lock(&chain->filter_chain_lock);
 	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
 			       prio, false);
+	mutex_unlock(&chain->filter_chain_lock);
 	if (!tp || IS_ERR(tp)) {
 		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
 		err = tp ? PTR_ERR(tp) : -ENOENT;
@@ -1532,14 +2412,23 @@
 		err = -ENOENT;
 	} else {
 		err = tfilter_notify(net, skb, n, tp, block, q, parent,
-				     fh, RTM_NEWTFILTER, true);
+				     fh, RTM_NEWTFILTER, true, rtnl_held);
 		if (err < 0)
 			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
 	}
 
+	tfilter_put(tp, fh);
 errout:
-	if (chain)
+	if (chain) {
+		if (tp && !IS_ERR(tp))
+			tcf_proto_put(tp, rtnl_held, NULL);
 		tcf_chain_put(chain);
+	}
+	tcf_block_release(q, block, rtnl_held);
+
+	if (rtnl_held)
+		rtnl_unlock();
+
 	return err;
 }
 
@@ -1560,7 +2449,7 @@
 	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
 			     n, NETLINK_CB(a->cb->skb).portid,
 			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
-			     RTM_NEWTFILTER);
+			     RTM_NEWTFILTER, true);
 }
 
 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
@@ -1570,11 +2459,15 @@
 	struct net *net = sock_net(skb->sk);
 	struct tcf_block *block = chain->block;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
+	struct tcf_proto *tp, *tp_prev;
 	struct tcf_dump_args arg;
-	struct tcf_proto *tp;
 
-	for (tp = rtnl_dereference(chain->filter_chain);
-	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
+	for (tp = __tcf_get_next_proto(chain, NULL);
+	     tp;
+	     tp_prev = tp,
+		     tp = __tcf_get_next_proto(chain, tp),
+		     tcf_proto_put(tp_prev, true, NULL),
+		     (*p_index)++) {
 		if (*p_index < index_start)
 			continue;
 		if (TC_H_MAJ(tcm->tcm_info) &&
@@ -1590,9 +2483,8 @@
 			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
 					  NETLINK_CB(cb->skb).portid,
 					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					  RTM_NEWTFILTER) <= 0)
-				return false;
-
+					  RTM_NEWTFILTER, true) <= 0)
+				goto errout;
 			cb->args[1] = 1;
 		}
 		if (!tp->ops->walk)
@@ -1607,23 +2499,27 @@
 		arg.w.skip = cb->args[1] - 1;
 		arg.w.count = 0;
 		arg.w.cookie = cb->args[2];
-		tp->ops->walk(tp, &arg.w);
+		tp->ops->walk(tp, &arg.w, true);
 		cb->args[2] = arg.w.cookie;
 		cb->args[1] = arg.w.count + 1;
 		if (arg.w.stop)
-			return false;
+			goto errout;
 	}
 	return true;
+
+errout:
+	tcf_proto_put(tp, true, NULL);
+	return false;
 }
 
 /* called with RTNL */
 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct tcf_chain *chain, *chain_prev;
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
 	struct Qdisc *q = NULL;
 	struct tcf_block *block;
-	struct tcf_chain *chain;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
 	long index_start;
 	long index;
@@ -1633,12 +2529,13 @@
 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
 		return skb->len;
 
-	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
+				     NULL, cb->extack);
 	if (err)
 		return err;
 
 	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
-		block = tcf_block_lookup(net, tcm->tcm_block_index);
+		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
 		if (!block)
 			goto out;
 		/* If we work with block index, q is NULL and parent value
@@ -1686,17 +2583,24 @@
 	index_start = cb->args[0];
 	index = 0;
 
-	list_for_each_entry(chain, &block->chain_list, list) {
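+	/* __tcf_get_next_chain() takes a reference to each chain while it is
+	 * being dumped; the previous chain is put as the loop advances.
+	 */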
+	for (chain = __tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain_prev = chain,
+		     chain = __tcf_get_next_chain(block, chain),
+		     tcf_chain_put(chain_prev)) {
 		if (tca[TCA_CHAIN] &&
 		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
 			continue;
 		if (!tcf_chain_dump(chain, q, parent, skb, cb,
 				    index_start, &index)) {
+			tcf_chain_put(chain);
 			err = -EMSGSIZE;
 			break;
 		}
 	}
 
+	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+		tcf_block_refcnt_put(block, true);
 	cb->args[0] = index;
 
 out:
@@ -1706,8 +2610,10 @@
 	return skb->len;
 }
 
-static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
-			      struct sk_buff *skb, struct tcf_block *block,
+static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
+			      void *tmplt_priv, u32 chain_index,
+			      struct net *net, struct sk_buff *skb,
+			      struct tcf_block *block,
 			      u32 portid, u32 seq, u16 flags, int event)
 {
 	unsigned char *b = skb_tail_pointer(skb);
@@ -1716,8 +2622,8 @@
 	struct tcmsg *tcm;
 	void *priv;
 
-	ops = chain->tmplt_ops;
-	priv = chain->tmplt_priv;
+	ops = tmplt_ops;
+	priv = tmplt_priv;
 
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 	if (!nlh)
@@ -1735,7 +2641,7 @@
 		tcm->tcm_block_index = block->index;
 	}
 
-	if (nla_put_u32(skb, TCA_CHAIN, chain->index))
+	if (nla_put_u32(skb, TCA_CHAIN, chain_index))
 		goto nla_put_failure;
 
 	if (ops) {
@@ -1761,18 +2667,50 @@
 	struct tcf_block *block = chain->block;
 	struct net *net = block->net;
 	struct sk_buff *skb;
+	int err = 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
 
-	if (tc_chain_fill_node(chain, net, skb, block, portid,
+	if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
+			       chain->index, net, skb, block, portid,
 			       seq, flags, event) <= 0) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
 
 	if (unicast)
+		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+	else
+		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+				     flags & NLM_F_ECHO);
+
+	if (err > 0)
+		err = 0;
+	return err;
+}
+
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+				  void *tmplt_priv, u32 chain_index,
+				  struct tcf_block *block, struct sk_buff *oskb,
+				  u32 seq, u16 flags, bool unicast)
+{
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct net *net = block->net;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
+			       block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	if (unicast)
 		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
 
 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
@@ -1789,7 +2727,7 @@
 	if (!tca[TCA_KIND])
 		return 0;
 
-	ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
+	ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
 	if (IS_ERR(ops))
 		return PTR_ERR(ops);
 	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
@@ -1807,16 +2745,15 @@
 	return 0;
 }
 
-static void tc_chain_tmplt_del(struct tcf_chain *chain)
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+			       void *tmplt_priv)
 {
-	const struct tcf_proto_ops *ops = chain->tmplt_ops;
-
 	/* If template ops are set, no work to do for us. */
-	if (!ops)
+	if (!tmplt_ops)
 		return;
 
-	ops->tmplt_destroy(chain->tmplt_priv);
-	module_put(ops->owner);
+	tmplt_ops->tmplt_destroy(tmplt_priv);
+	module_put(tmplt_ops->owner);
 }
 
 /* Add/delete/get a chain */
@@ -1840,7 +2777,8 @@
 		return -EPERM;
 
 replay:
-	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
+	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
+				     rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1856,8 +2794,11 @@
 	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
 	if (chain_index > TC_ACT_EXT_VAL_MASK) {
 		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
-		return -EINVAL;
+		err = -EINVAL;
+		goto errout_block;
 	}
+
+	mutex_lock(&block->lock);
 	chain = tcf_chain_lookup(block, chain_index);
 	if (n->nlmsg_type == RTM_NEWCHAIN) {
 		if (chain) {
@@ -1868,51 +2809,62 @@
 				tcf_chain_hold(chain);
 			} else {
 				NL_SET_ERR_MSG(extack, "Filter chain already exists");
-				return -EEXIST;
+				err = -EEXIST;
+				goto errout_block_locked;
 			}
 		} else {
 			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
 				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
-				return -ENOENT;
+				err = -ENOENT;
+				goto errout_block_locked;
 			}
 			chain = tcf_chain_create(block, chain_index);
 			if (!chain) {
 				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
-				return -ENOMEM;
+				err = -ENOMEM;
+				goto errout_block_locked;
 			}
 		}
 	} else {
 		if (!chain || tcf_chain_held_by_acts_only(chain)) {
 			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
-			return -EINVAL;
+			err = -EINVAL;
+			goto errout_block_locked;
 		}
 		tcf_chain_hold(chain);
 	}
 
-	switch (n->nlmsg_type) {
-	case RTM_NEWCHAIN:
-		err = tc_chain_tmplt_add(chain, net, tca, extack);
-		if (err)
-			goto errout;
-		/* In case the chain was successfully added, take a reference
-		 * to the chain. This ensures that an empty chain
-		 * does not disappear at the end of this function.
+	if (n->nlmsg_type == RTM_NEWCHAIN) {
+		/* Modifying chain requires holding parent block lock. In case
+		 * the chain was successfully added, take a reference to the
+		 * chain. This ensures that an empty chain does not disappear at
+		 * the end of this function.
 		 */
 		tcf_chain_hold(chain);
 		chain->explicitly_created = true;
+	}
+	mutex_unlock(&block->lock);
+
+	switch (n->nlmsg_type) {
+	case RTM_NEWCHAIN:
+		err = tc_chain_tmplt_add(chain, net, tca, extack);
+		if (err) {
+			tcf_chain_put_explicitly_created(chain);
+			goto errout;
+		}
+
 		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
 				RTM_NEWCHAIN, false);
 		break;
 	case RTM_DELCHAIN:
 		tfilter_notify_chain(net, skb, block, q, parent, n,
-				     chain, RTM_DELTFILTER);
+				     chain, RTM_DELTFILTER, true);
 		/* Flush the chain first as the user requested chain removal. */
-		tcf_chain_flush(chain);
+		tcf_chain_flush(chain, true);
 		/* In case the chain was successfully deleted, put a reference
 		 * to the chain previously taken during addition.
 		 */
 		tcf_chain_put_explicitly_created(chain);
-		chain->explicitly_created = false;
 		break;
 	case RTM_GETCHAIN:
 		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
@@ -1928,10 +2880,16 @@
 
 errout:
 	tcf_chain_put(chain);
+errout_block:
+	tcf_block_release(q, block, true);
 	if (err == -EAGAIN)
 		/* Replay the request. */
 		goto replay;
 	return err;
+
+errout_block_locked:
+	mutex_unlock(&block->lock);
+	goto errout_block;
 }
 
 /* called with RTNL */
@@ -1941,8 +2899,8 @@
 	struct nlattr *tca[TCA_MAX + 1];
 	struct Qdisc *q = NULL;
 	struct tcf_block *block;
-	struct tcf_chain *chain;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
+	struct tcf_chain *chain;
 	long index_start;
 	long index;
 	u32 parent;
@@ -1951,13 +2909,13 @@
 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
 		return skb->len;
 
-	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
-			  NULL);
+	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
+				     rtm_tca_policy, cb->extack);
 	if (err)
 		return err;
 
 	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
-		block = tcf_block_lookup(net, tcm->tcm_block_index);
+		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
 		if (!block)
 			goto out;
 		/* If we work with block index, q is NULL and parent value
@@ -2005,6 +2963,7 @@
 	index_start = cb->args[0];
 	index = 0;
 
+	mutex_lock(&block->lock);
 	list_for_each_entry(chain, &block->chain_list, list) {
 		if ((tca[TCA_CHAIN] &&
 		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
@@ -2015,7 +2974,8 @@
 		}
 		if (tcf_chain_held_by_acts_only(chain))
 			continue;
-		err = tc_chain_fill_node(chain, net, skb, block,
+		err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
+					 chain->index, net, skb, block,
 					 NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					 RTM_NEWCHAIN);
@@ -2023,7 +2983,10 @@
 			break;
 		index++;
 	}
+	mutex_unlock(&block->lock);
 
+	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+		tcf_block_refcnt_put(block, true);
 	cb->args[0] = index;
 
 out:
@@ -2036,8 +2999,10 @@
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
-	kfree(exts->actions);
+	if (exts->actions) {
+		tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
+		kfree(exts->actions);
+	}
 	exts->nr_actions = 0;
 #endif
 }
@@ -2045,7 +3010,7 @@
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
-		      struct netlink_ext_ack *extack)
+		      bool rtnl_held, struct netlink_ext_ack *extack)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	{
@@ -2055,7 +3020,8 @@
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tp, tb[exts->police],
 						rate_tlv, "police", ovr,
-						TCA_ACT_BIND, true, extack);
+						TCA_ACT_BIND, rtnl_held,
+						extack);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
@@ -2067,13 +3033,12 @@
 
 			err = tcf_action_init(net, tp, tb[exts->action],
 					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-					      exts->actions, &attr_size, true,
-					      extack);
+					      exts->actions, &attr_size,
+					      rtnl_held, extack);
 			if (err < 0)
 				return err;
 			exts->nr_actions = err;
 		}
-		exts->net = net;
 	}
 #else
 	if ((exts->action && tb[exts->action]) ||
@@ -2120,7 +3085,7 @@
 		 * tc data even if iproute2 was newer - jhs
 		 */
 		if (exts->type != TCA_OLD_COMPAT) {
-			nest = nla_nest_start(skb, exts->action);
+			nest = nla_nest_start_noflag(skb, exts->action);
 			if (nest == NULL)
 				goto nla_put_failure;
 
@@ -2129,7 +3094,7 @@
 			nla_nest_end(skb, nest);
 		} else if (exts->police) {
 			struct tc_action *act = tcf_exts_first_act(exts);
-			nest = nla_nest_start(skb, exts->police);
+			nest = nla_nest_start_noflag(skb, exts->police);
 			if (nest == NULL || !act)
 				goto nla_put_failure;
 			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
@@ -2160,62 +3125,489 @@
 }
 EXPORT_SYMBOL(tcf_exts_dump_stats);
 
-static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
-				       enum tc_setup_type type,
-				       void *type_data, bool err_stop)
+static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
 {
-	int ok_count = 0;
-#ifdef CONFIG_NET_CLS_ACT
-	const struct tc_action *a;
-	struct net_device *dev;
-	int i, ret;
+	if (*flags & TCA_CLS_FLAGS_IN_HW)
+		return;
+	*flags |= TCA_CLS_FLAGS_IN_HW;
+	atomic_inc(&block->offloadcnt);
+}
 
-	if (!tcf_exts_has_actions(exts))
-		return 0;
+static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
+{
+	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
+		return;
+	*flags &= ~TCA_CLS_FLAGS_IN_HW;
+	atomic_dec(&block->offloadcnt);
+}
 
-	for (i = 0; i < exts->nr_actions; i++) {
-		a = exts->actions[i];
-		if (!a->ops->get_dev)
-			continue;
-		dev = a->ops->get_dev(a);
-		if (!dev)
-			continue;
-		ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
-		a->ops->put_dev(dev);
-		if (ret < 0)
-			return ret;
-		ok_count += ret;
+static void tc_cls_offload_cnt_update(struct tcf_block *block,
+				      struct tcf_proto *tp, u32 *cnt,
+				      u32 *flags, u32 diff, bool add)
+{
+	lockdep_assert_held(&block->cb_lock);
+
+	spin_lock(&tp->lock);
+	if (add) {
+		if (!*cnt)
+			tcf_block_offload_inc(block, flags);
+		*cnt += diff;
+	} else {
+		*cnt -= diff;
+		if (!*cnt)
+			tcf_block_offload_dec(block, flags);
 	}
-#endif
+	spin_unlock(&tp->lock);
+}
+
+static void
+tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
+			 u32 *cnt, u32 *flags)
+{
+	lockdep_assert_held(&block->cb_lock);
+
+	spin_lock(&tp->lock);
+	tcf_block_offload_dec(block, flags);
+	*cnt = 0;
+	spin_unlock(&tp->lock);
+}
+
+static int
+__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
+		   void *type_data, bool err_stop)
+{
+	struct flow_block_cb *block_cb;
+	int ok_count = 0;
+	int err;
+
+	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
+		err = block_cb->cb(type, type_data, block_cb->cb_priv);
+		if (err) {
+			if (err_stop)
+				return err;
+		} else {
+			ok_count++;
+		}
+	}
 	return ok_count;
 }
 
-int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
-		     enum tc_setup_type type, void *type_data, bool err_stop)
+int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
+		     void *type_data, bool err_stop, bool rtnl_held)
 {
+	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
 	int ok_count;
-	int ret;
 
-	ret = tcf_block_cb_call(block, type, type_data, err_stop);
-	if (ret < 0)
-		return ret;
-	ok_count = ret;
+retry:
+	if (take_rtnl)
+		rtnl_lock();
+	down_read(&block->cb_lock);
+	/* Need to obtain rtnl lock if block is bound to devs that require it.
+	 * In block bind code cb_lock is obtained while holding rtnl, so we must
+	 * obtain the locks in the same order here.
+	 */
+	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
+		up_read(&block->cb_lock);
+		take_rtnl = true;
+		goto retry;
+	}
 
-	if (!exts || ok_count)
-		return ok_count;
-	ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
-	if (ret < 0)
-		return ret;
-	ok_count += ret;
+	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
 
+	up_read(&block->cb_lock);
+	if (take_rtnl)
+		rtnl_unlock();
 	return ok_count;
 }
 EXPORT_SYMBOL(tc_setup_cb_call);
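
/*
 * A minimal userspace sketch (illustrative names, not kernel API) of the
 * lock-ordering retry used by tc_setup_cb_call() above: rtnl ranks above
 * cb_lock, so if the need for rtnl is only discovered while holding cb_lock,
 * the caller must back out and re-acquire both locks in rtnl -> cb_lock
 * order. Compile with -pthread.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t rtnl = PTHREAD_MUTEX_INITIALIZER;      /* outer lock */
static pthread_rwlock_t cb_lock = PTHREAD_RWLOCK_INITIALIZER; /* inner lock */
static atomic_int lockeddevcnt;		/* devs that require rtnl */

static int setup_cb_call(int rtnl_held)
{
	int take_rtnl = atomic_load(&lockeddevcnt) && !rtnl_held;

retry:
	if (take_rtnl)
		pthread_mutex_lock(&rtnl);
	pthread_rwlock_rdlock(&cb_lock);
	/* The counter may have become nonzero after it was sampled; if so,
	 * back out and retry with the outer lock taken first.
	 */
	if (!rtnl_held && !take_rtnl && atomic_load(&lockeddevcnt)) {
		pthread_rwlock_unlock(&cb_lock);
		take_rtnl = 1;
		goto retry;
	}

	puts("block callbacks would run here");

	pthread_rwlock_unlock(&cb_lock);
	if (take_rtnl)
		pthread_mutex_unlock(&rtnl);
	return 0;
}

int main(void)
{
	atomic_store(&lockeddevcnt, 1);
	return setup_cb_call(0 /* rtnl not held */);
}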
 
+/* Non-destructive filter add. If a filter that wasn't already in hardware is
+ * successfully offloaded, increment the block offload counter. On failure, a
+ * previously offloaded filter is considered intact and the offload counter is
+ * not decremented.
+ */
+
+int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
+		    enum tc_setup_type type, void *type_data, bool err_stop,
+		    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
+{
+	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
+	int ok_count;
+
+retry:
+	if (take_rtnl)
+		rtnl_lock();
+	down_read(&block->cb_lock);
+	/* Need to obtain rtnl lock if block is bound to devs that require it.
+	 * In block bind code cb_lock is obtained while holding rtnl, so we must
+	 * obtain the locks in the same order here.
+	 */
+	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
+		up_read(&block->cb_lock);
+		take_rtnl = true;
+		goto retry;
+	}
+
+	/* Make sure all netdevs sharing this block are offload-capable. */
+	if (block->nooffloaddevcnt && err_stop) {
+		ok_count = -EOPNOTSUPP;
+		goto err_unlock;
+	}
+
+	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+	if (ok_count < 0)
+		goto err_unlock;
+
+	if (tp->ops->hw_add)
+		tp->ops->hw_add(tp, type_data);
+	if (ok_count > 0)
+		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
+					  ok_count, true);
+err_unlock:
+	up_read(&block->cb_lock);
+	if (take_rtnl)
+		rtnl_unlock();
+	return ok_count < 0 ? ok_count : 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_add);
+
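/*
 * A userspace sketch of the offload accounting shared by tc_setup_cb_add(),
 * tc_setup_cb_replace() and tc_setup_cb_destroy() above: the block-wide flag
 * and counter are only touched on 0 <-> nonzero transitions of the
 * per-filter in_hw_count. Types and names here are illustrative stand-ins,
 * not the kernel definitions.
 */
#include <stdio.h>

#define IN_HW 0x1u

struct toy_block  { unsigned int offloadcnt; };
struct toy_filter { unsigned int in_hw_count; unsigned int flags; };

static void offload_cnt_update(struct toy_block *b, struct toy_filter *f,
			       unsigned int diff, int add)
{
	if (add) {
		if (!f->in_hw_count) {		/* 0 -> nonzero transition */
			f->flags |= IN_HW;
			b->offloadcnt++;
		}
		f->in_hw_count += diff;
	} else {
		f->in_hw_count -= diff;
		if (!f->in_hw_count) {		/* nonzero -> 0 transition */
			f->flags &= ~IN_HW;
			b->offloadcnt--;
		}
	}
}

int main(void)
{
	struct toy_block b = { 0 };
	struct toy_filter f = { 0, 0 };

	offload_cnt_update(&b, &f, 2, 1);	/* offloaded by two drivers */
	offload_cnt_update(&b, &f, 2, 0);	/* both drivers removed it */
	printf("offloadcnt=%u flags=%#x\n", b.offloadcnt, f.flags);
	return 0;
}
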
+/* Destructive filter replace. If a filter that wasn't already in hardware is
+ * successfully offloaded, increment the block offload counter. On failure, the
+ * previously offloaded filter is considered destroyed and the offload counter
+ * is decremented.
+ */
+
+int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
+			enum tc_setup_type type, void *type_data, bool err_stop,
+			u32 *old_flags, unsigned int *old_in_hw_count,
+			u32 *new_flags, unsigned int *new_in_hw_count,
+			bool rtnl_held)
+{
+	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
+	int ok_count;
+
+retry:
+	if (take_rtnl)
+		rtnl_lock();
+	down_read(&block->cb_lock);
+	/* Need to obtain rtnl lock if block is bound to devs that require it.
+	 * In block bind code cb_lock is obtained while holding rtnl, so we must
+	 * obtain the locks in the same order here.
+	 */
+	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
+		up_read(&block->cb_lock);
+		take_rtnl = true;
+		goto retry;
+	}
+
+	/* Make sure all netdevs sharing this block are offload-capable. */
+	if (block->nooffloaddevcnt && err_stop) {
+		ok_count = -EOPNOTSUPP;
+		goto err_unlock;
+	}
+
+	tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
+	if (tp->ops->hw_del)
+		tp->ops->hw_del(tp, type_data);
+
+	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+	if (ok_count < 0)
+		goto err_unlock;
+
+	if (tp->ops->hw_add)
+		tp->ops->hw_add(tp, type_data);
+	if (ok_count > 0)
+		tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
+					  new_flags, ok_count, true);
+err_unlock:
+	up_read(&block->cb_lock);
+	if (take_rtnl)
+		rtnl_unlock();
+	return ok_count < 0 ? ok_count : 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_replace);
+
+/* Destroy the filter and decrement the block offload counter, if the filter
+ * was previously offloaded.
+ */
+
+int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
+			enum tc_setup_type type, void *type_data, bool err_stop,
+			u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
+{
+	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
+	int ok_count;
+
+retry:
+	if (take_rtnl)
+		rtnl_lock();
+	down_read(&block->cb_lock);
+	/* Need to obtain rtnl lock if block is bound to devs that require it.
+	 * In block bind code cb_lock is obtained while holding rtnl, so we must
+	 * obtain the locks in the same order here.
+	 */
+	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
+		up_read(&block->cb_lock);
+		take_rtnl = true;
+		goto retry;
+	}
+
+	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+
+	tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
+	if (tp->ops->hw_del)
+		tp->ops->hw_del(tp, type_data);
+
+	up_read(&block->cb_lock);
+	if (take_rtnl)
+		rtnl_unlock();
+	return ok_count < 0 ? ok_count : 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_destroy);
+
+int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
+			  bool add, flow_setup_cb_t *cb,
+			  enum tc_setup_type type, void *type_data,
+			  void *cb_priv, u32 *flags, unsigned int *in_hw_count)
+{
+	int err = cb(type, type_data, cb_priv);
+
+	if (err) {
+		if (add && tc_skip_sw(*flags))
+			return err;
+	} else {
+		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
+					  add);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_reoffload);
+
+void tc_cleanup_flow_action(struct flow_action *flow_action)
+{
+	struct flow_action_entry *entry;
+	int i;
+
+	flow_action_for_each(i, entry, flow_action)
+		if (entry->destructor)
+			entry->destructor(entry->destructor_priv);
+}
+EXPORT_SYMBOL(tc_cleanup_flow_action);
+
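/*
 * A small sketch of the destructor/destructor_priv idiom that
 * tc_cleanup_flow_action() above relies on: entries that allocated resources
 * register a callback, and cleanup simply walks the array invoking the
 * callbacks that are set. Illustrative userspace code, not the kernel API.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_entry {
	void (*destructor)(void *priv);
	void *destructor_priv;
};

static void put_tunnel(void *priv)
{
	free(priv);	/* mirrors tcf_tunnel_encap_put_tunnel() below */
}

int main(void)
{
	struct toy_entry entries[2] = { { 0 } };

	/* only the first entry owns a resource */
	entries[0].destructor = put_tunnel;
	entries[0].destructor_priv = malloc(64);

	for (int i = 0; i < 2; i++)
		if (entries[i].destructor)
			entries[i].destructor(entries[i].destructor_priv);

	puts("all registered destructors ran");
	return 0;
}
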
+static void tcf_mirred_get_dev(struct flow_action_entry *entry,
+			       const struct tc_action *act)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	entry->dev = act->ops->get_dev(act, &entry->destructor);
+	if (!entry->dev)
+		return;
+	entry->destructor_priv = entry->dev;
+#endif
+}
+
+static void tcf_tunnel_encap_put_tunnel(void *priv)
+{
+	struct ip_tunnel_info *tunnel = priv;
+
+	kfree(tunnel);
+}
+
+static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
+				       const struct tc_action *act)
+{
+	entry->tunnel = tcf_tunnel_info_copy(act);
+	if (!entry->tunnel)
+		return -ENOMEM;
+	entry->destructor = tcf_tunnel_encap_put_tunnel;
+	entry->destructor_priv = entry->tunnel;
+	return 0;
+}
+
+static void tcf_sample_get_group(struct flow_action_entry *entry,
+				 const struct tc_action *act)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	entry->sample.psample_group =
+		act->ops->get_psample_group(act, &entry->destructor);
+	entry->destructor_priv = entry->sample.psample_group;
+#endif
+}
+
+int tc_setup_flow_action(struct flow_action *flow_action,
+			 const struct tcf_exts *exts, bool rtnl_held)
+{
+	const struct tc_action *act;
+	int i, j, k, err = 0;
+
+	if (!exts)
+		return 0;
+
+	if (!rtnl_held)
+		rtnl_lock();
+
+	j = 0;
+	tcf_exts_for_each_action(i, act, exts) {
+		struct flow_action_entry *entry;
+
+		entry = &flow_action->entries[j];
+		if (is_tcf_gact_ok(act)) {
+			entry->id = FLOW_ACTION_ACCEPT;
+		} else if (is_tcf_gact_shot(act)) {
+			entry->id = FLOW_ACTION_DROP;
+		} else if (is_tcf_gact_trap(act)) {
+			entry->id = FLOW_ACTION_TRAP;
+		} else if (is_tcf_gact_goto_chain(act)) {
+			entry->id = FLOW_ACTION_GOTO;
+			entry->chain_index = tcf_gact_goto_chain_index(act);
+		} else if (is_tcf_mirred_egress_redirect(act)) {
+			entry->id = FLOW_ACTION_REDIRECT;
+			tcf_mirred_get_dev(entry, act);
+		} else if (is_tcf_mirred_egress_mirror(act)) {
+			entry->id = FLOW_ACTION_MIRRED;
+			tcf_mirred_get_dev(entry, act);
+		} else if (is_tcf_mirred_ingress_redirect(act)) {
+			entry->id = FLOW_ACTION_REDIRECT_INGRESS;
+			tcf_mirred_get_dev(entry, act);
+		} else if (is_tcf_mirred_ingress_mirror(act)) {
+			entry->id = FLOW_ACTION_MIRRED_INGRESS;
+			tcf_mirred_get_dev(entry, act);
+		} else if (is_tcf_vlan(act)) {
+			switch (tcf_vlan_action(act)) {
+			case TCA_VLAN_ACT_PUSH:
+				entry->id = FLOW_ACTION_VLAN_PUSH;
+				entry->vlan.vid = tcf_vlan_push_vid(act);
+				entry->vlan.proto = tcf_vlan_push_proto(act);
+				entry->vlan.prio = tcf_vlan_push_prio(act);
+				break;
+			case TCA_VLAN_ACT_POP:
+				entry->id = FLOW_ACTION_VLAN_POP;
+				break;
+			case TCA_VLAN_ACT_MODIFY:
+				entry->id = FLOW_ACTION_VLAN_MANGLE;
+				entry->vlan.vid = tcf_vlan_push_vid(act);
+				entry->vlan.proto = tcf_vlan_push_proto(act);
+				entry->vlan.prio = tcf_vlan_push_prio(act);
+				break;
+			default:
+				err = -EOPNOTSUPP;
+				goto err_out;
+			}
+		} else if (is_tcf_tunnel_set(act)) {
+			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
+			err = tcf_tunnel_encap_get_tunnel(entry, act);
+			if (err)
+				goto err_out;
+		} else if (is_tcf_tunnel_release(act)) {
+			entry->id = FLOW_ACTION_TUNNEL_DECAP;
+		} else if (is_tcf_pedit(act)) {
+			for (k = 0; k < tcf_pedit_nkeys(act); k++) {
+				switch (tcf_pedit_cmd(act, k)) {
+				case TCA_PEDIT_KEY_EX_CMD_SET:
+					entry->id = FLOW_ACTION_MANGLE;
+					break;
+				case TCA_PEDIT_KEY_EX_CMD_ADD:
+					entry->id = FLOW_ACTION_ADD;
+					break;
+				default:
+					err = -EOPNOTSUPP;
+					goto err_out;
+				}
+				entry->mangle.htype = tcf_pedit_htype(act, k);
+				entry->mangle.mask = tcf_pedit_mask(act, k);
+				entry->mangle.val = tcf_pedit_val(act, k);
+				entry->mangle.offset = tcf_pedit_offset(act, k);
+				entry = &flow_action->entries[++j];
+			}
+		} else if (is_tcf_csum(act)) {
+			entry->id = FLOW_ACTION_CSUM;
+			entry->csum_flags = tcf_csum_update_flags(act);
+		} else if (is_tcf_skbedit_mark(act)) {
+			entry->id = FLOW_ACTION_MARK;
+			entry->mark = tcf_skbedit_mark(act);
+		} else if (is_tcf_sample(act)) {
+			entry->id = FLOW_ACTION_SAMPLE;
+			entry->sample.trunc_size = tcf_sample_trunc_size(act);
+			entry->sample.truncate = tcf_sample_truncate(act);
+			entry->sample.rate = tcf_sample_rate(act);
+			tcf_sample_get_group(entry, act);
+		} else if (is_tcf_police(act)) {
+			entry->id = FLOW_ACTION_POLICE;
+			entry->police.burst = tcf_police_tcfp_burst(act);
+			entry->police.rate_bytes_ps =
+				tcf_police_rate_bytes_ps(act);
+		} else if (is_tcf_ct(act)) {
+			entry->id = FLOW_ACTION_CT;
+			entry->ct.action = tcf_ct_action(act);
+			entry->ct.zone = tcf_ct_zone(act);
+		} else if (is_tcf_mpls(act)) {
+			switch (tcf_mpls_action(act)) {
+			case TCA_MPLS_ACT_PUSH:
+				entry->id = FLOW_ACTION_MPLS_PUSH;
+				entry->mpls_push.proto = tcf_mpls_proto(act);
+				entry->mpls_push.label = tcf_mpls_label(act);
+				entry->mpls_push.tc = tcf_mpls_tc(act);
+				entry->mpls_push.bos = tcf_mpls_bos(act);
+				entry->mpls_push.ttl = tcf_mpls_ttl(act);
+				break;
+			case TCA_MPLS_ACT_POP:
+				entry->id = FLOW_ACTION_MPLS_POP;
+				entry->mpls_pop.proto = tcf_mpls_proto(act);
+				break;
+			case TCA_MPLS_ACT_MODIFY:
+				entry->id = FLOW_ACTION_MPLS_MANGLE;
+				entry->mpls_mangle.label = tcf_mpls_label(act);
+				entry->mpls_mangle.tc = tcf_mpls_tc(act);
+				entry->mpls_mangle.bos = tcf_mpls_bos(act);
+				entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
+				break;
+			default:
+				err = -EOPNOTSUPP;
+				goto err_out;
+			}
+		} else if (is_tcf_skbedit_ptype(act)) {
+			entry->id = FLOW_ACTION_PTYPE;
+			entry->ptype = tcf_skbedit_ptype(act);
+		} else {
+			err = -EOPNOTSUPP;
+			goto err_out;
+		}
+
+		if (!is_tcf_pedit(act))
+			j++;
+	}
+
+err_out:
+	if (!rtnl_held)
+		rtnl_unlock();
+
+	if (err)
+		tc_cleanup_flow_action(flow_action);
+
+	return err;
+}
+EXPORT_SYMBOL(tc_setup_flow_action);
+
+unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
+{
+	unsigned int num_acts = 0;
+	struct tc_action *act;
+	int i;
+
+	tcf_exts_for_each_action(i, act, exts) {
+		if (is_tcf_pedit(act))
+			num_acts += tcf_pedit_nkeys(act);
+		else
+			num_acts++;
+	}
+	return num_acts;
+}
+EXPORT_SYMBOL(tcf_exts_num_actions);
+
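/*
 * Why tcf_exts_num_actions() expands pedit: in tc_setup_flow_action() above,
 * every pedit key becomes its own flow_action entry, so the entry array must
 * be sized by keys rather than by actions. A toy model of that counting:
 */
#include <stdio.h>

struct toy_act { int is_pedit; int nkeys; };

static unsigned int num_entries(const struct toy_act *acts, int n)
{
	unsigned int cnt = 0;

	for (int i = 0; i < n; i++)
		cnt += acts[i].is_pedit ? acts[i].nkeys : 1;
	return cnt;
}

int main(void)
{
	struct toy_act acts[] = {
		{ 0, 0 },	/* e.g. a gact drop: one entry */
		{ 1, 3 },	/* a pedit with 3 keys: three entries */
	};

	printf("%u entries needed\n", num_entries(acts, 2));	/* prints 4 */
	return 0;
}
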
 static __net_init int tcf_net_init(struct net *net)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);
 
+	spin_lock_init(&tn->idr_lock);
 	idr_init(&tn->idr);
 	return 0;
 }
@@ -2234,6 +3626,11 @@
 	.size = sizeof(struct tcf_net),
 };
 
+static struct flow_indr_block_ing_entry block_ing_entry = {
+	.cb = tc_indr_block_get_and_ing_cmd,
+	.list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
 static int __init tc_filter_init(void)
 {
 	int err;
@@ -2246,10 +3643,14 @@
 	if (err)
 		goto err_register_pernet_subsys;
 
-	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
-	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
+	flow_indr_add_block_ing_cb(&block_ing_entry);
+
+	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
+		      RTNL_FLAG_DOIT_UNLOCKED);
+	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
+		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
-		      tc_dump_tfilter, 0);
+		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6a5dce8..4aafbe3 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_basic.c	Basic Packet Classifier.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  */
 
@@ -18,6 +14,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <linux/idr.h>
+#include <linux/percpu.h>
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
@@ -35,6 +32,7 @@
 	struct tcf_result	res;
 	struct tcf_proto	*tp;
 	struct list_head	link;
+	struct tc_basic_pcnt __percpu *pf;
 	struct rcu_work		rwork;
 };
 
@@ -46,8 +44,10 @@
 	struct basic_filter *f;
 
 	list_for_each_entry_rcu(f, &head->flist, link) {
+		__this_cpu_inc(f->pf->rcnt);
 		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
 			continue;
+		__this_cpu_inc(f->pf->rhit);
 		*res = f->res;
 		r = tcf_exts_exec(skb, &f->exts, res);
 		if (r < 0)
@@ -89,6 +89,7 @@
 	tcf_exts_destroy(&f->exts);
 	tcf_em_tree_destroy(&f->ematches);
 	tcf_exts_put_net(&f->exts);
+	free_percpu(f->pf);
 	kfree(f);
 }
 
@@ -102,7 +103,8 @@
 	rtnl_unlock();
 }
 
-static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void basic_destroy(struct tcf_proto *tp, bool rtnl_held,
+			  struct netlink_ext_ack *extack)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
 	struct basic_filter *f, *n;
@@ -121,7 +123,7 @@
 }
 
 static int basic_delete(struct tcf_proto *tp, void *arg, bool *last,
-			struct netlink_ext_ack *extack)
+			bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
 	struct basic_filter *f = arg;
@@ -148,7 +150,7 @@
 {
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
 	if (err < 0)
 		return err;
 
@@ -168,7 +170,7 @@
 static int basic_change(struct net *net, struct sk_buff *in_skb,
 			struct tcf_proto *tp, unsigned long base, u32 handle,
 			struct nlattr **tca, void **arg, bool ovr,
-			struct netlink_ext_ack *extack)
+			bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	int err;
 	struct basic_head *head = rtnl_dereference(tp->root);
@@ -179,8 +181,8 @@
 	if (tca[TCA_OPTIONS] == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS],
-			       basic_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS],
+					  basic_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -193,7 +195,7 @@
 	if (!fnew)
 		return -ENOBUFS;
 
-	err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+	err = tcf_exts_init(&fnew->exts, net, TCA_BASIC_ACT, TCA_BASIC_POLICE);
 	if (err < 0)
 		goto errout;
 
@@ -208,6 +210,11 @@
 	if (err)
 		goto errout;
 	fnew->handle = handle;
+	fnew->pf = alloc_percpu(struct tc_basic_pcnt);
+	if (!fnew->pf) {
+		err = -ENOMEM;
+		goto errout;
+	}
 
 	err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
 			      extack);
@@ -231,12 +238,14 @@
 
 	return 0;
 errout:
+	free_percpu(fnew->pf);
 	tcf_exts_destroy(&fnew->exts);
 	kfree(fnew);
 	return err;
 }
 
-static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		       bool rtnl_held)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
 	struct basic_filter *f;
@@ -263,17 +272,19 @@
 }
 
 static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		      struct sk_buff *skb, struct tcmsg *t)
+		      struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
+	struct tc_basic_pcnt gpf = {};
 	struct basic_filter *f = fh;
 	struct nlattr *nest;
+	int cpu;
 
 	if (f == NULL)
 		return skb->len;
 
 	t->tcm_handle = f->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -281,6 +292,18 @@
 	    nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid))
 		goto nla_put_failure;
 
+	for_each_possible_cpu(cpu) {
+		struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu);
+
+		gpf.rcnt += pf->rcnt;
+		gpf.rhit += pf->rhit;
+	}
+
+	if (nla_put_64bit(skb, TCA_BASIC_PCNT,
+			  sizeof(struct tc_basic_pcnt),
+			  &gpf, TCA_BASIC_PAD))
+		goto nla_put_failure;
+
 	if (tcf_exts_dump(skb, &f->exts) < 0 ||
 	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
 		goto nla_put_failure;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index fa6fe2f..8229ed4 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Berkeley Packet Filter based traffic classifier
  *
@@ -6,10 +7,6 @@
  * ematches.
  *
  * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -157,8 +154,7 @@
 	skip_sw = prog && tc_skip_sw(prog->gen_flags);
 	obj = prog ?: oldprog;
 
-	tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags,
-				   extack);
+	tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags, extack);
 	cls_bpf.command = TC_CLSBPF_OFFLOAD;
 	cls_bpf.exts = &obj->exts;
 	cls_bpf.prog = prog ? prog->filter : NULL;
@@ -166,18 +162,24 @@
 	cls_bpf.name = obj->bpf_name;
 	cls_bpf.exts_integrated = obj->exts_integrated;
 
-	if (oldprog)
-		tcf_block_offload_dec(block, &oldprog->gen_flags);
+	if (oldprog && prog)
+		err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
+					  skip_sw, &oldprog->gen_flags,
+					  &oldprog->in_hw_count,
+					  &prog->gen_flags, &prog->in_hw_count,
+					  true);
+	else if (prog)
+		err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
+				      skip_sw, &prog->gen_flags,
+				      &prog->in_hw_count, true);
+	else
+		err = tc_setup_cb_destroy(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
+					  skip_sw, &oldprog->gen_flags,
+					  &oldprog->in_hw_count, true);
 
-	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
-	if (prog) {
-		if (err < 0) {
-			cls_bpf_offload_cmd(tp, oldprog, prog, extack);
-			return err;
-		} else if (err > 0) {
-			prog->in_hw_count = err;
-			tcf_block_offload_inc(block, &prog->gen_flags);
-		}
+	if (prog && err) {
+		cls_bpf_offload_cmd(tp, oldprog, prog, extack);
+		return err;
 	}
 
 	if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
@@ -234,7 +236,7 @@
 	cls_bpf.name = prog->bpf_name;
 	cls_bpf.exts_integrated = prog->exts_integrated;
 
-	tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false, true);
 }
 
 static int cls_bpf_init(struct tcf_proto *tp)
@@ -298,7 +300,7 @@
 }
 
 static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
-			  struct netlink_ext_ack *extack)
+			  bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 
@@ -307,7 +309,7 @@
 	return 0;
 }
 
-static void cls_bpf_destroy(struct tcf_proto *tp,
+static void cls_bpf_destroy(struct tcf_proto *tp, bool rtnl_held,
 			    struct netlink_ext_ack *extack)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -417,7 +419,8 @@
 	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
 		return -EINVAL;
 
-	ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack);
+	ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+				extack);
 	if (ret < 0)
 		return ret;
 
@@ -455,7 +458,8 @@
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 			  struct tcf_proto *tp, unsigned long base,
 			  u32 handle, struct nlattr **tca,
-			  void **arg, bool ovr, struct netlink_ext_ack *extack)
+			  void **arg, bool ovr, bool rtnl_held,
+			  struct netlink_ext_ack *extack)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 	struct cls_bpf_prog *oldprog = *arg;
@@ -466,8 +470,8 @@
 	if (tca[TCA_OPTIONS] == NULL)
 		return -EINVAL;
 
-	ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy,
-			       NULL);
+	ret = nla_parse_nested_deprecated(tb, TCA_BPF_MAX, tca[TCA_OPTIONS],
+					  bpf_policy, NULL);
 	if (ret < 0)
 		return ret;
 
@@ -475,7 +479,7 @@
 	if (!prog)
 		return -ENOBUFS;
 
-	ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+	ret = tcf_exts_init(&prog->exts, net, TCA_BPF_ACT, TCA_BPF_POLICE);
 	if (ret < 0)
 		goto errout;
 
@@ -575,7 +579,7 @@
 }
 
 static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh,
-			struct sk_buff *skb, struct tcmsg *tm)
+			struct sk_buff *skb, struct tcmsg *tm, bool rtnl_held)
 {
 	struct cls_bpf_prog *prog = fh;
 	struct nlattr *nest;
@@ -589,7 +593,7 @@
 
 	cls_bpf_offload_update_stats(tp, prog);
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -635,7 +639,8 @@
 		prog->res.class = cl;
 }
 
-static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+			 bool rtnl_held)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 	struct cls_bpf_prog *prog;
@@ -652,7 +657,7 @@
 	}
 }
 
-static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			     void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -674,15 +679,11 @@
 		cls_bpf.name = prog->bpf_name;
 		cls_bpf.exts_integrated = prog->exts_integrated;
 
-		err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv);
-		if (err) {
-			if (add && tc_skip_sw(prog->gen_flags))
-				return err;
-			continue;
-		}
-
-		tc_cls_offload_cnt_update(block, &prog->in_hw_count,
-					  &prog->gen_flags, add);
+		err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSBPF,
+					    &cls_bpf, cb_priv, &prog->gen_flags,
+					    &prog->in_hw_count);
+		if (err)
+			return err;
 	}
 
 	return 0;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3bc01bd..fb88114 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_cgroup.c	Control Group Classifier
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  */
 
@@ -32,6 +28,8 @@
 	struct cls_cgroup_head *head = rcu_dereference_bh(tp->root);
 	u32 classid = task_get_classid(skb);
 
+	if (unlikely(!head))
+		return -1;
 	if (!classid)
 		return -1;
 	if (!tcf_em_tree_match(skb, &head->ematches, NULL))
@@ -78,7 +76,7 @@
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 			     struct tcf_proto *tp, unsigned long base,
 			     u32 handle, struct nlattr **tca,
-			     void **arg, bool ovr,
+			     void **arg, bool ovr, bool rtnl_held,
 			     struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -99,18 +97,19 @@
 	if (!new)
 		return -ENOBUFS;
 
-	err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+	err = tcf_exts_init(&new->exts, net, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
 	if (err < 0)
 		goto errout;
 	new->handle = handle;
 	new->tp = tp;
-	err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
-			       cgroup_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_CGROUP_MAX,
+					  tca[TCA_OPTIONS], cgroup_policy,
+					  NULL);
 	if (err < 0)
 		goto errout;
 
 	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
-				extack);
+				true, extack);
 	if (err < 0)
 		goto errout;
 
@@ -130,7 +129,7 @@
 	return err;
 }
 
-static void cls_cgroup_destroy(struct tcf_proto *tp,
+static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held,
 			       struct netlink_ext_ack *extack)
 {
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -145,18 +144,21 @@
 }
 
 static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last,
-			     struct netlink_ext_ack *extack)
+			     bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
 }
 
-static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+			    bool rtnl_held)
 {
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 
 	if (arg->count < arg->skip)
 		goto skip;
 
+	if (!head)
+		return;
 	if (arg->fn(tp, head, arg) < 0) {
 		arg->stop = 1;
 		return;
@@ -166,14 +168,14 @@
 }
 
 static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh,
-			   struct sk_buff *skb, struct tcmsg *t)
+			   struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 	struct nlattr *nest;
 
 	t->tcm_handle = head->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2bb043c..80ae7b9 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_flow.c		Generic flow classifier
  *
  * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -391,7 +387,8 @@
 static int flow_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       void **arg, bool ovr, struct netlink_ext_ack *extack)
+		       void **arg, bool ovr, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *fold, *fnew;
@@ -407,7 +404,8 @@
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_FLOW_MAX, opt, flow_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -440,12 +438,12 @@
 	if (err < 0)
 		goto err1;
 
-	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	err = tcf_exts_init(&fnew->exts, net, TCA_FLOW_ACT, TCA_FLOW_POLICE);
 	if (err < 0)
 		goto err2;
 
 	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
-				extack);
+				true, extack);
 	if (err < 0)
 		goto err2;
 
@@ -566,7 +564,7 @@
 }
 
 static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
-		       struct netlink_ext_ack *extack)
+		       bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *f = arg;
@@ -590,7 +588,8 @@
 	return 0;
 }
 
-static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void flow_destroy(struct tcf_proto *tp, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *f, *next;
@@ -617,7 +616,7 @@
 }
 
 static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		     struct sk_buff *skb, struct tcmsg *t)
+		     struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct flow_filter *f = fh;
 	struct nlattr *nest;
@@ -627,7 +626,7 @@
 
 	t->tcm_handle = f->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -677,7 +676,8 @@
 	return -1;
 }
 
-static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		      bool rtnl_held)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *f;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7fade71..74221e3 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_flower.c		Flower classifier
  *
  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -14,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/rhashtable.h>
 #include <linux/workqueue.h>
+#include <linux/refcount.h>
 
 #include <linux/if_ether.h>
 #include <linux/in6.h>
@@ -29,8 +26,10 @@
 #include <net/dst.h>
 #include <net/dst_metadata.h>
 
+#include <uapi/linux/netfilter/nf_conntrack_common.h>
+
 struct fl_flow_key {
-	int	indev_ifindex;
+	struct flow_dissector_key_meta meta;
 	struct flow_dissector_key_control control;
 	struct flow_dissector_key_control enc_control;
 	struct flow_dissector_key_basic basic;
@@ -55,6 +54,9 @@
 	struct flow_dissector_key_ip ip;
 	struct flow_dissector_key_ip enc_ip;
 	struct flow_dissector_key_enc_opts enc_opts;
+	struct flow_dissector_key_ports tp_min;
+	struct flow_dissector_key_ports tp_max;
+	struct flow_dissector_key_ct ct;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -65,6 +67,7 @@
 struct fl_flow_mask {
 	struct fl_flow_key key;
 	struct fl_flow_mask_range range;
+	u32 flags;
 	struct rhash_head ht_node;
 	struct rhashtable ht;
 	struct rhashtable_params filter_ht_params;
@@ -72,6 +75,7 @@
 	struct list_head filters;
 	struct rcu_work rwork;
 	struct list_head list;
+	refcount_t refcnt;
 };
 
 struct fl_flow_tmplt {
@@ -83,7 +87,9 @@
 
 struct cls_fl_head {
 	struct rhashtable ht;
+	spinlock_t masks_lock; /* Protect masks list */
 	struct list_head masks;
+	struct list_head hw_filters;
 	struct rcu_work rwork;
 	struct idr handle_idr;
 };
@@ -96,11 +102,18 @@
 	struct tcf_result res;
 	struct fl_flow_key key;
 	struct list_head list;
+	struct list_head hw_list;
 	u32 handle;
 	u32 flags;
-	unsigned int in_hw_count;
+	u32 in_hw_count;
 	struct rcu_work rwork;
 	struct net_device *hw_dev;
+	/* The flower classifier is unlocked, which means that its reference
+	 * counter can be changed concurrently without any kind of external
+	 * synchronization. Use an atomic reference counter for concurrency safety.
+	 */
+	refcount_t refcnt;
+	bool deleted;
 };
 
 static const struct rhashtable_params mask_ht_params = {
@@ -179,36 +192,128 @@
 	memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
 }
 
-static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
-				       struct fl_flow_key *mkey)
+static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter,
+				  struct fl_flow_key *key,
+				  struct fl_flow_key *mkey)
+{
+	__be16 min_mask, max_mask, min_val, max_val;
+
+	min_mask = htons(filter->mask->key.tp_min.dst);
+	max_mask = htons(filter->mask->key.tp_max.dst);
+	min_val = htons(filter->key.tp_min.dst);
+	max_val = htons(filter->key.tp_max.dst);
+
+	if (min_mask && max_mask) {
+		if (htons(key->tp.dst) < min_val ||
+		    htons(key->tp.dst) > max_val)
+			return false;
+
+		/* skb does not have min and max values */
+		mkey->tp_min.dst = filter->mkey.tp_min.dst;
+		mkey->tp_max.dst = filter->mkey.tp_max.dst;
+	}
+	return true;
+}
+
+static bool fl_range_port_src_cmp(struct cls_fl_filter *filter,
+				  struct fl_flow_key *key,
+				  struct fl_flow_key *mkey)
+{
+	__be16 min_mask, max_mask, min_val, max_val;
+
+	min_mask = htons(filter->mask->key.tp_min.src);
+	max_mask = htons(filter->mask->key.tp_max.src);
+	min_val = htons(filter->key.tp_min.src);
+	max_val = htons(filter->key.tp_max.src);
+
+	if (min_mask && max_mask) {
+		if (htons(key->tp.src) < min_val ||
+		    htons(key->tp.src) > max_val)
+			return false;
+
+		/* skb does not have min and max values */
+		mkey->tp_min.src = filter->mkey.tp_min.src;
+		mkey->tp_max.src = filter->mkey.tp_max.src;
+	}
+	return true;
+}
+
+static struct cls_fl_filter *__fl_lookup(struct fl_flow_mask *mask,
+					 struct fl_flow_key *mkey)
 {
 	return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
 				      mask->filter_ht_params);
 }
 
+static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask,
+					     struct fl_flow_key *mkey,
+					     struct fl_flow_key *key)
+{
+	struct cls_fl_filter *filter, *f;
+
+	list_for_each_entry_rcu(filter, &mask->filters, list) {
+		if (!fl_range_port_dst_cmp(filter, key, mkey))
+			continue;
+
+		if (!fl_range_port_src_cmp(filter, key, mkey))
+			continue;
+
+		f = __fl_lookup(mask, mkey);
+		if (f)
+			return f;
+	}
+	return NULL;
+}
+
+static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
+				       struct fl_flow_key *mkey,
+				       struct fl_flow_key *key)
+{
+	if ((mask->flags & TCA_FLOWER_MASK_FLAGS_RANGE))
+		return fl_lookup_range(mask, mkey, key);
+
+	return __fl_lookup(mask, mkey);
+}
+
+static u16 fl_ct_info_to_flower_map[] = {
+	[IP_CT_ESTABLISHED] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+					TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED,
+	[IP_CT_RELATED] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+					TCA_FLOWER_KEY_CT_FLAGS_RELATED,
+	[IP_CT_ESTABLISHED_REPLY] =	TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+					TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED,
+	[IP_CT_RELATED_REPLY] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+					TCA_FLOWER_KEY_CT_FLAGS_RELATED,
+	[IP_CT_NEW] =			TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+					TCA_FLOWER_KEY_CT_FLAGS_NEW,
+};
+
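/*
 * A toy model of the fl_ct_info_to_flower_map[] translation above: the
 * conntrack info value reported for an skb indexes a small table that yields
 * the flower CT flag bits to match on. Enum values and flag bits here are
 * illustrative stand-ins for the kernel definitions.
 */
#include <stdio.h>

enum { CT_NEW, CT_ESTABLISHED, CT_MAX };

#define FLAG_TRACKED		(1u << 0)
#define FLAG_NEW		(1u << 1)
#define FLAG_ESTABLISHED	(1u << 2)

static const unsigned short ct_map[CT_MAX] = {
	[CT_NEW]	 = FLAG_TRACKED | FLAG_NEW,
	[CT_ESTABLISHED] = FLAG_TRACKED | FLAG_ESTABLISHED,
};

int main(void)
{
	int info = CT_ESTABLISHED;	/* what conntrack reported */
	/* unmapped values fall back to 0, i.e. the packet looks untracked */
	unsigned short flags = (info >= 0 && info < CT_MAX) ? ct_map[info] : 0;

	printf("flower ct flags: %#x\n", flags);
	return 0;
}
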
 static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		       struct tcf_result *res)
 {
 	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
-	struct cls_fl_filter *f;
-	struct fl_flow_mask *mask;
-	struct fl_flow_key skb_key;
 	struct fl_flow_key skb_mkey;
+	struct fl_flow_key skb_key;
+	struct fl_flow_mask *mask;
+	struct cls_fl_filter *f;
 
 	list_for_each_entry_rcu(mask, &head->masks, list) {
 		fl_clear_masked_range(&skb_key, mask);
 
-		skb_key.indev_ifindex = skb->skb_iif;
+		skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
 		/* skb_flow_dissect() does not set n_proto in case of an
 		 * unknown protocol, so set it here.
 		 */
 		skb_key.basic.n_proto = skb->protocol;
 		skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
+		skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
+				    fl_ct_info_to_flower_map,
+				    ARRAY_SIZE(fl_ct_info_to_flower_map));
 		skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
 
 		fl_set_masked_key(&skb_mkey, &skb_key, mask);
 
-		f = fl_lookup(mask, &skb_mkey);
+		f = fl_lookup(mask, &skb_mkey, &skb_key);
 		if (f && !tc_skip_sw(f->flags)) {
 			*res = f->res;
 			return tcf_exts_exec(skb, &f->exts, res);
@@ -225,16 +330,22 @@
 	if (!head)
 		return -ENOBUFS;
 
+	spin_lock_init(&head->masks_lock);
 	INIT_LIST_HEAD_RCU(&head->masks);
+	INIT_LIST_HEAD(&head->hw_filters);
 	rcu_assign_pointer(tp->root, head);
 	idr_init(&head->handle_idr);
 
 	return rhashtable_init(&head->ht, &mask_ht_params);
 }
 
-static void fl_mask_free(struct fl_flow_mask *mask)
+static void fl_mask_free(struct fl_flow_mask *mask, bool mask_init_done)
 {
-	rhashtable_destroy(&mask->ht);
+	/* temporary masks don't have their filters list and ht initialized */
+	if (mask_init_done) {
+		WARN_ON(!list_empty(&mask->filters));
+		rhashtable_destroy(&mask->ht);
+	}
 	kfree(mask);
 }
 
@@ -243,25 +354,43 @@
 	struct fl_flow_mask *mask = container_of(to_rcu_work(work),
 						 struct fl_flow_mask, rwork);
 
-	fl_mask_free(mask);
+	fl_mask_free(mask, true);
 }
 
-static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
-			bool async)
+static void fl_uninit_mask_free_work(struct work_struct *work)
 {
-	if (!list_empty(&mask->filters))
+	struct fl_flow_mask *mask = container_of(to_rcu_work(work),
+						 struct fl_flow_mask, rwork);
+
+	fl_mask_free(mask, false);
+}
+
+static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask)
+{
+	if (!refcount_dec_and_test(&mask->refcnt))
 		return false;
 
 	rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);
+
+	spin_lock(&head->masks_lock);
 	list_del_rcu(&mask->list);
-	if (async)
-		tcf_queue_work(&mask->rwork, fl_mask_free_work);
-	else
-		fl_mask_free(mask);
+	spin_unlock(&head->masks_lock);
+
+	tcf_queue_work(&mask->rwork, fl_mask_free_work);
 
 	return true;
 }
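
/*
 * A userspace sketch of the put path in fl_mask_put() above: the last
 * reference is detected with dec-and-test, the object is unlinked under the
 * list lock, and only then freed. Names are illustrative; the kernel defers
 * the actual free through tcf_queue_work() so RCU readers can drain first.
 * Compile with -pthread.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_mask { atomic_int refcnt; };

static pthread_mutex_t masks_lock = PTHREAD_MUTEX_INITIALIZER;
static struct toy_mask *masks_head;	/* stands in for head->masks */

static bool toy_mask_put(struct toy_mask *m)
{
	if (atomic_fetch_sub(&m->refcnt, 1) != 1)
		return false;		/* other holders remain */

	pthread_mutex_lock(&masks_lock);
	masks_head = NULL;		/* list_del_rcu() equivalent */
	pthread_mutex_unlock(&masks_lock);

	free(m);			/* kernel: tcf_queue_work() */
	return true;
}

int main(void)
{
	struct toy_mask *m = calloc(1, sizeof(*m));

	atomic_store(&m->refcnt, 2);
	masks_head = m;
	printf("first put frees: %d\n", toy_mask_put(m));	/* 0 */
	printf("second put frees: %d\n", toy_mask_put(m));	/* 1 */
	return 0;
}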
 
+static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
+{
+	/* The flower classifier only changes the root pointer during init and
+	 * destroy. Users must obtain a reference to the tcf_proto instance
+	 * before calling its API, so the tp->root pointer is protected from a
+	 * concurrent call to fl_destroy() by reference counting.
+	 */
+	return rcu_dereference_raw(tp->root);
+}
+
 static void __fl_destroy_filter(struct cls_fl_filter *f)
 {
 	tcf_exts_destroy(&f->exts);
@@ -274,52 +403,64 @@
 	struct cls_fl_filter *f = container_of(to_rcu_work(work),
 					struct cls_fl_filter, rwork);
 
-	rtnl_lock();
 	__fl_destroy_filter(f);
-	rtnl_unlock();
 }
 
 static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
-				 struct netlink_ext_ack *extack)
+				 bool rtnl_held, struct netlink_ext_ack *extack)
 {
-	struct tc_cls_flower_offload cls_flower = {};
 	struct tcf_block *block = tp->chain->block;
+	struct flow_cls_offload cls_flower = {};
 
 	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
-	cls_flower.command = TC_CLSFLOWER_DESTROY;
+	cls_flower.command = FLOW_CLS_DESTROY;
 	cls_flower.cookie = (unsigned long) f;
 
-	tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
-			 &cls_flower, false);
-	tcf_block_offload_dec(block, &f->flags);
+	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false,
+			    &f->flags, &f->in_hw_count, rtnl_held);
 }
 
 static int fl_hw_replace_filter(struct tcf_proto *tp,
-				struct cls_fl_filter *f,
+				struct cls_fl_filter *f, bool rtnl_held,
 				struct netlink_ext_ack *extack)
 {
-	struct tc_cls_flower_offload cls_flower = {};
 	struct tcf_block *block = tp->chain->block;
+	struct flow_cls_offload cls_flower = {};
 	bool skip_sw = tc_skip_sw(f->flags);
-	int err;
+	int err = 0;
+
+	cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
+	if (!cls_flower.rule)
+		return -ENOMEM;
 
 	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
-	cls_flower.command = TC_CLSFLOWER_REPLACE;
+	cls_flower.command = FLOW_CLS_REPLACE;
 	cls_flower.cookie = (unsigned long) f;
-	cls_flower.dissector = &f->mask->dissector;
-	cls_flower.mask = &f->mask->key;
-	cls_flower.key = &f->mkey;
-	cls_flower.exts = &f->exts;
+	cls_flower.rule->match.dissector = &f->mask->dissector;
+	cls_flower.rule->match.mask = &f->mask->key;
+	cls_flower.rule->match.key = &f->mkey;
 	cls_flower.classid = f->res.classid;
 
-	err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
-			       &cls_flower, skip_sw);
-	if (err < 0) {
-		fl_hw_destroy_filter(tp, f, NULL);
+	err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts,
+				   rtnl_held);
+	if (err) {
+		kfree(cls_flower.rule);
+		if (skip_sw) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+			return err;
+		}
+		return 0;
+	}
+
+	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
+			      skip_sw, &f->flags, &f->in_hw_count, rtnl_held);
+	tc_cleanup_flow_action(&cls_flower.rule->action);
+	kfree(cls_flower.rule);
+
+	if (err) {
+		fl_hw_destroy_filter(tp, f, rtnl_held, NULL);
 		return err;
-	} else if (err > 0) {
-		f->in_hw_count = err;
-		tcf_block_offload_inc(block, &f->flags);
 	}
 
 	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
@@ -328,40 +469,77 @@
 	return 0;
 }
 
-static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
+			       bool rtnl_held)
 {
-	struct tc_cls_flower_offload cls_flower = {};
 	struct tcf_block *block = tp->chain->block;
+	struct flow_cls_offload cls_flower = {};
 
 	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
-	cls_flower.command = TC_CLSFLOWER_STATS;
+	cls_flower.command = FLOW_CLS_STATS;
 	cls_flower.cookie = (unsigned long) f;
-	cls_flower.exts = &f->exts;
 	cls_flower.classid = f->res.classid;
 
-	tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
-			 &cls_flower, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false,
+			 rtnl_held);
+
+	tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
+			      cls_flower.stats.pkts,
+			      cls_flower.stats.lastused);
 }
 
-static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
-			struct netlink_ext_ack *extack)
+static void __fl_put(struct cls_fl_filter *f)
 {
-	struct cls_fl_head *head = rtnl_dereference(tp->root);
-	bool async = tcf_exts_get_net(&f->exts);
-	bool last;
+	if (!refcount_dec_and_test(&f->refcnt))
+		return;
 
-	idr_remove(&head->handle_idr, f->handle);
-	list_del_rcu(&f->list);
-	last = fl_mask_put(head, f->mask, async);
-	if (!tc_skip_hw(f->flags))
-		fl_hw_destroy_filter(tp, f, extack);
-	tcf_unbind_filter(tp, &f->res);
-	if (async)
+	if (tcf_exts_get_net(&f->exts))
 		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
 	else
 		__fl_destroy_filter(f);
+}
 
-	return last;
+static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle)
+{
+	struct cls_fl_filter *f;
+
+	rcu_read_lock();
+	f = idr_find(&head->handle_idr, handle);
+	if (f && !refcount_inc_not_zero(&f->refcnt))
+		f = NULL;
+	rcu_read_unlock();
+
+	return f;
+}
+
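/*
 * A sketch of the lookup-and-hold idiom in __fl_get() above: an object found
 * in the table may be concurrently dropping its last reference, so a new
 * reference is taken only if the counter is still nonzero. Illustrative C11
 * atomics stand in for the kernel's refcount_t/RCU machinery.
 */
#include <stdatomic.h>
#include <stdio.h>

struct toy_filter { atomic_int refcnt; };

static int get_not_zero(struct toy_filter *f)
{
	int old = atomic_load(&f->refcnt);

	while (old != 0)
		if (atomic_compare_exchange_weak(&f->refcnt, &old, old + 1))
			return 1;	/* reference taken */
	return 0;			/* already dying: caller sees NULL */
}

int main(void)
{
	struct toy_filter alive = { 1 }, dying = { 0 };

	printf("alive: %d, dying: %d\n",
	       get_not_zero(&alive), get_not_zero(&dying));	/* 1, 0 */
	return 0;
}
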
+static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
+		       bool *last, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
+{
+	struct cls_fl_head *head = fl_head_dereference(tp);
+
+	*last = false;
+
+	spin_lock(&tp->lock);
+	if (f->deleted) {
+		spin_unlock(&tp->lock);
+		return -ENOENT;
+	}
+
+	f->deleted = true;
+	rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
+			       f->mask->filter_ht_params);
+	idr_remove(&head->handle_idr, f->handle);
+	list_del_rcu(&f->list);
+	spin_unlock(&tp->lock);
+
+	*last = fl_mask_put(head, f->mask);
+	if (!tc_skip_hw(f->flags))
+		fl_hw_destroy_filter(tp, f, rtnl_held, extack);
+	tcf_unbind_filter(tp, &f->res);
+	__fl_put(f);
+
+	return 0;
 }
 
 static void fl_destroy_sleepable(struct work_struct *work)
@@ -375,15 +553,18 @@
 	module_put(THIS_MODULE);
 }
 
-static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
 {
-	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_head *head = fl_head_dereference(tp);
 	struct fl_flow_mask *mask, *next_mask;
 	struct cls_fl_filter *f, *next;
+	bool last;
 
 	list_for_each_entry_safe(mask, next_mask, &head->masks, list) {
 		list_for_each_entry_safe(f, next, &mask->filters, list) {
-			if (__fl_delete(tp, f, extack))
+			__fl_delete(tp, f, &last, rtnl_held, extack);
+			if (last)
 				break;
 		}
 	}
@@ -393,11 +574,18 @@
 	tcf_queue_work(&head->rwork, fl_destroy_sleepable);
 }
 
+static void fl_put(struct tcf_proto *tp, void *arg)
+{
+	struct cls_fl_filter *f = arg;
+
+	__fl_put(f);
+}
+
 static void *fl_get(struct tcf_proto *tp, u32 handle)
 {
-	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_head *head = fl_head_dereference(tp);
 
-	return idr_find(&head->handle_idr, handle);
+	return __fl_get(head, handle);
 }
 
 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
@@ -486,6 +674,16 @@
 	[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_ENC_OPTS]	= { .type = NLA_NESTED },
 	[TCA_FLOWER_KEY_ENC_OPTS_MASK]	= { .type = NLA_NESTED },
+	[TCA_FLOWER_KEY_CT_STATE]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_CT_STATE_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_CT_ZONE]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_CT_ZONE_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_CT_MARK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_CT_MARK_MASK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_CT_LABELS]	= { .type = NLA_BINARY,
+					    .len = 128 / BITS_PER_BYTE },
+	[TCA_FLOWER_KEY_CT_LABELS_MASK]	= { .type = NLA_BINARY,
+					    .len = 128 / BITS_PER_BYTE },
 };
 
 static const struct nla_policy
@@ -507,11 +705,36 @@
 {
 	if (!tb[val_type])
 		return;
-	memcpy(val, nla_data(tb[val_type]), len);
+	nla_memcpy(val, tb[val_type], len);
 	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
 		memset(mask, 0xff, len);
 	else
-		memcpy(mask, nla_data(tb[mask_type]), len);
+		nla_memcpy(mask, tb[mask_type], len);
+}
+
+static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
+				 struct fl_flow_key *mask)
+{
+	fl_set_key_val(tb, &key->tp_min.dst,
+		       TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst,
+		       TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst));
+	fl_set_key_val(tb, &key->tp_max.dst,
+		       TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst,
+		       TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst));
+	fl_set_key_val(tb, &key->tp_min.src,
+		       TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src,
+		       TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src));
+	fl_set_key_val(tb, &key->tp_max.src,
+		       TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src,
+		       TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src));
+
+	if ((mask->tp_min.dst && mask->tp_max.dst &&
+	     htons(key->tp_max.dst) <= htons(key->tp_min.dst)) ||
+	     (mask->tp_min.src && mask->tp_max.src &&
+	      htons(key->tp_max.src) <= htons(key->tp_min.src)))
+		return -EINVAL;
+
+	return 0;
 }
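
/*
 * A userspace check mirroring the validation at the end of
 * fl_set_key_port_range() above: when both ends of a port range are masked
 * in, max must be strictly greater than min, compared in host byte order.
 * Illustrative code, not the kernel helper.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

static int range_ok(uint16_t min_be, uint16_t max_be,
		    uint16_t min_mask, uint16_t max_mask)
{
	if (min_mask && max_mask && ntohs(max_be) <= ntohs(min_be))
		return 0;	/* the kernel returns -EINVAL here */
	return 1;
}

int main(void)
{
	printf("%d\n", range_ok(htons(1000), htons(2000), 0xffff, 0xffff)); /* 1 */
	printf("%d\n", range_ok(htons(2000), htons(1000), 0xffff, 0xffff)); /* 0 */
	return 0;
}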
 
 static int fl_set_key_mpls(struct nlattr **tb,
@@ -648,8 +871,9 @@
 		return -EINVAL;
 	}
 
-	err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
-			       nla, geneve_opt_policy, extack);
+	err = nla_parse_nested_deprecated(tb,
+					  TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+					  nla, geneve_opt_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -711,18 +935,18 @@
 	const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
 	int err, option_len, key_depth, msk_depth = 0;
 
-	err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS],
-				  TCA_FLOWER_KEY_ENC_OPTS_MAX,
-				  enc_opts_policy, extack);
+	err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS],
+					     TCA_FLOWER_KEY_ENC_OPTS_MAX,
+					     enc_opts_policy, extack);
 	if (err)
 		return err;
 
 	nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
 
 	if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
-		err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK],
-					  TCA_FLOWER_KEY_ENC_OPTS_MAX,
-					  enc_opts_policy, extack);
+		err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK],
+						     TCA_FLOWER_KEY_ENC_OPTS_MAX,
+						     enc_opts_policy, extack);
 		if (err)
 			return err;
 
@@ -771,21 +995,65 @@
 	return 0;
 }
 
+static int fl_set_key_ct(struct nlattr **tb,
+			 struct flow_dissector_key_ct *key,
+			 struct flow_dissector_key_ct *mask,
+			 struct netlink_ext_ack *extack)
+{
+	if (tb[TCA_FLOWER_KEY_CT_STATE]) {
+		if (!IS_ENABLED(CONFIG_NF_CONNTRACK)) {
+			NL_SET_ERR_MSG(extack, "Conntrack isn't enabled");
+			return -EOPNOTSUPP;
+		}
+		fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE,
+			       &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK,
+			       sizeof(key->ct_state));
+	}
+	if (tb[TCA_FLOWER_KEY_CT_ZONE]) {
+		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
+			NL_SET_ERR_MSG(extack, "Conntrack zones isn't enabled");
+			return -EOPNOTSUPP;
+		}
+		fl_set_key_val(tb, &key->ct_zone, TCA_FLOWER_KEY_CT_ZONE,
+			       &mask->ct_zone, TCA_FLOWER_KEY_CT_ZONE_MASK,
+			       sizeof(key->ct_zone));
+	}
+	if (tb[TCA_FLOWER_KEY_CT_MARK]) {
+		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) {
+			NL_SET_ERR_MSG(extack, "Conntrack mark isn't enabled");
+			return -EOPNOTSUPP;
+		}
+		fl_set_key_val(tb, &key->ct_mark, TCA_FLOWER_KEY_CT_MARK,
+			       &mask->ct_mark, TCA_FLOWER_KEY_CT_MARK_MASK,
+			       sizeof(key->ct_mark));
+	}
+	if (tb[TCA_FLOWER_KEY_CT_LABELS]) {
+		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
+			NL_SET_ERR_MSG(extack, "Conntrack labels aren't enabled");
+			return -EOPNOTSUPP;
+		}
+		fl_set_key_val(tb, key->ct_labels, TCA_FLOWER_KEY_CT_LABELS,
+			       mask->ct_labels, TCA_FLOWER_KEY_CT_LABELS_MASK,
+			       sizeof(key->ct_labels));
+	}
+
+	return 0;
+}
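
Each conntrack key is gated on IS_ENABLED() rather than #ifdef, so a request for a key the kernel was built without fails loudly with -EOPNOTSUPP and an extack message instead of being silently ignored, while the guarded code still compiles and type-checks in every configuration. A userspace approximation of the pattern (the config macro is a stand-in, not a real kconfig symbol):

    #include <errno.h>

    #define FAKE_CONFIG_NF_CONNTRACK 0	/* pretend conntrack is compiled out */

    static int set_ct_state_sketch(int attr_present)
    {
    	if (attr_present) {
    		if (!FAKE_CONFIG_NF_CONNTRACK)
    			return -EOPNOTSUPP;	/* loud rejection */
    		/* ...copy key and mask... */
    	}
    	return 0;
    }
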
+
 static int fl_set_key(struct net *net, struct nlattr **tb,
 		      struct fl_flow_key *key, struct fl_flow_key *mask,
 		      struct netlink_ext_ack *extack)
 {
 	__be16 ethertype;
 	int ret = 0;
-#ifdef CONFIG_NET_CLS_IND
+
 	if (tb[TCA_FLOWER_INDEV]) {
 		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack);
 		if (err < 0)
 			return err;
-		key->indev_ifindex = err;
-		mask->indev_ifindex = 0xffffffff;
+		key->meta.ingress_ifindex = err;
+		mask->meta.ingress_ifindex = 0xffffffff;
 	}
-#endif
 
 	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
 		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
@@ -921,6 +1189,14 @@
 			       sizeof(key->arp.tha));
 	}
 
+	if (key->basic.ip_proto == IPPROTO_TCP ||
+	    key->basic.ip_proto == IPPROTO_UDP ||
+	    key->basic.ip_proto == IPPROTO_SCTP) {
+		ret = fl_set_key_port_range(tb, key, mask);
+		if (ret)
+			return ret;
+	}
+
 	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
 	    tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
 		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -973,6 +1249,10 @@
 			return ret;
 	}
 
+	ret = fl_set_key_ct(tb, &key->ct, &mask->ct, extack);
+	if (ret)
+		return ret;
+
 	if (tb[TCA_FLOWER_KEY_FLAGS])
 		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
 
@@ -1005,7 +1285,7 @@
 }
 
 #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
-#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
+#define FL_KEY_MEMBER_SIZE(member) FIELD_SIZEOF(struct fl_flow_key, member)
 
 #define FL_KEY_IS_MASKED(mask, member)						\
 	memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member),		\
@@ -1030,6 +1310,8 @@
 	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
 	size_t cnt = 0;
 
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+			     FLOW_DISSECTOR_KEY_META, meta);
 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
@@ -1038,8 +1320,9 @@
 			     FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
-	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
-			     FLOW_DISSECTOR_KEY_PORTS, tp);
+	if (FL_KEY_IS_MASKED(mask, tp) ||
+	    FL_KEY_IS_MASKED(mask, tp_min) || FL_KEY_IS_MASKED(mask, tp_max))
+		FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_IP, ip);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
@@ -1070,6 +1353,8 @@
 			     FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts);
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+			     FLOW_DISSECTOR_KEY_CT, ct);
 
 	skb_flow_dissector_init(dissector, keys, cnt);
 }
@@ -1086,6 +1371,10 @@
 
 	fl_mask_copy(newmask, mask);
 
+	if ((newmask->key.tp_min.dst && newmask->key.tp_max.dst) ||
+	    (newmask->key.tp_min.src && newmask->key.tp_max.src))
+		newmask->flags |= TCA_FLOWER_MASK_FLAGS_RANGE;
+
 	err = fl_init_mask_hashtable(newmask);
 	if (err)
 		goto errout_free;
@@ -1094,12 +1383,15 @@
 
 	INIT_LIST_HEAD_RCU(&newmask->filters);
 
-	err = rhashtable_insert_fast(&head->ht, &newmask->ht_node,
-				     mask_ht_params);
+	refcount_set(&newmask->refcnt, 1);
+	err = rhashtable_replace_fast(&head->ht, &mask->ht_node,
+				      &newmask->ht_node, mask_ht_params);
 	if (err)
 		goto errout_destroy;
 
+	spin_lock(&head->masks_lock);
 	list_add_tail_rcu(&newmask->list, &head->masks);
+	spin_unlock(&head->masks_lock);
 
 	return newmask;
 
@@ -1117,40 +1409,71 @@
 				struct fl_flow_mask *mask)
 {
 	struct fl_flow_mask *newmask;
+	int ret = 0;
 
-	fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params);
+	rcu_read_lock();
+
+	/* Insert mask as temporary node to prevent concurrent creation of mask
+	 * with same key. Any concurrent lookups with same key will return
+	 * -EAGAIN because mask's refcnt is zero.
+	 */
+	fnew->mask = rhashtable_lookup_get_insert_fast(&head->ht,
+						       &mask->ht_node,
+						       mask_ht_params);
 	if (!fnew->mask) {
-		if (fold)
-			return -EINVAL;
+		rcu_read_unlock();
+
+		if (fold) {
+			ret = -EINVAL;
+			goto errout_cleanup;
+		}
 
 		newmask = fl_create_new_mask(head, mask);
-		if (IS_ERR(newmask))
-			return PTR_ERR(newmask);
+		if (IS_ERR(newmask)) {
+			ret = PTR_ERR(newmask);
+			goto errout_cleanup;
+		}
 
 		fnew->mask = newmask;
+		return 0;
+	} else if (IS_ERR(fnew->mask)) {
+		ret = PTR_ERR(fnew->mask);
 	} else if (fold && fold->mask != fnew->mask) {
-		return -EINVAL;
+		ret = -EINVAL;
+	} else if (!refcount_inc_not_zero(&fnew->mask->refcnt)) {
+		/* Mask was deleted concurrently, try again */
+		ret = -EAGAIN;
 	}
+	rcu_read_unlock();
+	return ret;
 
-	return 0;
+errout_cleanup:
+	rhashtable_remove_fast(&head->ht, &mask->ht_node,
+			       mask_ht_params);
+	return ret;
 }
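
The scheme above hinges on one invariant: a mask visible in the hash table with a refcount of zero is either still being built or already dying, and must not be used. A self-contained C11 sketch of the inc-not-zero rule readers follow (an analogue of refcount_inc_not_zero(), not the kernel implementation):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct obj { atomic_uint refcnt; };	/* 0 = placeholder or dying */

    static bool obj_tryget(struct obj *o)
    {
    	unsigned int c = atomic_load(&o->refcnt);

    	while (c != 0) {
    		/* on CAS failure, c is reloaded with the current value */
    		if (atomic_compare_exchange_weak(&o->refcnt, &c, c + 1))
    			return true;	/* reference taken */
    	}
    	return false;	/* caller backs off and retries the lookup */
    }
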
 
 static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 			struct cls_fl_filter *f, struct fl_flow_mask *mask,
 			unsigned long base, struct nlattr **tb,
 			struct nlattr *est, bool ovr,
-			struct fl_flow_tmplt *tmplt,
+			struct fl_flow_tmplt *tmplt, bool rtnl_held,
 			struct netlink_ext_ack *extack)
 {
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held,
+				extack);
 	if (err < 0)
 		return err;
 
 	if (tb[TCA_FLOWER_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+		if (!rtnl_held)
+			rtnl_lock();
 		tcf_bind_filter(tp, &f->res, base);
+		if (!rtnl_held)
+			rtnl_unlock();
 	}
 
 	err = fl_set_key(net, tb, &f->key, &mask->key, extack);
@@ -1168,27 +1491,61 @@
 	return 0;
 }
 
+static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
+			       struct cls_fl_filter *fold,
+			       bool *in_ht)
+{
+	struct fl_flow_mask *mask = fnew->mask;
+	int err;
+
+	err = rhashtable_lookup_insert_fast(&mask->ht,
+					    &fnew->ht_node,
+					    mask->filter_ht_params);
+	if (err) {
+		*in_ht = false;
+		/* It is okay if filter with same key exists when
+		 * overwriting.
+		 */
+		return fold && err == -EEXIST ? 0 : err;
+	}
+
+	*in_ht = true;
+	return 0;
+}
+
 static int fl_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle, struct nlattr **tca,
-		     void **arg, bool ovr, struct netlink_ext_ack *extack)
+		     void **arg, bool ovr, bool rtnl_held,
+		     struct netlink_ext_ack *extack)
 {
-	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_head *head = fl_head_dereference(tp);
 	struct cls_fl_filter *fold = *arg;
 	struct cls_fl_filter *fnew;
+	struct fl_flow_mask *mask;
 	struct nlattr **tb;
-	struct fl_flow_mask mask = {};
+	bool in_ht;
 	int err;
 
-	if (!tca[TCA_OPTIONS])
-		return -EINVAL;
+	if (!tca[TCA_OPTIONS]) {
+		err = -EINVAL;
+		goto errout_fold;
+	}
+
+	mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
+	if (!mask) {
+		err = -ENOBUFS;
+		goto errout_fold;
+	}
 
 	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
-	if (!tb)
-		return -ENOBUFS;
+	if (!tb) {
+		err = -ENOBUFS;
+		goto errout_mask_alloc;
+	}
 
-	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
-			       fl_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX,
+					  tca[TCA_OPTIONS], fl_policy, NULL);
 	if (err < 0)
 		goto errout_tb;
 
@@ -1202,208 +1559,340 @@
 		err = -ENOBUFS;
 		goto errout_tb;
 	}
+	INIT_LIST_HEAD(&fnew->hw_list);
+	refcount_set(&fnew->refcnt, 1);
 
-	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+	err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
 	if (err < 0)
 		goto errout;
 
-	if (!handle) {
-		handle = 1;
-		err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
-				    INT_MAX, GFP_KERNEL);
-	} else if (!fold) {
-		/* user specifies a handle and it doesn't exist */
-		err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
-				    handle, GFP_KERNEL);
-	}
-	if (err)
-		goto errout;
-	fnew->handle = handle;
-
 	if (tb[TCA_FLOWER_FLAGS]) {
 		fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
 
 		if (!tc_flags_valid(fnew->flags)) {
 			err = -EINVAL;
-			goto errout_idr;
+			goto errout;
 		}
 	}
 
-	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr,
-			   tp->chain->tmplt_priv, extack);
+	err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
+			   tp->chain->tmplt_priv, rtnl_held, extack);
 	if (err)
-		goto errout_idr;
+		goto errout;
 
-	err = fl_check_assign_mask(head, fnew, fold, &mask);
+	err = fl_check_assign_mask(head, fnew, fold, mask);
 	if (err)
-		goto errout_idr;
+		goto errout;
 
-	if (!tc_skip_sw(fnew->flags)) {
-		if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
-			err = -EEXIST;
-			goto errout_mask;
-		}
-
-		err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
-					     fnew->mask->filter_ht_params);
-		if (err)
-			goto errout_mask;
-	}
+	err = fl_ht_insert_unique(fnew, fold, &in_ht);
+	if (err)
+		goto errout_mask;
 
 	if (!tc_skip_hw(fnew->flags)) {
-		err = fl_hw_replace_filter(tp, fnew, extack);
+		err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
 		if (err)
-			goto errout_mask;
+			goto errout_ht;
 	}
 
 	if (!tc_in_hw(fnew->flags))
 		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+	spin_lock(&tp->lock);
+
+	/* tp was deleted concurrently. -EAGAIN will cause caller to lookup
+	 * proto again or create new one, if necessary.
+	 */
+	if (tp->deleting) {
+		err = -EAGAIN;
+		goto errout_hw;
+	}
+
 	if (fold) {
-		if (!tc_skip_sw(fold->flags))
-			rhashtable_remove_fast(&fold->mask->ht,
-					       &fold->ht_node,
-					       fold->mask->filter_ht_params);
+		/* Fold filter was deleted concurrently. Retry lookup. */
+		if (fold->deleted) {
+			err = -EAGAIN;
+			goto errout_hw;
+		}
+
+		fnew->handle = handle;
+
+		if (!in_ht) {
+			struct rhashtable_params params =
+				fnew->mask->filter_ht_params;
+
+			err = rhashtable_insert_fast(&fnew->mask->ht,
+						     &fnew->ht_node,
+						     params);
+			if (err)
+				goto errout_hw;
+			in_ht = true;
+		}
+
+		refcount_inc(&fnew->refcnt);
+		rhashtable_remove_fast(&fold->mask->ht,
+				       &fold->ht_node,
+				       fold->mask->filter_ht_params);
+		idr_replace(&head->handle_idr, fnew, fnew->handle);
+		list_replace_rcu(&fold->list, &fnew->list);
+		fold->deleted = true;
+
+		spin_unlock(&tp->lock);
+
+		fl_mask_put(head, fold->mask);
 		if (!tc_skip_hw(fold->flags))
-			fl_hw_destroy_filter(tp, fold, NULL);
+			fl_hw_destroy_filter(tp, fold, rtnl_held, NULL);
+		tcf_unbind_filter(tp, &fold->res);
+		/* Caller holds reference to fold, so refcnt is always > 0
+		 * after this.
+		 */
+		refcount_dec(&fold->refcnt);
+		__fl_put(fold);
+	} else {
+		if (handle) {
+			/* user specifies a handle and it doesn't exist */
+			err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+					    handle, GFP_ATOMIC);
+
+			/* Filter with specified handle was concurrently
+			 * inserted after initial check in cls_api. This is not
+			 * necessarily an error if NLM_F_EXCL is not set in
+			 * message flags. Returning EAGAIN will cause cls_api to
+			 * try to update the concurrently inserted rule.
+			 */
+			if (err == -ENOSPC)
+				err = -EAGAIN;
+		} else {
+			handle = 1;
+			err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+					    INT_MAX, GFP_ATOMIC);
+		}
+		if (err)
+			goto errout_hw;
+
+		refcount_inc(&fnew->refcnt);
+		fnew->handle = handle;
+		list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
+		spin_unlock(&tp->lock);
 	}
 
 	*arg = fnew;
 
-	if (fold) {
-		idr_replace(&head->handle_idr, fnew, fnew->handle);
-		list_replace_rcu(&fold->list, &fnew->list);
-		tcf_unbind_filter(tp, &fold->res);
-		tcf_exts_get_net(&fold->exts);
-		tcf_queue_work(&fold->rwork, fl_destroy_filter_work);
-	} else {
-		list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
-	}
-
 	kfree(tb);
+	tcf_queue_work(&mask->rwork, fl_uninit_mask_free_work);
 	return 0;
 
+errout_ht:
+	spin_lock(&tp->lock);
+errout_hw:
+	fnew->deleted = true;
+	spin_unlock(&tp->lock);
+	if (!tc_skip_hw(fnew->flags))
+		fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL);
+	if (in_ht)
+		rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
+				       fnew->mask->filter_ht_params);
 errout_mask:
-	fl_mask_put(head, fnew->mask, false);
-
-errout_idr:
-	if (!fold)
-		idr_remove(&head->handle_idr, fnew->handle);
+	fl_mask_put(head, fnew->mask);
 errout:
-	tcf_exts_destroy(&fnew->exts);
-	kfree(fnew);
+	__fl_put(fnew);
 errout_tb:
 	kfree(tb);
+errout_mask_alloc:
+	tcf_queue_work(&mask->rwork, fl_uninit_mask_free_work);
+errout_fold:
+	if (fold)
+		__fl_put(fold);
 	return err;
 }
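
With rtnl no longer serializing everything, fl_change() can race with concurrent deletion of the proto, the old filter, or the requested handle; every such race surfaces as -EAGAIN, which (per the comments above) the caller treats as "redo the lookup and retry", not as a failure. A toy illustration of that contract, with hypothetical names:

    #include <errno.h>

    struct state { int generation; };

    static int try_change(struct state *s, int seen)
    {
    	if (s->generation != seen)
    		return -EAGAIN;	/* state moved underneath us */
    	/* ...apply the change... */
    	return 0;
    }

    static int change_retrying(struct state *s)
    {
    	int err;

    	do {
    		int seen = s->generation;	/* redo the lookup */

    		err = try_change(s, seen);
    	} while (err == -EAGAIN);

    	return err;
    }
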
 
 static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
-		     struct netlink_ext_ack *extack)
+		     bool rtnl_held, struct netlink_ext_ack *extack)
 {
-	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_head *head = fl_head_dereference(tp);
 	struct cls_fl_filter *f = arg;
+	bool last_on_mask;
+	int err = 0;
 
-	if (!tc_skip_sw(f->flags))
-		rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
-				       f->mask->filter_ht_params);
-	__fl_delete(tp, f, extack);
+	err = __fl_delete(tp, f, &last_on_mask, rtnl_held, extack);
 	*last = list_empty(&head->masks);
-	return 0;
+	__fl_put(f);
+
+	return err;
 }
 
-static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		    bool rtnl_held)
 {
-	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_head *head = fl_head_dereference(tp);
+	unsigned long id = arg->cookie, tmp;
 	struct cls_fl_filter *f;
 
 	arg->count = arg->skip;
 
-	while ((f = idr_get_next_ul(&head->handle_idr,
-				    &arg->cookie)) != NULL) {
+	idr_for_each_entry_continue_ul(&head->handle_idr, f, tmp, id) {
+		/* don't return filters that are being deleted */
+		if (!refcount_inc_not_zero(&f->refcnt))
+			continue;
 		if (arg->fn(tp, f, arg) < 0) {
+			__fl_put(f);
 			arg->stop = 1;
 			break;
 		}
-		arg->cookie = f->handle + 1;
+		__fl_put(f);
 		arg->count++;
 	}
+	arg->cookie = id;
 }
 
-static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static struct cls_fl_filter *
+fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
+{
+	struct cls_fl_head *head = fl_head_dereference(tp);
+
+	spin_lock(&tp->lock);
+	if (list_empty(&head->hw_filters)) {
+		spin_unlock(&tp->lock);
+		return NULL;
+	}
+
+	if (!f)
+		f = list_entry(&head->hw_filters, struct cls_fl_filter,
+			       hw_list);
+	list_for_each_entry_continue(f, &head->hw_filters, hw_list) {
+		if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) {
+			spin_unlock(&tp->lock);
+			return f;
+		}
+	}
+
+	spin_unlock(&tp->lock);
+	return NULL;
+}
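
The list_entry(&head->hw_filters, ...) line above deliberately manufactures a cursor that aliases the list head itself, so list_for_each_entry_continue() starts at the first real element; combined with refcount_inc_not_zero() this yields a resumable, reference-safe iterator. A self-contained demo of the cursor trick (it mirrors the kernel idiom, including the intentionally out-of-bounds container_of on the head):

    #include <stddef.h>
    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    #define container_of(ptr, type, member) \
    	((type *)((char *)(ptr) - offsetof(type, member)))

    struct item { int v; struct list_head node; };

    int main(void)
    {
    	struct item a = { .v = 1 }, b = { .v = 2 };
    	struct list_head head = { .next = &a.node, .prev = &b.node };

    	a.node.next = &b.node; a.node.prev = &head;
    	b.node.next = &head;   b.node.prev = &a.node;

    	/* cursor aliasing the head, like fl_get_next_hw_filter() */
    	struct item *it = container_of(&head, struct item, node);

    	while (it->node.next != &head) {
    		it = container_of(it->node.next, struct item, node);
    		printf("%d\n", it->v);	/* prints 1, then 2 */
    	}
    	return 0;
    }
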
+
+static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			void *cb_priv, struct netlink_ext_ack *extack)
 {
-	struct cls_fl_head *head = rtnl_dereference(tp->root);
-	struct tc_cls_flower_offload cls_flower = {};
 	struct tcf_block *block = tp->chain->block;
-	struct fl_flow_mask *mask;
-	struct cls_fl_filter *f;
+	struct flow_cls_offload cls_flower = {};
+	struct cls_fl_filter *f = NULL;
 	int err;
 
-	list_for_each_entry(mask, &head->masks, list) {
-		list_for_each_entry(f, &mask->filters, list) {
-			if (tc_skip_hw(f->flags))
-				continue;
+	/* hw_filters list can only be changed by hw offload functions after
+	 * obtaining rtnl lock. Make sure it is not changed while reoffload is
+	 * iterating it.
+	 */
+	ASSERT_RTNL();
 
-			tc_cls_common_offload_init(&cls_flower.common, tp,
-						   f->flags, extack);
-			cls_flower.command = add ?
-				TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
-			cls_flower.cookie = (unsigned long)f;
-			cls_flower.dissector = &mask->dissector;
-			cls_flower.mask = &mask->key;
-			cls_flower.key = &f->mkey;
-			cls_flower.exts = &f->exts;
-			cls_flower.classid = f->res.classid;
-
-			err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
-			if (err) {
-				if (add && tc_skip_sw(f->flags))
-					return err;
-				continue;
-			}
-
-			tc_cls_offload_cnt_update(block, &f->in_hw_count,
-						  &f->flags, add);
+	while ((f = fl_get_next_hw_filter(tp, f, add))) {
+		cls_flower.rule =
+			flow_rule_alloc(tcf_exts_num_actions(&f->exts));
+		if (!cls_flower.rule) {
+			__fl_put(f);
+			return -ENOMEM;
 		}
+
+		tc_cls_common_offload_init(&cls_flower.common, tp, f->flags,
+					   extack);
+		cls_flower.command = add ?
+			FLOW_CLS_REPLACE : FLOW_CLS_DESTROY;
+		cls_flower.cookie = (unsigned long)f;
+		cls_flower.rule->match.dissector = &f->mask->dissector;
+		cls_flower.rule->match.mask = &f->mask->key;
+		cls_flower.rule->match.key = &f->mkey;
+
+		err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts,
+					   true);
+		if (err) {
+			kfree(cls_flower.rule);
+			if (tc_skip_sw(f->flags)) {
+				NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+				__fl_put(f);
+				return err;
+			}
+			goto next_flow;
+		}
+
+		cls_flower.classid = f->res.classid;
+
+		err = tc_setup_cb_reoffload(block, tp, add, cb,
+					    TC_SETUP_CLSFLOWER, &cls_flower,
+					    cb_priv, &f->flags,
+					    &f->in_hw_count);
+		tc_cleanup_flow_action(&cls_flower.rule->action);
+		kfree(cls_flower.rule);
+
+		if (err) {
+			__fl_put(f);
+			return err;
+		}
+next_flow:
+		__fl_put(f);
 	}
 
 	return 0;
 }
 
-static void fl_hw_create_tmplt(struct tcf_chain *chain,
-			       struct fl_flow_tmplt *tmplt)
+static void fl_hw_add(struct tcf_proto *tp, void *type_data)
 {
-	struct tc_cls_flower_offload cls_flower = {};
+	struct flow_cls_offload *cls_flower = type_data;
+	struct cls_fl_filter *f =
+		(struct cls_fl_filter *) cls_flower->cookie;
+	struct cls_fl_head *head = fl_head_dereference(tp);
+
+	spin_lock(&tp->lock);
+	list_add(&f->hw_list, &head->hw_filters);
+	spin_unlock(&tp->lock);
+}
+
+static void fl_hw_del(struct tcf_proto *tp, void *type_data)
+{
+	struct flow_cls_offload *cls_flower = type_data;
+	struct cls_fl_filter *f =
+		(struct cls_fl_filter *) cls_flower->cookie;
+
+	spin_lock(&tp->lock);
+	if (!list_empty(&f->hw_list))
+		list_del_init(&f->hw_list);
+	spin_unlock(&tp->lock);
+}
+
+static int fl_hw_create_tmplt(struct tcf_chain *chain,
+			      struct fl_flow_tmplt *tmplt)
+{
+	struct flow_cls_offload cls_flower = {};
 	struct tcf_block *block = chain->block;
-	struct tcf_exts dummy_exts = { 0, };
+
+	cls_flower.rule = flow_rule_alloc(0);
+	if (!cls_flower.rule)
+		return -ENOMEM;
 
 	cls_flower.common.chain_index = chain->index;
-	cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
+	cls_flower.command = FLOW_CLS_TMPLT_CREATE;
 	cls_flower.cookie = (unsigned long) tmplt;
-	cls_flower.dissector = &tmplt->dissector;
-	cls_flower.mask = &tmplt->mask;
-	cls_flower.key = &tmplt->dummy_key;
-	cls_flower.exts = &dummy_exts;
+	cls_flower.rule->match.dissector = &tmplt->dissector;
+	cls_flower.rule->match.mask = &tmplt->mask;
+	cls_flower.rule->match.key = &tmplt->dummy_key;
 
 	/* We don't care if driver (any of them) fails to handle this
 	 * call. It serves just as a hint for it.
 	 */
-	tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
-			 &cls_flower, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true);
+	kfree(cls_flower.rule);
+
+	return 0;
 }
 
 static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
 				struct fl_flow_tmplt *tmplt)
 {
-	struct tc_cls_flower_offload cls_flower = {};
+	struct flow_cls_offload cls_flower = {};
 	struct tcf_block *block = chain->block;
 
 	cls_flower.common.chain_index = chain->index;
-	cls_flower.command = TC_CLSFLOWER_TMPLT_DESTROY;
+	cls_flower.command = FLOW_CLS_TMPLT_DESTROY;
 	cls_flower.cookie = (unsigned long) tmplt;
 
-	tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
-			 &cls_flower, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true);
 }
 
 static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
@@ -1420,8 +1909,8 @@
 	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
 	if (!tb)
 		return ERR_PTR(-ENOBUFS);
-	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
-			       fl_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX,
+					  tca[TCA_OPTIONS], fl_policy, NULL);
 	if (err)
 		goto errout_tb;
 
@@ -1434,12 +1923,14 @@
 	err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack);
 	if (err)
 		goto errout_tmplt;
-	kfree(tb);
 
 	fl_init_dissector(&tmplt->dissector, &tmplt->mask);
 
-	fl_hw_create_tmplt(chain, tmplt);
+	err = fl_hw_create_tmplt(chain, tmplt);
+	if (err)
+		goto errout_tmplt;
 
+	kfree(tb);
 	return tmplt;
 
 errout_tmplt:
@@ -1476,6 +1967,26 @@
 	return 0;
 }
 
+static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
+				  struct fl_flow_key *mask)
+{
+	if (fl_dump_key_val(skb, &key->tp_min.dst, TCA_FLOWER_KEY_PORT_DST_MIN,
+			    &mask->tp_min.dst, TCA_FLOWER_UNSPEC,
+			    sizeof(key->tp_min.dst)) ||
+	    fl_dump_key_val(skb, &key->tp_max.dst, TCA_FLOWER_KEY_PORT_DST_MAX,
+			    &mask->tp_max.dst, TCA_FLOWER_UNSPEC,
+			    sizeof(key->tp_max.dst)) ||
+	    fl_dump_key_val(skb, &key->tp_min.src, TCA_FLOWER_KEY_PORT_SRC_MIN,
+			    &mask->tp_min.src, TCA_FLOWER_UNSPEC,
+			    sizeof(key->tp_min.src)) ||
+	    fl_dump_key_val(skb, &key->tp_max.src, TCA_FLOWER_KEY_PORT_SRC_MAX,
+			    &mask->tp_max.src, TCA_FLOWER_UNSPEC,
+			    sizeof(key->tp_max.src)))
+		return -1;
+
+	return 0;
+}
+
 static int fl_dump_key_mpls(struct sk_buff *skb,
 			    struct flow_dissector_key_mpls *mpls_key,
 			    struct flow_dissector_key_mpls *mpls_mask)
@@ -1597,7 +2108,7 @@
 	struct nlattr *nest;
 	int opt_off = 0;
 
-	nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE);
+	nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE);
 	if (!nest)
 		goto nla_put_failure;
 
@@ -1624,6 +2135,40 @@
 	return -EMSGSIZE;
 }
 
+static int fl_dump_key_ct(struct sk_buff *skb,
+			  struct flow_dissector_key_ct *key,
+			  struct flow_dissector_key_ct *mask)
+{
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK) &&
+	    fl_dump_key_val(skb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE,
+			    &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK,
+			    sizeof(key->ct_state)))
+		goto nla_put_failure;
+
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
+	    fl_dump_key_val(skb, &key->ct_zone, TCA_FLOWER_KEY_CT_ZONE,
+			    &mask->ct_zone, TCA_FLOWER_KEY_CT_ZONE_MASK,
+			    sizeof(key->ct_zone)))
+		goto nla_put_failure;
+
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+	    fl_dump_key_val(skb, &key->ct_mark, TCA_FLOWER_KEY_CT_MARK,
+			    &mask->ct_mark, TCA_FLOWER_KEY_CT_MARK_MASK,
+			    sizeof(key->ct_mark)))
+		goto nla_put_failure;
+
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+	    fl_dump_key_val(skb, &key->ct_labels, TCA_FLOWER_KEY_CT_LABELS,
+			    &mask->ct_labels, TCA_FLOWER_KEY_CT_LABELS_MASK,
+			    sizeof(key->ct_labels)))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
 static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
 			       struct flow_dissector_key_enc_opts *enc_opts)
 {
@@ -1633,7 +2178,7 @@
 	if (!enc_opts->len)
 		return 0;
 
-	nest = nla_nest_start(skb, enc_opt_type);
+	nest = nla_nest_start_noflag(skb, enc_opt_type);
 	if (!nest)
 		goto nla_put_failure;
 
@@ -1670,10 +2215,10 @@
 static int fl_dump_key(struct sk_buff *skb, struct net *net,
 		       struct fl_flow_key *key, struct fl_flow_key *mask)
 {
-	if (mask->indev_ifindex) {
+	if (mask->meta.ingress_ifindex) {
 		struct net_device *dev;
 
-		dev = __dev_get_by_index(net, key->indev_ifindex);
+		dev = __dev_get_by_index(net, key->meta.ingress_ifindex);
 		if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
 			goto nla_put_failure;
 	}
@@ -1812,6 +2357,12 @@
 				  sizeof(key->arp.tha))))
 		goto nla_put_failure;
 
+	if ((key->basic.ip_proto == IPPROTO_TCP ||
+	     key->basic.ip_proto == IPPROTO_UDP ||
+	     key->basic.ip_proto == IPPROTO_SCTP) &&
+	     fl_dump_key_port_range(skb, key, mask))
+		goto nla_put_failure;
+
 	if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
 	    (fl_dump_key_val(skb, &key->enc_ipv4.src,
 			    TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
@@ -1851,6 +2402,9 @@
 	    fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts))
 		goto nla_put_failure;
 
+	if (fl_dump_key_ct(skb, &key->ct, &mask->ct))
+		goto nla_put_failure;
+
 	if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
 		goto nla_put_failure;
 
@@ -1861,35 +2415,44 @@
 }
 
 static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		   struct sk_buff *skb, struct tcmsg *t)
+		   struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct cls_fl_filter *f = fh;
 	struct nlattr *nest;
 	struct fl_flow_key *key, *mask;
+	bool skip_hw;
 
 	if (!f)
 		return skb->len;
 
 	t->tcm_handle = f->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (!nest)
 		goto nla_put_failure;
 
+	spin_lock(&tp->lock);
+
 	if (f->res.classid &&
 	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
-		goto nla_put_failure;
+		goto nla_put_failure_locked;
 
 	key = &f->key;
 	mask = &f->mask->key;
+	skip_hw = tc_skip_hw(f->flags);
 
 	if (fl_dump_key(skb, net, key, mask))
-		goto nla_put_failure;
-
-	if (!tc_skip_hw(f->flags))
-		fl_hw_update_stats(tp, f);
+		goto nla_put_failure_locked;
 
 	if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
+		goto nla_put_failure_locked;
+
+	spin_unlock(&tp->lock);
+
+	if (!skip_hw)
+		fl_hw_update_stats(tp, f, rtnl_held);
+
+	if (nla_put_u32(skb, TCA_FLOWER_IN_HW_COUNT, f->in_hw_count))
 		goto nla_put_failure;
 
 	if (tcf_exts_dump(skb, &f->exts))
@@ -1902,6 +2465,8 @@
 
 	return skb->len;
 
+nla_put_failure_locked:
+	spin_unlock(&tp->lock);
 nla_put_failure:
 	nla_nest_cancel(skb, nest);
 	return -1;
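
The dump path now follows a snapshot-then-act shape: sample everything tp->lock protects, drop the lock, and only then call fl_hw_update_stats(), which may sleep. Condensed from the function above (a fragment for orientation, not standalone code):

    spin_lock(&tp->lock);
    /* ...netlink puts for classid, key, flags... */
    skip_hw = tc_skip_hw(f->flags);		/* snapshot under the lock */
    spin_unlock(&tp->lock);

    if (!skip_hw)
    	fl_hw_update_stats(tp, f, rtnl_held);	/* may sleep */
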
@@ -1913,7 +2478,7 @@
 	struct fl_flow_key *key, *mask;
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (!nest)
 		goto nla_put_failure;
 
@@ -1946,16 +2511,20 @@
 	.init		= fl_init,
 	.destroy	= fl_destroy,
 	.get		= fl_get,
+	.put		= fl_put,
 	.change		= fl_change,
 	.delete		= fl_delete,
 	.walk		= fl_walk,
 	.reoffload	= fl_reoffload,
+	.hw_add		= fl_hw_add,
+	.hw_del		= fl_hw_del,
 	.dump		= fl_dump,
 	.bind_class	= fl_bind_class,
 	.tmplt_create	= fl_tmplt_create,
 	.tmplt_destroy	= fl_tmplt_destroy,
 	.tmplt_dump	= fl_tmplt_dump,
 	.owner		= THIS_MODULE,
+	.flags		= TCF_PROTO_OPS_DOIT_UNLOCKED,
 };
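
TCF_PROTO_OPS_DOIT_UNLOCKED is the opt-in switch for everything this patch threads through cls_flower: the core may now invoke these ops without rtnl, passing rtnl_held so an op can take the lock itself around the few steps that still require it (as fl_set_parms() does for tcf_bind_filter()). A self-contained sketch of that conditional-locking convention:

    #include <pthread.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

    static void bind_step(int locked_by_caller)
    {
    	if (!locked_by_caller)
    		pthread_mutex_lock(&big_lock);
    	/* ...the one step that still needs the big lock... */
    	if (!locked_by_caller)
    		pthread_mutex_unlock(&big_lock);
    }
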
 
 static int __init cls_fl_init(void)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 29eeeaf..c9496c9 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -1,21 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_fw.c	Classifier mapping ipchains' fwmark to traffic class.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  * Changes:
  * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one
  * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filter (and kernel).
  * Alex <alex@pilotsoft.com> : 2004xxyy: Added Action extension
- *
- * JHS: We should remove the CONFIG_NET_CLS_IND from here
- * eventually when the meta match extension is made available
- *
  */
 
 #include <linux/module.h>
@@ -42,9 +34,7 @@
 	struct fw_filter __rcu	*next;
 	u32			id;
 	struct tcf_result	res;
-#ifdef CONFIG_NET_CLS_IND
 	int			ifindex;
-#endif /* CONFIG_NET_CLS_IND */
 	struct tcf_exts		exts;
 	struct tcf_proto	*tp;
 	struct rcu_work		rwork;
@@ -72,10 +62,8 @@
 		     f = rcu_dereference_bh(f->next)) {
 			if (f->id == id) {
 				*res = f->res;
-#ifdef CONFIG_NET_CLS_IND
 				if (!tcf_match_indev(skb, f->ifindex))
 					continue;
-#endif /* CONFIG_NET_CLS_IND */
 				r = tcf_exts_exec(skb, &f->exts, res);
 				if (r < 0)
 					continue;
@@ -139,7 +127,8 @@
 	rtnl_unlock();
 }
 
-static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fw_destroy(struct tcf_proto *tp, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f;
@@ -163,7 +152,7 @@
 }
 
 static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
-		     struct netlink_ext_ack *extack)
+		     bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f = arg;
@@ -217,7 +206,7 @@
 	int err;
 
 	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
-				extack);
+				true, extack);
 	if (err < 0)
 		return err;
 
@@ -226,7 +215,6 @@
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
-#ifdef CONFIG_NET_CLS_IND
 	if (tb[TCA_FW_INDEV]) {
 		int ret;
 		ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
@@ -234,7 +222,6 @@
 			return ret;
 		f->ifindex = ret;
 	}
-#endif /* CONFIG_NET_CLS_IND */
 
 	err = -EINVAL;
 	if (tb[TCA_FW_MASK]) {
@@ -250,7 +237,8 @@
 static int fw_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle, struct nlattr **tca, void **arg,
-		     bool ovr, struct netlink_ext_ack *extack)
+		     bool ovr, bool rtnl_held,
+		     struct netlink_ext_ack *extack)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f = *arg;
@@ -261,7 +249,8 @@
 	if (!opt)
 		return handle ? -EINVAL : 0; /* Succeed if it is old method. */
 
-	err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -278,12 +267,11 @@
 
 		fnew->id = f->id;
 		fnew->res = f->res;
-#ifdef CONFIG_NET_CLS_IND
 		fnew->ifindex = f->ifindex;
-#endif /* CONFIG_NET_CLS_IND */
 		fnew->tp = f->tp;
 
-		err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+		err = tcf_exts_init(&fnew->exts, net, TCA_FW_ACT,
+				    TCA_FW_POLICE);
 		if (err < 0) {
 			kfree(fnew);
 			return err;
@@ -332,7 +320,7 @@
 	if (f == NULL)
 		return -ENOBUFS;
 
-	err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
+	err = tcf_exts_init(&f->exts, net, TCA_FW_ACT, TCA_FW_POLICE);
 	if (err < 0)
 		goto errout;
 	f->id = handle;
@@ -354,7 +342,8 @@
 	return err;
 }
 
-static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		    bool rtnl_held)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	int h;
@@ -384,7 +373,7 @@
 }
 
 static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		   struct sk_buff *skb, struct tcmsg *t)
+		   struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f = fh;
@@ -398,21 +387,19 @@
 	if (!f->res.classid && !tcf_exts_has_actions(&f->exts))
 		return skb->len;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
 	if (f->res.classid &&
 	    nla_put_u32(skb, TCA_FW_CLASSID, f->res.classid))
 		goto nla_put_failure;
-#ifdef CONFIG_NET_CLS_IND
 	if (f->ifindex) {
 		struct net_device *dev;
 		dev = __dev_get_by_index(net, f->ifindex);
 		if (dev && nla_put_string(skb, TCA_FW_INDEV, dev->name))
 			goto nla_put_failure;
 	}
-#endif /* CONFIG_NET_CLS_IND */
 	if (head->mask != 0xFFFFFFFF &&
 	    nla_put_u32(skb, TCA_FW_MASK, head->mask))
 		goto nla_put_failure;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 856fa79..7fc2eb6 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -1,17 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_matchall.c		Match-all classifier
  *
  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
 
 #include <net/sch_generic.h>
 #include <net/pkt_cls.h>
@@ -22,7 +19,9 @@
 	u32 handle;
 	u32 flags;
 	unsigned int in_hw_count;
+	struct tc_matchall_pcnt __percpu *pf;
 	struct rcu_work rwork;
+	bool deleting;
 };
 
 static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -30,10 +29,14 @@
 {
 	struct cls_mall_head *head = rcu_dereference_bh(tp->root);
 
+	if (unlikely(!head))
+		return -1;
+
 	if (tc_skip_sw(head->flags))
 		return -1;
 
 	*res = head->res;
+	__this_cpu_inc(head->pf->rhit);
 	return tcf_exts_exec(skb, &head->exts, res);
 }
 
@@ -46,6 +49,7 @@
 {
 	tcf_exts_destroy(&head->exts);
 	tcf_exts_put_net(&head->exts);
+	free_percpu(head->pf);
 	kfree(head);
 }
 
@@ -71,8 +75,8 @@
 	cls_mall.command = TC_CLSMATCHALL_DESTROY;
 	cls_mall.cookie = cookie;
 
-	tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
-	tcf_block_offload_dec(block, &head->flags);
+	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, false,
+			    &head->flags, &head->in_hw_count, true);
 }
 
 static int mall_replace_hw_filter(struct tcf_proto *tp,
@@ -85,19 +89,34 @@
 	bool skip_sw = tc_skip_sw(head->flags);
 	int err;
 
+	cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts));
+	if (!cls_mall.rule)
+		return -ENOMEM;
+
 	tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
 	cls_mall.command = TC_CLSMATCHALL_REPLACE;
-	cls_mall.exts = &head->exts;
 	cls_mall.cookie = cookie;
 
-	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
-			       &cls_mall, skip_sw);
-	if (err < 0) {
+	err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);
+	if (err) {
+		kfree(cls_mall.rule);
+		mall_destroy_hw_filter(tp, head, cookie, NULL);
+		if (skip_sw)
+			NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+		else
+			err = 0;
+
+		return err;
+	}
+
+	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall,
+			      skip_sw, &head->flags, &head->in_hw_count, true);
+	tc_cleanup_flow_action(&cls_mall.rule->action);
+	kfree(cls_mall.rule);
+
+	if (err) {
 		mall_destroy_hw_filter(tp, head, cookie, NULL);
 		return err;
-	} else if (err > 0) {
-		head->in_hw_count = err;
-		tcf_block_offload_inc(block, &head->flags);
 	}
 
 	if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
@@ -106,7 +125,8 @@
 	return 0;
 }
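
The replace path above shows the new offload shape shared by the flower and matchall changes in this patch: the intermediate struct flow_rule is only a transport container for drivers, built, offered, and freed again within one call. Condensed from the function above (error handling elided):

    /* 1. transport container sized for this filter's actions */
    cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts));

    /* 2. translate tcf_exts into generic flow actions */
    err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);

    /* 3. offer the rule to the drivers bound to this block */
    err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall,
    		      skip_sw, &head->flags, &head->in_hw_count, true);

    /* 4. drivers keep their own copy; release the container again */
    tc_cleanup_flow_action(&cls_mall.rule->action);
    kfree(cls_mall.rule);
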
 
-static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void mall_destroy(struct tcf_proto *tp, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
 
@@ -126,6 +146,11 @@
 
 static void *mall_get(struct tcf_proto *tp, u32 handle)
 {
+	struct cls_mall_head *head = rtnl_dereference(tp->root);
+
+	if (head && head->handle == handle)
+		return head;
+
 	return NULL;
 }
 
@@ -142,7 +167,8 @@
 {
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
+				extack);
 	if (err < 0)
 		return err;
 
@@ -156,7 +182,8 @@
 static int mall_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       void **arg, bool ovr, struct netlink_ext_ack *extack)
+		       void **arg, bool ovr, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
 	struct nlattr *tb[TCA_MATCHALL_MAX + 1];
@@ -170,8 +197,8 @@
 	if (head)
 		return -EEXIST;
 
-	err = nla_parse_nested(tb, TCA_MATCHALL_MAX, tca[TCA_OPTIONS],
-			       mall_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_MATCHALL_MAX,
+					  tca[TCA_OPTIONS], mall_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -185,7 +212,7 @@
 	if (!new)
 		return -ENOBUFS;
 
-	err = tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0);
+	err = tcf_exts_init(&new->exts, net, TCA_MATCHALL_ACT, 0);
 	if (err)
 		goto err_exts_init;
 
@@ -193,6 +220,11 @@
 		handle = 1;
 	new->handle = handle;
 	new->flags = flags;
+	new->pf = alloc_percpu(struct tc_matchall_pcnt);
+	if (!new->pf) {
+		err = -ENOMEM;
+		goto err_alloc_percpu;
+	}
 
 	err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
 			     extack);
@@ -215,6 +247,8 @@
 
 err_replace_hw_filter:
 err_set_parms:
+	free_percpu(new->pf);
+err_alloc_percpu:
 	tcf_exts_destroy(&new->exts);
 err_exts_init:
 	kfree(new);
@@ -222,24 +256,32 @@
 }
 
 static int mall_delete(struct tcf_proto *tp, void *arg, bool *last,
-		       struct netlink_ext_ack *extack)
+		       bool rtnl_held, struct netlink_ext_ack *extack)
 {
-	return -EOPNOTSUPP;
+	struct cls_mall_head *head = rtnl_dereference(tp->root);
+
+	head->deleting = true;
+	*last = true;
+	return 0;
 }
 
-static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		      bool rtnl_held)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
 
 	if (arg->count < arg->skip)
 		goto skip;
+
+	if (!head || head->deleting)
+		return;
 	if (arg->fn(tp, head, arg) < 0)
 		arg->stop = 1;
 skip:
 	arg->count++;
 }
 
-static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			  void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
@@ -250,36 +292,71 @@
 	if (tc_skip_hw(head->flags))
 		return 0;
 
+	cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts));
+	if (!cls_mall.rule)
+		return -ENOMEM;
+
 	tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
 	cls_mall.command = add ?
 		TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
-	cls_mall.exts = &head->exts;
 	cls_mall.cookie = (unsigned long)head;
 
-	err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv);
+	err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);
 	if (err) {
-		if (add && tc_skip_sw(head->flags))
+		kfree(cls_mall.rule);
+		if (add && tc_skip_sw(head->flags)) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
 			return err;
+		}
 		return 0;
 	}
 
-	tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add);
+	err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL,
+				    &cls_mall, cb_priv, &head->flags,
+				    &head->in_hw_count);
+	tc_cleanup_flow_action(&cls_mall.rule->action);
+	kfree(cls_mall.rule);
+
+	if (err)
+		return err;
 
 	return 0;
 }
 
-static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		     struct sk_buff *skb, struct tcmsg *t)
+static void mall_stats_hw_filter(struct tcf_proto *tp,
+				 struct cls_mall_head *head,
+				 unsigned long cookie)
 {
+	struct tc_cls_matchall_offload cls_mall = {};
+	struct tcf_block *block = tp->chain->block;
+
+	tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, NULL);
+	cls_mall.command = TC_CLSMATCHALL_STATS;
+	cls_mall.cookie = cookie;
+
+	tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
+
+	tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes,
+			      cls_mall.stats.pkts, cls_mall.stats.lastused);
+}
+
+static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
+		     struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
+{
+	struct tc_matchall_pcnt gpf = {};
 	struct cls_mall_head *head = fh;
 	struct nlattr *nest;
+	int cpu;
 
 	if (!head)
 		return skb->len;
 
+	if (!tc_skip_hw(head->flags))
+		mall_stats_hw_filter(tp, head, (unsigned long)head);
+
 	t->tcm_handle = head->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (!nest)
 		goto nla_put_failure;
 
@@ -290,6 +367,17 @@
 	if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags))
 		goto nla_put_failure;
 
+	for_each_possible_cpu(cpu) {
+		struct tc_matchall_pcnt *pf = per_cpu_ptr(head->pf, cpu);
+
+		gpf.rhit += pf->rhit;
+	}
+
+	if (nla_put_64bit(skb, TCA_MATCHALL_PCNT,
+			  sizeof(struct tc_matchall_pcnt),
+			  &gpf, TCA_MATCHALL_PAD))
+		goto nla_put_failure;
+
 	if (tcf_exts_dump(skb, &head->exts))
 		goto nla_put_failure;
 
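The new TCA_MATCHALL_PCNT counter uses the classic per-CPU pattern: the hot path increments a CPU-local slot with no atomics or shared cache lines, and only the dump path pays for summing all slots. A userspace sketch of the same structure:

    #include <stdint.h>

    #define NCPU 8				/* stand-in for nr_cpu_ids */

    struct pcnt { uint64_t rhit; };
    static struct pcnt pf[NCPU];		/* stand-in for alloc_percpu() */

    static void hit(int cpu)		/* hot path: __this_cpu_inc() */
    {
    	pf[cpu].rhit++;
    }

    static uint64_t total(void)		/* dump path: sum all slots */
    {
    	uint64_t sum = 0;

    	for (int cpu = 0; cpu < NCPU; cpu++)	/* for_each_possible_cpu() */
    		sum += pf[cpu].rhit;
    	return sum;
    }
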
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0404aa5..2d9e0b4 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_route.c	ROUTE4 classifier.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
@@ -276,7 +272,8 @@
 	tcf_queue_work(&f->rwork, route4_delete_filter_work);
 }
 
-static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void route4_destroy(struct tcf_proto *tp, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	int h1, h2;
@@ -312,7 +309,7 @@
 }
 
 static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
-			 struct netlink_ext_ack *extack)
+			 bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	struct route4_filter *f = arg;
@@ -393,7 +390,7 @@
 	struct route4_bucket *b;
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
 	if (err < 0)
 		return err;
 
@@ -468,7 +465,7 @@
 static int route4_change(struct net *net, struct sk_buff *in_skb,
 			 struct tcf_proto *tp, unsigned long base, u32 handle,
 			 struct nlattr **tca, void **arg, bool ovr,
-			 struct netlink_ext_ack *extack)
+			 bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	struct route4_filter __rcu **fp;
@@ -483,7 +480,8 @@
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt,
+					  route4_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -496,7 +494,7 @@
 	if (!f)
 		goto errout;
 
-	err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+	err = tcf_exts_init(&f->exts, net, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
 	if (err < 0)
 		goto errout;
 
@@ -560,15 +558,13 @@
 	return err;
 }
 
-static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+			bool rtnl_held)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	unsigned int h, h1;
 
-	if (head == NULL)
-		arg->stop = 1;
-
-	if (arg->stop)
+	if (head == NULL || arg->stop)
 		return;
 
 	for (h = 0; h <= 256; h++) {
@@ -597,7 +593,7 @@
 }
 
 static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		       struct sk_buff *skb, struct tcmsg *t)
+		       struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct route4_filter *f = fh;
 	struct nlattr *nest;
@@ -608,7 +604,7 @@
 
 	t->tcm_handle = f->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
index cbb5e0d..de1c1d4 100644
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_rsvp.c	Special RSVP packet classifier for IPv4.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index e9ccf7d..2f3c03b 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -1,11 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
@@ -312,7 +308,8 @@
 		__rsvp_delete_filter(f);
 }
 
-static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct rsvp_head *data = rtnl_dereference(tp->root);
 	int h1, h2;
@@ -341,7 +338,7 @@
 }
 
 static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
-		       struct netlink_ext_ack *extack)
+		       bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct rsvp_head *head = rtnl_dereference(tp->root);
 	struct rsvp_filter *nfp, *f = arg;
@@ -477,7 +474,8 @@
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
-		       void **arg, bool ovr, struct netlink_ext_ack *extack)
+		       void **arg, bool ovr, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
 {
 	struct rsvp_head *data = rtnl_dereference(tp->root);
 	struct rsvp_filter *f, *nfp;
@@ -495,14 +493,16 @@
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
-	err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+	err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+				extack);
 	if (err < 0)
 		goto errout2;
 
@@ -520,7 +520,8 @@
 			goto errout2;
 		}
 
-		err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+		err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT,
+				    TCA_RSVP_POLICE);
 		if (err < 0) {
 			kfree(n);
 			goto errout2;
@@ -548,7 +549,7 @@
 	if (f == NULL)
 		goto errout2;
 
-	err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+	err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
 	if (err < 0)
 		goto errout;
 	h2 = 16;
@@ -654,7 +655,8 @@
 	return err;
 }
 
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		      bool rtnl_held)
 {
 	struct rsvp_head *head = rtnl_dereference(tp->root);
 	unsigned int h, h1;
@@ -688,7 +690,7 @@
 }
 
 static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		     struct sk_buff *skb, struct tcmsg *t)
+		     struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct rsvp_filter *f = fh;
 	struct rsvp_session *s;
@@ -701,7 +703,7 @@
 
 	t->tcm_handle = f->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
index dd08aea..6407884 100644
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_rsvp6.c	Special RSVP packet classifier for IPv6.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 9ccc93f..e573e5a 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/cls_tcindex.c	Packet classifier for skb->tc_index
  *
@@ -48,7 +49,7 @@
 	u32 hash;		/* hash table size; 0 if undefined */
 	u32 alloc_hash;		/* allocated size */
 	u32 fall_through;	/* 0: only classify if explicit match */
-	struct rcu_head rcu;
+	struct rcu_work rwork;
 };
 
 static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
@@ -173,7 +174,7 @@
 }
 
 static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
-			  struct netlink_ext_ack *extack)
+			  bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r = arg;
@@ -221,17 +222,11 @@
 	return 0;
 }
 
-static int tcindex_destroy_element(struct tcf_proto *tp,
-				   void *arg, struct tcf_walker *walker)
+static void tcindex_destroy_work(struct work_struct *work)
 {
-	bool last;
-
-	return tcindex_delete(tp, arg, &last, NULL);
-}
-
-static void __tcindex_destroy(struct rcu_head *head)
-{
-	struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+	struct tcindex_data *p = container_of(to_rcu_work(work),
+					      struct tcindex_data,
+					      rwork);
 
 	kfree(p->perfect);
 	kfree(p->h);
@@ -252,15 +247,19 @@
 	[TCA_TCINDEX_CLASSID]		= { .type = NLA_U32 },
 };
 
-static int tcindex_filter_result_init(struct tcindex_filter_result *r)
+static int tcindex_filter_result_init(struct tcindex_filter_result *r,
+				      struct net *net)
 {
 	memset(r, 0, sizeof(*r));
-	return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+	return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
+			     TCA_TCINDEX_POLICE);
 }
 
-static void __tcindex_partial_destroy(struct rcu_head *head)
+static void tcindex_partial_destroy_work(struct work_struct *work)
 {
-	struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+	struct tcindex_data *p = container_of(to_rcu_work(work),
+					      struct tcindex_data,
+					      rwork);
 
 	kfree(p->perfect);
 	kfree(p);
@@ -275,7 +274,7 @@
 	kfree(cp->perfect);
 }
 
-static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
+static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
 {
 	int i, err = 0;
 
@@ -285,7 +284,7 @@
 		return -ENOMEM;
 
 	for (i = 0; i < cp->hash; i++) {
-		err = tcf_exts_init(&cp->perfect[i].exts,
+		err = tcf_exts_init(&cp->perfect[i].exts, net,
 				    TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 		if (err < 0)
 			goto errout;
@@ -305,16 +304,16 @@
 		  struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
 {
 	struct tcindex_filter_result new_filter_result, *old_r = r;
-	struct tcindex_filter_result cr;
 	struct tcindex_data *cp = NULL, *oldp;
 	struct tcindex_filter *f = NULL; /* make gcc behave */
+	struct tcf_result cr = {};
 	int err, balloc = 0;
 	struct tcf_exts e;
 
-	err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+	err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
 	if (err < 0)
 		goto errout;
 
@@ -337,7 +336,7 @@
 	if (p->perfect) {
 		int i;
 
-		if (tcindex_alloc_perfect_hash(cp) < 0)
+		if (tcindex_alloc_perfect_hash(net, cp) < 0)
 			goto errout;
 		for (i = 0; i < cp->hash; i++)
 			cp->perfect[i].res = p->perfect[i].res;
@@ -345,14 +344,11 @@
 	}
 	cp->h = p->h;
 
-	err = tcindex_filter_result_init(&new_filter_result);
-	if (err < 0)
-		goto errout1;
-	err = tcindex_filter_result_init(&cr);
+	err = tcindex_filter_result_init(&new_filter_result, net);
 	if (err < 0)
 		goto errout1;
 	if (old_r)
-		cr.res = r->res;
+		cr = r->res;
 
 	if (tb[TCA_TCINDEX_HASH])
 		cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -406,7 +402,7 @@
 	err = -ENOMEM;
 	if (!cp->perfect && !cp->h) {
 		if (valid_perfect_hash(cp)) {
-			if (tcindex_alloc_perfect_hash(cp) < 0)
+			if (tcindex_alloc_perfect_hash(net, cp) < 0)
 				goto errout_alloc;
 			balloc = 1;
 		} else {
@@ -435,7 +431,7 @@
 			goto errout_alloc;
 		f->key = handle;
 		f->next = NULL;
-		err = tcindex_filter_result_init(&f->result);
+		err = tcindex_filter_result_init(&f->result, net);
 		if (err < 0) {
 			kfree(f);
 			goto errout_alloc;
@@ -443,12 +439,12 @@
 	}
 
 	if (tb[TCA_TCINDEX_CLASSID]) {
-		cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
-		tcf_bind_filter(tp, &cr.res, base);
+		cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
+		tcf_bind_filter(tp, &cr, base);
 	}
 
 	if (old_r && old_r != r) {
-		err = tcindex_filter_result_init(old_r);
+		err = tcindex_filter_result_init(old_r, net);
 		if (err < 0) {
 			kfree(f);
 			goto errout_alloc;
@@ -456,7 +452,7 @@
 	}
 
 	oldp = p;
-	r->res = cr.res;
+	r->res = cr;
 	tcf_exts_change(&r->exts, &e);
 
 	rcu_assign_pointer(tp->root, cp);
@@ -475,10 +471,12 @@
 				; /* nothing */
 
 		rcu_assign_pointer(*fp, f);
+	} else {
+		tcf_exts_destroy(&new_filter_result.exts);
 	}
 
 	if (oldp)
-		call_rcu(&oldp->rcu, __tcindex_partial_destroy);
+		tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work);
 	return 0;
 
 errout_alloc:
@@ -487,7 +485,6 @@
 	else if (balloc == 2)
 		kfree(cp->h);
 errout1:
-	tcf_exts_destroy(&cr.exts);
 	tcf_exts_destroy(&new_filter_result.exts);
 errout:
 	kfree(cp);
@@ -499,7 +496,7 @@
 tcindex_change(struct net *net, struct sk_buff *in_skb,
 	       struct tcf_proto *tp, unsigned long base, u32 handle,
 	       struct nlattr **tca, void **arg, bool ovr,
-	       struct netlink_ext_ack *extack)
+	       bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -514,7 +511,8 @@
 	if (!opt)
 		return 0;
 
-	err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt,
+					  tcindex_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -522,7 +520,8 @@
 				 tca[TCA_RATE], ovr, extack);
 }
 
-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
+static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
+			 bool rtnl_held)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter *f, *next;
@@ -558,24 +557,43 @@
 	}
 }
 
-static void tcindex_destroy(struct tcf_proto *tp,
+static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
 			    struct netlink_ext_ack *extack)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcf_walker walker;
+	int i;
 
 	pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
-	walker.count = 0;
-	walker.skip = 0;
-	walker.fn = tcindex_destroy_element;
-	tcindex_walk(tp, &walker);
 
-	call_rcu(&p->rcu, __tcindex_destroy);
+	if (p->perfect) {
+		for (i = 0; i < p->hash; i++) {
+			struct tcindex_filter_result *r = p->perfect + i;
+
+			tcf_unbind_filter(tp, &r->res);
+			if (tcf_exts_get_net(&r->exts))
+				tcf_queue_work(&r->rwork,
+					       tcindex_destroy_rexts_work);
+			else
+				__tcindex_destroy_rexts(r);
+		}
+	}
+
+	for (i = 0; p->h && i < p->hash; i++) {
+		struct tcindex_filter *f, *next;
+		bool last;
+
+		for (f = rtnl_dereference(p->h[i]); f; f = next) {
+			next = rtnl_dereference(f->next);
+			tcindex_delete(tp, &f->result, &last, rtnl_held, NULL);
+		}
+	}
+
+	tcf_queue_work(&p->rwork, tcindex_destroy_work);
 }
 
 
 static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
-			struct sk_buff *skb, struct tcmsg *t)
+			struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r = fh;
@@ -585,7 +603,7 @@
 		 tp, fh, skb, t, p, r);
 	pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
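The cls_tcindex hunks above also switch teardown from call_rcu() to tcf_queue_work(), so the filter's extensions (and any attached actions) are released in process context after the RCU grace period instead of in softirq. A minimal sketch of that deferral pattern, with hypothetical names (my_filter and my_destroy_work are illustrative, not identifiers from this patch):

	struct my_filter {
		struct tcf_exts exts;
		struct rcu_work rwork;
	};

	static void my_destroy_work(struct work_struct *work)
	{
		struct my_filter *f = container_of(to_rcu_work(work),
						   struct my_filter, rwork);

		rtnl_lock();	/* process context: sleeping and locks are fine */
		tcf_exts_destroy(&f->exts);
		rtnl_unlock();
		kfree(f);
	}

	/* on deletion, instead of call_rcu(&f->rcu, ...): */
	tcf_queue_work(&f->rwork, my_destroy_work);
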
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b2c3406..a0e6fac 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  *	The filters are packed to hash tables of key nodes
@@ -24,9 +20,6 @@
  *	pure RSVP doesn't need such a general approach and can use
  *	much simpler (and faster) schemes, sort of cls_rsvp.c.
  *
- *	JHS: We should remove the CONFIG_NET_CLS_IND from here
- *	eventually when the meta match extension is made available
- *
  *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
  */
 
@@ -52,9 +45,7 @@
 	u32			handle;
 	struct tc_u_hnode __rcu	*ht_up;
 	struct tcf_exts		exts;
-#ifdef CONFIG_NET_CLS_IND
 	int			ifindex;
-#endif
 	u8			fshift;
 	struct tcf_result	res;
 	struct tc_u_hnode __rcu	*ht_down;
@@ -68,7 +59,6 @@
 	u32			mask;
 	u32 __percpu		*pcpu_success;
 #endif
-	struct tcf_proto	*tp;
 	struct rcu_work		rwork;
 	/* The 'sel' field MUST be the last field in structure to allow for
 	 * tc_u32_keys allocated at end of structure.
@@ -80,10 +70,10 @@
 	struct tc_u_hnode __rcu	*next;
 	u32			handle;
 	u32			prio;
-	struct tc_u_common	*tp_c;
 	int			refcnt;
 	unsigned int		divisor;
 	struct idr		handle_idr;
+	bool			is_root;
 	struct rcu_head		rcu;
 	u32			flags;
 	/* The 'ht' field MUST be the last field in structure to allow for
@@ -98,7 +88,7 @@
 	int			refcnt;
 	struct idr		handle_idr;
 	struct hlist_node	hnode;
-	struct rcu_head		rcu;
+	long			knodes;
 };
 
 static inline unsigned int u32_hash_fold(__be32 key,
@@ -181,12 +171,10 @@
 			if (n->sel.flags & TC_U32_TERMINAL) {
 
 				*res = n->res;
-#ifdef CONFIG_NET_CLS_IND
 				if (!tcf_match_indev(skb, n->ifindex)) {
 					n = rcu_dereference_bh(n->next);
 					goto next_knode;
 				}
-#endif
 #ifdef CONFIG_CLS_U32_PERF
 				__this_cpu_inc(n->pf->rhit);
 #endif
@@ -344,19 +332,16 @@
 		return block->q;
 }
 
-static unsigned int tc_u_hash(const struct tcf_proto *tp)
+static struct hlist_head *tc_u_hash(void *key)
 {
-	return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT);
+	return tc_u_common_hash + hash_ptr(key, U32_HASH_SHIFT);
 }
 
-static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
+static struct tc_u_common *tc_u_common_find(void *key)
 {
 	struct tc_u_common *tc;
-	unsigned int h;
-
-	h = tc_u_hash(tp);
-	hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
-		if (tc->ptr == tc_u_common_ptr(tp))
+	hlist_for_each_entry(tc, tc_u_hash(key), hnode) {
+		if (tc->ptr == key)
 			return tc;
 	}
 	return NULL;
@@ -365,10 +350,8 @@
 static int u32_init(struct tcf_proto *tp)
 {
 	struct tc_u_hnode *root_ht;
-	struct tc_u_common *tp_c;
-	unsigned int h;
-
-	tp_c = tc_u_common_find(tp);
+	void *key = tc_u_common_ptr(tp);
+	struct tc_u_common *tp_c = tc_u_common_find(key);
 
 	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
 	if (root_ht == NULL)
@@ -377,6 +360,7 @@
 	root_ht->refcnt++;
 	root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
 	root_ht->prio = tp->prio;
+	root_ht->is_root = true;
 	idr_init(&root_ht->handle_idr);
 
 	if (tp_c == NULL) {
@@ -385,18 +369,16 @@
 			kfree(root_ht);
 			return -ENOBUFS;
 		}
-		tp_c->ptr = tc_u_common_ptr(tp);
+		tp_c->ptr = key;
 		INIT_HLIST_NODE(&tp_c->hnode);
 		idr_init(&tp_c->handle_idr);
 
-		h = tc_u_hash(tp);
-		hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
+		hlist_add_head(&tp_c->hnode, tc_u_hash(key));
 	}
 
 	tp_c->refcnt++;
 	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
 	rcu_assign_pointer(tp_c->hlist, root_ht);
-	root_ht->tp_c = tp_c;
 
 	root_ht->refcnt++;
 	rcu_assign_pointer(tp->root, root_ht);
@@ -404,8 +386,7 @@
 	return 0;
 }
 
-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
-			   bool free_pf)
+static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
 {
 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
 
@@ -439,7 +420,7 @@
 					      struct tc_u_knode,
 					      rwork);
 	rtnl_lock();
-	u32_destroy_key(key->tp, key, false);
+	u32_destroy_key(key, false);
 	rtnl_unlock();
 }
 
@@ -456,12 +437,13 @@
 					      struct tc_u_knode,
 					      rwork);
 	rtnl_lock();
-	u32_destroy_key(key->tp, key, true);
+	u32_destroy_key(key, true);
 	rtnl_unlock();
 }
 
 static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
 {
+	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_knode __rcu **kp;
 	struct tc_u_knode *pkp;
 	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
@@ -472,6 +454,7 @@
 		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
 			if (pkp == key) {
 				RCU_INIT_POINTER(*kp, key->next);
+				tp_c->knodes--;
 
 				tcf_unbind_filter(tp, &key->res);
 				idr_remove(&ht->handle_idr, key->handle);
@@ -497,7 +480,7 @@
 	cls_u32.hnode.handle = h->handle;
 	cls_u32.hnode.prio = h->prio;
 
-	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true);
 }
 
 static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
@@ -515,7 +498,7 @@
 	cls_u32.hnode.handle = h->handle;
 	cls_u32.hnode.prio = h->prio;
 
-	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+	err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true);
 	if (err < 0) {
 		u32_clear_hw_hnode(tp, h, NULL);
 		return err;
@@ -539,8 +522,8 @@
 	cls_u32.command = TC_CLSU32_DELETE_KNODE;
 	cls_u32.knode.handle = n->handle;
 
-	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
-	tcf_block_offload_dec(block, &n->flags);
+	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false,
+			    &n->flags, &n->in_hw_count, true);
 }
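The knode offload path above also stops open-coding the hardware bookkeeping: tc_setup_cb_add()/tc_setup_cb_destroy() now maintain n->flags and n->in_hw_count internally, replacing the removed tcf_block_offload_inc()/tcf_block_offload_dec() calls. What the driver-callback epilogue used to do by hand, as a sketch (not the new helpers' actual bodies):

	if (err > 0) {
		n->in_hw_count = err;	/* number of drivers that bound it */
		tcf_block_offload_inc(block, &n->flags);	/* marks TCA_CLS_FLAGS_IN_HW */
	}
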
 
 static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
@@ -564,17 +547,16 @@
 	cls_u32.knode.mask = 0;
 #endif
 	cls_u32.knode.sel = &n->sel;
+	cls_u32.knode.res = &n->res;
 	cls_u32.knode.exts = &n->exts;
 	if (n->ht_down)
 		cls_u32.knode.link_handle = ht->handle;
 
-	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
-	if (err < 0) {
+	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw,
+			      &n->flags, &n->in_hw_count, true);
+	if (err) {
 		u32_remove_hw_knode(tp, n, NULL);
 		return err;
-	} else if (err > 0) {
-		n->in_hw_count = err;
-		tcf_block_offload_inc(block, &n->flags);
 	}
 
 	if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
@@ -586,6 +568,7 @@
 static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
 			    struct netlink_ext_ack *extack)
 {
+	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_knode *n;
 	unsigned int h;
 
@@ -593,13 +576,14 @@
 		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
 			RCU_INIT_POINTER(ht->ht[h],
 					 rtnl_dereference(n->next));
+			tp_c->knodes--;
 			tcf_unbind_filter(tp, &n->res);
 			u32_remove_hw_knode(tp, n, extack);
 			idr_remove(&ht->handle_idr, n->handle);
 			if (tcf_exts_get_net(&n->exts))
 				tcf_queue_work(&n->rwork, u32_delete_key_freepf_work);
 			else
-				u32_destroy_key(n->tp, n, true);
+				u32_destroy_key(n, true);
 		}
 	}
 }
@@ -632,18 +616,8 @@
 	return -ENOENT;
 }
 
-static bool ht_empty(struct tc_u_hnode *ht)
-{
-	unsigned int h;
-
-	for (h = 0; h <= ht->divisor; h++)
-		if (rcu_access_pointer(ht->ht[h]))
-			return false;
-
-	return true;
-}
-
-static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -677,23 +651,19 @@
 }
 
 static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
-		      struct netlink_ext_ack *extack)
+		      bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct tc_u_hnode *ht = arg;
-	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
 	struct tc_u_common *tp_c = tp->data;
 	int ret = 0;
 
-	if (ht == NULL)
-		goto out;
-
 	if (TC_U32_KEY(ht->handle)) {
 		u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
 		ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
 		goto out;
 	}
 
-	if (root_ht == ht) {
+	if (ht->is_root) {
 		NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
 		return -EINVAL;
 	}
@@ -706,38 +676,7 @@
 	}
 
 out:
-	*last = true;
-	if (root_ht) {
-		if (root_ht->refcnt > 2) {
-			*last = false;
-			goto ret;
-		}
-		if (root_ht->refcnt == 2) {
-			if (!ht_empty(root_ht)) {
-				*last = false;
-				goto ret;
-			}
-		}
-	}
-
-	if (tp_c->refcnt > 1) {
-		*last = false;
-		goto ret;
-	}
-
-	if (tp_c->refcnt == 1) {
-		struct tc_u_hnode *ht;
-
-		for (ht = rtnl_dereference(tp_c->hlist);
-		     ht;
-		     ht = rtnl_dereference(ht->next))
-			if (!ht_empty(ht)) {
-				*last = false;
-				break;
-			}
-	}
-
-ret:
+	*last = tp_c->refcnt == 1 && tp_c->knodes == 0;
 	return ret;
 }
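With ht_empty() gone, tc_u_common carries a knodes counter (incremented when u32_change() links a new key node, decremented in u32_delete_key() and u32_clear_hnode()), so u32_delete() can decide "last" in O(1) instead of walking every hash table. The whole invariant collapses to the line above:

	/* no other tcf_proto shares the common block, and no key nodes
	 * remain anywhere in it */
	*last = tp_c->refcnt == 1 && tp_c->knodes == 0;
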
 
@@ -768,14 +707,14 @@
 };
 
 static int u32_set_parms(struct net *net, struct tcf_proto *tp,
-			 unsigned long base, struct tc_u_hnode *ht,
+			 unsigned long base,
 			 struct tc_u_knode *n, struct nlattr **tb,
 			 struct nlattr *est, bool ovr,
 			 struct netlink_ext_ack *extack)
 {
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
 	if (err < 0)
 		return err;
 
@@ -789,12 +728,16 @@
 		}
 
 		if (handle) {
-			ht_down = u32_lookup_ht(ht->tp_c, handle);
+			ht_down = u32_lookup_ht(tp->data, handle);
 
 			if (!ht_down) {
 				NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
 				return -EINVAL;
 			}
+			if (ht_down->is_root) {
+				NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
+				return -EINVAL;
+			}
 			ht_down->refcnt++;
 		}
 
@@ -809,7 +752,6 @@
 		tcf_bind_filter(tp, &n->res, base);
 	}
 
-#ifdef CONFIG_NET_CLS_IND
 	if (tb[TCA_U32_INDEV]) {
 		int ret;
 		ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
@@ -817,7 +759,6 @@
 			return -EINVAL;
 		n->ifindex = ret;
 	}
-#endif
 	return 0;
 }
 
@@ -848,7 +789,7 @@
 	rcu_assign_pointer(*ins, n);
 }
 
-static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
+static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
 					 struct tc_u_knode *n)
 {
 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
@@ -865,9 +806,7 @@
 	new->handle = n->handle;
 	RCU_INIT_POINTER(new->ht_up, n->ht_up);
 
-#ifdef CONFIG_NET_CLS_IND
 	new->ifindex = n->ifindex;
-#endif
 	new->fshift = n->fshift;
 	new->res = n->res;
 	new->flags = n->flags;
@@ -891,10 +830,9 @@
 	/* Similarly success statistics must be moved as pointers */
 	new->pcpu_success = n->pcpu_success;
 #endif
-	new->tp = tp;
-	memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+	memcpy(&new->sel, s, struct_size(s, keys, s->nkeys));
 
-	if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
+	if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) {
 		kfree(new);
 		return NULL;
 	}
@@ -904,7 +842,7 @@
 
 static int u32_change(struct net *net, struct sk_buff *in_skb,
 		      struct tcf_proto *tp, unsigned long base, u32 handle,
-		      struct nlattr **tca, void **arg, bool ovr,
+		      struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
 		      struct netlink_ext_ack *extack)
 {
 	struct tc_u_common *tp_c = tp->data;
@@ -929,7 +867,8 @@
 		}
 	}
 
-	err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack);
+	err = nla_parse_nested_deprecated(tb, TCA_U32_MAX, opt, u32_policy,
+					  extack);
 	if (err < 0)
 		return err;
 
@@ -956,22 +895,21 @@
 			return -EINVAL;
 		}
 
-		new = u32_init_knode(tp, n);
+		new = u32_init_knode(net, tp, n);
 		if (!new)
 			return -ENOMEM;
 
-		err = u32_set_parms(net, tp, base,
-				    rtnl_dereference(n->ht_up), new, tb,
+		err = u32_set_parms(net, tp, base, new, tb,
 				    tca[TCA_RATE], ovr, extack);
 
 		if (err) {
-			u32_destroy_key(tp, new, false);
+			u32_destroy_key(new, false);
 			return err;
 		}
 
 		err = u32_replace_hw_knode(tp, new, flags, extack);
 		if (err) {
-			u32_destroy_key(tp, new, false);
+			u32_destroy_key(new, false);
 			return err;
 		}
 
@@ -988,7 +926,11 @@
 	if (tb[TCA_U32_DIVISOR]) {
 		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
 
-		if (--divisor > 0x100) {
+		if (!is_power_of_2(divisor)) {
+			NL_SET_ERR_MSG_MOD(extack, "Divisor is not a power of 2");
+			return -EINVAL;
+		}
+		if (divisor-- > 0x100) {
 			NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
 			return -EINVAL;
 		}
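The is_power_of_2() check guards real arithmetic: the divisor is stored decremented and applied as a bit mask during classification, so only power-of-two sizes address every bucket. With 256 the mask is 0xff and all buckets are reachable; with 100 the mask would be 99 (0b1100011), aliasing some buckets and leaving others unreachable. The selection step being protected, paraphrased from u32_classify() (not shown in this excerpt):

	/* ht->divisor is the user value minus one, i.e. the mask */
	sel = ht->divisor & u32_hash_fold(key, &n->sel, n->fshift);
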
@@ -1013,7 +955,6 @@
 				return err;
 			}
 		}
-		ht->tp_c = tp_c;
 		ht->refcnt = 1;
 		ht->divisor = divisor;
 		ht->handle = handle;
@@ -1103,9 +1044,8 @@
 	n->handle = handle;
 	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
 	n->flags = flags;
-	n->tp = tp;
 
-	err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+	err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
 	if (err < 0)
 		goto errout;
 
@@ -1125,7 +1065,7 @@
 	}
 #endif
 
-	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr,
+	err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
 			    extack);
 	if (err == 0) {
 		struct tc_u_knode __rcu **ins;
@@ -1146,6 +1086,7 @@
 
 		RCU_INIT_POINTER(n->next, pins);
 		rcu_assign_pointer(*ins, n);
+		tp_c->knodes++;
 		*arg = n;
 		return 0;
 	}
@@ -1167,7 +1108,8 @@
 	return err;
 }
 
-static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		     bool rtnl_held)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
@@ -1208,7 +1150,7 @@
 }
 
 static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
-			       bool add, tc_setup_cb_t *cb, void *cb_priv,
+			       bool add, flow_setup_cb_t *cb, void *cb_priv,
 			       struct netlink_ext_ack *extack)
 {
 	struct tc_cls_u32_offload cls_u32 = {};
@@ -1228,7 +1170,7 @@
 }
 
 static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
-			       bool add, tc_setup_cb_t *cb, void *cb_priv,
+			       bool add, flow_setup_cb_t *cb, void *cb_priv,
 			       struct netlink_ext_ack *extack)
 {
 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
@@ -1251,24 +1193,22 @@
 		cls_u32.knode.mask = 0;
 #endif
 		cls_u32.knode.sel = &n->sel;
+		cls_u32.knode.res = &n->res;
 		cls_u32.knode.exts = &n->exts;
 		if (n->ht_down)
 			cls_u32.knode.link_handle = ht->handle;
 	}
 
-	err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
-	if (err) {
-		if (add && tc_skip_sw(n->flags))
-			return err;
-		return 0;
-	}
-
-	tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add);
+	err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32,
+				    &cls_u32, cb_priv, &n->flags,
+				    &n->in_hw_count);
+	if (err)
+		return err;
 
 	return 0;
 }
 
-static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			 void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct tc_u_common *tp_c = tp->data;
@@ -1324,7 +1264,7 @@
 }
 
 static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		    struct sk_buff *skb, struct tcmsg *t)
+		    struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct tc_u_knode *n = fh;
 	struct tc_u_hnode *ht_up, *ht_down;
@@ -1335,7 +1275,7 @@
 
 	t->tcm_handle = n->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -1395,14 +1335,12 @@
 		if (tcf_exts_dump(skb, &n->exts) < 0)
 			goto nla_put_failure;
 
-#ifdef CONFIG_NET_CLS_IND
 		if (n->ifindex) {
 			struct net_device *dev;
 			dev = __dev_get_by_index(net, n->ifindex);
 			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
 				goto nla_put_failure;
 		}
-#endif
 #ifdef CONFIG_CLS_U32_PERF
 		gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
 			      n->sel.nkeys * sizeof(u64),
@@ -1466,9 +1404,7 @@
 #ifdef CONFIG_CLS_U32_PERF
 	pr_info("    Performance counters on\n");
 #endif
-#ifdef CONFIG_NET_CLS_IND
 	pr_info("    input device check on\n");
-#endif
 #ifdef CONFIG_NET_CLS_ACT
 	pr_info("    Actions configured\n");
 #endif
diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c
index ddd883c..b9a94fd 100644
--- a/net/sched/em_canid.c
+++ b/net/sched/em_canid.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * em_canid.c  Ematch rule to match CAN frames according to their CAN IDs
  *
- *              This program is free software; you can distribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Idea:       Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
  * Copyright:  (c) 2011 Czech Technical University in Prague
  *             (c) 2011 Volkswagen Group Research
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index 1c8360a..a4d09b1 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/em_cmp.c	Simple packet data comparison ematch
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  */
 
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index c1b23e3..df00566 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/em_ipset.c	ipset ematch
  *
  * Copyright (c) 2012 Florian Westphal <fw@strlen.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
  */
 
 #include <linux/gfp.h>
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index a5f34e9..9fff648 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/em_ipt.c IPtables matches Ematch
  *
  * (c) 2018 Eyal Birger <eyal.birger@gmail.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/gfp.h>
@@ -25,6 +21,7 @@
 struct em_ipt_match {
 	const struct xt_match *match;
 	u32 hook;
+	u8 nfproto;
 	u8 match_data[0] __aligned(8);
 };
 
@@ -75,11 +72,25 @@
 	return 0;
 }
 
+static int addrtype_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+	if (mrev != 1) {
+		pr_err("only addrtype match revision 1 supported");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
 	{
 		.match_name = "policy",
 		.validate_match_data = policy_validate_match_data
 	},
+	{
+		.match_name = "addrtype",
+		.validate_match_data = addrtype_validate_match_data
+	},
 	{}
 };
 
@@ -119,9 +130,10 @@
 	struct em_ipt_match *im = NULL;
 	struct xt_match *match;
 	int mdata_len, ret;
+	u8 nfproto;
 
-	ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy,
-			NULL);
+	ret = nla_parse_deprecated(tb, TCA_EM_IPT_MAX, data, data_len,
+				   em_ipt_policy, NULL);
 	if (ret < 0)
 		return ret;
 
@@ -129,6 +141,15 @@
 	    !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
 		return -EINVAL;
 
+	nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+	switch (nfproto) {
+	case NFPROTO_IPV4:
+	case NFPROTO_IPV6:
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	match = get_xt_match(tb);
 	if (IS_ERR(match)) {
 		pr_err("unable to load match\n");
@@ -144,6 +165,7 @@
 
 	im->match = match;
 	im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+	im->nfproto = nfproto;
 	nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
 
 	ret = check_match(net, im, mdata_len);
@@ -186,15 +208,33 @@
 	const struct em_ipt_match *im = (const void *)em->data;
 	struct xt_action_param acpar = {};
 	struct net_device *indev = NULL;
+	u8 nfproto = im->match->family;
 	struct nf_hook_state state;
 	int ret;
 
+	switch (tc_skb_protocol(skb)) {
+	case htons(ETH_P_IP):
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		if (nfproto == NFPROTO_UNSPEC)
+			nfproto = NFPROTO_IPV4;
+		break;
+	case htons(ETH_P_IPV6):
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		if (nfproto == NFPROTO_UNSPEC)
+			nfproto = NFPROTO_IPV6;
+		break;
+	default:
+		return 0;
+	}
+
 	rcu_read_lock();
 
 	if (skb->skb_iif)
 		indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
 
-	nf_hook_state_init(&state, im->hook, im->match->family,
+	nf_hook_state_init(&state, im->hook, nfproto,
 			   indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
 
 	acpar.match = im->match;
@@ -217,7 +257,7 @@
 		return -EMSGSIZE;
 	if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
 		return -EMSGSIZE;
-	if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+	if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->nfproto) < 0)
 		return -EMSGSIZE;
 	if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
 		    im->match->usersize ?: im->match->matchsize,
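Two related em_ipt fixes land here: the configured NFPROTO is validated up front (only NFPROTO_IPV4 and NFPROTO_IPV6 are accepted) and remembered in im->nfproto for dumps, and at match time a match registered as NFPROTO_UNSPEC has its family resolved from the packet itself, after pskb_network_may_pull() guarantees the header is actually readable, before nf_hook_state_init() runs. Condensed from the hunk above (no new logic):

	switch (tc_skb_protocol(skb)) {
	case htons(ETH_P_IP):
		if (nfproto == NFPROTO_UNSPEC)
			nfproto = NFPROTO_IPV4;
		break;
	case htons(ETH_P_IPV6):
		if (nfproto == NFPROTO_UNSPEC)
			nfproto = NFPROTO_IPV6;
		break;
	default:
		return 0;	/* not IP traffic: never matches */
	}
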
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index d6e9711..3177dcb 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/em_meta.c	Metadata ematch
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  *
  * ==========================================================================
@@ -450,7 +446,7 @@
 		*err = -1;
 		return;
 	}
-	dst->value = sk->sk_wmem_queued;
+	dst->value = READ_ONCE(sk->sk_wmem_queued);
 }
 
 META_COLLECTOR(int_sk_fwd_alloc)
@@ -558,7 +554,7 @@
 		*err = -1;
 		return;
 	}
-	dst->value = sk->sk_rcvlowat;
+	dst->value = READ_ONCE(sk->sk_rcvlowat);
 }
 
 META_COLLECTOR(int_sk_rcvtimeo)
@@ -912,7 +908,8 @@
 	struct tcf_meta_hdr *hdr;
 	struct meta_match *meta = NULL;
 
-	err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy, NULL);
+	err = nla_parse_deprecated(tb, TCA_EM_META_MAX, data, len,
+				   meta_policy, NULL);
 	if (err < 0)
 		goto errout;
 
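The em_meta READ_ONCE() annotations cover socket fields that are written locklessly elsewhere; without them the collector could tear or refetch the load. Sketch of the writer side these pair with (the setsockopt path, quoted from memory rather than from this patch):

	/* e.g. SO_RCVLOWAT handling elsewhere in the tree */
	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
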
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 07c10ba..88c7ce4 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/em_nbyte.c	N-Byte ematch
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  */
 
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 73e2ed5..6f3c1fb 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/em_text.c	Textsearch ematch
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  */
 
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 797bdb8..71b070d 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/em_u32.c	U32 Ematch
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 1331a4c..8f2ad70 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/ematch.c		Extended Match API
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  *
  * ==========================================================================
@@ -314,7 +310,8 @@
 	if (!nla)
 		return 0;
 
-	err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_EMATCH_TREE_MAX, nla,
+					  em_policy, NULL);
 	if (err < 0)
 		goto errout;
 
@@ -440,14 +437,14 @@
 	struct nlattr *top_start;
 	struct nlattr *list_start;
 
-	top_start = nla_nest_start(skb, tlv);
+	top_start = nla_nest_start_noflag(skb, tlv);
 	if (top_start == NULL)
 		goto nla_put_failure;
 
 	if (nla_put(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr))
 		goto nla_put_failure;
 
-	list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST);
+	list_start = nla_nest_start_noflag(skb, TCA_EMATCH_TREE_LIST);
 	if (list_start == NULL)
 		goto nla_put_failure;
 
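All of the netlink churn in these files follows one rule from the 5.2 attribute-validation rework: parsers of pre-existing TC attribute spaces become nla_parse_deprecated()/nla_parse_nested_deprecated() (NLA_F_NESTED not required, loose validation preserved), and dump paths use nla_nest_start_noflag() because plain nla_nest_start() now sets NLA_F_NESTED on the wire. Side by side:

	/* old binary format, kept bug-compatible */
	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);

	/* what a new attribute space would use: sets NLA_F_NESTED and
	 * lets the parse side validate strictly */
	nest = nla_nest_start(skb, TCA_OPTIONS);
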
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index be7cd14..1047825 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_api.c	Packet scheduler API.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  * Fixes:
@@ -27,7 +23,6 @@
 #include <linux/kmod.h>
 #include <linux/list.h>
 #include <linux/hrtimer.h>
-#include <linux/lockdep.h>
 #include <linux/slab.h>
 #include <linux/hashtable.h>
 
@@ -315,10 +310,27 @@
 	return q;
 }
 
+struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
+{
+	struct netdev_queue *nq;
+	struct Qdisc *q;
+
+	if (!handle)
+		return NULL;
+	q = qdisc_match_from_root(dev->qdisc, handle);
+	if (q)
+		goto out;
+
+	nq = dev_ingress_queue_rcu(dev);
+	if (nq)
+		q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
+out:
+	return q;
+}
+
 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 {
 	unsigned long cl;
-	struct Qdisc *leaf;
 	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
 
 	if (cops == NULL)
@@ -327,8 +339,7 @@
 
 	if (cl == 0)
 		return NULL;
-	leaf = cops->leaf(p, cl);
-	return leaf;
+	return cops->leaf(p, cl);
 }
 
 /* Find queueing discipline by name */
@@ -464,7 +475,8 @@
 	u16 *tab = NULL;
 	int err;
 
-	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
+	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
+					  extack);
 	if (err < 0)
 		return ERR_PTR(err);
 	if (!tb[TCA_STAB_BASE]) {
@@ -511,11 +523,6 @@
 	return stab;
 }
 
-static void stab_kfree_rcu(struct rcu_head *head)
-{
-	kfree(container_of(head, struct qdisc_size_table, rcu));
-}
-
 void qdisc_put_stab(struct qdisc_size_table *tab)
 {
 	if (!tab)
@@ -523,7 +530,7 @@
 
 	if (--tab->refcnt == 0) {
 		list_del(&tab->list);
-		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
+		kfree_rcu(tab, rcu);
 	}
 }
 EXPORT_SYMBOL(qdisc_put_stab);
@@ -532,7 +539,7 @@
 {
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, TCA_STAB);
+	nest = nla_nest_start_noflag(skb, TCA_STAB);
 	if (nest == NULL)
 		goto nla_put_failure;
 	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
@@ -743,8 +750,7 @@
 	return 0;
 }
 
-void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
-			       unsigned int len)
+void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
 {
 	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
 	const struct Qdisc_class_ops *cops;
@@ -793,6 +799,71 @@
 }
 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
 
+int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
+			      void *type_data)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	int err;
+
+	sch->flags &= ~TCQ_F_OFFLOADED;
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return 0;
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
+	if (err == -EOPNOTSUPP)
+		return 0;
+
+	if (!err)
+		sch->flags |= TCQ_F_OFFLOADED;
+
+	return err;
+}
+EXPORT_SYMBOL(qdisc_offload_dump_helper);
+
+void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
+				struct Qdisc *new, struct Qdisc *old,
+				enum tc_setup_type type, void *type_data,
+				struct netlink_ext_ack *extack)
+{
+	bool any_qdisc_is_offloaded;
+	int err;
+
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return;
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
+
+	/* Don't report error if the graft is part of destroy operation. */
+	if (!err || !new || new == &noop_qdisc)
+		return;
+
+	/* Don't report error if the parent, the old child and the new
+	 * one are not offloaded.
+	 */
+	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
+	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
+	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
+
+	if (any_qdisc_is_offloaded)
+		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+}
+EXPORT_SYMBOL(qdisc_offload_graft_helper);
+
+static void qdisc_offload_graft_root(struct net_device *dev,
+				     struct Qdisc *new, struct Qdisc *old,
+				     struct netlink_ext_ack *extack)
+{
+	struct tc_root_qopt_offload graft_offload = {
+		.command	= TC_ROOT_GRAFT,
+		.handle		= new ? new->handle : 0,
+		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
+				  (old && old->flags & TCQ_F_INGRESS),
+	};
+
+	qdisc_offload_graft_helper(dev, NULL, new, old,
+				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
+}
+
 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 			 u32 portid, u32 seq, u16 flags, int event)
 {
@@ -921,7 +992,20 @@
 		qdisc_notify(net, skb, n, clid, old, new);
 
 	if (old)
-		qdisc_destroy(old);
+		qdisc_put(old);
+}
+
+static void qdisc_clear_nolock(struct Qdisc *sch)
+{
+	sch->flags &= ~TCQ_F_NOLOCK;
+	if (!(sch->flags & TCQ_F_CPUSTATS))
+		return;
+
+	free_percpu(sch->cpu_bstats);
+	free_percpu(sch->cpu_qstats);
+	sch->cpu_bstats = NULL;
+	sch->cpu_qstats = NULL;
+	sch->flags &= ~TCQ_F_CPUSTATS;
 }
 
 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
@@ -940,7 +1024,6 @@
 {
 	struct Qdisc *q = old;
 	struct net *net = dev_net(dev);
-	int err = 0;
 
 	if (parent == NULL) {
 		unsigned int i, num_q, ingress;
@@ -960,6 +1043,8 @@
 		if (dev->flags & IFF_UP)
 			dev_deactivate(dev);
 
+		qdisc_offload_graft_root(dev, new, old, extack);
+
 		if (new && new->ops->attach)
 			goto skip;
 
@@ -974,7 +1059,7 @@
 				qdisc_refcount_inc(new);
 
 			if (!ingress)
-				qdisc_destroy(old);
+				qdisc_put(old);
 		}
 
 skip:
@@ -995,28 +1080,29 @@
 			dev_activate(dev);
 	} else {
 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
+		unsigned long cl;
+		int err;
 
 		/* Only support running class lockless if parent is lockless */
 		if (new && (new->flags & TCQ_F_NOLOCK) &&
 		    parent && !(parent->flags & TCQ_F_NOLOCK))
-			new->flags &= ~TCQ_F_NOLOCK;
+			qdisc_clear_nolock(new);
 
-		err = -EOPNOTSUPP;
-		if (cops && cops->graft) {
-			unsigned long cl = cops->find(parent, classid);
+		if (!cops || !cops->graft)
+			return -EOPNOTSUPP;
 
-			if (cl) {
-				err = cops->graft(parent, cl, new, &old,
-						  extack);
-			} else {
-				NL_SET_ERR_MSG(extack, "Specified class not found");
-				err = -ENOENT;
-			}
+		cl = cops->find(parent, classid);
+		if (!cl) {
+			NL_SET_ERR_MSG(extack, "Specified class not found");
+			return -ENOENT;
 		}
-		if (!err)
-			notify_and_destroy(net, skb, n, classid, old, new);
+
+		err = cops->graft(parent, cl, new, &old, extack);
+		if (err)
+			return err;
+		notify_and_destroy(net, skb, n, classid, old, new);
 	}
-	return err;
+	return 0;
 }
 
 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
@@ -1053,10 +1139,6 @@
 	return 0;
 }
 
-/* lockdep annotation is needed for ingress; egress gets it only for name */
-static struct lock_class_key qdisc_tx_lock;
-static struct lock_class_key qdisc_rx_lock;
-
 /*
    Allocate and initialize new qdisc.
 
@@ -1121,15 +1203,15 @@
 	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
-		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
 	} else {
 		if (handle == 0) {
 			handle = qdisc_alloc_handle(dev);
-			err = -ENOMEM;
-			if (handle == 0)
+			if (handle == 0) {
+				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
+				err = -ENOSPC;
 				goto err_out3;
+			}
 		}
-		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
 		if (!netif_is_multiqueue(dev))
 			sch->flags |= TCQ_F_ONETXQUEUE;
 	}
@@ -1338,8 +1420,8 @@
 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
+				     rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1423,8 +1505,8 @@
 
 replay:
 	/* Reinit, just in case something touches this. */
-	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
+				     rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1581,7 +1663,7 @@
 	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
 	if (err) {
 		if (q)
-			qdisc_destroy(q);
+			qdisc_put(q);
 		return err;
 	}
 
@@ -1658,8 +1740,8 @@
 	idx = 0;
 	ASSERT_RTNL();
 
-	err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
-			  rtm_tca_policy, NULL);
+	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
+				     rtm_tca_policy, cb->extack);
 	if (err < 0)
 		return err;
 
@@ -1752,6 +1834,7 @@
 {
 	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	int err = 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
@@ -1762,8 +1845,11 @@
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-			      n->nlmsg_flags & NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+			     n->nlmsg_flags & NLM_F_ECHO);
+	if (err > 0)
+		err = 0;
+	return err;
 }
 
 static int tclass_del_notify(struct net *net,
@@ -1794,8 +1880,11 @@
 		return err;
 	}
 
-	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-			      n->nlmsg_flags & NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+			     n->nlmsg_flags & NLM_F_ECHO);
+	if (err > 0)
+		err = 0;
+	return err;
 }
 
 #ifdef CONFIG_NET_CLS
@@ -1831,20 +1920,24 @@
 	cl = cops->find(q, portid);
 	if (!cl)
 		return;
+	if (!cops->tcf_block)
+		return;
 	block = cops->tcf_block(q, cl, NULL);
 	if (!block)
 		return;
-	list_for_each_entry(chain, &block->chain_list, list) {
+	for (chain = tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain = tcf_get_next_chain(block, chain)) {
 		struct tcf_proto *tp;
 
-		for (tp = rtnl_dereference(chain->filter_chain);
-		     tp; tp = rtnl_dereference(tp->next)) {
+		for (tp = tcf_get_next_proto(chain, NULL, true);
+		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
 			struct tcf_bind_args arg = {};
 
 			arg.w.fn = tcf_node_bind;
 			arg.classid = clid;
 			arg.cl = new_cl;
-			tp->ops->walk(tp, &arg.w);
+			tp->ops->walk(tp, &arg.w, true);
 		}
 	}
 }
@@ -1878,8 +1971,8 @@
 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
+				     rtm_tca_policy, extack);
 	if (err < 0)
 		return err;
 
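Three sch_api threads converge above: qdisc_destroy() callers become qdisc_put(), which frees only when the reference count drops to zero; qdisc_lookup_rcu() gives dump paths a handle lookup usable under RCU rather than the RTNL; and the qdisc_offload_*_helper() pair centralizes the ndo_setup_tc handshake, including TCQ_F_OFFLOADED handling and the rule of reporting a graft failure only when one of the involved qdiscs was actually offloaded. The refcount contract, as a sketch:

	/* attach: each user (e.g. each TX queue) takes a reference */
	qdisc_refcount_inc(q);

	/* detach: drops a reference; destruction happens on the last
	 * put, so a shared qdisc survives a single graft away */
	qdisc_put(q);
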
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index cd49afc..f4f9b8c 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
 
 /* Written 1998-2000 by Werner Almesberger, EPFL ICA */
@@ -150,7 +151,7 @@
 	pr_debug("atm_tc_put: destroying\n");
 	list_del_init(&flow->list);
 	pr_debug("atm_tc_put: qdisc %p\n", flow->q);
-	qdisc_destroy(flow->q);
+	qdisc_put(flow->q);
 	tcf_block_put(flow->block);
 	if (flow->sock) {
 		pr_debug("atm_tc_put: f_count %ld\n",
@@ -223,7 +224,8 @@
 	if (opt == NULL)
 		return -EINVAL;
 
-	error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy, NULL);
+	error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy,
+					    NULL);
 	if (error < 0)
 		return error;
 
@@ -609,7 +611,7 @@
 	tcm->tcm_handle = flow->common.classid;
 	tcm->tcm_info = flow->q->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index 9c4c2bb..a7f7667 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_blackhole.c	Black hole queue
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Thomas Graf <tgraf@suug.ch>
  *
  * Note: Quantum tunneling is not supported.
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 793016d..53a80bc 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -138,8 +138,8 @@
 struct cake_host {
 	u32 srchost_tag;
 	u32 dsthost_tag;
-	u16 srchost_refcnt;
-	u16 dsthost_refcnt;
+	u16 srchost_bulk_flow_count;
+	u16 dsthost_bulk_flow_count;
 };
 
 struct cake_heap_entry {
@@ -211,6 +211,9 @@
 	u8		ack_filter;
 	u8		atm_mode;
 
+	u32		fwmark_mask;
+	u16		fwmark_shft;
+
 	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
 	u16		rate_shft;
 	ktime_t		time_next_packet;
@@ -746,8 +749,10 @@
 		 * queue, accept the collision, update the host tags.
 		 */
 		q->way_collisions++;
-		q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
-		q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+		if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
+			q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
+			q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
+		}
 		allocate_src = cake_dsrc(flow_mode);
 		allocate_dst = cake_ddst(flow_mode);
 found:
@@ -767,13 +772,14 @@
 			}
 			for (i = 0; i < CAKE_SET_WAYS;
 				i++, k = (k + 1) % CAKE_SET_WAYS) {
-				if (!q->hosts[outer_hash + k].srchost_refcnt)
+				if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
 					break;
 			}
 			q->hosts[outer_hash + k].srchost_tag = srchost_hash;
 found_src:
 			srchost_idx = outer_hash + k;
-			q->hosts[srchost_idx].srchost_refcnt++;
+			if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+				q->hosts[srchost_idx].srchost_bulk_flow_count++;
 			q->flows[reduced_hash].srchost = srchost_idx;
 		}
 
@@ -789,13 +795,14 @@
 			}
 			for (i = 0; i < CAKE_SET_WAYS;
 			     i++, k = (k + 1) % CAKE_SET_WAYS) {
-				if (!q->hosts[outer_hash + k].dsthost_refcnt)
+				if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
 					break;
 			}
 			q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
 found_dst:
 			dsthost_idx = outer_hash + k;
-			q->hosts[dsthost_idx].dsthost_refcnt++;
+			if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+				q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
 			q->flows[reduced_hash].dsthost = dsthost_idx;
 		}
 	}
@@ -812,7 +819,7 @@
 
 	if (skb) {
 		flow->head = skb->next;
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 	}
 
 	return skb;
@@ -1252,7 +1259,7 @@
 	else
 		flow->head = elig_ack->next;
 
-	elig_ack->next = NULL;
+	skb_mark_not_on_list(elig_ack);
 
 	return elig_ack;
 }
@@ -1508,32 +1515,29 @@
 	return idx + (tin << 16);
 }
 
-static void cake_wash_diffserv(struct sk_buff *skb)
-{
-	switch (skb->protocol) {
-	case htons(ETH_P_IP):
-		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
-		break;
-	case htons(ETH_P_IPV6):
-		ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
-		break;
-	default:
-		break;
-	}
-}
-
 static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
 {
+	int wlen = skb_network_offset(skb);
 	u8 dscp;
 
-	switch (skb->protocol) {
+	switch (tc_skb_protocol(skb)) {
 	case htons(ETH_P_IP):
+		wlen += sizeof(struct iphdr);
+		if (!pskb_may_pull(skb, wlen) ||
+		    skb_try_make_writable(skb, wlen))
+			return 0;
+
 		dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
 		if (wash && dscp)
 			ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
 		return dscp;
 
 	case htons(ETH_P_IPV6):
+		wlen += sizeof(struct ipv6hdr);
+		if (!pskb_may_pull(skb, wlen) ||
+		    skb_try_make_writable(skb, wlen))
+			return 0;
+
 		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
 		if (wash && dscp)
 			ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
@@ -1552,26 +1556,32 @@
 					     struct sk_buff *skb)
 {
 	struct cake_sched_data *q = qdisc_priv(sch);
-	u32 tin;
+	u32 tin, mark;
+	u8 dscp;
 
-	if (TC_H_MAJ(skb->priority) == sch->handle &&
-	    TC_H_MIN(skb->priority) > 0 &&
-	    TC_H_MIN(skb->priority) <= q->tin_cnt) {
+	/* Tin selection: Default to diffserv-based selection, allow overriding
+	 * using firewall marks or skb->priority.
+	 */
+	dscp = cake_handle_diffserv(skb,
+				    q->rate_flags & CAKE_FLAG_WASH);
+	mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
+
+	if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
+		tin = 0;
+
+	else if (mark && mark <= q->tin_cnt)
+		tin = q->tin_order[mark - 1];
+
+	else if (TC_H_MAJ(skb->priority) == sch->handle &&
+		 TC_H_MIN(skb->priority) > 0 &&
+		 TC_H_MIN(skb->priority) <= q->tin_cnt)
 		tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
 
-		if (q->rate_flags & CAKE_FLAG_WASH)
-			cake_wash_diffserv(skb);
-	} else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
-		/* extract the Diffserv Precedence field, if it exists */
-		/* and clear DSCP bits if washing */
-		tin = q->tin_index[cake_handle_diffserv(skb,
-				q->rate_flags & CAKE_FLAG_WASH)];
+	else {
+		tin = q->tin_index[dscp];
+
 		if (unlikely(tin >= q->tin_cnt))
 			tin = 0;
-	} else {
-		tin = 0;
-		if (q->rate_flags & CAKE_FLAG_WASH)
-			cake_wash_diffserv(skb);
 	}
 
 	return &q->tins[tin];
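Tin selection now has an explicit precedence: best-effort mode short-circuits to tin 0, then a firewall-mark override, then skb->priority, then DSCP. TCA_CAKE_FWMARK is a mask, and fwmark_shft is derived with __ffs() so the masked value is right-aligned before indexing tin_order. A worked example (values illustrative):

	/* TCA_CAKE_FWMARK = 0x30  ->  q->fwmark_shft = __ffs(0x30) = 4 */
	mark = (skb->mark & 0x30) >> 4;	/* skb->mark 0x20 -> mark = 2 */
	tin  = q->tin_order[mark - 1];	/* 1-based; mark 0 = no override */
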
@@ -1667,7 +1677,7 @@
 	if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) {
 		struct sk_buff *segs, *nskb;
 		netdev_features_t features = netif_skb_features(skb);
-		unsigned int slen = 0;
+		unsigned int slen = 0, numsegs = 0;
 
 		segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
 		if (IS_ERR_OR_NULL(segs))
@@ -1675,7 +1685,7 @@
 
 		while (segs) {
 			nskb = segs->next;
-			segs->next = NULL;
+			skb_mark_not_on_list(segs);
 			qdisc_skb_cb(segs)->pkt_len = segs->len;
 			cobalt_set_enqueue_time(segs, now);
 			get_cobalt_cb(segs)->adjusted_len = cake_overhead(q,
@@ -1683,6 +1693,7 @@
 			flow_queue_add(flow, segs);
 
 			sch->q.qlen++;
+			numsegs++;
 			slen += segs->len;
 			q->buffer_used += segs->truesize;
 			b->packets++;
@@ -1696,7 +1707,7 @@
 		sch->qstats.backlog += slen;
 		q->avg_window_bytes += slen;
 
-		qdisc_tree_reduce_backlog(sch, 1, len);
+		qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen);
 		consume_skb(skb);
 	} else {
 		/* not splitting */
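Note the signs in the call above: splitting one GSO super-packet into numsegs segments makes the queue longer and (with duplicated headers) slightly larger, so the deltas passed up the tree are negative. This is exactly why qdisc_tree_reduce_backlog() switched from unsigned int to int earlier in this patch. Illustrative numbers:

	/* one GSO skb (len 6000) split into 4 segments (slen, say, 6040):
	 *   n   = 1 - 4       = -3   -> ancestors' qlen grows by 3
	 *   len = 6000 - 6040 = -40  -> ancestors' backlog grows by 40
	 */
	qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
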
@@ -1793,20 +1804,30 @@
 		b->sparse_flow_count++;
 
 		if (cake_dsrc(q->flow_mode))
-			host_load = max(host_load, srchost->srchost_refcnt);
+			host_load = max(host_load, srchost->srchost_bulk_flow_count);
 
 		if (cake_ddst(q->flow_mode))
-			host_load = max(host_load, dsthost->dsthost_refcnt);
+			host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
 
 		flow->deficit = (b->flow_quantum *
 				 quantum_div[host_load]) >> 16;
 	} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+		struct cake_host *srchost = &b->hosts[flow->srchost];
+		struct cake_host *dsthost = &b->hosts[flow->dsthost];
+
 		/* this flow was empty, accounted as a sparse flow, but actually
 		 * in the bulk rotation.
 		 */
 		flow->set = CAKE_SET_BULK;
 		b->sparse_flow_count--;
 		b->bulk_flow_count++;
+
+		if (cake_dsrc(q->flow_mode))
+			srchost->srchost_bulk_flow_count++;
+
+		if (cake_ddst(q->flow_mode))
+			dsthost->dsthost_bulk_flow_count++;
+
 	}
 
 	if (q->buffer_used > q->buffer_max_used)
@@ -1974,23 +1995,8 @@
 	dsthost = &b->hosts[flow->dsthost];
 	host_load = 1;
 
-	if (cake_dsrc(q->flow_mode))
-		host_load = max(host_load, srchost->srchost_refcnt);
-
-	if (cake_ddst(q->flow_mode))
-		host_load = max(host_load, dsthost->dsthost_refcnt);
-
-	WARN_ON(host_load > CAKE_QUEUES);
-
 	/* flow isolation (DRR++) */
 	if (flow->deficit <= 0) {
-		/* The shifted prandom_u32() is a way to apply dithering to
-		 * avoid accumulating roundoff errors
-		 */
-		flow->deficit += (b->flow_quantum * quantum_div[host_load] +
-				  (prandom_u32() >> 16)) >> 16;
-		list_move_tail(&flow->flowchain, &b->old_flows);
-
 		/* Keep all flows with deficits out of the sparse and decaying
 		 * rotations.  No non-empty flow can go into the decaying
 		 * rotation, so they can't get deficits
@@ -1999,6 +2005,13 @@
 			if (flow->head) {
 				b->sparse_flow_count--;
 				b->bulk_flow_count++;
+
+				if (cake_dsrc(q->flow_mode))
+					srchost->srchost_bulk_flow_count++;
+
+				if (cake_ddst(q->flow_mode))
+					dsthost->dsthost_bulk_flow_count++;
+
 				flow->set = CAKE_SET_BULK;
 			} else {
 				/* we've moved it to the bulk rotation for
@@ -2008,6 +2021,22 @@
 				flow->set = CAKE_SET_SPARSE_WAIT;
 			}
 		}
+
+		if (cake_dsrc(q->flow_mode))
+			host_load = max(host_load, srchost->srchost_bulk_flow_count);
+
+		if (cake_ddst(q->flow_mode))
+			host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
+
+		WARN_ON(host_load > CAKE_QUEUES);
+
+		/* The shifted prandom_u32() is a way to apply dithering to
+		 * avoid accumulating roundoff errors
+		 */
+		flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+				  (prandom_u32() >> 16)) >> 16;
+		list_move_tail(&flow->flowchain, &b->old_flows);
+
 		goto retry;
 	}
 
@@ -2028,6 +2057,13 @@
 					       &b->decaying_flows);
 				if (flow->set == CAKE_SET_BULK) {
 					b->bulk_flow_count--;
+
+					if (cake_dsrc(q->flow_mode))
+						srchost->srchost_bulk_flow_count--;
+
+					if (cake_ddst(q->flow_mode))
+						dsthost->dsthost_bulk_flow_count--;
+
 					b->decaying_flow_count++;
 				} else if (flow->set == CAKE_SET_SPARSE ||
 					   flow->set == CAKE_SET_SPARSE_WAIT) {
@@ -2041,14 +2077,19 @@
 				if (flow->set == CAKE_SET_SPARSE ||
 				    flow->set == CAKE_SET_SPARSE_WAIT)
 					b->sparse_flow_count--;
-				else if (flow->set == CAKE_SET_BULK)
+				else if (flow->set == CAKE_SET_BULK) {
 					b->bulk_flow_count--;
-				else
+
+					if (cake_dsrc(q->flow_mode))
+						srchost->srchost_bulk_flow_count--;
+
+					if (cake_ddst(q->flow_mode))
+						dsthost->dsthost_bulk_flow_count--;
+
+				} else
 					b->decaying_flow_count--;
 
 				flow->set = CAKE_SET_NONE;
-				srchost->srchost_refcnt--;
-				dsthost->dsthost_refcnt--;
 			}
 			goto begin;
 		}
@@ -2143,6 +2184,7 @@
 	[TCA_CAKE_MPU]		 = { .type = NLA_U32 },
 	[TCA_CAKE_INGRESS]	 = { .type = NLA_U32 },
 	[TCA_CAKE_ACK_FILTER]	 = { .type = NLA_U32 },
+	[TCA_CAKE_FWMARK]	 = { .type = NLA_U32 },
 };
 
 static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
@@ -2489,7 +2531,8 @@
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack);
+	err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy,
+					  extack);
 	if (err < 0)
 		return err;
 
@@ -2589,6 +2632,11 @@
 			q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
 	}
 
+	if (tb[TCA_CAKE_FWMARK]) {
+		q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
+		q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
+	}
+
 	if (q->tins) {
 		sch_tree_lock(sch);
 		cake_reconfigure(sch);
@@ -2688,7 +2736,7 @@
 	struct cake_sched_data *q = qdisc_priv(sch);
 	struct nlattr *opts;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (!opts)
 		goto nla_put_failure;
 
@@ -2748,6 +2796,9 @@
 			!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
+		goto nla_put_failure;
+
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
@@ -2756,7 +2807,7 @@
 
 static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
-	struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP);
+	struct nlattr *stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP);
 	struct cake_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tstats, *ts;
 	int i;
@@ -2786,7 +2837,7 @@
 #undef PUT_STAT_U32
 #undef PUT_STAT_U64
 
-	tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS);
+	tstats = nla_nest_start_noflag(d->skb, TCA_CAKE_STATS_TIN_STATS);
 	if (!tstats)
 		goto nla_put_failure;
 
@@ -2803,7 +2854,7 @@
 	for (i = 0; i < q->tin_cnt; i++) {
 		struct cake_tin_data *b = &q->tins[q->tin_order[i]];
 
-		ts = nla_nest_start(d->skb, i + 1);
+		ts = nla_nest_start_noflag(d->skb, i + 1);
 		if (!ts)
 			goto nla_put_failure;
 
@@ -2923,7 +2974,7 @@
 	if (flow) {
 		ktime_t now = ktime_get();
 
-		stats = nla_nest_start(d->skb, TCA_STATS_APP);
+		stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP);
 		if (!stats)
 			return -1;
 
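The refcount-to-bulk-count rename in cake is behavioural, not cosmetic: per-host load for the DRR++ quantum now counts only flows in the bulk rotation, so a host's sparse flows no longer shrink its quantum. Every set transition in the hunks above maintains one invariant (sketch):

	/* entering CAKE_SET_BULK: srchost/dsthost bulk_flow_count++
	 * leaving  CAKE_SET_BULK: srchost/dsthost bulk_flow_count--
	 * and the deficit scaling reads, with a floor of 1: */
	host_load = max(host_load, srchost->srchost_bulk_flow_count);
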
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index f42025d..39b427d 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_cbq.c	Class-Based Queueing discipline.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
  */
 
 #include <linux/module.h>
@@ -1132,6 +1127,33 @@
 	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
 };
 
+static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1],
+			 struct nlattr *opt,
+			 struct netlink_ext_ack *extack)
+{
+	int err;
+
+	if (!opt) {
+		NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt,
+					  cbq_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_CBQ_WRROPT]) {
+		const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]);
+
+		if (wrr->priority > TC_CBQ_MAXPRIO) {
+			NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO");
+			err = -EINVAL;
+		}
+	}
+	return err;
+}
+
 static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
 		    struct netlink_ext_ack *extack)
 {
@@ -1144,12 +1166,7 @@
 	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 	q->delay_timer.function = cbq_undelay;
 
-	if (!opt) {
-		NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
-		return -EINVAL;
-	}
-
-	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack);
+	err = cbq_opt_parse(tb, opt, extack);
 	if (err < 0)
 		return err;
 
@@ -1305,7 +1322,7 @@
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 	if (cbq_dump_attr(skb, &q->link) < 0)
@@ -1340,7 +1357,7 @@
 	tcm->tcm_handle = cl->common.classid;
 	tcm->tcm_info = cl->q->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 	if (cbq_dump_attr(skb, cl) < 0)
@@ -1358,9 +1375,11 @@
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = (struct cbq_class *)arg;
+	__u32 qlen;
 
 	cl->xstats.avgidle = cl->avgidle;
 	cl->xstats.undertime = 0;
+	qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog);
 
 	if (cl->undertime != PSCHED_PASTPERFECT)
 		cl->xstats.undertime = cl->undertime - q->now;
@@ -1368,7 +1387,7 @@
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 				  d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
+	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
 		return -1;
 
 	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
@@ -1418,7 +1437,7 @@
 	WARN_ON(cl->filters);
 
 	tcf_block_put(cl->block);
-	qdisc_destroy(cl->q);
+	qdisc_put(cl->q);
 	qdisc_put_rtab(cl->R_tab);
 	gen_kill_estimator(&cl->rate_est);
 	if (cl != &q->link)
@@ -1466,12 +1485,7 @@
 	struct cbq_class *parent;
 	struct qdisc_rate_table *rtab = NULL;
 
-	if (!opt) {
-		NL_SET_ERR_MSG(extack, "Mandatory qdisc options missing");
-		return -EINVAL;
-	}
-
-	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack);
+	err = cbq_opt_parse(tb, opt, extack);
 	if (err < 0)
 		return err;
 
@@ -1665,17 +1679,13 @@
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = (struct cbq_class *)arg;
-	unsigned int qlen, backlog;
 
 	if (cl->filters || cl->children || cl == &q->link)
 		return -EBUSY;
 
 	sch_tree_lock(sch);
 
-	qlen = cl->q->q.qlen;
-	backlog = cl->q->qstats.backlog;
-	qdisc_reset(cl->q);
-	qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
+	qdisc_purge_queue(cl->q);
 
 	if (cl->next_alive)
 		cbq_deactivate_class(cl);
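For cbq, option parsing is consolidated into cbq_opt_parse(), which additionally rejects a WRR priority above TC_CBQ_MAXPRIO (a value that could otherwise index past the per-priority arrays), and the manual reset-plus-accounting sequence in delete becomes qdisc_purge_queue(). The helper is roughly the removed lines wrapped up (sketch, assuming the 5.4 sch_generic.h definition):

	static inline void qdisc_purge_queue(struct Qdisc *sch)
	{
		__u32 qlen, backlog;

		qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
		qdisc_reset(sch);
		qdisc_tree_reduce_backlog(sch, qlen, backlog);
	}
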
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index e26a240..b2905b0 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_cbs.c	Credit Based Shaper
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
- *
  */
 
 /* Credit Based Shaper (CBS)
@@ -61,16 +56,20 @@
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/skbuff.h>
+#include <net/netevent.h>
 #include <net/netlink.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 
+static LIST_HEAD(cbs_list);
+static DEFINE_SPINLOCK(cbs_list_lock);
+
 #define BYTES_PER_KBIT (1000LL / 8)
 
 struct cbs_sched_data {
 	bool offload;
 	int queue;
-	s64 port_rate; /* in bytes/s */
+	atomic64_t port_rate; /* in bytes/s */
 	s64 last; /* timestamp in ns */
 	s64 credits; /* in bytes */
 	s32 locredit; /* in bytes */
@@ -82,19 +81,21 @@
 		       struct sk_buff **to_free);
 	struct sk_buff *(*dequeue)(struct Qdisc *sch);
 	struct Qdisc *qdisc;
+	struct list_head cbs_list;
 };
 
 static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			     struct Qdisc *child,
 			     struct sk_buff **to_free)
 {
+	unsigned int len = qdisc_pkt_len(skb);
 	int err;
 
 	err = child->ops->enqueue(skb, child, to_free);
 	if (err != NET_XMIT_SUCCESS)
 		return err;
 
-	qdisc_qstats_backlog_inc(sch, skb);
+	sch->qstats.backlog += len;
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
@@ -206,7 +207,8 @@
 	/* As sendslope is a negative number, this will decrease the
 	 * amount of q->credits.
 	 */
-	credits = credits_from_len(len, q->sendslope, q->port_rate);
+	credits = credits_from_len(len, q->sendslope,
+				   atomic64_read(&q->port_rate));
 	credits += q->credits;
 
 	q->credits = max_t(s64, credits, q->locredit);
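
credits_from_len() above is a straight proportional scaling: the credit change for a frame of len bytes is len * slope / port_rate. Worked numbers (illustrative, not from the patch): a 1500-byte frame with sendslope = -7500000 bytes/s on a port_rate of 125000000 bytes/s (1 Gbit/s) consumes 1500 * -7500000 / 125000000 = -90 byte-credits while it is on the wire. A simplified sketch of the computation:

/* Simplified sketch; the kernel code uses div64_s64() for the division. */
static long long sketch_credits_from_len(unsigned int len, long long slope,
					 long long port_rate)
{
	if (!port_rate)
		return 0;
	return (long long)len * slope / port_rate;
}
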
@@ -293,6 +295,58 @@
 	return 0;
 }
 
+static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
+{
+	struct ethtool_link_ksettings ecmd;
+	int speed = SPEED_10;
+	int port_rate;
+	int err;
+
+	err = __ethtool_get_link_ksettings(dev, &ecmd);
+	if (err < 0)
+		goto skip;
+
+	if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
+		speed = ecmd.base.speed;
+
+skip:
+	port_rate = speed * 1000 * BYTES_PER_KBIT;
+
+	atomic64_set(&q->port_rate, port_rate);
+	netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n",
+		   dev->name, (long long)atomic64_read(&q->port_rate),
+		   ecmd.base.speed);
+}
+
+static int cbs_dev_notifier(struct notifier_block *nb, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct cbs_sched_data *q;
+	struct net_device *qdev;
+	bool found = false;
+
+	ASSERT_RTNL();
+
+	if (event != NETDEV_UP && event != NETDEV_CHANGE)
+		return NOTIFY_DONE;
+
+	spin_lock(&cbs_list_lock);
+	list_for_each_entry(q, &cbs_list, cbs_list) {
+		qdev = qdisc_dev(q->qdisc);
+		if (qdev == dev) {
+			found = true;
+			break;
+		}
+	}
+	spin_unlock(&cbs_list_lock);
+
+	if (found)
+		cbs_set_port_rate(dev, q);
+
+	return NOTIFY_DONE;
+}
+
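The notifier above closes a gap: port_rate was previously sampled once at configuration time, so a later link renegotiation left CBS shaping at the wrong speed. Registering for NETDEV_UP/NETDEV_CHANGE and looking the device up in the global cbs_list lets the qdisc re-derive port_rate whenever the link changes. A minimal, self-contained sketch of that notifier pattern (all demo_* names are illustrative):

#include <linux/netdevice.h>
#include <linux/module.h>

static int demo_notifier_cb(struct notifier_block *nb, unsigned long event,
			    void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UP || event == NETDEV_CHANGE)
		netdev_dbg(dev, "link state/speed may have changed\n");
	return NOTIFY_DONE;
}

static struct notifier_block demo_nb = { .notifier_call = demo_notifier_cb };

static int __init demo_init(void)
{
	return register_netdevice_notifier(&demo_nb);
}

static void __exit demo_exit(void)
{
	unregister_netdevice_notifier(&demo_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
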
 static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
 		      struct netlink_ext_ack *extack)
 {
@@ -302,7 +356,8 @@
 	struct tc_cbs_qopt *qopt;
 	int err;
 
-	err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, extack);
+	err = nla_parse_nested_deprecated(tb, TCA_CBS_MAX, opt, cbs_policy,
+					  extack);
 	if (err < 0)
 		return err;
 
@@ -314,16 +369,7 @@
 	qopt = nla_data(tb[TCA_CBS_PARMS]);
 
 	if (!qopt->offload) {
-		struct ethtool_link_ksettings ecmd;
-		s64 link_speed;
-
-		if (!__ethtool_get_link_ksettings(dev, &ecmd))
-			link_speed = ecmd.base.speed;
-		else
-			link_speed = SPEED_1000;
-
-		q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
-
+		cbs_set_port_rate(dev, q);
 		cbs_disable_offload(dev, q);
 	} else {
 		err = cbs_enable_offload(dev, q, qopt, extack);
@@ -357,6 +403,10 @@
 	if (!q->qdisc)
 		return -ENOMEM;
 
+	spin_lock(&cbs_list_lock);
+	list_add(&q->cbs_list, &cbs_list);
+	spin_unlock(&cbs_list_lock);
+
 	qdisc_hash_add(q->qdisc, false);
 
 	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
@@ -374,12 +424,18 @@
 	struct cbs_sched_data *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
 
-	qdisc_watchdog_cancel(&q->watchdog);
+	/* Nothing to do if we couldn't create the underlying qdisc */
+	if (!q->qdisc)
+		return;
 
+	qdisc_watchdog_cancel(&q->watchdog);
 	cbs_disable_offload(dev, q);
 
-	if (q->qdisc)
-		qdisc_destroy(q->qdisc);
+	spin_lock(&cbs_list_lock);
+	list_del(&q->cbs_list);
+	spin_unlock(&cbs_list_lock);
+
+	qdisc_put(q->qdisc);
 }
 
 static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -388,7 +444,7 @@
 	struct tc_cbs_qopt opt = { };
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (!nest)
 		goto nla_put_failure;
 
@@ -486,14 +542,29 @@
 	.owner		=	THIS_MODULE,
 };
 
+static struct notifier_block cbs_device_notifier = {
+	.notifier_call = cbs_dev_notifier,
+};
+
 static int __init cbs_module_init(void)
 {
-	return register_qdisc(&cbs_qdisc_ops);
+	int err;
+
+	err = register_netdevice_notifier(&cbs_device_notifier);
+	if (err)
+		return err;
+
+	err = register_qdisc(&cbs_qdisc_ops);
+	if (err)
+		unregister_netdevice_notifier(&cbs_device_notifier);
+
+	return err;
 }
 
 static void __exit cbs_module_exit(void)
 {
 	unregister_qdisc(&cbs_qdisc_ops);
+	unregister_netdevice_notifier(&cbs_device_notifier);
 }
 module_init(cbs_module_init)
 module_exit(cbs_module_exit)
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index eafc0d1..dba7037 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/sch_choke.c	CHOKE scheduler
  *
  * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
  * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
  */
 
 #include <linux/module.h>
@@ -358,7 +354,8 @@
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
+					  choke_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -452,7 +449,7 @@
 		.Scell_log	= q->parms.Scell_log,
 	};
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 17cd81f..30169b3 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -71,10 +71,10 @@
 	struct Qdisc *sch = ctx;
 	struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
 
-	if (skb)
+	if (skb) {
 		sch->qstats.backlog -= qdisc_pkt_len(skb);
-
-	prefetch(&skb->end); /* we'll need skb_shinfo() */
+		prefetch(&skb->end); /* we'll need skb_shinfo() */
+	}
 	return skb;
 }
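
The sch_codel hunk above is a subtle correctness fix: the old code executed prefetch(&skb->end) even when __qdisc_dequeue_head() returned NULL, computing and prefetching an address derived from a null pointer; the new code keeps both the backlog accounting and the prefetch under the NULL check. The same guarded shape in a generic sketch (demo types are illustrative):

#include <linux/prefetch.h>

struct demo_item { int payload; struct demo_item *next; };

static struct demo_item *demo_pop(struct demo_item **head)
{
	struct demo_item *it = *head;

	if (it) {
		*head = it->next;
		prefetch(&it->payload);	/* safe: 'it' is known non-NULL */
	}
	return it;
}
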
 
@@ -141,7 +141,8 @@
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt,
+					  codel_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -217,7 +218,7 @@
 	struct codel_sched_data *q = qdisc_priv(sch);
 	struct nlattr *opts;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e0b0cf8..07a2b0b 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/sch_drr.c         Deficit Round Robin scheduler
  *
  * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -50,15 +47,6 @@
 	return container_of(clc, struct drr_class, common);
 }
 
-static void drr_purge_queue(struct drr_class *cl)
-{
-	unsigned int len = cl->qdisc->q.qlen;
-	unsigned int backlog = cl->qdisc->qstats.backlog;
-
-	qdisc_reset(cl->qdisc);
-	qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
 static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
 	[TCA_DRR_QUANTUM]	= { .type = NLA_U32 },
 };
@@ -79,7 +67,8 @@
 		return -EINVAL;
 	}
 
-	err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, extack);
+	err = nla_parse_nested_deprecated(tb, TCA_DRR_MAX, opt, drr_policy,
+					  extack);
 	if (err < 0)
 		return err;
 
@@ -134,7 +123,7 @@
 					    tca[TCA_RATE]);
 		if (err) {
 			NL_SET_ERR_MSG(extack, "Failed to replace estimator");
-			qdisc_destroy(cl->qdisc);
+			qdisc_put(cl->qdisc);
 			kfree(cl);
 			return err;
 		}
@@ -153,7 +142,7 @@
 static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
 {
 	gen_kill_estimator(&cl->rate_est);
-	qdisc_destroy(cl->qdisc);
+	qdisc_put(cl->qdisc);
 	kfree(cl);
 }
 
@@ -167,7 +156,7 @@
 
 	sch_tree_lock(sch);
 
-	drr_purge_queue(cl);
+	qdisc_purge_queue(cl->qdisc);
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
 
 	sch_tree_unlock(sch);
@@ -253,7 +242,7 @@
 	tcm->tcm_handle	= cl->common.classid;
 	tcm->tcm_info	= cl->qdisc->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 	if (nla_put_u32(skb, TCA_DRR_QUANTUM, cl->quantum))
@@ -269,7 +258,8 @@
 				struct gnet_dump *d)
 {
 	struct drr_class *cl = (struct drr_class *)arg;
-	__u32 qlen = cl->qdisc->q.qlen;
+	__u32 qlen = qdisc_qlen_sum(cl->qdisc);
+	struct Qdisc *cl_q = cl->qdisc;
 	struct tc_drr_stats xstats;
 
 	memset(&xstats, 0, sizeof(xstats));
@@ -279,7 +269,7 @@
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 				  d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
+	    gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
 		return -1;
 
 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
@@ -350,9 +340,11 @@
 static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		       struct sk_buff **to_free)
 {
+	unsigned int len = qdisc_pkt_len(skb);
 	struct drr_sched *q = qdisc_priv(sch);
 	struct drr_class *cl;
 	int err = 0;
+	bool first;
 
 	cl = drr_classify(skb, sch, &err);
 	if (cl == NULL) {
@@ -362,6 +354,7 @@
 		return err;
 	}
 
+	first = !cl->qdisc->q.qlen;
 	err = qdisc_enqueue(skb, cl->qdisc, to_free);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		if (net_xmit_drop_count(err)) {
@@ -371,12 +364,12 @@
 		return err;
 	}
 
-	if (cl->qdisc->q.qlen == 1) {
+	if (first) {
 		list_add_tail(&cl->alist, &q->active);
 		cl->deficit = cl->quantum;
 	}
 
-	qdisc_qstats_backlog_inc(sch, skb);
+	sch->qstats.backlog += len;
 	sch->q.qlen++;
 	return err;
 }
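
The drr_enqueue() hunk above follows a hardening pattern that recurs across this series (cbs, dsmark, and others): read qdisc_pkt_len(skb) and the old queue length before handing the skb to the child qdisc, because a successful child enqueue may free or re-segment the skb, so touching it afterwards risks a use-after-free. A condensed sketch of the safe shape (demo_enqueue is illustrative):

static int demo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			struct Qdisc *child, struct sk_buff **to_free)
{
	unsigned int len = qdisc_pkt_len(skb);	/* snapshot before enqueue */
	bool first = !child->q.qlen;		/* likewise */
	int err;

	err = qdisc_enqueue(skb, child, to_free);
	if (err != NET_XMIT_SUCCESS)
		return err;

	if (first) {
		/* activate the class, refresh its deficit, etc. (omitted) */
	}

	sch->qstats.backlog += len;	/* uses the snapshot, not the skb */
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}
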
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 049714c..05605b3 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* net/sched/sch_dsmark.c - Differentiated Services field marker */
 
 /* Written 1998-2000 by Werner Almesberger, EPFL ICA */
@@ -132,7 +133,8 @@
 	if (!opt)
 		goto errout;
 
-	err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
+					  dsmark_policy, NULL);
 	if (err < 0)
 		goto errout;
 
@@ -199,6 +201,7 @@
 static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			  struct sk_buff **to_free)
 {
+	unsigned int len = qdisc_pkt_len(skb);
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	int err;
 
@@ -271,7 +274,7 @@
 		return err;
 	}
 
-	qdisc_qstats_backlog_inc(sch, skb);
+	sch->qstats.backlog += len;
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
@@ -352,11 +355,14 @@
 	if (err)
 		return err;
 
-	err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
+					  dsmark_policy, NULL);
 	if (err < 0)
 		goto errout;
 
 	err = -EINVAL;
+	if (!tb[TCA_DSMARK_INDICES])
+		goto errout;
 	indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
 
 	if (hweight32(indices) != 1)
@@ -412,7 +418,7 @@
 	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
 
 	tcf_block_put(p->block);
-	qdisc_destroy(p->q);
+	qdisc_put(p->q);
 	if (p->mv != p->embedded)
 		kfree(p->mv);
 }
@@ -431,7 +437,7 @@
 	tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
 	tcm->tcm_info = p->q->handle;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 	if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) ||
@@ -450,7 +456,7 @@
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	struct nlattr *opts = NULL;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 	if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices))
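
The dsmark_init() hunk above adds the check the old code lacked: nla_parse_nested_deprecated() validates only the attributes that are actually present, so a message without TCA_DSMARK_INDICES previously reached nla_get_u16(NULL). The defensive shape for any mandatory netlink attribute, as a sketch (demo_parse is illustrative):

static int demo_parse(struct nlattr *opt, struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_DSMARK_MAX + 1];
	u16 indices;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
					  dsmark_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_DSMARK_INDICES])	/* absent attributes parse as NULL */
		return -EINVAL;

	indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
	return hweight32(indices) == 1 ? 0 : -EINVAL;
}
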
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 1538d6f..b1da558 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -22,15 +22,17 @@
 
 #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
 #define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
+#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
 
 struct etf_sched_data {
 	bool offload;
 	bool deadline_mode;
+	bool skip_sock_check;
 	int clockid;
 	int queue;
 	s32 delta; /* in ns */
 	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
-	struct rb_root head;
+	struct rb_root_cached head;
 	struct qdisc_watchdog watchdog;
 	ktime_t (*get_time)(void);
 };
@@ -77,6 +79,9 @@
 	struct sock *sk = nskb->sk;
 	ktime_t now;
 
+	if (q->skip_sock_check)
+		goto skip;
+
 	if (!sk)
 		return false;
 
@@ -92,6 +97,7 @@
 	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
 		return false;
 
+skip:
 	now = q->get_time();
 	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
 		return false;
@@ -104,7 +110,7 @@
 	struct etf_sched_data *q = qdisc_priv(sch);
 	struct rb_node *p;
 
-	p = rb_first(&q->head);
+	p = rb_first_cached(&q->head);
 	if (!p)
 		return NULL;
 
@@ -117,8 +123,10 @@
 	struct sk_buff *skb = etf_peek_timesortedlist(sch);
 	ktime_t next;
 
-	if (!skb)
+	if (!skb) {
+		qdisc_watchdog_cancel(&q->watchdog);
 		return;
+	}
 
 	next = ktime_sub_ns(skb->tstamp, q->delta);
 	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
@@ -154,8 +162,9 @@
 				      struct sk_buff **to_free)
 {
 	struct etf_sched_data *q = qdisc_priv(sch);
-	struct rb_node **p = &q->head.rb_node, *parent = NULL;
+	struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
 	ktime_t txtime = nskb->tstamp;
+	bool leftmost = true;
 
 	if (!is_packet_valid(sch, nskb)) {
 		report_sock_error(nskb, EINVAL,
@@ -168,13 +177,15 @@
 
 		parent = *p;
 		skb = rb_to_skb(parent);
-		if (ktime_after(txtime, skb->tstamp))
+		if (ktime_compare(txtime, skb->tstamp) >= 0) {
 			p = &parent->rb_right;
-		else
+			leftmost = false;
+		} else {
 			p = &parent->rb_left;
+		}
 	}
 	rb_link_node(&nskb->rbnode, parent, p);
-	rb_insert_color(&nskb->rbnode, &q->head);
+	rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);
 
 	qdisc_qstats_backlog_inc(sch, nskb);
 	sch->q.qlen++;
@@ -185,12 +196,42 @@
 	return NET_XMIT_SUCCESS;
 }
 
-static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
-				 bool drop)
+static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
+				ktime_t now)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *to_free = NULL;
+	struct sk_buff *tmp = NULL;
+
+	skb_rbtree_walk_from_safe(skb, tmp) {
+		if (ktime_after(skb->tstamp, now))
+			break;
+
+		rb_erase_cached(&skb->rbnode, &q->head);
+
+		/* The rbnode field in the skb re-uses these fields; now that
+		 * we are done with the rbnode, reset them.
+		 */
+		skb->next = NULL;
+		skb->prev = NULL;
+		skb->dev = qdisc_dev(sch);
+
+		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
+
+		qdisc_qstats_backlog_dec(sch, skb);
+		qdisc_drop(skb, sch, &to_free);
+		qdisc_qstats_overlimit(sch);
+		sch->q.qlen--;
+	}
+
+	kfree_skb_list(to_free);
+}
+
+static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct etf_sched_data *q = qdisc_priv(sch);
 
-	rb_erase(&skb->rbnode, &q->head);
+	rb_erase_cached(&skb->rbnode, &q->head);
 
 	/* The rbnode field in the skb re-uses these fields; now that
 	 * we are done with the rbnode, reset them.
@@ -201,19 +242,9 @@
 
 	qdisc_qstats_backlog_dec(sch, skb);
 
-	if (drop) {
-		struct sk_buff *to_free = NULL;
+	qdisc_bstats_update(sch, skb);
 
-		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
-
-		qdisc_drop(skb, sch, &to_free);
-		kfree_skb_list(to_free);
-		qdisc_qstats_overlimit(sch);
-	} else {
-		qdisc_bstats_update(sch, skb);
-
-		q->last = skb->tstamp;
-	}
+	q->last = skb->tstamp;
 
 	sch->q.qlen--;
 }
@@ -232,7 +263,7 @@
 
 	/* Drop if packet has expired while in queue. */
 	if (ktime_before(skb->tstamp, now)) {
-		timesortedlist_erase(sch, skb, true);
+		timesortedlist_drop(sch, skb, now);
 		skb = NULL;
 		goto out;
 	}
@@ -241,7 +272,7 @@
 	 * txtime from deadline to (now + delta).
 	 */
 	if (q->deadline_mode) {
-		timesortedlist_erase(sch, skb, false);
+		timesortedlist_remove(sch, skb);
 		skb->tstamp = now;
 		goto out;
 	}
@@ -250,7 +281,7 @@
 
 	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
 	if (ktime_after(now, next))
-		timesortedlist_erase(sch, skb, false);
+		timesortedlist_remove(sch, skb);
 	else
 		skb = NULL;
 
@@ -326,7 +357,8 @@
 		return -EINVAL;
 	}
 
-	err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack);
+	err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
+					  extack);
 	if (err < 0)
 		return err;
 
@@ -359,6 +391,7 @@
 	q->clockid = qopt->clockid;
 	q->offload = OFFLOAD_IS_ON(qopt);
 	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
+	q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);
 
 	switch (q->clockid) {
 	case CLOCK_REALTIME:
@@ -386,14 +419,14 @@
 static void timesortedlist_clear(struct Qdisc *sch)
 {
 	struct etf_sched_data *q = qdisc_priv(sch);
-	struct rb_node *p = rb_first(&q->head);
+	struct rb_node *p = rb_first_cached(&q->head);
 
 	while (p) {
 		struct sk_buff *skb = rb_to_skb(p);
 
 		p = rb_next(p);
 
-		rb_erase(&skb->rbnode, &q->head);
+		rb_erase_cached(&skb->rbnode, &q->head);
 		rtnl_kfree_skbs(skb, skb);
 		sch->q.qlen--;
 	}
@@ -435,7 +468,7 @@
 	struct tc_etf_qopt opt = { };
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (!nest)
 		goto nla_put_failure;
 
@@ -447,6 +480,9 @@
 	if (q->deadline_mode)
 		opt.flags |= TC_ETF_DEADLINE_MODE_ON;
 
+	if (q->skip_sock_check)
+		opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
+
 	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
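
Converting etf's rb_root to rb_root_cached trades one cached pointer for an O(1) leftmost lookup: rb_first() descends the tree on every peek, while rb_first_cached() reads the cached pointer, and etf peeks the soonest txtime on every dequeue and watchdog reschedule. The insertion side must report whether the new node became the leftmost, as in this usage sketch (demo_node is illustrative):

#include <linux/rbtree.h>

struct demo_node { struct rb_node rb; u64 key; };

static void demo_insert(struct rb_root_cached *root, struct demo_node *n)
{
	struct rb_node **p = &root->rb_root.rb_node, *parent = NULL;
	bool leftmost = true;

	while (*p) {
		struct demo_node *cur = rb_entry(*p, struct demo_node, rb);

		parent = *p;
		if (n->key >= cur->key) {
			p = &parent->rb_right;
			leftmost = false;	/* something sorts before us */
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&n->rb, parent, p);
	rb_insert_color_cached(&n->rb, root, leftmost);
}
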
 
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 24893d3..37c8aa7 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_fifo.c	The simplest FIFO queue.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
@@ -177,7 +173,7 @@
 	if (q) {
 		err = fifo_set_limit(q, limit);
 		if (err < 0) {
-			qdisc_destroy(q);
+			qdisc_put(q);
 			q = NULL;
 		}
 	}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4808713..98dd87c 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
  *
  *  Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com>
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *  Meant to be mostly used for locally generated traffic:
  *  Fast classification depends on skb->sk being set before reaching us.
  *  If not (router workload), we use rxhash as a fallback, with a 32-bit wide hash.
@@ -54,10 +50,23 @@
 #include <net/tcp_states.h>
 #include <net/tcp.h>
 
+struct fq_skb_cb {
+	u64	        time_to_send;
+};
+
+static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb)
+{
+	qdisc_cb_private_validate(skb, sizeof(struct fq_skb_cb));
+	return (struct fq_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
 /*
- * Per flow structure, dynamically allocated
+ * Per flow structure, dynamically allocated.
+ * If packets have monotonically increasing time_to_send, they are placed in
+ * O(1) in the linear list (head, tail); otherwise they are placed in an
+ * rbtree (t_root).
  */
 struct fq_flow {
+	struct rb_root	t_root;
 	struct sk_buff	*head;		/* list of skbs for this flow : first skb */
 	union {
 		struct sk_buff *tail;	/* last skb in the list */
@@ -92,8 +101,9 @@
 	u32		quantum;
 	u32		initial_quantum;
 	u32		flow_refill_delay;
-	u32		flow_max_rate;	/* optional max rate per flow */
 	u32		flow_plimit;	/* max packets per flow */
+	unsigned long	flow_max_rate;	/* optional max rate per flow */
+	u64		ce_threshold;
 	u32		orphan_mask;	/* mask for orphaned skb */
 	u32		low_rate_threshold;
 	struct rb_root	*fq_root;
@@ -106,8 +116,8 @@
 
 	u64		stat_gc_flows;
 	u64		stat_internal_packets;
-	u64		stat_tcp_retrans;
 	u64		stat_throttled;
+	u64		stat_ce_mark;
 	u64		stat_flows_plimit;
 	u64		stat_pkts_too_long;
 	u64		stat_allocation_errors;
@@ -256,6 +266,17 @@
 		 */
 		sk = (struct sock *)((hash << 1) | 1UL);
 		skb_orphan(skb);
+	} else if (sk->sk_state == TCP_CLOSE) {
+		unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
+		/*
+		 * Sockets in TCP_CLOSE are not connected.
+		 * The typical use case is UDP sockets, which can send packets
+		 * with sendto() to many different destinations.
+		 * We probably could use a generic bit advertising
+		 * non-connected sockets, instead of sk_state == TCP_CLOSE,
+		 * if we care enough.
+		 */
+		sk = (struct sock *)((hash << 1) | 1UL);
 	}
 
 	root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
@@ -276,7 +297,7 @@
 			 * If not, we need to refill credit with
 			 * initial quantum
 			 */
-			if (unlikely(skb->sk &&
+			if (unlikely(skb->sk == sk &&
 				     f->socket_hash != sk->sk_hash)) {
 				f->credit = q->initial_quantum;
 				f->socket_hash = sk->sk_hash;
@@ -297,9 +318,11 @@
 		q->stat_allocation_errors++;
 		return &q->internal;
 	}
+	/* f->t_root is already zeroed after kmem_cache_zalloc() */
+
 	fq_flow_set_detached(f);
 	f->sk = sk;
-	if (skb->sk)
+	if (skb->sk == sk)
 		f->socket_hash = sk->sk_hash;
 	f->credit = q->initial_quantum;
 
@@ -311,15 +334,41 @@
 	return f;
 }
 
+static struct sk_buff *fq_peek(struct fq_flow *flow)
+{
+	struct sk_buff *skb = skb_rb_first(&flow->t_root);
+	struct sk_buff *head = flow->head;
+
+	if (!skb)
+		return head;
+
+	if (!head)
+		return skb;
+
+	if (fq_skb_cb(skb)->time_to_send < fq_skb_cb(head)->time_to_send)
+		return skb;
+	return head;
+}
+
+static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow,
+			  struct sk_buff *skb)
+{
+	if (skb == flow->head) {
+		flow->head = skb->next;
+	} else {
+		rb_erase(&skb->rbnode, &flow->t_root);
+		skb->dev = qdisc_dev(sch);
+	}
+}
 
 /* remove one skb from head of flow queue */
 static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
 {
-	struct sk_buff *skb = flow->head;
+	struct sk_buff *skb = fq_peek(flow);
 
 	if (skb) {
-		flow->head = skb->next;
-		skb->next = NULL;
+		fq_erase_head(sch, flow, skb);
+		skb_mark_not_on_list(skb);
 		flow->qlen--;
 		qdisc_qstats_backlog_dec(sch, skb);
 		sch->q.qlen--;
@@ -327,62 +376,38 @@
 	return skb;
 }
 
-/* We might add in the future detection of retransmits
- * For the time being, just return false
- */
-static bool skb_is_retransmit(struct sk_buff *skb)
-{
-	return false;
-}
-
-/* add skb to flow queue
- * flow queue is a linked list, kind of FIFO, except for TCP retransmits
- * We special case tcp retransmits to be transmitted before other packets.
- * We rely on fact that TCP retransmits are unlikely, so we do not waste
- * a separate queue or a pointer.
- * head->  [retrans pkt 1]
- *         [retrans pkt 2]
- *         [ normal pkt 1]
- *         [ normal pkt 2]
- *         [ normal pkt 3]
- * tail->  [ normal pkt 4]
- */
 static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
 {
-	struct sk_buff *prev, *head = flow->head;
+	struct rb_node **p, *parent;
+	struct sk_buff *head, *aux;
 
-	skb->next = NULL;
-	if (!head) {
-		flow->head = skb;
-		flow->tail = skb;
-		return;
-	}
-	if (likely(!skb_is_retransmit(skb))) {
-		flow->tail->next = skb;
-		flow->tail = skb;
-		return;
-	}
+	fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns();
 
-	/* This skb is a tcp retransmit,
-	 * find the last retrans packet in the queue
-	 */
-	prev = NULL;
-	while (skb_is_retransmit(head)) {
-		prev = head;
-		head = head->next;
+	head = flow->head;
+	if (!head ||
+	    fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) {
 		if (!head)
-			break;
-	}
-	if (!prev) { /* no rtx packet in queue, become the new head */
-		skb->next = flow->head;
-		flow->head = skb;
-	} else {
-		if (prev == flow->tail)
-			flow->tail = skb;
+			flow->head = skb;
 		else
-			skb->next = prev->next;
-		prev->next = skb;
+			flow->tail->next = skb;
+		flow->tail = skb;
+		skb->next = NULL;
+		return;
 	}
+
+	p = &flow->t_root.rb_node;
+	parent = NULL;
+
+	while (*p) {
+		parent = *p;
+		aux = rb_to_skb(parent);
+		if (fq_skb_cb(skb)->time_to_send >= fq_skb_cb(aux)->time_to_send)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+	rb_link_node(&skb->rbnode, parent, p);
+	rb_insert_color(&skb->rbnode, &flow->t_root);
 }
 
 static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -401,8 +426,6 @@
 	}
 
 	f->qlen++;
-	if (skb_is_retransmit(skb))
-		q->stat_tcp_retrans++;
 	qdisc_qstats_backlog_inc(sch, skb);
 	if (fq_flow_is_detached(f)) {
 		struct sock *sk = skb->sk;
@@ -460,15 +483,21 @@
 static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 {
 	struct fq_sched_data *q = qdisc_priv(sch);
-	u64 now = ktime_get_ns();
 	struct fq_flow_head *head;
 	struct sk_buff *skb;
 	struct fq_flow *f;
-	u32 rate, plen;
+	unsigned long rate;
+	u32 plen;
+	u64 now;
+
+	if (!sch->q.qlen)
+		return NULL;
 
 	skb = fq_dequeue_head(sch, &q->internal);
 	if (skb)
 		goto out;
+
+	now = ktime_get_ns();
 	fq_check_throttled(q, now);
 begin:
 	head = &q->new_flows;
@@ -490,12 +519,22 @@
 		goto begin;
 	}
 
-	skb = f->head;
-	if (unlikely(skb && now < f->time_next_packet &&
-		     !skb_is_tcp_pure_ack(skb))) {
-		head->first = f->next;
-		fq_flow_set_throttled(q, f);
-		goto begin;
+	skb = fq_peek(f);
+	if (skb) {
+		u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send,
+					     f->time_next_packet);
+
+		if (now < time_next_packet) {
+			head->first = f->next;
+			f->time_next_packet = time_next_packet;
+			fq_flow_set_throttled(q, f);
+			goto begin;
+		}
+		if (time_next_packet &&
+		    (s64)(now - time_next_packet - q->ce_threshold) > 0) {
+			INET_ECN_set_ce(skb);
+			q->stat_ce_mark++;
+		}
 	}
 
 	skb = fq_dequeue_head(sch, f);
@@ -511,32 +550,35 @@
 		goto begin;
 	}
 	prefetch(&skb->end);
-	f->credit -= qdisc_pkt_len(skb);
+	plen = qdisc_pkt_len(skb);
+	f->credit -= plen;
 
 	if (!q->rate_enable)
 		goto out;
 
-	/* Do not pace locally generated ack packets */
-	if (skb_is_tcp_pure_ack(skb))
-		goto out;
-
 	rate = q->flow_max_rate;
-	if (skb->sk)
-		rate = min(skb->sk->sk_pacing_rate, rate);
 
-	if (rate <= q->low_rate_threshold) {
-		f->credit = 0;
-		plen = qdisc_pkt_len(skb);
-	} else {
-		plen = max(qdisc_pkt_len(skb), q->quantum);
-		if (f->credit > 0)
-			goto out;
+	/* If EDT time was provided for this skb, we need to
+	 * update f->time_next_packet only if this qdisc enforces
+	 * a flow max rate.
+	 */
+	if (!skb->tstamp) {
+		if (skb->sk)
+			rate = min(skb->sk->sk_pacing_rate, rate);
+
+		if (rate <= q->low_rate_threshold) {
+			f->credit = 0;
+		} else {
+			plen = max(plen, q->quantum);
+			if (f->credit > 0)
+				goto out;
+		}
 	}
-	if (rate != ~0U) {
+	if (rate != ~0UL) {
 		u64 len = (u64)plen * NSEC_PER_SEC;
 
 		if (likely(rate))
-			do_div(len, rate);
+			len = div64_ul(len, rate);
 		/* Since socket rate can change later,
 		 * clamp the delay to 1 second.
 		 * Really, providers of oversized packets should be fixed!
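
The rate-to-delay conversion above is plain arithmetic: a packet of plen bytes on a flow paced at rate bytes/s occupies plen/rate seconds, computed in nanoseconds and clamped to one second. Worked numbers (illustrative): plen = 1514 and rate = 12500000 (100 Mbit/s) give 1514 * 1000000000 / 12500000 = 121120 ns, i.e. about 121 us between that flow's packets. As a sketch:

static u64 demo_pacing_delay_ns(u32 plen, unsigned long rate)
{
	u64 len = (u64)plen * NSEC_PER_SEC;

	if (likely(rate))
		len = div64_ul(len, rate);	/* ns this packet "costs" */
	if (len > NSEC_PER_SEC)
		len = NSEC_PER_SEC;	/* clamp, as the code above does */
	return len;
}
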
@@ -560,6 +602,15 @@
 
 static void fq_flow_purge(struct fq_flow *flow)
 {
+	struct rb_node *p = rb_first(&flow->t_root);
+
+	while (p) {
+		struct sk_buff *skb = rb_to_skb(p);
+
+		p = rb_next(p);
+		rb_erase(&skb->rbnode, &flow->t_root);
+		rtnl_kfree_skbs(skb, skb);
+	}
 	rtnl_kfree_skbs(flow->head, flow->tail);
 	flow->head = NULL;
 	flow->qlen = 0;
@@ -696,6 +747,7 @@
 	[TCA_FQ_BUCKETS_LOG]		= { .type = NLA_U32 },
 	[TCA_FQ_FLOW_REFILL_DELAY]	= { .type = NLA_U32 },
 	[TCA_FQ_LOW_RATE_THRESHOLD]	= { .type = NLA_U32 },
+	[TCA_FQ_CE_THRESHOLD]		= { .type = NLA_U32 },
 };
 
 static int fq_change(struct Qdisc *sch, struct nlattr *opt,
@@ -710,7 +762,8 @@
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -748,9 +801,11 @@
 		pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
 				    nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]));
 
-	if (tb[TCA_FQ_FLOW_MAX_RATE])
-		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
+	if (tb[TCA_FQ_FLOW_MAX_RATE]) {
+		u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
 
+		q->flow_max_rate = (rate == ~0U) ? ~0UL : rate;
+	}
 	if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
 		q->low_rate_threshold =
 			nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
@@ -773,6 +828,10 @@
 	if (tb[TCA_FQ_ORPHAN_MASK])
 		q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
 
+	if (tb[TCA_FQ_CE_THRESHOLD])
+		q->ce_threshold = (u64)NSEC_PER_USEC *
+				  nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
+
 	if (!err) {
 		sch_tree_unlock(sch);
 		err = fq_resize(sch, fq_log);
@@ -813,7 +872,7 @@
 	q->quantum		= 2 * psched_mtu(qdisc_dev(sch));
 	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
 	q->flow_refill_delay	= msecs_to_jiffies(40);
-	q->flow_max_rate	= ~0U;
+	q->flow_max_rate	= ~0UL;
 	q->time_next_delayed_flow = ~0ULL;
 	q->rate_enable		= 1;
 	q->new_flows.first	= NULL;
@@ -823,7 +882,11 @@
 	q->fq_trees_log		= ilog2(1024);
 	q->orphan_mask		= 1024 - 1;
 	q->low_rate_threshold	= 550000 / 8;
-	qdisc_watchdog_init(&q->watchdog, sch);
+
+	/* Default ce_threshold of 4294 seconds */
+	q->ce_threshold		= (u64)NSEC_PER_USEC * ~0U;
+
+	qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);
 
 	if (opt)
 		err = fq_change(sch, opt, extack);
@@ -836,25 +899,30 @@
 static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct fq_sched_data *q = qdisc_priv(sch);
+	u64 ce_threshold = q->ce_threshold;
 	struct nlattr *opts;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 
 	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
 
+	do_div(ce_threshold, NSEC_PER_USEC);
+
 	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
 	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
 	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
 	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
-	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
+	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE,
+			min_t(unsigned long, q->flow_max_rate, ~0U)) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
 			jiffies_to_usecs(q->flow_refill_delay)) ||
 	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
 	    nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
 			q->low_rate_threshold) ||
+	    nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
 	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
 		goto nla_put_failure;
 
@@ -873,7 +941,7 @@
 
 	st.gc_flows		  = q->stat_gc_flows;
 	st.highprio_packets	  = q->stat_internal_packets;
-	st.tcp_retrans		  = q->stat_tcp_retrans;
+	st.tcp_retrans		  = 0;
 	st.throttled		  = q->stat_throttled;
 	st.flows_plimit		  = q->stat_flows_plimit;
 	st.pkts_too_long	  = q->stat_pkts_too_long;
@@ -884,6 +952,7 @@
 	st.throttled_flows	  = q->throttled_flows;
 	st.unthrottle_latency_ns  = min_t(unsigned long,
 					  q->unthrottle_latency_ns, ~0U);
+	st.ce_mark		  = q->stat_ce_mark;
 	sch_tree_unlock(sch);
 
 	return gnet_stats_copy_app(d, &st, sizeof(st));
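
The new ce_threshold knob CE-marks packets that have waited longer than the threshold past their scheduled departure time. The default set in fq_init() is deliberately unreachable: NSEC_PER_USEC * ~0U = 1000 * 4294967295 ns, about 4294.97 seconds, matching the "4294 seconds" comment, so marking stays off until configured. The dump path reverses the scaling, as in this sketch:

/* ns -> us, as fq_dump() does; do_div() updates its first argument. */
static u32 demo_ce_threshold_usec(u64 ce_threshold_ns)
{
	do_div(ce_threshold_ns, NSEC_PER_USEC);
	return (u32)ce_threshold_ns;
}
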
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 6c0a9d5..c261c0a 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Fair Queue CoDel discipline
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
  */
 
@@ -49,7 +45,6 @@
 	struct sk_buff	  *tail;
 	struct list_head  flowchain;
 	int		  deficit;
-	u32		  dropped; /* number of drops (or ECN marks) on this flow */
 	struct codel_vars cvars;
 }; /* please try to keep this structure <= 64 bytes */
 
@@ -124,7 +119,7 @@
 	struct sk_buff *skb = flow->head;
 
 	flow->head = skb->next;
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 	return skb;
 }
 
@@ -177,7 +172,8 @@
 		__qdisc_drop(skb, to_free);
 	} while (++i < max_packets && len < threshold);
 
-	flow->dropped += i;
+	/* Also tell codel to increase its signal strength */
+	flow->cvars.count += i;
 	q->backlogs[idx] -= len;
 	q->memory_usage -= mem;
 	sch->qstats.drops += i;
@@ -215,7 +211,6 @@
 		list_add_tail(&flow->flowchain, &q->new_flows);
 		q->new_flow_count++;
 		flow->deficit = q->quantum;
-		flow->dropped = 0;
 	}
 	get_codel_cb(skb)->mem_usage = skb->truesize;
 	q->memory_usage += get_codel_cb(skb)->mem_usage;
@@ -290,7 +285,6 @@
 	struct sk_buff *skb;
 	struct fq_codel_flow *flow;
 	struct list_head *head;
-	u32 prev_drop_count, prev_ecn_mark;
 
 begin:
 	head = &q->new_flows;
@@ -307,16 +301,10 @@
 		goto begin;
 	}
 
-	prev_drop_count = q->cstats.drop_count;
-	prev_ecn_mark = q->cstats.ecn_mark;
-
 	skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams,
 			    &flow->cvars, &q->cstats, qdisc_pkt_len,
 			    codel_get_enqueue_time, drop_func, dequeue_func);
 
-	flow->dropped += q->cstats.drop_count - prev_drop_count;
-	flow->dropped += q->cstats.ecn_mark - prev_ecn_mark;
-
 	if (!skb) {
 		/* force a pass through old_flows to prevent starvation */
 		if ((head == &q->new_flows) && !list_empty(&q->old_flows))
@@ -387,8 +375,8 @@
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_FQ_CODEL_MAX, opt,
+					  fq_codel_policy, NULL);
 	if (err < 0)
 		return err;
 	if (tb[TCA_FQ_CODEL_FLOWS]) {
@@ -527,7 +515,7 @@
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 	struct nlattr *opts;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 
@@ -600,8 +588,6 @@
 static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
 			      u32 classid)
 {
-	/* we cannot bypass queue discipline anymore */
-	sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
@@ -664,7 +650,7 @@
 			sch_tree_unlock(sch);
 		}
 		qs.backlog = q->backlogs[idx];
-		qs.drops = flow->dropped;
+		qs.drops = 0;
 	}
 	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
 		return -1;
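
Deleting the per-flow "dropped" counter works because CoDel already carries the signal: fq_codel_drop() now adds its batch-drop count to cvars.count, and the control law shortens the next drop interval roughly as interval / sqrt(count), so the next CoDel pass reacts as if it had dropped those packets itself. A deliberately simplified sketch of that law (the real code uses a cached reciprocal square root rather than int_sqrt()):

/* next_drop_time = now + interval / sqrt(count); simplified sketch */
static u64 demo_control_law(u64 now, u64 interval, u32 count)
{
	return now + interval / int_sqrt(count ?: 1);
}
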
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69078c8..8769b4b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_generic.c	Generic packet scheduler routines.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
  *              - Ingress support
@@ -32,6 +28,7 @@
 #include <net/pkt_sched.h>
 #include <net/dst.h>
 #include <trace/events/qdisc.h>
+#include <trace/events/net.h>
 #include <net/xfrm.h>
 
 /* Qdisc to use by default */
@@ -49,6 +46,8 @@
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
 
+#define SKB_XOFF_MAGIC ((struct sk_buff *)1UL)
+
 static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
 {
 	const struct netdev_queue *txq = q->dev_queue;
@@ -74,7 +73,7 @@
 				q->q.qlen--;
 			}
 		} else {
-			skb = NULL;
+			skb = SKB_XOFF_MAGIC;
 		}
 	}
 
@@ -118,52 +117,36 @@
 		spin_unlock(lock);
 }
 
-static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	while (skb) {
-		struct sk_buff *next = skb->next;
+	spinlock_t *lock = NULL;
 
-		__skb_queue_tail(&q->gso_skb, skb);
-		q->qstats.requeues++;
-		qdisc_qstats_backlog_inc(q, skb);
-		q->q.qlen++;	/* it's still part of the queue */
-
-		skb = next;
+	if (q->flags & TCQ_F_NOLOCK) {
+		lock = qdisc_lock(q);
+		spin_lock(lock);
 	}
-	__netif_schedule(q);
 
-	return 0;
-}
-
-static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
-{
-	spinlock_t *lock = qdisc_lock(q);
-
-	spin_lock(lock);
 	while (skb) {
 		struct sk_buff *next = skb->next;
 
 		__skb_queue_tail(&q->gso_skb, skb);
 
-		qdisc_qstats_cpu_requeues_inc(q);
-		qdisc_qstats_cpu_backlog_inc(q, skb);
-		qdisc_qstats_cpu_qlen_inc(q);
+		/* it's still part of the queue */
+		if (qdisc_is_percpu_stats(q)) {
+			qdisc_qstats_cpu_requeues_inc(q);
+			qdisc_qstats_cpu_backlog_inc(q, skb);
+			qdisc_qstats_cpu_qlen_inc(q);
+		} else {
+			q->qstats.requeues++;
+			qdisc_qstats_backlog_inc(q, skb);
+			q->q.qlen++;
+		}
 
 		skb = next;
 	}
-	spin_unlock(lock);
-
+	if (lock)
+		spin_unlock(lock);
 	__netif_schedule(q);
-
-	return 0;
-}
-
-static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
-{
-	if (q->flags & TCQ_F_NOLOCK)
-		return dev_requeue_skb_locked(skb, q);
-	else
-		return __dev_requeue_skb(skb, q);
 }
 
 static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -184,7 +167,7 @@
 		skb = nskb;
 		(*packets)++; /* GSO counts as one pkt */
 	}
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 }
 
 /* This variant of try_bulk_dequeue_skb() makes sure
@@ -210,7 +193,7 @@
 		skb = nskb;
 	} while (++cnt < 8);
 	(*packets) += cnt;
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 }
 
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
@@ -272,8 +255,11 @@
 		return skb;
 
 	skb = qdisc_dequeue_skb_bad_txq(q);
-	if (unlikely(skb))
+	if (unlikely(skb)) {
+		if (skb == SKB_XOFF_MAGIC)
+			return NULL;
 		goto bulk;
+	}
 	skb = q->dequeue(q);
 	if (skb) {
 bulk:
@@ -457,6 +443,7 @@
 			}
 
 			if (some_queue_timedout) {
+				trace_net_dev_xmit_timeout(dev, i);
 				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
 				       dev->name, netdev_drivername(dev), i);
 				dev->netdev_ops->ndo_tx_timeout(dev);
@@ -500,7 +487,7 @@
  *	netif_carrier_on - set carrier
  *	@dev: network device
  *
- * Device has detected that carrier.
+ * Device has detected acquisition of carrier.
  */
 void netif_carrier_on(struct net_device *dev)
 {
@@ -559,7 +546,7 @@
 };
 
 static struct netdev_queue noop_netdev_queue = {
-	.qdisc		=	&noop_qdisc,
+	RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
 	.qdisc_sleeping	=	&noop_qdisc,
 };
 
@@ -572,6 +559,18 @@
 	.dev_queue	=	&noop_netdev_queue,
 	.running	=	SEQCNT_ZERO(noop_qdisc.running),
 	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
+	.gso_skb = {
+		.next = (struct sk_buff *)&noop_qdisc.gso_skb,
+		.prev = (struct sk_buff *)&noop_qdisc.gso_skb,
+		.qlen = 0,
+		.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.gso_skb.lock),
+	},
+	.skb_bad_txq = {
+		.next = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
+		.prev = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
+		.qlen = 0,
+		.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock),
+	},
 };
 EXPORT_SYMBOL(noop_qdisc);
 
@@ -630,14 +629,14 @@
 
 	err = skb_array_produce(q, skb);
 
-	if (unlikely(err))
-		return qdisc_drop_cpu(skb, qdisc, to_free);
+	if (unlikely(err)) {
+		if (qdisc_is_percpu_stats(qdisc))
+			return qdisc_drop_cpu(skb, qdisc, to_free);
+		else
+			return qdisc_drop(skb, qdisc, to_free);
+	}
 
-	qdisc_qstats_cpu_qlen_inc(qdisc);
-	/* Note: skb can not be used after skb_array_produce(),
-	 * so we better not use qdisc_qstats_cpu_backlog_inc()
-	 */
-	this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
+	qdisc_update_stats_at_enqueue(qdisc, pkt_len);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -656,9 +655,9 @@
 		skb = __skb_array_consume(q);
 	}
 	if (likely(skb)) {
-		qdisc_qstats_cpu_backlog_dec(qdisc, skb);
-		qdisc_bstats_cpu_update(qdisc, skb);
-		qdisc_qstats_cpu_qlen_dec(qdisc);
+		qdisc_update_stats_at_dequeue(qdisc, skb);
+	} else {
+		qdisc->empty = true;
 	}
 
 	return skb;
@@ -698,11 +697,14 @@
 			kfree_skb(skb);
 	}
 
-	for_each_possible_cpu(i) {
-		struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
+	if (qdisc_is_percpu_stats(qdisc)) {
+		for_each_possible_cpu(i) {
+			struct gnet_stats_queue *q;
 
-		q->backlog = 0;
-		q->qlen = 0;
+			q = per_cpu_ptr(qdisc->cpu_qstats, i);
+			q->backlog = 0;
+			q->qlen = 0;
+		}
 	}
 }
 
@@ -797,9 +799,6 @@
 };
 EXPORT_SYMBOL(pfifo_fast_ops);
 
-static struct lock_class_key qdisc_tx_busylock;
-static struct lock_class_key qdisc_running_key;
-
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  const struct Qdisc_ops *ops,
 			  struct netlink_ext_ack *extack)
@@ -852,26 +851,25 @@
 	}
 
 	spin_lock_init(&sch->busylock);
-	lockdep_set_class(&sch->busylock,
-			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
-
 	/* seqlock has the same scope of busylock, for NOLOCK qdisc */
 	spin_lock_init(&sch->seqlock);
-	lockdep_set_class(&sch->busylock,
-			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
-
 	seqcount_init(&sch->running);
-	lockdep_set_class(&sch->running,
-			  dev->qdisc_running_key ?: &qdisc_running_key);
 
 	sch->ops = ops;
 	sch->flags = ops->static_flags;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev_queue = dev_queue;
+	sch->empty = true;
 	dev_hold(dev);
 	refcount_set(&sch->refcnt, 1);
 
+	if (sch != &noop_qdisc) {
+		lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key);
+		lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key);
+		lockdep_set_class(&sch->running, &dev->qdisc_running_key);
+	}
+
 	return sch;
 errout1:
 	kfree(p);
@@ -901,7 +899,7 @@
 	if (!ops->init || ops->init(sch, NULL, extack) == 0)
 		return sch;
 
-	qdisc_destroy(sch);
+	qdisc_put(sch);
 	return NULL;
 }
 EXPORT_SYMBOL(qdisc_create_dflt);
@@ -941,15 +939,18 @@
 	kfree((char *) qdisc - qdisc->padded);
 }
 
-void qdisc_destroy(struct Qdisc *qdisc)
+static void qdisc_free_cb(struct rcu_head *head)
+{
+	struct Qdisc *q = container_of(head, struct Qdisc, rcu);
+
+	qdisc_free(q);
+}
+
+static void qdisc_destroy(struct Qdisc *qdisc)
 {
 	const struct Qdisc_ops  *ops = qdisc->ops;
 	struct sk_buff *skb, *tmp;
 
-	if (qdisc->flags & TCQ_F_BUILTIN ||
-	    !refcount_dec_and_test(&qdisc->refcnt))
-		return;
-
 #ifdef CONFIG_NET_SCHED
 	qdisc_hash_del(qdisc);
 
@@ -974,9 +975,37 @@
 		kfree_skb_list(skb);
 	}
 
-	qdisc_free(qdisc);
+	call_rcu(&qdisc->rcu, qdisc_free_cb);
 }
-EXPORT_SYMBOL(qdisc_destroy);
+
+void qdisc_put(struct Qdisc *qdisc)
+{
+	if (!qdisc)
+		return;
+
+	if (qdisc->flags & TCQ_F_BUILTIN ||
+	    !refcount_dec_and_test(&qdisc->refcnt))
+		return;
+
+	qdisc_destroy(qdisc);
+}
+EXPORT_SYMBOL(qdisc_put);
+
+/* Version of qdisc_put() that is called with rtnl mutex unlocked.
+ * Intended to be used as optimization, this function only takes rtnl lock if
+ * qdisc reference counter reached zero.
+ */
+
+void qdisc_put_unlocked(struct Qdisc *qdisc)
+{
+	if (qdisc->flags & TCQ_F_BUILTIN ||
+	    !refcount_dec_and_rtnl_lock(&qdisc->refcnt))
+		return;
+
+	qdisc_destroy(qdisc);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL(qdisc_put_unlocked);
 
 /* Attach toplevel qdisc to device queue. */
 struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
@@ -1009,6 +1038,8 @@
 
 	if (dev->priv_flags & IFF_NO_QUEUE)
 		ops = &noqueue_qdisc_ops;
+	else if (dev->type == ARPHRD_CAN)
+		ops = &pfifo_fast_ops;
 
 	qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
 	if (!qdisc) {
@@ -1245,8 +1276,6 @@
 
 	rcu_assign_pointer(dev_queue->qdisc, qdisc);
 	dev_queue->qdisc_sleeping = qdisc;
-	__skb_queue_head_init(&qdisc->gso_skb);
-	__skb_queue_head_init(&qdisc->skb_bad_txq);
 }
 
 void dev_init_scheduler(struct net_device *dev)
@@ -1270,7 +1299,7 @@
 		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
 		dev_queue->qdisc_sleeping = qdisc_default;
 
-		qdisc_destroy(qdisc);
+		qdisc_put(qdisc);
 	}
 }
 
@@ -1279,7 +1308,7 @@
 	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
 	if (dev_ingress_queue(dev))
 		shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
-	qdisc_destroy(dev->qdisc);
+	qdisc_put(dev->qdisc);
 	dev->qdisc = &noop_qdisc;
 
 	WARN_ON(timer_pending(&dev->watchdog_timer));
@@ -1328,13 +1357,17 @@
 void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 			  struct tcf_proto *tp_head)
 {
-	struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
+	/* Protected with chain0->filter_chain_lock.
+	 * Can't access chain directly because tp_head can be NULL.
+	 */
+	struct mini_Qdisc *miniq_old =
+		rcu_dereference_protected(*miniqp->p_miniq, 1);
 	struct mini_Qdisc *miniq;
 
 	if (!tp_head) {
 		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
 		/* Wait for flying RCU callback before it is freed. */
-		rcu_barrier_bh();
+		rcu_barrier();
 		return;
 	}
 
@@ -1342,10 +1375,10 @@
 		&miniqp->miniq1 : &miniqp->miniq2;
 
 	/* We need to make sure that readers won't see the miniq
-	 * we are about to modify. So wait until previous call_rcu_bh callback
+	 * we are about to modify. So wait until previous call_rcu callback
 	 * is done.
 	 */
-	rcu_barrier_bh();
+	rcu_barrier();
 	miniq->filter_list = tp_head;
 	rcu_assign_pointer(*miniqp->p_miniq, miniq);
 
@@ -1354,7 +1387,7 @@
 		 * block potential new user of miniq_old until all readers
 		 * are not seeing it.
 		 */
-		call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func);
+		call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
 }
 EXPORT_SYMBOL(mini_qdisc_pair_swap);
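
The sch_generic hunks above formalize qdisc teardown: qdisc_destroy() goes static, every caller moves to qdisc_put(), which tolerates NULL, skips builtin qdiscs, and destroys only on the final reference, and the memory is reclaimed through call_rcu() so lockless (TCQ_F_NOLOCK) readers still holding a pointer finish safely. The general refcount-plus-RCU lifetime shape, as a sketch (demo_* names are illustrative):

#include <linux/refcount.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_obj {
	refcount_t refcnt;
	struct rcu_head rcu;
};

static void demo_free_cb(struct rcu_head *head)
{
	kfree(container_of(head, struct demo_obj, rcu));
}

static void demo_put(struct demo_obj *obj)
{
	if (!obj)
		return;			/* qdisc_put() tolerates NULL too */
	if (!refcount_dec_and_test(&obj->refcnt))
		return;			/* other holders remain */
	/* teardown that needs the object intact goes here ... */
	call_rcu(&obj->rcu, demo_free_cb);	/* free after readers drain */
}
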
 
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 4a042ab..8599c6f 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -1,12 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_gred.c	Generic Random Early Detection queue.
  *
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
  *
  *             991129: -  Bug fix with grio mode
@@ -23,19 +18,23 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
+#include <net/pkt_cls.h>
 #include <net/pkt_sched.h>
 #include <net/red.h>
 
 #define GRED_DEF_PRIO (MAX_DPs / 2)
 #define GRED_VQ_MASK (MAX_DPs - 1)
 
+#define GRED_VQ_RED_FLAGS	(TC_RED_ECN | TC_RED_HARDDROP)
+
 struct gred_sched_data;
 struct gred_sched;
 
 struct gred_sched_data {
 	u32		limit;		/* HARD maximal queue length	*/
 	u32		DP;		/* the drop parameters */
-	u32		bytesin;	/* bytes seen on virtualQ so far*/
+	u32		red_flags;	/* virtualQ version of red_flags */
+	u64		bytesin;	/* bytes seen on virtualQ so far */
 	u32		packetsin;	/* packets seen on virtualQ so far*/
 	u32		backlog;	/* bytes on the virtualQ */
 	u8		prio;		/* the prio of this vq */
@@ -139,14 +138,27 @@
 	table->wred_set.qidlestart = q->vars.qidlestart;
 }
 
-static inline int gred_use_ecn(struct gred_sched *t)
+static int gred_use_ecn(struct gred_sched_data *q)
 {
-	return t->red_flags & TC_RED_ECN;
+	return q->red_flags & TC_RED_ECN;
 }
 
-static inline int gred_use_harddrop(struct gred_sched *t)
+static int gred_use_harddrop(struct gred_sched_data *q)
 {
-	return t->red_flags & TC_RED_HARDDROP;
+	return q->red_flags & TC_RED_HARDDROP;
+}
+
+static bool gred_per_vq_red_flags_used(struct gred_sched *table)
+{
+	unsigned int i;
+
+	/* Local per-vq flags couldn't have been set unless the global flags are 0 */
+	if (table->red_flags)
+		return false;
+	for (i = 0; i < MAX_DPs; i++)
+		if (table->tab[i] && table->tab[i]->red_flags)
+			return true;
+	return false;
 }
 
 static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -212,7 +224,7 @@
 
 	case RED_PROB_MARK:
 		qdisc_qstats_overlimit(sch);
-		if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+		if (!gred_use_ecn(q) || !INET_ECN_set_ce(skb)) {
 			q->stats.prob_drop++;
 			goto congestion_drop;
 		}
@@ -222,7 +234,7 @@
 
 	case RED_HARD_MARK:
 		qdisc_qstats_overlimit(sch);
-		if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+		if (gred_use_harddrop(q) || !gred_use_ecn(q) ||
 		    !INET_ECN_set_ce(skb)) {
 			q->stats.forced_drop++;
 			goto congestion_drop;
@@ -295,15 +307,103 @@
 	}
 }
 
+static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
+{
+	struct gred_sched *table = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_gred_qopt_offload opt = {
+		.command	= command,
+		.handle		= sch->handle,
+		.parent		= sch->parent,
+	};
+
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return;
+
+	if (command == TC_GRED_REPLACE) {
+		unsigned int i;
+
+		opt.set.grio_on = gred_rio_mode(table);
+		opt.set.wred_on = gred_wred_mode(table);
+		opt.set.dp_cnt = table->DPs;
+		opt.set.dp_def = table->def;
+
+		for (i = 0; i < table->DPs; i++) {
+			struct gred_sched_data *q = table->tab[i];
+
+			if (!q)
+				continue;
+			opt.set.tab[i].present = true;
+			opt.set.tab[i].limit = q->limit;
+			opt.set.tab[i].prio = q->prio;
+			opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
+			opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
+			opt.set.tab[i].is_ecn = gred_use_ecn(q);
+			opt.set.tab[i].is_harddrop = gred_use_harddrop(q);
+			opt.set.tab[i].probability = q->parms.max_P;
+			opt.set.tab[i].backlog = &q->backlog;
+		}
+		opt.set.qstats = &sch->qstats;
+	}
+
+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt);
+}
+
+static int gred_offload_dump_stats(struct Qdisc *sch)
+{
+	struct gred_sched *table = qdisc_priv(sch);
+	struct tc_gred_qopt_offload *hw_stats;
+	unsigned int i;
+	int ret;
+
+	hw_stats = kzalloc(sizeof(*hw_stats), GFP_KERNEL);
+	if (!hw_stats)
+		return -ENOMEM;
+
+	hw_stats->command = TC_GRED_STATS;
+	hw_stats->handle = sch->handle;
+	hw_stats->parent = sch->parent;
+
+	for (i = 0; i < MAX_DPs; i++)
+		if (table->tab[i])
+			hw_stats->stats.xstats[i] = &table->tab[i]->stats;
+
+	ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
+	/* Even if the driver returns failure, adjust the stats - offload may
+	 * have ended but the driver still wants to adjust the values.
+	 */
+	for (i = 0; i < MAX_DPs; i++) {
+		if (!table->tab[i])
+			continue;
+		table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets;
+		table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
+		table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
+
+		_bstats_update(&sch->bstats,
+			       hw_stats->stats.bstats[i].bytes,
+			       hw_stats->stats.bstats[i].packets);
+		sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
+		sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
+		sch->qstats.drops += hw_stats->stats.qstats[i].drops;
+		sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
+		sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
+	}
+
+	kfree(hw_stats);
+	return ret;
+}
+
 static inline void gred_destroy_vq(struct gred_sched_data *q)
 {
 	kfree(q);
 }
 
-static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
+static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps,
+				 struct netlink_ext_ack *extack)
 {
 	struct gred_sched *table = qdisc_priv(sch);
 	struct tc_gred_sopt *sopt;
+	bool red_flags_changed;
 	int i;
 
 	if (!dps)
@@ -311,13 +411,28 @@
 
 	sopt = nla_data(dps);
 
-	if (sopt->DPs > MAX_DPs || sopt->DPs == 0 ||
-	    sopt->def_DP >= sopt->DPs)
+	if (sopt->DPs > MAX_DPs) {
+		NL_SET_ERR_MSG_MOD(extack, "number of virtual queues too high");
 		return -EINVAL;
+	}
+	if (sopt->DPs == 0) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "number of virtual queues can't be 0");
+		return -EINVAL;
+	}
+	if (sopt->def_DP >= sopt->DPs) {
+		NL_SET_ERR_MSG_MOD(extack, "default virtual queue above virtual queue count");
+		return -EINVAL;
+	}
+	if (sopt->flags && gred_per_vq_red_flags_used(table)) {
+		NL_SET_ERR_MSG_MOD(extack, "can't set per-Qdisc RED flags when per-virtual queue flags are used");
+		return -EINVAL;
+	}
 
 	sch_tree_lock(sch);
 	table->DPs = sopt->DPs;
 	table->def = sopt->def_DP;
+	red_flags_changed = table->red_flags != sopt->flags;
 	table->red_flags = sopt->flags;
 
 	/*
@@ -337,6 +452,12 @@
 		gred_disable_wred_mode(table);
 	}
 
+	if (red_flags_changed)
+		for (i = 0; i < table->DPs; i++)
+			if (table->tab[i])
+				table->tab[i]->red_flags =
+					table->red_flags & GRED_VQ_RED_FLAGS;
+
 	for (i = table->DPs; i < MAX_DPs; i++) {
 		if (table->tab[i]) {
 			pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n",
@@ -346,25 +467,30 @@
 		}
 	}
 
+	gred_offload(sch, TC_GRED_REPLACE);
 	return 0;
 }
 
 static inline int gred_change_vq(struct Qdisc *sch, int dp,
 				 struct tc_gred_qopt *ctl, int prio,
 				 u8 *stab, u32 max_P,
-				 struct gred_sched_data **prealloc)
+				 struct gred_sched_data **prealloc,
+				 struct netlink_ext_ack *extack)
 {
 	struct gred_sched *table = qdisc_priv(sch);
 	struct gred_sched_data *q = table->tab[dp];
 
-	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) {
+		NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters");
 		return -EINVAL;
+	}
 
 	if (!q) {
 		table->tab[dp] = q = *prealloc;
 		*prealloc = NULL;
 		if (!q)
 			return -ENOMEM;
+		q->red_flags = table->red_flags & GRED_VQ_RED_FLAGS;
 	}
 
 	q->DP = dp;
@@ -384,14 +510,128 @@
 	return 0;
 }
 
+static const struct nla_policy gred_vq_policy[TCA_GRED_VQ_MAX + 1] = {
+	[TCA_GRED_VQ_DP]	= { .type = NLA_U32 },
+	[TCA_GRED_VQ_FLAGS]	= { .type = NLA_U32 },
+};
+
+static const struct nla_policy gred_vqe_policy[TCA_GRED_VQ_ENTRY_MAX + 1] = {
+	[TCA_GRED_VQ_ENTRY]	= { .type = NLA_NESTED },
+};
+
 static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
 	[TCA_GRED_PARMS]	= { .len = sizeof(struct tc_gred_qopt) },
 	[TCA_GRED_STAB]		= { .len = 256 },
 	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
 	[TCA_GRED_MAX_P]	= { .type = NLA_U32 },
 	[TCA_GRED_LIMIT]	= { .type = NLA_U32 },
+	[TCA_GRED_VQ_LIST]	= { .type = NLA_NESTED },
 };
 
+static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry)
+{
+	struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
+	u32 dp;
+
+	nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry,
+				    gred_vq_policy, NULL);
+
+	dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);
+
+	if (tb[TCA_GRED_VQ_FLAGS])
+		table->tab[dp]->red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);
+}
+
+static void gred_vqs_apply(struct gred_sched *table, struct nlattr *vqs)
+{
+	const struct nlattr *attr;
+	int rem;
+
+	nla_for_each_nested(attr, vqs, rem) {
+		switch (nla_type(attr)) {
+		case TCA_GRED_VQ_ENTRY:
+			gred_vq_apply(table, attr);
+			break;
+		}
+	}
+}
+
+static int gred_vq_validate(struct gred_sched *table, u32 cdp,
+			    const struct nlattr *entry,
+			    struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
+	int err;
+	u32 dp;
+
+	err = nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry,
+					  gred_vq_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_GRED_VQ_DP]) {
+		NL_SET_ERR_MSG_MOD(extack, "Virtual queue with no index specified");
+		return -EINVAL;
+	}
+	dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);
+	if (dp >= table->DPs) {
+		NL_SET_ERR_MSG_MOD(extack, "Virtual queue with index out of bounds");
+		return -EINVAL;
+	}
+	if (dp != cdp && !table->tab[dp]) {
+		NL_SET_ERR_MSG_MOD(extack, "Virtual queue not yet instantiated");
+		return -EINVAL;
+	}
+
+	if (tb[TCA_GRED_VQ_FLAGS]) {
+		u32 red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);
+
+		if (table->red_flags && table->red_flags != red_flags) {
+			NL_SET_ERR_MSG_MOD(extack, "can't change per-virtual queue RED flags when per-Qdisc flags are used");
+			return -EINVAL;
+		}
+		if (red_flags & ~GRED_VQ_RED_FLAGS) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "invalid RED flags specified");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int gred_vqs_validate(struct gred_sched *table, u32 cdp,
+			     struct nlattr *vqs, struct netlink_ext_ack *extack)
+{
+	const struct nlattr *attr;
+	int rem, err;
+
+	err = nla_validate_nested_deprecated(vqs, TCA_GRED_VQ_ENTRY_MAX,
+					     gred_vqe_policy, extack);
+	if (err < 0)
+		return err;
+
+	nla_for_each_nested(attr, vqs, rem) {
+		switch (nla_type(attr)) {
+		case TCA_GRED_VQ_ENTRY:
+			err = gred_vq_validate(table, cdp, attr, extack);
+			if (err)
+				return err;
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "GRED_VQ_LIST can contain only entry attributes");
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Trailing data after parsing virtual queue list");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int gred_change(struct Qdisc *sch, struct nlattr *opt,
 		       struct netlink_ext_ack *extack)
 {
@@ -406,29 +646,40 @@
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy,
+					  extack);
 	if (err < 0)
 		return err;
 
 	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
 		if (tb[TCA_GRED_LIMIT] != NULL)
 			sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
-		return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
+		return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
 	}
 
 	if (tb[TCA_GRED_PARMS] == NULL ||
 	    tb[TCA_GRED_STAB] == NULL ||
-	    tb[TCA_GRED_LIMIT] != NULL)
+	    tb[TCA_GRED_LIMIT] != NULL) {
+		NL_SET_ERR_MSG_MOD(extack, "can't configure Qdisc and virtual queue at the same time");
 		return -EINVAL;
+	}
 
 	max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
 
-	err = -EINVAL;
 	ctl = nla_data(tb[TCA_GRED_PARMS]);
 	stab = nla_data(tb[TCA_GRED_STAB]);
 
-	if (ctl->DP >= table->DPs)
-		goto errout;
+	if (ctl->DP >= table->DPs) {
+		NL_SET_ERR_MSG_MOD(extack, "virtual queue index above virtual queue count");
+		return -EINVAL;
+	}
+
+	if (tb[TCA_GRED_VQ_LIST]) {
+		err = gred_vqs_validate(table, ctl->DP, tb[TCA_GRED_VQ_LIST],
+					extack);
+		if (err)
+			return err;
+	}
 
 	if (gred_rio_mode(table)) {
 		if (ctl->prio == 0) {
@@ -448,9 +699,13 @@
 	prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
 	sch_tree_lock(sch);
 
-	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc);
+	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc,
+			     extack);
 	if (err < 0)
-		goto errout_locked;
+		goto err_unlock_free;
+
+	if (tb[TCA_GRED_VQ_LIST])
+		gred_vqs_apply(table, tb[TCA_GRED_VQ_LIST]);
 
 	if (gred_rio_mode(table)) {
 		gred_disable_wred_mode(table);
@@ -458,12 +713,15 @@
 			gred_enable_wred_mode(table);
 	}
 
-	err = 0;
-
-errout_locked:
 	sch_tree_unlock(sch);
 	kfree(prealloc);
-errout:
+
+	gred_offload(sch, TC_GRED_REPLACE);
+	return 0;
+
+err_unlock_free:
+	sch_tree_unlock(sch);
+	kfree(prealloc);
 	return err;
 }
 
@@ -476,12 +734,16 @@
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy,
+					  extack);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
+	if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "virtual queue configuration can't be specified at initialization time");
 		return -EINVAL;
+	}
 
 	if (tb[TCA_GRED_LIMIT])
 		sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
@@ -489,13 +751,13 @@
 		sch->limit = qdisc_dev(sch)->tx_queue_len
 		             * psched_mtu(qdisc_dev(sch));
 
-	return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
+	return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
 }
 
 static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct gred_sched *table = qdisc_priv(sch);
-	struct nlattr *parms, *opts = NULL;
+	struct nlattr *parms, *vqs, *opts = NULL;
 	int i;
 	u32 max_p[MAX_DPs];
 	struct tc_gred_sopt sopt = {
@@ -505,7 +767,10 @@
 		.flags	= table->red_flags,
 	};
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (gred_offload_dump_stats(sch))
+		goto nla_put_failure;
+
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 	if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt))
@@ -522,7 +787,8 @@
 	if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
 		goto nla_put_failure;
 
-	parms = nla_nest_start(skb, TCA_GRED_PARMS);
+	/* Old style all-in-one dump of VQs */
+	parms = nla_nest_start_noflag(skb, TCA_GRED_PARMS);
 	if (parms == NULL)
 		goto nla_put_failure;
 
@@ -572,6 +838,58 @@
 
 	nla_nest_end(skb, parms);
 
+	/* Dump the VQs again, in more structured way */
+	vqs = nla_nest_start_noflag(skb, TCA_GRED_VQ_LIST);
+	if (!vqs)
+		goto nla_put_failure;
+
+	for (i = 0; i < MAX_DPs; i++) {
+		struct gred_sched_data *q = table->tab[i];
+		struct nlattr *vq;
+
+		if (!q)
+			continue;
+
+		vq = nla_nest_start_noflag(skb, TCA_GRED_VQ_ENTRY);
+		if (!vq)
+			goto nla_put_failure;
+
+		if (nla_put_u32(skb, TCA_GRED_VQ_DP, q->DP))
+			goto nla_put_failure;
+
+		if (nla_put_u32(skb, TCA_GRED_VQ_FLAGS, q->red_flags))
+			goto nla_put_failure;
+
+		/* Stats */
+		if (nla_put_u64_64bit(skb, TCA_GRED_VQ_STAT_BYTES, q->bytesin,
+				      TCA_GRED_VQ_PAD))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PACKETS, q->packetsin))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_BACKLOG,
+				gred_backlog(table, q, sch)))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_DROP,
+				q->stats.prob_drop))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_MARK,
+				q->stats.prob_mark))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_DROP,
+				q->stats.forced_drop))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_MARK,
+				q->stats.forced_mark))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_OTHER, q->stats.other))
+			goto nla_put_failure;
+
+		nla_nest_end(skb, vq);
+	}
+	nla_nest_end(skb, vqs);
+
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
@@ -588,6 +906,7 @@
 		if (table->tab[i])
 			gred_destroy_vq(table->tab[i]);
 	}
+	gred_offload(sch, TC_GRED_DESTROY);
 }
 
 static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
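
Annotation: a pattern worth noting in the GRED hunks above is that netlink input is
checked in full by gred_vqs_validate() (unlocked, free to fail with an extack
message) before gred_vqs_apply() mutates state under sch_tree_lock(), where
failure is no longer an option. A minimal stand-alone sketch of that two-phase
shape follows; all names in it are hypothetical, not kernel API.

#include <stdio.h>

/* hypothetical stand-ins for a qdisc config and a netlink request */
struct cfg { int limit; };
struct req { int limit; };

/* phase 1: may reject, touches nothing (cf. gred_vqs_validate) */
static int validate(const struct req *r)
{
	return (r->limit > 0 && r->limit <= 1000) ? 0 : -1;
}

/* phase 2: runs "under the lock" and cannot fail (cf. gred_vqs_apply) */
static void apply(struct cfg *c, const struct req *r)
{
	c->limit = r->limit;
}

int main(void)
{
	struct cfg c = { .limit = 100 };
	struct req bad = { .limit = -5 }, good = { .limit = 500 };

	if (validate(&bad) < 0)
		printf("rejected, cfg untouched: %d\n", c.limit);
	if (validate(&good) == 0) {
		apply(&c, &good);
		printf("applied: %d\n", c.limit);
	}
	return 0;
}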
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3278a76..433f219 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -845,16 +845,6 @@
 }
 
 static void
-hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
-{
-	unsigned int len = cl->qdisc->q.qlen;
-	unsigned int backlog = cl->qdisc->qstats.backlog;
-
-	qdisc_reset(cl->qdisc);
-	qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
-static void
 hfsc_adjust_levels(struct hfsc_class *cl)
 {
 	struct hfsc_class *p;
@@ -936,7 +926,8 @@
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_HFSC_MAX, opt, hfsc_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -1076,7 +1067,7 @@
 	qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
 	list_add_tail(&cl->siblings, &parent->children);
 	if (parent->level == 0)
-		hfsc_purge_queue(sch, parent);
+		qdisc_purge_queue(parent->qdisc);
 	hfsc_adjust_levels(parent);
 	sch_tree_unlock(sch);
 
@@ -1092,7 +1083,7 @@
 	struct hfsc_sched *q = qdisc_priv(sch);
 
 	tcf_block_put(cl->block);
-	qdisc_destroy(cl->qdisc);
+	qdisc_put(cl->qdisc);
 	gen_kill_estimator(&cl->rate_est);
 	if (cl != &q->root)
 		kfree(cl);
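
Annotation: the qdisc_destroy() -> qdisc_put() substitution here (and in the htb,
mq, mqprio, multiq, netem and prio hunks below) moves child teardown to
reference counting: each holder drops its own reference and only the last one
triggers destruction. A toy refcount sketch of the idea, with hypothetical
names and no locking:

#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refcnt;
	/* payload would live here */
};

static struct obj *obj_get(struct obj *o)
{
	o->refcnt++;
	return o;
}

/* analogous to qdisc_put(): frees only on the last reference */
static void obj_put(struct obj *o)
{
	if (--o->refcnt == 0) {
		printf("last reference dropped, destroying\n");
		free(o);
	}
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	o->refcnt = 1;	/* creator's reference */
	obj_get(o);	/* a second holder appears */
	obj_put(o);	/* creator lets go: object survives */
	obj_put(o);	/* destroyed here */
	return 0;
}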
@@ -1112,7 +1103,7 @@
 	list_del(&cl->siblings);
 	hfsc_adjust_levels(cl->cl_parent);
 
-	hfsc_purge_queue(sch, cl);
+	qdisc_purge_queue(cl->qdisc);
 	qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
 
 	sch_tree_unlock(sch);
@@ -1310,7 +1301,7 @@
 	if (cl->level == 0)
 		tcm->tcm_info = cl->qdisc->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 	if (hfsc_dump_curves(skb, cl) < 0)
@@ -1328,8 +1319,9 @@
 {
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
 	struct tc_hfsc_stats xstats;
+	__u32 qlen;
 
-	cl->qstats.backlog = cl->qdisc->qstats.backlog;
+	qdisc_qstats_qlen_backlog(cl->qdisc, &qlen, &cl->qstats.backlog);
 	xstats.level   = cl->level;
 	xstats.period  = cl->cl_vtperiod;
 	xstats.work    = cl->cl_total;
@@ -1337,7 +1329,7 @@
 
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
+	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
 		return -1;
 
 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
@@ -1539,8 +1531,10 @@
 static int
 hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 {
+	unsigned int len = qdisc_pkt_len(skb);
 	struct hfsc_class *cl;
 	int uninitialized_var(err);
+	bool first;
 
 	cl = hfsc_classify(skb, sch, &err);
 	if (cl == NULL) {
@@ -1550,6 +1544,7 @@
 		return err;
 	}
 
+	first = !cl->qdisc->q.qlen;
 	err = qdisc_enqueue(skb, cl->qdisc, to_free);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		if (net_xmit_drop_count(err)) {
@@ -1559,9 +1554,7 @@
 		return err;
 	}
 
-	if (cl->qdisc->q.qlen == 1) {
-		unsigned int len = qdisc_pkt_len(skb);
-
+	if (first) {
 		if (cl->cl_flags & HFSC_RSC)
 			init_ed(cl, len);
 		if (cl->cl_flags & HFSC_FSC)
@@ -1576,7 +1569,7 @@
 
 	}
 
-	qdisc_qstats_backlog_inc(sch, skb);
+	sch->qstats.backlog += len;
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index c3a8388..be35f03 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -1,14 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* net/sched/sch_hhf.c		Heavy-Hitter Filter (HHF)
  *
  * Copyright (C) 2013 Terry Lam <vtlam@google.com>
  * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com>
  */
 
-#include <linux/jhash.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
+#include <linux/siphash.h>
 #include <net/pkt_sched.h>
 #include <net/sock.h>
 
@@ -125,7 +126,7 @@
 
 struct hhf_sched_data {
 	struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
-	u32		   perturbation;   /* hash perturbation */
+	siphash_key_t	   perturbation;   /* hash perturbation */
 	u32		   quantum;        /* psched_mtu(qdisc_dev(sch)); */
 	u32		   drop_overlimit; /* number of times max qdisc packet
 					    * limit was hit
@@ -263,7 +264,7 @@
 	}
 
 	/* Get hashed flow-id of the skb. */
-	hash = skb_get_hash_perturb(skb, q->perturbation);
+	hash = skb_get_hash_perturb(skb, &q->perturbation);
 
 	/* Check if this packet belongs to an already established HH flow. */
 	flow_pos = hash & HHF_BIT_MASK;
@@ -330,7 +331,7 @@
 	struct sk_buff *skb = bucket->head;
 
 	bucket->head = skb->next;
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 	return skb;
 }
 
@@ -518,7 +519,8 @@
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -529,7 +531,7 @@
 		new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]);
 
 	non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
-	if (non_hh_quantum > INT_MAX)
+	if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX)
 		return -EINVAL;
 
 	sch_tree_lock(sch);
@@ -580,7 +582,7 @@
 
 	sch->limit = 1000;
 	q->quantum = psched_mtu(qdisc_dev(sch));
-	q->perturbation = prandom_u32();
+	get_random_bytes(&q->perturbation, sizeof(q->perturbation));
 	INIT_LIST_HEAD(&q->new_buckets);
 	INIT_LIST_HEAD(&q->old_buckets);
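
Annotation: the hhf changes above swap the single-word jhash perturbation for a
siphash_key_t filled from get_random_bytes(), so bucket placement becomes a
keyed PRF rather than a public hash with a guessable seed, and off-path senders
can no longer precompute colliding flows. A toy user-space illustration of the
keying idea; the mixer below is a stand-in for demonstration, not siphash:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* splitmix64-style mixer seeded with a secret key; purely
 * illustrative, the kernel uses real siphash */
static uint64_t toy_keyed_hash(uint64_t key, uint64_t flow)
{
	uint64_t z = flow + key + 0x9e3779b97f4a7c15ULL;

	z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL;
	z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL;
	return z ^ (z >> 31);
}

int main(void)
{
	/* the kernel draws this from get_random_bytes(); rand() is
	 * only a placeholder here */
	uint64_t key = ((uint64_t)rand() << 32) | (uint64_t)rand();

	for (uint64_t flow = 0; flow < 4; flow++)
		printf("flow %llu -> bucket %llu\n",
		       (unsigned long long)flow,
		       (unsigned long long)(toy_keyed_hash(key, flow) & 1023));
	return 0;
}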
 
@@ -654,7 +656,7 @@
 	struct hhf_sched_data *q = qdisc_priv(sch);
 	struct nlattr *opts;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 43c4bfe..8184c87 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Martin Devera, <devik@cdi.cz>
  *
  * Credits (in time order) for older HTB versions:
@@ -132,7 +128,7 @@
 		struct htb_class_inner {
 			struct htb_prio clprio[TC_HTB_NUMPRIO];
 		} inner;
-	} un;
+	};
 	s64			pq_key;
 
 	int			prio_activity;	/* for which prios are we active */
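
Annotation: mechanically, most of the htb churn below comes from this one hunk.
Dropping the `un` name makes the leaf/inner union anonymous, so every
cl->un.leaf.q access becomes cl->leaf.q with an identical memory layout. A tiny
stand-alone example of the same C feature, using a hypothetical type:

#include <stdio.h>

struct class_like {
	int level;
	union {			/* anonymous: members are reached directly */
		struct { int deficit; } leaf;
		struct { int feed; } inner;
	};
};

int main(void)
{
	struct class_like cl = { .level = 0 };

	cl.leaf.deficit = 42;	/* would have been cl.un.leaf.deficit */
	printf("%d\n", cl.leaf.deficit);
	return 0;
}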
@@ -165,7 +161,8 @@
 
 	/* non shaped skbs; let them go directly thru */
 	struct qdisc_skb_head	direct_queue;
-	long			direct_pkts;
+	u32			direct_pkts;
+	u32			overlimits;
 
 	struct qdisc_watchdog	watchdog;
 
@@ -411,13 +408,13 @@
 			int prio = ffz(~m);
 			m &= ~(1 << prio);
 
-			if (p->un.inner.clprio[prio].feed.rb_node)
+			if (p->inner.clprio[prio].feed.rb_node)
 				/* parent already has its feed in use so that
 				 * reset bit in mask as parent is already ok
 				 */
 				mask &= ~(1 << prio);
 
-			htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio);
+			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
 		}
 		p->prio_activity |= mask;
 		cl = p;
@@ -447,19 +444,19 @@
 			int prio = ffz(~m);
 			m &= ~(1 << prio);
 
-			if (p->un.inner.clprio[prio].ptr == cl->node + prio) {
+			if (p->inner.clprio[prio].ptr == cl->node + prio) {
 				/* we are removing child which is pointed to from
 				 * parent feed - forget the pointer but remember
 				 * classid
 				 */
-				p->un.inner.clprio[prio].last_ptr_id = cl->common.classid;
-				p->un.inner.clprio[prio].ptr = NULL;
+				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
+				p->inner.clprio[prio].ptr = NULL;
 			}
 
 			htb_safe_rb_erase(cl->node + prio,
-					  &p->un.inner.clprio[prio].feed);
+					  &p->inner.clprio[prio].feed);
 
-			if (!p->un.inner.clprio[prio].feed.rb_node)
+			if (!p->inner.clprio[prio].feed.rb_node)
 				mask |= 1 << prio;
 		}
 
@@ -533,8 +530,10 @@
 	if (new_mode == cl->cmode)
 		return;
 
-	if (new_mode == HTB_CANT_SEND)
+	if (new_mode == HTB_CANT_SEND) {
 		cl->overlimits++;
+		q->overlimits++;
+	}
 
 	if (cl->prio_activity) {	/* not necessary: speed optimization */
 		if (cl->cmode != HTB_CANT_SEND)
@@ -555,7 +554,7 @@
  */
 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 {
-	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
+	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
 
 	if (!cl->prio_activity) {
 		cl->prio_activity = 1 << cl->prio;
@@ -577,33 +576,18 @@
 	cl->prio_activity = 0;
 }
 
-static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
-			     struct qdisc_skb_head *qh)
-{
-	struct sk_buff *last = qh->tail;
-
-	if (last) {
-		skb->next = NULL;
-		last->next = skb;
-		qh->tail = skb;
-	} else {
-		qh->tail = skb;
-		qh->head = skb;
-	}
-	qh->qlen++;
-}
-
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		       struct sk_buff **to_free)
 {
 	int uninitialized_var(ret);
+	unsigned int len = qdisc_pkt_len(skb);
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = htb_classify(skb, sch, &ret);
 
 	if (cl == HTB_DIRECT) {
 		/* enqueue to helper queue */
 		if (q->direct_queue.qlen < q->direct_qlen) {
-			htb_enqueue_tail(skb, sch, &q->direct_queue);
+			__qdisc_enqueue_tail(skb, &q->direct_queue);
 			q->direct_pkts++;
 		} else {
 			return qdisc_drop(skb, sch, to_free);
@@ -615,7 +599,7 @@
 		__qdisc_drop(skb, to_free);
 		return ret;
 #endif
-	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q,
+	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
 					to_free)) != NET_XMIT_SUCCESS) {
 		if (net_xmit_drop_count(ret)) {
 			qdisc_qstats_drop(sch);
@@ -626,7 +610,7 @@
 		htb_activate(q, cl);
 	}
 
-	qdisc_qstats_backlog_inc(sch, skb);
+	sch->qstats.backlog += len;
 	sch->q.qlen++;
 	return NET_XMIT_SUCCESS;
 }
@@ -823,7 +807,7 @@
 			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 			if (!cl->level)
 				return cl;
-			clp = &cl->un.inner.clprio[prio];
+			clp = &cl->inner.clprio[prio];
 			(++sp)->root = clp->feed.rb_node;
 			sp->pptr = &clp->ptr;
 			sp->pid = &clp->last_ptr_id;
@@ -857,7 +841,7 @@
 		 * graft operation on the leaf since last dequeue;
 		 * simply deactivate and skip such class
 		 */
-		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
+		if (unlikely(cl->leaf.q->q.qlen == 0)) {
 			struct htb_class *next;
 			htb_deactivate(q, cl);
 
@@ -873,12 +857,12 @@
 			goto next;
 		}
 
-		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
+		skb = cl->leaf.q->dequeue(cl->leaf.q);
 		if (likely(skb != NULL))
 			break;
 
-		qdisc_warn_nonwc("htb", cl->un.leaf.q);
-		htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr:
+		qdisc_warn_nonwc("htb", cl->leaf.q);
+		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
 					 &q->hlevel[0].hprio[prio].ptr);
 		cl = htb_lookup_leaf(hprio, prio);
 
@@ -886,16 +870,16 @@
 
 	if (likely(skb != NULL)) {
 		bstats_update(&cl->bstats, skb);
-		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
-		if (cl->un.leaf.deficit[level] < 0) {
-			cl->un.leaf.deficit[level] += cl->quantum;
-			htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr :
+		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
+		if (cl->leaf.deficit[level] < 0) {
+			cl->leaf.deficit[level] += cl->quantum;
+			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
 						 &q->hlevel[0].hprio[prio].ptr);
 		}
 		/* this used to be after charge_class but this constellation
 		 * gives us slightly better performance
 		 */
-		if (!cl->un.leaf.q->q.qlen)
+		if (!cl->leaf.q->q.qlen)
 			htb_deactivate(q, cl);
 		htb_charge_class(q, cl, level, skb);
 	}
@@ -952,7 +936,6 @@
 				goto ok;
 		}
 	}
-	qdisc_qstats_overlimit(sch);
 	if (likely(next_event > q->now))
 		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
 	else
@@ -972,10 +955,10 @@
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 			if (cl->level)
-				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
+				memset(&cl->inner, 0, sizeof(cl->inner));
 			else {
-				if (cl->un.leaf.q)
-					qdisc_reset(cl->un.leaf.q);
+				if (cl->leaf.q)
+					qdisc_reset(cl->leaf.q);
 			}
 			cl->prio_activity = 0;
 			cl->cmode = HTB_CAN_SEND;
@@ -1027,7 +1010,8 @@
 	if (err)
 		return err;
 
-	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -1062,6 +1046,7 @@
 	struct nlattr *nest;
 	struct tc_htb_glob gopt;
 
+	sch->qstats.overlimits = q->overlimits;
 	/* Its safe to not acquire qdisc lock. As we hold RTNL,
 	 * no change can happen on the qdisc parameters.
 	 */
@@ -1072,7 +1057,7 @@
 	gopt.defcls = q->defcls;
 	gopt.debug = 0;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
@@ -1098,10 +1083,10 @@
 	 */
 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->common.classid;
-	if (!cl->level && cl->un.leaf.q)
-		tcm->tcm_info = cl->un.leaf.q->handle;
+	if (!cl->level && cl->leaf.q)
+		tcm->tcm_info = cl->leaf.q->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -1142,10 +1127,9 @@
 	};
 	__u32 qlen = 0;
 
-	if (!cl->level && cl->un.leaf.q) {
-		qlen = cl->un.leaf.q->q.qlen;
-		qs.backlog = cl->un.leaf.q->qstats.backlog;
-	}
+	if (!cl->level && cl->leaf.q)
+		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
+
 	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
 				    INT_MIN, INT_MAX);
 	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
@@ -1172,14 +1156,14 @@
 				     cl->common.classid, extack)) == NULL)
 		return -ENOBUFS;
 
-	*old = qdisc_replace(sch, new, &cl->un.leaf.q);
+	*old = qdisc_replace(sch, new, &cl->leaf.q);
 	return 0;
 }
 
 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
-	return !cl->level ? cl->un.leaf.q : NULL;
+	return !cl->level ? cl->leaf.q : NULL;
 }
 
 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
@@ -1205,15 +1189,15 @@
 {
 	struct htb_class *parent = cl->parent;
 
-	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
+	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
 
 	if (parent->cmode != HTB_CAN_SEND)
 		htb_safe_rb_erase(&parent->pq_node,
 				  &q->hlevel[parent->level].wait_pq);
 
 	parent->level = 0;
-	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
-	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
+	memset(&parent->inner, 0, sizeof(parent->inner));
+	parent->leaf.q = new_q ? new_q : &noop_qdisc;
 	parent->tokens = parent->buffer;
 	parent->ctokens = parent->cbuffer;
 	parent->t_c = ktime_get_ns();
@@ -1223,8 +1207,8 @@
 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
 	if (!cl->level) {
-		WARN_ON(!cl->un.leaf.q);
-		qdisc_destroy(cl->un.leaf.q);
+		WARN_ON(!cl->leaf.q);
+		qdisc_put(cl->leaf.q);
 	}
 	gen_kill_estimator(&cl->rate_est);
 	tcf_block_put(cl->block);
@@ -1285,13 +1269,8 @@
 
 	sch_tree_lock(sch);
 
-	if (!cl->level) {
-		unsigned int qlen = cl->un.leaf.q->q.qlen;
-		unsigned int backlog = cl->un.leaf.q->qstats.backlog;
-
-		qdisc_reset(cl->un.leaf.q);
-		qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
-	}
+	if (!cl->level)
+		qdisc_purge_queue(cl->leaf.q);
 
 	/* delete from hash and active; remainder in destroy_class */
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
@@ -1323,6 +1302,7 @@
 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_HTB_MAX + 1];
+	struct Qdisc *parent_qdisc = NULL;
 	struct tc_htb_opt *hopt;
 	u64 rate64, ceil64;
 	int warn = 0;
@@ -1331,7 +1311,8 @@
 	if (!opt)
 		goto failure;
 
-	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
+					  NULL);
 	if (err < 0)
 		goto failure;
 
@@ -1419,13 +1400,9 @@
 					  classid, NULL);
 		sch_tree_lock(sch);
 		if (parent && !parent->level) {
-			unsigned int qlen = parent->un.leaf.q->q.qlen;
-			unsigned int backlog = parent->un.leaf.q->qstats.backlog;
-
 			/* turn parent into inner node */
-			qdisc_reset(parent->un.leaf.q);
-			qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
-			qdisc_destroy(parent->un.leaf.q);
+			qdisc_purge_queue(parent->leaf.q);
+			parent_qdisc = parent->leaf.q;
 			if (parent->prio_activity)
 				htb_deactivate(q, parent);
 
@@ -1436,10 +1413,10 @@
 			}
 			parent->level = (parent->parent ? parent->parent->level
 					 : TC_HTB_MAXDEPTH) - 1;
-			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
+			memset(&parent->inner, 0, sizeof(parent->inner));
 		}
 		/* leaf (we) needs elementary qdisc */
-		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
+		cl->leaf.q = new_q ? new_q : &noop_qdisc;
 
 		cl->common.classid = classid;
 		cl->parent = parent;
@@ -1455,8 +1432,8 @@
 		qdisc_class_hash_insert(&q->clhash, &cl->common);
 		if (parent)
 			parent->children++;
-		if (cl->un.leaf.q != &noop_qdisc)
-			qdisc_hash_add(cl->un.leaf.q, true);
+		if (cl->leaf.q != &noop_qdisc)
+			qdisc_hash_add(cl->leaf.q, true);
 	} else {
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, NULL,
@@ -1478,7 +1455,7 @@
 	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
 
 	/* it used to be a nasty bug here, we have to check that node
-	 * is really leaf before changing cl->un.leaf !
+	 * is really leaf before changing cl->leaf !
 	 */
 	if (!cl->level) {
 		u64 quantum = cl->rate.rate_bytes_ps;
@@ -1504,6 +1481,7 @@
 	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
 
 	sch_tree_unlock(sch);
+	qdisc_put(parent_qdisc);
 
 	if (warn)
 		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index ce3f552..bf56aa5 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* net/sched/sch_ingress.c - Ingress and clsact qdisc
  *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
  * Authors:     Jamal Hadi Salim 1999
  */
 
@@ -87,7 +83,7 @@
 
 	mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
 
-	q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	q->block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 	q->block_info.chain_head_change = clsact_chain_head_change;
 	q->block_info.chain_head_change_priv = &q->miniqp;
 
@@ -106,7 +102,7 @@
 {
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
@@ -118,6 +114,7 @@
 }
 
 static const struct Qdisc_class_ops ingress_class_ops = {
+	.flags		=	QDISC_CLASS_OPS_DOIT_UNLOCKED,
 	.leaf		=	ingress_leaf,
 	.find		=	ingress_find,
 	.walk		=	ingress_walk,
@@ -220,7 +217,7 @@
 
 	mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
 
-	q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	q->ingress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 	q->ingress_block_info.chain_head_change = clsact_chain_head_change;
 	q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress;
 
@@ -231,7 +228,7 @@
 
 	mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
 
-	q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
+	q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
 	q->egress_block_info.chain_head_change = clsact_chain_head_change;
 	q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
 
@@ -250,6 +247,7 @@
 }
 
 static const struct Qdisc_class_ops clsact_class_ops = {
+	.flags		=	QDISC_CLASS_OPS_DOIT_UNLOCKED,
 	.leaf		=	ingress_leaf,
 	.find		=	clsact_find,
 	.walk		=	ingress_walk,
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index d6b8ae4..278c0b2 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/sch_mq.c		Classful multiqueue dummy scheduler
  *
  * Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -38,9 +35,8 @@
 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt);
 }
 
-static void mq_offload_stats(struct Qdisc *sch)
+static int mq_offload_stats(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
 	struct tc_mq_qopt_offload opt = {
 		.command = TC_MQ_STATS,
 		.handle = sch->handle,
@@ -50,8 +46,7 @@
 		},
 	};
 
-	if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc)
-		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt);
+	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_MQ, &opt);
 }
 
 static void mq_destroy(struct Qdisc *sch)
@@ -65,7 +60,7 @@
 	if (!priv->qdiscs)
 		return;
 	for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
-		qdisc_destroy(priv->qdiscs[ntx]);
+		qdisc_put(priv->qdiscs[ntx]);
 	kfree(priv->qdiscs);
 }
 
@@ -119,7 +114,7 @@
 		qdisc = priv->qdiscs[ntx];
 		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
 		if (old)
-			qdisc_destroy(old);
+			qdisc_put(old);
 #ifdef CONFIG_NET_SCHED
 		if (ntx < dev->real_num_tx_queues)
 			qdisc_hash_add(qdisc, false);
@@ -171,9 +166,8 @@
 
 		spin_unlock_bh(qdisc_lock(qdisc));
 	}
-	mq_offload_stats(sch);
 
-	return 0;
+	return mq_offload_stats(sch);
 }
 
 static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
@@ -196,6 +190,7 @@
 		    struct Qdisc **old, struct netlink_ext_ack *extack)
 {
 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+	struct tc_mq_qopt_offload graft_offload;
 	struct net_device *dev = qdisc_dev(sch);
 
 	if (dev->flags & IFF_UP)
@@ -206,6 +201,14 @@
 		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	if (dev->flags & IFF_UP)
 		dev_activate(dev);
+
+	graft_offload.handle = sch->handle;
+	graft_offload.graft_params.queue = cl - 1;
+	graft_offload.graft_params.child_handle = new ? new->handle : 0;
+	graft_offload.command = TC_MQ_GRAFT;
+
+	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old,
+				   TC_SETUP_QDISC_MQ, &graft_offload, extack);
 	return 0;
 }
 
@@ -242,8 +245,9 @@
 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
 
 	sch = dev_queue->qdisc_sleeping;
-	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
+	if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
+				  &sch->bstats) < 0 ||
+	    qdisc_qstats_copy(d, sch) < 0)
 		return -1;
 	return 0;
 }
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 0e9d761..0d0113a 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/sch_mqprio.c
  *
  * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -40,7 +37,7 @@
 		for (ntx = 0;
 		     ntx < dev->num_tx_queues && priv->qdiscs[ntx];
 		     ntx++)
-			qdisc_destroy(priv->qdiscs[ntx]);
+			qdisc_put(priv->qdiscs[ntx]);
 		kfree(priv->qdiscs);
 	}
 
@@ -125,8 +122,9 @@
 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
 
 	if (nested_len >= nla_attr_size(0))
-		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
-				 nested_len, policy, NULL);
+		return nla_parse_deprecated(tb, maxtype,
+					    nla_data(nla) + NLA_ALIGN(len),
+					    nested_len, policy, NULL);
 
 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
 	return 0;
@@ -300,7 +298,7 @@
 		qdisc = priv->qdiscs[ntx];
 		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
 		if (old)
-			qdisc_destroy(old);
+			qdisc_put(old);
 		if (ntx < dev->real_num_tx_queues)
 			qdisc_hash_add(qdisc, false);
 	}
@@ -349,7 +347,7 @@
 	int i;
 
 	if (priv->flags & TC_MQPRIO_F_MIN_RATE) {
-		nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64);
+		nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MIN_RATE64);
 		if (!nest)
 			goto nla_put_failure;
 
@@ -363,7 +361,7 @@
 	}
 
 	if (priv->flags & TC_MQPRIO_F_MAX_RATE) {
-		nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64);
+		nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MAX_RATE64);
 		if (!nest)
 			goto nla_put_failure;
 
@@ -559,10 +557,9 @@
 		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
 
 		sch = dev_queue->qdisc_sleeping;
-		if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-					  d, NULL, &sch->bstats) < 0 ||
-		    gnet_stats_copy_queue(d, NULL,
-					  &sch->qstats, sch->q.qlen) < 0)
+		if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
+					  sch->cpu_bstats, &sch->bstats) < 0 ||
+		    qdisc_qstats_copy(d, sch) < 0)
 			return -1;
 	}
 	return 0;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 1da7ea8..1330ad2 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008, Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Author: Alexander Duyck <alexander.h.duyck@intel.com>
  */
 
@@ -175,7 +164,7 @@
 
 	tcf_block_put(q->block);
 	for (band = 0; band < q->bands; band++)
-		qdisc_destroy(q->queues[band]);
+		qdisc_put(q->queues[band]);
 
 	kfree(q->queues);
 }
@@ -185,7 +174,8 @@
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
 	struct tc_multiq_qopt *qopt;
-	int i;
+	struct Qdisc **removed;
+	int i, n_removed = 0;
 
 	if (!netif_is_multiqueue(qdisc_dev(sch)))
 		return -EOPNOTSUPP;
@@ -196,20 +186,29 @@
 
 	qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
 
+	removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands),
+			  GFP_KERNEL);
+	if (!removed)
+		return -ENOMEM;
+
 	sch_tree_lock(sch);
 	q->bands = qopt->bands;
 	for (i = q->bands; i < q->max_bands; i++) {
 		if (q->queues[i] != &noop_qdisc) {
 			struct Qdisc *child = q->queues[i];
+
 			q->queues[i] = &noop_qdisc;
-			qdisc_tree_reduce_backlog(child, child->q.qlen,
-						  child->qstats.backlog);
-			qdisc_destroy(child);
+			qdisc_purge_queue(child);
+			removed[n_removed++] = child;
 		}
 	}
 
 	sch_tree_unlock(sch);
 
+	for (i = 0; i < n_removed; i++)
+		qdisc_put(removed[i]);
+	kfree(removed);
+
 	for (i = 0; i < q->bands; i++) {
 		if (q->queues[i] == &noop_qdisc) {
 			struct Qdisc *child, *old;
@@ -224,13 +223,10 @@
 				if (child != &noop_qdisc)
 					qdisc_hash_add(child, true);
 
-				if (old != &noop_qdisc) {
-					qdisc_tree_reduce_backlog(old,
-								  old->q.qlen,
-								  old->qstats.backlog);
-					qdisc_destroy(old);
-				}
+				if (old != &noop_qdisc)
+					qdisc_purge_queue(old);
 				sch_tree_unlock(sch);
+				qdisc_put(old);
 			}
 		}
 	}
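
Annotation: both multiq hunks above keep the same discipline: queues are
detached and purged while sch_tree_lock() is held, but the final qdisc_put()
happens only after sch_tree_unlock(); the removed[] array exists solely to
carry the doomed children across the unlock. A minimal sketch of that
detach-then-release shape, with stand-in names:

#include <stdlib.h>

static void lock(void)   { /* stand-in for sch_tree_lock() */ }
static void unlock(void) { /* stand-in for sch_tree_unlock() */ }

int main(void)
{
	int *attached[4], *removed[4];
	int n_removed = 0;

	for (int i = 0; i < 4; i++)
		attached[i] = calloc(1, sizeof(int));

	lock();
	for (int i = 2; i < 4; i++) {	/* shrink from 4 to 2 bands */
		removed[n_removed++] = attached[i];
		attached[i] = NULL;	/* detach under the lock */
	}
	unlock();

	for (int i = 0; i < n_removed; i++)
		free(removed[i]);	/* release outside the critical section */

	for (int i = 0; i < 2; i++)
		free(attached[i]);
	return 0;
}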
@@ -343,8 +339,8 @@
 
 	cl_q = q->queues[cl - 1];
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl_q->bstats) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
+				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
+	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
 
 	return 0;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 74c0f65..42e557d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/sch_netem.c	Network emulator
  *
- * 		This program is free software; you can redistribute it and/or
- * 		modify it under the terms of the GNU General Public License
- * 		as published by the Free Software Foundation; either version
- * 		2 of the License.
- *
  *  		Many of the algorithms and ideas for this came from
  *		NIST Net which is not copyrighted.
  *
@@ -77,6 +73,10 @@
 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
 	struct rb_root t_root;
 
+	/* a linear queue; reduces rbtree rebalancing when jitter is low */
+	struct sk_buff	*t_head;
+	struct sk_buff	*t_tail;
+
 	/* optional qdisc for classful handling (NULL at netem init) */
 	struct Qdisc	*qdisc;
 
@@ -369,26 +369,39 @@
 		rb_erase(&skb->rbnode, &q->t_root);
 		rtnl_kfree_skbs(skb, skb);
 	}
+
+	rtnl_kfree_skbs(q->t_head, q->t_tail);
+	q->t_head = NULL;
+	q->t_tail = NULL;
 }
 
 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
-	struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
 
-	while (*p) {
-		struct sk_buff *skb;
-
-		parent = *p;
-		skb = rb_to_skb(parent);
-		if (tnext >= netem_skb_cb(skb)->time_to_send)
-			p = &parent->rb_right;
+	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
+		if (q->t_tail)
+			q->t_tail->next = nskb;
 		else
-			p = &parent->rb_left;
+			q->t_head = nskb;
+		q->t_tail = nskb;
+	} else {
+		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
+
+		while (*p) {
+			struct sk_buff *skb;
+
+			parent = *p;
+			skb = rb_to_skb(parent);
+			if (tnext >= netem_skb_cb(skb)->time_to_send)
+				p = &parent->rb_right;
+			else
+				p = &parent->rb_left;
+		}
+		rb_link_node(&nskb->rbnode, parent, p);
+		rb_insert_color(&nskb->rbnode, &q->t_root);
 	}
-	rb_link_node(&nskb->rbnode, parent, p);
-	rb_insert_color(&nskb->rbnode, &q->t_root);
 	sch->q.qlen++;
 }
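
Annotation: the tfifo rewrite above adds a plain head/tail list alongside the
rbtree. A packet whose send time is not earlier than the current tail is
appended in O(1); only out-of-order arrivals (jitter reordering time_to_send)
pay for an rbtree insert, so with zero jitter the tree is never touched. A toy
model of that fast-path decision:

#include <stdint.h>
#include <stdio.h>

struct pkt { uint64_t t; struct pkt *next; };
struct tfifo { struct pkt *head, *tail; unsigned int slow; };

static void enqueue(struct tfifo *q, struct pkt *p)
{
	if (!q->tail || p->t >= q->tail->t) {
		if (q->tail)		/* in-order: O(1) append */
			q->tail->next = p;
		else
			q->head = p;
		q->tail = p;
	} else {
		q->slow++;		/* would fall back to the rbtree */
	}
}

int main(void)
{
	struct tfifo q = { 0 };
	struct pkt p[4] = { { 10 }, { 20 }, { 20 }, { 15 } };

	for (int i = 0; i < 4; i++)
		enqueue(&q, &p[i]);
	printf("slow-path inserts: %u\n", q.slow);	/* 1: the t=15 packet */
	return 0;
}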
 
@@ -412,16 +425,6 @@
 	return segs;
 }
 
-static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb)
-{
-	skb->next = qh->head;
-
-	if (!qh->head)
-		qh->tail = skb;
-	qh->head = skb;
-	qh->qlen++;
-}
-
 /*
  * Insert one skb into qdisc.
  * Note: parent depends on return value to account for queue length.
@@ -436,10 +439,10 @@
 	struct netem_skb_cb *cb;
 	struct sk_buff *skb2;
 	struct sk_buff *segs = NULL;
-	unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
-	int nb = 0;
+	unsigned int prev_len = qdisc_pkt_len(skb);
 	int count = 1;
 	int rc = NET_XMIT_SUCCESS;
+	int rc_drop = NET_XMIT_DROP;
 
 	/* Do not fool qdisc_drop_all() */
 	skb->prev = NULL;
@@ -473,12 +476,13 @@
 	 * skb will be queued.
 	 */
 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
-		struct Qdisc *rootq = qdisc_root(sch);
+		struct Qdisc *rootq = qdisc_root_bh(sch);
 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 
 		q->duplicate = 0;
 		rootq->enqueue(skb2, rootq, to_free);
 		q->duplicate = dupsave;
+		rc_drop = NET_XMIT_SUCCESS;
 	}
 
 	/*
@@ -489,16 +493,14 @@
 	 */
 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
 		if (skb_is_gso(skb)) {
-			segs = netem_segment(skb, sch, to_free);
-			if (!segs)
-				return NET_XMIT_DROP;
-		} else {
-			segs = skb;
+			skb = netem_segment(skb, sch, to_free);
+			if (!skb)
+				return rc_drop;
+			segs = skb->next;
+			skb_mark_not_on_list(skb);
+			qdisc_skb_cb(skb)->pkt_len = skb->len;
 		}
 
-		skb = segs;
-		segs = segs->next;
-
 		skb = skb_unshare(skb, GFP_ATOMIC);
 		if (unlikely(!skb)) {
 			qdisc_qstats_drop(sch);
@@ -507,6 +509,7 @@
 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
 		    skb_checksum_help(skb)) {
 			qdisc_drop(skb, sch, to_free);
+			skb = NULL;
 			goto finish_segs;
 		}
 
@@ -514,8 +517,12 @@
 			1<<(prandom_u32() % 8);
 	}
 
-	if (unlikely(sch->q.qlen >= sch->limit))
-		return qdisc_drop_all(skb, sch, to_free);
+	if (unlikely(sch->q.qlen >= sch->limit)) {
+		/* re-link segs, so that qdisc_drop_all() frees them all */
+		skb->next = segs;
+		qdisc_drop_all(skb, sch, to_free);
+		return rc_drop;
+	}
 
 	qdisc_qstats_backlog_inc(sch, skb);
 
@@ -543,9 +550,16 @@
 				t_skb = skb_rb_last(&q->t_root);
 				t_last = netem_skb_cb(t_skb);
 				if (!last ||
-				    t_last->time_to_send > last->time_to_send) {
+				    t_last->time_to_send > last->time_to_send)
 					last = t_last;
-				}
+			}
+			if (q->t_tail) {
+				struct netem_skb_cb *t_last =
+					netem_skb_cb(q->t_tail);
+
+				if (!last ||
+				    t_last->time_to_send > last->time_to_send)
+					last = t_last;
 			}
 
 			if (last) {
@@ -573,15 +587,21 @@
 		cb->time_to_send = ktime_get_ns();
 		q->counter = 0;
 
-		netem_enqueue_skb_head(&sch->q, skb);
+		__qdisc_enqueue_head(skb, &sch->q);
 		sch->qstats.requeues++;
 	}
 
 finish_segs:
 	if (segs) {
+		unsigned int len, last_len;
+		int nb;
+
+		len = skb ? skb->len : 0;
+		nb = skb ? 1 : 0;
+
 		while (segs) {
 			skb2 = segs->next;
-			segs->next = NULL;
+			skb_mark_not_on_list(segs);
 			qdisc_skb_cb(segs)->pkt_len = segs->len;
 			last_len = segs->len;
 			rc = qdisc_enqueue(segs, sch, to_free);
@@ -594,9 +614,10 @@
 			}
 			segs = skb2;
 		}
-		sch->q.qlen += nb;
-		if (nb > 1)
-			qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
+		/* Parent qdiscs accounted for 1 skb of size @prev_len */
+		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
+	} else if (!skb) {
+		return NET_XMIT_DROP;
 	}
 	return NET_XMIT_SUCCESS;
 }
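
Annotation: the finish_segs accounting above is easy to misread. Parent qdiscs
have already counted the original GSO skb as one packet of prev_len bytes, so
after segmentation the patch "reduces" the backlog by negative deltas, which
adds the difference. A worked example with made-up sizes:

#include <stdio.h>

int main(void)
{
	/* hypothetical numbers: a GSO skb of 2948 bytes segments into
	 * two frames that each carry their own headers */
	int prev_len = 2948;	/* what the parents accounted */
	int nb = 2;		/* segments actually enqueued */
	int len = 1514 + 1500;	/* their total length: 3014 */

	/* qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len))
	 * therefore corrects the parents upward: */
	printf("packets += %d, bytes += %d\n", nb - 1, len - prev_len);
	return 0;
}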
@@ -624,11 +645,38 @@
 	q->slot.bytes_left = q->slot_config.max_bytes;
 }
 
+static struct sk_buff *netem_peek(struct netem_sched_data *q)
+{
+	struct sk_buff *skb = skb_rb_first(&q->t_root);
+	u64 t1, t2;
+
+	if (!skb)
+		return q->t_head;
+	if (!q->t_head)
+		return skb;
+
+	t1 = netem_skb_cb(skb)->time_to_send;
+	t2 = netem_skb_cb(q->t_head)->time_to_send;
+	if (t1 < t2)
+		return skb;
+	return q->t_head;
+}
+
+static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
+{
+	if (skb == q->t_head) {
+		q->t_head = skb->next;
+		if (!q->t_head)
+			q->t_tail = NULL;
+	} else {
+		rb_erase(&skb->rbnode, &q->t_root);
+	}
+}
+
 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	struct rb_node *p;
 
 tfifo_dequeue:
 	skb = __qdisc_dequeue_head(&sch->q);
@@ -638,20 +686,18 @@
 		qdisc_bstats_update(sch, skb);
 		return skb;
 	}
-	p = rb_first(&q->t_root);
-	if (p) {
+	skb = netem_peek(q);
+	if (skb) {
 		u64 time_to_send;
 		u64 now = ktime_get_ns();
 
-		skb = rb_to_skb(p);
-
 		/* if more time remaining? */
 		time_to_send = netem_skb_cb(skb)->time_to_send;
 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
 			get_slot_next(q, now);
 
-		if (time_to_send <= now &&  q->slot.slot_next <= now) {
-			rb_erase(p, &q->t_root);
+		if (time_to_send <= now && q->slot.slot_next <= now) {
+			netem_erase_head(q, skb);
 			sch->q.qlen--;
 			qdisc_qstats_backlog_dec(sch, skb);
 			skb->next = NULL;
@@ -661,15 +707,6 @@
 			 */
 			skb->dev = qdisc_dev(sch);
 
-#ifdef CONFIG_NET_CLS_ACT
-			/*
-			 * If it's at ingress let's pretend the delay is
-			 * from the network (tstamp will be updated).
-			 */
-			if (skb->tc_redirected && skb->tc_from_ingress)
-				skb->tstamp = 0;
-#endif
-
 			if (q->slot.slot_next) {
 				q->slot.packets_left--;
 				q->slot.bytes_left -= qdisc_pkt_len(skb);
@@ -745,7 +782,7 @@
 	struct disttable *d;
 	int i;
 
-	if (n > NETEM_DIST_MAX)
+	if (!n || n > NETEM_DIST_MAX)
 		return -EINVAL;
 
 	d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
@@ -901,8 +938,9 @@
 	}
 
 	if (nested_len >= nla_attr_size(0))
-		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
-				 nested_len, policy, NULL);
+		return nla_parse_deprecated(tb, maxtype,
+					    nla_data(nla) + NLA_ALIGN(len),
+					    nested_len, policy, NULL);
 
 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
 	return 0;
@@ -1035,7 +1073,7 @@
 
 	qdisc_watchdog_cancel(&q->watchdog);
 	if (q->qdisc)
-		qdisc_destroy(q->qdisc);
+		qdisc_put(q->qdisc);
 	dist_free(q->delay_dist);
 	dist_free(q->slot_dist);
 }
@@ -1045,7 +1083,7 @@
 {
 	struct nlattr *nest;
 
-	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
+	nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 18d30bb..df98a88 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2013 Cisco Systems, Inc, 2013.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Author: Vijay Subramanian <vijaynsu@cisco.com>
  * Author: Mythili Prabhu <mysuryan@cisco.com>
  *
@@ -17,9 +8,7 @@
  * University of Oslo, Norway.
  *
  * References:
- * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
- * IEEE  Conference on High Performance Switching and Routing 2013 :
- * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
+ * RFC 8033: https://tools.ietf.org/html/rfc8033
  */
 
 #include <linux/module.h>
@@ -31,9 +20,9 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 
-#define QUEUE_THRESHOLD 10000
+#define QUEUE_THRESHOLD 16384
 #define DQCOUNT_INVALID -1
-#define MAX_PROB  0xffffffff
+#define MAX_PROB 0xffffffffffffffff
 #define PIE_SCALE 8
 
 /* parameters used */
@@ -49,14 +38,16 @@
 
 /* variables used */
 struct pie_vars {
-	u32 prob;		/* probability but scaled by u32 limit. */
+	u64 prob;		/* probability but scaled by u64 limit. */
 	psched_time_t burst_time;
 	psched_time_t qdelay;
 	psched_time_t qdelay_old;
 	u64 dq_count;		/* measured in bytes */
 	psched_time_t dq_tstamp;	/* drain rate */
+	u64 accu_prob;		/* accumulated drop probability */
 	u32 avg_dq_rate;	/* bytes per pschedtime tick,scaled */
 	u32 qlen_old;		/* in bytes */
+	u8 accu_prob_overflows;	/* overflows of accu_prob */
 };
 
 /* statistics gathering */
@@ -81,9 +72,9 @@
 {
 	params->alpha = 2;
 	params->beta = 20;
-	params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC);	/* 30 ms */
+	params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC);	/* 15 ms */
 	params->limit = 1000;	/* default of 1000 packets */
-	params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC);	/* 20 ms */
+	params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC);	/* 15 ms */
 	params->ecn = false;
 	params->bytemode = false;
 }
@@ -91,16 +82,18 @@
 static void pie_vars_init(struct pie_vars *vars)
 {
 	vars->dq_count = DQCOUNT_INVALID;
+	vars->accu_prob = 0;
 	vars->avg_dq_rate = 0;
-	/* default of 100 ms in pschedtime */
-	vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC);
+	/* default of 150 ms in pschedtime */
+	vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC);
+	vars->accu_prob_overflows = 0;
 }
 
 static bool drop_early(struct Qdisc *sch, u32 packet_size)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
-	u32 rnd;
-	u32 local_prob = q->vars.prob;
+	u64 rnd;
+	u64 local_prob = q->vars.prob;
 	u32 mtu = psched_mtu(qdisc_dev(sch));
 
 	/* If there is still burst allowance left skip random early drop */
@@ -110,8 +103,8 @@
 	/* If current delay is less than half of target, and
 	 * if drop prob is low already, disable early_drop
 	 */
-	if ((q->vars.qdelay < q->params.target / 2)
-	    && (q->vars.prob < MAX_PROB / 5))
+	if ((q->vars.qdelay < q->params.target / 2) &&
+	    (q->vars.prob < MAX_PROB / 5))
 		return false;
 
 	/* If we have fewer than 2 mtu-sized packets, disable drop_early,
@@ -124,14 +117,34 @@
 	 * probability. Smaller packets will have lower drop prob in this case
 	 */
 	if (q->params.bytemode && packet_size <= mtu)
-		local_prob = (local_prob / mtu) * packet_size;
+		local_prob = (u64)packet_size * div_u64(local_prob, mtu);
 	else
 		local_prob = q->vars.prob;
 
-	rnd = prandom_u32();
-	if (rnd < local_prob)
+	if (local_prob == 0) {
+		q->vars.accu_prob = 0;
+		q->vars.accu_prob_overflows = 0;
+	}
+
+	if (local_prob > MAX_PROB - q->vars.accu_prob)
+		q->vars.accu_prob_overflows++;
+
+	q->vars.accu_prob += local_prob;
+
+	if (q->vars.accu_prob_overflows == 0 &&
+	    q->vars.accu_prob < (MAX_PROB / 100) * 85)
+		return false;
+	if (q->vars.accu_prob_overflows == 8 &&
+	    q->vars.accu_prob >= MAX_PROB / 2)
 		return true;
 
+	prandom_bytes(&rnd, 8);
+	if (rnd < local_prob) {
+		q->vars.accu_prob = 0;
+		q->vars.accu_prob_overflows = 0;
+		return true;
+	}
+
 	return false;
 }
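
Annotation: the accu_prob machinery added above is RFC 8033's derandomization.
Drop probability is accumulated per packet; drops are suppressed while the
accumulator is below 0.85, forced once it reaches 8.5 (tracked via the overflow
counter), and any drop resets it. A toy single-threshold version, with the
overflow bookkeeping deliberately elided:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_PROB UINT64_MAX

static uint64_t accu_prob;

static int drop_early(uint64_t local_prob)
{
	uint64_t rnd;

	if (local_prob == 0)
		accu_prob = 0;
	accu_prob += local_prob;	/* real code also counts overflows */

	if (accu_prob < (MAX_PROB / 100) * 85)
		return 0;		/* below 0.85: never drop */

	/* crude 64-bit random; not uniform, illustration only */
	rnd = ((uint64_t)rand() << 32) ^ (uint64_t)rand();
	if (rnd < local_prob) {
		accu_prob = 0;		/* a drop resets the accumulator */
		return 1;
	}
	return 0;
}

int main(void)
{
	int drops = 0;

	for (int i = 0; i < 1000; i++)
		drops += drop_early(MAX_PROB / 10);	/* p = 0.1 */
	printf("drops: %d / 1000\n", drops);
	return 0;
}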
 
@@ -168,6 +181,8 @@
 
 out:
 	q->stats.dropped++;
+	q->vars.accu_prob = 0;
+	q->vars.accu_prob_overflows = 0;
 	return qdisc_drop(skb, sch, to_free);
 }
 
@@ -192,7 +207,8 @@
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -209,7 +225,8 @@
 
 	/* tupdate is in jiffies */
 	if (tb[TCA_PIE_TUPDATE])
-		q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
+		q->params.tupdate =
+			usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
 
 	if (tb[TCA_PIE_LIMIT]) {
 		u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
@@ -247,7 +264,6 @@
 
 static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
 {
-
 	struct pie_sched_data *q = qdisc_priv(sch);
 	int qlen = sch->qstats.backlog;	/* current queue size in bytes */
 
@@ -294,9 +310,9 @@
 			 * dq_count to 0 to re-enter the if block when the next
 			 * packet is dequeued
 			 */
-			if (qlen < QUEUE_THRESHOLD)
+			if (qlen < QUEUE_THRESHOLD) {
 				q->vars.dq_count = DQCOUNT_INVALID;
-			else {
+			} else {
 				q->vars.dq_count = 0;
 				q->vars.dq_tstamp = psched_get_time();
 			}
@@ -317,9 +333,10 @@
 	u32 qlen = sch->qstats.backlog;	/* queue size in bytes */
 	psched_time_t qdelay = 0;	/* in pschedtime */
 	psched_time_t qdelay_old = q->vars.qdelay;	/* in pschedtime */
-	s32 delta = 0;		/* determines the change in probability */
-	u32 oldprob;
-	u32 alpha, beta;
+	s64 delta = 0;		/* determines the change in probability */
+	u64 oldprob;
+	u64 alpha, beta;
+	u32 power;
 	bool update_prob = true;
 
 	q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +356,36 @@
 	 * value for alpha as 0.125. In this implementation, we use values 0-32
 	 * passed from user space to represent this. Also, alpha and beta have
 	 * unit of HZ and need to be scaled before they can used to update
-	 * probability. alpha/beta are updated locally below by 1) scaling them
-	 * appropriately 2) scaling down by 16 to come to 0-2 range.
-	 * Please see paper for details.
-	 *
-	 * We scale alpha and beta differently depending on whether we are in
-	 * light, medium or high dropping mode.
+	 * probability. alpha/beta are updated locally below by scaling down
+	 * by 16 to come to 0-2 range.
 	 */
-	if (q->vars.prob < MAX_PROB / 100) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-	} else if (q->vars.prob < MAX_PROB / 10) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-	} else {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+	/* We scale alpha and beta differently depending on how heavy the
+	 * congestion is. Please see RFC 8033 for details.
+	 */
+	if (q->vars.prob < MAX_PROB / 10) {
+		alpha >>= 1;
+		beta >>= 1;
+
+		power = 100;
+		while (q->vars.prob < div_u64(MAX_PROB, power) &&
+		       power <= 1000000) {
+			alpha >>= 2;
+			beta >>= 2;
+			power *= 10;
+		}
 	}
 
 	/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
-	delta += alpha * ((qdelay - q->params.target));
-	delta += beta * ((qdelay - qdelay_old));
+	delta += alpha * (u64)(qdelay - q->params.target);
+	delta += beta * (u64)(qdelay - qdelay_old);
 
 	oldprob = q->vars.prob;
 
 	/* to ensure we increase probability in steps of no more than 2% */
-	if (delta > (s32) (MAX_PROB / (100 / 2)) &&
+	if (delta > (s64)(MAX_PROB / (100 / 2)) &&
 	    q->vars.prob >= MAX_PROB / 10)
 		delta = (MAX_PROB / 100) * 2;
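
Annotation: the loop above folds the old three-branch scaling into one rule:
starting from the >>4 base, alpha and beta lose one extra bit below p = 0.1 and
two more per further decade, as RFC 8033 suggests. A stand-alone check of the
effective shift it produces:

#include <stdint.h>
#include <stdio.h>

#define MAX_PROB UINT64_MAX

int main(void)
{
	/* representative probabilities: 0.2, 0.02, 0.0002 */
	uint64_t probs[] = { MAX_PROB / 5, MAX_PROB / 50, MAX_PROB / 5000 };

	for (int i = 0; i < 3; i++) {
		uint64_t prob = probs[i];
		unsigned int shift = 4;		/* the >>4 base above */

		if (prob < MAX_PROB / 10) {
			uint64_t power = 100;

			shift += 1;
			while (prob < MAX_PROB / power && power <= 1000000) {
				shift += 2;
				power *= 10;
			}
		}
		/* prints 4, 5 and 9 for the three cases */
		printf("probs[%d]: alpha/beta shifted right by %u\n", i, shift);
	}
	return 0;
}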
 
@@ -405,8 +420,9 @@
 	 * delay is 0 for 2 consecutive Tupdate periods.
 	 */
 
-	if ((qdelay == 0) && (qdelay_old == 0) && update_prob)
-		q->vars.prob = (q->vars.prob * 98) / 100;
+	if (qdelay == 0 && qdelay_old == 0 && update_prob)
+		/* Reduce drop probability to 98.4% */
+		q->vars.prob -= q->vars.prob / 64u;
 
 	q->vars.qdelay = qdelay;
 	q->vars.qlen_old = qlen;
@@ -419,8 +435,8 @@
 	 */
 	if ((q->vars.qdelay < q->params.target / 2) &&
 	    (q->vars.qdelay_old < q->params.target / 2) &&
-	    (q->vars.prob == 0) &&
-	    (q->vars.avg_dq_rate > 0))
+	    q->vars.prob == 0 &&
+	    q->vars.avg_dq_rate > 0)
 		pie_vars_init(&q->vars);
 }
 
@@ -437,7 +453,6 @@
 	if (q->params.tupdate)
 		mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
 	spin_unlock(root_lock);
-
 }
 
 static int pie_init(struct Qdisc *sch, struct nlattr *opt,
@@ -468,16 +483,17 @@
 	struct pie_sched_data *q = qdisc_priv(sch);
 	struct nlattr *opts;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
-	if (opts == NULL)
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
+	if (!opts)
 		goto nla_put_failure;
 
 	/* convert target from pschedtime to us */
 	if (nla_put_u32(skb, TCA_PIE_TARGET,
-			((u32) PSCHED_TICKS2NS(q->params.target)) /
+			((u32)PSCHED_TICKS2NS(q->params.target)) /
 			NSEC_PER_USEC) ||
 	    nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
-	    nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) ||
+	    nla_put_u32(skb, TCA_PIE_TUPDATE,
+			jiffies_to_usecs(q->params.tupdate)) ||
 	    nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
 	    nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
 	    nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
@@ -489,7 +505,6 @@
 nla_put_failure:
 	nla_nest_cancel(skb, opts);
 	return -1;
-
 }
 
 static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
@@ -497,7 +512,7 @@
 	struct pie_sched_data *q = qdisc_priv(sch);
 	struct tc_pie_xstats st = {
 		.prob		= q->vars.prob,
-		.delay		= ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) /
+		.delay		= ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
 				   NSEC_PER_USEC,
 		/* unscale and return dq_rate in bytes per sec */
 		.avg_dq_rate	= q->vars.avg_dq_rate *
@@ -514,8 +529,7 @@
 
 static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
 {
-	struct sk_buff *skb;
-	skb = qdisc_dequeue_head(sch);
+	struct sk_buff *skb = qdisc_dequeue_head(sch);
 
 	if (!skb)
 		return NULL;
@@ -527,6 +541,7 @@
 static void pie_reset(struct Qdisc *sch)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
+
 	qdisc_reset_queue(sch);
 	pie_vars_init(&q->vars);
 }
@@ -534,6 +549,7 @@
 static void pie_destroy(struct Qdisc *sch)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
+
 	q->params.tupdate = 0;
 	del_timer_sync(&q->adapt_timer);
 }
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 5619d2e..cbc2ebc 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * sch_plug.c Queue traffic until an explicit release command
  *
- *             This program is free software; you can redistribute it and/or
- *             modify it under the terms of the GNU General Public License
- *             as published by the Free Software Foundation; either version
- *             2 of the License, or (at your option) any later version.
- *
  * There are two ways to use this qdisc:
  * 1. A simple "instantaneous" plug/unplug operation, by issuing an alternating
  *    sequence of TCQ_PLUG_BUFFER & TCQ_PLUG_RELEASE_INDEFINITE commands.
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 222e53d..18b884c 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_prio.c	Simple 3-band priority "scheduler".
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  * Fixes:       19990609: J Hadi Salim <hadi@nortelnetworks.com>:
  *              Init --  EINVAL when opt undefined
@@ -72,6 +68,7 @@
 static int
 prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 {
+	unsigned int len = qdisc_pkt_len(skb);
 	struct Qdisc *qdisc;
 	int ret;
 
@@ -88,7 +85,7 @@
 
 	ret = qdisc_enqueue(skb, qdisc, to_free);
 	if (ret == NET_XMIT_SUCCESS) {
-		qdisc_qstats_backlog_inc(sch, skb);
+		sch->qstats.backlog += len;
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
@@ -175,7 +172,7 @@
 	tcf_block_put(q->block);
 	prio_offload(sch, NULL);
 	for (prio = 0; prio < q->bands; prio++)
-		qdisc_destroy(q->queues[prio]);
+		qdisc_put(q->queues[prio]);
 }
 
 static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
@@ -205,7 +202,7 @@
 					      extack);
 		if (!queues[i]) {
 			while (i > oldbands)
-				qdisc_destroy(queues[--i]);
+				qdisc_put(queues[--i]);
 			return -ENOMEM;
 		}
 	}
@@ -215,13 +212,8 @@
 	q->bands = qopt->bands;
 	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 
-	for (i = q->bands; i < oldbands; i++) {
-		struct Qdisc *child = q->queues[i];
-
-		qdisc_tree_reduce_backlog(child, child->q.qlen,
-					  child->qstats.backlog);
-		qdisc_destroy(child);
-	}
+	for (i = q->bands; i < oldbands; i++)
+		qdisc_tree_flush_backlog(q->queues[i]);
 
 	for (i = oldbands; i < q->bands; i++) {
 		q->queues[i] = queues[i];
@@ -230,6 +222,9 @@
 	}
 
 	sch_tree_unlock(sch);
+
+	for (i = q->bands; i < oldbands; i++)
+		qdisc_put(q->queues[i]);
 	return 0;
 }
 
@@ -251,7 +246,6 @@
 
 static int prio_dump_offload(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
 	struct tc_prio_qopt_offload hw_stats = {
 		.command = TC_PRIO_STATS,
 		.handle = sch->handle,
@@ -263,21 +257,8 @@
 			},
 		},
 	};
-	int err;
 
-	sch->flags &= ~TCQ_F_OFFLOADED;
-	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-		return 0;
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
-					    &hw_stats);
-	if (err == -EOPNOTSUPP)
-		return 0;
-
-	if (!err)
-		sch->flags |= TCQ_F_OFFLOADED;
-
-	return err;
+	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_PRIO, &hw_stats);
 }
 
 static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -309,43 +290,22 @@
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	struct tc_prio_qopt_offload graft_offload;
-	struct net_device *dev = qdisc_dev(sch);
 	unsigned long band = arg - 1;
-	bool any_qdisc_is_offloaded;
-	int err;
 
 	if (new == NULL)
 		new = &noop_qdisc;
 
 	*old = qdisc_replace(sch, new, &q->queues[band]);
 
-	if (!tc_can_offload(dev))
-		return 0;
-
 	graft_offload.handle = sch->handle;
 	graft_offload.parent = sch->parent;
 	graft_offload.graft_params.band = band;
 	graft_offload.graft_params.child_handle = new->handle;
 	graft_offload.command = TC_PRIO_GRAFT;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
-					    &graft_offload);
-
-	/* Don't report error if the graft is part of destroy operation. */
-	if (err && new != &noop_qdisc) {
-		/* Don't report error if the parent, the old child and the new
-		 * one are not offloaded.
-		 */
-		any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
-		any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
-		if (*old)
-			any_qdisc_is_offloaded |= (*old)->flags &
-						   TCQ_F_OFFLOADED;
-
-		if (any_qdisc_is_offloaded)
-			NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
-	}
-
+	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old,
+				   TC_SETUP_QDISC_PRIO, &graft_offload,
+				   extack);
 	return 0;
 }
 
@@ -396,8 +356,8 @@
 
 	cl_q = q->queues[cl - 1];
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl_q->bstats) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
+				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
+	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
 
 	return 0;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index bb1a9c1..0b05ac7 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/sch_qfq.c         Quick Fair Queueing Plus Scheduler.
  *
  * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente.
  * Copyright (c) 2012 Paolo Valente.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -217,15 +214,6 @@
 	return container_of(clc, struct qfq_class, common);
 }
 
-static void qfq_purge_queue(struct qfq_class *cl)
-{
-	unsigned int len = cl->qdisc->q.qlen;
-	unsigned int backlog = cl->qdisc->qstats.backlog;
-
-	qdisc_reset(cl->qdisc);
-	qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
 static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
 	[TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
 	[TCA_QFQ_LMAX] = { .type = NLA_U32 },
@@ -419,8 +407,8 @@
 		return -EINVAL;
 	}
 
-	err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS],
+					  qfq_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -526,7 +514,7 @@
 	return 0;
 
 destroy_class:
-	qdisc_destroy(cl->qdisc);
+	qdisc_put(cl->qdisc);
 	kfree(cl);
 	return err;
 }
@@ -537,7 +525,7 @@
 
 	qfq_rm_from_agg(q, cl);
 	gen_kill_estimator(&cl->rate_est);
-	qdisc_destroy(cl->qdisc);
+	qdisc_put(cl->qdisc);
 	kfree(cl);
 }
 
@@ -551,7 +539,7 @@
 
 	sch_tree_lock(sch);
 
-	qfq_purge_queue(cl);
+	qdisc_purge_queue(cl->qdisc);
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
 
 	sch_tree_unlock(sch);
@@ -628,7 +616,7 @@
 	tcm->tcm_handle	= cl->common.classid;
 	tcm->tcm_info	= cl->qdisc->handle;
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 	if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
@@ -655,8 +643,7 @@
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 				  d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-	    gnet_stats_copy_queue(d, NULL,
-				  &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
+	    qdisc_qstats_copy(d, cl->qdisc) < 0)
 		return -1;
 
 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
@@ -1210,10 +1197,12 @@
 static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		       struct sk_buff **to_free)
 {
+	unsigned int len = qdisc_pkt_len(skb), gso_segs;
 	struct qfq_sched *q = qdisc_priv(sch);
 	struct qfq_class *cl;
 	struct qfq_aggregate *agg;
 	int err = 0;
+	bool first;
 
 	cl = qfq_classify(skb, sch, &err);
 	if (cl == NULL) {
@@ -1224,17 +1213,18 @@
 	}
 	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
 
-	if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) {
+	if (unlikely(cl->agg->lmax < len)) {
 		pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
-			 cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid);
-		err = qfq_change_agg(sch, cl, cl->agg->class_weight,
-				     qdisc_pkt_len(skb));
+			 cl->agg->lmax, len, cl->common.classid);
+		err = qfq_change_agg(sch, cl, cl->agg->class_weight, len);
 		if (err) {
 			cl->qstats.drops++;
 			return qdisc_drop(skb, sch, to_free);
 		}
 	}
 
+	gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
+	first = !cl->qdisc->q.qlen;
 	err = qdisc_enqueue(skb, cl->qdisc, to_free);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		pr_debug("qfq_enqueue: enqueue failed %d\n", err);
@@ -1245,16 +1235,17 @@
 		return err;
 	}
 
-	bstats_update(&cl->bstats, skb);
-	qdisc_qstats_backlog_inc(sch, skb);
+	cl->bstats.bytes += len;
+	cl->bstats.packets += gso_segs;
+	sch->qstats.backlog += len;
 	++sch->q.qlen;
 
 	agg = cl->agg;
 	/* if the queue was not empty, then done here */
-	if (cl->qdisc->q.qlen != 1) {
+	if (!first) {
 		if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
 		    list_first_entry(&agg->active, struct qfq_class, alist)
-		    == cl && cl->deficit < qdisc_pkt_len(skb))
+		    == cl && cl->deficit < len)
 			list_move_tail(&cl->alist, &agg->active);
 
 		return err;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 56c181c..1695421 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_red.c	Random Early Detection queue.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  * Changes:
@@ -166,7 +162,9 @@
 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
 		opt.set.probability = q->parms.max_P;
+		opt.set.limit = q->limit;
 		opt.set.is_ecn = red_use_ecn(q);
+		opt.set.is_harddrop = red_use_harddrop(q);
 		opt.set.qstats = &sch->qstats;
 	} else {
 		opt.command = TC_RED_DESTROY;
@@ -181,7 +179,7 @@
 
 	del_timer_sync(&q->adapt_timer);
 	red_offload(sch, false);
-	qdisc_destroy(q->qdisc);
+	qdisc_put(q->qdisc);
 }
 
 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
@@ -193,17 +191,18 @@
 static int red_change(struct Qdisc *sch, struct nlattr *opt,
 		      struct netlink_ext_ack *extack)
 {
+	struct Qdisc *old_child = NULL, *child = NULL;
 	struct red_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_RED_MAX + 1];
 	struct tc_red_qopt *ctl;
-	struct Qdisc *child = NULL;
 	int err;
 	u32 max_P;
 
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -231,9 +230,8 @@
 	q->flags = ctl->flags;
 	q->limit = ctl->limit;
 	if (child) {
-		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
-					  q->qdisc->qstats.backlog);
-		qdisc_destroy(q->qdisc);
+		qdisc_tree_flush_backlog(q->qdisc);
+		old_child = q->qdisc;
 		q->qdisc = child;
 	}
 
@@ -252,7 +250,11 @@
 		red_start_of_idle_period(&q->vars);
 
 	sch_tree_unlock(sch);
+
 	red_offload(sch, true);
+
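+	/* Put the old child only after releasing the tree lock: dropping
+	 * the last reference can reach back into the driver (e.g. offload
+	 * teardown), which is not safe under the qdisc root spinlock.
+	 */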
+	if (old_child)
+		qdisc_put(old_child);
 	return 0;
 }
 
@@ -279,9 +281,8 @@
 	return red_change(sch, opt, extack);
 }
 
-static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
+static int red_dump_offload_stats(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
 	struct tc_red_qopt_offload hw_stats = {
 		.command = TC_RED_STATS,
 		.handle = sch->handle,
@@ -291,22 +292,8 @@
 			.stats.qstats = &sch->qstats,
 		},
 	};
-	int err;
 
-	sch->flags &= ~TCQ_F_OFFLOADED;
-
-	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-		return 0;
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
-					    &hw_stats);
-	if (err == -EOPNOTSUPP)
-		return 0;
-
-	if (!err)
-		sch->flags |= TCQ_F_OFFLOADED;
-
-	return err;
+	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
 }
 
 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -324,11 +311,11 @@
 	};
 	int err;
 
-	err = red_dump_offload_stats(sch, &opt);
+	err = red_dump_offload_stats(sch);
 	if (err)
 		goto nla_put_failure;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
@@ -377,6 +364,21 @@
 	return 0;
 }
 
+static void red_graft_offload(struct Qdisc *sch,
+			      struct Qdisc *new, struct Qdisc *old,
+			      struct netlink_ext_ack *extack)
+{
+	struct tc_red_qopt_offload graft_offload = {
+		.handle		= sch->handle,
+		.parent		= sch->parent,
+		.child_handle	= new->handle,
+		.command	= TC_RED_GRAFT,
+	};
+
+	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
+				   TC_SETUP_QDISC_RED, &graft_offload, extack);
+}
+
 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		     struct Qdisc **old, struct netlink_ext_ack *extack)
 {
@@ -386,6 +388,8 @@
 		new = &noop_qdisc;
 
 	*old = qdisc_replace(sch, new, &q->qdisc);
+
+	red_graft_offload(sch, new, *old, extack);
 	return 0;
 }
 
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 7cbdad8..4074c50 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -1,19 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sched/sch_sfb.c	  Stochastic Fair Blue
  *
  * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr>
  * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
  * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue:
  * A New Class of Active Queue Management Algorithms.
  * U. Michigan CSE-TR-387-99, April 1999.
  *
  * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf
- *
  */
 
 #include <linux/module.h>
@@ -22,7 +18,7 @@
 #include <linux/errno.h>
 #include <linux/skbuff.h>
 #include <linux/random.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
 #include <net/ip.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
@@ -49,7 +45,7 @@
  * (Section 4.4 of SFB reference : moving hash functions)
  */
 struct sfb_bins {
-	u32		  perturbation; /* jhash perturbation */
+	siphash_key_t	  perturbation; /* siphash key */
 	struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
 };
 
@@ -221,7 +217,8 @@
 
 static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
 {
-	q->bins[slot].perturbation = prandom_u32();
+	get_random_bytes(&q->bins[slot].perturbation,
+			 sizeof(q->bins[slot].perturbation));
 }
 
 static void sfb_swap_slot(struct sfb_sched_data *q)
@@ -318,9 +315,9 @@
 		/* If using external classifiers, get result and record it. */
 		if (!sfb_classify(skb, fl, &ret, &salt))
 			goto other_drop;
-		sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+		sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation);
 	} else {
-		sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
+		sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation);
 	}
 
 
@@ -356,7 +353,7 @@
 		/* Inelastic flow */
 		if (q->double_buffering) {
 			sfbhash = skb_get_hash_perturb(skb,
-			    q->bins[slot].perturbation);
+			    &q->bins[slot].perturbation);
 			if (!sfbhash)
 				sfbhash = 1;
 			sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -469,7 +466,7 @@
 	struct sfb_sched_data *q = qdisc_priv(sch);
 
 	tcf_block_put(q->block);
-	qdisc_destroy(q->qdisc);
+	qdisc_put(q->qdisc);
 }
 
 static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = {
@@ -492,14 +489,15 @@
 		      struct netlink_ext_ack *extack)
 {
 	struct sfb_sched_data *q = qdisc_priv(sch);
-	struct Qdisc *child;
+	struct Qdisc *child, *old;
 	struct nlattr *tb[TCA_SFB_MAX + 1];
 	const struct tc_sfb_qopt *ctl = &sfb_default_ops;
 	u32 limit;
 	int err;
 
 	if (opt) {
-		err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy, NULL);
+		err = nla_parse_nested_deprecated(tb, TCA_SFB_MAX, opt,
+						  sfb_policy, NULL);
 		if (err < 0)
 			return -EINVAL;
 
@@ -521,9 +519,8 @@
 		qdisc_hash_add(child, true);
 	sch_tree_lock(sch);
 
-	qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
-				  q->qdisc->qstats.backlog);
-	qdisc_destroy(q->qdisc);
+	qdisc_purge_queue(q->qdisc);
+	old = q->qdisc;
 	q->qdisc = child;
 
 	q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
@@ -546,6 +543,7 @@
 	sfb_init_perturbation(1, q);
 
 	sch_tree_unlock(sch);
+	qdisc_put(old);
 
 	return 0;
 }
@@ -581,7 +579,7 @@
 	};
 
 	sch->qstats.backlog = q->qdisc->qstats.backlog;
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 	if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt))
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 2f26781..c787d4d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
@@ -18,7 +14,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
@@ -121,7 +117,7 @@
 	u8		headdrop;
 	u8		maxdepth;	/* limit of packets per flow */
 
-	u32		perturbation;
+	siphash_key_t	perturbation;
 	u8		cur_depth;	/* depth of longest slot */
 	u8		flags;
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
@@ -161,7 +157,7 @@
 static unsigned int sfq_hash(const struct sfq_sched_data *q,
 			     const struct sk_buff *skb)
 {
-	return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
+	return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
 }
 
 static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -611,9 +607,11 @@
 	struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
 	struct Qdisc *sch = q->sch;
 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+	siphash_key_t nkey;
 
+	get_random_bytes(&nkey, sizeof(nkey));
 	spin_lock(root_lock);
-	q->perturbation = prandom_u32();
+	q->perturbation = nkey;
 	if (!q->filter_list && q->tail)
 		sfq_rehash(sch);
 	spin_unlock(root_lock);
@@ -692,7 +690,7 @@
 	del_timer(&q->perturb_timer);
 	if (q->perturb_period) {
 		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
-		q->perturbation = prandom_u32();
+		get_random_bytes(&q->perturbation, sizeof(q->perturbation));
 	}
 	sch_tree_unlock(sch);
 	kfree(p);
@@ -749,7 +747,7 @@
 	q->quantum = psched_mtu(qdisc_dev(sch));
 	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = 0;
-	q->perturbation = prandom_u32();
+	get_random_bytes(&q->perturbation, sizeof(q->perturbation));
 
 	if (opt) {
 		int err = sfq_change(sch, opt);
@@ -828,8 +826,6 @@
 static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
 			      u32 classid)
 {
-	/* we cannot bypass queue discipline anymore */
-	sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 52c0b6d..0fb10ab 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_skbprio.c  SKB Priority Queue.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Nishanth Devarajan, <ndev2021@gmail.com>
  *		Cody Doucette, <doucette@bu.edu>
  *	        original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
new file mode 100644
index 0000000..c609373
--- /dev/null
+++ b/net/sched/sch_taprio.c
@@ -0,0 +1,1933 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* net/sched/sch_taprio.c	 Time Aware Priority Scheduler
+ *
+ * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <net/sch_generic.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+
+static LIST_HEAD(taprio_list);
+static DEFINE_SPINLOCK(taprio_list_lock);
+
+#define TAPRIO_ALL_GATES_OPEN -1
+
+#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
+#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
+
+struct sched_entry {
+	struct list_head list;
+
+	/* The instant that this entry "closes" and the next one
+	 * should open; the qdisc will make some effort so that no
+	 * packet leaves after this time.
+	 */
+	ktime_t close_time;
+	ktime_t next_txtime;
+	atomic_t budget;
+	int index;
+	u32 gate_mask;
+	u32 interval;
+	u8 command;
+};
+
+struct sched_gate_list {
+	struct rcu_head rcu;
+	struct list_head entries;
+	size_t num_entries;
+	ktime_t cycle_close_time;
+	s64 cycle_time;
+	s64 cycle_time_extension;
+	s64 base_time;
+};
+
+struct taprio_sched {
+	struct Qdisc **qdiscs;
+	struct Qdisc *root;
+	u32 flags;
+	enum tk_offsets tk_offset;
+	int clockid;
+	atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
+				    * speeds it's sub-nanosecond per byte
+				    */
+
+	/* Protects the update side of the RCU protected current_entry */
+	spinlock_t current_entry_lock;
+	struct sched_entry __rcu *current_entry;
+	struct sched_gate_list __rcu *oper_sched;
+	struct sched_gate_list __rcu *admin_sched;
+	struct hrtimer advance_timer;
+	struct list_head taprio_list;
+	struct sk_buff *(*dequeue)(struct Qdisc *sch);
+	struct sk_buff *(*peek)(struct Qdisc *sch);
+	u32 txtime_delay;
+};
+
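+/* Private wrapper that adds reference counting to the offload structure
+ * handed to drivers; the variable-length entries[] array of the embedded
+ * offload member is allocated contiguously right after it.
+ */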
+struct __tc_taprio_qopt_offload {
+	refcount_t users;
+	struct tc_taprio_qopt_offload offload;
+};
+
+static ktime_t sched_base_time(const struct sched_gate_list *sched)
+{
+	if (!sched)
+		return KTIME_MAX;
+
+	return ns_to_ktime(sched->base_time);
+}
+
+static ktime_t taprio_get_time(struct taprio_sched *q)
+{
+	ktime_t mono = ktime_get();
+
+	switch (q->tk_offset) {
+	case TK_OFFS_MAX:
+		return mono;
+	default:
+		return ktime_mono_to_any(mono, q->tk_offset);
+	}
+
+	return KTIME_MAX;
+}
+
+static void taprio_free_sched_cb(struct rcu_head *head)
+{
+	struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
+	struct sched_entry *entry, *n;
+
+	if (!sched)
+		return;
+
+	list_for_each_entry_safe(entry, n, &sched->entries, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+
+	kfree(sched);
+}
+
+static void switch_schedules(struct taprio_sched *q,
+			     struct sched_gate_list **admin,
+			     struct sched_gate_list **oper)
+{
+	rcu_assign_pointer(q->oper_sched, *admin);
+	rcu_assign_pointer(q->admin_sched, NULL);
+
+	if (*oper)
+		call_rcu(&(*oper)->rcu, taprio_free_sched_cb);
+
+	*oper = *admin;
+	*admin = NULL;
+}
+
+/* Get how much time has already elapsed in the current cycle. */
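+/* e.g. (hypothetical numbers) with base_time = 0 and cycle_time = 1ms,
+ * at time = 2.5ms the elapsed time within the current cycle is 0.5ms.
+ */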
+static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
+{
+	ktime_t time_since_sched_start;
+	s32 time_elapsed;
+
+	time_since_sched_start = ktime_sub(time, sched->base_time);
+	div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed);
+
+	return time_elapsed;
+}
+
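+/* An interval normally ends entry->interval ns after it starts, capped at
+ * the end of the current cycle; if the admin schedule's base_time falls
+ * within the cycle-time extension, the interval may extend up to it.
+ */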
+static ktime_t get_interval_end_time(struct sched_gate_list *sched,
+				     struct sched_gate_list *admin,
+				     struct sched_entry *entry,
+				     ktime_t intv_start)
+{
+	s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start);
+	ktime_t intv_end, cycle_ext_end, cycle_end;
+
+	cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
+	intv_end = ktime_add_ns(intv_start, entry->interval);
+	cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);
+
+	if (ktime_before(intv_end, cycle_end))
+		return intv_end;
+	else if (admin && admin != sched &&
+		 ktime_after(admin->base_time, cycle_end) &&
+		 ktime_before(admin->base_time, cycle_ext_end))
+		return admin->base_time;
+	else
+		return cycle_end;
+}
+
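+/* Convert a frame length in bytes into a transmission duration in ns:
+ * len * picos_per_byte gives picoseconds, and dividing by 1000 yields
+ * nanoseconds; e.g. a 1500 byte frame at 1Gb/s (8000 ps/byte) takes 12us.
+ */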
+static int length_to_duration(struct taprio_sched *q, int len)
+{
+	return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
+}
+
+/* Returns the entry corresponding to the next available interval. If
+ * validate_interval is set, it only validates whether the timestamp occurs
+ * when the gate corresponding to the skb's traffic class is open.
+ */
+static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
+						  struct Qdisc *sch,
+						  struct sched_gate_list *sched,
+						  struct sched_gate_list *admin,
+						  ktime_t time,
+						  ktime_t *interval_start,
+						  ktime_t *interval_end,
+						  bool validate_interval)
+{
+	ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
+	ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
+	struct sched_entry *entry = NULL, *entry_found = NULL;
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	bool entry_available = false;
+	s32 cycle_elapsed;
+	int tc, n;
+
+	tc = netdev_get_prio_tc_map(dev, skb->priority);
+	packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb));
+
+	*interval_start = 0;
+	*interval_end = 0;
+
+	if (!sched)
+		return NULL;
+
+	cycle = sched->cycle_time;
+	cycle_elapsed = get_cycle_time_elapsed(sched, time);
+	curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
+	cycle_end = ktime_add_ns(curr_intv_end, cycle);
+
+	list_for_each_entry(entry, &sched->entries, list) {
+		curr_intv_start = curr_intv_end;
+		curr_intv_end = get_interval_end_time(sched, admin, entry,
+						      curr_intv_start);
+
+		if (ktime_after(curr_intv_start, cycle_end))
+			break;
+
+		if (!(entry->gate_mask & BIT(tc)) ||
+		    packet_transmit_time > entry->interval)
+			continue;
+
+		txtime = entry->next_txtime;
+
+		if (ktime_before(txtime, time) || validate_interval) {
+			transmit_end_time = ktime_add_ns(time, packet_transmit_time);
+			if ((ktime_before(curr_intv_start, time) &&
+			     ktime_before(transmit_end_time, curr_intv_end)) ||
+			    (ktime_after(curr_intv_start, time) && !validate_interval)) {
+				entry_found = entry;
+				*interval_start = curr_intv_start;
+				*interval_end = curr_intv_end;
+				break;
+			} else if (!entry_available && !validate_interval) {
+				/* Here, we are just trying to find out the
+				 * first available interval in the next cycle.
+				 */
+				entry_available = true;
+				entry_found = entry;
+				*interval_start = ktime_add_ns(curr_intv_start, cycle);
+				*interval_end = ktime_add_ns(curr_intv_end, cycle);
+			}
+		} else if (ktime_before(txtime, earliest_txtime) &&
+			   !entry_available) {
+			earliest_txtime = txtime;
+			entry_found = entry;
+			n = div_s64(ktime_sub(txtime, curr_intv_start), cycle);
+			*interval_start = ktime_add(curr_intv_start, n * cycle);
+			*interval_end = ktime_add(curr_intv_end, n * cycle);
+		}
+	}
+
+	return entry_found;
+}
+
+static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct sched_gate_list *sched, *admin;
+	ktime_t interval_start, interval_end;
+	struct sched_entry *entry;
+
+	rcu_read_lock();
+	sched = rcu_dereference(q->oper_sched);
+	admin = rcu_dereference(q->admin_sched);
+
+	entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp,
+				       &interval_start, &interval_end, true);
+	rcu_read_unlock();
+
+	return entry;
+}
+
+static bool taprio_flags_valid(u32 flags)
+{
+	/* Make sure no other flag bits are set. */
+	if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST |
+		      TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
+		return false;
+	/* txtime-assist and full offload are mutually exclusive */
+	if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
+	    (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
+		return false;
+	return true;
+}
+
+/* This returns the tstamp value set by TCP in terms of the set clock. */
+static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
+{
+	unsigned int offset = skb_network_offset(skb);
+	const struct ipv6hdr *ipv6h;
+	const struct iphdr *iph;
+	struct ipv6hdr _ipv6h;
+
+	ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
+	if (!ipv6h)
+		return 0;
+
+	if (ipv6h->version == 4) {
+		iph = (struct iphdr *)ipv6h;
+		offset += iph->ihl * 4;
+
+		/* special-case 6in4 tunnelling, as that is a common way to get
+		 * v6 connectivity in the home
+		 */
+		if (iph->protocol == IPPROTO_IPV6) {
+			ipv6h = skb_header_pointer(skb, offset,
+						   sizeof(_ipv6h), &_ipv6h);
+
+			if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
+				return 0;
+		} else if (iph->protocol != IPPROTO_TCP) {
+			return 0;
+		}
+	} else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) {
+		return 0;
+	}
+
+	return ktime_mono_to_any(skb->skb_mstamp_ns, q->tk_offset);
+}
+
+/* There are a few scenarios where we will have to modify the txtime from
+ * what is read from next_txtime in sched_entry. They are:
+ * 1. If txtime is in the past,
+ *    a. The gate for the traffic class is currently open and packet can be
+ *       transmitted before it closes, schedule the packet right away.
+ *    b. If the gate corresponding to the traffic class is going to open later
+ *       in the cycle, set the txtime of packet to the interval start.
+ * 2. If txtime is in the future, there are packets corresponding to the
+ *    current traffic class waiting to be transmitted. So, the following
+ *    possibilities exist:
+ *    a. We can transmit the packet before the window containing the txtime
+ *       closes.
+ *    b. The window might close before the transmission can be completed
+ *       successfully. So, schedule the packet in the next open window.
+ */
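+/* Illustrative example (hypothetical numbers): with a 200us cycle in which
+ * the skb's gate is open from 100us to 150us, a txtime of 90us that is
+ * already in the past while the gate is not yet open falls under case 1.b
+ * above and is moved up to the 100us interval start.
+ */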
+static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
+{
+	ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct sched_gate_list *sched, *admin;
+	ktime_t minimum_time, now, txtime;
+	int len, packet_transmit_time;
+	struct sched_entry *entry;
+	bool sched_changed;
+
+	now = taprio_get_time(q);
+	minimum_time = ktime_add_ns(now, q->txtime_delay);
+
+	tcp_tstamp = get_tcp_tstamp(q, skb);
+	minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);
+
+	rcu_read_lock();
+	admin = rcu_dereference(q->admin_sched);
+	sched = rcu_dereference(q->oper_sched);
+	if (admin && ktime_after(minimum_time, admin->base_time))
+		switch_schedules(q, &admin, &sched);
+
+	/* Until the schedule starts, all the queues are open */
+	if (!sched || ktime_before(minimum_time, sched->base_time)) {
+		txtime = minimum_time;
+		goto done;
+	}
+
+	len = qdisc_pkt_len(skb);
+	packet_transmit_time = length_to_duration(q, len);
+
+	do {
+		sched_changed = false;
+
+		entry = find_entry_to_transmit(skb, sch, sched, admin,
+					       minimum_time,
+					       &interval_start, &interval_end,
+					       false);
+		if (!entry) {
+			txtime = 0;
+			goto done;
+		}
+
+		txtime = entry->next_txtime;
+		txtime = max_t(ktime_t, txtime, minimum_time);
+		txtime = max_t(ktime_t, txtime, interval_start);
+
+		if (admin && admin != sched &&
+		    ktime_after(txtime, admin->base_time)) {
+			sched = admin;
+			sched_changed = true;
+			continue;
+		}
+
+		transmit_end_time = ktime_add(txtime, packet_transmit_time);
+		minimum_time = transmit_end_time;
+
+		/* Update the txtime of the current entry to the next time its
+		 * interval starts.
+		 */
+		if (ktime_after(transmit_end_time, interval_end))
+			entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
+	} while (sched_changed || ktime_after(transmit_end_time, interval_end));
+
+	entry->next_txtime = transmit_end_time;
+
+done:
+	rcu_read_unlock();
+	return txtime;
+}
+
+static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			  struct sk_buff **to_free)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct Qdisc *child;
+	int queue;
+
+	queue = skb_get_queue_mapping(skb);
+
+	child = q->qdiscs[queue];
+	if (unlikely(!child))
+		return qdisc_drop(skb, sch, to_free);
+
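+	/* Sockets using SO_TXTIME provide their own transmit time, which is
+	 * only validated against the gate schedule here; otherwise, in
+	 * txtime-assist mode, a transmit time is computed for the packet.
+	 */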
+	if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
+		if (!is_valid_interval(skb, sch))
+			return qdisc_drop(skb, sch, to_free);
+	} else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
+		skb->tstamp = get_packet_txtime(skb, sch);
+		if (!skb->tstamp)
+			return qdisc_drop(skb, sch, to_free);
+	}
+
+	qdisc_qstats_backlog_inc(sch, skb);
+	sch->q.qlen++;
+
+	return qdisc_enqueue(skb, child, to_free);
+}
+
+static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct sched_entry *entry;
+	struct sk_buff *skb;
+	u32 gate_mask;
+	int i;
+
+	rcu_read_lock();
+	entry = rcu_dereference(q->current_entry);
+	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
+	rcu_read_unlock();
+
+	if (!gate_mask)
+		return NULL;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct Qdisc *child = q->qdiscs[i];
+		int prio;
+		u8 tc;
+
+		if (unlikely(!child))
+			continue;
+
+		skb = child->ops->peek(child);
+		if (!skb)
+			continue;
+
+		if (TXTIME_ASSIST_IS_ENABLED(q->flags))
+			return skb;
+
+		prio = skb->priority;
+		tc = netdev_get_prio_tc_map(dev, prio);
+
+		if (!(gate_mask & BIT(tc)))
+			continue;
+
+		return skb;
+	}
+
+	return NULL;
+}
+
+static struct sk_buff *taprio_peek_offload(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct sk_buff *skb;
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct Qdisc *child = q->qdiscs[i];
+
+		if (unlikely(!child))
+			continue;
+
+		skb = child->ops->peek(child);
+		if (!skb)
+			continue;
+
+		return skb;
+	}
+
+	return NULL;
+}
+
+static struct sk_buff *taprio_peek(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+
+	return q->peek(sch);
+}
+
+static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
+{
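+	/* The budget is the number of bytes transmittable in the interval:
+	 * interval (ns) * 1000 converts to picoseconds, and dividing by
+	 * picos_per_byte yields bytes; e.g. (hypothetical) a 100us interval
+	 * at 1Gb/s (8000 ps/byte) allows a budget of 12500 bytes.
+	 */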
+	atomic_set(&entry->budget,
+		   div64_u64((u64)entry->interval * 1000,
+			     atomic64_read(&q->picos_per_byte)));
+}
+
+static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct sk_buff *skb = NULL;
+	struct sched_entry *entry;
+	u32 gate_mask;
+	int i;
+
+	rcu_read_lock();
+	entry = rcu_dereference(q->current_entry);
+	/* If there's no entry, it means that the schedule didn't
+	 * start yet, so force all gates to be open; this is in
+	 * accordance with IEEE 802.1Qbv-2015 Section 8.6.9.4.5
+	 * "AdminGateStates".
+	 */
+	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
+
+	if (!gate_mask)
+		goto done;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct Qdisc *child = q->qdiscs[i];
+		ktime_t guard;
+		int prio;
+		int len;
+		u8 tc;
+
+		if (unlikely(!child))
+			continue;
+
+		if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
+			skb = child->ops->dequeue(child);
+			if (!skb)
+				continue;
+			goto skb_found;
+		}
+
+		skb = child->ops->peek(child);
+		if (!skb)
+			continue;
+
+		prio = skb->priority;
+		tc = netdev_get_prio_tc_map(dev, prio);
+
+		if (!(gate_mask & BIT(tc)))
+			continue;
+
+		len = qdisc_pkt_len(skb);
+		guard = ktime_add_ns(taprio_get_time(q),
+				     length_to_duration(q, len));
+
+		/* In the case that there's no gate entry, there's no
+		 * guard band ...
+		 */
+		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
+		    ktime_after(guard, entry->close_time))
+			continue;
+
+		/* ... and no budget. */
+		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
+		    atomic_sub_return(len, &entry->budget) < 0)
+			continue;
+
+		skb = child->ops->dequeue(child);
+		if (unlikely(!skb))
+			goto done;
+
+skb_found:
+		qdisc_bstats_update(sch, skb);
+		qdisc_qstats_backlog_dec(sch, skb);
+		sch->q.qlen--;
+
+		goto done;
+	}
+
+done:
+	rcu_read_unlock();
+
+	return skb;
+}
+
+static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct sk_buff *skb;
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct Qdisc *child = q->qdiscs[i];
+
+		if (unlikely(!child))
+			continue;
+
+		skb = child->ops->dequeue(child);
+		if (unlikely(!skb))
+			continue;
+
+		qdisc_bstats_update(sch, skb);
+		qdisc_qstats_backlog_dec(sch, skb);
+		sch->q.qlen--;
+
+		return skb;
+	}
+
+	return NULL;
+}
+
+static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+
+	return q->dequeue(sch);
+}
+
+static bool should_restart_cycle(const struct sched_gate_list *oper,
+				 const struct sched_entry *entry)
+{
+	if (list_is_last(&entry->list, &oper->entries))
+		return true;
+
+	if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0)
+		return true;
+
+	return false;
+}
+
+static bool should_change_schedules(const struct sched_gate_list *admin,
+				    const struct sched_gate_list *oper,
+				    ktime_t close_time)
+{
+	ktime_t next_base_time, extension_time;
+
+	if (!admin)
+		return false;
+
+	next_base_time = sched_base_time(admin);
+
+	/* This is the simple case, the close_time would fall after
+	 * the next schedule base_time.
+	 */
+	if (ktime_compare(next_base_time, close_time) <= 0)
+		return true;
+
+	/* This is the cycle_time_extension case, if the close_time
+	 * plus the amount that can be extended would fall after the
+	 * next schedule base_time, we can extend the current schedule
+	 * for that amount.
+	 */
+	extension_time = ktime_add_ns(close_time, oper->cycle_time_extension);
+
+	/* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
+	 * how precisely the extension should be made. So after
+	 * conformance testing, this logic may change.
+	 */
+	if (ktime_compare(next_base_time, extension_time) <= 0)
+		return true;
+
+	return false;
+}
+
+static enum hrtimer_restart advance_sched(struct hrtimer *timer)
+{
+	struct taprio_sched *q = container_of(timer, struct taprio_sched,
+					      advance_timer);
+	struct sched_gate_list *oper, *admin;
+	struct sched_entry *entry, *next;
+	struct Qdisc *sch = q->root;
+	ktime_t close_time;
+
+	spin_lock(&q->current_entry_lock);
+	entry = rcu_dereference_protected(q->current_entry,
+					  lockdep_is_held(&q->current_entry_lock));
+	oper = rcu_dereference_protected(q->oper_sched,
+					 lockdep_is_held(&q->current_entry_lock));
+	admin = rcu_dereference_protected(q->admin_sched,
+					  lockdep_is_held(&q->current_entry_lock));
+
+	if (!oper)
+		switch_schedules(q, &admin, &oper);
+
+	/* This can happen in two cases: 1. this is the very first run
+	 * of this function (i.e. we weren't running any schedule
+	 * previously); 2. the previous schedule just ended. The first
+	 * entry of each schedule is pre-calculated during schedule
+	 * initialization.
+	 */
+	if (unlikely(!entry || entry->close_time == oper->base_time)) {
+		next = list_first_entry(&oper->entries, struct sched_entry,
+					list);
+		close_time = next->close_time;
+		goto first_run;
+	}
+
+	if (should_restart_cycle(oper, entry)) {
+		next = list_first_entry(&oper->entries, struct sched_entry,
+					list);
+		oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time,
+						      oper->cycle_time);
+	} else {
+		next = list_next_entry(entry, list);
+	}
+
+	close_time = ktime_add_ns(entry->close_time, next->interval);
+	close_time = min_t(ktime_t, close_time, oper->cycle_close_time);
+
+	if (should_change_schedules(admin, oper, close_time)) {
+		/* Set things so the next time this runs, the new
+		 * schedule runs.
+		 */
+		close_time = sched_base_time(admin);
+		switch_schedules(q, &admin, &oper);
+	}
+
+	next->close_time = close_time;
+	taprio_set_budget(q, next);
+
+first_run:
+	rcu_assign_pointer(q->current_entry, next);
+	spin_unlock(&q->current_entry_lock);
+
+	hrtimer_set_expires(&q->advance_timer, close_time);
+
+	rcu_read_lock();
+	__netif_schedule(sch);
+	rcu_read_unlock();
+
+	return HRTIMER_RESTART;
+}
+
+static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
+	[TCA_TAPRIO_SCHED_ENTRY_INDEX]	   = { .type = NLA_U32 },
+	[TCA_TAPRIO_SCHED_ENTRY_CMD]	   = { .type = NLA_U8 },
+	[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
+	[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
+};
+
+static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
+	[TCA_TAPRIO_ATTR_PRIOMAP]	       = {
+		.len = sizeof(struct tc_mqprio_qopt)
+	},
+	[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]           = { .type = NLA_NESTED },
+	[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]            = { .type = NLA_S64 },
+	[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]         = { .type = NLA_NESTED },
+	[TCA_TAPRIO_ATTR_SCHED_CLOCKID]              = { .type = NLA_S32 },
+	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           = { .type = NLA_S64 },
+	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
+};
+
+static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
+			    struct netlink_ext_ack *extack)
+{
+	u32 interval = 0;
+
+	if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
+		entry->command = nla_get_u8(
+			tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
+
+	if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
+		entry->gate_mask = nla_get_u32(
+			tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
+
+	if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
+		interval = nla_get_u32(
+			tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
+
+	if (interval == 0) {
+		NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
+		return -EINVAL;
+	}
+
+	entry->interval = interval;
+
+	return 0;
+}
+
+static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
+			     int index, struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
+	int err;
+
+	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
+					  entry_policy, NULL);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+		return -EINVAL;
+	}
+
+	entry->index = index;
+
+	return fill_sched_entry(tb, entry, extack);
+}
+
+static int parse_sched_list(struct nlattr *list,
+			    struct sched_gate_list *sched,
+			    struct netlink_ext_ack *extack)
+{
+	struct nlattr *n;
+	int err, rem;
+	int i = 0;
+
+	if (!list)
+		return -EINVAL;
+
+	nla_for_each_nested(n, list, rem) {
+		struct sched_entry *entry;
+
+		if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
+			NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
+			continue;
+		}
+
+		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+		if (!entry) {
+			NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+			return -ENOMEM;
+		}
+
+		err = parse_sched_entry(n, entry, i, extack);
+		if (err < 0) {
+			kfree(entry);
+			return err;
+		}
+
+		list_add_tail(&entry->list, &sched->entries);
+		i++;
+	}
+
+	sched->num_entries = i;
+
+	return i;
+}
+
+static int parse_taprio_schedule(struct nlattr **tb,
+				 struct sched_gate_list *new,
+				 struct netlink_ext_ack *extack)
+{
+	int err = 0;
+
+	if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
+		NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
+		return -ENOTSUPP;
+	}
+
+	if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
+		new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
+
+	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
+		new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);
+
+	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
+		new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
+
+	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
+		err = parse_sched_list(
+			tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack);
+	if (err < 0)
+		return err;
+
+	if (!new->cycle_time) {
+		struct sched_entry *entry;
+		ktime_t cycle = 0;
+
+		list_for_each_entry(entry, &new->entries, list)
+			cycle = ktime_add_ns(cycle, entry->interval);
+		new->cycle_time = cycle;
+	}
+
+	return 0;
+}
+
+static int taprio_parse_mqprio_opt(struct net_device *dev,
+				   struct tc_mqprio_qopt *qopt,
+				   struct netlink_ext_ack *extack,
+				   u32 taprio_flags)
+{
+	int i, j;
+
+	if (!qopt && !dev->num_tc) {
+		NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
+		return -EINVAL;
+	}
+
+	/* If num_tc is already set, it means that the user already
+	 * configured the mqprio part
+	 */
+	if (dev->num_tc)
+		return 0;
+
+	/* Verify num_tc is not out of max range */
+	if (qopt->num_tc > TC_MAX_QUEUE) {
+		NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
+		return -EINVAL;
+	}
+
+	/* taprio imposes that traffic classes map 1:n to tx queues */
+	if (qopt->num_tc > dev->num_tx_queues) {
+		NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
+		return -EINVAL;
+	}
+
+	/* Verify priority mapping uses valid tcs */
+	for (i = 0; i <= TC_BITMASK; i++) {
+		if (qopt->prio_tc_map[i] >= qopt->num_tc) {
+			NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
+			return -EINVAL;
+		}
+	}
+
+	for (i = 0; i < qopt->num_tc; i++) {
+		unsigned int last = qopt->offset[i] + qopt->count[i];
+
+		/* Verify the queue count is within the tx range; a count
+		 * equal to real_num_tx_queues indicates the last queue is
+		 * in use.
+		 */
+		if (qopt->offset[i] >= dev->num_tx_queues ||
+		    !qopt->count[i] ||
+		    last > dev->real_num_tx_queues) {
+			NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
+			return -EINVAL;
+		}
+
+		if (TXTIME_ASSIST_IS_ENABLED(taprio_flags))
+			continue;
+
+		/* Verify that the offset and counts do not overlap */
+		for (j = i + 1; j < qopt->num_tc; j++) {
+			if (last > qopt->offset[j]) {
+				NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int taprio_get_start_time(struct Qdisc *sch,
+				 struct sched_gate_list *sched,
+				 ktime_t *start)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	ktime_t now, base, cycle;
+	s64 n;
+
+	base = sched_base_time(sched);
+	now = taprio_get_time(q);
+
+	if (ktime_after(base, now)) {
+		*start = base;
+		return 0;
+	}
+
+	cycle = sched->cycle_time;
+
+	/* The qdisc is expected to have at least one sched_entry.  Moreover,
+	 * any entry must have 'interval' > 0. Thus if the cycle time is zero,
+	 * something went really wrong. In that case, we should warn about this
+	 * inconsistent state and return an error.
+	 */
+	if (WARN_ON(!cycle))
+		return -EFAULT;
+
+	/* Schedule the start time for the beginning of the next
+	 * cycle.
+	 */
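+	/* e.g. (hypothetical) base = 0, cycle = 1ms, now = 2.5ms:
+	 * n = 2 full cycles have elapsed, so the start is set to 3ms.
+	 */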
+	n = div64_s64(ktime_sub_ns(now, base), cycle);
+	*start = ktime_add_ns(base, (n + 1) * cycle);
+	return 0;
+}
+
+static void setup_first_close_time(struct taprio_sched *q,
+				   struct sched_gate_list *sched, ktime_t base)
+{
+	struct sched_entry *first;
+	ktime_t cycle;
+
+	first = list_first_entry(&sched->entries,
+				 struct sched_entry, list);
+
+	cycle = sched->cycle_time;
+
+	/* FIXME: find a better place to do this */
+	sched->cycle_close_time = ktime_add_ns(base, cycle);
+
+	first->close_time = ktime_add_ns(base, first->interval);
+	taprio_set_budget(q, first);
+	rcu_assign_pointer(q->current_entry, NULL);
+}
+
+static void taprio_start_sched(struct Qdisc *sch,
+			       ktime_t start, struct sched_gate_list *new)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	ktime_t expires;
+
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
+		return;
+
+	expires = hrtimer_get_expires(&q->advance_timer);
+	if (expires == 0)
+		expires = KTIME_MAX;
+
+	/* If the new schedule starts before the next expiration, we
+	 * reprogram it to the earliest one, so we change the admin
+	 * schedule to the operational one at the right time.
+	 */
+	start = min_t(ktime_t, start, expires);
+
+	hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
+}
+
+static void taprio_set_picos_per_byte(struct net_device *dev,
+				      struct taprio_sched *q)
+{
+	struct ethtool_link_ksettings ecmd;
+	int speed = SPEED_10;
+	int picos_per_byte;
+	int err;
+
+	err = __ethtool_get_link_ksettings(dev, &ecmd);
+	if (err < 0)
+		goto skip;
+
+	if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
+		speed = ecmd.base.speed;
+
+skip:
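+	/* speed is in Mb/s, so e.g. at SPEED_1000 this gives
+	 * (1000000 * 8) / 1000 = 8000 picoseconds per byte (8ns/byte).
+	 */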
+	picos_per_byte = (USEC_PER_SEC * 8) / speed;
+
+	atomic64_set(&q->picos_per_byte, picos_per_byte);
+	netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
+		   dev->name, (long long)atomic64_read(&q->picos_per_byte),
+		   ecmd.base.speed);
+}
+
+static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
+			       void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net_device *qdev;
+	struct taprio_sched *q;
+	bool found = false;
+
+	ASSERT_RTNL();
+
+	if (event != NETDEV_UP && event != NETDEV_CHANGE)
+		return NOTIFY_DONE;
+
+	spin_lock(&taprio_list_lock);
+	list_for_each_entry(q, &taprio_list, taprio_list) {
+		qdev = qdisc_dev(q->root);
+		if (qdev == dev) {
+			found = true;
+			break;
+		}
+	}
+	spin_unlock(&taprio_list_lock);
+
+	if (found)
+		taprio_set_picos_per_byte(dev, q);
+
+	return NOTIFY_DONE;
+}
+
+static void setup_txtime(struct taprio_sched *q,
+			 struct sched_gate_list *sched, ktime_t base)
+{
+	struct sched_entry *entry;
+	u32 interval = 0;
+
+	list_for_each_entry(entry, &sched->entries, list) {
+		entry->next_txtime = ktime_add_ns(base, interval);
+		interval += entry->interval;
+	}
+}
+
+static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
+{
+	size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries +
+		      sizeof(struct __tc_taprio_qopt_offload);
+	struct __tc_taprio_qopt_offload *__offload;
+
+	__offload = kzalloc(size, GFP_KERNEL);
+	if (!__offload)
+		return NULL;
+
+	refcount_set(&__offload->users, 1);
+
+	return &__offload->offload;
+}
+
+struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
+						  *offload)
+{
+	struct __tc_taprio_qopt_offload *__offload;
+
+	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
+				 offload);
+
+	refcount_inc(&__offload->users);
+
+	return offload;
+}
+EXPORT_SYMBOL_GPL(taprio_offload_get);
+
+void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
+{
+	struct __tc_taprio_qopt_offload *__offload;
+
+	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
+				 offload);
+
+	if (!refcount_dec_and_test(&__offload->users))
+		return;
+
+	kfree(__offload);
+}
+EXPORT_SYMBOL_GPL(taprio_offload_free);
+
+/* This function only serves to keep the pointers to the "oper" and "admin"
+ * schedules valid in relation to their base times, so that dump() shows the
+ * user the right schedules.
+ * When using full offload, the admin configuration is promoted to oper at the
+ * base_time in the PHC time domain.  But because the system time is not
+ * necessarily in sync with that, we can't just trigger a hrtimer to call
+ * switch_schedules at the right hardware time.
+ * At the moment we call this by hand right away from taprio, but in the future
+ * it will be useful to create a mechanism for drivers to notify taprio of the
+ * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
+ * This is left as TODO.
+ */
+static void taprio_offload_config_changed(struct taprio_sched *q)
+{
+	struct sched_gate_list *oper, *admin;
+
+	spin_lock(&q->current_entry_lock);
+
+	oper = rcu_dereference_protected(q->oper_sched,
+					 lockdep_is_held(&q->current_entry_lock));
+	admin = rcu_dereference_protected(q->admin_sched,
+					  lockdep_is_held(&q->current_entry_lock));
+
+	switch_schedules(q, &admin, &oper);
+
+	spin_unlock(&q->current_entry_lock);
+}
+
+static void taprio_sched_to_offload(struct taprio_sched *q,
+				    struct sched_gate_list *sched,
+				    const struct tc_mqprio_qopt *mqprio,
+				    struct tc_taprio_qopt_offload *offload)
+{
+	struct sched_entry *entry;
+	int i = 0;
+
+	offload->base_time = sched->base_time;
+	offload->cycle_time = sched->cycle_time;
+	offload->cycle_time_extension = sched->cycle_time_extension;
+
+	list_for_each_entry(entry, &sched->entries, list) {
+		struct tc_taprio_sched_entry *e = &offload->entries[i];
+
+		e->command = entry->command;
+		e->interval = entry->interval;
+		e->gate_mask = entry->gate_mask;
+		i++;
+	}
+
+	offload->num_entries = i;
+}
+
+static int taprio_enable_offload(struct net_device *dev,
+				 struct tc_mqprio_qopt *mqprio,
+				 struct taprio_sched *q,
+				 struct sched_gate_list *sched,
+				 struct netlink_ext_ack *extack)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct tc_taprio_qopt_offload *offload;
+	int err = 0;
+
+	if (!ops->ndo_setup_tc) {
+		NL_SET_ERR_MSG(extack,
+			       "Device does not support taprio offload");
+		return -EOPNOTSUPP;
+	}
+
+	offload = taprio_offload_alloc(sched->num_entries);
+	if (!offload) {
+		NL_SET_ERR_MSG(extack,
+			       "Not enough memory for enabling offload mode");
+		return -ENOMEM;
+	}
+	offload->enable = 1;
+	taprio_sched_to_offload(q, sched, mqprio, offload);
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Device failed to setup taprio offload");
+		goto done;
+	}
+
+done:
+	taprio_offload_free(offload);
+
+	return err;
+}
+
+static int taprio_disable_offload(struct net_device *dev,
+				  struct taprio_sched *q,
+				  struct netlink_ext_ack *extack)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct tc_taprio_qopt_offload *offload;
+	int err;
+
+	if (!FULL_OFFLOAD_IS_ENABLED(q->flags))
+		return 0;
+
+	if (!ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	offload = taprio_offload_alloc(0);
+	if (!offload) {
+		NL_SET_ERR_MSG(extack,
+			       "Not enough memory to disable offload mode");
+		return -ENOMEM;
+	}
+	offload->enable = 0;
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Device failed to disable offload");
+		goto out;
+	}
+
+out:
+	taprio_offload_free(offload);
+
+	return err;
+}
+
+/* If full offload is enabled, the only possible clockid is the net device's
+ * PHC. For that reason, specifying a clockid through netlink is incorrect.
+ * For txtime-assist, it is implicitly assumed that the device's PHC is kept
+ * in sync with the specified clockid via a user space daemon such as phc2sys.
+ * For both software taprio and txtime-assist, the clockid is used for the
+ * hrtimer that advances the schedule, and is hence mandatory.
+ */
+static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
+				struct netlink_ext_ack *extack)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	int err = -EINVAL;
+
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+		const struct ethtool_ops *ops = dev->ethtool_ops;
+		struct ethtool_ts_info info = {
+			.cmd = ETHTOOL_GET_TS_INFO,
+			.phc_index = -1,
+		};
+
+		if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+			NL_SET_ERR_MSG(extack,
+				       "The 'clockid' cannot be specified for full offload");
+			goto out;
+		}
+
+		if (ops && ops->get_ts_info)
+			err = ops->get_ts_info(dev, &info);
+
+		if (err || info.phc_index < 0) {
+			NL_SET_ERR_MSG(extack,
+				       "Device does not have a PTP clock");
+			err = -ENOTSUPP;
+			goto out;
+		}
+	} else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+		int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
+
+		/* We only support static clockids and we don't allow
+		 * for it to be modified after the first init.
+		 */
+		if (clockid < 0 ||
+		    (q->clockid != -1 && q->clockid != clockid)) {
+			NL_SET_ERR_MSG(extack,
+				       "Changing the 'clockid' of a running schedule is not supported");
+			err = -ENOTSUPP;
+			goto out;
+		}
+
+		switch (clockid) {
+		case CLOCK_REALTIME:
+			q->tk_offset = TK_OFFS_REAL;
+			break;
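+		/* TK_OFFS_MAX is used here as a sentinel meaning "no
+		 * offset": taprio_get_time() returns ktime_get() directly
+		 * for CLOCK_MONOTONIC.
+		 */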
+		case CLOCK_MONOTONIC:
+			q->tk_offset = TK_OFFS_MAX;
+			break;
+		case CLOCK_BOOTTIME:
+			q->tk_offset = TK_OFFS_BOOT;
+			break;
+		case CLOCK_TAI:
+			q->tk_offset = TK_OFFS_TAI;
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+			err = -EINVAL;
+			goto out;
+		}
+
+		q->clockid = clockid;
+	} else {
+		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
+		goto out;
+	}
+
+	/* Everything went ok, return success. */
+	err = 0;
+
+out:
+	return err;
+}
+
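+/* Returns 0 if @mqprio matches the device's current traffic class setup
+ * (same num_tc and identical per-tc queue counts, offsets and prio->tc
+ * map), -1 otherwise.
+ */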
+static int taprio_mqprio_cmp(const struct net_device *dev,
+			     const struct tc_mqprio_qopt *mqprio)
+{
+	int i;
+
+	if (!mqprio || mqprio->num_tc != dev->num_tc)
+		return -1;
+
+	for (i = 0; i < mqprio->num_tc; i++)
+		if (dev->tc_to_txq[i].count != mqprio->count[i] ||
+		    dev->tc_to_txq[i].offset != mqprio->offset[i])
+			return -1;
+
+	for (i = 0; i <= TC_BITMASK; i++)
+		if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
+			return -1;
+
+	return 0;
+}
+
+static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
+			 struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
+	struct sched_gate_list *oper, *admin, *new_admin;
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_mqprio_qopt *mqprio = NULL;
+	u32 taprio_flags = 0;
+	unsigned long flags;
+	ktime_t start;
+	int i, err;
+
+	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
+					  taprio_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
+		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
+
+	if (tb[TCA_TAPRIO_ATTR_FLAGS]) {
+		taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]);
+
+		if (q->flags != 0 && q->flags != taprio_flags) {
+			NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
+			return -EOPNOTSUPP;
+		} else if (!taprio_flags_valid(taprio_flags)) {
+			NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
+			return -EINVAL;
+		}
+
+		q->flags = taprio_flags;
+	}
+
+	err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags);
+	if (err < 0)
+		return err;
+
+	new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
+	if (!new_admin) {
+		NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&new_admin->entries);
+
+	rcu_read_lock();
+	oper = rcu_dereference(q->oper_sched);
+	admin = rcu_dereference(q->admin_sched);
+	rcu_read_unlock();
+
+	/* supplied mqprio matches the current settings - treat it as absent */
+	if (!taprio_mqprio_cmp(dev, mqprio))
+		mqprio = NULL;
+
+	if (mqprio && (oper || admin)) {
+		NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
+		err = -ENOTSUPP;
+		goto free_sched;
+	}
+
+	err = parse_taprio_schedule(tb, new_admin, extack);
+	if (err < 0)
+		goto free_sched;
+
+	if (new_admin->num_entries == 0) {
+		NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
+		err = -EINVAL;
+		goto free_sched;
+	}
+
+	err = taprio_parse_clockid(sch, tb, extack);
+	if (err < 0)
+		goto free_sched;
+
+	taprio_set_picos_per_byte(dev, q);
+
+	if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+		err = taprio_enable_offload(dev, mqprio, q, new_admin, extack);
+	else
+		err = taprio_disable_offload(dev, q, extack);
+	if (err)
+		goto free_sched;
+
+	/* Protects against enqueue()/dequeue() */
+	spin_lock_bh(qdisc_lock(sch));
+
+	if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
+		if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
+			NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
+			err = -EINVAL;
+			goto unlock;
+		}
+
+		q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
+	}
+
+	if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
+	    !FULL_OFFLOAD_IS_ENABLED(taprio_flags) &&
+	    !hrtimer_active(&q->advance_timer)) {
+		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
+		q->advance_timer.function = advance_sched;
+	}
+
+	if (mqprio) {
+		netdev_set_num_tc(dev, mqprio->num_tc);
+		for (i = 0; i < mqprio->num_tc; i++)
+			netdev_set_tc_queue(dev, i,
+					    mqprio->count[i],
+					    mqprio->offset[i]);
+
+		/* Always use supplied priority mappings */
+		for (i = 0; i <= TC_BITMASK; i++)
+			netdev_set_prio_tc_map(dev, i,
+					       mqprio->prio_tc_map[i]);
+	}
+
+	if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) {
+		q->dequeue = taprio_dequeue_offload;
+		q->peek = taprio_peek_offload;
+	} else {
+		/* Be sure to always keep the function pointers
+		 * in a consistent state.
+		 */
+		q->dequeue = taprio_dequeue_soft;
+		q->peek = taprio_peek_soft;
+	}
+
+	err = taprio_get_start_time(sch, new_admin, &start);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
+		goto unlock;
+	}
+
+	if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) {
+		setup_txtime(q, new_admin, start);
+
+		if (!oper) {
+			rcu_assign_pointer(q->oper_sched, new_admin);
+			err = 0;
+			new_admin = NULL;
+			goto unlock;
+		}
+
+		rcu_assign_pointer(q->admin_sched, new_admin);
+		if (admin)
+			call_rcu(&admin->rcu, taprio_free_sched_cb);
+	} else {
+		setup_first_close_time(q, new_admin, start);
+
+		/* Protects against advance_sched() */
+		spin_lock_irqsave(&q->current_entry_lock, flags);
+
+		taprio_start_sched(sch, start, new_admin);
+
+		rcu_assign_pointer(q->admin_sched, new_admin);
+		if (admin)
+			call_rcu(&admin->rcu, taprio_free_sched_cb);
+
+		spin_unlock_irqrestore(&q->current_entry_lock, flags);
+
+		if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+			taprio_offload_config_changed(q);
+	}
+
+	new_admin = NULL;
+	err = 0;
+
+unlock:
+	spin_unlock_bh(qdisc_lock(sch));
+
+free_sched:
+	if (new_admin)
+		call_rcu(&new_admin->rcu, taprio_free_sched_cb);
+
+	return err;
+}
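
Note: taprio_change() builds the new admin schedule completely before publishing it with rcu_assign_pointer(), and reclaims the old one via call_rcu() only after readers have moved on, so the hot path never observes a half-initialized schedule. A rough userspace analogue of that publish/reclaim ordering follows; C11 atomics stand in for real RCU, the deferred free is collapsed to an immediate one, and `replace_schedule()` is a made-up helper.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct sched {
	int num_entries;
};

static _Atomic(struct sched *) admin_sched;	/* zero-initialized to NULL */

static void replace_schedule(int num_entries)
{
	struct sched *new_admin = malloc(sizeof(*new_admin));
	struct sched *old;

	if (!new_admin)
		return;
	new_admin->num_entries = num_entries;		/* build fully first ... */
	old = atomic_exchange_explicit(&admin_sched, new_admin,
				       memory_order_release);	/* ... then publish */
	free(old);	/* the kernel defers this until all readers are done */
}

int main(void)
{
	struct sched *cur;

	replace_schedule(3);
	replace_schedule(5);
	cur = atomic_load_explicit(&admin_sched, memory_order_acquire);
	printf("admin schedule has %d entries\n", cur->num_entries);
	free(cur);
	return 0;
}
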
+
+static void taprio_destroy(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned int i;
+
+	spin_lock(&taprio_list_lock);
+	list_del(&q->taprio_list);
+	spin_unlock(&taprio_list_lock);
+
+	hrtimer_cancel(&q->advance_timer);
+
+	taprio_disable_offload(dev, q, NULL);
+
+	if (q->qdiscs) {
+		for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
+			qdisc_put(q->qdiscs[i]);
+
+		kfree(q->qdiscs);
+	}
+	q->qdiscs = NULL;
+
+	netdev_set_num_tc(dev, 0);
+
+	if (q->oper_sched)
+		call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
+
+	if (q->admin_sched)
+		call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb);
+}
+
+static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
+		       struct netlink_ext_ack *extack)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	int i;
+
+	spin_lock_init(&q->current_entry_lock);
+
+	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
+	q->advance_timer.function = advance_sched;
+
+	q->dequeue = taprio_dequeue_soft;
+	q->peek = taprio_peek_soft;
+
+	q->root = sch;
+
+	/* We only support static clockids. Use an invalid value as default
+	 * and get the valid one on taprio_change().
+	 */
+	q->clockid = -1;
+
+	spin_lock(&taprio_list_lock);
+	list_add(&q->taprio_list, &taprio_list);
+	spin_unlock(&taprio_list_lock);
+
+	if (sch->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	if (!netif_is_multiqueue(dev))
+		return -EOPNOTSUPP;
+
+	/* pre-allocate qdisc, attachment can't fail */
+	q->qdiscs = kcalloc(dev->num_tx_queues,
+			    sizeof(q->qdiscs[0]),
+			    GFP_KERNEL);
+
+	if (!q->qdiscs)
+		return -ENOMEM;
+
+	if (!opt)
+		return -EINVAL;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *dev_queue;
+		struct Qdisc *qdisc;
+
+		dev_queue = netdev_get_tx_queue(dev, i);
+		qdisc = qdisc_create_dflt(dev_queue,
+					  &pfifo_qdisc_ops,
+					  TC_H_MAKE(TC_H_MAJ(sch->handle),
+						    TC_H_MIN(i + 1)),
+					  extack);
+		if (!qdisc)
+			return -ENOMEM;
+
+		if (i < dev->real_num_tx_queues)
+			qdisc_hash_add(qdisc, false);
+
+		q->qdiscs[i] = qdisc;
+	}
+
+	return taprio_change(sch, opt, extack);
+}
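
Note: taprio_init() derives each child's handle from the parent's major number and the queue index plus one. The handle arithmetic is plain 16/16-bit masking; this small standalone program (macros written in the shape of include/uapi/linux/pkt_sched.h) prints the handles the loop above would assign.

#include <stdint.h>
#include <stdio.h>

#define TC_H_MAJ_MASK 0xFFFF0000U
#define TC_H_MIN_MASK 0x0000FFFFU
#define TC_H_MAJ(h)   ((h) & TC_H_MAJ_MASK)
#define TC_H_MIN(h)   ((h) & TC_H_MIN_MASK)
#define TC_H_MAKE(maj, min) (((maj) & TC_H_MAJ_MASK) | ((min) & TC_H_MIN_MASK))

int main(void)
{
	uint32_t parent = 0x00010000;	/* e.g. a root qdisc with handle 1: */

	for (unsigned int i = 0; i < 4; i++)	/* one pfifo child per TX queue */
		printf("queue %u -> child handle %x:%x\n", i,
		       TC_H_MAKE(TC_H_MAJ(parent), TC_H_MIN(i + 1)) >> 16,
		       TC_H_MIN(i + 1));
	return 0;
}
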
+
+static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
+					     unsigned long cl)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx = cl - 1;
+
+	if (ntx >= dev->num_tx_queues)
+		return NULL;
+
+	return netdev_get_tx_queue(dev, ntx);
+}
+
+static int taprio_graft(struct Qdisc *sch, unsigned long cl,
+			struct Qdisc *new, struct Qdisc **old,
+			struct netlink_ext_ack *extack)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+	if (!dev_queue)
+		return -EINVAL;
+
+	if (dev->flags & IFF_UP)
+		dev_deactivate(dev);
+
+	*old = q->qdiscs[cl - 1];
+	q->qdiscs[cl - 1] = new;
+
+	if (new)
+		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+
+	if (dev->flags & IFF_UP)
+		dev_activate(dev);
+
+	return 0;
+}
+
+static int dump_entry(struct sk_buff *msg,
+		      const struct sched_entry *entry)
+{
+	struct nlattr *item;
+
+	item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY);
+	if (!item)
+		return -ENOSPC;
+
+	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
+			entry->gate_mask))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
+			entry->interval))
+		goto nla_put_failure;
+
+	return nla_nest_end(msg, item);
+
+nla_put_failure:
+	nla_nest_cancel(msg, item);
+	return -1;
+}
+
+static int dump_schedule(struct sk_buff *msg,
+			 const struct sched_gate_list *root)
+{
+	struct nlattr *entry_list;
+	struct sched_entry *entry;
+
+	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
+			root->base_time, TCA_TAPRIO_PAD))
+		return -1;
+
+	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
+			root->cycle_time, TCA_TAPRIO_PAD))
+		return -1;
+
+	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
+			root->cycle_time_extension, TCA_TAPRIO_PAD))
+		return -1;
+
+	entry_list = nla_nest_start_noflag(msg,
+					   TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
+	if (!entry_list)
+		goto error_nest;
+
+	list_for_each_entry(entry, &root->entries, list) {
+		if (dump_entry(msg, entry) < 0)
+			goto error_nest;
+	}
+
+	nla_nest_end(msg, entry_list);
+	return 0;
+
+error_nest:
+	nla_nest_cancel(msg, entry_list);
+	return -1;
+}
+
+static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct sched_gate_list *oper, *admin;
+	struct tc_mqprio_qopt opt = { 0 };
+	struct nlattr *nest, *sched_nest;
+	unsigned int i;
+
+	rcu_read_lock();
+	oper = rcu_dereference(q->oper_sched);
+	admin = rcu_dereference(q->admin_sched);
+
+	opt.num_tc = netdev_get_num_tc(dev);
+	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+
+	for (i = 0; i < netdev_get_num_tc(dev); i++) {
+		opt.count[i] = dev->tc_to_txq[i].count;
+		opt.offset[i] = dev->tc_to_txq[i].offset;
+	}
+
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
+	if (!nest)
+		goto start_error;
+
+	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
+		goto options_error;
+
+	if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
+	    nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
+		goto options_error;
+
+	if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
+		goto options_error;
+
+	if (q->txtime_delay &&
+	    nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
+		goto options_error;
+
+	if (oper && dump_schedule(skb, oper))
+		goto options_error;
+
+	if (!admin)
+		goto done;
+
+	sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
+	if (!sched_nest)
+		goto options_error;
+
+	if (dump_schedule(skb, admin))
+		goto admin_error;
+
+	nla_nest_end(skb, sched_nest);
+
+done:
+	rcu_read_unlock();
+
+	return nla_nest_end(skb, nest);
+
+admin_error:
+	nla_nest_cancel(skb, sched_nest);
+
+options_error:
+	nla_nest_cancel(skb, nest);
+
+start_error:
+	rcu_read_unlock();
+	return -ENOSPC;
+}
+
+static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+	if (!dev_queue)
+		return NULL;
+
+	return dev_queue->qdisc_sleeping;
+}
+
+static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
+{
+	unsigned int ntx = TC_H_MIN(classid);
+
+	if (!taprio_queue_get(sch, ntx))
+		return 0;
+	return ntx;
+}
+
+static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+	tcm->tcm_parent = TC_H_ROOT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+
+	return 0;
+}
+
+static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				   struct gnet_dump *d)
+	__releases(d->lock)
+	__acquires(d->lock)
+{
+	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+	sch = dev_queue->qdisc_sleeping;
+	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
+	    qdisc_qstats_copy(d, sch) < 0)
+		return -1;
+	return 0;
+}
+
+static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx;
+
+	if (arg->stop)
+		return;
+
+	arg->count = arg->skip;
+	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
+		if (arg->fn(sch, ntx + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
+						struct tcmsg *tcm)
+{
+	return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
+}
+
+static const struct Qdisc_class_ops taprio_class_ops = {
+	.graft		= taprio_graft,
+	.leaf		= taprio_leaf,
+	.find		= taprio_find,
+	.walk		= taprio_walk,
+	.dump		= taprio_dump_class,
+	.dump_stats	= taprio_dump_class_stats,
+	.select_queue	= taprio_select_queue,
+};
+
+static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
+	.cl_ops		= &taprio_class_ops,
+	.id		= "taprio",
+	.priv_size	= sizeof(struct taprio_sched),
+	.init		= taprio_init,
+	.change		= taprio_change,
+	.destroy	= taprio_destroy,
+	.peek		= taprio_peek,
+	.dequeue	= taprio_dequeue,
+	.enqueue	= taprio_enqueue,
+	.dump		= taprio_dump,
+	.owner		= THIS_MODULE,
+};
+
+static struct notifier_block taprio_device_notifier = {
+	.notifier_call = taprio_dev_notifier,
+};
+
+static int __init taprio_module_init(void)
+{
+	int err = register_netdevice_notifier(&taprio_device_notifier);
+
+	if (err)
+		return err;
+
+	return register_qdisc(&taprio_qdisc_ops);
+}
+
+static void __exit taprio_module_exit(void)
+{
+	unregister_qdisc(&taprio_qdisc_ops);
+	unregister_netdevice_notifier(&taprio_device_notifier);
+}
+
+module_init(taprio_module_init);
+module_exit(taprio_module_exit);
+MODULE_LICENSE("GPL");
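
Note: the sched_entry fields serialized by dump_entry() - index, command, gate mask and interval - describe a repeating cycle of gate states. The standalone model below (toy data, not kernel code) walks that cycle to find the active entry and its open gates for a few points in time, which is essentially what advance_sched() tracks incrementally.

#include <stdint.h>
#include <stdio.h>

struct sched_entry {
	uint32_t gate_mask;	/* bit n set == traffic class n may transmit */
	uint32_t interval;	/* nanoseconds this entry stays active */
};

int main(void)
{
	const struct sched_entry entries[] = {
		{ .gate_mask = 0x1, .interval = 300000 },
		{ .gate_mask = 0x6, .interval = 700000 },
	};
	const unsigned int n = sizeof(entries) / sizeof(entries[0]);
	uint64_t cycle_time = 0, times[] = { 0, 250000, 500000, 1200000 };

	for (unsigned int i = 0; i < n; i++)
		cycle_time += entries[i].interval;

	for (unsigned int t = 0; t < 4; t++) {
		uint64_t off = times[t] % cycle_time;
		unsigned int i = 0;

		while (off >= entries[i].interval) {	/* walk one cycle */
			off -= entries[i].interval;
			i++;
		}
		printf("t=%llu ns: entry %u, open gates 0x%x\n",
		       (unsigned long long)times[t], i, entries[i].gate_mask);
	}
	return 0;
}
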
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 6f74a42..5f72f3f 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/sch_tbf.c	Token Bucket Filter queue.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
  *						 original idea by Martin Devera
- *
  */
 
 #include <linux/module.h>
@@ -162,7 +157,7 @@
 	nb = 0;
 	while (segs) {
 		nskb = segs->next;
-		segs->next = NULL;
+		skb_mark_not_on_list(segs);
 		qdisc_skb_cb(segs)->pkt_len = segs->len;
 		len += segs->len;
 		ret = qdisc_enqueue(segs, q->qdisc, to_free);
@@ -185,6 +180,7 @@
 		       struct sk_buff **to_free)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
+	unsigned int len = qdisc_pkt_len(skb);
 	int ret;
 
 	if (qdisc_pkt_len(skb) > q->max_size) {
@@ -200,7 +196,7 @@
 		return ret;
 	}
 
-	qdisc_qstats_backlog_inc(sch, skb);
+	sch->qstats.backlog += len;
 	sch->q.qlen++;
 	return NET_XMIT_SUCCESS;
 }
@@ -307,7 +303,8 @@
 	s64 buffer, mtu;
 	u64 rate64 = 0, prate64 = 0;
 
-	err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -390,9 +387,8 @@
 
 	sch_tree_lock(sch);
 	if (child) {
-		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
-					  q->qdisc->qstats.backlog);
-		qdisc_destroy(q->qdisc);
+		qdisc_tree_flush_backlog(q->qdisc);
+		qdisc_put(q->qdisc);
 		q->qdisc = child;
 	}
 	q->limit = qopt->limit;
@@ -438,7 +434,7 @@
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
 	qdisc_watchdog_cancel(&q->watchdog);
-	qdisc_destroy(q->qdisc);
+	qdisc_put(q->qdisc);
 }
 
 static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -448,7 +444,7 @@
 	struct tc_tbf_qopt opt;
 
 	sch->qstats.backlog = q->qdisc->qstats.backlog;
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
 
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 93f04cf..689ef6f 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 950ecf6..6e2eb1d 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # SCTP configuration
 #
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 914750b..41839b8 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -9,22 +10,6 @@
  *
  * This module provides the abstraction for an SCTP association.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -69,7 +54,6 @@
 					const struct sock *sk,
 					enum sctp_scope scope, gfp_t gfp)
 {
-	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
 	struct sctp_paramhdr *p;
 	int i;
@@ -80,6 +64,7 @@
 	/* Discarding const is appropriate here.  */
 	asoc->ep = (struct sctp_endpoint *)ep;
 	asoc->base.sk = (struct sock *)sk;
+	asoc->base.net = sock_net(sk);
 
 	sctp_endpoint_hold(asoc->ep);
 	sock_hold(asoc->base.sk);
@@ -101,7 +86,7 @@
 	 * socket values.
 	 */
 	asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt;
-	asoc->pf_retrans  = net->sctp.pf_retrans;
+	asoc->pf_retrans  = sp->pf_retrans;
 
 	asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial);
 	asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max);
@@ -132,6 +117,8 @@
 	 */
 	asoc->max_burst = sp->max_burst;
 
+	asoc->subscribe = sp->subscribe;
+
 	/* initialize association timers */
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] = asoc->rto_initial;
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] = asoc->rto_initial;
@@ -227,14 +214,6 @@
 	asoc->peer.sack_needed = 1;
 	asoc->peer.sack_generation = 1;
 
-	/* Assume that the peer will tell us if he recognizes ASCONF
-	 * as part of INIT exchange.
-	 * The sctp_addip_noauth option is there for backward compatibility
-	 * and will revert old behavior.
-	 */
-	if (net->sctp.addip_noauth)
-		asoc->peer.asconf_capable = 1;
-
 	/* Create an input queue.  */
 	sctp_inq_init(&asoc->base.inqueue);
 	sctp_inq_set_th_handler(&asoc->base.inqueue, sctp_assoc_bh_rcv);
@@ -274,8 +253,6 @@
 		goto stream_free;
 
 	asoc->active_key_id = ep->active_key_id;
-	asoc->prsctp_enable = ep->prsctp_enable;
-	asoc->reconf_enable = ep->reconf_enable;
 	asoc->strreset_enable = ep->strreset_enable;
 
 	/* Save the hmacs and chunks list into this association */
@@ -1649,8 +1626,11 @@
 	if (preload)
 		idr_preload(gfp);
 	spin_lock_bh(&sctp_assocs_id_lock);
-	/* 0 is not a valid assoc_id, must be >= 1 */
-	ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, 1, 0, GFP_NOWAIT);
+	/* 0, 1, 2 are used as SCTP_FUTURE_ASSOC, SCTP_CURRENT_ASSOC and
+	 * SCTP_ALL_ASSOC, so an available id must be > SCTP_ALL_ASSOC.
+	 */
+	ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, SCTP_ALL_ASSOC + 1, 0,
+			       GFP_NOWAIT);
 	spin_unlock_bh(&sctp_assocs_id_lock);
 	if (preload)
 		idr_preload_end();
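
Note: the idr_alloc_cyclic() change reserves ids 0-2 for SCTP_FUTURE_ASSOC, SCTP_CURRENT_ASSOC and SCTP_ALL_ASSOC, so allocation must start at - and wrap back to - SCTP_ALL_ASSOC + 1. A toy cyclic allocator showing that wrap behaviour (collision handling and locking are omitted, and the tiny id space is only there to make the wrap visible):

#include <stdio.h>

#define SCTP_ALL_ASSOC 2
#define ID_MAX 5

static int next_id = SCTP_ALL_ASSOC + 1;

static int alloc_cyclic(void)
{
	int id = next_id;

	next_id = (next_id >= ID_MAX) ? SCTP_ALL_ASSOC + 1 : next_id + 1;
	return id;	/* never 0, 1 or 2 */
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		printf("assoc_id = %d\n", alloc_cyclic());
	return 0;
}
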
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 5b53761..4278764 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -1,24 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright 2007 Hewlett-Packard Development Company, L.P.
  *
  * This file is part of the SCTP kernel implementation
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -404,7 +389,7 @@
 	/* If we don't support AUTH, or peer is not capable
 	 * we don't need to do anything.
 	 */
-	if (!asoc->ep->auth_enable || !asoc->peer.auth_capable)
+	if (!asoc->peer.auth_capable)
 		return 0;
 
 	/* If the key_id is non-zero and we couldn't find an
@@ -471,12 +456,6 @@
 	struct crypto_shash *tfm = NULL;
 	__u16   id;
 
-	/* If AUTH extension is disabled, we are done */
-	if (!ep->auth_enable) {
-		ep->auth_hmacs = NULL;
-		return 0;
-	}
-
 	/* If the transforms are already allocated, we are done */
 	if (ep->auth_hmacs)
 		return 0;
@@ -696,7 +675,7 @@
 	if (!asoc)
 		return 0;
 
-	if (!asoc->ep->auth_enable || !asoc->peer.auth_capable)
+	if (!asoc->peer.auth_capable)
 		return 0;
 
 	return __sctp_auth_cid(chunk, asoc->peer.peer_chunks);
@@ -708,7 +687,7 @@
 	if (!asoc)
 		return 0;
 
-	if (!asoc->ep->auth_enable)
+	if (!asoc->peer.auth_capable)
 		return 0;
 
 	return __sctp_auth_cid(chunk,
@@ -766,7 +745,6 @@
 		SHASH_DESC_ON_STACK(desc, tfm);
 
 		desc->tfm = tfm;
-		desc->flags = 0;
 		crypto_shash_digest(desc, (u8 *)auth,
 				    end - (unsigned char *)auth, digest);
 		shash_desc_zero(desc);
@@ -853,10 +831,15 @@
 	/* Try to find the given key id to see if
 	 * we are doing a replace, or adding a new key
 	 */
-	if (asoc)
+	if (asoc) {
+		if (!asoc->peer.auth_capable)
+			return -EACCES;
 		sh_keys = &asoc->endpoint_shared_keys;
-	else
+	} else {
+		if (!ep->auth_enable)
+			return -EACCES;
 		sh_keys = &ep->endpoint_shared_keys;
+	}
 
 	key_for_each(shkey, sh_keys) {
 		if (shkey->key_id == auth_key->sca_keynumber) {
@@ -897,10 +880,15 @@
 	int found = 0;
 
 	/* The key identifier MUST correspond to an existing key */
-	if (asoc)
+	if (asoc) {
+		if (!asoc->peer.auth_capable)
+			return -EACCES;
 		sh_keys = &asoc->endpoint_shared_keys;
-	else
+	} else {
+		if (!ep->auth_enable)
+			return -EACCES;
 		sh_keys = &ep->endpoint_shared_keys;
+	}
 
 	key_for_each(key, sh_keys) {
 		if (key->key_id == key_id) {
@@ -933,11 +921,15 @@
 	 * The key identifier MUST correspond to an existing key
 	 */
 	if (asoc) {
+		if (!asoc->peer.auth_capable)
+			return -EACCES;
 		if (asoc->active_key_id == key_id)
 			return -EINVAL;
 
 		sh_keys = &asoc->endpoint_shared_keys;
 	} else {
+		if (!ep->auth_enable)
+			return -EACCES;
 		if (ep->active_key_id == key_id)
 			return -EINVAL;
 
@@ -972,11 +964,15 @@
 	 * The key identifier MUST correspond to an existing key
 	 */
 	if (asoc) {
+		if (!asoc->peer.auth_capable)
+			return -EACCES;
 		if (asoc->active_key_id == key_id)
 			return -EINVAL;
 
 		sh_keys = &asoc->endpoint_shared_keys;
 	} else {
+		if (!ep->auth_enable)
+			return -EACCES;
 		if (ep->active_key_id == key_id)
 			return -EINVAL;
 
@@ -1011,3 +1007,72 @@
 
 	return 0;
 }
+
+int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp)
+{
+	int err = -ENOMEM;
+
+	/* Allocate space for HMACS and CHUNKS authentication
+	 * variables.  These are arrays that we encode directly
+	 * into parameters to make the rest of the operations easier.
+	 */
+	if (!ep->auth_hmacs_list) {
+		struct sctp_hmac_algo_param *auth_hmacs;
+
+		auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids,
+						 SCTP_AUTH_NUM_HMACS), gfp);
+		if (!auth_hmacs)
+			goto nomem;
+		/* Initialize the HMACS parameter.
+		 * SCTP-AUTH: Section 3.3
+		 *    Every endpoint supporting SCTP chunk authentication MUST
+		 *    support the HMAC based on the SHA-1 algorithm.
+		 */
+		auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO;
+		auth_hmacs->param_hdr.length =
+				htons(sizeof(struct sctp_paramhdr) + 2);
+		auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1);
+		ep->auth_hmacs_list = auth_hmacs;
+	}
+
+	if (!ep->auth_chunk_list) {
+		struct sctp_chunks_param *auth_chunks;
+
+		auth_chunks = kzalloc(sizeof(*auth_chunks) +
+				      SCTP_NUM_CHUNK_TYPES, gfp);
+		if (!auth_chunks)
+			goto nomem;
+		/* Initialize the CHUNKS parameter */
+		auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
+		auth_chunks->param_hdr.length =
+				htons(sizeof(struct sctp_paramhdr));
+		ep->auth_chunk_list = auth_chunks;
+	}
+
+	/* Allocate and initialize transform arrays for supported
+	 * HMACs.
+	 */
+	err = sctp_auth_init_hmacs(ep, gfp);
+	if (err)
+		goto nomem;
+
+	return 0;
+
+nomem:
+	/* Free all allocations */
+	kfree(ep->auth_hmacs_list);
+	kfree(ep->auth_chunk_list);
+	ep->auth_hmacs_list = NULL;
+	ep->auth_chunk_list = NULL;
+	return err;
+}
+
+void sctp_auth_free(struct sctp_endpoint *ep)
+{
+	kfree(ep->auth_hmacs_list);
+	kfree(ep->auth_chunk_list);
+	ep->auth_hmacs_list = NULL;
+	ep->auth_chunk_list = NULL;
+	sctp_auth_destroy_hmacs(ep->auth_hmacs);
+	ep->auth_hmacs = NULL;
+}
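
Note: sctp_auth_init() sizes the HMACS parameter with struct_size(), i.e. a fixed header plus a flexible array of hmac ids. A standalone sketch with the size math written out by hand follows; the kernel helper additionally guards against multiplication overflow, and the struct layout here is a simplified stand-in for struct sctp_hmac_algo_param.

#include <arpa/inet.h>	/* htons()/ntohs() */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct hmac_algo_param {
	uint16_t type;
	uint16_t length;
	uint16_t hmac_ids[];	/* flexible array member */
};

#define NUM_HMACS 2

int main(void)
{
	/* sizeof(*p) covers only the header; the array is added by hand. */
	struct hmac_algo_param *p =
		calloc(1, sizeof(*p) + NUM_HMACS * sizeof(p->hmac_ids[0]));

	if (!p)
		return 1;
	/* Advertise one id, matching htons(sizeof(paramhdr) + 2) above. */
	p->length = htons(sizeof(*p) + sizeof(p->hmac_ids[0]));
	p->hmac_ids[0] = htons(1);	/* SHA-1 is mandatory per SCTP-AUTH */
	printf("param length = %u bytes\n", ntohs(p->length));
	free(p);
	return 0;
}
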
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 7df3704..53bc615 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2003
  * Copyright (c) Cisco 1999,2000
@@ -8,22 +9,6 @@
  *
  * A collection class to handle the storage of transport addresses.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -337,6 +322,34 @@
 	return match;
 }
 
+int sctp_bind_addrs_check(struct sctp_sock *sp,
+			  struct sctp_sock *sp2, int cnt2)
+{
+	struct sctp_bind_addr *bp2 = &sp2->ep->base.bind_addr;
+	struct sctp_bind_addr *bp = &sp->ep->base.bind_addr;
+	struct sctp_sockaddr_entry *laddr, *laddr2;
+	bool exist = false;
+	int cnt = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+		list_for_each_entry_rcu(laddr2, &bp2->address_list, list) {
+			if (sp->pf->af->cmp_addr(&laddr->a, &laddr2->a) &&
+			    laddr->valid && laddr2->valid) {
+				exist = true;
+				goto next;
+			}
+		}
+		cnt = 0;
+		break;
+next:
+		cnt++;
+	}
+	rcu_read_unlock();
+
+	return (cnt == cnt2) ? 0 : (exist ? -EEXIST : 1);
+}
+
 /* Does the address 'addr' conflict with any addresses in
  * the bp.
  */
@@ -380,24 +393,19 @@
 {
 	struct sctp_sockaddr_entry *laddr;
 	struct sctp_af *af;
-	int state = -1;
 
 	af = sctp_get_af_specific(addr->sa.sa_family);
 	if (unlikely(!af))
-		return state;
+		return -1;
 
-	rcu_read_lock();
 	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
 		if (!laddr->valid)
 			continue;
-		if (af->cmp_addr(&laddr->a, addr)) {
-			state = laddr->state;
-			break;
-		}
+		if (af->cmp_addr(&laddr->a, addr))
+			return laddr->state;
 	}
-	rcu_read_unlock();
 
-	return state;
+	return -1;
 }
 
 /* Find the first address in the bind address list that is not present in
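
Note: the sctp_bind_addrs_check() added above returns a tri-state: 0 when both sockets bind exactly the same address set, -EEXIST when the sets only partially overlap, and 1 when they share nothing. A userspace sketch of the same convention, with plain int arrays standing in for the RCU address lists:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int addrs_check(const int *a, int na, const int *b, int nb)
{
	bool exist = false;
	int cnt = 0;

	for (int i = 0; i < na; i++) {
		bool found = false;

		for (int j = 0; j < nb; j++)
			if (a[i] == b[j]) {
				found = exist = true;
				break;
			}
		if (!found) {		/* one miss voids a full match */
			cnt = 0;
			break;
		}
		cnt++;
	}
	return (cnt == nb) ? 0 : (exist ? -EEXIST : 1);
}

int main(void)
{
	int x[] = { 1, 2 }, y[] = { 1, 2 }, z[] = { 1, 3 }, w[] = { 4 };

	printf("same: %d  overlap: %d  disjoint: %d\n",
	       addrs_check(x, 2, y, 2), addrs_check(x, 2, z, 2),
	       addrs_check(x, 2, w, 1));
	return 0;
}
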
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index ce80878..cc0405c 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2003, 2004
  *
@@ -5,22 +6,6 @@
  *
  * This file contains the code relating the chunk abstraction.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -86,11 +71,10 @@
 /* Final destruction of datamsg memory. */
 static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
 {
+	struct sctp_association *asoc = NULL;
 	struct list_head *pos, *temp;
 	struct sctp_chunk *chunk;
-	struct sctp_sock *sp;
 	struct sctp_ulpevent *ev;
-	struct sctp_association *asoc = NULL;
 	int error = 0, notify;
 
 	/* If we failed, we may need to notify. */
@@ -108,9 +92,8 @@
 			else
 				error = asoc->outqueue.error;
 
-			sp = sctp_sk(asoc->base.sk);
-			notify = sctp_ulpevent_type_enabled(SCTP_SEND_FAILED,
-							    &sp->subscribe);
+			notify = sctp_ulpevent_type_enabled(asoc->subscribe,
+							    SCTP_SEND_FAILED);
 		}
 
 		/* Generate a SEND FAILED event only if enabled. */
@@ -191,6 +174,12 @@
 	 * the packet
 	 */
 	max_data = asoc->frag_point;
+	if (unlikely(!max_data)) {
+		max_data = sctp_min_frag_point(sctp_sk(asoc->base.sk),
+					       sctp_datachk_len(&asoc->stream));
+		pr_warn_ratelimited("%s: asoc:%p frag_point is zero, forcing max_data to default minimum (%zu)",
+				    __func__, asoc, max_data);
+	}
 
 	/* If the peer requested that we authenticate DATA chunks
 	 * we need to account for bundling of the AUTH chunks along with
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 8f6c2e8..c4d9c7f 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -9,22 +10,6 @@
  * This file converts numerical ID value to alphabetical names for SCTP
  * terms such as chunk type, parameter time, event type, etc.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 078f01a..0851166 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright Red Hat Inc. 2017
  *
@@ -5,22 +6,6 @@
  *
  * These functions implement sctp diag support.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  *    email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -190,7 +175,7 @@
 		mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
 		mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
 		mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
-		mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+		mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
 		mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
 
 		if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0)
@@ -256,6 +241,7 @@
 		+ nla_total_size(1) /* INET_DIAG_TOS */
 		+ nla_total_size(1) /* INET_DIAG_TCLASS */
 		+ nla_total_size(4) /* INET_DIAG_MARK */
+		+ nla_total_size(4) /* INET_DIAG_CLASS_ID */
 		+ nla_total_size(addrlen * asoc->peer.transport_count)
 		+ nla_total_size(addrlen * addrcnt)
 		+ nla_total_size(sizeof(struct inet_diag_meminfo))
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 40c7eb9..3067deb 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -10,22 +11,6 @@
  *
  * This abstraction represents an SCTP endpoint.
  *
- * The SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * The SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -58,54 +43,20 @@
 						gfp_t gfp)
 {
 	struct net *net = sock_net(sk);
-	struct sctp_hmac_algo_param *auth_hmacs = NULL;
-	struct sctp_chunks_param *auth_chunks = NULL;
 	struct sctp_shared_key *null_key;
-	int err;
 
 	ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
 	if (!ep->digest)
 		return NULL;
 
+	ep->asconf_enable = net->sctp.addip_enable;
 	ep->auth_enable = net->sctp.auth_enable;
 	if (ep->auth_enable) {
-		/* Allocate space for HMACS and CHUNKS authentication
-		 * variables.  There are arrays that we encode directly
-		 * into parameters to make the rest of the operations easier.
-		 */
-		auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids,
-						 SCTP_AUTH_NUM_HMACS), gfp);
-		if (!auth_hmacs)
+		if (sctp_auth_init(ep, gfp))
 			goto nomem;
-
-		auth_chunks = kzalloc(sizeof(*auth_chunks) +
-				      SCTP_NUM_CHUNK_TYPES, gfp);
-		if (!auth_chunks)
-			goto nomem;
-
-		/* Initialize the HMACS parameter.
-		 * SCTP-AUTH: Section 3.3
-		 *    Every endpoint supporting SCTP chunk authentication MUST
-		 *    support the HMAC based on the SHA-1 algorithm.
-		 */
-		auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO;
-		auth_hmacs->param_hdr.length =
-					htons(sizeof(struct sctp_paramhdr) + 2);
-		auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1);
-
-		/* Initialize the CHUNKS parameter */
-		auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
-		auth_chunks->param_hdr.length =
-					htons(sizeof(struct sctp_paramhdr));
-
-		/* If the Add-IP functionality is enabled, we must
-		 * authenticate, ASCONF and ASCONF-ACK chunks
-		 */
-		if (net->sctp.addip_enable) {
-			auth_chunks->chunks[0] = SCTP_CID_ASCONF;
-			auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
-			auth_chunks->param_hdr.length =
-					htons(sizeof(struct sctp_paramhdr) + 2);
+		if (ep->asconf_enable) {
+			sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF);
+			sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK);
 		}
 	}
 
@@ -126,10 +77,6 @@
 	/* Initialize the bind addr area */
 	sctp_bind_addr_init(&ep->base.bind_addr, 0);
 
-	/* Remember who we are attached to.  */
-	ep->base.sk = sk;
-	sock_hold(ep->base.sk);
-
 	/* Create the lists of associations.  */
 	INIT_LIST_HEAD(&ep->asocs);
 
@@ -150,31 +97,27 @@
 	INIT_LIST_HEAD(&ep->endpoint_shared_keys);
 	null_key = sctp_auth_shkey_create(0, gfp);
 	if (!null_key)
-		goto nomem;
+		goto nomem_shkey;
 
 	list_add(&null_key->key_list, &ep->endpoint_shared_keys);
 
-	/* Allocate and initialize transorms arrays for supported HMACs. */
-	err = sctp_auth_init_hmacs(ep, gfp);
-	if (err)
-		goto nomem_hmacs;
-
 	/* Add the null key to the endpoint shared keys list and
 	 * set the hmacs and chunks pointers.
 	 */
-	ep->auth_hmacs_list = auth_hmacs;
-	ep->auth_chunk_list = auth_chunks;
 	ep->prsctp_enable = net->sctp.prsctp_enable;
 	ep->reconf_enable = net->sctp.reconf_enable;
+	ep->ecn_enable = net->sctp.ecn_enable;
+
+	/* Remember who we are attached to.  */
+	ep->base.sk = sk;
+	ep->base.net = sock_net(sk);
+	sock_hold(ep->base.sk);
 
 	return ep;
 
-nomem_hmacs:
-	sctp_auth_destroy_keys(&ep->endpoint_shared_keys);
+nomem_shkey:
+	sctp_auth_free(ep);
 nomem:
-	/* Free all allocations */
-	kfree(auth_hmacs);
-	kfree(auth_chunks);
 	kfree(ep->digest);
 	return NULL;
 
@@ -257,11 +200,7 @@
 	 * chunks and hmacs arrays that were allocated
 	 */
 	sctp_auth_destroy_keys(&ep->endpoint_shared_keys);
-	kfree(ep->auth_hmacs_list);
-	kfree(ep->auth_chunk_list);
-
-	/* AUTH - Free any allocated HMAC transform containers */
-	sctp_auth_destroy_hmacs(ep->auth_hmacs);
+	sctp_auth_free(ep);
 
 	/* Cleanup. */
 	sctp_inq_free(&ep->base.inqueue);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 5c36a99..4d2bcfc 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -10,22 +11,6 @@
  *
  * These functions handle all input from the IP layer into SCTP.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -57,6 +42,7 @@
 #include <net/sctp/checksum.h>
 #include <net/net_namespace.h>
 #include <linux/rhashtable.h>
+#include <net/sock_reuseport.h>
 
 /* Forward declarations for internal helpers. */
 static int sctp_rcv_ootb(struct sk_buff *);
@@ -65,8 +51,10 @@
 				      const union sctp_addr *paddr,
 				      const union sctp_addr *laddr,
 				      struct sctp_transport **transportp);
-static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
-						const union sctp_addr *laddr);
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(
+					struct net *net, struct sk_buff *skb,
+					const union sctp_addr *laddr,
+					const union sctp_addr *daddr);
 static struct sctp_association *__sctp_lookup_association(
 					struct net *net,
 					const union sctp_addr *local,
@@ -171,7 +159,7 @@
 	asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport);
 
 	if (!asoc)
-		ep = __sctp_rcv_lookup_endpoint(net, &dest);
+		ep = __sctp_rcv_lookup_endpoint(net, skb, &dest, &src);
 
 	/* Retrieve the common input handling substructure. */
 	rcvr = asoc ? &asoc->base : &ep->base;
@@ -213,7 +201,7 @@
 
 	if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
 		goto discard_release;
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	if (sk_filter(sk, skb))
 		goto discard_release;
@@ -255,7 +243,7 @@
 		bh_lock_sock(sk);
 	}
 
-	if (sock_owned_by_user(sk)) {
+	if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
 		if (sctp_add_backlog(sk, skb)) {
 			bh_unlock_sock(sk);
 			sctp_chunk_free(chunk);
@@ -333,8 +321,8 @@
 		local_bh_disable();
 		bh_lock_sock(sk);
 
-		if (sock_owned_by_user(sk)) {
-			if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+		if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
+			if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
 				sctp_chunk_free(chunk);
 			else
 				backloged = 1;
@@ -348,7 +336,13 @@
 		if (backloged)
 			return 0;
 	} else {
-		sctp_inq_push(inqueue, chunk);
+		if (!sctp_newsk_ready(sk)) {
+			if (!sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
+				return 0;
+			sctp_chunk_free(chunk);
+		} else {
+			sctp_inq_push(inqueue, chunk);
+		}
 	}
 
 done:
@@ -370,7 +364,7 @@
 	struct sctp_ep_common *rcvr = chunk->rcvr;
 	int ret;
 
-	ret = sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+	ret = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
 	if (!ret) {
 		/* Hold the assoc/ep while hanging on the backlog queue.
 		 * This way, we know structures we need will not disappear
@@ -574,7 +568,7 @@
  * is probably better.
  *
  */
-void sctp_v4_err(struct sk_buff *skb, __u32 info)
+int sctp_v4_err(struct sk_buff *skb, __u32 info)
 {
 	const struct iphdr *iph = (const struct iphdr *)skb->data;
 	const int ihlen = iph->ihl * 4;
@@ -599,7 +593,7 @@
 	skb->transport_header = savesctp;
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
-		return;
+		return -ENOENT;
 	}
 	/* Warning:  The sock lock is held.  Remember to call
 	 * sctp_err_finish!
@@ -653,6 +647,7 @@
 
 out_unlock:
 	sctp_err_finish(sk, transport);
+	return 0;
 }
 
 /*
@@ -720,43 +715,87 @@
 }
 
 /* Insert endpoint into the hash table.  */
-static void __sctp_hash_endpoint(struct sctp_endpoint *ep)
+static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
 {
-	struct net *net = sock_net(ep->base.sk);
-	struct sctp_ep_common *epb;
+	struct sock *sk = ep->base.sk;
+	struct net *net = sock_net(sk);
 	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
 
 	epb = &ep->base;
-
 	epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
 	head = &sctp_ep_hashtable[epb->hashent];
 
+	if (sk->sk_reuseport) {
+		bool any = sctp_is_ep_boundall(sk);
+		struct sctp_ep_common *epb2;
+		struct list_head *list;
+		int cnt = 0, err = 1;
+
+		list_for_each(list, &ep->base.bind_addr.address_list)
+			cnt++;
+
+		sctp_for_each_hentry(epb2, &head->chain) {
+			struct sock *sk2 = epb2->sk;
+
+			if (!net_eq(sock_net(sk2), net) || sk2 == sk ||
+			    !uid_eq(sock_i_uid(sk2), sock_i_uid(sk)) ||
+			    !sk2->sk_reuseport)
+				continue;
+
+			err = sctp_bind_addrs_check(sctp_sk(sk2),
+						    sctp_sk(sk), cnt);
+			if (!err) {
+				err = reuseport_add_sock(sk, sk2, any);
+				if (err)
+					return err;
+				break;
+			} else if (err < 0) {
+				return err;
+			}
+		}
+
+		if (err) {
+			err = reuseport_alloc(sk, any);
+			if (err)
+				return err;
+		}
+	}
+
 	write_lock(&head->lock);
 	hlist_add_head(&epb->node, &head->chain);
 	write_unlock(&head->lock);
+	return 0;
 }
 
 /* Add an endpoint to the hash. Local BH-safe. */
-void sctp_hash_endpoint(struct sctp_endpoint *ep)
+int sctp_hash_endpoint(struct sctp_endpoint *ep)
 {
+	int err;
+
 	local_bh_disable();
-	__sctp_hash_endpoint(ep);
+	err = __sctp_hash_endpoint(ep);
 	local_bh_enable();
+
+	return err;
 }
 
 /* Remove endpoint from the hash table.  */
 static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
 {
-	struct net *net = sock_net(ep->base.sk);
+	struct sock *sk = ep->base.sk;
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
 
 	epb = &ep->base;
 
-	epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
+	epb->hashent = sctp_ep_hashfn(sock_net(sk), epb->bind_addr.port);
 
 	head = &sctp_ep_hashtable[epb->hashent];
 
+	if (rcu_access_pointer(sk->sk_reuseport_cb))
+		reuseport_detach_sock(sk);
+
 	write_lock(&head->lock);
 	hlist_del_init(&epb->node);
 	write_unlock(&head->lock);
@@ -770,16 +809,35 @@
 	local_bh_enable();
 }
 
+static inline __u32 sctp_hashfn(const struct net *net, __be16 lport,
+				const union sctp_addr *paddr, __u32 seed)
+{
+	__u32 addr;
+
+	if (paddr->sa.sa_family == AF_INET6)
+		addr = jhash(&paddr->v6.sin6_addr, 16, seed);
+	else
+		addr = (__force __u32)paddr->v4.sin_addr.s_addr;
+
+	return  jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
+			     (__force __u32)lport, net_hash_mix(net), seed);
+}
+
 /* Look up an endpoint. */
-static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
-						const union sctp_addr *laddr)
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(
+					struct net *net, struct sk_buff *skb,
+					const union sctp_addr *laddr,
+					const union sctp_addr *paddr)
 {
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
 	struct sctp_endpoint *ep;
+	struct sock *sk;
+	__be16 lport;
 	int hash;
 
-	hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port));
+	lport = laddr->v4.sin_port;
+	hash = sctp_ep_hashfn(net, ntohs(lport));
 	head = &sctp_ep_hashtable[hash];
 	read_lock(&head->lock);
 	sctp_for_each_hentry(epb, &head->chain) {
@@ -791,6 +849,15 @@
 	ep = sctp_sk(net->sctp.ctl_sock)->ep;
 
 hit:
+	sk = ep->base.sk;
+	if (sk->sk_reuseport) {
+		__u32 phash = sctp_hashfn(net, lport, paddr, 0);
+
+		sk = reuseport_select_sock(sk, phash, skb,
+					   sizeof(struct sctphdr));
+		if (sk)
+			ep = sctp_sk(sk)->ep;
+	}
 	sctp_endpoint_hold(ep);
 	read_unlock(&head->lock);
 	return ep;
@@ -815,7 +882,7 @@
 	if (!sctp_transport_hold(t))
 		return err;
 
-	if (!net_eq(sock_net(t->asoc->base.sk), x->net))
+	if (!net_eq(t->asoc->base.net, x->net))
 		goto out;
 	if (x->lport != htons(t->asoc->base.bind_addr.port))
 		goto out;
@@ -829,35 +896,17 @@
 static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
 {
 	const struct sctp_transport *t = data;
-	const union sctp_addr *paddr = &t->ipaddr;
-	const struct net *net = sock_net(t->asoc->base.sk);
-	__be16 lport = htons(t->asoc->base.bind_addr.port);
-	__u32 addr;
 
-	if (paddr->sa.sa_family == AF_INET6)
-		addr = jhash(&paddr->v6.sin6_addr, 16, seed);
-	else
-		addr = (__force __u32)paddr->v4.sin_addr.s_addr;
-
-	return  jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
-			     (__force __u32)lport, net_hash_mix(net), seed);
+	return sctp_hashfn(t->asoc->base.net,
+			   htons(t->asoc->base.bind_addr.port),
+			   &t->ipaddr, seed);
 }
 
 static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed)
 {
 	const struct sctp_hash_cmp_arg *x = data;
-	const union sctp_addr *paddr = x->paddr;
-	const struct net *net = x->net;
-	__be16 lport = x->lport;
-	__u32 addr;
 
-	if (paddr->sa.sa_family == AF_INET6)
-		addr = jhash(&paddr->v6.sin6_addr, 16, seed);
-	else
-		addr = (__force __u32)paddr->v4.sin_addr.s_addr;
-
-	return  jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
-			     (__force __u32)lport, net_hash_mix(net), seed);
+	return sctp_hashfn(x->net, x->lport, x->paddr, seed);
 }
 
 static const struct rhashtable_params sctp_hash_params = {
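
Note: with SO_REUSEPORT, __sctp_rcv_lookup_endpoint() now hashes the flow via the shared sctp_hashfn() and lets reuseport_select_sock() pick one endpoint out of the group, so flows spread deterministically across listeners. A toy model of that selection follows; the trivial mixer and the modulo are stand-ins for jhash and the kernel's reciprocal_scale().

#include <stdint.h>
#include <stdio.h>

static uint32_t flow_hash(uint32_t saddr, uint16_t sport, uint16_t lport)
{
	uint32_t h = saddr ^ ((uint32_t)sport << 16 | lport);

	h ^= h >> 16;		/* toy mixer, not jhash */
	h *= 0x45d9f3b;
	return h ^ (h >> 16);
}

int main(void)
{
	const unsigned int group_size = 4;	/* sockets sharing the port */

	for (uint32_t saddr = 0xc0a80001; saddr < 0xc0a80005; saddr++) {
		uint32_t h = flow_hash(saddr, 40000, 9899);

		printf("peer %x -> socket %u of %u\n", saddr,
		       h % group_size, group_size);
	}
	return 0;
}
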
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index eb93ffe..7182c5a 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -11,22 +12,6 @@
  * (which might be bundles or fragments of chunks) and out of which you
  * pop SCTP whole chunks.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index fc6c5e4..dd860fe 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2002, 2004
  * Copyright (c) 2001 Nokia, Inc.
@@ -8,22 +9,6 @@
  *
  * SCTP over IPv6.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *		   ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -97,10 +82,9 @@
 
 	switch (ev) {
 	case NETDEV_UP:
-		addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
+		addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
 		if (addr) {
 			addr->a.v6.sin6_family = AF_INET6;
-			addr->a.v6.sin6_port = 0;
 			addr->a.v6.sin6_addr = ifa->addr;
 			addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
 			addr->valid = 1;
@@ -138,7 +122,7 @@
 };
 
 /* ICMP error handler. */
-static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			u8 type, u8 code, int offset, __be32 info)
 {
 	struct inet6_dev *idev;
@@ -147,7 +131,7 @@
 	struct sctp_transport *transport;
 	struct ipv6_pinfo *np;
 	__u16 saveip, savesctp;
-	int err;
+	int err, ret = 0;
 	struct net *net = dev_net(skb->dev);
 
 	idev = in6_dev_get(skb->dev);
@@ -163,6 +147,7 @@
 	skb->transport_header = savesctp;
 	if (!sk) {
 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS);
+		ret = -ENOENT;
 		goto out;
 	}
 
@@ -202,6 +187,8 @@
 out:
 	if (likely(idev != NULL))
 		in6_dev_put(idev);
+
+	return ret;
 }
 
 static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
@@ -228,7 +215,7 @@
 
 	rcu_read_lock();
 	res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
-		       tclass);
+		       tclass, sk->sk_priority);
 	rcu_read_unlock();
 	return res;
 }
@@ -266,7 +253,7 @@
 		struct ip6_flowlabel *flowlabel;
 
 		flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
-		if (!flowlabel)
+		if (IS_ERR(flowlabel))
 			goto out;
 		fl6_sock_release(flowlabel);
 	}
@@ -278,7 +265,8 @@
 
 	if (saddr) {
 		fl6->saddr = saddr->v6.sin6_addr;
-		fl6->fl6_sport = saddr->v6.sin6_port;
+		if (!fl6->fl6_sport)
+			fl6->fl6_sport = saddr->v6.sin6_port;
 
 		pr_debug("src=%pI6 - ", &fl6->saddr);
 	}
@@ -430,7 +418,6 @@
 		addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
 		if (addr) {
 			addr->a.v6.sin6_family = AF_INET6;
-			addr->a.v6.sin6_port = 0;
 			addr->a.v6.sin6_addr = ifp->addr;
 			addr->a.v6.sin6_scope_id = dev->ifindex;
 			addr->valid = 1;
@@ -1028,6 +1015,7 @@
 	.getname	   = sctp_getname,
 	.poll		   = sctp_poll,
 	.ioctl		   = inet6_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sctp_inet_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index a6179b2..0400c96 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  *
@@ -7,22 +8,6 @@
  * object allocations/deallocations for types instrumented for this
  * via the proc fs.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 123e9f2..74847d6 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * sctp_offload - GRO/GSO Offloading for SCTP
  *
  * Copyright (C) 2015, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -36,6 +27,7 @@
 {
 	skb->ip_summed = CHECKSUM_NONE;
 	skb->csum_not_inet = 0;
+	gso_reset_checksum(skb, ~0);
 	return sctp_compute_cksum(skb, skb_transport_offset(skb));
 }
 
@@ -102,11 +94,6 @@
 	},
 };
 
-static const struct skb_checksum_ops crc32c_csum_ops = {
-	.update  = sctp_csum_update,
-	.combine = sctp_csum_combine,
-};
-
 int __init sctp_offload_init(void)
 {
 	int ret;
@@ -119,7 +106,7 @@
 	if (ret)
 		goto ipv4;
 
-	crc32c_csum_stub = &crc32c_csum_ops;
+	crc32c_csum_stub = &sctp_csum_ops;
 	return ret;
 
 ipv4:
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0860122..dbda7e7 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -7,22 +8,6 @@
  *
  * These functions handle output processing.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -297,6 +282,9 @@
 					sctp_chunk_free(sack);
 					goto out;
 				}
+				SCTP_INC_STATS(sock_net(asoc->base.sk),
+					       SCTP_MIB_OUTCTRLCHUNKS);
+				asoc->stats.octrlchunks++;
 				asoc->peer.sack_needed = 0;
 				if (del_timer(timer))
 					sctp_association_put(asoc);
@@ -399,25 +387,6 @@
 	return retval;
 }
 
-static void sctp_packet_release_owner(struct sk_buff *skb)
-{
-	sk_free(skb->sk);
-}
-
-static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
-	skb_orphan(skb);
-	skb->sk = sk;
-	skb->destructor = sctp_packet_release_owner;
-
-	/*
-	 * The data chunks have already been accounted for in sctp_sendmsg(),
-	 * therefore only reserve a single byte to keep socket around until
-	 * the packet has been transmitted.
-	 */
-	refcount_inc(&sk->sk_wmem_alloc);
-}
-
 static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
 {
 	if (SCTP_OUTPUT_CB(head)->last == head)
@@ -429,6 +398,7 @@
 	head->truesize += skb->truesize;
 	head->data_len += skb->len;
 	head->len += skb->len;
+	refcount_add(skb->truesize, &head->sk->sk_wmem_alloc);
 
 	__skb_header_release(skb);
 }
@@ -604,7 +574,7 @@
 	if (!head)
 		goto out;
 	skb_reserve(head, packet->overhead + MAX_HEADER);
-	sctp_packet_set_owner_w(head, sk);
+	skb_set_owner_w(head, sk);
 
 	/* set sctp header */
 	sh = skb_push(head, sizeof(struct sctphdr));
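
The two output.c hunks above drop SCTP's home-grown one-byte socket pinning in favor of the stock skb_set_owner_w(), with sctp_packet_gso_append() now charging each appended fragment's full truesize to sk_wmem_alloc. A simplified sketch of the ownership semantics being relied on (not a verbatim copy of the helper):

    /* Sketch: writer-side ownership charges truesize up front and
     * uncharges it in the destructor, keeping sk_wmem_alloc consistent
     * with the refcount_add() done per GSO fragment above.
     */
    static void example_set_owner_w(struct sk_buff *skb, struct sock *sk)
    {
            skb_orphan(skb);                /* detach any previous owner */
            skb->sk = sk;
            skb->destructor = sock_wfree;   /* uncharges truesize on free */
            refcount_add(skb->truesize, &sk->sk_wmem_alloc);
    }
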
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 7bb8e56..0dab62b 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -9,22 +10,6 @@
  * These functions implement the sctp_outq class.   The outqueue handles
  * bundling and queueing of outgoing SCTP chunks.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -212,7 +197,7 @@
 	INIT_LIST_HEAD(&q->retransmit);
 	INIT_LIST_HEAD(&q->sacked);
 	INIT_LIST_HEAD(&q->abandoned);
-	sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT);
+	sctp_sched_set_sched(asoc, sctp_sk(asoc->base.sk)->default_ss);
 }
 
 /* Free the outqueue structure and any related pending chunks.
@@ -385,9 +370,7 @@
 			asoc->outqueue.outstanding_bytes -= sctp_data_size(chk);
 		}
 
-		msg_len -= SCTP_DATA_SNDSIZE(chk) +
-			   sizeof(struct sk_buff) +
-			   sizeof(struct sctp_chunk);
+		msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk);
 		if (msg_len <= 0)
 			break;
 	}
@@ -421,9 +404,7 @@
 			streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
 		}
 
-		msg_len -= SCTP_DATA_SNDSIZE(chk) +
-			   sizeof(struct sk_buff) +
-			   sizeof(struct sctp_chunk);
+		msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk);
 		sctp_chunk_free(chk);
 		if (msg_len <= 0)
 			break;
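
Both prune hunks above release exactly what the send path charges: chk->skb->truesize plus sizeof(struct sctp_chunk), replacing the old SCTP_DATA_SNDSIZE()-based estimate that could drift from the real charge. A sketch of the invariant, with sctp_chunk_cost() as a purely illustrative helper:

    /* Sketch: one formula for both sides, or sndbuf accounting leaks. */
    static inline int sctp_chunk_cost(const struct sctp_chunk *chk)
    {
            return chk->skb->truesize + sizeof(struct sctp_chunk);
    }

    /* charge (sctp_set_owner_w):    asoc->sndbuf_used += sctp_chunk_cost(chk); */
    /* release (prune/abandon path): msg_len           -= sctp_chunk_cost(chk); */
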
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index c0817f7..782d673 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -10,22 +11,6 @@
 * functions--this file contains the functions which populate the struct proto
  * for SCTP which is the BOTTOM of the sockets interface.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -53,7 +38,7 @@
 int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
 			    void *arg) { \
 	int error = 0; \
-	enum sctp_event event_type; union sctp_subtype subtype; \
+	enum sctp_event_type event_type; union sctp_subtype subtype; \
 	enum sctp_state state; \
 	struct sctp_endpoint *ep; \
 	\
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index a644292..f7da88a 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -1,24 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * Copyright (c) 2003 International Business Machines, Corp.
  *
  * This file is part of the SCTP kernel implementation
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e948db2..08d14d8 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -10,22 +11,6 @@
  *
  * Initialization/cleanup for SCTP protocol support.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -46,7 +31,7 @@
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
 #include <linux/seq_file.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
@@ -96,12 +81,11 @@
 		return;
 	}
 
-	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+	in_dev_for_each_ifa_rcu(ifa, in_dev) {
 		/* Add the address to the local list.  */
 		addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
 		if (addr) {
 			addr->a.v4.sin_family = AF_INET;
-			addr->a.v4.sin_port = 0;
 			addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
 			addr->valid = 1;
 			INIT_LIST_HEAD(&addr->list);
@@ -441,7 +425,8 @@
 	}
 	if (saddr) {
 		fl4->saddr = saddr->v4.sin_addr.s_addr;
-		fl4->fl4_sport = saddr->v4.sin_port;
+		if (!fl4->fl4_sport)
+			fl4->fl4_sport = saddr->v4.sin_port;
 	}
 
 	pr_debug("%s: dst:%pI4, src:%pI4 - ", __func__, &fl4->daddr,
@@ -600,6 +585,7 @@
 static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr)
 {
 	/* No address mapping for V4 sockets */
+	memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
 	return sizeof(struct sockaddr_in);
 }
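
The memset above closes a minor infoleak: struct sockaddr_in ends in an 8-byte sin_zero pad, and sctp_v4_addr_to_user() hands the union back for copy to user space, so any stale kernel bytes in the pad would escape. Sketch of the rule:

    /* Sketch: zero sockaddr padding before it can reach user space. */
    static int example_v4_addr_to_user(union sctp_addr *addr)
    {
            memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
            return sizeof(struct sockaddr_in);   /* length valid for copyout */
    }
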
 
@@ -776,10 +762,9 @@
 
 	switch (ev) {
 	case NETDEV_UP:
-		addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
+		addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
 		if (addr) {
 			addr->a.v4.sin_family = AF_INET;
-			addr->a.v4.sin_port = 0;
 			addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
 			addr->valid = 1;
 			spin_lock_bh(&net->sctp.local_addr_lock);
@@ -1026,6 +1011,7 @@
 	.getname	   = inet_getname,	/* Semantics are different.  */
 	.poll		   = sctp_poll,
 	.ioctl		   = inet_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sctp_inet_listen,
 	.shutdown	   = inet_shutdown,	/* Looks harmless.  */
 	.setsockopt	   = sock_common_setsockopt, /* IP_SOL IP_OPTION is a problem */
@@ -1268,6 +1254,9 @@
 	/* Disable AUTH by default. */
 	net->sctp.auth_enable = 0;
 
+	/* Enable ECN by default. */
+	net->sctp.ecn_enable = 1;
+
 	/* Set SCOPE policy to enabled */
 	net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE;
 
@@ -1350,7 +1339,7 @@
 	return status;
 }
 
-static void __net_init sctp_ctrlsock_exit(struct net *net)
+static void __net_exit sctp_ctrlsock_exit(struct net *net)
 {
 	/* Free the control endpoint.  */
 	inet_ctl_sock_destroy(net->sctp.ctl_sock);
@@ -1368,6 +1357,7 @@
 	int status = -EINVAL;
 	unsigned long goal;
 	unsigned long limit;
+	unsigned long nr_pages = totalram_pages();
 	int max_share;
 	int order;
 	int num_entries;
@@ -1426,10 +1416,10 @@
 	 * The methodology is similar to that of the tcp hash tables.
 	 * Though not identical.  Start by getting a goal size
 	 */
-	if (totalram_pages >= (128 * 1024))
-		goal = totalram_pages >> (22 - PAGE_SHIFT);
+	if (nr_pages >= (128 * 1024))
+		goal = nr_pages >> (22 - PAGE_SHIFT);
 	else
-		goal = totalram_pages >> (24 - PAGE_SHIFT);
+		goal = nr_pages >> (24 - PAGE_SHIFT);
 
 	/* Then compute the page order for said goal */
 	order = get_order(goal);
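
totalram_pages is now an accessor function (the underlying counter became an atomic_long_t), which is why the hunk above snapshots it once into nr_pages: reading it twice could yield different values under memory hotplug, making the branch and the shift disagree. Sketch, assuming only the public accessor:

    #include <linux/mm.h>

    static unsigned long example_hash_goal(void)
    {
            unsigned long nr_pages = totalram_pages();    /* read once */

            if (nr_pages >= (128 * 1024))                 /* >= 512MB at 4K pages */
                    return nr_pages >> (22 - PAGE_SHIFT); /* ~1 unit per 4MB */
            return nr_pages >> (24 - PAGE_SHIFT);         /* ~1 unit per 16MB */
    }
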
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f4ac6c5..48d6395 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -10,22 +11,6 @@
  * to implement the state operations.  These functions implement the
  * steps which require modifying existing data structures.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -222,7 +207,6 @@
 				  const struct sctp_bind_addr *bp,
 				  gfp_t gfp, int vparam_len)
 {
-	struct net *net = sock_net(asoc->base.sk);
 	struct sctp_supported_ext_param ext_param;
 	struct sctp_adaptation_ind_param aiparam;
 	struct sctp_paramhdr *auth_chunks = NULL;
@@ -260,9 +244,11 @@
 
 	chunksize = sizeof(init) + addrs_len;
 	chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types));
-	chunksize += sizeof(ecap_param);
 
-	if (asoc->prsctp_enable)
+	if (asoc->ep->ecn_enable)
+		chunksize += sizeof(ecap_param);
+
+	if (asoc->ep->prsctp_enable)
 		chunksize += sizeof(prsctp_param);
 
 	/* ADDIP: Section 4.2.7:
@@ -270,13 +256,13 @@
 	 *  the ASCONF,the ASCONF-ACK, and the AUTH  chunks in its INIT and
 	 *  INIT-ACK parameters.
 	 */
-	if (net->sctp.addip_enable) {
+	if (asoc->ep->asconf_enable) {
 		extensions[num_ext] = SCTP_CID_ASCONF;
 		extensions[num_ext+1] = SCTP_CID_ASCONF_ACK;
 		num_ext += 2;
 	}
 
-	if (asoc->reconf_enable) {
+	if (asoc->ep->reconf_enable) {
 		extensions[num_ext] = SCTP_CID_RECONF;
 		num_ext += 1;
 	}
@@ -284,7 +270,7 @@
 	if (sp->adaptation_ind)
 		chunksize += sizeof(aiparam);
 
-	if (sp->strm_interleave) {
+	if (asoc->ep->intl_enable) {
 		extensions[num_ext] = SCTP_CID_I_DATA;
 		num_ext += 1;
 	}
@@ -351,7 +337,8 @@
 	sctp_addto_chunk(retval, sizeof(sat), &sat);
 	sctp_addto_chunk(retval, num_types * sizeof(__u16), &types);
 
-	sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
+	if (asoc->ep->ecn_enable)
+		sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
 
 	/* Add the supported extensions parameter.  Be nice and add this
 	 * first before adding the parameters for the extensions themselves
@@ -363,7 +350,7 @@
 		sctp_addto_param(retval, num_ext, extensions);
 	}
 
-	if (asoc->prsctp_enable)
+	if (asoc->ep->prsctp_enable)
 		sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
 
 	if (sp->adaptation_ind) {
@@ -453,7 +440,7 @@
 	if (sp->adaptation_ind)
 		chunksize += sizeof(aiparam);
 
-	if (asoc->intl_enable) {
+	if (asoc->peer.intl_capable) {
 		extensions[num_ext] = SCTP_CID_I_DATA;
 		num_ext += 1;
 	}
@@ -495,7 +482,10 @@
 	 *
 	 * [INIT ACK back to where the INIT came from.]
 	 */
-	retval->transport = chunk->transport;
+	if (chunk->transport)
+		retval->transport =
+			sctp_assoc_lookup_paddr(asoc,
+						&chunk->transport->ipaddr);
 
 	retval->subh.init_hdr =
 		sctp_addto_chunk(retval, sizeof(initack), &initack);
@@ -642,8 +632,10 @@
 	 *
 	 * [COOKIE ACK back to where the COOKIE ECHO came from.]
 	 */
-	if (retval && chunk)
-		retval->transport = chunk->transport;
+	if (retval && chunk && chunk->transport)
+		retval->transport =
+			sctp_assoc_lookup_paddr(asoc,
+						&chunk->transport->ipaddr);
 
 	return retval;
 }
@@ -1679,7 +1671,6 @@
 
 		/* Sign the message.  */
 		desc->tfm = sctp_sk(ep->base.sk)->hmac;
-		desc->flags = 0;
 
 		err = crypto_shash_setkey(desc->tfm, ep->secret_key,
 					  sizeof(ep->secret_key)) ?:
@@ -1750,7 +1741,6 @@
 		int err;
 
 		desc->tfm = sctp_sk(ep->base.sk)->hmac;
-		desc->flags = 0;
 
 		err = crypto_shash_setkey(desc->tfm, ep->secret_key,
 					  sizeof(ep->secret_key)) ?:
@@ -1976,7 +1966,9 @@
 	return 0;
 }
 
-static int sctp_verify_ext_param(struct net *net, union sctp_params param)
+static int sctp_verify_ext_param(struct net *net,
+				 const struct sctp_endpoint *ep,
+				 union sctp_params param)
 {
 	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
 	int have_asconf = 0;
@@ -2003,7 +1995,7 @@
 	if (net->sctp.addip_noauth)
 		return 1;
 
-	if (net->sctp.addip_enable && !have_auth && have_asconf)
+	if (ep->asconf_enable && !have_auth && have_asconf)
 		return 0;
 
 	return 1;
@@ -2013,18 +2005,16 @@
 				   union sctp_params param)
 {
 	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
-	struct net *net = sock_net(asoc->base.sk);
 	int i;
 
 	for (i = 0; i < num_ext; i++) {
 		switch (param.ext->chunks[i]) {
 		case SCTP_CID_RECONF:
-			if (asoc->reconf_enable &&
-			    !asoc->peer.reconf_capable)
+			if (asoc->ep->reconf_enable)
 				asoc->peer.reconf_capable = 1;
 			break;
 		case SCTP_CID_FWD_TSN:
-			if (asoc->prsctp_enable && !asoc->peer.prsctp_capable)
+			if (asoc->ep->prsctp_enable)
 				asoc->peer.prsctp_capable = 1;
 			break;
 		case SCTP_CID_AUTH:
@@ -2036,12 +2026,12 @@
 			break;
 		case SCTP_CID_ASCONF:
 		case SCTP_CID_ASCONF_ACK:
-			if (net->sctp.addip_enable)
+			if (asoc->ep->asconf_enable)
 				asoc->peer.asconf_capable = 1;
 			break;
 		case SCTP_CID_I_DATA:
-			if (sctp_sk(asoc->base.sk)->strm_interleave)
-				asoc->intl_enable = 1;
+			if (asoc->ep->intl_enable)
+				asoc->peer.intl_capable = 1;
 			break;
 		default:
 			break;
@@ -2158,14 +2148,14 @@
 		break;
 
 	case SCTP_PARAM_SUPPORTED_EXT:
-		if (!sctp_verify_ext_param(net, param))
+		if (!sctp_verify_ext_param(net, ep, param))
 			return SCTP_IERROR_ABORT;
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
-		if (net->sctp.addip_enable)
+		if (ep->asconf_enable)
 			break;
-		goto fallthrough;
+		goto unhandled;
 
 	case SCTP_PARAM_HOST_NAME_ADDRESS:
 		/* Tell the peer, we won't support this param.  */
@@ -2176,11 +2166,11 @@
 	case SCTP_PARAM_FWD_TSN_SUPPORT:
 		if (ep->prsctp_enable)
 			break;
-		goto fallthrough;
+		goto unhandled;
 
 	case SCTP_PARAM_RANDOM:
 		if (!ep->auth_enable)
-			goto fallthrough;
+			goto unhandled;
 
 		/* SCTP-AUTH: Section 6.1
 		 * If the random number is not 32 byte long the association
@@ -2197,7 +2187,7 @@
 
 	case SCTP_PARAM_CHUNKS:
 		if (!ep->auth_enable)
-			goto fallthrough;
+			goto unhandled;
 
 		/* SCTP-AUTH: Section 3.2
 		 * The CHUNKS parameter MUST be included once in the INIT or
@@ -2213,7 +2203,7 @@
 
 	case SCTP_PARAM_HMAC_ALGO:
 		if (!ep->auth_enable)
-			goto fallthrough;
+			goto unhandled;
 
 		hmacs = (struct sctp_hmac_algo_param *)param.p;
 		n_elt = (ntohs(param.p->length) -
@@ -2236,7 +2226,7 @@
 			retval = SCTP_IERROR_ABORT;
 		}
 		break;
-fallthrough:
+unhandled:
 	default:
 		pr_debug("%s: unrecognized param:%d for chunk:%d\n",
 			 __func__, ntohs(param.p->type), cid);
@@ -2324,7 +2314,6 @@
 	union sctp_addr addr;
 	struct sctp_af *af;
 	int src_match = 0;
-	char *cookie;
 
 	/* We must include the address that the INIT packet came from.
 	 * This is the only address that matters for an INIT packet.
@@ -2428,14 +2417,6 @@
 	/* Peer Rwnd   : Current calculated value of the peer's rwnd.  */
 	asoc->peer.rwnd = asoc->peer.i.a_rwnd;
 
-	/* Copy cookie in case we need to resend COOKIE-ECHO. */
-	cookie = asoc->peer.cookie;
-	if (cookie) {
-		asoc->peer.cookie = kmemdup(cookie, asoc->peer.cookie_len, gfp);
-		if (!asoc->peer.cookie)
-			goto clean_up;
-	}
-
 	/* RFC 2960 7.2.1 The initial value of ssthresh MAY be arbitrarily
 	 * high (for example, implementations MAY use the size of the receiver
 	 * advertised window).
@@ -2604,7 +2585,10 @@
 	case SCTP_PARAM_STATE_COOKIE:
 		asoc->peer.cookie_len =
 			ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
-		asoc->peer.cookie = param.cookie->body;
+		kfree(asoc->peer.cookie);
+		asoc->peer.cookie = kmemdup(param.cookie->body, asoc->peer.cookie_len, gfp);
+		if (!asoc->peer.cookie)
+			retval = 0;
 		break;
 
 	case SCTP_PARAM_HEARTBEAT_INFO:
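
The STATE_COOKIE hunk above changes ownership of the peer cookie: it used to alias into the received chunk body (and dangle once the chunk was freed), and is now a private kmemdup() copy, with a kfree() first so a repeated parameter in a malformed INIT cannot leak the prior copy. The same kfree-before-kmemdup pattern is applied to peer_random, peer_hmacs and peer_chunks further down. Sketch of the pattern (example_store_param() is illustrative, not a kernel function):

    static int example_store_param(void **slot, const void *body,
                                   size_t len, gfp_t gfp)
    {
            kfree(*slot);                 /* drop any earlier duplicate */
            *slot = kmemdup(body, len, gfp);
            return *slot ? 0 : -ENOMEM;   /* caller fails the INIT processing */
    }
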
@@ -2616,15 +2600,20 @@
 		break;
 
 	case SCTP_PARAM_ECN_CAPABLE:
-		asoc->peer.ecn_capable = 1;
-		break;
+		if (asoc->ep->ecn_enable) {
+			asoc->peer.ecn_capable = 1;
+			break;
+		}
+		/* Fall Through */
+		goto fall_through;
+
 
 	case SCTP_PARAM_ADAPTATION_LAYER_IND:
 		asoc->peer.adaptation_ind = ntohl(param.aind->adaptation_ind);
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
-		if (!net->sctp.addip_enable)
+		if (!ep->asconf_enable)
 			goto fall_through;
 
 		addr_param = param.v + sizeof(struct sctp_addip_param);
@@ -2654,7 +2643,7 @@
 		break;
 
 	case SCTP_PARAM_FWD_TSN_SUPPORT:
-		if (asoc->prsctp_enable) {
+		if (asoc->ep->prsctp_enable) {
 			asoc->peer.prsctp_capable = 1;
 			break;
 		}
@@ -2666,6 +2655,7 @@
 			goto fall_through;
 
 		/* Save peer's random parameter */
+		kfree(asoc->peer.peer_random);
 		asoc->peer.peer_random = kmemdup(param.p,
 					    ntohs(param.p->length), gfp);
 		if (!asoc->peer.peer_random) {
@@ -2679,6 +2669,7 @@
 			goto fall_through;
 
 		/* Save peer's HMAC list */
+		kfree(asoc->peer.peer_hmacs);
 		asoc->peer.peer_hmacs = kmemdup(param.p,
 					    ntohs(param.p->length), gfp);
 		if (!asoc->peer.peer_hmacs) {
@@ -2694,6 +2685,7 @@
 		if (!ep->auth_enable)
 			goto fall_through;
 
+		kfree(asoc->peer.peer_chunks);
 		asoc->peer.peer_chunks = kmemdup(param.p,
 					    ntohs(param.p->length), gfp);
 		if (!asoc->peer.peer_chunks)
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 85d3930..e52b212 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999 Cisco, Inc.
@@ -9,22 +10,6 @@
 * to implement the state operations.  These functions implement the
  * steps which require modifying existing data structures.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -52,7 +37,7 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/stream_sched.h>
 
-static int sctp_cmd_interpreter(enum sctp_event event_type,
+static int sctp_cmd_interpreter(enum sctp_event_type event_type,
 				union sctp_subtype subtype,
 				enum sctp_state state,
 				struct sctp_endpoint *ep,
@@ -61,7 +46,7 @@
 				enum sctp_disposition status,
 				struct sctp_cmd_seq *commands,
 				gfp_t gfp);
-static int sctp_side_effects(enum sctp_event event_type,
+static int sctp_side_effects(enum sctp_event_type event_type,
 			     union sctp_subtype subtype,
 			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
@@ -561,8 +546,8 @@
 	 */
 	if (net->sctp.pf_enable &&
 	   (transport->state == SCTP_ACTIVE) &&
-	   (asoc->pf_retrans < transport->pathmaxrxt) &&
-	   (transport->error_count > asoc->pf_retrans)) {
+	   (transport->error_count < transport->pathmaxrxt) &&
+	   (transport->error_count > transport->pf_retrans)) {
 
 		sctp_assoc_control_transport(asoc, transport,
 					     SCTP_TRANSPORT_PF,
@@ -623,7 +608,7 @@
 /* Worker routine to handle SCTP_CMD_ASSOC_FAILED.  */
 static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands,
 				  struct sctp_association *asoc,
-				  enum sctp_event event_type,
+				  enum sctp_event_type event_type,
 				  union sctp_subtype subtype,
 				  struct sctp_chunk *chunk,
 				  unsigned int error)
@@ -898,6 +883,11 @@
 						asoc->rto_initial;
 	}
 
+	if (sctp_state(asoc, ESTABLISHED)) {
+		kfree(asoc->peer.cookie);
+		asoc->peer.cookie = NULL;
+	}
+
 	if (sctp_state(asoc, ESTABLISHED) ||
 	    sctp_state(asoc, CLOSED) ||
 	    sctp_state(asoc, SHUTDOWN_RECEIVED)) {
@@ -1112,32 +1102,6 @@
 }
 
 
-/* Sent the next ASCONF packet currently stored in the association.
- * This happens after the ASCONF_ACK was succeffully processed.
- */
-static void sctp_cmd_send_asconf(struct sctp_association *asoc)
-{
-	struct net *net = sock_net(asoc->base.sk);
-
-	/* Send the next asconf chunk from the addip chunk
-	 * queue.
-	 */
-	if (!list_empty(&asoc->addip_chunk_list)) {
-		struct list_head *entry = asoc->addip_chunk_list.next;
-		struct sctp_chunk *asconf = list_entry(entry,
-						struct sctp_chunk, list);
-		list_del_init(entry);
-
-		/* Hold the chunk until an ASCONF_ACK is received. */
-		sctp_chunk_hold(asconf);
-		if (sctp_primitive_ASCONF(net, asoc, asconf))
-			sctp_chunk_free(asconf);
-		else
-			asoc->addip_last_asconf = asconf;
-	}
-}
-
-
 /* These three macros allow us to pull the debugging code out of the
  * main flow of sctp_do_sm() to keep attention focused on the real
  * functionality there.
@@ -1162,7 +1126,7 @@
  * If you want to understand all of lksctp, this is a
  * good place to start.
  */
-int sctp_do_sm(struct net *net, enum sctp_event event_type,
+int sctp_do_sm(struct net *net, enum sctp_event_type event_type,
 	       union sctp_subtype subtype, enum sctp_state state,
 	       struct sctp_endpoint *ep, struct sctp_association *asoc,
 	       void *event_arg, gfp_t gfp)
@@ -1199,7 +1163,7 @@
 /*****************************************************************
 * This is the master state function side effect processing function.
  *****************************************************************/
-static int sctp_side_effects(enum sctp_event event_type,
+static int sctp_side_effects(enum sctp_event_type event_type,
 			     union sctp_subtype subtype,
 			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
@@ -1285,7 +1249,7 @@
  ********************************************************************/
 
 /* This is the side-effect interpreter.  */
-static int sctp_cmd_interpreter(enum sctp_event event_type,
+static int sctp_cmd_interpreter(enum sctp_event_type event_type,
 				union sctp_subtype subtype,
 				enum sctp_state state,
 				struct sctp_endpoint *ep,
@@ -1783,9 +1747,6 @@
 			}
 			sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp);
 			break;
-		case SCTP_CMD_SEND_NEXT_ASCONF:
-			sctp_cmd_send_asconf(asoc);
-			break;
 		case SCTP_CMD_PURGE_ASCONF_QUEUE:
 			sctp_asconf_queue_teardown(asoc);
 			break;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index c9ae340..4ab8208 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -9,22 +10,6 @@
  *
  * These are the state functions for the state machine.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -2175,8 +2160,10 @@
 
 	/* Update socket peer label if first association. */
 	if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
-					chunk->skb))
+					chunk->skb)) {
+		sctp_association_free(new_asoc);
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+	}
 
 	/* Set temp so that it won't be added into hashtable */
 	new_asoc->temp = 1;
@@ -3736,7 +3723,8 @@
 	 * is received unauthenticated it MUST be silently discarded as
 	 * described in [I-D.ietf-tsvwg-sctp-auth].
 	 */
-	if (!net->sctp.addip_noauth && !chunk->auth)
+	if (!asoc->peer.asconf_capable ||
+	    (!net->sctp.addip_noauth && !chunk->auth))
 		return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
 					     commands);
 
@@ -3824,6 +3812,29 @@
 	return SCTP_DISPOSITION_CONSUME;
 }
 
+static enum sctp_disposition sctp_send_next_asconf(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					struct sctp_association *asoc,
+					const union sctp_subtype type,
+					struct sctp_cmd_seq *commands)
+{
+	struct sctp_chunk *asconf;
+	struct list_head *entry;
+
+	if (list_empty(&asoc->addip_chunk_list))
+		return SCTP_DISPOSITION_CONSUME;
+
+	entry = asoc->addip_chunk_list.next;
+	asconf = list_entry(entry, struct sctp_chunk, list);
+
+	list_del_init(entry);
+	sctp_chunk_hold(asconf);
+	asoc->addip_last_asconf = asconf;
+
+	return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands);
+}
+
 /*
  * ADDIP Section 4.3 General rules for address manipulation
  * When building TLV parameters for the ASCONF Chunk that will add or
@@ -3855,7 +3866,8 @@
 	 * is received unauthenticated it MUST be silently discarded as
 	 * described in [I-D.ietf-tsvwg-sctp-auth].
 	 */
-	if (!net->sctp.addip_noauth && !asconf_ack->auth)
+	if (!asoc->peer.asconf_capable ||
+	    (!net->sctp.addip_noauth && !asconf_ack->auth))
 		return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
 					     commands);
 
@@ -3915,14 +3927,10 @@
 				SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
 
 		if (!sctp_process_asconf_ack((struct sctp_association *)asoc,
-					     asconf_ack)) {
-			/* Successfully processed ASCONF_ACK.  We can
-			 * release the next asconf if we have one.
-			 */
-			sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF,
-					SCTP_NULL());
-			return SCTP_DISPOSITION_CONSUME;
-		}
+					     asconf_ack))
+			return sctp_send_next_asconf(net, ep,
+					(struct sctp_association *)asoc,
+							type, commands);
 
 		abort = sctp_make_abort(asoc, asconf_ack,
 					sizeof(struct sctp_errhdr));
@@ -6412,13 +6420,15 @@
 	 * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our
 	 * memory usage too much
 	 */
-	if (*sk->sk_prot_creator->memory_pressure) {
+	if (sk_under_memory_pressure(sk)) {
 		if (sctp_tsnmap_has_gap(map) &&
 		    (sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
 			pr_debug("%s: under pressure, reneging for tsn:%u\n",
 				 __func__, tsn);
 			deliver = SCTP_CMD_RENEGE;
-		 }
+		} else {
+			sk_mem_reclaim(sk);
+		}
 	}
 
 	/*
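
The renege hunk above replaces a raw read of sk_prot_creator->memory_pressure with the sk_under_memory_pressure() helper and, when nothing can be reneged, calls sk_mem_reclaim() to hand cached forward-allocated pages back to the protocol's global counter. Rough shape of the decision, as a sketch (can_renege_for() is a hypothetical predicate standing in for the tsnmap test):

    if (sk_under_memory_pressure(sk)) {
            if (can_renege_for(map, tsn))    /* gap exists, tsn fills it */
                    deliver = SCTP_CMD_RENEGE;
            else
                    sk_mem_reclaim(sk);      /* shrink sk_forward_alloc */
    }
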
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 691d9dc..88ea87f 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -9,22 +10,6 @@
  *
  * These are the state tables for the SCTP state machine.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -79,7 +64,7 @@
 
 const struct sctp_sm_table_entry *sctp_sm_lookup_event(
 					struct net *net,
-					enum sctp_event event_type,
+					enum sctp_event_type event_type,
 					enum sctp_state state,
 					union sctp_subtype event_subtype)
 {
@@ -991,26 +976,22 @@
 	if (cid <= SCTP_CID_BASE_MAX)
 		return &chunk_event_table[cid][state];
 
-	if (net->sctp.prsctp_enable) {
-		if (cid == SCTP_CID_FWD_TSN || cid == SCTP_CID_I_FWD_TSN)
-			return &prsctp_chunk_event_table[0][state];
-	}
+	switch ((u16)cid) {
+	case SCTP_CID_FWD_TSN:
+	case SCTP_CID_I_FWD_TSN:
+		return &prsctp_chunk_event_table[0][state];
 
-	if (net->sctp.addip_enable) {
-		if (cid == SCTP_CID_ASCONF)
-			return &addip_chunk_event_table[0][state];
+	case SCTP_CID_ASCONF:
+		return &addip_chunk_event_table[0][state];
 
-		if (cid == SCTP_CID_ASCONF_ACK)
-			return &addip_chunk_event_table[1][state];
-	}
+	case SCTP_CID_ASCONF_ACK:
+		return &addip_chunk_event_table[1][state];
 
-	if (net->sctp.reconf_enable)
-		if (cid == SCTP_CID_RECONF)
-			return &reconf_chunk_event_table[0][state];
+	case SCTP_CID_RECONF:
+		return &reconf_chunk_event_table[0][state];
 
-	if (net->sctp.auth_enable) {
-		if (cid == SCTP_CID_AUTH)
-			return &auth_chunk_event_table[0][state];
+	case SCTP_CID_AUTH:
+		return &auth_chunk_event_table[0][state];
 	}
 
 	return &chunk_event_table_unknown[state];
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 876393c..ffd3262 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -15,22 +16,6 @@
 * functions--this file contains the functions which populate the struct proto
  * for SCTP which is the BOTTOM of the sockets interface.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -83,7 +68,7 @@
 #include <net/sctp/stream_sched.h>
 
 /* Forward declarations for internal helper functions. */
-static int sctp_writeable(struct sock *sk);
+static bool sctp_writeable(struct sock *sk);
 static void sctp_wfree(struct sk_buff *skb);
 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 				size_t msg_len);
@@ -102,9 +87,9 @@
 			    struct sctp_chunk *chunk);
 static int sctp_do_bind(struct sock *, union sctp_addr *, int);
 static int sctp_autobind(struct sock *sk);
-static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
-			      struct sctp_association *assoc,
-			      enum sctp_socket_type type);
+static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+			     struct sctp_association *assoc,
+			     enum sctp_socket_type type);
 
 static unsigned long sctp_memory_pressure;
 static atomic_long_t sctp_memory_allocated;
@@ -119,25 +104,10 @@
 /* Get the sndbuf space available at the time on the association.  */
 static inline int sctp_wspace(struct sctp_association *asoc)
 {
-	int amt;
+	struct sock *sk = asoc->base.sk;
 
-	if (asoc->ep->sndbuf_policy)
-		amt = asoc->sndbuf_used;
-	else
-		amt = sk_wmem_alloc_get(asoc->base.sk);
-
-	if (amt >= asoc->base.sk->sk_sndbuf) {
-		if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK)
-			amt = 0;
-		else {
-			amt = sk_stream_wspace(asoc->base.sk);
-			if (amt < 0)
-				amt = 0;
-		}
-	} else {
-		amt = asoc->base.sk->sk_sndbuf - amt;
-	}
-	return amt;
+	return asoc->ep->sndbuf_policy ? sk->sk_sndbuf - asoc->sndbuf_used
+				       : sk_stream_wspace(sk);
 }
 
 /* Increment the used sndbuf space count of the corresponding association by
@@ -166,12 +136,9 @@
 	/* Save the chunk pointer in skb for sctp_wfree to use later.  */
 	skb_shinfo(chunk->skb)->destructor_arg = chunk;
 
-	asoc->sndbuf_used += SCTP_DATA_SNDSIZE(chunk) +
-				sizeof(struct sk_buff) +
-				sizeof(struct sctp_chunk);
-
 	refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
-	sk->sk_wmem_queued += chunk->skb->truesize;
+	asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk);
+	sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk);
 	sk_mem_charge(sk, chunk->skb->truesize);
 }
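
With the rewrite above, sctp_wspace() can legitimately go negative when sndbuf_used overshoots sk_sndbuf, which is why later hunks compare it against (int)msg_len and test <= 0 instead of == 0. A hedged sketch of the two policies:

    /* Sketch: per-association budget vs. whole-socket budget; a negative
     * result now means "over budget" rather than indicating a bug.
     */
    static inline int example_wspace(struct sctp_association *asoc)
    {
            struct sock *sk = asoc->base.sk;

            if (asoc->ep->sndbuf_policy)        /* each assoc gets sk_sndbuf */
                    return sk->sk_sndbuf - asoc->sndbuf_used;
            return sk_stream_wspace(sk);        /* all assocs share the socket */
    }
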
 
@@ -266,7 +233,7 @@
 	}
 
 	/* Otherwise this is a UDP-style socket. */
-	if (!id || (id == (sctp_assoc_t)-1))
+	if (id <= SCTP_ALL_ASSOC)
 		return NULL;
 
 	spin_lock_bh(&sctp_assocs_id_lock);
@@ -342,7 +309,7 @@
 	return retval;
 }
 
-static long sctp_get_port_local(struct sock *, union sctp_addr *);
+static int sctp_get_port_local(struct sock *, union sctp_addr *);
 
 /* Verify this is a valid sockaddr. */
 static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
@@ -432,9 +399,8 @@
 	 * detection.
 	 */
 	addr->v4.sin_port = htons(snum);
-	if ((ret = sctp_get_port_local(sk, addr))) {
+	if (sctp_get_port_local(sk, addr))
 		return -EADDRINUSE;
-	}
 
 	/* Refresh ephemeral port.  */
 	if (!bp->port)
@@ -446,11 +412,13 @@
 	ret = sctp_add_bind_addr(bp, addr, af->sockaddr_len,
 				 SCTP_ADDR_SRC, GFP_ATOMIC);
 
-	/* Copy back into socket for getsockname() use. */
-	if (!ret) {
-		inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num);
-		sp->pf->to_sk_saddr(addr, sk);
+	if (ret) {
+		sctp_put_port(sk);
+		return ret;
 	}
+	/* Copy back into socket for getsockname() use. */
+	inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num);
+	sp->pf->to_sk_saddr(addr, sk);
 
 	return ret;
 }
@@ -557,7 +525,6 @@
 				   struct sockaddr	*addrs,
 				   int 			addrcnt)
 {
-	struct net *net = sock_net(sk);
 	struct sctp_sock		*sp;
 	struct sctp_endpoint		*ep;
 	struct sctp_association		*asoc;
@@ -572,12 +539,12 @@
 	int 				i;
 	int 				retval = 0;
 
-	if (!net->sctp.addip_enable)
-		return retval;
-
 	sp = sctp_sk(sk);
 	ep = sp->ep;
 
+	if (!ep->asconf_enable)
+		return retval;
+
 	pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
 		 __func__, sk, addrs, addrcnt);
 
@@ -760,7 +727,6 @@
 				   struct sockaddr	*addrs,
 				   int			addrcnt)
 {
-	struct net *net = sock_net(sk);
 	struct sctp_sock	*sp;
 	struct sctp_endpoint	*ep;
 	struct sctp_association	*asoc;
@@ -776,12 +742,12 @@
 	int			stored = 0;
 
 	chunk = NULL;
-	if (!net->sctp.addip_enable)
-		return retval;
-
 	sp = sctp_sk(sk);
 	ep = sp->ep;
 
+	if (!ep->asconf_enable)
+		return retval;
+
 	pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
 		 __func__, sk, addrs, addrcnt);
 
@@ -1017,15 +983,15 @@
 	if (unlikely(addrs_size <= 0))
 		return -EINVAL;
 
-	kaddrs = vmemdup_user(addrs, addrs_size);
-	if (unlikely(IS_ERR(kaddrs)))
+	kaddrs = memdup_user(addrs, addrs_size);
+	if (IS_ERR(kaddrs))
 		return PTR_ERR(kaddrs);
 
 	/* Walk through the addrs buffer and count the number of addresses. */
 	addr_buf = kaddrs;
 	while (walk_size < addrs_size) {
 		if (walk_size + sizeof(sa_family_t) > addrs_size) {
-			kvfree(kaddrs);
+			kfree(kaddrs);
 			return -EINVAL;
 		}
 
@@ -1036,7 +1002,7 @@
 		 * causes the address buffer to overflow return EINVAL.
 		 */
 		if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
-			kvfree(kaddrs);
+			kfree(kaddrs);
 			return -EINVAL;
 		}
 		addrcnt++;
@@ -1072,163 +1038,166 @@
 	}
 
 out:
-	kvfree(kaddrs);
+	kfree(kaddrs);
 
 	return err;
 }
 
+static int sctp_connect_new_asoc(struct sctp_endpoint *ep,
+				 const union sctp_addr *daddr,
+				 const struct sctp_initmsg *init,
+				 struct sctp_transport **tp)
+{
+	struct sctp_association *asoc;
+	struct sock *sk = ep->base.sk;
+	struct net *net = sock_net(sk);
+	enum sctp_scope scope;
+	int err;
+
+	if (sctp_endpoint_is_peeled_off(ep, daddr))
+		return -EADDRNOTAVAIL;
+
+	if (!ep->base.bind_addr.port) {
+		if (sctp_autobind(sk))
+			return -EAGAIN;
+	} else {
+		if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+		    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+			return -EACCES;
+	}
+
+	scope = sctp_scope(daddr);
+	asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+	if (!asoc)
+		return -ENOMEM;
+
+	err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
+	if (err < 0)
+		goto free;
+
+	*tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+	if (!*tp) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	if (!init)
+		return 0;
+
+	if (init->sinit_num_ostreams) {
+		__u16 outcnt = init->sinit_num_ostreams;
+
+		asoc->c.sinit_num_ostreams = outcnt;
+		/* outcnt has been changed, need to re-init stream */
+		err = sctp_stream_init(&asoc->stream, outcnt, 0, GFP_KERNEL);
+		if (err)
+			goto free;
+	}
+
+	if (init->sinit_max_instreams)
+		asoc->c.sinit_max_instreams = init->sinit_max_instreams;
+
+	if (init->sinit_max_attempts)
+		asoc->max_init_attempts = init->sinit_max_attempts;
+
+	if (init->sinit_max_init_timeo)
+		asoc->max_init_timeo =
+			msecs_to_jiffies(init->sinit_max_init_timeo);
+
+	return 0;
+free:
+	sctp_association_free(asoc);
+	return err;
+}
+
+static int sctp_connect_add_peer(struct sctp_association *asoc,
+				 union sctp_addr *daddr, int addr_len)
+{
+	struct sctp_endpoint *ep = asoc->ep;
+	struct sctp_association *old;
+	struct sctp_transport *t;
+	int err;
+
+	err = sctp_verify_addr(ep->base.sk, daddr, addr_len);
+	if (err)
+		return err;
+
+	old = sctp_endpoint_lookup_assoc(ep, daddr, &t);
+	if (old && old != asoc)
+		return old->state >= SCTP_STATE_ESTABLISHED ? -EISCONN
+							    : -EALREADY;
+
+	if (sctp_endpoint_is_peeled_off(ep, daddr))
+		return -EADDRNOTAVAIL;
+
+	t = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+	if (!t)
+		return -ENOMEM;
+
+	return 0;
+}
+
 /* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size)
  *
  * Common routine for handling connect() and sctp_connectx().
  * Connect will come in with just a single address.
  */
-static int __sctp_connect(struct sock *sk,
-			  struct sockaddr *kaddrs,
-			  int addrs_size, int flags,
-			  sctp_assoc_t *assoc_id)
+static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs,
+			  int addrs_size, int flags, sctp_assoc_t *assoc_id)
 {
-	struct net *net = sock_net(sk);
-	struct sctp_sock *sp;
-	struct sctp_endpoint *ep;
-	struct sctp_association *asoc = NULL;
-	struct sctp_association *asoc2;
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_endpoint *ep = sp->ep;
 	struct sctp_transport *transport;
-	union sctp_addr to;
-	enum sctp_scope scope;
+	struct sctp_association *asoc;
+	void *addr_buf = kaddrs;
+	union sctp_addr *daddr;
+	struct sctp_af *af;
+	int walk_size, err;
 	long timeo;
-	int err = 0;
-	int addrcnt = 0;
-	int walk_size = 0;
-	union sctp_addr *sa_addr = NULL;
-	void *addr_buf;
-	unsigned short port;
 
-	sp = sctp_sk(sk);
-	ep = sp->ep;
-
-	/* connect() cannot be done on a socket that is already in ESTABLISHED
-	 * state - UDP-style peeled off socket or a TCP-style socket that
-	 * is already connected.
-	 * It cannot be done even on a TCP-style listening socket.
-	 */
 	if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) ||
-	    (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) {
-		err = -EISCONN;
-		goto out_free;
-	}
+	    (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)))
+		return -EISCONN;
 
-	/* Walk through the addrs buffer and count the number of addresses. */
-	addr_buf = kaddrs;
+	daddr = addr_buf;
+	af = sctp_get_af_specific(daddr->sa.sa_family);
+	if (!af || af->sockaddr_len > addrs_size)
+		return -EINVAL;
+
+	err = sctp_verify_addr(sk, daddr, af->sockaddr_len);
+	if (err)
+		return err;
+
+	asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+	if (asoc)
+		return asoc->state >= SCTP_STATE_ESTABLISHED ? -EISCONN
+							     : -EALREADY;
+
+	err = sctp_connect_new_asoc(ep, daddr, NULL, &transport);
+	if (err)
+		return err;
+	asoc = transport->asoc;
+
+	addr_buf += af->sockaddr_len;
+	walk_size = af->sockaddr_len;
 	while (walk_size < addrs_size) {
-		struct sctp_af *af;
-
-		if (walk_size + sizeof(sa_family_t) > addrs_size) {
-			err = -EINVAL;
+		err = -EINVAL;
+		if (walk_size + sizeof(sa_family_t) > addrs_size)
 			goto out_free;
-		}
 
-		sa_addr = addr_buf;
-		af = sctp_get_af_specific(sa_addr->sa.sa_family);
-
-		/* If the address family is not supported or if this address
-		 * causes the address buffer to overflow return EINVAL.
-		 */
-		if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
-			err = -EINVAL;
+		daddr = addr_buf;
+		af = sctp_get_af_specific(daddr->sa.sa_family);
+		if (!af || af->sockaddr_len + walk_size > addrs_size)
 			goto out_free;
-		}
 
-		port = ntohs(sa_addr->v4.sin_port);
+		if (asoc->peer.port != ntohs(daddr->v4.sin_port))
+			goto out_free;
 
-		/* Save current address so we can work with it */
-		memcpy(&to, sa_addr, af->sockaddr_len);
-
-		err = sctp_verify_addr(sk, &to, af->sockaddr_len);
+		err = sctp_connect_add_peer(asoc, daddr, af->sockaddr_len);
 		if (err)
 			goto out_free;
 
-		/* Make sure the destination port is correctly set
-		 * in all addresses.
-		 */
-		if (asoc && asoc->peer.port && asoc->peer.port != port) {
-			err = -EINVAL;
-			goto out_free;
-		}
-
-		/* Check if there already is a matching association on the
-		 * endpoint (other than the one created here).
-		 */
-		asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport);
-		if (asoc2 && asoc2 != asoc) {
-			if (asoc2->state >= SCTP_STATE_ESTABLISHED)
-				err = -EISCONN;
-			else
-				err = -EALREADY;
-			goto out_free;
-		}
-
-		/* If we could not find a matching association on the endpoint,
-		 * make sure that there is no peeled-off association matching
-		 * the peer address even on another socket.
-		 */
-		if (sctp_endpoint_is_peeled_off(ep, &to)) {
-			err = -EADDRNOTAVAIL;
-			goto out_free;
-		}
-
-		if (!asoc) {
-			/* If a bind() or sctp_bindx() is not called prior to
-			 * an sctp_connectx() call, the system picks an
-			 * ephemeral port and will choose an address set
-			 * equivalent to binding with a wildcard address.
-			 */
-			if (!ep->base.bind_addr.port) {
-				if (sctp_autobind(sk)) {
-					err = -EAGAIN;
-					goto out_free;
-				}
-			} else {
-				/*
-				 * If an unprivileged user inherits a 1-many
-				 * style socket with open associations on a
-				 * privileged port, it MAY be permitted to
-				 * accept new associations, but it SHOULD NOT
-				 * be permitted to open new associations.
-				 */
-				if (ep->base.bind_addr.port <
-				    inet_prot_sock(net) &&
-				    !ns_capable(net->user_ns,
-				    CAP_NET_BIND_SERVICE)) {
-					err = -EACCES;
-					goto out_free;
-				}
-			}
-
-			scope = sctp_scope(&to);
-			asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
-			if (!asoc) {
-				err = -ENOMEM;
-				goto out_free;
-			}
-
-			err = sctp_assoc_set_bind_addr_from_ep(asoc, scope,
-							      GFP_KERNEL);
-			if (err < 0) {
-				goto out_free;
-			}
-
-		}
-
-		/* Prime the peer's transport structures.  */
-		transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL,
-						SCTP_UNKNOWN);
-		if (!transport) {
-			err = -ENOMEM;
-			goto out_free;
-		}
-
-		addrcnt++;
-		addr_buf += af->sockaddr_len;
+		addr_buf  += af->sockaddr_len;
 		walk_size += af->sockaddr_len;
 	}
 
@@ -1241,40 +1210,25 @@
 			goto out_free;
 	}
 
-	err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
-	if (err < 0) {
+	err = sctp_primitive_ASSOCIATE(sock_net(sk), asoc, NULL);
+	if (err < 0)
 		goto out_free;
-	}
 
 	/* Initialize sk's dport and daddr for getpeername() */
 	inet_sk(sk)->inet_dport = htons(asoc->peer.port);
-	sp->pf->to_sk_daddr(sa_addr, sk);
+	sp->pf->to_sk_daddr(daddr, sk);
 	sk->sk_err = 0;
 
-	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
-
 	if (assoc_id)
 		*assoc_id = asoc->assoc_id;
 
-	err = sctp_wait_for_connect(asoc, &timeo);
-	/* Note: the asoc may be freed after the return of
-	 * sctp_wait_for_connect.
-	 */
-
-	/* Don't free association on exit. */
-	asoc = NULL;
+	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+	return sctp_wait_for_connect(asoc, &timeo);
 
 out_free:
 	pr_debug("%s: took out_free path with asoc:%p kaddrs:%p err:%d\n",
 		 __func__, asoc, kaddrs, err);
-
-	if (asoc) {
-		/* sctp_primitive_ASSOCIATE may have added this association
-		 * To the hash table, try to unhash it, just in case, its a noop
-		 * if it wasn't hashed so we're safe
-		 */
-		sctp_association_free(asoc);
-	}
+	sctp_association_free(asoc);
 	return err;
 }
 
@@ -1344,11 +1298,12 @@
 	pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
 		 __func__, sk, addrs, addrs_size);
 
-	if (unlikely(addrs_size <= 0))
+	/* make sure the 1st addr's sa_family is accessible later */
+	if (unlikely(addrs_size < sizeof(sa_family_t)))
 		return -EINVAL;
 
-	kaddrs = vmemdup_user(addrs, addrs_size);
-	if (unlikely(IS_ERR(kaddrs)))
+	kaddrs = memdup_user(addrs, addrs_size);
+	if (IS_ERR(kaddrs))
 		return PTR_ERR(kaddrs);
 
 	/* Allow security module to validate connectx addresses. */
@@ -1367,7 +1322,7 @@
 	err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id);
 
 out_free:
-	kvfree(kaddrs);
+	kfree(kaddrs);
 
 	return err;
 }
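
The copy-in above trades vmemdup_user() for memdup_user(): connectx address lists are small and bounded, so kmalloc-backed memory suffices, and every matching release in this file becomes kfree() rather than kvfree(). The new size floor also guarantees the first sa_family_t read stays within the buffer. Sketch of the pairing (example_copy_addrs() is illustrative only):

    static int example_copy_addrs(struct sockaddr __user *addrs,
                                  int addrs_size, struct sockaddr **out)
    {
            struct sockaddr *kaddrs;

            if (addrs_size < (int)sizeof(sa_family_t))
                    return -EINVAL;               /* 1st sa_family must fit */

            kaddrs = memdup_user(addrs, addrs_size);  /* kmalloc-backed */
            if (IS_ERR(kaddrs))
                    return PTR_ERR(kaddrs);

            *out = kaddrs;                        /* release with kfree() */
            return 0;
    }
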
@@ -1692,9 +1647,7 @@
 				 struct sctp_transport **tp)
 {
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
-	struct net *net = sock_net(sk);
 	struct sctp_association *asoc;
-	enum sctp_scope scope;
 	struct cmsghdr *cmsg;
 	__be32 flowinfo = 0;
 	struct sctp_af *af;
@@ -1709,20 +1662,6 @@
 				    sctp_sstate(sk, CLOSING)))
 		return -EADDRNOTAVAIL;
 
-	if (sctp_endpoint_is_peeled_off(ep, daddr))
-		return -EADDRNOTAVAIL;
-
-	if (!ep->base.bind_addr.port) {
-		if (sctp_autobind(sk))
-			return -EAGAIN;
-	} else {
-		if (ep->base.bind_addr.port < inet_prot_sock(net) &&
-		    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
-			return -EACCES;
-	}
-
-	scope = sctp_scope(daddr);
-
 	/* Label connection socket for first association 1-to-many
 	 * style for client sequence socket()->sendmsg(). This
 	 * needs to be done before sctp_assoc_add_peer() as that will
@@ -1738,45 +1677,10 @@
 	if (err < 0)
 		return err;
 
-	asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
-	if (!asoc)
-		return -ENOMEM;
-
-	if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) {
-		err = -ENOMEM;
-		goto free;
-	}
-
-	if (cmsgs->init) {
-		struct sctp_initmsg *init = cmsgs->init;
-
-		if (init->sinit_num_ostreams) {
-			__u16 outcnt = init->sinit_num_ostreams;
-
-			asoc->c.sinit_num_ostreams = outcnt;
-			/* outcnt has been changed, need to re-init stream */
-			err = sctp_stream_init(&asoc->stream, outcnt, 0,
-					       GFP_KERNEL);
-			if (err)
-				goto free;
-		}
-
-		if (init->sinit_max_instreams)
-			asoc->c.sinit_max_instreams = init->sinit_max_instreams;
-
-		if (init->sinit_max_attempts)
-			asoc->max_init_attempts = init->sinit_max_attempts;
-
-		if (init->sinit_max_init_timeo)
-			asoc->max_init_timeo =
-				msecs_to_jiffies(init->sinit_max_init_timeo);
-	}
-
-	*tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
-	if (!*tp) {
-		err = -ENOMEM;
-		goto free;
-	}
+	err = sctp_connect_new_asoc(ep, daddr, cmsgs->init, tp);
+	if (err)
+		return err;
+	asoc = (*tp)->asoc;
 
 	if (!cmsgs->addrs_msg)
 		return 0;
@@ -1786,8 +1690,6 @@
 
 	/* sendv addr list parse */
 	for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
-		struct sctp_transport *transport;
-		struct sctp_association *old;
 		union sctp_addr _daddr;
 		int dlen;
 
@@ -1821,30 +1723,10 @@
 			daddr->v6.sin6_port = htons(asoc->peer.port);
 			memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
 		}
-		err = sctp_verify_addr(sk, daddr, sizeof(*daddr));
+
+		err = sctp_connect_add_peer(asoc, daddr, sizeof(*daddr));
 		if (err)
 			goto free;
-
-		old = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
-		if (old && old != asoc) {
-			if (old->state >= SCTP_STATE_ESTABLISHED)
-				err = -EISCONN;
-			else
-				err = -EALREADY;
-			goto free;
-		}
-
-		if (sctp_endpoint_is_peeled_off(ep, daddr)) {
-			err = -EADDRNOTAVAIL;
-			goto free;
-		}
-
-		transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL,
-						SCTP_UNKNOWN);
-		if (!transport) {
-			err = -ENOMEM;
-			goto free;
-		}
 	}
 
 	return 0;
@@ -1884,6 +1766,7 @@
 
 		pr_debug("%s: aborting association:%p\n", __func__, asoc);
 		sctp_primitive_ABORT(net, asoc, chunk);
+		iov_iter_revert(&msg->msg_iter, msg_len);
 
 		return 0;
 	}
@@ -1927,10 +1810,13 @@
 		asoc->pmtu_pending = 0;
 	}
 
-	if (sctp_wspace(asoc) < msg_len)
+	if (sctp_wspace(asoc) < (int)msg_len)
 		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
 
-	if (!sctp_wspace(asoc)) {
+	if (sk_under_memory_pressure(sk))
+		sk_mem_reclaim(sk);
+
+	if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
 		timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
 		if (err)
@@ -1942,7 +1828,7 @@
 		if (err)
 			goto err;
 
-		if (sp->strm_interleave) {
+		if (asoc->ep->intl_enable) {
 			timeo = sock_sndtimeo(sk, 0);
 			err = sctp_wait_for_connect(asoc, &timeo);
 			if (err) {
@@ -2045,7 +1931,7 @@
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct sctp_transport *transport = NULL;
 	struct sctp_sndrcvinfo _sinfo, *sinfo;
-	struct sctp_association *asoc;
+	struct sctp_association *asoc, *tmp;
 	struct sctp_cmsgs cmsgs;
 	union sctp_addr *daddr;
 	bool new = false;
@@ -2071,7 +1957,7 @@
 
 	/* SCTP_SENDALL process */
 	if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) {
-		list_for_each_entry(asoc, &ep->asocs, asocs) {
+		list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) {
 			err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
 							msg_len);
 			if (err == 0)
@@ -2248,7 +2134,7 @@
 	if (sp->recvrcvinfo)
 		sctp_ulpevent_read_rcvinfo(event, msg);
 	/* Check if we allow SCTP_SNDRCVINFO. */
-	if (sp->subscribe.sctp_data_io_event)
+	if (sctp_ulpevent_type_enabled(sp->subscribe, SCTP_DATA_IO_EVENT))
 		sctp_ulpevent_read_sndrcvinfo(event, msg);
 
 	err = copied;
@@ -2322,22 +2208,33 @@
 static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
 				  unsigned int optlen)
 {
+	struct sctp_event_subscribe subscribe;
+	__u8 *sn_type = (__u8 *)&subscribe;
+	struct sctp_sock *sp = sctp_sk(sk);
 	struct sctp_association *asoc;
-	struct sctp_ulpevent *event;
+	int i;
 
 	if (optlen > sizeof(struct sctp_event_subscribe))
 		return -EINVAL;
-	if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen))
+
+	if (copy_from_user(&subscribe, optval, optlen))
 		return -EFAULT;
 
+	for (i = 0; i < optlen; i++)
+		sctp_ulpevent_type_set(&sp->subscribe, SCTP_SN_TYPE_BASE + i,
+				       sn_type[i]);
+
+	list_for_each_entry(asoc, &sp->ep->asocs, asocs)
+		asoc->subscribe = sctp_sk(sk)->subscribe;
+
 	/* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT,
 	 * if there is no data to be sent or retransmit, the stack will
 	 * immediately send up this notification.
 	 */
-	if (sctp_ulpevent_type_enabled(SCTP_SENDER_DRY_EVENT,
-				       &sctp_sk(sk)->subscribe)) {
-		asoc = sctp_id2assoc(sk, 0);
+	if (sctp_ulpevent_type_enabled(sp->subscribe, SCTP_SENDER_DRY_EVENT)) {
+		struct sctp_ulpevent *event;
 
+		asoc = sctp_id2assoc(sk, 0);
 		if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
 			event = sctp_ulpevent_make_sender_dry_event(asoc,
 					GFP_USER | __GFP_NOWARN);
@@ -2757,12 +2654,13 @@
 			return -EINVAL;
 	}
 
-	/* Get association, if assoc_id != 0 and the socket is a one
-	 * to many style socket, and an association was not found, then
-	 * the id was invalid.
+	/* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+	 * socket is a one to many style socket, and an association
+	 * was not found, then the id was invalid.
 	 */
 	asoc = sctp_id2assoc(sk, params.spp_assoc_id);
-	if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
 	/* Heartbeat demand can only be sent on a transport or
@@ -2804,6 +2702,43 @@
 	return (param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE;
 }
 
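+/* Apply SCTP_DELAYED_SACK settings to one association and propagate
+ * them to each of its peer transports.
+ */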
+static void sctp_apply_asoc_delayed_ack(struct sctp_sack_info *params,
+					struct sctp_association *asoc)
+{
+	struct sctp_transport *trans;
+
+	if (params->sack_delay) {
+		asoc->sackdelay = msecs_to_jiffies(params->sack_delay);
+		asoc->param_flags =
+			sctp_spp_sackdelay_enable(asoc->param_flags);
+	}
+	if (params->sack_freq == 1) {
+		asoc->param_flags =
+			sctp_spp_sackdelay_disable(asoc->param_flags);
+	} else if (params->sack_freq > 1) {
+		asoc->sackfreq = params->sack_freq;
+		asoc->param_flags =
+			sctp_spp_sackdelay_enable(asoc->param_flags);
+	}
+
+	list_for_each_entry(trans, &asoc->peer.transport_addr_list,
+			    transports) {
+		if (params->sack_delay) {
+			trans->sackdelay = msecs_to_jiffies(params->sack_delay);
+			trans->param_flags =
+				sctp_spp_sackdelay_enable(trans->param_flags);
+		}
+		if (params->sack_freq == 1) {
+			trans->param_flags =
+				sctp_spp_sackdelay_disable(trans->param_flags);
+		} else if (params->sack_freq > 1) {
+			trans->sackfreq = params->sack_freq;
+			trans->param_flags =
+				sctp_spp_sackdelay_enable(trans->param_flags);
+		}
+	}
+}
+
 /*
  * 7.1.23.  Get or set delayed ack timer (SCTP_DELAYED_SACK)
  *
@@ -2843,10 +2778,9 @@
 static int sctp_setsockopt_delayed_ack(struct sock *sk,
 				       char __user *optval, unsigned int optlen)
 {
-	struct sctp_sack_info    params;
-	struct sctp_transport   *trans = NULL;
-	struct sctp_association *asoc = NULL;
-	struct sctp_sock        *sp = sctp_sk(sk);
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_association *asoc;
+	struct sctp_sack_info params;
 
 	if (optlen == sizeof(struct sctp_sack_info)) {
 		if (copy_from_user(&params, optval, optlen))
@@ -2874,67 +2808,45 @@
 	if (params.sack_delay > 500)
 		return -EINVAL;
 
-	/* Get association, if sack_assoc_id != 0 and the socket is a one
-	 * to many style socket, and an association was not found, then
-	 * the id was invalid.
+	/* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the
+	 * socket is a one to many style socket, and an association
+	 * was not found, then the id was invalid.
 	 */
 	asoc = sctp_id2assoc(sk, params.sack_assoc_id);
-	if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && params.sack_assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
-	if (params.sack_delay) {
-		if (asoc) {
-			asoc->sackdelay =
-				msecs_to_jiffies(params.sack_delay);
-			asoc->param_flags =
-				sctp_spp_sackdelay_enable(asoc->param_flags);
-		} else {
+	if (asoc) {
+		sctp_apply_asoc_delayed_ack(&params, asoc);
+
+		return 0;
+	}
+
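+	/* A TCP-style socket has no association list; treat any wildcard
+	 * id as a request to change the socket-wide (future) defaults.
+	 */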
+	if (sctp_style(sk, TCP))
+		params.sack_assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (params.sack_assoc_id == SCTP_FUTURE_ASSOC ||
+	    params.sack_assoc_id == SCTP_ALL_ASSOC) {
+		if (params.sack_delay) {
 			sp->sackdelay = params.sack_delay;
 			sp->param_flags =
 				sctp_spp_sackdelay_enable(sp->param_flags);
 		}
-	}
-
-	if (params.sack_freq == 1) {
-		if (asoc) {
-			asoc->param_flags =
-				sctp_spp_sackdelay_disable(asoc->param_flags);
-		} else {
+		if (params.sack_freq == 1) {
 			sp->param_flags =
 				sctp_spp_sackdelay_disable(sp->param_flags);
-		}
-	} else if (params.sack_freq > 1) {
-		if (asoc) {
-			asoc->sackfreq = params.sack_freq;
-			asoc->param_flags =
-				sctp_spp_sackdelay_enable(asoc->param_flags);
-		} else {
+		} else if (params.sack_freq > 1) {
 			sp->sackfreq = params.sack_freq;
 			sp->param_flags =
 				sctp_spp_sackdelay_enable(sp->param_flags);
 		}
 	}
 
-	/* If change is for association, also apply to each transport. */
-	if (asoc) {
-		list_for_each_entry(trans, &asoc->peer.transport_addr_list,
-				transports) {
-			if (params.sack_delay) {
-				trans->sackdelay =
-					msecs_to_jiffies(params.sack_delay);
-				trans->param_flags =
-					sctp_spp_sackdelay_enable(trans->param_flags);
-			}
-			if (params.sack_freq == 1) {
-				trans->param_flags =
-					sctp_spp_sackdelay_disable(trans->param_flags);
-			} else if (params.sack_freq > 1) {
-				trans->sackfreq = params.sack_freq;
-				trans->param_flags =
-					sctp_spp_sackdelay_enable(trans->param_flags);
-			}
-		}
-	}
+	if (params.sack_assoc_id == SCTP_CURRENT_ASSOC ||
+	    params.sack_assoc_id == SCTP_ALL_ASSOC)
+		list_for_each_entry(asoc, &sp->ep->asocs, asocs)
+			sctp_apply_asoc_delayed_ack(&params, asoc);
 
 	return 0;
 }
@@ -3004,15 +2916,25 @@
 		return -EINVAL;
 
 	asoc = sctp_id2assoc(sk, info.sinfo_assoc_id);
-	if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && info.sinfo_assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
+
 	if (asoc) {
 		asoc->default_stream = info.sinfo_stream;
 		asoc->default_flags = info.sinfo_flags;
 		asoc->default_ppid = info.sinfo_ppid;
 		asoc->default_context = info.sinfo_context;
 		asoc->default_timetolive = info.sinfo_timetolive;
-	} else {
+
+		return 0;
+	}
+
+	if (sctp_style(sk, TCP))
+		info.sinfo_assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC ||
+	    info.sinfo_assoc_id == SCTP_ALL_ASSOC) {
 		sp->default_stream = info.sinfo_stream;
 		sp->default_flags = info.sinfo_flags;
 		sp->default_ppid = info.sinfo_ppid;
@@ -3020,6 +2942,17 @@
 		sp->default_timetolive = info.sinfo_timetolive;
 	}
 
+	if (info.sinfo_assoc_id == SCTP_CURRENT_ASSOC ||
+	    info.sinfo_assoc_id == SCTP_ALL_ASSOC) {
+		list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
+			asoc->default_stream = info.sinfo_stream;
+			asoc->default_flags = info.sinfo_flags;
+			asoc->default_ppid = info.sinfo_ppid;
+			asoc->default_context = info.sinfo_context;
+			asoc->default_timetolive = info.sinfo_timetolive;
+		}
+	}
+
 	return 0;
 }
 
@@ -3044,20 +2977,40 @@
 		return -EINVAL;
 
 	asoc = sctp_id2assoc(sk, info.snd_assoc_id);
-	if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && info.snd_assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
+
 	if (asoc) {
 		asoc->default_stream = info.snd_sid;
 		asoc->default_flags = info.snd_flags;
 		asoc->default_ppid = info.snd_ppid;
 		asoc->default_context = info.snd_context;
-	} else {
+
+		return 0;
+	}
+
+	if (sctp_style(sk, TCP))
+		info.snd_assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (info.snd_assoc_id == SCTP_FUTURE_ASSOC ||
+	    info.snd_assoc_id == SCTP_ALL_ASSOC) {
 		sp->default_stream = info.snd_sid;
 		sp->default_flags = info.snd_flags;
 		sp->default_ppid = info.snd_ppid;
 		sp->default_context = info.snd_context;
 	}
 
+	if (info.snd_assoc_id == SCTP_CURRENT_ASSOC ||
+	    info.snd_assoc_id == SCTP_ALL_ASSOC) {
+		list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
+			asoc->default_stream = info.snd_sid;
+			asoc->default_flags = info.snd_flags;
+			asoc->default_ppid = info.snd_ppid;
+			asoc->default_context = info.snd_context;
+		}
+	}
+
 	return 0;
 }
 
@@ -3151,7 +3104,8 @@
 	asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id);
 
 	/* Set the values to the specific association */
-	if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
 	rto_max = rtoinfo.srto_max;
@@ -3213,7 +3167,8 @@
 
 	asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id);
 
-	if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
 	/* Set the values to the specific association */
@@ -3326,7 +3281,7 @@
 				    current->comm, task_pid_nr(current));
 		if (copy_from_user(&val, optval, optlen))
 			return -EFAULT;
-		params.assoc_id = 0;
+		params.assoc_id = SCTP_FUTURE_ASSOC;
 	} else if (optlen == sizeof(struct sctp_assoc_value)) {
 		if (copy_from_user(&params, optval, optlen))
 			return -EFAULT;
@@ -3336,14 +3291,16 @@
 	}
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
 
 	if (val) {
 		int min_len, max_len;
 		__u16 datasize = asoc ? sctp_datachk_len(&asoc->stream) :
 				 sizeof(struct sctp_data_chunk);
 
-		min_len = sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT,
-					   datasize);
+		min_len = sctp_min_frag_point(sp, datasize);
 		max_len = SCTP_MAX_CHUNK_LEN - datasize;
 
 		if (val < min_len || val > max_len)
@@ -3354,8 +3311,6 @@
 		asoc->user_frag = val;
 		sctp_assoc_update_frag_point(asoc);
 	} else {
-		if (params.assoc_id && sctp_style(sk, UDP))
-			return -EINVAL;
 		sp->user_frag = val;
 	}
 
@@ -3374,7 +3329,6 @@
 static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval,
 					     unsigned int optlen)
 {
-	struct net *net = sock_net(sk);
 	struct sctp_sock	*sp;
 	struct sctp_association	*asoc = NULL;
 	struct sctp_setpeerprim	prim;
@@ -3384,7 +3338,7 @@
 
 	sp = sctp_sk(sk);
 
-	if (!net->sctp.addip_enable)
+	if (!sp->ep->asconf_enable)
 		return -EPERM;
 
 	if (optlen != sizeof(struct sctp_setpeerprim))
@@ -3468,8 +3422,8 @@
 static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
 				   unsigned int optlen)
 {
+	struct sctp_sock *sp = sctp_sk(sk);
 	struct sctp_assoc_value params;
-	struct sctp_sock *sp;
 	struct sctp_association *asoc;
 
 	if (optlen != sizeof(struct sctp_assoc_value))
@@ -3477,17 +3431,29 @@
 	if (copy_from_user(&params, optval, optlen))
 		return -EFAULT;
 
-	sp = sctp_sk(sk);
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
 
-	if (params.assoc_id != 0) {
-		asoc = sctp_id2assoc(sk, params.assoc_id);
-		if (!asoc)
-			return -EINVAL;
+	if (asoc) {
 		asoc->default_rcv_context = params.assoc_value;
-	} else {
-		sp->default_rcv_context = params.assoc_value;
+
+		return 0;
 	}
 
+	if (sctp_style(sk, TCP))
+		params.assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (params.assoc_id == SCTP_FUTURE_ASSOC ||
+	    params.assoc_id == SCTP_ALL_ASSOC)
+		sp->default_rcv_context = params.assoc_value;
+
+	if (params.assoc_id == SCTP_CURRENT_ASSOC ||
+	    params.assoc_id == SCTP_ALL_ASSOC)
+		list_for_each_entry(asoc, &sp->ep->asocs, asocs)
+			asoc->default_rcv_context = params.assoc_value;
+
 	return 0;
 }
 
@@ -3529,7 +3495,7 @@
 	sctp_sk(sk)->frag_interleave = !!val;
 
 	if (!sctp_sk(sk)->frag_interleave)
-		sctp_sk(sk)->strm_interleave = 0;
+		sctp_sk(sk)->ep->intl_enable = 0;
 
 	return 0;
 }
@@ -3588,11 +3554,9 @@
 				    char __user *optval,
 				    unsigned int optlen)
 {
+	struct sctp_sock *sp = sctp_sk(sk);
 	struct sctp_assoc_value params;
-	struct sctp_sock *sp;
 	struct sctp_association *asoc;
-	int val;
-	int assoc_id = 0;
 
 	if (optlen == sizeof(int)) {
 		pr_warn_ratelimited(DEPRECATED
@@ -3600,25 +3564,37 @@
 				    "Use of int in max_burst socket option deprecated.\n"
 				    "Use struct sctp_assoc_value instead\n",
 				    current->comm, task_pid_nr(current));
-		if (copy_from_user(&val, optval, optlen))
+		if (copy_from_user(&params.assoc_value, optval, optlen))
 			return -EFAULT;
+		params.assoc_id = SCTP_FUTURE_ASSOC;
 	} else if (optlen == sizeof(struct sctp_assoc_value)) {
 		if (copy_from_user(&params, optval, optlen))
 			return -EFAULT;
-		val = params.assoc_value;
-		assoc_id = params.assoc_id;
 	} else
 		return -EINVAL;
 
-	sp = sctp_sk(sk);
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
 
-	if (assoc_id != 0) {
-		asoc = sctp_id2assoc(sk, assoc_id);
-		if (!asoc)
-			return -EINVAL;
-		asoc->max_burst = val;
-	} else
-		sp->max_burst = val;
+	if (asoc) {
+		asoc->max_burst = params.assoc_value;
+
+		return 0;
+	}
+
+	if (sctp_style(sk, TCP))
+		params.assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (params.assoc_id == SCTP_FUTURE_ASSOC ||
+	    params.assoc_id == SCTP_ALL_ASSOC)
+		sp->max_burst = params.assoc_value;
+
+	if (params.assoc_id == SCTP_CURRENT_ASSOC ||
+	    params.assoc_id == SCTP_ALL_ASSOC)
+		list_for_each_entry(asoc, &sp->ep->asocs, asocs)
+			asoc->max_burst = params.assoc_value;
 
 	return 0;
 }
@@ -3710,35 +3686,54 @@
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct sctp_authkey *authkey;
 	struct sctp_association *asoc;
-	int ret;
-
-	if (!ep->auth_enable)
-		return -EACCES;
+	int ret = -EINVAL;
 
 	if (optlen <= sizeof(struct sctp_authkey))
 		return -EINVAL;
 	/* authkey->sca_keylength is u16, so optlen can't be bigger than
 	 * this.
 	 */
-	optlen = min_t(unsigned int, optlen, USHRT_MAX +
-					     sizeof(struct sctp_authkey));
+	optlen = min_t(unsigned int, optlen, USHRT_MAX + sizeof(*authkey));
 
 	authkey = memdup_user(optval, optlen);
 	if (IS_ERR(authkey))
 		return PTR_ERR(authkey);
 
-	if (authkey->sca_keylength > optlen - sizeof(struct sctp_authkey)) {
-		ret = -EINVAL;
+	if (authkey->sca_keylength > optlen - sizeof(*authkey))
 		goto out;
-	}
 
 	asoc = sctp_id2assoc(sk, authkey->sca_assoc_id);
-	if (!asoc && authkey->sca_assoc_id && sctp_style(sk, UDP)) {
-		ret = -EINVAL;
+	if (!asoc && authkey->sca_assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
+		goto out;
+
+	if (asoc) {
+		ret = sctp_auth_set_key(ep, asoc, authkey);
 		goto out;
 	}
 
-	ret = sctp_auth_set_key(ep, asoc, authkey);
+	if (sctp_style(sk, TCP))
+		authkey->sca_assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC ||
+	    authkey->sca_assoc_id == SCTP_ALL_ASSOC) {
+		ret = sctp_auth_set_key(ep, asoc, authkey);
+		if (ret)
+			goto out;
+	}
+
+	ret = 0;
+
+	if (authkey->sca_assoc_id == SCTP_CURRENT_ASSOC ||
+	    authkey->sca_assoc_id == SCTP_ALL_ASSOC) {
+		list_for_each_entry(asoc, &ep->asocs, asocs) {
+			int res = sctp_auth_set_key(ep, asoc, authkey);
+
+			if (res && !ret)
+				ret = res;
+		}
+	}
+
 out:
 	kzfree(authkey);
 	return ret;
@@ -3755,11 +3750,9 @@
 				      unsigned int optlen)
 {
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
-	struct sctp_authkeyid val;
 	struct sctp_association *asoc;
-
-	if (!ep->auth_enable)
-		return -EACCES;
+	struct sctp_authkeyid val;
+	int ret = 0;
 
 	if (optlen != sizeof(struct sctp_authkeyid))
 		return -EINVAL;
@@ -3767,10 +3760,35 @@
 		return -EFAULT;
 
 	asoc = sctp_id2assoc(sk, val.scact_assoc_id);
-	if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
-	return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
+	if (asoc)
+		return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
+
+	if (sctp_style(sk, TCP))
+		val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
+	    val.scact_assoc_id == SCTP_ALL_ASSOC) {
+		ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
+		if (ret)
+			return ret;
+	}
+
+	if (val.scact_assoc_id == SCTP_CURRENT_ASSOC ||
+	    val.scact_assoc_id == SCTP_ALL_ASSOC) {
+		list_for_each_entry(asoc, &ep->asocs, asocs) {
+			int res = sctp_auth_set_active_key(ep, asoc,
+							   val.scact_keynumber);
+
+			if (res && !ret)
+				ret = res;
+		}
+	}
+
+	return ret;
 }
 
 /*
@@ -3783,11 +3801,9 @@
 				   unsigned int optlen)
 {
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
-	struct sctp_authkeyid val;
 	struct sctp_association *asoc;
-
-	if (!ep->auth_enable)
-		return -EACCES;
+	struct sctp_authkeyid val;
+	int ret = 0;
 
 	if (optlen != sizeof(struct sctp_authkeyid))
 		return -EINVAL;
@@ -3795,11 +3811,35 @@
 		return -EFAULT;
 
 	asoc = sctp_id2assoc(sk, val.scact_assoc_id);
-	if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
-	return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
+	if (asoc)
+		return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
 
+	if (sctp_style(sk, TCP))
+		val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
+	    val.scact_assoc_id == SCTP_ALL_ASSOC) {
+		ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
+		if (ret)
+			return ret;
+	}
+
+	if (val.scact_assoc_id == SCTP_CURRENT_ASSOC ||
+	    val.scact_assoc_id == SCTP_ALL_ASSOC) {
+		list_for_each_entry(asoc, &ep->asocs, asocs) {
+			int res = sctp_auth_del_key_id(ep, asoc,
+						       val.scact_keynumber);
+
+			if (res && !ret)
+				ret = res;
+		}
+	}
+
+	return ret;
 }
 
 /*
@@ -3811,11 +3851,9 @@
 					  unsigned int optlen)
 {
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
-	struct sctp_authkeyid val;
 	struct sctp_association *asoc;
-
-	if (!ep->auth_enable)
-		return -EACCES;
+	struct sctp_authkeyid val;
+	int ret = 0;
 
 	if (optlen != sizeof(struct sctp_authkeyid))
 		return -EINVAL;
@@ -3823,10 +3861,35 @@
 		return -EFAULT;
 
 	asoc = sctp_id2assoc(sk, val.scact_assoc_id);
-	if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
-	return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+	if (asoc)
+		return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+
+	if (sctp_style(sk, TCP))
+		val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
+	    val.scact_assoc_id == SCTP_ALL_ASSOC) {
+		ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+		if (ret)
+			return ret;
+	}
+
+	if (val.scact_assoc_id == SCTP_CURRENT_ASSOC ||
+	    val.scact_assoc_id == SCTP_ALL_ASSOC) {
+		list_for_each_entry(asoc, &ep->asocs, asocs) {
+			int res = sctp_auth_deact_key_id(ep, asoc,
+							 val.scact_keynumber);
+
+			if (res && !ret)
+				ret = res;
+		}
+	}
+
+	return ret;
 }
 
 /*
@@ -3892,11 +3955,25 @@
 			   sizeof(struct sctp_paddrthlds)))
 		return -EFAULT;
 
-
-	if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
-		asoc = sctp_id2assoc(sk, val.spt_assoc_id);
-		if (!asoc)
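+	/* A specific address targets one transport; the wildcard address
+	 * applies the thresholds association-wide or, with
+	 * SCTP_FUTURE_ASSOC, to the socket defaults.
+	 */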
+	if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
+		trans = sctp_addr_id2transport(sk, &val.spt_address,
+					       val.spt_assoc_id);
+		if (!trans)
 			return -ENOENT;
+
+		if (val.spt_pathmaxrxt)
+			trans->pathmaxrxt = val.spt_pathmaxrxt;
+		trans->pf_retrans = val.spt_pathpfthld;
+
+		return 0;
+	}
+
+	asoc = sctp_id2assoc(sk, val.spt_assoc_id);
+	if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
+
+	if (asoc) {
 		list_for_each_entry(trans, &asoc->peer.transport_addr_list,
 				    transports) {
 			if (val.spt_pathmaxrxt)
@@ -3908,14 +3985,11 @@
 			asoc->pathmaxrxt = val.spt_pathmaxrxt;
 		asoc->pf_retrans = val.spt_pathpfthld;
 	} else {
-		trans = sctp_addr_id2transport(sk, &val.spt_address,
-					       val.spt_assoc_id);
-		if (!trans)
-			return -ENOENT;
+		struct sctp_sock *sp = sctp_sk(sk);
 
 		if (val.spt_pathmaxrxt)
-			trans->pathmaxrxt = val.spt_pathmaxrxt;
-		trans->pf_retrans = val.spt_pathpfthld;
+			sp->pathmaxrxt = val.spt_pathmaxrxt;
+		sp->pf_retrans = val.spt_pathpfthld;
 	}
 
 	return 0;
@@ -3958,6 +4032,7 @@
 					unsigned int optlen)
 {
 	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
 
 	if (optlen != sizeof(params))
 		return -EINVAL;
@@ -3965,6 +4040,11 @@
 	if (copy_from_user(&params, optval, optlen))
 		return -EFAULT;
 
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
+
 	sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value;
 
 	return 0;
@@ -3974,6 +4054,7 @@
 					  char __user *optval,
 					  unsigned int optlen)
 {
+	struct sctp_sock *sp = sctp_sk(sk);
 	struct sctp_default_prinfo info;
 	struct sctp_association *asoc;
 	int retval = -EINVAL;
@@ -3993,19 +4074,34 @@
 		info.pr_value = 0;
 
 	asoc = sctp_id2assoc(sk, info.pr_assoc_id);
+	if (!asoc && info.pr_assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
+		goto out;
+
+	retval = 0;
+
 	if (asoc) {
 		SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy);
 		asoc->default_timetolive = info.pr_value;
-	} else if (!info.pr_assoc_id) {
-		struct sctp_sock *sp = sctp_sk(sk);
-
-		SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy);
-		sp->default_timetolive = info.pr_value;
-	} else {
 		goto out;
 	}
 
-	retval = 0;
+	if (sctp_style(sk, TCP))
+		info.pr_assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (info.pr_assoc_id == SCTP_FUTURE_ASSOC ||
+	    info.pr_assoc_id == SCTP_ALL_ASSOC) {
+		SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy);
+		sp->default_timetolive = info.pr_value;
+	}
+
+	if (info.pr_assoc_id == SCTP_CURRENT_ASSOC ||
+	    info.pr_assoc_id == SCTP_ALL_ASSOC) {
+		list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
+			SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy);
+			asoc->default_timetolive = info.pr_value;
+		}
+	}
 
 out:
 	return retval;
@@ -4028,15 +4124,11 @@
 	}
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (asoc) {
-		asoc->reconf_enable = !!params.assoc_value;
-	} else if (!params.assoc_id) {
-		struct sctp_sock *sp = sctp_sk(sk);
-
-		sp->ep->reconf_enable = !!params.assoc_value;
-	} else {
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		goto out;
-	}
+
+	sctp_sk(sk)->ep->reconf_enable = !!params.assoc_value;
 
 	retval = 0;
 
@@ -4048,6 +4140,7 @@
 					   char __user *optval,
 					   unsigned int optlen)
 {
+	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct sctp_assoc_value params;
 	struct sctp_association *asoc;
 	int retval = -EINVAL;
@@ -4064,17 +4157,28 @@
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
+		goto out;
+
+	retval = 0;
+
 	if (asoc) {
 		asoc->strreset_enable = params.assoc_value;
-	} else if (!params.assoc_id) {
-		struct sctp_sock *sp = sctp_sk(sk);
-
-		sp->ep->strreset_enable = params.assoc_value;
-	} else {
 		goto out;
 	}
 
-	retval = 0;
+	if (sctp_style(sk, TCP))
+		params.assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (params.assoc_id == SCTP_FUTURE_ASSOC ||
+	    params.assoc_id == SCTP_ALL_ASSOC)
+		ep->strreset_enable = params.assoc_value;
+
+	if (params.assoc_id == SCTP_CURRENT_ASSOC ||
+	    params.assoc_id == SCTP_ALL_ASSOC)
+		list_for_each_entry(asoc, &ep->asocs, asocs)
+			asoc->strreset_enable = params.assoc_value;
 
 out:
 	return retval;
@@ -4169,29 +4273,47 @@
 				     char __user *optval,
 				     unsigned int optlen)
 {
+	struct sctp_sock *sp = sctp_sk(sk);
 	struct sctp_association *asoc;
 	struct sctp_assoc_value params;
-	int retval = -EINVAL;
+	int retval = 0;
 
 	if (optlen < sizeof(params))
-		goto out;
+		return -EINVAL;
 
 	optlen = sizeof(params);
-	if (copy_from_user(&params, optval, optlen)) {
-		retval = -EFAULT;
-		goto out;
-	}
+	if (copy_from_user(&params, optval, optlen))
+		return -EFAULT;
 
 	if (params.assoc_value > SCTP_SS_MAX)
-		goto out;
+		return -EINVAL;
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (!asoc)
-		goto out;
+	if (!asoc && params.assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
 
-	retval = sctp_sched_set_sched(asoc, params.assoc_value);
+	if (asoc)
+		return sctp_sched_set_sched(asoc, params.assoc_value);
 
-out:
+	if (sctp_style(sk, TCP))
+		params.assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (params.assoc_id == SCTP_FUTURE_ASSOC ||
+	    params.assoc_id == SCTP_ALL_ASSOC)
+		sp->default_ss = params.assoc_value;
+
+	if (params.assoc_id == SCTP_CURRENT_ASSOC ||
+	    params.assoc_id == SCTP_ALL_ASSOC) {
+		list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
+			int ret = sctp_sched_set_sched(asoc,
+						       params.assoc_value);
+
+			if (ret && !retval)
+				retval = ret;
+		}
+	}
+
 	return retval;
 }
 
@@ -4199,8 +4321,8 @@
 					   char __user *optval,
 					   unsigned int optlen)
 {
-	struct sctp_association *asoc;
 	struct sctp_stream_value params;
+	struct sctp_association *asoc;
 	int retval = -EINVAL;
 
 	if (optlen < sizeof(params))
@@ -4213,11 +4335,24 @@
 	}
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (!asoc)
+	if (!asoc && params.assoc_id != SCTP_CURRENT_ASSOC &&
+	    sctp_style(sk, UDP))
 		goto out;
 
-	retval = sctp_sched_set_value(asoc, params.stream_id,
-				      params.stream_value, GFP_KERNEL);
+	if (asoc) {
+		retval = sctp_sched_set_value(asoc, params.stream_id,
+					      params.stream_value, GFP_KERNEL);
+		goto out;
+	}
+
+	retval = 0;
+
+	list_for_each_entry(asoc, &sctp_sk(sk)->ep->asocs, asocs) {
+		int ret = sctp_sched_set_value(asoc, params.stream_id,
+					       params.stream_value, GFP_KERNEL);
+		if (ret && !retval) /* try to return the 1st error. */
+			retval = ret;
+	}
 
 out:
 	return retval;
@@ -4228,8 +4363,8 @@
 						  unsigned int optlen)
 {
 	struct sctp_sock *sp = sctp_sk(sk);
-	struct net *net = sock_net(sk);
 	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
 	int retval = -EINVAL;
 
 	if (optlen < sizeof(params))
@@ -4241,15 +4376,17 @@
 		goto out;
 	}
 
-	if (params.assoc_id)
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		goto out;
 
-	if (!net->sctp.intl_enable || !sp->frag_interleave) {
+	if (!sock_net(sk)->sctp.intl_enable || !sp->frag_interleave) {
 		retval = -EPERM;
 		goto out;
 	}
 
-	sp->strm_interleave = !!params.assoc_value;
+	sp->ep->intl_enable = !!params.assoc_value;
 
 	retval = 0;
 
@@ -4279,6 +4416,179 @@
 	return 0;
 }
 
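+/* Toggle one event type in an association's subscription mask.  If
+ * SCTP_SENDER_DRY_EVENT is being enabled while the outqueue is already
+ * empty, deliver the notification immediately.
+ */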
+static int sctp_assoc_ulpevent_type_set(struct sctp_event *param,
+					struct sctp_association *asoc)
+{
+	struct sctp_ulpevent *event;
+
+	sctp_ulpevent_type_set(&asoc->subscribe, param->se_type, param->se_on);
+
+	if (param->se_type == SCTP_SENDER_DRY_EVENT && param->se_on) {
+		if (sctp_outq_is_empty(&asoc->outqueue)) {
+			event = sctp_ulpevent_make_sender_dry_event(asoc,
+					GFP_USER | __GFP_NOWARN);
+			if (!event)
+				return -ENOMEM;
+
+			asoc->stream.si->enqueue_event(&asoc->ulpq, event);
+		}
+	}
+
+	return 0;
+}
+
+static int sctp_setsockopt_event(struct sock *sk, char __user *optval,
+				 unsigned int optlen)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_association *asoc;
+	struct sctp_event param;
+	int retval = 0;
+
+	if (optlen < sizeof(param))
+		return -EINVAL;
+
+	optlen = sizeof(param);
+	if (copy_from_user(&param, optval, optlen))
+		return -EFAULT;
+
+	if (param.se_type < SCTP_SN_TYPE_BASE ||
+	    param.se_type > SCTP_SN_TYPE_MAX)
+		return -EINVAL;
+
+	asoc = sctp_id2assoc(sk, param.se_assoc_id);
+	if (!asoc && param.se_assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
+
+	if (asoc)
+		return sctp_assoc_ulpevent_type_set(&param, asoc);
+
+	if (sctp_style(sk, TCP))
+		param.se_assoc_id = SCTP_FUTURE_ASSOC;
+
+	if (param.se_assoc_id == SCTP_FUTURE_ASSOC ||
+	    param.se_assoc_id == SCTP_ALL_ASSOC)
+		sctp_ulpevent_type_set(&sp->subscribe,
+				       param.se_type, param.se_on);
+
+	if (param.se_assoc_id == SCTP_CURRENT_ASSOC ||
+	    param.se_assoc_id == SCTP_ALL_ASSOC) {
+		list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
+			int ret = sctp_assoc_ulpevent_type_set(&param, asoc);
+
+			if (ret && !retval)
+				retval = ret;
+		}
+	}
+
+	return retval;
+}
+
+static int sctp_setsockopt_asconf_supported(struct sock *sk,
+					    char __user *optval,
+					    unsigned int optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	struct sctp_endpoint *ep;
+	int retval = -EINVAL;
+
+	if (optlen != sizeof(params))
+		goto out;
+
+	if (copy_from_user(&params, optval, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		goto out;
+
+	ep = sctp_sk(sk)->ep;
+	ep->asconf_enable = !!params.assoc_value;
+
+	if (ep->asconf_enable && ep->auth_enable) {
+		sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF);
+		sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK);
+	}
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
+static int sctp_setsockopt_auth_supported(struct sock *sk,
+					  char __user *optval,
+					  unsigned int optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	struct sctp_endpoint *ep;
+	int retval = -EINVAL;
+
+	if (optlen != sizeof(params))
+		goto out;
+
+	if (copy_from_user(&params, optval, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		goto out;
+
+	ep = sctp_sk(sk)->ep;
+	if (params.assoc_value) {
+		retval = sctp_auth_init(ep, GFP_KERNEL);
+		if (retval)
+			goto out;
+		if (ep->asconf_enable) {
+			sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF);
+			sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK);
+		}
+	}
+
+	ep->auth_enable = !!params.assoc_value;
+	retval = 0;
+
+out:
+	return retval;
+}
+
+static int sctp_setsockopt_ecn_supported(struct sock *sk,
+					 char __user *optval,
+					 unsigned int optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EINVAL;
+
+	if (optlen != sizeof(params))
+		goto out;
+
+	if (copy_from_user(&params, optval, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		goto out;
+
+	sctp_sk(sk)->ep->ecn_enable = !!params.assoc_value;
+	retval = 0;
+
+out:
+	return retval;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4476,6 +4786,18 @@
 	case SCTP_REUSE_PORT:
 		retval = sctp_setsockopt_reuse_port(sk, optval, optlen);
 		break;
+	case SCTP_EVENT:
+		retval = sctp_setsockopt_event(sk, optval, optlen);
+		break;
+	case SCTP_ASCONF_SUPPORTED:
+		retval = sctp_setsockopt_asconf_supported(sk, optval, optlen);
+		break;
+	case SCTP_AUTH_SUPPORTED:
+		retval = sctp_setsockopt_auth_supported(sk, optval, optlen);
+		break;
+	case SCTP_ECN_SUPPORTED:
+		retval = sctp_setsockopt_ecn_supported(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -4506,34 +4828,17 @@
 static int sctp_connect(struct sock *sk, struct sockaddr *addr,
 			int addr_len, int flags)
 {
-	struct inet_sock *inet = inet_sk(sk);
 	struct sctp_af *af;
-	int err = 0;
+	int err = -EINVAL;
 
 	lock_sock(sk);
-
 	pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk,
 		 addr, addr_len);
 
-	/* We may need to bind the socket. */
-	if (!inet->inet_num) {
-		if (sk->sk_prot->get_port(sk, 0)) {
-			release_sock(sk);
-			return -EAGAIN;
-		}
-		inet->inet_sport = htons(inet->inet_num);
-	}
-
 	/* Validate addr_len before calling common connect/connectx routine. */
 	af = sctp_get_af_specific(addr->sa_family);
-	if (!af || addr_len < af->sockaddr_len) {
-		err = -EINVAL;
-	} else {
-		/* Pass correct addr len to common routine (so it knows there
-		 * is only one address being passed.
-		 */
+	if (af && addr_len >= af->sockaddr_len)
 		err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL);
-	}
 
 	release_sock(sk);
 	return err;
@@ -4608,7 +4913,11 @@
 	/* Populate the fields of the newsk from the oldsk and migrate the
 	 * asoc to the newsk.
 	 */
-	sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
+	error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
+	if (error) {
+		sk_common_release(newsk);
+		newsk = NULL;
+	}
 
 out:
 	release_sock(sk);
@@ -4724,19 +5033,21 @@
 	/* Initialize default event subscriptions. By default, all the
 	 * options are off.
 	 */
-	memset(&sp->subscribe, 0, sizeof(struct sctp_event_subscribe));
+	sp->subscribe = 0;
 
 	/* Default Peer Address Parameters.  These defaults can
 	 * be modified via SCTP_PEER_ADDR_PARAMS
 	 */
 	sp->hbinterval  = net->sctp.hb_interval;
 	sp->pathmaxrxt  = net->sctp.max_retrans_path;
+	sp->pf_retrans  = net->sctp.pf_retrans;
 	sp->pathmtu     = 0; /* allow default discovery */
 	sp->sackdelay   = net->sctp.sack_timeout;
 	sp->sackfreq	= 2;
 	sp->param_flags = SPP_HB_ENABLE |
 			  SPP_PMTUD_ENABLE |
 			  SPP_SACKDELAY_ENABLE;
+	sp->default_ss = SCTP_SS_DEFAULT;
 
 	/* If enabled no SCTP message fragmentation will be performed.
 	 * Configure through SCTP_DISABLE_FRAGMENTS socket option.
@@ -5269,14 +5580,24 @@
 static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval,
 				  int __user *optlen)
 {
+	struct sctp_event_subscribe subscribe;
+	__u8 *sn_type = (__u8 *)&subscribe;
+	int i;
+
 	if (len == 0)
 		return -EINVAL;
 	if (len > sizeof(struct sctp_event_subscribe))
 		len = sizeof(struct sctp_event_subscribe);
 	if (put_user(len, optlen))
 		return -EFAULT;
-	if (copy_to_user(optval, &sctp_sk(sk)->subscribe, len))
+
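+	/* Expand the subscription bitmask back into the legacy per-event
+	 * byte layout userspace expects.
+	 */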
+	for (i = 0; i < len; i++)
+		sn_type[i] = sctp_ulpevent_type_enabled(sctp_sk(sk)->subscribe,
+							SCTP_SN_TYPE_BASE + i);
+
+	if (copy_to_user(optval, &subscribe, len))
 		return -EFAULT;
+
 	return 0;
 }
 
@@ -5344,7 +5665,12 @@
 	/* Populate the fields of the newsk from the oldsk and migrate the
 	 * asoc to the newsk.
 	 */
-	sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
+	err = sctp_sock_migrate(sk, sock->sk, asoc,
+				SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
+	if (err) {
+		sock_release(sock);
+		sock = NULL;
+	}
 
 	*sockp = sock;
 
@@ -5620,12 +5946,13 @@
 		}
 	}
 
-	/* Get association, if assoc_id != 0 and the socket is a one
-	 * to many style socket, and an association was not found, then
-	 * the id was invalid.
+	/* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+	 * socket is a one to many style socket, and an association
+	 * was not found, then the id was invalid.
 	 */
 	asoc = sctp_id2assoc(sk, params.spp_assoc_id);
-	if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) {
+	if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
 		pr_debug("%s: failed no association\n", __func__);
 		return -EINVAL;
 	}
@@ -5754,19 +6081,19 @@
 	} else
 		return -EINVAL;
 
-	/* Get association, if sack_assoc_id != 0 and the socket is a one
-	 * to many style socket, and an association was not found, then
-	 * the id was invalid.
+	/* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the
+	 * socket is a one to many style socket, and an association
+	 * was not found, then the id was invalid.
 	 */
 	asoc = sctp_id2assoc(sk, params.sack_assoc_id);
-	if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && params.sack_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
 	if (asoc) {
 		/* Fetch association values. */
 		if (asoc->param_flags & SPP_SACKDELAY_ENABLE) {
-			params.sack_delay = jiffies_to_msecs(
-				asoc->sackdelay);
+			params.sack_delay = jiffies_to_msecs(asoc->sackdelay);
 			params.sack_freq = asoc->sackfreq;
 
 		} else {
@@ -6119,8 +6446,10 @@
 		return -EFAULT;
 
 	asoc = sctp_id2assoc(sk, info.sinfo_assoc_id);
-	if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && info.sinfo_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
+
 	if (asoc) {
 		info.sinfo_stream = asoc->default_stream;
 		info.sinfo_flags = asoc->default_flags;
@@ -6163,8 +6492,10 @@
 		return -EFAULT;
 
 	asoc = sctp_id2assoc(sk, info.snd_assoc_id);
-	if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && info.snd_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
+
 	if (asoc) {
 		info.snd_sid = asoc->default_stream;
 		info.snd_flags = asoc->default_flags;
@@ -6240,7 +6571,8 @@
 
 	asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id);
 
-	if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
 	/* Values corresponding to the specific association. */
@@ -6297,7 +6629,8 @@
 
 	asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id);
 
-	if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
 	/* Values corresponding to the specific association */
@@ -6372,7 +6705,6 @@
 				   char __user *optval, int __user *optlen)
 {
 	struct sctp_assoc_value params;
-	struct sctp_sock *sp;
 	struct sctp_association *asoc;
 
 	if (len < sizeof(struct sctp_assoc_value))
@@ -6383,16 +6715,13 @@
 	if (copy_from_user(&params, optval, len))
 		return -EFAULT;
 
-	sp = sctp_sk(sk);
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
 
-	if (params.assoc_id != 0) {
-		asoc = sctp_id2assoc(sk, params.assoc_id);
-		if (!asoc)
-			return -EINVAL;
-		params.assoc_value = asoc->default_rcv_context;
-	} else {
-		params.assoc_value = sp->default_rcv_context;
-	}
+	params.assoc_value = asoc ? asoc->default_rcv_context
+				  : sctp_sk(sk)->default_rcv_context;
 
 	if (put_user(len, optlen))
 		return -EFAULT;
@@ -6441,7 +6770,7 @@
 				    "Use of int in maxseg socket option.\n"
 				    "Use struct sctp_assoc_value instead\n",
 				    current->comm, task_pid_nr(current));
-		params.assoc_id = 0;
+		params.assoc_id = SCTP_FUTURE_ASSOC;
 	} else if (len >= sizeof(struct sctp_assoc_value)) {
 		len = sizeof(struct sctp_assoc_value);
 		if (copy_from_user(&params, optval, len))
@@ -6450,7 +6779,8 @@
 		return -EINVAL;
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (!asoc && params.assoc_id && sctp_style(sk, UDP))
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
 	if (asoc)
@@ -6527,7 +6857,6 @@
 				    int __user *optlen)
 {
 	struct sctp_assoc_value params;
-	struct sctp_sock *sp;
 	struct sctp_association *asoc;
 
 	if (len == sizeof(int)) {
@@ -6536,7 +6865,7 @@
 				    "Use of int in max_burst socket option.\n"
 				    "Use struct sctp_assoc_value instead\n",
 				    current->comm, task_pid_nr(current));
-		params.assoc_id = 0;
+		params.assoc_id = SCTP_FUTURE_ASSOC;
 	} else if (len >= sizeof(struct sctp_assoc_value)) {
 		len = sizeof(struct sctp_assoc_value);
 		if (copy_from_user(&params, optval, len))
@@ -6544,15 +6873,12 @@
 	} else
 		return -EINVAL;
 
-	sp = sctp_sk(sk);
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
 
-	if (params.assoc_id != 0) {
-		asoc = sctp_id2assoc(sk, params.assoc_id);
-		if (!asoc)
-			return -EINVAL;
-		params.assoc_value = asoc->max_burst;
-	} else
-		params.assoc_value = sp->max_burst;
+	params.assoc_value = asoc ? asoc->max_burst : sctp_sk(sk)->max_burst;
 
 	if (len == sizeof(int)) {
 		if (copy_to_user(optval, &params.assoc_value, len))
@@ -6609,9 +6935,6 @@
 	struct sctp_authkeyid val;
 	struct sctp_association *asoc;
 
-	if (!ep->auth_enable)
-		return -EACCES;
-
 	if (len < sizeof(struct sctp_authkeyid))
 		return -EINVAL;
 
@@ -6623,10 +6946,15 @@
 	if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
 		return -EINVAL;
 
-	if (asoc)
+	if (asoc) {
+		if (!asoc->peer.auth_capable)
+			return -EACCES;
 		val.scact_keynumber = asoc->active_key_id;
-	else
+	} else {
+		if (!ep->auth_enable)
+			return -EACCES;
 		val.scact_keynumber = ep->active_key_id;
+	}
 
 	if (put_user(len, optlen))
 		return -EFAULT;
@@ -6639,7 +6967,6 @@
 static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 				    char __user *optval, int __user *optlen)
 {
-	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct sctp_authchunks __user *p = (void __user *)optval;
 	struct sctp_authchunks val;
 	struct sctp_association *asoc;
@@ -6647,9 +6974,6 @@
 	u32    num_chunks = 0;
 	char __user *to;
 
-	if (!ep->auth_enable)
-		return -EACCES;
-
 	if (len < sizeof(struct sctp_authchunks))
 		return -EINVAL;
 
@@ -6661,6 +6985,9 @@
 	if (!asoc)
 		return -EINVAL;
 
+	if (!asoc->peer.auth_capable)
+		return -EACCES;
+
 	ch = asoc->peer.peer_chunks;
 	if (!ch)
 		goto num;
@@ -6692,9 +7019,6 @@
 	u32    num_chunks = 0;
 	char __user *to;
 
-	if (!ep->auth_enable)
-		return -EACCES;
-
 	if (len < sizeof(struct sctp_authchunks))
 		return -EINVAL;
 
@@ -6703,14 +7027,19 @@
 
 	to = p->gauth_chunks;
 	asoc = sctp_id2assoc(sk, val.gauth_assoc_id);
-	if (!asoc && val.gauth_assoc_id && sctp_style(sk, UDP))
+	if (!asoc && val.gauth_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
 		return -EINVAL;
 
-	if (asoc)
+	if (asoc) {
+		if (!asoc->peer.auth_capable)
+			return -EACCES;
 		ch = (struct sctp_chunks_param *)asoc->c.auth_chunks;
-	else
+	} else {
+		if (!ep->auth_enable)
+			return -EACCES;
 		ch = ep->auth_chunk_list;
-
+	}
 	if (!ch)
 		goto num;
 
@@ -6855,14 +7184,7 @@
 	if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len))
 		return -EFAULT;
 
-	if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
-		asoc = sctp_id2assoc(sk, val.spt_assoc_id);
-		if (!asoc)
-			return -ENOENT;
-
-		val.spt_pathpfthld = asoc->pf_retrans;
-		val.spt_pathmaxrxt = asoc->pathmaxrxt;
-	} else {
+	if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
 		trans = sctp_addr_id2transport(sk, &val.spt_address,
 					       val.spt_assoc_id);
 		if (!trans)
@@ -6870,8 +7192,26 @@
 
 		val.spt_pathmaxrxt = trans->pathmaxrxt;
 		val.spt_pathpfthld = trans->pf_retrans;
+
+		goto out;
 	}
 
+	asoc = sctp_id2assoc(sk, val.spt_assoc_id);
+	if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
+
+	if (asoc) {
+		val.spt_pathpfthld = asoc->pf_retrans;
+		val.spt_pathmaxrxt = asoc->pathmaxrxt;
+	} else {
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		val.spt_pathpfthld = sp->pf_retrans;
+		val.spt_pathmaxrxt = sp->pathmaxrxt;
+	}
+
+out:
 	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
 		return -EFAULT;
 
@@ -7000,17 +7340,15 @@
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (asoc) {
-		params.assoc_value = asoc->prsctp_enable;
-	} else if (!params.assoc_id) {
-		struct sctp_sock *sp = sctp_sk(sk);
-
-		params.assoc_value = sp->ep->prsctp_enable;
-	} else {
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
 		retval = -EINVAL;
 		goto out;
 	}
 
+	params.assoc_value = asoc ? asoc->peer.prsctp_capable
+				  : sctp_sk(sk)->ep->prsctp_enable;
+
 	if (put_user(len, optlen))
 		goto out;
 
@@ -7041,17 +7379,20 @@
 		goto out;
 
 	asoc = sctp_id2assoc(sk, info.pr_assoc_id);
+	if (!asoc && info.pr_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
 	if (asoc) {
 		info.pr_policy = SCTP_PR_POLICY(asoc->default_flags);
 		info.pr_value = asoc->default_timetolive;
-	} else if (!info.pr_assoc_id) {
+	} else {
 		struct sctp_sock *sp = sctp_sk(sk);
 
 		info.pr_policy = SCTP_PR_POLICY(sp->default_flags);
 		info.pr_value = sp->default_timetolive;
-	} else {
-		retval = -EINVAL;
-		goto out;
 	}
 
 	if (put_user(len, optlen))
@@ -7207,17 +7548,15 @@
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (asoc) {
-		params.assoc_value = asoc->reconf_enable;
-	} else if (!params.assoc_id) {
-		struct sctp_sock *sp = sctp_sk(sk);
-
-		params.assoc_value = sp->ep->reconf_enable;
-	} else {
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
 		retval = -EINVAL;
 		goto out;
 	}
 
+	params.assoc_value = asoc ? asoc->peer.reconf_capable
+				  : sctp_sk(sk)->ep->reconf_enable;
+
 	if (put_user(len, optlen))
 		goto out;
 
@@ -7248,17 +7587,15 @@
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (asoc) {
-		params.assoc_value = asoc->strreset_enable;
-	} else if (!params.assoc_id) {
-		struct sctp_sock *sp = sctp_sk(sk);
-
-		params.assoc_value = sp->ep->strreset_enable;
-	} else {
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
 		retval = -EINVAL;
 		goto out;
 	}
 
+	params.assoc_value = asoc ? asoc->strreset_enable
+				  : sctp_sk(sk)->ep->strreset_enable;
+
 	if (put_user(len, optlen))
 		goto out;
 
@@ -7289,12 +7626,14 @@
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (!asoc) {
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
 		retval = -EINVAL;
 		goto out;
 	}
 
-	params.assoc_value = sctp_sched_get_sched(asoc);
+	params.assoc_value = asoc ? sctp_sched_get_sched(asoc)
+				  : sctp_sk(sk)->default_ss;
 
 	if (put_user(len, optlen))
 		goto out;
@@ -7368,17 +7707,15 @@
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (asoc) {
-		params.assoc_value = asoc->intl_enable;
-	} else if (!params.assoc_id) {
-		struct sctp_sock *sp = sctp_sk(sk);
-
-		params.assoc_value = sp->strm_interleave;
-	} else {
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
 		retval = -EINVAL;
 		goto out;
 	}
 
+	params.assoc_value = asoc ? asoc->peer.intl_capable
+				  : sctp_sk(sk)->ep->intl_enable;
+
 	if (put_user(len, optlen))
 		goto out;
 
@@ -7411,6 +7748,158 @@
 	return 0;
 }
 
+static int sctp_getsockopt_event(struct sock *sk, int len, char __user *optval,
+				 int __user *optlen)
+{
+	struct sctp_association *asoc;
+	struct sctp_event param;
+	__u16 subscribe;
+
+	if (len < sizeof(param))
+		return -EINVAL;
+
+	len = sizeof(param);
+	if (copy_from_user(&param, optval, len))
+		return -EFAULT;
+
+	if (param.se_type < SCTP_SN_TYPE_BASE ||
+	    param.se_type > SCTP_SN_TYPE_MAX)
+		return -EINVAL;
+
+	asoc = sctp_id2assoc(sk, param.se_assoc_id);
+	if (!asoc && param.se_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
+
+	subscribe = asoc ? asoc->subscribe : sctp_sk(sk)->subscribe;
+	param.se_on = sctp_ulpevent_type_enabled(subscribe, param.se_type);
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	if (copy_to_user(optval, &param, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int sctp_getsockopt_asconf_supported(struct sock *sk, int len,
+					    char __user *optval,
+					    int __user *optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EFAULT;
+
+	if (len < sizeof(params)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	len = sizeof(params);
+	if (copy_from_user(&params, optval, len))
+		goto out;
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	params.assoc_value = asoc ? asoc->peer.asconf_capable
+				  : sctp_sk(sk)->ep->asconf_enable;
+
+	if (put_user(len, optlen))
+		goto out;
+
+	if (copy_to_user(optval, &params, len))
+		goto out;
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
+static int sctp_getsockopt_auth_supported(struct sock *sk, int len,
+					  char __user *optval,
+					  int __user *optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EFAULT;
+
+	if (len < sizeof(params)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	len = sizeof(params);
+	if (copy_from_user(&params, optval, len))
+		goto out;
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	params.assoc_value = asoc ? asoc->peer.auth_capable
+				  : sctp_sk(sk)->ep->auth_enable;
+
+	if (put_user(len, optlen))
+		goto out;
+
+	if (copy_to_user(optval, &params, len))
+		goto out;
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
+static int sctp_getsockopt_ecn_supported(struct sock *sk, int len,
+					 char __user *optval,
+					 int __user *optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EFAULT;
+
+	if (len < sizeof(params)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	len = sizeof(params);
+	if (copy_from_user(&params, optval, len))
+		goto out;
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	params.assoc_value = asoc ? asoc->peer.ecn_capable
+				  : sctp_sk(sk)->ep->ecn_enable;
+
+	if (put_user(len, optlen))
+		goto out;
+
+	if (copy_to_user(optval, &params, len))
+		goto out;
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -7609,6 +8098,20 @@
 	case SCTP_REUSE_PORT:
 		retval = sctp_getsockopt_reuse_port(sk, len, optval, optlen);
 		break;
+	case SCTP_EVENT:
+		retval = sctp_getsockopt_event(sk, len, optval, optlen);
+		break;
+	case SCTP_ASCONF_SUPPORTED:
+		retval = sctp_getsockopt_asconf_supported(sk, len, optval,
+							  optlen);
+		break;
+	case SCTP_AUTH_SUPPORTED:
+		retval = sctp_getsockopt_auth_supported(sk, len, optval,
+							optlen);
+		break;
+	case SCTP_ECN_SUPPORTED:
+		retval = sctp_getsockopt_ecn_supported(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7644,10 +8147,12 @@
 static struct sctp_bind_bucket *sctp_bucket_create(
 	struct sctp_bind_hashbucket *head, struct net *, unsigned short snum);
 
-static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
+static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 {
-	bool reuse = (sk->sk_reuse || sctp_sk(sk)->reuse);
+	struct sctp_sock *sp = sctp_sk(sk);
+	bool reuse = (sk->sk_reuse || sp->reuse);
 	struct sctp_bind_hashbucket *head; /* hash list */
+	kuid_t uid = sock_i_uid(sk);
 	struct sctp_bind_bucket *pp;
 	unsigned short snum;
 	int ret;
@@ -7723,7 +8228,10 @@
 
 		pr_debug("%s: found a possible match\n", __func__);
 
-		if (pp->fastreuse && reuse && sk->sk_state != SCTP_SS_LISTENING)
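+		/* Fast path: all current owners of this port either allow
+		 * address reuse or share SO_REUSEPORT under the same uid,
+		 * so the conflict scan below can be skipped.
+		 */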
+		if ((pp->fastreuse && reuse &&
+		     sk->sk_state != SCTP_SS_LISTENING) ||
+		    (pp->fastreuseport && sk->sk_reuseport &&
+		     uid_eq(pp->fastuid, uid)))
 			goto success;
 
 		/* Run through the list of sockets bound to the port
@@ -7737,17 +8245,19 @@
 		 * in an endpoint.
 		 */
 		sk_for_each_bound(sk2, &pp->owner) {
-			struct sctp_endpoint *ep2;
-			ep2 = sctp_sk(sk2)->ep;
+			struct sctp_sock *sp2 = sctp_sk(sk2);
+			struct sctp_endpoint *ep2 = sp2->ep;
 
 			if (sk == sk2 ||
-			    (reuse && (sk2->sk_reuse || sctp_sk(sk2)->reuse) &&
-			     sk2->sk_state != SCTP_SS_LISTENING))
+			    (reuse && (sk2->sk_reuse || sp2->reuse) &&
+			     sk2->sk_state != SCTP_SS_LISTENING) ||
+			    (sk->sk_reuseport && sk2->sk_reuseport &&
+			     uid_eq(uid, sock_i_uid(sk2))))
 				continue;
 
-			if (sctp_bind_addr_conflict(&ep2->base.bind_addr, addr,
-						 sctp_sk(sk2), sctp_sk(sk))) {
-				ret = (long)sk2;
+			if (sctp_bind_addr_conflict(&ep2->base.bind_addr,
+						    addr, sp2, sp)) {
+				ret = 1;
 				goto fail_unlock;
 			}
 		}
@@ -7769,19 +8279,32 @@
 			pp->fastreuse = 1;
 		else
 			pp->fastreuse = 0;
-	} else if (pp->fastreuse &&
-		   (!reuse || sk->sk_state == SCTP_SS_LISTENING))
-		pp->fastreuse = 0;
+
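+		/* Seed the reuseport fast-path state; it stays valid only
+		 * while every owner binds with SO_REUSEPORT under this uid.
+		 */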
+		if (sk->sk_reuseport) {
+			pp->fastreuseport = 1;
+			pp->fastuid = uid;
+		} else {
+			pp->fastreuseport = 0;
+		}
+	} else {
+		if (pp->fastreuse &&
+		    (!reuse || sk->sk_state == SCTP_SS_LISTENING))
+			pp->fastreuse = 0;
+
+		if (pp->fastreuseport &&
+		    (!sk->sk_reuseport || !uid_eq(pp->fastuid, uid)))
+			pp->fastreuseport = 0;
+	}
 
 	/* We are set, so fill up all the data in the hash table
 	 * entry, tie the socket list information with the rest of the
 	 * sockets FIXME: Blurry, NPI (ipg).
 	 */
 success:
-	if (!sctp_sk(sk)->bind_hash) {
+	if (!sp->bind_hash) {
 		inet_sk(sk)->inet_num = snum;
 		sk_add_bind_node(sk, &pp->owner);
-		sctp_sk(sk)->bind_hash = pp;
+		sp->bind_hash = pp;
 	}
 	ret = 0;
 
@@ -7806,7 +8329,7 @@
 	addr.v4.sin_port = htons(snum);
 
 	/* Note: sk->sk_num gets filled in if ephemeral port request. */
-	return !!sctp_get_port_local(sk, &addr);
+	return sctp_get_port_local(sk, &addr);
 }
 
 /*
@@ -7854,8 +8377,7 @@
 	}
 
 	sk->sk_max_ack_backlog = backlog;
-	sctp_hash_endpoint(ep);
-	return 0;
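+	/* Endpoint hashing can fail now (e.g. reuseport group setup), so
+	 * propagate its return value.
+	 */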
+	return sctp_hash_endpoint(ep);
 }
 
 /*
@@ -7954,7 +8476,7 @@
 	mask = 0;
 
 	/* Are there any exceptional events?  */
-	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
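+	/* poll runs without the socket lock; use the lockless variants to
+	 * avoid data races on the queue heads.
+	 */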
+	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
 		mask |= EPOLLERR |
 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -7963,7 +8485,7 @@
 		mask |= EPOLLHUP;
 
 	/* Is it readable?  Reconsider this code with TCP-style support.  */
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* The association is either gone or not ready.  */
@@ -8349,7 +8871,7 @@
 		if (sk_can_busy_loop(sk)) {
 			sk_busy_loop(sk, noblock);
 
-			if (!skb_queue_empty(&sk->sk_receive_queue))
+			if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 				continue;
 		}
 
@@ -8447,17 +8969,11 @@
 	struct sctp_association *asoc = chunk->asoc;
 	struct sock *sk = asoc->base.sk;
 
-	asoc->sndbuf_used -= SCTP_DATA_SNDSIZE(chunk) +
-				sizeof(struct sk_buff) +
-				sizeof(struct sctp_chunk);
-
-	WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc));
-
-	/*
-	 * This undoes what is done via sctp_set_owner_w and sk_mem_charge
-	 */
-	sk->sk_wmem_queued   -= skb->truesize;
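+	/* Undo the charges taken in sctp_set_owner_w(): the skb truesize
+	 * plus per-chunk overhead accounted to wmem_queued and the
+	 * association's sndbuf_used.
+	 */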
 	sk_mem_uncharge(sk, skb->truesize);
+	sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk);
+	asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk);
+	WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk),
+				      &sk->sk_wmem_alloc));
 
 	if (chunk->shkey) {
 		struct sctp_shared_key *shkey = chunk->shkey;
@@ -8531,7 +9047,10 @@
 			goto do_error;
 		if (signal_pending(current))
 			goto do_interrupted;
-		if (msg_len <= sctp_wspace(asoc))
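+		/* Mirror the send path: reclaim under memory pressure and
+		 * require both sndbuf room and forward-alloc headroom.
+		 */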
+		if (sk_under_memory_pressure(sk))
+			sk_mem_reclaim(sk);
+		if ((int)msg_len <= sctp_wspace(asoc) &&
+		    sk_wmem_schedule(sk, msg_len))
 			break;
 
 		/* Let another process have a go.  Since we are going
@@ -8606,14 +9125,9 @@
  * UDP-style sockets or TCP-style sockets, this code should work.
  *  - Daisy
  */
-static int sctp_writeable(struct sock *sk)
+static bool sctp_writeable(struct sock *sk)
 {
-	int amt = 0;
-
-	amt = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
-	if (amt < 0)
-		amt = 0;
-	return amt;
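+	/* Writable while queued bytes stay below the send buffer limit. */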
+	return sk->sk_sndbuf > sk->sk_wmem_queued;
 }
 
 /* Wait for an association to go into ESTABLISHED state. If timeout is 0,
@@ -8792,7 +9306,7 @@
 	newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
 	newinet->inet_dport = htons(asoc->peer.port);
 	newinet->pmtudisc = inet->pmtudisc;
-	newinet->inet_id = asoc->next_tsn ^ jiffies;
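+	/* Use a random IP ID base rather than leaking tsn/jiffies state
+	 * on the wire.
+	 */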
+	newinet->inet_id = prandom_u32();
 
 	newinet->uc_ttl = inet->uc_ttl;
 	newinet->mc_loop = 1;
@@ -8814,7 +9328,7 @@
 {
 	int ancestor_size = sizeof(struct inet_sock) +
 			    sizeof(struct sctp_sock) -
-			    offsetof(struct sctp_sock, auto_asconf_list);
+			    offsetof(struct sctp_sock, pd_lobby);
 
 	if (sk_from->sk_family == PF_INET6)
 		ancestor_size += sizeof(struct ipv6_pinfo);
@@ -8825,9 +9339,9 @@
 /* Populate the fields of the newsk from the oldsk and migrate the assoc
  * and its messages to the newsk.
  */
-static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
-			      struct sctp_association *assoc,
-			      enum sctp_socket_type type)
+static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+			     struct sctp_association *assoc,
+			     enum sctp_socket_type type)
 {
 	struct sctp_sock *oldsp = sctp_sk(oldsk);
 	struct sctp_sock *newsp = sctp_sk(newsk);
@@ -8836,6 +9350,7 @@
 	struct sk_buff *skb, *tmp;
 	struct sctp_ulpevent *event;
 	struct sctp_bind_hashbucket *head;
+	int err;
 
 	/* Migrate socket buffer sizes and all the socket level options to the
 	 * new socket.
@@ -8864,8 +9379,20 @@
 	/* Copy the bind_addr list from the original endpoint to the new
 	 * endpoint so that we can handle restarts properly
 	 */
-	sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
-				&oldsp->ep->base.bind_addr, GFP_KERNEL);
+	err = sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
+				 &oldsp->ep->base.bind_addr, GFP_KERNEL);
+	if (err)
+		return err;
+
+	/* New ep's auth_hmacs should be set if old ep's is set, in case
+	 * that net->sctp.auth_enable has been changed to 0 by users and
+	 * new ep's auth_hmacs couldn't be set in sctp_endpoint_init().
+	 */
+	if (oldsp->ep->auth_hmacs) {
+		err = sctp_auth_init_hmacs(newsp->ep, GFP_KERNEL);
+		if (err)
+			return err;
+	}
 
 	/* Move any messages in the old socket's receive queue that are for the
 	 * peeled off association to the new socket's receive queue.
@@ -8885,7 +9412,6 @@
 	 * 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby.
 	 * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
 	 */
-	skb_queue_head_init(&newsp->pd_lobby);
 	atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode);
 
 	if (atomic_read(&sctp_sk(oldsk)->pd_mode)) {
@@ -8950,6 +9476,8 @@
 	}
 
 	release_sock(newsk);
+
+	return 0;
 }
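With sctp_sock_migrate() now returning an error, callers must unwind the half-migrated socket themselves. A hedged sketch of the expected caller pattern (as in sctp_accept(); names assumed from context):

	error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
	if (error) {
		sk_common_release(newsk);	/* drop the partially migrated sock */
		newsk = NULL;
	}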
 
 
@@ -8972,7 +9500,7 @@
 	.backlog_rcv =	sctp_backlog_rcv,
 	.hash        =	sctp_hash,
 	.unhash      =	sctp_unhash,
-	.get_port    =	sctp_get_port,
+	.no_autobind =	true,
 	.obj_size    =  sizeof(struct sctp_sock),
 	.useroffset  =  offsetof(struct sctp_sock, subscribe),
 	.usersize    =  offsetof(struct sctp_sock, initmsg) -
@@ -9014,7 +9542,7 @@
 	.backlog_rcv	= sctp_backlog_rcv,
 	.hash		= sctp_hash,
 	.unhash		= sctp_unhash,
-	.get_port	= sctp_get_port,
+	.no_autobind	= true,
 	.obj_size	= sizeof(struct sctp6_sock),
 	.useroffset	= offsetof(struct sctp6_sock, sctp.subscribe),
 	.usersize	= offsetof(struct sctp6_sock, sctp.initmsg) -
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 3892e76..e83cdaa 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -8,22 +9,6 @@
  *
  * This file contains sctp stream manipulation primitives and helpers.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -37,53 +22,6 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/stream_sched.h>
 
-static struct flex_array *fa_alloc(size_t elem_size, size_t elem_count,
-				   gfp_t gfp)
-{
-	struct flex_array *result;
-	int err;
-
-	result = flex_array_alloc(elem_size, elem_count, gfp);
-	if (result) {
-		err = flex_array_prealloc(result, 0, elem_count, gfp);
-		if (err) {
-			flex_array_free(result);
-			result = NULL;
-		}
-	}
-
-	return result;
-}
-
-static void fa_free(struct flex_array *fa)
-{
-	if (fa)
-		flex_array_free(fa);
-}
-
-static void fa_copy(struct flex_array *fa, struct flex_array *from,
-		    size_t index, size_t count)
-{
-	void *elem;
-
-	while (count--) {
-		elem = flex_array_get(from, index);
-		flex_array_put(fa, index, elem, 0);
-		index++;
-	}
-}
-
-static void fa_zero(struct flex_array *fa, size_t index, size_t count)
-{
-	void *elem;
-
-	while (count--) {
-		elem = flex_array_get(fa, index);
-		memset(elem, 0, fa->element_size);
-		index++;
-	}
-}
-
 /* Migrates chunks from stream queues to new stream queues if needed,
  * but not across associations. Also, removes those chunks to streams
  * higher than the new max.
@@ -131,53 +69,41 @@
 		}
 	}
 
-	for (i = outcnt; i < stream->outcnt; i++)
+	for (i = outcnt; i < stream->outcnt; i++) {
 		kfree(SCTP_SO(stream, i)->ext);
+		SCTP_SO(stream, i)->ext = NULL;
+	}
 }
 
 static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
 				 gfp_t gfp)
 {
-	struct flex_array *out;
-	size_t elem_size = sizeof(struct sctp_stream_out);
+	int ret;
 
-	out = fa_alloc(elem_size, outcnt, gfp);
-	if (!out)
-		return -ENOMEM;
+	if (outcnt <= stream->outcnt)
+		return 0;
 
-	if (stream->out) {
-		fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt));
-		fa_free(stream->out);
-	}
+	ret = genradix_prealloc(&stream->out, outcnt, gfp);
+	if (ret)
+		return ret;
 
-	if (outcnt > stream->outcnt)
-		fa_zero(out, stream->outcnt, (outcnt - stream->outcnt));
-
-	stream->out = out;
-
+	stream->outcnt = outcnt;
 	return 0;
 }
 
 static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
 				gfp_t gfp)
 {
-	struct flex_array *in;
-	size_t elem_size = sizeof(struct sctp_stream_in);
+	int ret;
 
-	in = fa_alloc(elem_size, incnt, gfp);
-	if (!in)
-		return -ENOMEM;
+	if (incnt <= stream->incnt)
+		return 0;
 
-	if (stream->in) {
-		fa_copy(in, stream->in, 0, min(incnt, stream->incnt));
-		fa_free(stream->in);
-	}
+	ret = genradix_prealloc(&stream->in, incnt, gfp);
+	if (ret)
+		return ret;
 
-	if (incnt > stream->incnt)
-		fa_zero(in, stream->incnt, (incnt - stream->incnt));
-
-	stream->in = in;
-
+	stream->incnt = incnt;
 	return 0;
 }
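The open-coded flex_array helpers removed above are replaced by the generic radix tree; SCTP_SO()/SCTP_SI() become thin wrappers around genradix_ptr(). A minimal usage sketch of the API as assumed here (see include/linux/generic-radix-tree.h):

	GENRADIX(struct sctp_stream_out) out;	/* declared inside struct sctp_stream */

	/* Reserve backing pages for indices [0, outcnt); the only step that
	 * can fail, so growing a stream keeps a single error exit. */
	ret = genradix_prealloc(&stream->out, outcnt, gfp);

	/* Lookup never allocates after prealloc; fresh elements come back
	 * zeroed, which replaces the old fa_zero() pass. */
	struct sctp_stream_out *so = genradix_ptr(&stream->out, sid);

	genradix_free(&stream->out);	/* drops all backing pages */

The copy and shrink cases also disappear: existing elements stay in place when the tree grows, and both alloc functions now return early when the requested count does not exceed the current one.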
 
@@ -204,12 +130,9 @@
 	if (ret)
 		goto out;
 
-	stream->outcnt = outcnt;
 	for (i = 0; i < stream->outcnt; i++)
 		SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
-	sched->init(stream);
-
 in:
 	sctp_stream_interleave_init(stream);
 	if (!incnt)
@@ -218,14 +141,11 @@
 	ret = sctp_stream_alloc_in(stream, incnt, gfp);
 	if (ret) {
 		sched->free(stream);
-		fa_free(stream->out);
-		stream->out = NULL;
+		genradix_free(&stream->out);
 		stream->outcnt = 0;
 		goto out;
 	}
 
-	stream->incnt = incnt;
-
 out:
 	return ret;
 }
@@ -233,13 +153,20 @@
 int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
 {
 	struct sctp_stream_out_ext *soute;
+	int ret;
 
 	soute = kzalloc(sizeof(*soute), GFP_KERNEL);
 	if (!soute)
 		return -ENOMEM;
 	SCTP_SO(stream, sid)->ext = soute;
 
-	return sctp_sched_init_sid(stream, sid, GFP_KERNEL);
+	ret = sctp_sched_init_sid(stream, sid, GFP_KERNEL);
+	if (ret) {
+		kfree(SCTP_SO(stream, sid)->ext);
+		SCTP_SO(stream, sid)->ext = NULL;
+	}
+
+	return ret;
 }
 
 void sctp_stream_free(struct sctp_stream *stream)
@@ -250,8 +177,8 @@
 	sched->free(stream);
 	for (i = 0; i < stream->outcnt; i++)
 		kfree(SCTP_SO(stream, i)->ext);
-	fa_free(stream->out);
-	fa_free(stream->in);
+	genradix_free(&stream->out);
+	genradix_free(&stream->in);
 }
 
 void sctp_stream_clear(struct sctp_stream *stream)
@@ -282,8 +209,8 @@
 
 	sched->sched_all(stream);
 
-	new->out = NULL;
-	new->in  = NULL;
+	new->out.tree.root = NULL;
+	new->in.tree.root  = NULL;
 	new->outcnt = 0;
 	new->incnt  = 0;
 }
@@ -389,6 +316,7 @@
 		nstr_list[i] = htons(str_list[i]);
 
 	if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) {
+		kfree(nstr_list);
 		retval = -EAGAIN;
 		goto out;
 	}
@@ -535,8 +463,6 @@
 		goto out;
 	}
 
-	stream->outcnt = outcnt;
-
 	asoc->strreset_outstanding = !!out + !!in;
 
 out:
@@ -585,9 +511,9 @@
 	struct sctp_strreset_outreq *outreq = param.v;
 	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
-	__u16 i, nums, flags = 0;
 	__be16 *str_p = NULL;
 	__u32 request_seq;
+	__u16 i, nums;
 
 	request_seq = ntohl(outreq->request_seq);
 
@@ -615,6 +541,15 @@
 	if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ))
 		goto out;
 
+	nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
+	str_p = outreq->list_of_streams;
+	for (i = 0; i < nums; i++) {
+		if (ntohs(str_p[i]) >= stream->incnt) {
+			result = SCTP_STRRESET_ERR_WRONG_SSN;
+			goto out;
+		}
+	}
+
 	if (asoc->strreset_chunk) {
 		if (!sctp_chunk_lookup_strreset_param(
 				asoc, outreq->response_seq,
@@ -637,32 +572,19 @@
 			sctp_chunk_put(asoc->strreset_chunk);
 			asoc->strreset_chunk = NULL;
 		}
-
-		flags = SCTP_STREAM_RESET_INCOMING_SSN;
 	}
 
-	nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
-	if (nums) {
-		str_p = outreq->list_of_streams;
-		for (i = 0; i < nums; i++) {
-			if (ntohs(str_p[i]) >= stream->incnt) {
-				result = SCTP_STRRESET_ERR_WRONG_SSN;
-				goto out;
-			}
-		}
-
+	if (nums)
 		for (i = 0; i < nums; i++)
 			SCTP_SI(stream, ntohs(str_p[i]))->mid = 0;
-	} else {
+	else
 		for (i = 0; i < stream->incnt; i++)
 			SCTP_SI(stream, i)->mid = 0;
-	}
 
 	result = SCTP_STRRESET_PERFORMED;
 
 	*evp = sctp_ulpevent_make_stream_reset_event(asoc,
-		flags | SCTP_STREAM_RESET_OUTGOING_SSN, nums, str_p,
-		GFP_ATOMIC);
+		SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC);
 
 out:
 	sctp_update_strreset_result(asoc, result);
@@ -738,9 +660,6 @@
 
 	result = SCTP_STRRESET_PERFORMED;
 
-	*evp = sctp_ulpevent_make_stream_reset_event(asoc,
-		SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC);
-
 out:
 	sctp_update_strreset_result(asoc, result);
 err:
@@ -873,6 +792,14 @@
 	if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
 		goto out;
 
+	in = ntohs(addstrm->number_of_streams);
+	incnt = stream->incnt + in;
+	if (!in || incnt > SCTP_MAX_STREAM)
+		goto out;
+
+	if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
+		goto out;
+
 	if (asoc->strreset_chunk) {
 		if (!sctp_chunk_lookup_strreset_param(
 			asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) {
@@ -896,14 +823,6 @@
 		}
 	}
 
-	in = ntohs(addstrm->number_of_streams);
-	incnt = stream->incnt + in;
-	if (!in || incnt > SCTP_MAX_STREAM)
-		goto out;
-
-	if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
-		goto out;
-
 	stream->incnt = incnt;
 
 	result = SCTP_STRRESET_PERFORMED;
@@ -973,9 +892,6 @@
 
 	result = SCTP_STRRESET_PERFORMED;
 
-	*evp = sctp_ulpevent_make_stream_change_event(asoc,
-		0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC);
-
 out:
 	sctp_update_strreset_result(asoc, result);
 err:
@@ -1036,10 +952,10 @@
 					sout->mid_uo = 0;
 				}
 			}
-
-			flags = SCTP_STREAM_RESET_OUTGOING_SSN;
 		}
 
+		flags |= SCTP_STREAM_RESET_OUTGOING_SSN;
+
 		for (i = 0; i < stream->outcnt; i++)
 			SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
@@ -1058,6 +974,8 @@
 		nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) /
 		       sizeof(__u16);
 
+		flags |= SCTP_STREAM_RESET_INCOMING_SSN;
+
 		*evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
 			nums, str_p, GFP_ATOMIC);
 	} else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) {
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index 0a78cdf..40c40be 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright Red Hat Inc. 2017
  *
@@ -6,22 +7,6 @@
  * These functions implement sctp stream message interleaving, mostly
  * including I-DATA and I-FORWARD-TSN chunks process.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -101,7 +86,7 @@
 
 static bool sctp_validate_data(struct sctp_chunk *chunk)
 {
-	const struct sctp_stream *stream;
+	struct sctp_stream *stream;
 	__u16 sid, ssn;
 
 	if (chunk->chunk_hdr->type != SCTP_CID_DATA)
@@ -140,7 +125,7 @@
 				  struct sctp_ulpevent *event)
 {
 	struct sctp_ulpevent *cevent;
-	struct sk_buff *pos;
+	struct sk_buff *pos, *loc;
 
 	pos = skb_peek_tail(&ulpq->reasm);
 	if (!pos) {
@@ -166,23 +151,30 @@
 		return;
 	}
 
+	loc = NULL;
 	skb_queue_walk(&ulpq->reasm, pos) {
 		cevent = sctp_skb2event(pos);
 
 		if (event->stream < cevent->stream ||
 		    (event->stream == cevent->stream &&
-		     MID_lt(event->mid, cevent->mid)))
+		     MID_lt(event->mid, cevent->mid))) {
+			loc = pos;
 			break;
-
+		}
 		if (event->stream == cevent->stream &&
 		    event->mid == cevent->mid &&
 		    !(cevent->msg_flags & SCTP_DATA_FIRST_FRAG) &&
 		    (event->msg_flags & SCTP_DATA_FIRST_FRAG ||
-		     event->fsn < cevent->fsn))
+		     event->fsn < cevent->fsn)) {
+			loc = pos;
 			break;
+		}
 	}
 
-	__skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event));
+	if (!loc)
+		__skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
+	else
+		__skb_queue_before(&ulpq->reasm, loc, sctp_event2skb(event));
 }
 
 static struct sctp_ulpevent *sctp_intl_retrieve_partial(
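Note on the loc pattern above: skb_queue_walk() leaves the iterator pointing at the list head (not a valid skb) when the walk completes without breaking, so pos must not be passed to __skb_queue_before() afterwards. Capturing the insertion point in loc and falling back to tail insertion is the safe idiom; a generic sketch under the same sk_buff list semantics (should_precede() is a hypothetical comparator):

	struct sk_buff *pos, *loc = NULL;

	skb_queue_walk(queue, pos) {
		if (should_precede(new, pos)) {
			loc = pos;
			break;
		}
	}
	if (loc)
		__skb_queue_before(queue, loc, new);	/* insert mid-queue */
	else
		__skb_queue_tail(queue, new);		/* nothing sorts after it */

The same fix is applied to the ordered lobby in the hunks below.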
@@ -383,7 +375,7 @@
 				    struct sctp_ulpevent *event)
 {
 	struct sctp_ulpevent *cevent;
-	struct sk_buff *pos;
+	struct sk_buff *pos, *loc;
 
 	pos = skb_peek_tail(&ulpq->lobby);
 	if (!pos) {
@@ -403,18 +395,25 @@
 		return;
 	}
 
+	loc = NULL;
 	skb_queue_walk(&ulpq->lobby, pos) {
 		cevent = (struct sctp_ulpevent *)pos->cb;
 
-		if (cevent->stream > event->stream)
+		if (cevent->stream > event->stream) {
+			loc = pos;
 			break;
-
+		}
 		if (cevent->stream == event->stream &&
-		    MID_lt(event->mid, cevent->mid))
+		    MID_lt(event->mid, cevent->mid)) {
+			loc = pos;
 			break;
+		}
 	}
 
-	__skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event));
+	if (!loc)
+		__skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+	else
+		__skb_queue_before(&ulpq->lobby, loc, sctp_event2skb(event));
 }
 
 static void sctp_intl_retrieve_ordered(struct sctp_ulpq *ulpq,
@@ -470,14 +469,15 @@
 }
 
 static int sctp_enqueue_event(struct sctp_ulpq *ulpq,
-			      struct sctp_ulpevent *event)
+			      struct sk_buff_head *skb_list)
 {
-	struct sk_buff *skb = sctp_event2skb(event);
 	struct sock *sk = ulpq->asoc->base.sk;
 	struct sctp_sock *sp = sctp_sk(sk);
-	struct sk_buff_head *skb_list;
+	struct sctp_ulpevent *event;
+	struct sk_buff *skb;
 
-	skb_list = (struct sk_buff_head *)skb->prev;
+	skb = __skb_peek(skb_list);
+	event = sctp_skb2event(skb);
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN &&
 	    (sk->sk_shutdown & SEND_SHUTDOWN ||
@@ -489,7 +489,7 @@
 		sk_incoming_cpu_update(sk);
 	}
 
-	if (!sctp_ulpevent_is_enabled(event, &sp->subscribe))
+	if (!sctp_ulpevent_is_enabled(event, ulpq->asoc->subscribe))
 		goto out_free;
 
 	if (skb_list)
@@ -844,19 +844,24 @@
 
 	if (!(event->msg_flags & SCTP_DATA_UNORDERED)) {
 		event = sctp_intl_reasm(ulpq, event);
-		if (event && event->msg_flags & MSG_EOR) {
+		if (event) {
 			skb_queue_head_init(&temp);
 			__skb_queue_tail(&temp, sctp_event2skb(event));
 
-			event = sctp_intl_order(ulpq, event);
+			if (event->msg_flags & MSG_EOR)
+				event = sctp_intl_order(ulpq, event);
 		}
 	} else {
 		event = sctp_intl_reasm_uo(ulpq, event);
+		if (event) {
+			skb_queue_head_init(&temp);
+			__skb_queue_tail(&temp, sctp_event2skb(event));
+		}
 	}
 
 	if (event) {
 		event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0;
-		sctp_enqueue_event(ulpq, event);
+		sctp_enqueue_event(ulpq, &temp);
 	}
 
 	return event_eor;
@@ -930,20 +935,27 @@
 static void sctp_intl_start_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
 {
 	struct sctp_ulpevent *event;
+	struct sk_buff_head temp;
 
 	if (!skb_queue_empty(&ulpq->reasm)) {
 		do {
 			event = sctp_intl_retrieve_first(ulpq);
-			if (event)
-				sctp_enqueue_event(ulpq, event);
+			if (event) {
+				skb_queue_head_init(&temp);
+				__skb_queue_tail(&temp, sctp_event2skb(event));
+				sctp_enqueue_event(ulpq, &temp);
+			}
 		} while (event);
 	}
 
 	if (!skb_queue_empty(&ulpq->reasm_uo)) {
 		do {
 			event = sctp_intl_retrieve_first_uo(ulpq);
-			if (event)
-				sctp_enqueue_event(ulpq, event);
+			if (event) {
+				skb_queue_head_init(&temp);
+				__skb_queue_tail(&temp, sctp_event2skb(event));
+				sctp_enqueue_event(ulpq, &temp);
+			}
 		} while (event);
 	}
 }
@@ -980,17 +992,19 @@
 	struct sock *sk = ulpq->asoc->base.sk;
 	struct sctp_ulpevent *ev = NULL;
 
-	if (!sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT,
-					&sctp_sk(sk)->subscribe))
+	if (!sctp_ulpevent_type_enabled(ulpq->asoc->subscribe,
+					SCTP_PARTIAL_DELIVERY_EVENT))
 		return;
 
 	ev = sctp_ulpevent_make_pdapi(ulpq->asoc, SCTP_PARTIAL_DELIVERY_ABORTED,
 				      sid, mid, flags, gfp);
 	if (ev) {
+		struct sctp_sock *sp = sctp_sk(sk);
+
 		__skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
 
-		if (!sctp_sk(sk)->data_ready_signalled) {
-			sctp_sk(sk)->data_ready_signalled = 1;
+		if (!sp->data_ready_signalled) {
+			sp->data_ready_signalled = 1;
 			sk->sk_data_ready(sk);
 		}
 	}
@@ -1043,7 +1057,7 @@
 
 	if (event) {
 		sctp_intl_retrieve_ordered(ulpq, event);
-		sctp_enqueue_event(ulpq, event);
+		sctp_enqueue_event(ulpq, &temp);
 	}
 }
 
@@ -1282,6 +1296,15 @@
 			       ntohl(skip->mid), skip->flags);
 }
 
+static int do_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
+{
+	struct sk_buff_head temp;
+
+	skb_queue_head_init(&temp);
+	__skb_queue_tail(&temp, sctp_event2skb(event));
+	return sctp_ulpq_tail_event(ulpq, &temp);
+}
+
 static struct sctp_stream_interleave sctp_stream_interleave_0 = {
 	.data_chunk_len		= sizeof(struct sctp_data_chunk),
 	.ftsn_chunk_len		= sizeof(struct sctp_fwdtsn_chunk),
@@ -1290,7 +1313,7 @@
 	.assign_number		= sctp_chunk_assign_ssn,
 	.validate_data		= sctp_validate_data,
 	.ulpevent_data		= sctp_ulpq_tail_data,
-	.enqueue_event		= sctp_ulpq_tail_event,
+	.enqueue_event		= do_ulpq_tail_event,
 	.renege_events		= sctp_ulpq_renege,
 	.start_pd		= sctp_ulpq_partial_delivery,
 	.abort_pd		= sctp_ulpq_abort_pd,
@@ -1301,6 +1324,16 @@
 	.handle_ftsn		= sctp_handle_fwdtsn,
 };
 
+static int do_sctp_enqueue_event(struct sctp_ulpq *ulpq,
+				 struct sctp_ulpevent *event)
+{
+	struct sk_buff_head temp;
+
+	skb_queue_head_init(&temp);
+	__skb_queue_tail(&temp, sctp_event2skb(event));
+	return sctp_enqueue_event(ulpq, &temp);
+}
+
 static struct sctp_stream_interleave sctp_stream_interleave_1 = {
 	.data_chunk_len		= sizeof(struct sctp_idata_chunk),
 	.ftsn_chunk_len		= sizeof(struct sctp_ifwdtsn_chunk),
@@ -1309,7 +1342,7 @@
 	.assign_number		= sctp_chunk_assign_mid,
 	.validate_data		= sctp_validate_idata,
 	.ulpevent_data		= sctp_ulpevent_idata,
-	.enqueue_event		= sctp_enqueue_event,
+	.enqueue_event		= do_sctp_enqueue_event,
 	.renege_events		= sctp_renege_events,
 	.start_pd		= sctp_intl_start_pd,
 	.abort_pd		= sctp_intl_abort_pd,
@@ -1325,6 +1358,6 @@
 	struct sctp_association *asoc;
 
 	asoc = container_of(stream, struct sctp_association, stream);
-	stream->si = asoc->intl_enable ? &sctp_stream_interleave_1
-				       : &sctp_stream_interleave_0;
+	stream->si = asoc->peer.intl_capable ? &sctp_stream_interleave_1
+					     : &sctp_stream_interleave_0;
 }
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index a6c04a9..99e5f69 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright Red Hat Inc. 2017
  *
@@ -5,22 +6,6 @@
  *
  * These functions manipulate sctp stream queue/scheduling.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -243,7 +228,7 @@
 void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
 {
 	if (!list_is_last(&ch->frag_list, &ch->msg->chunks) &&
-	    !q->asoc->intl_enable) {
+	    !q->asoc->peer.intl_capable) {
 		struct sctp_stream_out *sout;
 		__u16 sid;
 
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
index 2245083..80b5a2c 100644
--- a/net/sctp/stream_sched_prio.c
+++ b/net/sctp/stream_sched_prio.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright Red Hat Inc. 2017
  *
@@ -5,22 +6,6 @@
  *
  * These functions manipulate sctp stream queue/scheduling.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
index 52ba743..ff425ae 100644
--- a/net/sctp/stream_sched_rr.c
+++ b/net/sctp/stream_sched_rr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright Red Hat Inc. 2017
  *
@@ -5,22 +6,6 @@
  *
  * These functions manipulate sctp stream queue/scheduling.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 33ca5b7..238cf17 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2002, 2004
  * Copyright (c) 2002 Intel Corp.
@@ -6,22 +7,6 @@
  *
  * Sysctl related interfaces for SCTP.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -40,10 +25,7 @@
 #include <net/sctp/sctp.h>
 #include <linux/sysctl.h>
 
-static int zero = 0;
-static int one = 1;
 static int timer_max = 86400000; /* ms in one day */
-static int int_max = INT_MAX;
 static int sack_timer_min = 1;
 static int sack_timer_max = 500;
 static int addr_scope_max = SCTP_SCOPE_POLICY_MAX;
@@ -107,7 +89,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1         = &one,
+		.extra1         = SYSCTL_ONE,
 		.extra2         = &timer_max
 	},
 	{
@@ -116,7 +98,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_sctp_do_rto_min,
-		.extra1         = &one,
+		.extra1         = SYSCTL_ONE,
 		.extra2         = &init_net.sctp.rto_max
 	},
 	{
@@ -152,8 +134,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &int_max
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_INT_MAX,
 	},
 	{
 		.procname	= "cookie_preserve_enable",
@@ -175,7 +157,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1         = &one,
+		.extra1         = SYSCTL_ONE,
 		.extra2         = &timer_max
 	},
 	{
@@ -193,7 +175,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1         = &one,
+		.extra1         = SYSCTL_ONE,
 		.extra2         = &timer_max
 	},
 	{
@@ -202,8 +184,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
-		.extra2		= &int_max
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_INT_MAX,
 	},
 	{
 		.procname	= "path_max_retrans",
@@ -211,8 +193,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
-		.extra2		= &int_max
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_INT_MAX,
 	},
 	{
 		.procname	= "max_init_retransmits",
@@ -220,8 +202,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
-		.extra2		= &int_max
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_INT_MAX,
 	},
 	{
 		.procname	= "pf_retrans",
@@ -229,8 +211,8 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &int_max
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_INT_MAX,
 	},
 	{
 		.procname	= "sndbuf_policy",
@@ -296,12 +278,19 @@
 		.proc_handler	= proc_dointvec,
 	},
 	{
+		.procname	= "ecn_enable",
+		.data		= &init_net.sctp.ecn_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "addr_scope_policy",
 		.data		= &init_net.sctp.scope_policy,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &addr_scope_max,
 	},
 	{
@@ -310,7 +299,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= SYSCTL_ONE,
 		.extra2		= &rwnd_scale_max,
 	},
 	{
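The file-local zero/one/int_max bound variables go away in favor of the shared constants SYSCTL_ZERO, SYSCTL_ONE and SYSCTL_INT_MAX from include/linux/sysctl.h, which point into one common array and cannot be modified through stray writes to extra1/extra2. A bounded entry then reads (sketch; the .data field is assumed from the kernel's pf_retrans entry):

	{
		.procname	= "pf_retrans",
		.data		= &init_net.sctp.pf_retrans,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_INT_MAX,
	},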
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 033696e..7235a60 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -11,22 +12,6 @@
  * a remote transport address.  For local transport addresses, we just use
  * union sctp_addr.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -58,8 +43,8 @@
 						  gfp_t gfp)
 {
 	/* Copy in the address.  */
-	peer->ipaddr = *addr;
 	peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
+	memcpy(&peer->ipaddr, addr, peer->af_specific->sockaddr_len);
 	memset(&peer->saddr, 0, sizeof(union sctp_addr));
 
 	peer->sack_generation = 0;
@@ -207,7 +192,8 @@
 
 	/* When a data chunk is sent, reset the heartbeat interval.  */
 	expires = jiffies + sctp_transport_timeout(transport);
-	if (time_before(transport->hb_timer.expires, expires) &&
+	if ((time_before(transport->hb_timer.expires, expires) ||
+	     !timer_pending(&transport->hb_timer)) &&
 	    !mod_timer(&transport->hb_timer,
 		       expires + prandom_u32_max(transport->rto)))
 		sctp_transport_hold(transport);
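Note: once the heartbeat timer has fired, hb_timer.expires holds a stale value that may not compare as time_before() the new deadline, so the old condition could skip rearming entirely. Checking !timer_pending() restores the rearm; the intended logic, restated as a sketch:

	unsigned long expires = jiffies + sctp_transport_timeout(transport);

	/* Rearm if the timer is idle, or push a pending timer out to the
	 * fresh deadline; mod_timer() returns 0 when the timer was idle,
	 * and only then is an extra transport reference taken. */
	if (!timer_pending(&transport->hb_timer) ||
	    time_before(transport->hb_timer.expires, expires))
		if (!mod_timer(&transport->hb_timer,
			       expires + prandom_u32_max(transport->rto)))
			sctp_transport_hold(transport);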
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index 7635f9f..a9c6af5 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -8,22 +9,6 @@
  *
  * These functions manipulate sctp tsn mapping array.
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 8cb7d98..e0cc1ed 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -9,22 +10,6 @@
  * These functions manipulate an sctp event.   The struct ulpevent is used
  * to carry notifications and data to the ULP (sockets).
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -634,8 +619,9 @@
 						gfp_t gfp)
 {
 	struct sctp_ulpevent *event = NULL;
-	struct sk_buff *skb;
-	size_t padding, len;
+	struct sk_buff *skb = chunk->skb;
+	struct sock *sk = asoc->base.sk;
+	size_t padding, datalen;
 	int rx_count;
 
 	/*
@@ -646,15 +632,12 @@
 	if (asoc->ep->rcvbuf_policy)
 		rx_count = atomic_read(&asoc->rmem_alloc);
 	else
-		rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+		rx_count = atomic_read(&sk->sk_rmem_alloc);
 
-	if (rx_count >= asoc->base.sk->sk_rcvbuf) {
+	datalen = ntohs(chunk->chunk_hdr->length);
 
-		if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
-		    (!sk_rmem_schedule(asoc->base.sk, chunk->skb,
-				       chunk->skb->truesize)))
-			goto fail;
-	}
+	if (rx_count >= sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, datalen))
+		goto fail;
 
 	/* Clone the original skb, sharing the data.  */
 	skb = skb_clone(chunk->skb, gfp);
@@ -681,8 +664,7 @@
 	 * The sender should never pad with more than 3 bytes.  The receiver
 	 * MUST ignore the padding bytes.
 	 */
-	len = ntohs(chunk->chunk_hdr->length);
-	padding = SCTP_PAD4(len) - len;
+	padding = SCTP_PAD4(datalen) - datalen;
 
 	/* Fixup cloned skb with just this chunks data.  */
 	skb_trim(skb, chunk->chunk_end - padding - skb->data);
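The receive path above now charges receive memory by the chunk's wire length (datalen) rather than skb->truesize, and reuses datalen for the padding math. SCTP chunks are padded to 4-byte boundaries; a standalone illustration of the rounding, assuming the usual shape of SCTP_PAD4():

	#include <stdio.h>

	#define SCTP_PAD4(s) (((s) + 3u) & ~3u)	/* round up to 4 bytes */

	int main(void)
	{
		for (unsigned int len = 13; len <= 16; len++)
			printf("chunk len %2u -> padded %2u (padding %u)\n",
			       len, SCTP_PAD4(len), SCTP_PAD4(len) - len);
		return 0;
	}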
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 0b42710..b6536b7 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
@@ -8,22 +9,6 @@
  *
  * This abstraction carries sctp events to the ULP (sockets).
  *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, see
- * <http://www.gnu.org/licenses/>.
- *
  * Please send any bug reports or fixes you make to the
  * email address(es):
  *    lksctp developers <linux-sctp@vger.kernel.org>
@@ -116,12 +101,13 @@
 	event = sctp_ulpq_reasm(ulpq, event);
 
 	/* Do ordering if needed.  */
-	if ((event) && (event->msg_flags & MSG_EOR)) {
+	if (event) {
 		/* Create a temporary list to collect chunks on.  */
 		skb_queue_head_init(&temp);
 		__skb_queue_tail(&temp, sctp_event2skb(event));
 
-		event = sctp_ulpq_order(ulpq, event);
+		if (event->msg_flags & MSG_EOR)
+			event = sctp_ulpq_order(ulpq, event);
 	}
 
 	/* Send event to the ULP.  'event' is the sctp_ulpevent for
@@ -129,7 +115,7 @@
 	 */
 	if (event) {
 		event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0;
-		sctp_ulpq_tail_event(ulpq, event);
+		sctp_ulpq_tail_event(ulpq, &temp);
 	}
 
 	return event_eor;
@@ -193,18 +179,17 @@
 	return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
 }
 
-/* If the SKB of 'event' is on a list, it is the first such member
- * of that list.
- */
-int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
+int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sk_buff_head *skb_list)
 {
 	struct sock *sk = ulpq->asoc->base.sk;
 	struct sctp_sock *sp = sctp_sk(sk);
-	struct sk_buff_head *queue, *skb_list;
-	struct sk_buff *skb = sctp_event2skb(event);
+	struct sctp_ulpevent *event;
+	struct sk_buff_head *queue;
+	struct sk_buff *skb;
 	int clear_pd = 0;
 
-	skb_list = (struct sk_buff_head *) skb->prev;
+	skb = __skb_peek(skb_list);
+	event = sctp_skb2event(skb);
 
 	/* If the socket is just going to throw this away, do not
 	 * even try to deliver it.
@@ -219,7 +204,7 @@
 		sk_incoming_cpu_update(sk);
 	}
 	/* Check if the user wishes to receive this event.  */
-	if (!sctp_ulpevent_is_enabled(event, &sp->subscribe))
+	if (!sctp_ulpevent_is_enabled(event, ulpq->asoc->subscribe))
 		goto out_free;
 
 	/* If we are in partial delivery mode, post to the lobby until
@@ -257,13 +242,7 @@
 		}
 	}
 
-	/* If we are harvesting multiple skbs they will be
-	 * collected on a list.
-	 */
-	if (skb_list)
-		skb_queue_splice_tail_init(skb_list, queue);
-	else
-		__skb_queue_tail(queue, skb);
+	skb_queue_splice_tail_init(skb_list, queue);
 
 	/* Did we just complete partial delivery and need to get
 	 * rolling again?  Move pending data to the receive
@@ -459,7 +438,7 @@
 			 * element in the queue, then count it towards
 			 * possible PD.
 			 */
-			if (pos == ulpq->reasm.next) {
+			if (skb_queue_is_first(&ulpq->reasm, pos)) {
 			    pd_first = pos;
 			    pd_last = pos;
 			    pd_len = pos->len;
@@ -738,25 +717,25 @@
 static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq)
 {
 	struct sctp_ulpevent *event = NULL;
-	struct sk_buff_head temp;
 
 	if (skb_queue_empty(&ulpq->reasm))
 		return;
 
 	while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) {
-		/* Do ordering if needed.  */
-		if ((event) && (event->msg_flags & MSG_EOR)) {
-			skb_queue_head_init(&temp);
-			__skb_queue_tail(&temp, sctp_event2skb(event));
+		struct sk_buff_head temp;
 
+		skb_queue_head_init(&temp);
+		__skb_queue_tail(&temp, sctp_event2skb(event));
+
+		/* Do ordering if needed.  */
+		if (event->msg_flags & MSG_EOR)
 			event = sctp_ulpq_order(ulpq, event);
-		}
 
 		/* Send event to the ULP.  'event' is the
 		 * sctp_ulpevent for the very first SKB on the 'temp' list.
 		 */
 		if (event)
-			sctp_ulpq_tail_event(ulpq, event);
+			sctp_ulpq_tail_event(ulpq, &temp);
 	}
 }
 
@@ -956,7 +935,7 @@
 	if (event) {
 		/* see if we have more ordered that we can deliver */
 		sctp_ulpq_retrieve_ordered(ulpq, event);
-		sctp_ulpq_tail_event(ulpq, event);
+		sctp_ulpq_tail_event(ulpq, &temp);
 	}
 }
 
@@ -1082,7 +1061,11 @@
 		event = sctp_ulpq_retrieve_first(ulpq);
 		/* Send event to the ULP.   */
 		if (event) {
-			sctp_ulpq_tail_event(ulpq, event);
+			struct sk_buff_head temp;
+
+			skb_queue_head_init(&temp);
+			__skb_queue_tail(&temp, sctp_event2skb(event));
+			sctp_ulpq_tail_event(ulpq, &temp);
 			sctp_ulpq_set_pd(ulpq);
 			return;
 		}
@@ -1106,7 +1089,8 @@
 			freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
 	}
 	/* If able to free enough room, accept this chunk. */
-	if (freed >= needed) {
+	if (sk_rmem_schedule(asoc->base.sk, chunk->skb, needed) &&
+	    freed >= needed) {
 		int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
 		/*
 		 * Enter partial delivery if chunk has not been
@@ -1129,16 +1113,16 @@
 void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
 {
 	struct sctp_ulpevent *ev = NULL;
-	struct sock *sk;
 	struct sctp_sock *sp;
+	struct sock *sk;
 
 	if (!ulpq->pd_mode)
 		return;
 
 	sk = ulpq->asoc->base.sk;
 	sp = sctp_sk(sk);
-	if (sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT,
-				       &sctp_sk(sk)->subscribe))
+	if (sctp_ulpevent_type_enabled(ulpq->asoc->subscribe,
+				       SCTP_PARTIAL_DELIVERY_EVENT))
 		ev = sctp_ulpevent_make_pdapi(ulpq->asoc,
 					      SCTP_PARTIAL_DELIVERY_ABORTED,
 					      0, 0, 0, gfp);
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index c717ef0..f54a70b 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config SMC
 	tristate "SMC socket protocol family"
 	depends on INET && INFINIBAND
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 4df96b4..cb12545 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_SMC)	+= smc.o
 obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 80e2119..737b499 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -30,6 +31,10 @@
 #include <net/smc.h>
 #include <asm/ioctls.h>
 
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include "smc_netns.h"
+
 #include "smc.h"
 #include "smc_clc.h"
 #include "smc_llc.h"
@@ -42,8 +47,11 @@
 #include "smc_rx.h"
 #include "smc_close.h"
 
-static DEFINE_MUTEX(smc_create_lgr_pending);	/* serialize link group
-						 * creation
+static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
+						 * creation on server
+						 */
+static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
+						 * creation on client
 						 */
 
 static void smc_tcp_listen_work(struct work_struct *);
@@ -115,6 +123,48 @@
 };
 EXPORT_SYMBOL_GPL(smc_proto6);
 
+static void smc_restore_fallback_changes(struct smc_sock *smc)
+{
+	smc->clcsock->file->private_data = smc->sk.sk_socket;
+	smc->clcsock->file = NULL;
+}
+
+static int __smc_release(struct smc_sock *smc)
+{
+	struct sock *sk = &smc->sk;
+	int rc = 0;
+
+	if (!smc->use_fallback) {
+		rc = smc_close_active(smc);
+		sock_set_flag(sk, SOCK_DEAD);
+		sk->sk_shutdown |= SHUTDOWN_MASK;
+	} else {
+		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
+			sock_put(sk); /* passive closing */
+		if (sk->sk_state == SMC_LISTEN) {
+			/* wake up clcsock accept */
+			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
+		}
+		sk->sk_state = SMC_CLOSED;
+		sk->sk_state_change(sk);
+		smc_restore_fallback_changes(smc);
+	}
+
+	sk->sk_prot->unhash(sk);
+
+	if (sk->sk_state == SMC_CLOSED) {
+		if (smc->clcsock) {
+			release_sock(sk);
+			smc_clcsock_release(smc);
+			lock_sock(sk);
+		}
+		if (!smc->use_fallback)
+			smc_conn_free(&smc->conn);
+	}
+
+	return rc;
+}
+
 static int smc_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -127,9 +177,9 @@
 	smc = smc_sk(sk);
 
 	/* cleanup for a dangling non-blocking connect */
+	if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
+		tcp_abort(smc->clcsock->sk, ECONNABORTED);
 	flush_work(&smc->connect_work);
-	kfree(smc->connect_info);
-	smc->connect_info = NULL;
 
 	if (sk->sk_state == SMC_LISTEN)
 		/* smc_close_non_accepted() is called and acquires
@@ -139,30 +189,13 @@
 	else
 		lock_sock(sk);
 
-	if (!smc->use_fallback) {
-		rc = smc_close_active(smc);
-		sock_set_flag(sk, SOCK_DEAD);
-		sk->sk_shutdown |= SHUTDOWN_MASK;
-	}
-	if (smc->clcsock) {
-		sock_release(smc->clcsock);
-		smc->clcsock = NULL;
-	}
-	if (smc->use_fallback) {
-		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
-			sock_put(sk); /* passive closing */
-		sk->sk_state = SMC_CLOSED;
-		sk->sk_state_change(sk);
-	}
+	rc = __smc_release(smc);
 
 	/* detach socket */
 	sock_orphan(sk);
 	sock->sk = NULL;
-	if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
-		smc_conn_free(&smc->conn);
 	release_sock(sk);
 
-	sk->sk_prot->unhash(sk);
 	sock_put(sk); /* final sock_put */
 out:
 	return rc;
@@ -203,6 +236,7 @@
 	spin_lock_init(&smc->conn.send_lock);
 	sk->sk_prot->hash(sk);
 	sk_refcnt_debug_inc(sk);
+	mutex_init(&smc->clcsock_release_lock);
 
 	return sk;
 }
@@ -236,7 +270,7 @@
 
 	/* Check if socket is already active */
 	rc = -EINVAL;
-	if (sk->sk_state != SMC_INIT)
+	if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
 		goto out_rel;
 
 	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
@@ -280,7 +314,8 @@
 			     (1UL << SOCK_RXQ_OVFL) | \
 			     (1UL << SOCK_WIFI_STATUS) | \
 			     (1UL << SOCK_NOFCS) | \
-			     (1UL << SOCK_FILTER_LOCKED))
+			     (1UL << SOCK_FILTER_LOCKED) | \
+			     (1UL << SOCK_TSTAMP_NEW))
 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
  * clc socket (since smc is not called for these options from net/core)
  */
@@ -299,14 +334,17 @@
 	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
 }
 
-/* register a new rmb, optionally send confirm_rkey msg to register with peer */
+/* register a new rmb, send confirm_rkey msg to register with peer */
 static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
 		       bool conf_rkey)
 {
-	/* register memory region for new rmb */
-	if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
-		rmb_desc->regerr = 1;
-		return -EFAULT;
+	if (!rmb_desc->wr_reg) {
+		/* register memory region for new rmb */
+		if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
+			rmb_desc->regerr = 1;
+			return -EFAULT;
+		}
+		rmb_desc->wr_reg = 1;
 	}
 	if (!conf_rkey)
 		return 0;
@@ -335,8 +373,8 @@
 		struct smc_clc_msg_decline dclc;
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
-				      SMC_CLC_DECLINE);
-		return rc;
+				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 	}
 
 	if (link->llc_confirm_rc)
@@ -363,8 +401,8 @@
 		struct smc_clc_msg_decline dclc;
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
-				      SMC_CLC_DECLINE);
-		return rc;
+				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
 	}
 
 	/* send add link reject message, only one link supported for now */
@@ -423,12 +461,22 @@
 	link->peer_mtu = clc->qp_mtu;
 }
 
+static void smc_switch_to_fallback(struct smc_sock *smc)
+{
+	smc->use_fallback = true;
+	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
+		smc->clcsock->file = smc->sk.sk_socket->file;
+		smc->clcsock->file->private_data = smc->clcsock;
+	}
+}
+
 /* fall back during connect */
 static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 {
-	smc->use_fallback = true;
+	smc_switch_to_fallback(smc);
 	smc->fallback_rsn = reason_code;
 	smc_copy_sock_settings_to_clc(smc);
+	smc->connect_nonblock = 0;
 	if (smc->sk.sk_state == SMC_INIT)
 		smc->sk.sk_state = SMC_ACTIVE;
 	return 0;
@@ -461,48 +509,48 @@
 {
 	if (local_contact == SMC_FIRST_CONTACT)
 		smc_lgr_forget(smc->conn.lgr);
-	mutex_unlock(&smc_create_lgr_pending);
+	if (smc->conn.lgr->is_smcd)
+		/* there is only one lgr role for SMC-D; use server lock */
+		mutex_unlock(&smc_server_lgr_pending);
+	else
+		mutex_unlock(&smc_client_lgr_pending);
+
 	smc_conn_free(&smc->conn);
+	smc->connect_nonblock = 0;
 	return reason_code;
 }
 
 /* check if there is a rdma device available for this connection. */
 /* called for connect and listen */
-static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
-			  u8 *ibport, unsigned short vlan_id, u8 gid[])
+static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
 {
-	int reason_code = 0;
-
 	/* PNET table look up: search active ib_device and port
 	 * within same PNETID that also contains the ethernet device
 	 * used for the internal TCP socket
 	 */
-	smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
-				    gid);
-	if (!(*ibdev))
-		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
-
-	return reason_code;
+	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
+	if (!ini->ib_dev)
+		return SMC_CLC_DECL_NOSMCRDEV;
+	return 0;
 }
 
 /* check if there is an ISM device available for this connection. */
 /* called for connect and listen */
-static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
+static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
 {
 	/* Find ISM device with same PNETID as connecting interface  */
-	smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
-	if (!(*ismdev))
-		return SMC_CLC_DECL_CNFERR; /* configuration error */
+	smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
+	if (!ini->ism_dev)
+		return SMC_CLC_DECL_NOSMCDDEV;
 	return 0;
 }
 
 /* Check for VLAN ID and register it on ISM device just for CLC handshake */
 static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
-				      struct smcd_dev *ismdev,
-				      unsigned short vlan_id)
+				      struct smc_init_info *ini)
 {
-	if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
-		return SMC_CLC_DECL_CNFERR;
+	if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
+		return SMC_CLC_DECL_ISMVLANERR;
 	return 0;
 }
 
@@ -510,12 +558,11 @@
  * used, the VLAN ID will be registered again during the connection setup.
  */
 static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
-					struct smcd_dev *ismdev,
-					unsigned short vlan_id)
+					struct smc_init_info *ini)
 {
 	if (!is_smcd)
 		return 0;
-	if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
+	if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
 		return SMC_CLC_DECL_CNFERR;
 	return 0;
 }
@@ -523,39 +570,37 @@
 /* CLC handshake during connect */
 static int smc_connect_clc(struct smc_sock *smc, int smc_type,
 			   struct smc_clc_msg_accept_confirm *aclc,
-			   struct smc_ib_device *ibdev, u8 ibport,
-			   u8 gid[], struct smcd_dev *ismdev)
+			   struct smc_init_info *ini)
 {
 	int rc = 0;
 
 	/* do inband token exchange */
-	rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
+	rc = smc_clc_send_proposal(smc, smc_type, ini);
 	if (rc)
 		return rc;
 	/* receive SMC Accept CLC message */
-	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT);
+	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
+				CLC_WAIT_TIME);
 }
 
 /* setup for RDMA connection of client */
 static int smc_connect_rdma(struct smc_sock *smc,
 			    struct smc_clc_msg_accept_confirm *aclc,
-			    struct smc_ib_device *ibdev, u8 ibport)
+			    struct smc_init_info *ini)
 {
-	int local_contact = SMC_FIRST_CONTACT;
 	struct smc_link *link;
 	int reason_code = 0;
 
-	mutex_lock(&smc_create_lgr_pending);
-	local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
-					ibport, &aclc->lcl, NULL, 0);
-	if (local_contact < 0) {
-		if (local_contact == -ENOMEM)
-			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
-		else if (local_contact == -ENOLINK)
-			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
-		else
-			reason_code = SMC_CLC_DECL_INTERR; /* other error */
-		return smc_connect_abort(smc, reason_code, 0);
+	ini->is_smcd = false;
+	ini->ib_lcl = &aclc->lcl;
+	ini->ib_clcqpn = ntoh24(aclc->qpn);
+	ini->srv_first_contact = aclc->hdr.flag;
+
+	mutex_lock(&smc_client_lgr_pending);
+	reason_code = smc_conn_create(smc, ini);
+	if (reason_code) {
+		mutex_unlock(&smc_client_lgr_pending);
+		return reason_code;
 	}
 	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
 
@@ -563,46 +608,48 @@
 
 	/* create send buffer and rmb */
 	if (smc_buf_create(smc, false))
-		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
+					 ini->cln_first_contact);
 
-	if (local_contact == SMC_FIRST_CONTACT)
+	if (ini->cln_first_contact == SMC_FIRST_CONTACT)
 		smc_link_save_peer_info(link, aclc);
 
 	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
 		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
-					 local_contact);
+					 ini->cln_first_contact);
 
 	smc_close_init(smc);
 	smc_rx_init(smc);
 
-	if (local_contact == SMC_FIRST_CONTACT) {
+	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 		if (smc_ib_ready_link(link))
 			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
-						 local_contact);
+						 ini->cln_first_contact);
 	} else {
-		if (!smc->conn.rmb_desc->reused &&
-		    smc_reg_rmb(link, smc->conn.rmb_desc, true))
+		if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
 			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
-						 local_contact);
+						 ini->cln_first_contact);
 	}
 	smc_rmb_sync_sg_for_device(&smc->conn);
 
 	reason_code = smc_clc_send_confirm(smc);
 	if (reason_code)
-		return smc_connect_abort(smc, reason_code, local_contact);
+		return smc_connect_abort(smc, reason_code,
+					 ini->cln_first_contact);
 
 	smc_tx_init(smc);
 
-	if (local_contact == SMC_FIRST_CONTACT) {
+	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 		/* QP confirmation over RoCE fabric */
 		reason_code = smc_clnt_conf_first_link(smc);
 		if (reason_code)
 			return smc_connect_abort(smc, reason_code,
-						 local_contact);
+						 ini->cln_first_contact);
 	}
-	mutex_unlock(&smc_create_lgr_pending);
+	mutex_unlock(&smc_client_lgr_pending);
 
 	smc_copy_sock_settings_to_clc(smc);
+	smc->connect_nonblock = 0;
 	if (smc->sk.sk_state == SMC_INIT)
 		smc->sk.sk_state = SMC_ACTIVE;
 
@@ -612,20 +659,26 @@
 /* setup for ISM connection of client */
 static int smc_connect_ism(struct smc_sock *smc,
 			   struct smc_clc_msg_accept_confirm *aclc,
-			   struct smcd_dev *ismdev)
+			   struct smc_init_info *ini)
 {
-	int local_contact = SMC_FIRST_CONTACT;
 	int rc = 0;
 
-	mutex_lock(&smc_create_lgr_pending);
-	local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
-					NULL, ismdev, aclc->gid);
-	if (local_contact < 0)
-		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
+	ini->is_smcd = true;
+	ini->ism_gid = aclc->gid;
+	ini->srv_first_contact = aclc->hdr.flag;
+
+	/* there is only one lgr role for SMC-D; use server lock */
+	mutex_lock(&smc_server_lgr_pending);
+	rc = smc_conn_create(smc, ini);
+	if (rc) {
+		mutex_unlock(&smc_server_lgr_pending);
+		return rc;
+	}
 
 	/* Create send and receive buffers */
 	if (smc_buf_create(smc, true))
-		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
+					 ini->cln_first_contact);
 
 	smc_conn_save_peer_info(smc, aclc);
 	smc_close_init(smc);
@@ -634,10 +687,11 @@
 
 	rc = smc_clc_send_confirm(smc);
 	if (rc)
-		return smc_connect_abort(smc, rc, local_contact);
-	mutex_unlock(&smc_create_lgr_pending);
+		return smc_connect_abort(smc, rc, ini->cln_first_contact);
+	mutex_unlock(&smc_server_lgr_pending);
 
 	smc_copy_sock_settings_to_clc(smc);
+	smc->connect_nonblock = 0;
 	if (smc->sk.sk_state == SMC_INIT)
 		smc->sk.sk_state = SMC_ACTIVE;
 
@@ -649,15 +703,9 @@
 {
 	bool ism_supported = false, rdma_supported = false;
 	struct smc_clc_msg_accept_confirm aclc;
-	struct smc_ib_device *ibdev;
-	struct smcd_dev *ismdev;
-	u8 gid[SMC_GID_SIZE];
-	unsigned short vlan;
+	struct smc_init_info ini = {0};
 	int smc_type;
 	int rc = 0;
-	u8 ibport;
-
-	sock_hold(&smc->sk); /* sock put in passive closing */
 
 	if (smc->use_fallback)
 		return smc_connect_fallback(smc, smc->fallback_rsn);
@@ -670,20 +718,21 @@
 	if (using_ipsec(smc))
 		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
 
-	/* check for VLAN ID */
-	if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
-		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
+	/* get vlan id from IP device */
+	if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
+		return smc_connect_decline_fallback(smc,
+						    SMC_CLC_DECL_GETVLANERR);
 
 	/* check if there is an ism device available */
-	if (!smc_check_ism(smc, &ismdev) &&
-	    !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
+	if (!smc_find_ism_device(smc, &ini) &&
+	    !smc_connect_ism_vlan_setup(smc, &ini)) {
 		/* ISM is supported for this connection */
 		ism_supported = true;
 		smc_type = SMC_TYPE_D;
 	}
 
 	/* check if there is a rdma device available */
-	if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
+	if (!smc_find_rdma_device(smc, &ini)) {
 		/* RDMA is supported for this connection */
 		rdma_supported = true;
 		if (ism_supported)
@@ -697,25 +746,25 @@
 		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
 
 	/* perform CLC handshake */
-	rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
+	rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
 	if (rc) {
-		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 		return smc_connect_decline_fallback(smc, rc);
 	}
 
 	/* depending on previous steps, connect using rdma or ism */
 	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
-		rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
+		rc = smc_connect_rdma(smc, &aclc, &ini);
 	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
-		rc = smc_connect_ism(smc, &aclc, ismdev);
+		rc = smc_connect_ism(smc, &aclc, &ini);
 	else
 		rc = SMC_CLC_DECL_MODEUNSUPP;
 	if (rc) {
-		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 		return smc_connect_decline_fallback(smc, rc);
 	}
 
-	smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+	smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 	return 0;
 }
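__smc_connect() now threads a single struct smc_init_info through device lookup, the CLC handshake and link-group creation instead of passing ibdev/ibport/gid/ismdev/vlan separately. The fields consumed in these hunks suggest roughly this layout (assumed from usage; smc_core.h is authoritative):

	struct smc_init_info {
		u8			is_smcd;
		unsigned short		vlan_id;
		int			srv_first_contact;	/* from aclc->hdr.flag */
		int			cln_first_contact;	/* set by smc_conn_create() */
		/* SMC-R */
		struct smc_clc_msg_local *ib_lcl;
		struct smc_ib_device	*ib_dev;
		u8			ib_gid[SMC_GID_SIZE];
		u8			ib_port;
		u32			ib_clcqpn;
		/* SMC-D */
		u64			ism_gid;
		struct smcd_dev		*ism_dev;
	};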
 
@@ -723,17 +772,31 @@
 {
 	struct smc_sock *smc = container_of(work, struct smc_sock,
 					    connect_work);
-	int rc;
+	long timeo = smc->sk.sk_sndtimeo;
+	int rc = 0;
 
-	lock_sock(&smc->sk);
-	rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
-			    smc->connect_info->alen, smc->connect_info->flags);
+	if (!timeo)
+		timeo = MAX_SCHEDULE_TIMEOUT;
+	lock_sock(smc->clcsock->sk);
 	if (smc->clcsock->sk->sk_err) {
 		smc->sk.sk_err = smc->clcsock->sk->sk_err;
-		goto out;
+	} else if ((1 << smc->clcsock->sk->sk_state) &
+					(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+		rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
+		if ((rc == -EPIPE) &&
+		    ((1 << smc->clcsock->sk->sk_state) &
+					(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
+			rc = 0;
 	}
-	if (rc < 0) {
-		smc->sk.sk_err = -rc;
+	release_sock(smc->clcsock->sk);
+	lock_sock(&smc->sk);
+	if (rc != 0 || smc->sk.sk_err) {
+		smc->sk.sk_state = SMC_CLOSED;
+		if (rc == -EPIPE || rc == -EAGAIN)
+			smc->sk.sk_err = EPIPE;
+		else if (signal_pending(current))
+			smc->sk.sk_err = -sock_intr_errno(timeo);
+		sock_put(&smc->sk); /* passive closing */
 		goto out;
 	}
 
@@ -742,12 +805,14 @@
 		smc->sk.sk_err = -rc;
 
 out:
-	if (smc->sk.sk_err)
-		smc->sk.sk_state_change(&smc->sk);
-	else
-		smc->sk.sk_write_space(&smc->sk);
-	kfree(smc->connect_info);
-	smc->connect_info = NULL;
+	if (!sock_flag(&smc->sk, SOCK_DEAD)) {
+		if (smc->sk.sk_err) {
+			smc->sk.sk_state_change(&smc->sk);
+		} else { /* allow polling before and after fallback decision */
+			smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
+			smc->sk.sk_write_space(&smc->sk);
+		}
+	}
 	release_sock(&smc->sk);
 }
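
The reworked worker no longer repeats kernel_connect(); it simply waits on the TCP fallback socket while the three-way handshake is still in flight. A standalone sketch of the one-bit-per-state mask test it relies on (user-space C; state values mirrored from include/net/tcp_states.h, helper name illustrative):

#include <stdio.h>

/* values mirror include/net/tcp_states.h */
enum { TCP_ESTABLISHED = 1, TCP_SYN_SENT = 2, TCP_SYN_RECV = 3 };
#define TCPF_SYN_SENT	(1 << TCP_SYN_SENT)
#define TCPF_SYN_RECV	(1 << TCP_SYN_RECV)

/* same test smc_connect_work() applies before sk_stream_wait_connect() */
static int handshake_in_flight(int sk_state)
{
	return !!((1 << sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV));
}

int main(void)
{
	printf("SYN_SENT:    %d\n", handshake_in_flight(TCP_SYN_SENT));
	printf("ESTABLISHED: %d\n", handshake_in_flight(TCP_ESTABLISHED));
	return 0;
}
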
 
@@ -780,26 +845,20 @@
 
 	smc_copy_sock_settings_to_clc(smc);
 	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
+	if (smc->connect_nonblock) {
+		rc = -EALREADY;
+		goto out;
+	}
+	rc = kernel_connect(smc->clcsock, addr, alen, flags);
+	if (rc && rc != -EINPROGRESS)
+		goto out;
+
+	sock_hold(&smc->sk); /* sock put in passive closing */
 	if (flags & O_NONBLOCK) {
-		if (smc->connect_info) {
-			rc = -EALREADY;
-			goto out;
-		}
-		smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
-		if (!smc->connect_info) {
-			rc = -ENOMEM;
-			goto out;
-		}
-		smc->connect_info->alen = alen;
-		smc->connect_info->flags = flags ^ O_NONBLOCK;
-		memcpy(&smc->connect_info->addr, addr, alen);
-		schedule_work(&smc->connect_work);
+		if (schedule_work(&smc->connect_work))
+			smc->connect_nonblock = 1;
 		rc = -EINPROGRESS;
 	} else {
-		rc = kernel_connect(smc->clcsock, addr, alen, flags);
-		if (rc)
-			goto out;
-
 		rc = __smc_connect(smc);
 		if (rc < 0)
 			goto out;
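
From user space the reworked path behaves like an ordinary non-blocking TCP connect: kernel_connect() is now issued synchronously, and only the SMC handshake is deferred to connect_work. A minimal usage sketch (example address; AF_SMC and SMCPROTO_SMC values as defined in linux/socket.h and linux/smc.h):

#include <arpa/inet.h>
#include <errno.h>
#include <poll.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_SMC
#define AF_SMC		43	/* linux/socket.h */
#endif
#define SMCPROTO_SMC	0	/* SMC over IPv4, linux/smc.h */

int main(void)
{
	struct sockaddr_in sa = { .sin_family = AF_INET,
				  .sin_port = htons(12345) };
	struct pollfd pfd;
	int fd;

	inet_pton(AF_INET, "192.0.2.1", &sa.sin_addr);	/* example addr */
	fd = socket(AF_SMC, SOCK_STREAM | SOCK_NONBLOCK, SMCPROTO_SMC);
	if (fd < 0)
		return 1;
	if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0 &&
	    errno == EINPROGRESS) {
		/* a second connect() now fails with EALREADY while
		 * connect_nonblock is set; poll for writability instead
		 */
		pfd.fd = fd;
		pfd.events = POLLOUT;
		poll(&pfd, 1, 5000);
	}
	close(fd);
	return 0;
}
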
@@ -818,7 +877,7 @@
 	struct socket *new_clcsock = NULL;
 	struct sock *lsk = &lsmc->sk;
 	struct sock *new_sk;
-	int rc;
+	int rc = -EINVAL;
 
 	release_sock(lsk);
 	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
@@ -831,16 +890,19 @@
 	}
 	*new_smc = smc_sk(new_sk);
 
-	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
+	mutex_lock(&lsmc->clcsock_release_lock);
+	if (lsmc->clcsock)
+		rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
+	mutex_unlock(&lsmc->clcsock_release_lock);
 	lock_sock(lsk);
 	if  (rc < 0)
 		lsk->sk_err = -rc;
 	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
+		new_sk->sk_prot->unhash(new_sk);
 		if (new_clcsock)
 			sock_release(new_clcsock);
 		new_sk->sk_state = SMC_CLOSED;
 		sock_set_flag(new_sk, SOCK_DEAD);
-		new_sk->sk_prot->unhash(new_sk);
 		sock_put(new_sk); /* final */
 		*new_smc = NULL;
 		goto out;
@@ -891,16 +953,21 @@
 
 		smc_accept_unlink(new_sk);
 		if (new_sk->sk_state == SMC_CLOSED) {
+			new_sk->sk_prot->unhash(new_sk);
 			if (isk->clcsock) {
 				sock_release(isk->clcsock);
 				isk->clcsock = NULL;
 			}
-			new_sk->sk_prot->unhash(new_sk);
 			sock_put(new_sk); /* final */
 			continue;
 		}
-		if (new_sock)
+		if (new_sock) {
 			sock_graft(new_sk, new_sock);
+			if (isk->use_fallback) {
+				smc_sk(new_sk)->clcsock->file = new_sock->file;
+				isk->clcsock->file->private_data = isk->clcsock;
+			}
+		}
 		return new_sk;
 	}
 	return NULL;
@@ -915,27 +982,8 @@
 	if (!sk->sk_lingertime)
 		/* wait for peer closing */
 		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
-	if (!smc->use_fallback) {
-		smc_close_active(smc);
-		sock_set_flag(sk, SOCK_DEAD);
-		sk->sk_shutdown |= SHUTDOWN_MASK;
-	}
-	if (smc->clcsock) {
-		struct socket *tcp;
-
-		tcp = smc->clcsock;
-		smc->clcsock = NULL;
-		sock_release(tcp);
-	}
-	if (smc->use_fallback) {
-		sock_put(sk); /* passive closing */
-		sk->sk_state = SMC_CLOSED;
-	} else {
-		if (sk->sk_state == SMC_CLOSED)
-			smc_conn_free(&smc->conn);
-	}
+	__smc_release(smc);
 	release_sock(sk);
-	sk->sk_prot->unhash(sk);
 	sock_put(sk); /* final sock_put */
 }
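
smc_release() and smc_close_non_accepted() now share a single teardown helper. An illustrative reconstruction of the consolidated logic, based on the block removed above (not the verbatim kernel __smc_release()):

static void __smc_release_sketch(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;

	if (!smc->use_fallback) {
		smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	smc_clcsock_release(smc);	/* mutex-protected, see smc_close.c */
	if (smc->use_fallback) {
		sock_put(sk);		/* passive closing */
		sk->sk_state = SMC_CLOSED;
	} else if (sk->sk_state == SMC_CLOSED) {
		smc_conn_free(&smc->conn);
	}
	sk->sk_prot->unhash(sk);
}
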
 
@@ -965,8 +1013,8 @@
 		struct smc_clc_msg_decline dclc;
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
-				      SMC_CLC_DECLINE);
-		return rc;
+				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 	}
 
 	if (link->llc_confirm_resp_rc)
@@ -986,8 +1034,8 @@
 		struct smc_clc_msg_decline dclc;
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
-				      SMC_CLC_DECLINE);
-		return rc;
+				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
 	}
 
 	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
@@ -1001,13 +1049,13 @@
 	struct smc_sock *lsmc = new_smc->listen_smc;
 	struct sock *newsmcsk = &new_smc->sk;
 
-	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
 	if (lsmc->sk.sk_state == SMC_LISTEN) {
+		lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
 		smc_accept_enqueue(&lsmc->sk, newsmcsk);
+		release_sock(&lsmc->sk);
 	} else { /* no longer listening */
 		smc_close_non_accepted(newsmcsk);
 	}
-	release_sock(&lsmc->sk);
 
 	/* Wake up accept */
 	lsmc->sk.sk_data_ready(&lsmc->sk);
@@ -1051,7 +1099,7 @@
 		return;
 	}
 	smc_conn_free(&new_smc->conn);
-	new_smc->use_fallback = true;
+	smc_switch_to_fallback(new_smc);
 	new_smc->fallback_rsn = reason_code;
 	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
 		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
@@ -1063,7 +1111,7 @@
 }
 
 /* listen worker: check prefixes */
-static int smc_listen_rdma_check(struct smc_sock *new_smc,
+static int smc_listen_prfx_check(struct smc_sock *new_smc,
 				 struct smc_clc_msg_proposal *pclc)
 {
 	struct smc_clc_msg_proposal_prefix *pclc_prfx;
@@ -1071,25 +1119,21 @@
 
 	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
 	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
-		return SMC_CLC_DECL_CNFERR;
+		return SMC_CLC_DECL_DIFFPREFIX;
 
 	return 0;
 }
 
 /* listen worker: initialize connection and buffers */
 static int smc_listen_rdma_init(struct smc_sock *new_smc,
-				struct smc_clc_msg_proposal *pclc,
-				struct smc_ib_device *ibdev, u8 ibport,
-				int *local_contact)
+				struct smc_init_info *ini)
 {
+	int rc;
+
 	/* allocate connection / link group */
-	*local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
-					 &pclc->lcl, NULL, 0);
-	if (*local_contact < 0) {
-		if (*local_contact == -ENOMEM)
-			return SMC_CLC_DECL_MEM;/* insufficient memory*/
-		return SMC_CLC_DECL_INTERR; /* other error */
-	}
+	rc = smc_conn_create(new_smc, ini);
+	if (rc)
+		return rc;
 
 	/* create send buffer and rmb */
 	if (smc_buf_create(new_smc, false))
@@ -1101,33 +1145,30 @@
 /* listen worker: initialize connection and buffers for SMC-D */
 static int smc_listen_ism_init(struct smc_sock *new_smc,
 			       struct smc_clc_msg_proposal *pclc,
-			       struct smcd_dev *ismdev,
-			       int *local_contact)
+			       struct smc_init_info *ini)
 {
 	struct smc_clc_msg_smcd *pclc_smcd;
+	int rc;
 
 	pclc_smcd = smc_get_clc_msg_smcd(pclc);
-	*local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
-					 ismdev, pclc_smcd->gid);
-	if (*local_contact < 0) {
-		if (*local_contact == -ENOMEM)
-			return SMC_CLC_DECL_MEM;/* insufficient memory*/
-		return SMC_CLC_DECL_INTERR; /* other error */
-	}
+	ini->ism_gid = pclc_smcd->gid;
+	rc = smc_conn_create(new_smc, ini);
+	if (rc)
+		return rc;
 
 	/* Check if peer can be reached via ISM device */
 	if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
 			    new_smc->conn.lgr->vlan_id,
 			    new_smc->conn.lgr->smcd)) {
-		if (*local_contact == SMC_FIRST_CONTACT)
+		if (ini->cln_first_contact == SMC_FIRST_CONTACT)
 			smc_lgr_forget(new_smc->conn.lgr);
 		smc_conn_free(&new_smc->conn);
-		return SMC_CLC_DECL_CNFERR;
+		return SMC_CLC_DECL_SMCDNOTALK;
 	}
 
 	/* Create send and receive buffers */
 	if (smc_buf_create(new_smc, true)) {
-		if (*local_contact == SMC_FIRST_CONTACT)
+		if (ini->cln_first_contact == SMC_FIRST_CONTACT)
 			smc_lgr_forget(new_smc->conn.lgr);
 		smc_conn_free(&new_smc->conn);
 		return SMC_CLC_DECL_MEM;
@@ -1142,10 +1183,8 @@
 	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
 
 	if (local_contact != SMC_FIRST_CONTACT) {
-		if (!new_smc->conn.rmb_desc->reused) {
-			if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
-				return SMC_CLC_DECL_ERR_REGRMB;
-		}
+		if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
+			return SMC_CLC_DECL_ERR_REGRMB;
 	}
 	smc_rmb_sync_sg_for_device(&new_smc->conn);
 
@@ -1181,7 +1220,6 @@
 	return 0;
 
 decline:
-	mutex_unlock(&smc_create_lgr_pending);
 	smc_listen_decline(new_smc, reason_code, local_contact);
 	return reason_code;
 }
@@ -1194,15 +1232,13 @@
 	struct socket *newclcsock = new_smc->clcsock;
 	struct smc_clc_msg_accept_confirm cclc;
 	struct smc_clc_msg_proposal *pclc;
-	struct smc_ib_device *ibdev;
+	struct smc_init_info ini = {0};
 	bool ism_supported = false;
-	struct smcd_dev *ismdev;
 	u8 buf[SMC_CLC_MAX_LEN];
-	int local_contact = 0;
-	unsigned short vlan;
-	int reason_code = 0;
 	int rc = 0;
-	u8 ibport;
+
+	if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
+		return smc_listen_out_err(new_smc);
 
 	if (new_smc->use_fallback) {
 		smc_listen_out_connected(new_smc);
@@ -1211,7 +1247,7 @@
 
 	/* check if peer is smc capable */
 	if (!tcp_sk(newclcsock->sk)->syn_smc) {
-		new_smc->use_fallback = true;
+		smc_switch_to_fallback(new_smc);
 		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
 		smc_listen_out_connected(new_smc);
 		return;
@@ -1221,72 +1257,101 @@
 	 * wait for and receive SMC Proposal CLC message
 	 */
 	pclc = (struct smc_clc_msg_proposal *)&buf;
-	reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
-				       SMC_CLC_PROPOSAL);
-	if (reason_code) {
-		smc_listen_decline(new_smc, reason_code, 0);
-		return;
-	}
+	rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
+			      SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
+	if (rc)
+		goto out_decl;
 
 	/* IPSec connections opt out of SMC-R optimizations */
 	if (using_ipsec(new_smc)) {
-		smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
-		return;
+		rc = SMC_CLC_DECL_IPSEC;
+		goto out_decl;
 	}
 
-	mutex_lock(&smc_create_lgr_pending);
+	/* check for matching IP prefix and subnet length */
+	rc = smc_listen_prfx_check(new_smc, pclc);
+	if (rc)
+		goto out_decl;
+
+	/* get vlan id from IP device */
+	if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
+		rc = SMC_CLC_DECL_GETVLANERR;
+		goto out_decl;
+	}
+
+	mutex_lock(&smc_server_lgr_pending);
 	smc_close_init(new_smc);
 	smc_rx_init(new_smc);
 	smc_tx_init(new_smc);
 
 	/* check if ISM is available */
-	if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
-	    !smc_check_ism(new_smc, &ismdev) &&
-	    !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
-		ism_supported = true;
+	if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
+		ini.is_smcd = true; /* prepare ISM check */
+		rc = smc_find_ism_device(new_smc, &ini);
+		if (!rc)
+			rc = smc_listen_ism_init(new_smc, pclc, &ini);
+		if (!rc)
+			ism_supported = true;
+		else if (pclc->hdr.path == SMC_TYPE_D)
+			goto out_unlock; /* skip RDMA and decline */
 	}
 
 	/* check if RDMA is available */
-	if (!ism_supported &&
-	    ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
-	     smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
-	     smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
-	     smc_listen_rdma_check(new_smc, pclc) ||
-	     smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
-				  &local_contact) ||
-	     smc_listen_rdma_reg(new_smc, local_contact))) {
-		/* SMC not supported, decline */
-		mutex_unlock(&smc_create_lgr_pending);
-		smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
-				   local_contact);
-		return;
+	if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
+		/* prepare RDMA check */
+		ini.is_smcd = false;
+		ini.ism_dev = NULL;
+		ini.ib_lcl = &pclc->lcl;
+		rc = smc_find_rdma_device(new_smc, &ini);
+		if (rc) {
+			/* no RDMA device found */
+			if (pclc->hdr.path == SMC_TYPE_B)
+				/* neither ISM nor RDMA device found */
+				rc = SMC_CLC_DECL_NOSMCDEV;
+			goto out_unlock;
+		}
+		rc = smc_listen_rdma_init(new_smc, &ini);
+		if (rc)
+			goto out_unlock;
+		rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
+		if (rc)
+			goto out_unlock;
 	}
 
 	/* send SMC Accept CLC message */
-	rc = smc_clc_send_accept(new_smc, local_contact);
-	if (rc) {
-		mutex_unlock(&smc_create_lgr_pending);
-		smc_listen_decline(new_smc, rc, local_contact);
-		return;
-	}
+	rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
+	if (rc)
+		goto out_unlock;
+
+	/* SMC-D does not need this lock any more */
+	if (ism_supported)
+		mutex_unlock(&smc_server_lgr_pending);
 
 	/* receive SMC Confirm CLC message */
-	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
-				       SMC_CLC_CONFIRM);
-	if (reason_code) {
-		mutex_unlock(&smc_create_lgr_pending);
-		smc_listen_decline(new_smc, reason_code, local_contact);
-		return;
+	rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
+			      SMC_CLC_CONFIRM, CLC_WAIT_TIME);
+	if (rc) {
+		if (!ism_supported)
+			goto out_unlock;
+		goto out_decl;
 	}
 
 	/* finish worker */
 	if (!ism_supported) {
-		if (smc_listen_rdma_finish(new_smc, &cclc, local_contact))
+		rc = smc_listen_rdma_finish(new_smc, &cclc,
+					    ini.cln_first_contact);
+		mutex_unlock(&smc_server_lgr_pending);
+		if (rc)
 			return;
 	}
 	smc_conn_save_peer_info(new_smc, &cclc);
-	mutex_unlock(&smc_create_lgr_pending);
 	smc_listen_out_connected(new_smc);
+	return;
+
+out_unlock:
+	mutex_unlock(&smc_server_lgr_pending);
+out_decl:
+	smc_listen_decline(new_smc, rc, ini.cln_first_contact);
 }
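
All of the worker's duplicated unlock-and-decline exits collapse into two labels: failures before smc_server_lgr_pending is taken leave through out_decl, everything after it through out_unlock. A compilable user-space sketch of that shape (names and failure injection are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t server_lgr_pending = PTHREAD_MUTEX_INITIALIZER;

static int listen_work_shape(int fail_step)
{
	int rc;

	rc = (fail_step == 1) ? -1 : 0;		/* wait for proposal */
	if (rc)
		goto out_decl;

	pthread_mutex_lock(&server_lgr_pending);
	rc = (fail_step == 2) ? -1 : 0;		/* device setup etc. */
	if (rc)
		goto out_unlock;
	pthread_mutex_unlock(&server_lgr_pending);
	return 0;

out_unlock:
	pthread_mutex_unlock(&server_lgr_pending);
out_decl:
	printf("decline, rc=%d\n", rc);
	return rc;
}

int main(void)
{
	listen_work_shape(0);
	listen_work_shape(2);
	return 0;
}
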
 
 static void smc_tcp_listen_work(struct work_struct *work)
@@ -1333,7 +1398,8 @@
 	lock_sock(sk);
 
 	rc = -EINVAL;
-	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
+	if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
+	    smc->connect_nonblock)
 		goto out;
 
 	rc = 0;
@@ -1354,7 +1420,6 @@
 	sk->sk_max_ack_backlog = backlog;
 	sk->sk_ack_backlog = 0;
 	sk->sk_state = SMC_LISTEN;
-	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
 	sock_hold(sk); /* sock_hold in tcp_listen_worker */
 	if (!schedule_work(&smc->tcp_listen_work))
 		sock_put(sk);
@@ -1462,8 +1527,8 @@
 		goto out;
 
 	if (msg->msg_flags & MSG_FASTOPEN) {
-		if (sk->sk_state == SMC_INIT) {
-			smc->use_fallback = true;
+		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
+			smc_switch_to_fallback(smc);
 			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
 		} else {
 			rc = -EINVAL;
@@ -1489,6 +1554,11 @@
 
 	smc = smc_sk(sk);
 	lock_sock(sk);
+	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
+		/* socket was connected before, no more data to read */
+		rc = 0;
+		goto out;
+	}
 	if ((sk->sk_state == SMC_INIT) ||
 	    (sk->sk_state == SMC_LISTEN) ||
 	    (sk->sk_state == SMC_CLOSED))
@@ -1528,8 +1598,8 @@
 			     poll_table *wait)
 {
 	struct sock *sk = sock->sk;
-	__poll_t mask = 0;
 	struct smc_sock *smc;
+	__poll_t mask = 0;
 
 	if (!sk)
 		return EPOLLNVAL;
@@ -1539,8 +1609,6 @@
 		/* delegate to CLC child sock */
 		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
 		sk->sk_err = smc->clcsock->sk->sk_err;
-		if (sk->sk_err)
-			mask |= EPOLLERR;
 	} else {
 		if (sk->sk_state != SMC_CLOSED)
 			sock_poll_wait(file, sock, wait);
@@ -1551,9 +1619,14 @@
 			mask |= EPOLLHUP;
 		if (sk->sk_state == SMC_LISTEN) {
 			/* woken up by sk_data_ready in smc_listen_work() */
-			mask = smc_accept_poll(sk);
+			mask |= smc_accept_poll(sk);
+		} else if (smc->use_fallback) { /* as result of connect_work()*/
+			mask |= smc->clcsock->ops->poll(file, smc->clcsock,
+							wait);
+			sk->sk_err = smc->clcsock->sk->sk_err;
 		} else {
-			if (atomic_read(&smc->conn.sndbuf_space) ||
+			if ((sk->sk_state != SMC_INIT &&
+			     atomic_read(&smc->conn.sndbuf_space)) ||
 			    sk->sk_shutdown & SEND_SHUTDOWN) {
 				mask |= EPOLLOUT | EPOLLWRNORM;
 			} else {
@@ -1659,8 +1732,8 @@
 	case TCP_FASTOPEN_KEY:
 	case TCP_FASTOPEN_NO_COOKIE:
 		/* option not supported by SMC */
-		if (sk->sk_state == SMC_INIT) {
-			smc->use_fallback = true;
+		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
+			smc_switch_to_fallback(smc);
 			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
 		} else {
 			if (!smc->use_fallback)
@@ -1668,14 +1741,18 @@
 		}
 		break;
 	case TCP_NODELAY:
-		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+		if (sk->sk_state != SMC_INIT &&
+		    sk->sk_state != SMC_LISTEN &&
+		    sk->sk_state != SMC_CLOSED) {
 			if (val && !smc->use_fallback)
 				mod_delayed_work(system_wq, &smc->conn.tx_work,
 						 0);
 		}
 		break;
 	case TCP_CORK:
-		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+		if (sk->sk_state != SMC_INIT &&
+		    sk->sk_state != SMC_LISTEN &&
+		    sk->sk_state != SMC_CLOSED) {
 			if (!val && !smc->use_fallback)
 				mod_delayed_work(system_wq, &smc->conn.tx_work,
 						 0);
@@ -1824,7 +1901,11 @@
 
 	smc = smc_sk(sk);
 	lock_sock(sk);
-
+	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
+		/* socket was connected before, no more data to read */
+		rc = 0;
+		goto out;
+	}
 	if (sk->sk_state == SMC_INIT ||
 	    sk->sk_state == SMC_LISTEN ||
 	    sk->sk_state == SMC_CLOSED)
@@ -1923,14 +2004,37 @@
 	.create	= smc_create,
 };
 
+unsigned int smc_net_id;
+
+static __net_init int smc_net_init(struct net *net)
+{
+	return smc_pnet_net_init(net);
+}
+
+static void __net_exit smc_net_exit(struct net *net)
+{
+	smc_pnet_net_exit(net);
+}
+
+static struct pernet_operations smc_net_ops = {
+	.init = smc_net_init,
+	.exit = smc_net_exit,
+	.id   = &smc_net_id,
+	.size = sizeof(struct smc_net),
+};
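
register_pernet_subsys() allocates sizeof(struct smc_net) for every network namespace and stores the lookup key in smc_net_id; the per-namespace data is then reached through the standard net_generic() accessor. A short kernel-context sketch (accessor name illustrative; struct smc_net is declared in net/smc/smc_pnet.h at this level of the tree):

#include <net/net_namespace.h>
#include <net/netns/generic.h>

/* fetch this module's per-namespace payload */
static struct smc_net *smc_net(struct net *net)
{
	return net_generic(net, smc_net_id);
}
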
+
 static int __init smc_init(void)
 {
 	int rc;
 
-	rc = smc_pnet_init();
+	rc = register_pernet_subsys(&smc_net_ops);
 	if (rc)
 		return rc;
 
+	rc = smc_pnet_init();
+	if (rc)
+		goto out_pernet_subsys;
+
 	rc = smc_llc_init();
 	if (rc) {
 		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
@@ -1980,6 +2084,9 @@
 	proto_unregister(&smc_proto);
 out_pnet:
 	smc_pnet_exit();
+out_pernet_subsys:
+	unregister_pernet_subsys(&smc_net_ops);
+
 	return rc;
 }
 
@@ -1992,6 +2099,7 @@
 	proto_unregister(&smc_proto6);
 	proto_unregister(&smc_proto);
 	smc_pnet_exit();
+	unregister_pernet_subsys(&smc_net_ops);
 }
 
 module_init(smc_init);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 08786ac..878313f 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -113,9 +113,9 @@
 } __aligned(8);
 
 enum smc_urg_state {
-	SMC_URG_VALID,			/* data present */
-	SMC_URG_NOTYET,			/* data pending */
-	SMC_URG_READ			/* data was already read */
+	SMC_URG_VALID	= 1,			/* data present */
+	SMC_URG_NOTYET	= 2,			/* data pending */
+	SMC_URG_READ	= 3,			/* data was already read */
 };
 
 struct smc_connection {
@@ -190,18 +190,11 @@
 	u64			peer_token;	/* SMC-D token of peer */
 };
 
-struct smc_connect_info {
-	int			flags;
-	int			alen;
-	struct sockaddr		addr;
-};
-
 struct smc_sock {				/* smc sock container */
 	struct sock		sk;
 	struct socket		*clcsock;	/* internal tcp socket */
 	struct smc_connection	conn;		/* smc connection */
 	struct smc_sock		*listen_smc;	/* listen parent */
-	struct smc_connect_info *connect_info;	/* connect address & flags */
 	struct work_struct	connect_work;	/* handle non-blocking connect*/
 	struct work_struct	tcp_listen_work;/* handle tcp socket accepts */
 	struct work_struct	smc_listen_work;/* prepare new accept socket */
@@ -219,6 +212,14 @@
 						 * started, waiting for unsent
 						 * data to be sent
 						 */
+	u8			connect_nonblock : 1;
+						/* non-blocking connect in
+						 * flight
+						 */
+	struct mutex		clcsock_release_lock;
+						/* protects clcsock of a listen
+						 * socket
+						 */
 };
 
 static inline struct smc_sock *smc_sk(const struct sock *sk)
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index ed5dcf0..d0b0f4c 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -21,13 +21,6 @@
 
 /********************************** send *************************************/
 
-struct smc_cdc_tx_pend {
-	struct smc_connection	*conn;		/* socket connection */
-	union smc_host_cursor	cursor;	/* tx sndbuf cursor sent */
-	union smc_host_cursor	p_cursor;	/* rx RMBE cursor produced */
-	u16			ctrl_seq;	/* conn. tx sequence # */
-};
-
 /* handler for send/transmission completion of a CDC msg */
 static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
 			       struct smc_link *link,
@@ -61,12 +54,14 @@
 
 int smc_cdc_get_free_slot(struct smc_connection *conn,
 			  struct smc_wr_buf **wr_buf,
+			  struct smc_rdma_wr **wr_rdma_buf,
 			  struct smc_cdc_tx_pend **pend)
 {
 	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
 	int rc;
 
 	rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+				     wr_rdma_buf,
 				     (struct smc_wr_tx_pend_priv **)pend);
 	if (!conn->alert_token_local)
 		/* abnormal termination */
@@ -81,7 +76,7 @@
 		sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
 		"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
 	BUILD_BUG_ON_MSG(
-		sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
+		offsetofend(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE,
 		"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
 	BUILD_BUG_ON_MSG(
 		sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
@@ -96,6 +91,7 @@
 		     struct smc_wr_buf *wr_buf,
 		     struct smc_cdc_tx_pend *pend)
 {
+	union smc_host_cursor cfed;
 	struct smc_link *link;
 	int rc;
 
@@ -105,12 +101,12 @@
 
 	conn->tx_cdc_seq++;
 	conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
-	smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf,
-			    &conn->local_tx_ctrl, conn);
+	smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed);
 	rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
-	if (!rc)
-		smc_curs_copy(&conn->rx_curs_confirmed,
-			      &conn->local_tx_ctrl.cons, conn);
+	if (!rc) {
+		smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
+		conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
+	}
 
 	return rc;
 }
@@ -121,11 +117,14 @@
 	struct smc_wr_buf *wr_buf;
 	int rc;
 
-	rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
+	rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend);
 	if (rc)
 		return rc;
 
-	return smc_cdc_msg_send(conn, wr_buf, pend);
+	spin_lock_bh(&conn->send_lock);
+	rc = smc_cdc_msg_send(conn, wr_buf, pend);
+	spin_unlock_bh(&conn->send_lock);
+	return rc;
 }
 
 int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
@@ -177,23 +176,25 @@
 int smcd_cdc_msg_send(struct smc_connection *conn)
 {
 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+	union smc_host_cursor curs;
 	struct smcd_cdc_msg cdc;
 	int rc, diff;
 
 	memset(&cdc, 0, sizeof(cdc));
 	cdc.common.type = SMC_CDC_MSG_TYPE;
-	cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
-	cdc.prod_count = conn->local_tx_ctrl.prod.count;
-
-	cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
-	cdc.cons_count = conn->local_tx_ctrl.cons.count;
-	cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
-	cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
+	curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.prod.acurs);
+	cdc.prod.wrap = curs.wrap;
+	cdc.prod.count = curs.count;
+	curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.cons.acurs);
+	cdc.cons.wrap = curs.wrap;
+	cdc.cons.count = curs.count;
+	cdc.cons.prod_flags = conn->local_tx_ctrl.prod_flags;
+	cdc.cons.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
 	rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
 	if (rc)
 		return rc;
-	smc_curs_copy(&conn->rx_curs_confirmed, &conn->local_tx_ctrl.cons,
-		      conn);
+	smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
+	conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
 	/* Calculate transmitted data and increment free send buffer space */
 	diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
 			     &conn->tx_curs_sent);
@@ -270,26 +271,18 @@
 		smp_mb__after_atomic();
 		smc->sk.sk_data_ready(&smc->sk);
 	} else {
-		if (conn->local_rx_ctrl.prod_flags.write_blocked ||
-		    conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
-		    conn->local_rx_ctrl.prod_flags.urg_data_pending) {
-			if (conn->local_rx_ctrl.prod_flags.urg_data_pending)
-				conn->urg_state = SMC_URG_NOTYET;
-			/* force immediate tx of current consumer cursor, but
-			 * under send_lock to guarantee arrival in seqno-order
-			 */
-			if (smc->sk.sk_state != SMC_INIT)
-				smc_tx_sndbuf_nonempty(conn);
-		}
+		if (conn->local_rx_ctrl.prod_flags.write_blocked)
+			smc->sk.sk_data_ready(&smc->sk);
+		if (conn->local_rx_ctrl.prod_flags.urg_data_pending)
+			conn->urg_state = SMC_URG_NOTYET;
 	}
 
-	/* piggy backed tx info */
 	/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
-	if (diff_cons && smc_tx_prepared_sends(conn)) {
+	if ((diff_cons && smc_tx_prepared_sends(conn)) ||
+	    conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
+	    conn->local_rx_ctrl.prod_flags.urg_data_pending)
 		smc_tx_sndbuf_nonempty(conn);
-		/* trigger socket release if connection closed */
-		smc_close_wake_tx_prepared(smc);
-	}
+
 	if (diff_cons && conn->urg_tx_pend &&
 	    atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) {
 		/* urg data confirmed by peer, indicate we're ready for more */
@@ -331,13 +324,16 @@
 static void smcd_cdc_rx_tsklet(unsigned long data)
 {
 	struct smc_connection *conn = (struct smc_connection *)data;
+	struct smcd_cdc_msg *data_cdc;
 	struct smcd_cdc_msg cdc;
 	struct smc_sock *smc;
 
 	if (!conn)
 		return;
 
-	memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
+	data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr;
+	smcd_curs_copy(&cdc.prod, &data_cdc->prod, conn);
+	smcd_curs_copy(&cdc.cons, &data_cdc->cons, conn);
 	smc = container_of(conn, struct smc_sock, conn);
 	smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
 }
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 934df44..861dc24 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -48,21 +48,31 @@
 	struct smc_cdc_producer_flags	prod_flags;
 	struct smc_cdc_conn_state_flags	conn_state_flags;
 	u8				reserved[18];
-} __packed;					/* format defined in RFC7609 */
+};
+
+/* SMC-D cursor format */
+union smcd_cdc_cursor {
+	struct {
+		u16	wrap;
+		u32	count;
+		struct smc_cdc_producer_flags	prod_flags;
+		struct smc_cdc_conn_state_flags	conn_state_flags;
+	} __packed;
+#ifdef KERNEL_HAS_ATOMIC64
+	atomic64_t		acurs;		/* for atomic processing */
+#else
+	u64			acurs;		/* for atomic processing */
+#endif
+} __aligned(8);
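
The cursor is reshaped so that wrap, count and both flag bytes pack into exactly eight bytes; one 64-bit atomic can then move a consistent snapshot, which is what the new smcd_curs_copy() below relies on. A user-space model of the layout (GCC/Clang attributes, C11 atomics):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

union cursor {
	struct {
		uint16_t wrap;
		uint32_t count;
		uint8_t	 prod_flags;
		uint8_t	 conn_state_flags;
	} __attribute__((packed));		/* 2+4+1+1 = 8 bytes */
	_Atomic uint64_t acurs;
} __attribute__((aligned(8)));

int main(void)
{
	union cursor src = { .wrap = 3, .count = 4096 }, dst;

	/* one atomic load/store instead of a field-by-field copy that
	 * could observe a half-updated cursor
	 */
	atomic_store(&dst.acurs, atomic_load(&src.acurs));
	printf("size=%zu wrap=%hu count=%u\n",
	       sizeof(union cursor), dst.wrap, dst.count);
	return 0;
}
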
 
 /* CDC message for SMC-D */
 struct smcd_cdc_msg {
 	struct smc_wr_rx_hdr common;	/* Type = 0xFE */
 	u8 res1[7];
-	u16 prod_wrap;
-	u32 prod_count;
-	u8 res2[2];
-	u16 cons_wrap;
-	u32 cons_count;
-	struct smc_cdc_producer_flags	prod_flags;
-	struct smc_cdc_conn_state_flags conn_state_flags;
+	union smcd_cdc_cursor	prod;
+	union smcd_cdc_cursor	cons;
 	u8 res3[8];
-} __packed;
+} __aligned(8);
 
 static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
 {
@@ -135,7 +145,24 @@
 #endif
 }
 
-/* calculate cursor difference between old and new, where old <= new */
+static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt,
+				  union smcd_cdc_cursor *src,
+				  struct smc_connection *conn)
+{
+#ifndef KERNEL_HAS_ATOMIC64
+	unsigned long flags;
+
+	spin_lock_irqsave(&conn->acurs_lock, flags);
+	tgt->acurs = src->acurs;
+	spin_unlock_irqrestore(&conn->acurs_lock, flags);
+#else
+	atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
+#endif
+}
+
+/* calculate cursor difference between old and new, where old <= new and
+ * difference cannot exceed size
+ */
 static inline int smc_curs_diff(unsigned int size,
 				union smc_host_cursor *old,
 				union smc_host_cursor *new)
@@ -160,28 +187,51 @@
 	return smc_curs_diff(size, old, new);
 }
 
+/* calculate cursor difference between old and new, where old <= new and
+ * difference may exceed size
+ */
+static inline int smc_curs_diff_large(unsigned int size,
+				      union smc_host_cursor *old,
+				      union smc_host_cursor *new)
+{
+	if (old->wrap < new->wrap)
+		return min_t(int,
+			     (size - old->count) + new->count +
+			     (new->wrap - old->wrap - 1) * size,
+			     size);
+
+	if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */
+		return min_t(int,
+			     (size - old->count) + new->count +
+			     (new->wrap + 0xffff - old->wrap) * size,
+			     size);
+
+	return max_t(int, 0, (new->count - old->count));
+}
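
smc_curs_diff() assumes the two cursors are at most one buffer wrap apart; the new smc_curs_diff_large() also copes with several wraps and with the 16-bit wrap counter itself overflowing, clamping the result to the buffer size. A standalone rendition with a worked one-wrap example (16 KB buffer: 384 bytes up to the end plus 200 past it = 584):

#include <stdio.h>

struct cursor { unsigned short wrap; unsigned int count; };

/* mirrors smc_curs_diff(): old <= new, at most one wrap apart */
static int curs_diff(unsigned int size, struct cursor old, struct cursor new)
{
	if (old.wrap != new.wrap)
		return (int)((size - old.count) + new.count);
	return (int)(new.count - old.count);
}

/* mirrors smc_curs_diff_large(): any number of wraps, result clamped
 * to size; handles the wrap counter flipping from 0xffff to 0x0000
 */
static int curs_diff_large(unsigned int size, struct cursor old,
			   struct cursor new)
{
	long d;

	if (old.wrap < new.wrap)
		d = (size - old.count) + new.count +
		    (long)(new.wrap - old.wrap - 1) * size;
	else if (old.wrap > new.wrap)
		d = (size - old.count) + new.count +
		    (long)(new.wrap + 0xffff - old.wrap) * size;
	else
		d = (long)new.count - old.count;
	return d < 0 ? 0 : (d > size ? (int)size : (int)d);
}

int main(void)
{
	struct cursor old = { .wrap = 1, .count = 16000 };
	struct cursor new = { .wrap = 2, .count = 200 };

	printf("diff=%d large=%d\n",
	       curs_diff(16384, old, new),
	       curs_diff_large(16384, old, new));
	return 0;
}
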
+
 static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer,
 					  union smc_host_cursor *local,
+					  union smc_host_cursor *save,
 					  struct smc_connection *conn)
 {
-	union smc_host_cursor temp;
-
-	smc_curs_copy(&temp, local, conn);
-	peer->count = htonl(temp.count);
-	peer->wrap = htons(temp.wrap);
+	smc_curs_copy(save, local, conn);
+	peer->count = htonl(save->count);
+	peer->wrap = htons(save->wrap);
 	/* peer->reserved = htons(0); must be ensured by caller */
 }
 
 static inline void smc_host_msg_to_cdc(struct smc_cdc_msg *peer,
-				       struct smc_host_cdc_msg *local,
-				       struct smc_connection *conn)
+				       struct smc_connection *conn,
+				       union smc_host_cursor *save)
 {
+	struct smc_host_cdc_msg *local = &conn->local_tx_ctrl;
+
 	peer->common.type = local->common.type;
 	peer->len = local->len;
 	peer->seqno = htons(local->seqno);
 	peer->token = htonl(local->token);
-	smc_host_cursor_to_cdc(&peer->prod, &local->prod, conn);
-	smc_host_cursor_to_cdc(&peer->cons, &local->cons, conn);
+	smc_host_cursor_to_cdc(&peer->prod, &local->prod, save, conn);
+	smc_host_cursor_to_cdc(&peer->cons, &local->cons, save, conn);
 	peer->prod_flags = local->prod_flags;
 	peer->conn_state_flags = local->conn_state_flags;
 }
@@ -220,14 +270,20 @@
 }
 
 static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
-					struct smcd_cdc_msg *peer)
+					struct smcd_cdc_msg *peer,
+					struct smc_connection *conn)
 {
-	local->prod.wrap = peer->prod_wrap;
-	local->prod.count = peer->prod_count;
-	local->cons.wrap = peer->cons_wrap;
-	local->cons.count = peer->cons_count;
-	local->prod_flags = peer->prod_flags;
-	local->conn_state_flags = peer->conn_state_flags;
+	union smc_host_cursor temp;
+
+	temp.wrap = peer->prod.wrap;
+	temp.count = peer->prod.count;
+	smc_curs_copy(&local->prod, &temp, conn);
+
+	temp.wrap = peer->cons.wrap;
+	temp.count = peer->cons.count;
+	smc_curs_copy(&local->cons, &temp, conn);
+	local->prod_flags = peer->cons.prod_flags;
+	local->conn_state_flags = peer->cons.conn_state_flags;
 }
 
 static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
@@ -235,15 +291,21 @@
 				       struct smc_connection *conn)
 {
 	if (conn->lgr->is_smcd)
-		smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer);
+		smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer, conn);
 	else
 		smcr_cdc_msg_to_host(local, peer, conn);
 }
 
-struct smc_cdc_tx_pend;
+struct smc_cdc_tx_pend {
+	struct smc_connection	*conn;		/* socket connection */
+	union smc_host_cursor	cursor;		/* tx sndbuf cursor sent */
+	union smc_host_cursor	p_cursor;	/* rx RMBE cursor produced */
+	u16			ctrl_seq;	/* conn. tx sequence # */
+};
 
 int smc_cdc_get_free_slot(struct smc_connection *conn,
 			  struct smc_wr_buf **wr_buf,
+			  struct smc_rdma_wr **wr_rdma_buf,
 			  struct smc_cdc_tx_pend **pend);
 void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
 int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 52241d6..49bcebf 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -97,17 +97,19 @@
 				 struct smc_clc_msg_proposal_prefix *prop)
 {
 	struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
+	const struct in_ifaddr *ifa;
 
 	if (!in_dev)
 		return -ENODEV;
-	for_ifa(in_dev) {
+
+	in_dev_for_each_ifa_rcu(ifa, in_dev) {
 		if (!inet_ifa_match(ipv4, ifa))
 			continue;
 		prop->prefix_len = inet_mask_len(ifa->ifa_mask);
 		prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
 		/* prop->ipv6_prefixes_cnt = 0; already done by memset before */
 		return 0;
-	} endfor_ifa(in_dev);
+	}
 	return -ENOENT;
 }
 
@@ -190,14 +192,15 @@
 				   struct smc_clc_msg_proposal_prefix *prop)
 {
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	const struct in_ifaddr *ifa;
 
 	if (!in_dev)
 		return -ENODEV;
-	for_ifa(in_dev) {
+	in_dev_for_each_ifa_rcu(ifa, in_dev) {
 		if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
 		    inet_ifa_match(prop->outgoing_subnet, ifa))
 			return 0;
-	} endfor_ifa(in_dev);
+	}
 
 	return -ENOENT;
 }
@@ -265,7 +268,7 @@
  * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
  */
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
-		     u8 expected_type)
+		     u8 expected_type, unsigned long timeout)
 {
 	long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
 	struct sock *clc_sk = smc->clcsock->sk;
@@ -285,8 +288,8 @@
 	 * sizeof(struct smc_clc_msg_hdr)
 	 */
 	krflags = MSG_PEEK | MSG_WAITALL;
-	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
+	clc_sk->sk_rcvtimeo = timeout;
+	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1,
 			sizeof(struct smc_clc_msg_hdr));
 	len = sock_recvmsg(smc->clcsock, &msg, krflags);
 	if (signal_pending(current)) {
@@ -297,7 +300,11 @@
 	}
 	if (clc_sk->sk_err) {
 		reason_code = -clc_sk->sk_err;
-		smc->sk.sk_err = clc_sk->sk_err;
+		if (clc_sk->sk_err == EAGAIN &&
+		    expected_type == SMC_CLC_DECLINE)
+			clc_sk->sk_err = 0; /* reset for fallback usage */
+		else
+			smc->sk.sk_err = clc_sk->sk_err;
 		goto out;
 	}
 	if (!len) { /* peer has performed orderly shutdown */
@@ -306,7 +313,8 @@
 		goto out;
 	}
 	if (len < 0) {
-		smc->sk.sk_err = -len;
+		if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE)
+			smc->sk.sk_err = -len;
 		reason_code = len;
 		goto out;
 	}
@@ -325,7 +333,7 @@
 
 	/* receive the complete CLC message */
 	memset(&msg, 0, sizeof(struct msghdr));
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
+	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen);
 	krflags = MSG_WAITALL;
 	len = sock_recvmsg(smc->clcsock, &msg, krflags);
 	if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
@@ -346,7 +354,7 @@
 	}
 
 out:
-	smc->clcsock->sk->sk_rcvtimeo = rcvtimeo;
+	clc_sk->sk_rcvtimeo = rcvtimeo;
 	return reason_code;
 }
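
smc_clc_wait_msg() now takes its timeout as a parameter, so a decline can be awaited with the one-second CLC_WAIT_TIME_SHORT instead of the full six-second CLC_WAIT_TIME. Its save-and-restore of sk_rcvtimeo has a direct user-space analog (sketch; helper name illustrative):

#include <stdio.h>
#include <sys/socket.h>
#include <sys/time.h>

/* bound one receive by a per-message timeout, then restore the
 * caller's timeout, as smc_clc_wait_msg() does with sk_rcvtimeo
 */
static ssize_t recv_bounded(int fd, void *buf, size_t len, time_t secs)
{
	struct timeval old, tmo = { .tv_sec = secs };
	socklen_t olen = sizeof(old);
	ssize_t rc;

	getsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &old, &olen);
	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tmo, sizeof(tmo));
	rc = recv(fd, buf, len, MSG_WAITALL);	/* -1/EAGAIN on timeout */
	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &old, sizeof(old));
	return rc;
}

int main(void)
{
	char byte;
	int sv[2];

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	printf("rc=%zd\n", recv_bounded(sv[0], &byte, 1, 1)); /* times out */
	return 0;
}
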
 
@@ -373,17 +381,14 @@
 	vec.iov_len = sizeof(struct smc_clc_msg_decline);
 	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
 			     sizeof(struct smc_clc_msg_decline));
-	if (len < sizeof(struct smc_clc_msg_decline))
-		smc->sk.sk_err = EPROTO;
-	if (len < 0)
-		smc->sk.sk_err = -len;
-	return sock_error(&smc->sk);
+	if (len < 0 || len < sizeof(struct smc_clc_msg_decline))
+		len = -EPROTO;
+	return len > 0 ? 0 : len;
 }
 
 /* send CLC PROPOSAL message across internal TCP socket */
 int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
-			  struct smc_ib_device *ibdev, u8 ibport, u8 gid[],
-			  struct smcd_dev *ismdev)
+			  struct smc_init_info *ini)
 {
 	struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
 	struct smc_clc_msg_proposal_prefix pclc_prfx;
@@ -413,8 +418,9 @@
 		/* add SMC-R specifics */
 		memcpy(pclc.lcl.id_for_peer, local_systemid,
 		       sizeof(local_systemid));
-		memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE);
-		memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN);
+		memcpy(&pclc.lcl.gid, ini->ib_gid, SMC_GID_SIZE);
+		memcpy(&pclc.lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
+		       ETH_ALEN);
 		pclc.iparea_offset = htons(0);
 	}
 	if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
@@ -422,7 +428,7 @@
 		memset(&pclc_smcd, 0, sizeof(pclc_smcd));
 		plen += sizeof(pclc_smcd);
 		pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET);
-		pclc_smcd.gid = ismdev->local_gid;
+		pclc_smcd.gid = ini->ism_dev->local_gid;
 	}
 	pclc.hdr.length = htons(plen);
 
@@ -536,7 +542,6 @@
 	struct smc_link *link;
 	struct msghdr msg;
 	struct kvec vec;
-	int rc = 0;
 	int len;
 
 	memset(&aclc, 0, sizeof(aclc));
@@ -589,13 +594,8 @@
 	vec.iov_len = ntohs(aclc.hdr.length);
 	len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1,
 			     ntohs(aclc.hdr.length));
-	if (len < ntohs(aclc.hdr.length)) {
-		if (len >= 0)
-			new_smc->sk.sk_err = EPROTO;
-		else
-			new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
-		rc = sock_error(&new_smc->sk);
-	}
+	if (len < ntohs(aclc.hdr.length))
+		len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
 
-	return rc;
+	return len > 0 ? 0 : len;
 }
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 18da89b..ca20927 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -27,22 +27,29 @@
 #define SMC_TYPE_D		1		/* SMC-D only		      */
 #define SMC_TYPE_B		3		/* SMC-R and SMC-D	      */
 #define CLC_WAIT_TIME		(6 * HZ)	/* max. wait time on clcsock  */
+#define CLC_WAIT_TIME_SHORT	HZ		/* short wait time on clcsock */
 #define SMC_CLC_DECL_MEM	0x01010000  /* insufficient memory resources  */
 #define SMC_CLC_DECL_TIMEOUT_CL	0x02010000  /* timeout w4 QP confirm link     */
 #define SMC_CLC_DECL_TIMEOUT_AL	0x02020000  /* timeout w4 QP add link	      */
 #define SMC_CLC_DECL_CNFERR	0x03000000  /* configuration error            */
 #define SMC_CLC_DECL_PEERNOSMC	0x03010000  /* peer did not indicate SMC      */
 #define SMC_CLC_DECL_IPSEC	0x03020000  /* IPsec usage		      */
-#define SMC_CLC_DECL_NOSMCDEV	0x03030000  /* no SMC device found	      */
+#define SMC_CLC_DECL_NOSMCDEV	0x03030000  /* no SMC device found (R or D)   */
+#define SMC_CLC_DECL_NOSMCDDEV	0x03030001  /* no SMC-D device found	      */
+#define SMC_CLC_DECL_NOSMCRDEV	0x03030002  /* no SMC-R device found	      */
+#define SMC_CLC_DECL_SMCDNOTALK	0x03030003  /* SMC-D dev can't talk to peer   */
 #define SMC_CLC_DECL_MODEUNSUPP	0x03040000  /* smc modes do not match (R or D)*/
 #define SMC_CLC_DECL_RMBE_EC	0x03050000  /* peer has eyecatcher in RMBE    */
 #define SMC_CLC_DECL_OPTUNSUPP	0x03060000  /* fastopen sockopt not supported */
+#define SMC_CLC_DECL_DIFFPREFIX	0x03070000  /* IP prefix / subnet mismatch    */
+#define SMC_CLC_DECL_GETVLANERR	0x03080000  /* err to get vlan id of ip device*/
+#define SMC_CLC_DECL_ISMVLANERR	0x03090000  /* err to reg vlan id on ism dev  */
 #define SMC_CLC_DECL_SYNCERR	0x04000000  /* synchronization error          */
 #define SMC_CLC_DECL_PEERDECL	0x05000000  /* peer declined during handshake */
-#define SMC_CLC_DECL_INTERR	0x99990000  /* internal error                 */
-#define SMC_CLC_DECL_ERR_RTOK	0x99990001  /*	 rtoken handling failed       */
-#define SMC_CLC_DECL_ERR_RDYLNK	0x99990002  /*	 ib ready link failed	      */
-#define SMC_CLC_DECL_ERR_REGRMB	0x99990003  /*	 reg rmb failed		      */
+#define SMC_CLC_DECL_INTERR	0x09990000  /* internal error		      */
+#define SMC_CLC_DECL_ERR_RTOK	0x09990001  /*	 rtoken handling failed       */
+#define SMC_CLC_DECL_ERR_RDYLNK	0x09990002  /*	 ib ready link failed	      */
+#define SMC_CLC_DECL_ERR_REGRMB	0x09990003  /*	 reg rmb failed		      */
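
The internal-error codes move from a 0x99 prefix to 0x09, plausibly so that every decline reason remains positive when carried in a plain int next to negative errnos (compare the rc < 0 remapping added to smc_lgr_create() later in this patch). A two-line demonstration of the sign issue (the conversion is implementation-defined but two's complement in practice):

#include <stdio.h>

int main(void)
{
	int old_interr = 0x99990000;	/* old SMC_CLC_DECL_INTERR */
	int new_interr = 0x09990000;	/* new SMC_CLC_DECL_INTERR */

	/* reason codes share int return paths with negative errnos */
	printf("old<0:%d new<0:%d\n", old_interr < 0, new_interr < 0);
	return 0;
}
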
 
 struct smc_clc_msg_hdr {	/* header1 of clc messages */
 	u8 eyecatcher[4];	/* eye catcher */
@@ -178,15 +185,15 @@
 }
 
 struct smcd_dev;
+struct smc_init_info;
 
 int smc_clc_prfx_match(struct socket *clcsock,
 		       struct smc_clc_msg_proposal_prefix *prop);
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
-		     u8 expected_type);
+		     u8 expected_type, unsigned long timeout);
 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
 int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
-			  struct smc_ib_device *smcibdev, u8 ibport, u8 gid[],
-			  struct smcd_dev *ismdev);
+			  struct smc_init_info *ini);
 int smc_clc_send_confirm(struct smc_sock *smc);
 int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
 
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index ea2b87f..fc06720 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -21,6 +21,22 @@
 
 #define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME	(5 * HZ)
 
+/* release the clcsock that is assigned to the smc_sock */
+void smc_clcsock_release(struct smc_sock *smc)
+{
+	struct socket *tcp;
+
+	if (smc->listen_smc && current_work() != &smc->smc_listen_work)
+		cancel_work_sync(&smc->smc_listen_work);
+	mutex_lock(&smc->clcsock_release_lock);
+	if (smc->clcsock) {
+		tcp = smc->clcsock;
+		smc->clcsock = NULL;
+		sock_release(tcp);
+	}
+	mutex_unlock(&smc->clcsock_release_lock);
+}
+
 static void smc_close_cleanup_listen(struct sock *parent)
 {
 	struct sock *sk;
@@ -321,6 +337,7 @@
 						   close_work);
 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
 	struct smc_cdc_conn_state_flags *rxflags;
+	bool release_clcsock = false;
 	struct sock *sk = &smc->sk;
 	int old_state;
 
@@ -345,14 +362,7 @@
 
 	switch (sk->sk_state) {
 	case SMC_INIT:
-		if (atomic_read(&conn->bytes_to_rcv) ||
-		    (rxflags->peer_done_writing &&
-		     !smc_cdc_rxed_any_close(conn))) {
-			sk->sk_state = SMC_APPCLOSEWAIT1;
-		} else {
-			sk->sk_state = SMC_CLOSED;
-			sock_put(sk); /* passive closing */
-		}
+		sk->sk_state = SMC_APPCLOSEWAIT1;
 		break;
 	case SMC_ACTIVE:
 		sk->sk_state = SMC_APPCLOSEWAIT1;
@@ -405,10 +415,15 @@
 	if (old_state != sk->sk_state) {
 		sk->sk_state_change(sk);
 		if ((sk->sk_state == SMC_CLOSED) &&
-		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
+		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
 			smc_conn_free(conn);
+			if (smc->clcsock)
+				release_clcsock = true;
+		}
 	}
 	release_sock(sk);
+	if (release_clcsock)
+		smc_clcsock_release(smc);
 	sock_put(sk); /* sock_hold done by schedulers of close_work */
 }
 
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 19eb6a2..e0e3b5d 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -23,5 +23,6 @@
 int smc_close_active(struct smc_sock *smc);
 int smc_close_shutdown_write(struct smc_sock *smc);
 void smc_close_init(struct smc_sock *smc);
+void smc_clcsock_release(struct smc_sock *smc);
 
 #endif /* SMC_CLOSE_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 18daebc..2ba97ff 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -118,7 +118,6 @@
 	rb_erase(&conn->alert_node, &lgr->conns_all);
 	lgr->conns_num--;
 	conn->alert_token_local = 0;
-	conn->lgr = NULL;
 	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
 }
 
@@ -128,6 +127,8 @@
 {
 	struct smc_link_group *lgr = conn->lgr;
 
+	if (!lgr)
+		return;
 	write_lock_bh(&lgr->conns_lock);
 	if (conn->alert_token_local) {
 		__smc_lgr_unregister_conn(conn);
@@ -149,6 +150,8 @@
 	return -ENOTCONN;
 }
 
+static void smc_lgr_free(struct smc_link_group *lgr);
+
 static void smc_lgr_free_work(struct work_struct *work)
 {
 	struct smc_link_group *lgr = container_of(to_delayed_work(work),
@@ -157,8 +160,6 @@
 	bool conns;
 
 	spin_lock_bh(&smc_lgr_list.lock);
-	if (list_empty(&lgr->list))
-		goto free;
 	read_lock_bh(&lgr->conns_lock);
 	conns = RB_EMPTY_ROOT(&lgr->conns_all);
 	read_unlock_bh(&lgr->conns_lock);
@@ -166,13 +167,16 @@
 		spin_unlock_bh(&smc_lgr_list.lock);
 		return;
 	}
-	list_del_init(&lgr->list); /* remove from smc_lgr_list */
-free:
+	if (!list_empty(&lgr->list))
+		list_del_init(&lgr->list); /* remove from smc_lgr_list */
 	spin_unlock_bh(&smc_lgr_list.lock);
 
 	if (!lgr->is_smcd && !lgr->terminating)	{
+		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+
 		/* try to send del link msg, on error free lgr immediately */
-		if (!smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK])) {
+		if (lnk->state == SMC_LNK_ACTIVE &&
+		    !smc_link_send_delete(lnk)) {
 			/* reschedule in case we never receive a response */
 			smc_lgr_schedule_free_work(lgr);
 			return;
@@ -184,15 +188,14 @@
 
 		if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
 			smc_llc_link_inactive(lnk);
+		if (lgr->is_smcd)
+			smc_ism_signal_shutdown(lgr);
 		smc_lgr_free(lgr);
 	}
 }
 
 /* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
-			  struct smc_ib_device *smcibdev, u8 ibport,
-			  char *peer_systemid, unsigned short vlan_id,
-			  struct smcd_dev *smcismdev, u64 peer_gid)
+static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 {
 	struct smc_link_group *lgr;
 	struct smc_link *lnk;
@@ -200,20 +203,21 @@
 	int rc = 0;
 	int i;
 
-	if (is_smcd && vlan_id) {
-		rc = smc_ism_get_vlan(smcismdev, vlan_id);
-		if (rc)
+	if (ini->is_smcd && ini->vlan_id) {
+		if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
+			rc = SMC_CLC_DECL_ISMVLANERR;
 			goto out;
+		}
 	}
 
 	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
 	if (!lgr) {
-		rc = -ENOMEM;
-		goto out;
+		rc = SMC_CLC_DECL_MEM;
+		goto ism_put_vlan;
 	}
-	lgr->is_smcd = is_smcd;
+	lgr->is_smcd = ini->is_smcd;
 	lgr->sync_err = 0;
-	lgr->vlan_id = vlan_id;
+	lgr->vlan_id = ini->vlan_id;
 	rwlock_init(&lgr->sndbufs_lock);
 	rwlock_init(&lgr->rmbs_lock);
 	rwlock_init(&lgr->conns_lock);
@@ -225,29 +229,32 @@
 	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
 	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
 	lgr->conns_all = RB_ROOT;
-	if (is_smcd) {
+	if (ini->is_smcd) {
 		/* SMC-D specific settings */
-		lgr->peer_gid = peer_gid;
-		lgr->smcd = smcismdev;
+		lgr->peer_gid = ini->ism_gid;
+		lgr->smcd = ini->ism_dev;
 	} else {
 		/* SMC-R specific settings */
 		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
-		memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
+		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
+		       SMC_SYSTEMID_LEN);
 
 		lnk = &lgr->lnk[SMC_SINGLE_LINK];
 		/* initialize link */
 		lnk->state = SMC_LNK_ACTIVATING;
 		lnk->link_id = SMC_SINGLE_LINK;
-		lnk->smcibdev = smcibdev;
-		lnk->ibport = ibport;
-		lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
-		if (!smcibdev->initialized)
-			smc_ib_setup_per_ibdev(smcibdev);
+		lnk->smcibdev = ini->ib_dev;
+		lnk->ibport = ini->ib_port;
+		lnk->path_mtu =
+			ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+		if (!ini->ib_dev->initialized)
+			smc_ib_setup_per_ibdev(ini->ib_dev);
 		get_random_bytes(rndvec, sizeof(rndvec));
 		lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
 			(rndvec[2] << 16);
 		rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
-					  vlan_id, lnk->gid, &lnk->sgid_index);
+					  ini->vlan_id, lnk->gid,
+					  &lnk->sgid_index);
 		if (rc)
 			goto free_lgr;
 		rc = smc_llc_link_init(lnk);
@@ -282,7 +289,16 @@
 	smc_llc_link_clear(lnk);
 free_lgr:
 	kfree(lgr);
+ism_put_vlan:
+	if (ini->is_smcd && ini->vlan_id)
+		smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
 out:
+	if (rc < 0) {
+		if (rc == -ENOMEM)
+			rc = SMC_CLC_DECL_MEM;
+		else
+			rc = SMC_CLC_DECL_INTERR;
+	}
 	return rc;
 }
 
@@ -293,7 +309,12 @@
 		conn->sndbuf_desc->used = 0;
 	if (conn->rmb_desc) {
 		if (!conn->rmb_desc->regerr) {
-			conn->rmb_desc->reused = 1;
+			if (!lgr->is_smcd) {
+				/* unregister rmb with peer */
+				smc_llc_do_delete_rkey(
+						&lgr->lnk[SMC_SINGLE_LINK],
+						conn->rmb_desc);
+			}
 			conn->rmb_desc->used = 0;
 		} else {
 			/* buf registration failed, reuse not possible */
@@ -319,8 +340,9 @@
 	} else {
 		smc_cdc_tx_dismiss_slots(conn);
 	}
-	smc_lgr_unregister_conn(conn);		/* unsets conn->lgr */
+	smc_lgr_unregister_conn(conn);
 	smc_buf_unuse(conn, lgr);		/* allow buffer reuse */
+	conn->lgr = NULL;
 
 	if (!lgr->conns_num)
 		smc_lgr_schedule_free_work(lgr);
@@ -408,7 +430,7 @@
 }
 
 /* remove a link group */
-void smc_lgr_free(struct smc_link_group *lgr)
+static void smc_lgr_free(struct smc_link_group *lgr)
 {
 	smc_lgr_free_bufs(lgr);
 	if (lgr->is_smcd)
@@ -450,6 +472,7 @@
 		sock_hold(&smc->sk); /* sock_put in close work */
 		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
 		__smc_lgr_unregister_conn(conn);
+		conn->lgr = NULL;
 		write_unlock_bh(&lgr->conns_lock);
 		if (!schedule_work(&conn->close_work))
 			sock_put(&smc->sk);
@@ -485,7 +508,7 @@
 }
 
 /* Called when SMC-D device is terminated or peer is lost */
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
 {
 	struct smc_link_group *lgr, *l;
 	LIST_HEAD(lgr_free_list);
@@ -495,7 +518,7 @@
 	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
 		if (lgr->is_smcd && lgr->smcd == dev &&
 		    (!peer_gid || lgr->peer_gid == peer_gid) &&
-		    !list_empty(&lgr->list)) {
+		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
 			__smc_lgr_terminate(lgr);
 			list_move(&lgr->list, &lgr_free_list);
 		}
@@ -506,6 +529,8 @@
 	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
 		list_del_init(&lgr->list);
 		cancel_delayed_work_sync(&lgr->free_work);
+		if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
+			smc_ism_signal_shutdown(lgr);
 		smc_lgr_free(lgr);
 	}
 }
@@ -513,13 +538,13 @@
 /* Determine vlan of internal TCP socket.
  * @vlan_id: address to store the determined vlan id into
  */
-int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
+int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
 {
 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
 	struct net_device *ndev;
 	int i, nest_lvl, rc = 0;
 
-	*vlan_id = 0;
+	ini->vlan_id = 0;
 	if (!dst) {
 		rc = -ENOTCONN;
 		goto out;
@@ -531,12 +556,12 @@
 
 	ndev = dst->dev;
 	if (is_vlan_dev(ndev)) {
-		*vlan_id = vlan_dev_vlan_id(ndev);
+		ini->vlan_id = vlan_dev_vlan_id(ndev);
 		goto out_rel;
 	}
 
 	rtnl_lock();
-	nest_lvl = dev_get_nest_level(ndev);
+	nest_lvl = ndev->lower_level;
 	for (i = 0; i < nest_lvl; i++) {
 		struct list_head *lower = &ndev->adj_list.lower;
 
@@ -545,7 +570,7 @@
 		lower = lower->next;
 		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
 		if (is_vlan_dev(ndev)) {
-			*vlan_id = vlan_dev_vlan_id(ndev);
+			ini->vlan_id = vlan_dev_vlan_id(ndev);
 			break;
 		}
 	}
@@ -559,7 +584,7 @@
 
 static bool smcr_lgr_match(struct smc_link_group *lgr,
 			   struct smc_clc_msg_local *lcl,
-			   enum smc_lgr_role role)
+			   enum smc_lgr_role role, u32 clcqpn)
 {
 	return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
 		       SMC_SYSTEMID_LEN) &&
@@ -567,7 +592,9 @@
 			SMC_GID_SIZE) &&
 		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
 			sizeof(lcl->mac)) &&
-		lgr->role == role;
+		lgr->role == role &&
+		(lgr->role == SMC_SERV ||
+		 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
 }
 
 static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -577,24 +604,16 @@
 }
 
 /* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
-		    struct smc_ib_device *smcibdev, u8 ibport,
-		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
-		    u64 peer_gid)
+int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 {
 	struct smc_connection *conn = &smc->conn;
-	int local_contact = SMC_FIRST_CONTACT;
 	struct smc_link_group *lgr;
-	unsigned short vlan_id;
 	enum smc_lgr_role role;
 	int rc = 0;
 
+	ini->cln_first_contact = SMC_FIRST_CONTACT;
 	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
-	rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
-	if (rc)
-		return rc;
-
-	if ((role == SMC_CLNT) && srv_first_contact)
+	if (role == SMC_CLNT && ini->srv_first_contact)
 		/* create new link group as well */
 		goto create;
 
@@ -602,16 +621,19 @@
 	spin_lock_bh(&smc_lgr_list.lock);
 	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
 		write_lock_bh(&lgr->conns_lock);
-		if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
-		     smcr_lgr_match(lgr, lcl, role)) &&
+		if ((ini->is_smcd ?
+		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
+		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
 		    !lgr->sync_err &&
-		    lgr->vlan_id == vlan_id &&
+		    lgr->vlan_id == ini->vlan_id &&
 		    (role == SMC_CLNT ||
 		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
 			/* link group found */
-			local_contact = SMC_REUSE_CONTACT;
+			ini->cln_first_contact = SMC_REUSE_CONTACT;
 			conn->lgr = lgr;
 			smc_lgr_register_conn(conn); /* add smc conn to lgr */
+			if (delayed_work_pending(&lgr->free_work))
+				cancel_delayed_work(&lgr->free_work);
 			write_unlock_bh(&lgr->conns_lock);
 			break;
 		}
@@ -619,27 +641,29 @@
 	}
 	spin_unlock_bh(&smc_lgr_list.lock);
 
-	if (role == SMC_CLNT && !srv_first_contact &&
-	    (local_contact == SMC_FIRST_CONTACT)) {
+	if (role == SMC_CLNT && !ini->srv_first_contact &&
+	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
 		/* Server reuses a link group, but Client wants to start
 		 * a new one
 		 * send out_of_sync decline, reason synchr. error
 		 */
-		return -ENOLINK;
+		return SMC_CLC_DECL_SYNCERR;
 	}
 
 create:
-	if (local_contact == SMC_FIRST_CONTACT) {
-		rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
-				    lcl->id_for_peer, vlan_id, smcd, peer_gid);
+	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
+		rc = smc_lgr_create(smc, ini);
 		if (rc)
 			goto out;
+		lgr = conn->lgr;
+		write_lock_bh(&lgr->conns_lock);
 		smc_lgr_register_conn(conn); /* add smc conn to lgr */
+		write_unlock_bh(&lgr->conns_lock);
 	}
 	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
 	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
 	conn->urg_state = SMC_URG_READ;
-	if (is_smcd) {
+	if (ini->is_smcd) {
 		conn->rx_off = sizeof(struct smcd_cdc_msg);
 		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
 	}
@@ -648,7 +672,7 @@
 #endif
 
 out:
-	return rc ? rc : local_contact;
+	return rc;
 }
 
 /* convert the RMB size into the compressed notation - minimum 16K.
@@ -1024,6 +1048,8 @@
 			smc_llc_link_inactive(lnk);
 		}
 		cancel_delayed_work_sync(&lgr->free_work);
+		if (lgr->is_smcd)
+			smc_ism_signal_shutdown(lgr);
 		smc_lgr_free(lgr); /* free link group */
 	}
 }
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c156674..c00ac61 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -52,6 +52,24 @@
 	FAILED		/* ib_wr_reg_mr response: failure */
 };
 
+struct smc_rdma_sge {				/* sges for RDMA writes */
+	struct ib_sge		wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE];
+};
+
+#define SMC_MAX_RDMA_WRITES	2		/* max. # of RDMA writes per
+						 * message send
+						 */
+
+struct smc_rdma_sges {				/* sges per message send */
+	struct smc_rdma_sge	tx_rdma_sge[SMC_MAX_RDMA_WRITES];
+};
+
+struct smc_rdma_wr {				/* work requests per message
+						 * send
+						 */
+	struct ib_rdma_wr	wr_tx_rdma[SMC_MAX_RDMA_WRITES];
+};
+
 struct smc_link {
 	struct smc_ib_device	*smcibdev;	/* ib-device */
 	u8			ibport;		/* port - values 1 | 2 */
@@ -64,6 +82,8 @@
 	struct smc_wr_buf	*wr_tx_bufs;	/* WR send payload buffers */
 	struct ib_send_wr	*wr_tx_ibs;	/* WR send meta data */
 	struct ib_sge		*wr_tx_sges;	/* WR send gather meta data */
+	struct smc_rdma_sges	*wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/
+	struct smc_rdma_wr	*wr_tx_rdmas;	/* WR RDMA WRITE */
 	struct smc_wr_tx_pend	*wr_tx_pends;	/* WR send waiting for CQE */
 	/* above four vectors have wr_tx_cnt elements and use the same index */
 	dma_addr_t		wr_tx_dma_addr;	/* DMA address of wr_tx_bufs */
@@ -109,6 +129,9 @@
 	int			llc_testlink_time; /* testlink interval */
 	struct completion	llc_confirm_rkey; /* wait 4 rx of cnf rkey */
 	int			llc_confirm_rkey_rc; /* rc from cnf rkey msg */
+	struct completion	llc_delete_rkey; /* wait 4 rx of del rkey */
+	int			llc_delete_rkey_rc; /* rc from del rkey msg */
+	struct mutex		llc_delete_rkey_mutex; /* serialize usage */
 };
 
 /* For now we just allow one parallel link per link group. The SMC protocol
@@ -127,7 +150,7 @@
 	struct page		*pages;
 	int			len;		/* length of buffer */
 	u32			used;		/* currently used / unused */
-	u8			reused	: 1;	/* new created / reused */
+	u8			wr_reg	: 1;	/* mem region registered */
 	u8			regerr	: 1;	/* err during registration */
 	union {
 		struct { /* SMC-R */
@@ -206,6 +229,24 @@
 	};
 };
 
+struct smc_clc_msg_local;
+
+struct smc_init_info {
+	u8			is_smcd;
+	unsigned short		vlan_id;
+	int			srv_first_contact;
+	int			cln_first_contact;
+	/* SMC-R */
+	struct smc_clc_msg_local *ib_lcl;
+	struct smc_ib_device	*ib_dev;
+	u8			ib_gid[SMC_GID_SIZE];
+	u8			ib_port;
+	u32			ib_clcqpn;
+	/* SMC-D */
+	u64			ism_gid;
+	struct smcd_dev		*ism_dev;
+};
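
Callers place this struct on the stack and zero it with = {0}, so vlan_id and every not-yet-discovered device pointer start out as 0/NULL and the find/init helpers can fill members in as they succeed. A small demonstration of the initializer semantics (member list abbreviated):

#include <stdio.h>

struct init_info {
	unsigned char	is_smcd;
	unsigned short	vlan_id;
	void		*ism_dev;
};

int main(void)
{
	struct init_info ini = {0};	/* zero-initializes every member */

	printf("%d %d %p\n", ini.is_smcd, ini.vlan_id, ini.ism_dev);
	return 0;
}
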
+
 /* Find the connection associated with the given alert token in the link group.
  * To use rbtrees we have to implement our own search core.
  * Requires @conns_lock
@@ -243,11 +284,11 @@
 struct smc_clc_msg_accept_confirm;
 struct smc_clc_msg_local;
 
-void smc_lgr_free(struct smc_link_group *lgr);
 void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
 void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
+			unsigned short vlan);
 int smc_buf_create(struct smc_sock *smc, bool is_smcd);
 int smc_uncompress_bufsize(u8 compressed);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
@@ -258,13 +299,10 @@
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
-int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
+int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
 
 void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
-		    struct smc_ib_device *smcibdev, u8 ibport,
-		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
-		    u64 peer_gid);
+int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
 void smcd_conn_free(struct smc_connection *conn);
 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
 void smc_core_exit(void);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index dbf64a9..f38727e 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -38,6 +39,7 @@
 {
 	struct smc_sock *smc = smc_sk(sk);
 
+	r->diag_family = sk->sk_family;
 	if (!smc->clcsock)
 		return;
 	r->id.idiag_sport = htons(smc->clcsock->sk->sk_num);
@@ -45,14 +47,12 @@
 	r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if;
 	sock_diag_save_cookie(sk, r->id.idiag_cookie);
 	if (sk->sk_protocol == SMCPROTO_SMC) {
-		r->diag_family = PF_INET;
 		memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
 		memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
 		r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
 		r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (sk->sk_protocol == SMCPROTO_SMC6) {
-		r->diag_family = PF_INET6;
 		memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr,
 		       sizeof(smc->clcsock->sk->sk_v6_rcv_saddr));
 		memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr,
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index e519ef2..d14ca4a 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -146,18 +146,13 @@
 static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport)
 {
 	const struct ib_gid_attr *attr;
-	int rc = 0;
+	int rc;
 
 	attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0);
 	if (IS_ERR(attr))
 		return -ENODEV;
 
-	if (attr->ndev)
-		memcpy(smcibdev->mac[ibport - 1], attr->ndev->dev_addr,
-		       ETH_ALEN);
-	else
-		rc = -ENODEV;
-
+	rc = rdma_read_gid_l2_fields(attr, NULL, smcibdev->mac[ibport - 1]);
 	rdma_put_gid_attr(attr);
 	return rc;
 }
@@ -185,6 +180,7 @@
 			 unsigned short vlan_id, u8 gid[], u8 *sgid_index)
 {
 	const struct ib_gid_attr *attr;
+	const struct net_device *ndev;
 	int i;
 
 	for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
@@ -192,11 +188,14 @@
 		if (IS_ERR(attr))
 			continue;
 
-		if (attr->ndev &&
+		rcu_read_lock();
+		ndev = rdma_read_gid_attr_ndev_rcu(attr);
+		if (!IS_ERR(ndev) &&
 		    ((!vlan_id && !is_vlan_dev(attr->ndev)) ||
 		     (vlan_id && is_vlan_dev(attr->ndev) &&
 		      vlan_dev_vlan_id(attr->ndev) == vlan_id)) &&
 		    attr->gid_type == IB_GID_TYPE_ROCE) {
+			rcu_read_unlock();
 			if (gid)
 				memcpy(gid, &attr->gid, SMC_GID_SIZE);
 			if (sgid_index)
@@ -204,6 +203,7 @@
 			rdma_put_gid_attr(attr);
 			return 0;
 		}
+		rcu_read_unlock();
 		rdma_put_gid_attr(attr);
 	}
 	return -ENODEV;
@@ -257,13 +257,21 @@
 	smcibdev = container_of(handler, struct smc_ib_device, event_handler);
 
 	switch (ibevent->event) {
-	case IB_EVENT_PORT_ERR:
 	case IB_EVENT_DEVICE_FATAL:
-	case IB_EVENT_PORT_ACTIVE:
-		port_idx = ibevent->element.port_num - 1;
-		set_bit(port_idx, &smcibdev->port_event_mask);
+		/* terminate all ports on device */
+		for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++)
+			set_bit(port_idx, &smcibdev->port_event_mask);
 		schedule_work(&smcibdev->port_event_work);
 		break;
+	case IB_EVENT_PORT_ERR:
+	case IB_EVENT_PORT_ACTIVE:
+	case IB_EVENT_GID_CHANGE:
+		port_idx = ibevent->element.port_num - 1;
+		if (port_idx < SMC_MAX_PORTS) {
+			set_bit(port_idx, &smcibdev->port_event_mask);
+			schedule_work(&smcibdev->port_event_work);
+		}
+		break;
 	default:
 		break;
 	}
@@ -289,18 +297,18 @@
 
 static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
 {
-	struct smc_ib_device *smcibdev =
-		(struct smc_ib_device *)ibevent->device;
+	struct smc_link *lnk = (struct smc_link *)priv;
+	struct smc_ib_device *smcibdev = lnk->smcibdev;
 	u8 port_idx;
 
 	switch (ibevent->event) {
-	case IB_EVENT_DEVICE_FATAL:
-	case IB_EVENT_GID_CHANGE:
-	case IB_EVENT_PORT_ERR:
+	case IB_EVENT_QP_FATAL:
 	case IB_EVENT_QP_ACCESS_ERR:
-		port_idx = ibevent->element.port_num - 1;
-		set_bit(port_idx, &smcibdev->port_event_mask);
-		schedule_work(&smcibdev->port_event_work);
+		port_idx = ibevent->element.qp->port - 1;
+		if (port_idx < SMC_MAX_PORTS) {
+			set_bit(port_idx, &smcibdev->port_event_mask);
+			schedule_work(&smcibdev->port_event_work);
+		}
 		break;
 	default:
 		break;
@@ -556,7 +564,6 @@
 	spin_lock(&smc_ib_devices.lock);
 	list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
 	spin_unlock(&smc_ib_devices.lock);
-	smc_pnet_remove_by_ibdev(smcibdev);
 	smc_ib_cleanup_per_ibdev(smcibdev);
 	ib_unregister_event_handler(&smcibdev->event_handler);
 	kfree(smcibdev);
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index bac7fd6..da60ab9 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -42,6 +42,8 @@
 						/* mac address per port*/
 	u8			pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN];
 						/* pnetid per port */
+	bool			pnetid_by_user[SMC_MAX_PORTS];
+						/* pnetid defined by user? */
 	u8			initialized : 1; /* ib dev CQ, evthdl done */
 	struct work_struct	port_event_work;
 	unsigned long		port_event_mask;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index e36f21c..e89e918 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -187,22 +187,28 @@
 #define ISM_EVENT_REQUEST		0x0001
 #define ISM_EVENT_RESPONSE		0x0002
 #define ISM_EVENT_REQUEST_IR		0x00000001
+#define ISM_EVENT_CODE_SHUTDOWN		0x80
 #define ISM_EVENT_CODE_TESTLINK		0x83
 
+union smcd_sw_event_info {
+	u64	info;
+	struct {
+		u8		uid[SMC_LGR_ID_SIZE];
+		unsigned short	vlan_id;
+		u16		code;
+	};
+};
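
The named union gives producers and consumers a typed view of the 64-bit event
info word: smcd_handle_sw_event() decodes wrk->event.info through it, and
smc_ism_signal_shutdown() below encodes through it before handing ev_info.info
to signal_event(). Widening uid from u32 to u8[SMC_LGR_ID_SIZE] keeps the
layout at 4 + 2 + 2 = 8 bytes. A userspace round-trip sketch (same field
widths; byte order is whatever the ISM device defines, so treat this purely as
a size/aliasing illustration):

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define LGR_ID_SIZE 4

	union sw_event_info {
		uint64_t info;
		struct {
			uint8_t  uid[LGR_ID_SIZE];
			uint16_t vlan_id;
			uint16_t code;
		};
	};

	int main(void)
	{
		union sw_event_info ev = { 0 };

		assert(sizeof(ev) == sizeof(uint64_t));	/* 4 + 2 + 2 bytes */
		memcpy(ev.uid, "\x01\x02\x03\x04", LGR_ID_SIZE);
		ev.vlan_id = 100;
		ev.code = 0x80;		/* ISM_EVENT_CODE_SHUTDOWN */
		printf("info word: 0x%016llx\n", (unsigned long long)ev.info);
		return 0;
	}
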
+
 static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
 {
-	union {
-		u64	info;
-		struct {
-			u32		uid;
-			unsigned short	vlanid;
-			u16		code;
-		};
-	} ev_info;
+	union smcd_sw_event_info ev_info;
 
+	ev_info.info = wrk->event.info;
 	switch (wrk->event.code) {
+	case ISM_EVENT_CODE_SHUTDOWN:	/* Peer shut down DMBs */
+		smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id);
+		break;
 	case ISM_EVENT_CODE_TESTLINK:	/* Activity timer */
-		ev_info.info = wrk->event.info;
 		if (ev_info.code == ISM_EVENT_REQUEST) {
 			ev_info.code = ISM_EVENT_RESPONSE;
 			wrk->smcd->ops->signal_event(wrk->smcd,
@@ -215,6 +221,21 @@
 	}
 }
 
+int smc_ism_signal_shutdown(struct smc_link_group *lgr)
+{
+	int rc;
+	union smcd_sw_event_info ev_info;
+
+	memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
+	ev_info.vlan_id = lgr->vlan_id;
+	ev_info.code = ISM_EVENT_REQUEST;
+	rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
+					  ISM_EVENT_REQUEST_IR,
+					  ISM_EVENT_CODE_SHUTDOWN,
+					  ev_info.info);
+	return rc;
+}
+
 /* worker for SMC-D events */
 static void smc_ism_event_work(struct work_struct *work)
 {
@@ -223,7 +244,7 @@
 
 	switch (wrk->event.type) {
 	case ISM_EVENT_GID:	/* GID event, token is peer GID */
-		smc_smcd_terminate(wrk->smcd, wrk->event.tok);
+		smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK);
 		break;
 	case ISM_EVENT_DMB:
 		break;
@@ -268,6 +289,11 @@
 	INIT_LIST_HEAD(&smcd->vlan);
 	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
 						 WQ_MEM_RECLAIM, name);
+	if (!smcd->event_wq) {
+		kfree(smcd->conn);
+		kfree(smcd);
+		return NULL;
+	}
 	return smcd;
 }
 EXPORT_SYMBOL_GPL(smcd_alloc_dev);
@@ -289,7 +315,7 @@
 	spin_unlock(&smcd_dev_list.lock);
 	flush_workqueue(smcd->event_wq);
 	destroy_workqueue(smcd->event_wq);
-	smc_smcd_terminate(smcd, 0);
+	smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
 
 	device_del(&smcd->dev);
 }
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index aee45b8..4da946c 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -45,4 +45,5 @@
 int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
 int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
 		  void *data, size_t len);
+int smc_ism_signal_shutdown(struct smc_link_group *lgr);
 #endif
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 9c916c7..4fd60c5 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -166,7 +166,8 @@
 {
 	int rc;
 
-	rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend);
+	rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL,
+				     pend);
 	if (rc < 0)
 		return rc;
 	BUILD_BUG_ON_MSG(
@@ -238,6 +239,29 @@
 	return rc;
 }
 
+/* send LLC delete rkey request */
+static int smc_llc_send_delete_rkey(struct smc_link *link,
+				    struct smc_buf_desc *rmb_desc)
+{
+	struct smc_llc_msg_delete_rkey *rkeyllc;
+	struct smc_wr_tx_pend_priv *pend;
+	struct smc_wr_buf *wr_buf;
+	int rc;
+
+	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+	if (rc)
+		return rc;
+	rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
+	memset(rkeyllc, 0, sizeof(*rkeyllc));
+	rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
+	rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
+	rkeyllc->num_rkeys = 1;
+	rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+	/* send llc message */
+	rc = smc_wr_tx_send(link, pend);
+	return rc;
+}
+
 /* prepare an add link message */
 static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
 				  struct smc_link *link, u8 mac[], u8 gid[],
@@ -509,7 +533,9 @@
 	int i, max;
 
 	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-		/* unused as long as we don't send this type of msg */
+		link->llc_delete_rkey_rc = llc->hd.flags &
+					    SMC_LLC_FLAG_RKEY_NEG;
+		complete(&link->llc_delete_rkey);
 	} else {
 		max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
 		for (i = 0; i < max; i++) {
@@ -610,6 +636,8 @@
 	init_completion(&link->llc_add);
 	init_completion(&link->llc_add_resp);
 	init_completion(&link->llc_confirm_rkey);
+	init_completion(&link->llc_delete_rkey);
+	mutex_init(&link->llc_delete_rkey_mutex);
 	init_completion(&link->llc_testlink_resp);
 	INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
 	return 0;
@@ -650,8 +678,11 @@
 {
 	int rc;
 
+	/* protected by mutex smc_create_lgr_pending */
 	reinit_completion(&link->llc_confirm_rkey);
-	smc_llc_send_confirm_rkey(link, rmb_desc);
+	rc = smc_llc_send_confirm_rkey(link, rmb_desc);
+	if (rc)
+		return rc;
 	/* receive CONFIRM RKEY response from server over RoCE fabric */
 	rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey,
 						       SMC_LLC_WAIT_TIME);
@@ -660,6 +691,29 @@
 	return 0;
 }
 
+/* unregister an rtoken at the remote peer */
+int smc_llc_do_delete_rkey(struct smc_link *link,
+			   struct smc_buf_desc *rmb_desc)
+{
+	int rc;
+
+	mutex_lock(&link->llc_delete_rkey_mutex);
+	reinit_completion(&link->llc_delete_rkey);
+	rc = smc_llc_send_delete_rkey(link, rmb_desc);
+	if (rc)
+		goto out;
+	/* receive DELETE RKEY response from server over RoCE fabric */
+	rc = wait_for_completion_interruptible_timeout(&link->llc_delete_rkey,
+						       SMC_LLC_WAIT_TIME);
+	if (rc <= 0 || link->llc_delete_rkey_rc)
+		rc = -EFAULT;
+	else
+		rc = 0;
+out:
+	mutex_unlock(&link->llc_delete_rkey_mutex);
+	return rc;
+}
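
smc_llc_do_delete_rkey() mirrors the confirm-rkey pattern: a mutex keeps one
DELETE RKEY in flight per link, the completion is re-armed before the send,
and the caller sleeps until the SMC_LLC_FLAG_RESP branch above fires
complete(), or SMC_LLC_WAIT_TIME expires. A hedged pthread model of that
handshake (condition variable plus flag standing in for the kernel completion;
the timeout is omitted for brevity):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t serialize = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	static int done;			/* the "completion" */

	static void *rx_handler(void *arg)	/* response arrives */
	{
		(void)arg;
		pthread_mutex_lock(&lock);
		done = 1;			/* complete() */
		pthread_cond_signal(&cond);
		pthread_mutex_unlock(&lock);
		return NULL;
	}

	static int do_delete_rkey(void)
	{
		pthread_t rx;

		pthread_mutex_lock(&serialize);	/* one request in flight */
		pthread_mutex_lock(&lock);
		done = 0;			/* reinit_completion() */
		pthread_mutex_unlock(&lock);
		pthread_create(&rx, NULL, rx_handler, NULL);	/* "send" */
		pthread_mutex_lock(&lock);
		while (!done)			/* wait_for_completion() */
			pthread_cond_wait(&cond, &lock);
		pthread_mutex_unlock(&lock);
		pthread_join(rx, NULL);
		pthread_mutex_unlock(&serialize);
		return 0;
	}

	int main(void)
	{
		printf("delete rkey rc=%d\n", do_delete_rkey());
		return 0;
	}
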
+
 /***************************** init, exit, misc ******************************/
 
 static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 9e2ff08..461c0c3 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -49,6 +49,8 @@
 void smc_llc_link_clear(struct smc_link *link);
 int smc_llc_do_confirm_rkey(struct smc_link *link,
 			    struct smc_buf_desc *rmb_desc);
+int smc_llc_do_delete_rkey(struct smc_link *link,
+			   struct smc_buf_desc *rmb_desc);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
diff --git a/net/smc/smc_netns.h b/net/smc/smc_netns.h
new file mode 100644
index 0000000..e7a8fc4
--- /dev/null
+++ b/net/smc/smc_netns.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Shared Memory Communications
+ *
+ * Network namespace definitions.
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#ifndef SMC_NETNS_H
+#define SMC_NETNS_H
+
+#include "smc_pnet.h"
+
+extern unsigned int smc_net_id;
+
+/* per-network namespace private data */
+struct smc_net {
+	struct smc_pnettable pnettable;
+};
+#endif
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 7cb3e4f..571e6d8 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -20,14 +20,22 @@
 
 #include <rdma/ib_verbs.h>
 
+#include <net/netns/generic.h>
+#include "smc_netns.h"
+
 #include "smc_pnet.h"
 #include "smc_ib.h"
 #include "smc_ism.h"
+#include "smc_core.h"
+
+#define SMC_ASCII_BLANK 32
+
+static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
 
 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
 	[SMC_PNETID_NAME] = {
 		.type = NLA_NUL_STRING,
-		.len = SMC_MAX_PNETID_LEN - 1
+		.len = SMC_MAX_PNETID_LEN
 	},
 	[SMC_PNETID_ETHNAME] = {
 		.type = NLA_NUL_STRING,
@@ -43,82 +51,107 @@
 static struct genl_family smc_pnet_nl_family;
 
 /**
- * struct smc_pnettable - SMC PNET table anchor
- * @lock: Lock for list action
- * @pnetlist: List of PNETIDs
- */
-static struct smc_pnettable {
-	rwlock_t lock;
-	struct list_head pnetlist;
-} smc_pnettable = {
-	.pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist),
-	.lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock)
-};
-
-/**
- * struct smc_pnetentry - pnet identifier name entry
+ * struct smc_user_pnetentry - pnet identifier name entry for/from user
  * @list: List node.
  * @pnet_name: Pnet identifier name
  * @ndev: pointer to network device.
  * @smcibdev: Pointer to IB device.
+ * @ib_port: Port of IB device.
+ * @smcd_dev: Pointer to smcd device.
  */
-struct smc_pnetentry {
+struct smc_user_pnetentry {
 	struct list_head list;
 	char pnet_name[SMC_MAX_PNETID_LEN + 1];
 	struct net_device *ndev;
 	struct smc_ib_device *smcibdev;
 	u8 ib_port;
+	struct smcd_dev *smcd_dev;
 };
 
-/* Check if two RDMA device entries are identical. Use device name and port
- * number for comparison.
- */
-static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname,
-				 u8 ibport)
-{
-	return pnetelem->ib_port == ibport &&
-	       !strncmp(pnetelem->smcibdev->ibdev->name, ibname,
-			sizeof(pnetelem->smcibdev->ibdev->name));
-}
+/* pnet entry stored in pnet table */
+struct smc_pnetentry {
+	struct list_head list;
+	char pnet_name[SMC_MAX_PNETID_LEN + 1];
+	struct net_device *ndev;
+};
 
-/* Find a pnetid in the pnet table.
- */
-static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name)
+/* Check if two given pnetids match */
+static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2)
 {
-	struct smc_pnetentry *pnetelem, *found_pnetelem = NULL;
+	int i;
 
-	read_lock(&smc_pnettable.lock);
-	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
-		if (!strncmp(pnetelem->pnet_name, pnet_name,
-			     sizeof(pnetelem->pnet_name))) {
-			found_pnetelem = pnetelem;
+	for (i = 0; i < SMC_MAX_PNETID_LEN; i++) {
+		if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) &&
+		    (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK))
 			break;
-		}
+		if (pnetid1[i] != pnetid2[i])
+			return false;
 	}
-	read_unlock(&smc_pnettable.lock);
-	return found_pnetelem;
+	return true;
 }
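
Pnetids are fixed-width 16-byte names that may arrive NUL-terminated (netlink
strings) or blank-padded (fixed-width hardware fields), so the plain
memcmp/strncmp comparisons this patch removes no longer fit; smc_pnet_match()
treats NUL and blank as interchangeable terminators. The logic above compiles
as-is in userspace with a couple of probes:

	#include <stdbool.h>
	#include <stdio.h>

	#define MAX_PNETID_LEN 16
	#define ASCII_BLANK 32

	/* same loop as smc_pnet_match() above, userspace types */
	static bool pnet_match(const unsigned char *p1, const unsigned char *p2)
	{
		for (int i = 0; i < MAX_PNETID_LEN; i++) {
			if ((p1[i] == 0 || p1[i] == ASCII_BLANK) &&
			    (p2[i] == 0 || p2[i] == ASCII_BLANK))
				break;
			if (p1[i] != p2[i])
				return false;
		}
		return true;
	}

	int main(void)
	{
		unsigned char a[MAX_PNETID_LEN] = "PNET1";	/* NUL padded */
		unsigned char b[MAX_PNETID_LEN] = "PNET1   ";	/* blank padded */
		unsigned char c[MAX_PNETID_LEN] = "PNET2";

		printf("a~b: %d, a~c: %d\n", pnet_match(a, b), pnet_match(a, c));
		return 0;	/* a~b: 1, a~c: 0 */
	}
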
 
 /* Remove a pnetid from the pnet table.
  */
-static int smc_pnet_remove_by_pnetid(char *pnet_name)
+static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
 {
 	struct smc_pnetentry *pnetelem, *tmp_pe;
+	struct smc_pnettable *pnettable;
+	struct smc_ib_device *ibdev;
+	struct smcd_dev *smcd_dev;
+	struct smc_net *sn;
 	int rc = -ENOENT;
+	int ibport;
 
-	write_lock(&smc_pnettable.lock);
-	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
+	/* get pnettable for namespace */
+	sn = net_generic(net, smc_net_id);
+	pnettable = &sn->pnettable;
+
+	/* remove netdevices */
+	write_lock(&pnettable->lock);
+	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
 				 list) {
-		if (!strncmp(pnetelem->pnet_name, pnet_name,
-			     sizeof(pnetelem->pnet_name))) {
+		if (!pnet_name ||
+		    smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
 			list_del(&pnetelem->list);
 			dev_put(pnetelem->ndev);
 			kfree(pnetelem);
 			rc = 0;
-			break;
 		}
 	}
-	write_unlock(&smc_pnettable.lock);
+	write_unlock(&pnettable->lock);
+
+	/* if this is not the initial namespace, stop here */
+	if (net != &init_net)
+		return rc;
+
+	/* remove ib devices */
+	spin_lock(&smc_ib_devices.lock);
+	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+		for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
+			if (ibdev->pnetid_by_user[ibport] &&
+			    (!pnet_name ||
+			     smc_pnet_match(pnet_name,
+					    ibdev->pnetid[ibport]))) {
+				memset(ibdev->pnetid[ibport], 0,
+				       SMC_MAX_PNETID_LEN);
+				ibdev->pnetid_by_user[ibport] = false;
+				rc = 0;
+			}
+		}
+	}
+	spin_unlock(&smc_ib_devices.lock);
+	/* remove smcd devices */
+	spin_lock(&smcd_dev_list.lock);
+	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
+		if (smcd_dev->pnetid_by_user &&
+		    (!pnet_name ||
+		     smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
+			memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
+			smcd_dev->pnetid_by_user = false;
+			rc = 0;
+		}
+	}
+	spin_unlock(&smcd_dev_list.lock);
 	return rc;
 }
 
@@ -127,11 +160,17 @@
 static int smc_pnet_remove_by_ndev(struct net_device *ndev)
 {
 	struct smc_pnetentry *pnetelem, *tmp_pe;
+	struct smc_pnettable *pnettable;
+	struct net *net = dev_net(ndev);
+	struct smc_net *sn;
 	int rc = -ENOENT;
 
-	write_lock(&smc_pnettable.lock);
-	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
-				 list) {
+	/* get pnettable for namespace */
+	sn = net_generic(net, smc_net_id);
+	pnettable = &sn->pnettable;
+
+	write_lock(&pnettable->lock);
+	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
 		if (pnetelem->ndev == ndev) {
 			list_del(&pnetelem->list);
 			dev_put(pnetelem->ndev);
@@ -140,57 +179,84 @@
 			break;
 		}
 	}
-	write_unlock(&smc_pnettable.lock);
-	return rc;
-}
-
-/* Remove a pnet entry mentioning a given ib device from the pnet table.
- */
-int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev)
-{
-	struct smc_pnetentry *pnetelem, *tmp_pe;
-	int rc = -ENOENT;
-
-	write_lock(&smc_pnettable.lock);
-	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
-				 list) {
-		if (pnetelem->smcibdev == ibdev) {
-			list_del(&pnetelem->list);
-			dev_put(pnetelem->ndev);
-			kfree(pnetelem);
-			rc = 0;
-			break;
-		}
-	}
-	write_unlock(&smc_pnettable.lock);
+	write_unlock(&pnettable->lock);
 	return rc;
 }
 
 /* Append a pnetid to the end of the pnet table if not already on this list.
  */
-static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
+static int smc_pnet_enter(struct smc_pnettable *pnettable,
+			  struct smc_user_pnetentry *new_pnetelem)
 {
+	u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
+	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+	struct smc_pnetentry *tmp_pnetelem;
 	struct smc_pnetentry *pnetelem;
-	int rc = -EEXIST;
+	bool new_smcddev = false;
+	struct net_device *ndev;
+	bool new_netdev = true;
+	bool new_ibdev = false;
 
-	write_lock(&smc_pnettable.lock);
-	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
-		if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name,
-			     sizeof(new_pnetelem->pnet_name)) ||
-		    !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name,
-			     sizeof(new_pnetelem->ndev->name)) ||
-		    smc_pnet_same_ibname(pnetelem,
-					 new_pnetelem->smcibdev->ibdev->name,
-					 new_pnetelem->ib_port)) {
-			dev_put(pnetelem->ndev);
-			goto found;
+	if (new_pnetelem->smcibdev) {
+		struct smc_ib_device *ib_dev = new_pnetelem->smcibdev;
+		int ib_port = new_pnetelem->ib_port;
+
+		spin_lock(&smc_ib_devices.lock);
+		if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
+			memcpy(ib_dev->pnetid[ib_port - 1],
+			       new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
+			ib_dev->pnetid_by_user[ib_port - 1] = true;
+			new_ibdev = true;
 		}
+		spin_unlock(&smc_ib_devices.lock);
 	}
-	list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
-	rc = 0;
-found:
-	write_unlock(&smc_pnettable.lock);
-	return rc;
+	if (new_pnetelem->smcd_dev) {
+		struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;
+
+		spin_lock(&smcd_dev_list.lock);
+		if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
+			memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name,
+			       SMC_MAX_PNETID_LEN);
+			smcd_dev->pnetid_by_user = true;
+			new_smcddev = true;
+		}
+		spin_unlock(&smcd_dev_list.lock);
+	}
+
+	if (!new_pnetelem->ndev)
+		return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
+
+	/* check if (base) netdev already has a pnetid. If there is one, we do
+	 * not want to add a pnet table entry
+	 */
+	ndev = pnet_find_base_ndev(new_pnetelem->ndev);
+	if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
+				    ndev_pnetid))
+		return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
+
+	/* add a new netdev entry to the pnet table if there isn't one */
+	tmp_pnetelem = kzalloc(sizeof(*tmp_pnetelem), GFP_KERNEL);
+	if (!tmp_pnetelem)
+		return -ENOMEM;
+	memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name,
+	       SMC_MAX_PNETID_LEN);
+	tmp_pnetelem->ndev = new_pnetelem->ndev;
+
+	write_lock(&pnettable->lock);
+	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
+		if (pnetelem->ndev == new_pnetelem->ndev)
+			new_netdev = false;
+	}
+	if (new_netdev) {
+		dev_hold(tmp_pnetelem->ndev);
+		list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist);
+		write_unlock(&pnettable->lock);
+	} else {
+		write_unlock(&pnettable->lock);
+		kfree(tmp_pnetelem);
+	}
+
+	return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST;
 }
 
 /* The limit for pnetid is 16 characters.
@@ -228,7 +294,9 @@
 	spin_lock(&smc_ib_devices.lock);
 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 		if (!strncmp(ibdev->ibdev->name, ib_name,
-			     sizeof(ibdev->ibdev->name))) {
+			     sizeof(ibdev->ibdev->name)) ||
+		    !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
+			     IB_DEVICE_NAME_MAX - 1)) {
 			goto out;
 		}
 	}
@@ -238,10 +306,28 @@
 	return ibdev;
 }
 
+/* Find an smcd device by a given name. The device might not exist. */
+static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
+{
+	struct smcd_dev *smcd_dev;
+
+	spin_lock(&smcd_dev_list.lock);
+	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
+		if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
+			     IB_DEVICE_NAME_MAX - 1))
+			goto out;
+	}
+	smcd_dev = NULL;
+out:
+	spin_unlock(&smcd_dev_list.lock);
+	return smcd_dev;
+}
+
 /* Parse the supplied netlink attributes and fill a pnetentry structure.
  * For ethernet and infiniband device names verify that the devices exist.
  */
-static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
+static int smc_pnet_fill_entry(struct net *net,
+			       struct smc_user_pnetentry *pnetelem,
 			       struct nlattr *tb[])
 {
 	char *string, *ibname;
@@ -258,113 +344,101 @@
 		goto error;
 
 	rc = -EINVAL;
-	if (!tb[SMC_PNETID_ETHNAME])
-		goto error;
-	rc = -ENOENT;
-	string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
-	pnetelem->ndev = dev_get_by_name(net, string);
-	if (!pnetelem->ndev)
-		goto error;
+	if (tb[SMC_PNETID_ETHNAME]) {
+		string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
+		pnetelem->ndev = dev_get_by_name(net, string);
+		if (!pnetelem->ndev)
+			goto error;
+	}
+
+	/* if this is not the initial namespace, stop here */
+	if (net != &init_net)
+		return 0;
 
 	rc = -EINVAL;
-	if (!tb[SMC_PNETID_IBNAME])
-		goto error;
-	rc = -ENOENT;
-	ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
-	ibname = strim(ibname);
-	pnetelem->smcibdev = smc_pnet_find_ib(ibname);
-	if (!pnetelem->smcibdev)
-		goto error;
-
-	rc = -EINVAL;
-	if (!tb[SMC_PNETID_IBPORT])
-		goto error;
-	pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
-	if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
-		goto error;
+	if (tb[SMC_PNETID_IBNAME]) {
+		ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+		ibname = strim(ibname);
+		pnetelem->smcibdev = smc_pnet_find_ib(ibname);
+		pnetelem->smcd_dev = smc_pnet_find_smcd(ibname);
+		if (!pnetelem->smcibdev && !pnetelem->smcd_dev)
+			goto error;
+		if (pnetelem->smcibdev) {
+			if (!tb[SMC_PNETID_IBPORT])
+				goto error;
+			pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+			if (pnetelem->ib_port < 1 ||
+			    pnetelem->ib_port > SMC_MAX_PORTS)
+				goto error;
+		}
+	}
 
 	return 0;
 
 error:
-	if (pnetelem->ndev)
-		dev_put(pnetelem->ndev);
 	return rc;
 }
 
 /* Convert an smc_pnetentry to a netlink attribute sequence */
-static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem)
+static int smc_pnet_set_nla(struct sk_buff *msg,
+			    struct smc_user_pnetentry *pnetelem)
 {
-	if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) ||
-	    nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) ||
-	    nla_put_string(msg, SMC_PNETID_IBNAME,
-			   pnetelem->smcibdev->ibdev->name) ||
-	    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
+	if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
 		return -1;
+	if (pnetelem->ndev) {
+		if (nla_put_string(msg, SMC_PNETID_ETHNAME,
+				   pnetelem->ndev->name))
+			return -1;
+	} else {
+		if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
+			return -1;
+	}
+	if (pnetelem->smcibdev) {
+		if (nla_put_string(msg, SMC_PNETID_IBNAME,
+			dev_name(pnetelem->smcibdev->ibdev->dev.parent)) ||
+		    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
+			return -1;
+	} else if (pnetelem->smcd_dev) {
+		if (nla_put_string(msg, SMC_PNETID_IBNAME,
+				   dev_name(&pnetelem->smcd_dev->dev)) ||
+		    nla_put_u8(msg, SMC_PNETID_IBPORT, 1))
+			return -1;
+	} else {
+		if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
+		    nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
+			return -1;
+	}
+
 	return 0;
 }
 
-/* Retrieve one PNETID entry */
-static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
-{
-	struct smc_pnetentry *pnetelem;
-	struct sk_buff *msg;
-	void *hdr;
-	int rc;
-
-	if (!info->attrs[SMC_PNETID_NAME])
-		return -EINVAL;
-	pnetelem = smc_pnet_find_pnetid(
-				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
-	if (!pnetelem)
-		return -ENOENT;
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
-			  &smc_pnet_nl_family, 0, SMC_PNETID_GET);
-	if (!hdr) {
-		rc = -EMSGSIZE;
-		goto err_out;
-	}
-
-	if (smc_pnet_set_nla(msg, pnetelem)) {
-		rc = -ENOBUFS;
-		goto err_out;
-	}
-
-	genlmsg_end(msg, hdr);
-	return genlmsg_reply(msg, info);
-
-err_out:
-	nlmsg_free(msg);
-	return rc;
-}
-
 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
 {
 	struct net *net = genl_info_net(info);
-	struct smc_pnetentry *pnetelem;
+	struct smc_user_pnetentry pnetelem;
+	struct smc_pnettable *pnettable;
+	struct smc_net *sn;
 	int rc;
 
-	pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
-	if (!pnetelem)
-		return -ENOMEM;
-	rc = smc_pnet_fill_entry(net, pnetelem, info->attrs);
+	/* get pnettable for namespace */
+	sn = net_generic(net, smc_net_id);
+	pnettable = &sn->pnettable;
+
+	rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs);
 	if (!rc)
-		rc = smc_pnet_enter(pnetelem);
-	if (rc) {
-		kfree(pnetelem);
-		return rc;
-	}
+		rc = smc_pnet_enter(pnettable, &pnetelem);
+	if (pnetelem.ndev)
+		dev_put(pnetelem.ndev);
 	return rc;
 }
 
 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
 {
+	struct net *net = genl_info_net(info);
+
 	if (!info->attrs[SMC_PNETID_NAME])
 		return -EINVAL;
-	return smc_pnet_remove_by_pnetid(
+	return smc_pnet_remove_by_pnetid(net,
 				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
 }
 
@@ -376,7 +450,7 @@
 
 static int smc_pnet_dumpinfo(struct sk_buff *skb,
 			     u32 portid, u32 seq, u32 flags,
-			     struct smc_pnetentry *pnetelem)
+			     struct smc_user_pnetentry *pnetelem)
 {
 	void *hdr;
 
@@ -392,41 +466,143 @@
 	return 0;
 }
 
-static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
+static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
+			  u32 seq, u8 *pnetid, int start_idx)
 {
+	struct smc_user_pnetentry tmp_entry;
+	struct smc_pnettable *pnettable;
 	struct smc_pnetentry *pnetelem;
+	struct smc_ib_device *ibdev;
+	struct smcd_dev *smcd_dev;
+	struct smc_net *sn;
 	int idx = 0;
+	int ibport;
 
-	read_lock(&smc_pnettable.lock);
-	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
-		if (idx++ < cb->args[0])
+	/* get pnettable for namespace */
+	sn = net_generic(net, smc_net_id);
+	pnettable = &sn->pnettable;
+
+	/* dump netdevices */
+	read_lock(&pnettable->lock);
+	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
+		if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
 			continue;
-		if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid,
-				      cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				      pnetelem)) {
+		if (idx++ < start_idx)
+			continue;
+		memset(&tmp_entry, 0, sizeof(tmp_entry));
+		memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name,
+		       SMC_MAX_PNETID_LEN);
+		tmp_entry.ndev = pnetelem->ndev;
+		if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
+				      &tmp_entry)) {
 			--idx;
 			break;
 		}
 	}
+	read_unlock(&pnettable->lock);
+
+	/* if this is not the initial namespace, stop here */
+	if (net != &init_net)
+		return idx;
+
+	/* dump ib devices */
+	spin_lock(&smc_ib_devices.lock);
+	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+		for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
+			if (ibdev->pnetid_by_user[ibport]) {
+				if (pnetid &&
+				    !smc_pnet_match(ibdev->pnetid[ibport],
+						    pnetid))
+					continue;
+				if (idx++ < start_idx)
+					continue;
+				memset(&tmp_entry, 0, sizeof(tmp_entry));
+				memcpy(&tmp_entry.pnet_name,
+				       ibdev->pnetid[ibport],
+				       SMC_MAX_PNETID_LEN);
+				tmp_entry.smcibdev = ibdev;
+				tmp_entry.ib_port = ibport + 1;
+				if (smc_pnet_dumpinfo(skb, portid, seq,
+						      NLM_F_MULTI,
+						      &tmp_entry)) {
+					--idx;
+					break;
+				}
+			}
+		}
+	}
+	spin_unlock(&smc_ib_devices.lock);
+
+	/* dump smcd devices */
+	spin_lock(&smcd_dev_list.lock);
+	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
+		if (smcd_dev->pnetid_by_user) {
+			if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid))
+				continue;
+			if (idx++ < start_idx)
+				continue;
+			memset(&tmp_entry, 0, sizeof(tmp_entry));
+			memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid,
+			       SMC_MAX_PNETID_LEN);
+			tmp_entry.smcd_dev = smcd_dev;
+			if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
+					      &tmp_entry)) {
+				--idx;
+				break;
+			}
+		}
+	}
+	spin_unlock(&smcd_dev_list.lock);
+
+	return idx;
+}
+
+static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	int idx;
+
+	idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid,
+			     cb->nlh->nlmsg_seq, NULL, cb->args[0]);
+
 	cb->args[0] = idx;
-	read_unlock(&smc_pnettable.lock);
 	return skb->len;
 }
 
+/* Retrieve one PNETID entry */
+static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct sk_buff *msg;
+	void *hdr;
+
+	if (!info->attrs[SMC_PNETID_NAME])
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	_smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq,
+		       nla_data(info->attrs[SMC_PNETID_NAME]), 0);
+
+	/* finish multi part message and send it */
+	hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
+			NLM_F_MULTI);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return -EMSGSIZE;
+	}
+	return genlmsg_reply(msg, info);
+}
+
 /* Remove and delete all pnetids from pnet table.
  */
 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
 {
-	struct smc_pnetentry *pnetelem, *tmp_pe;
+	struct net *net = genl_info_net(info);
 
-	write_lock(&smc_pnettable.lock);
-	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
-				 list) {
-		list_del(&pnetelem->list);
-		dev_put(pnetelem->ndev);
-		kfree(pnetelem);
-	}
-	write_unlock(&smc_pnettable.lock);
+	smc_pnet_remove_by_pnetid(net, NULL);
 	return 0;
 }
 
@@ -434,28 +610,28 @@
 static const struct genl_ops smc_pnet_ops[] = {
 	{
 		.cmd = SMC_PNETID_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = smc_pnet_policy,
 		.doit = smc_pnet_get,
 		.dumpit = smc_pnet_dump,
 		.start = smc_pnet_dump_start
 	},
 	{
 		.cmd = SMC_PNETID_ADD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = smc_pnet_policy,
 		.doit = smc_pnet_add
 	},
 	{
 		.cmd = SMC_PNETID_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = smc_pnet_policy,
 		.doit = smc_pnet_del
 	},
 	{
 		.cmd = SMC_PNETID_FLUSH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = smc_pnet_policy,
 		.doit = smc_pnet_flush
 	}
 };
@@ -466,6 +642,7 @@
 	.name = SMCR_GENL_FAMILY_NAME,
 	.version = SMCR_GENL_FAMILY_VERSION,
 	.maxattr = SMC_PNETID_MAX,
+	.policy = smc_pnet_policy,
 	.netnsok = true,
 	.module = THIS_MODULE,
 	.ops = smc_pnet_ops,
@@ -491,6 +668,18 @@
 	.notifier_call = smc_pnet_netdev_event
 };
 
+/* init network namespace */
+int smc_pnet_net_init(struct net *net)
+{
+	struct smc_net *sn = net_generic(net, smc_net_id);
+	struct smc_pnettable *pnettable = &sn->pnettable;
+
+	INIT_LIST_HEAD(&pnettable->pnetlist);
+	rwlock_init(&pnettable->lock);
+
+	return 0;
+}
+
 int __init smc_pnet_init(void)
 {
 	int rc;
@@ -504,9 +693,15 @@
 	return rc;
 }
 
+/* exit network namespace */
+void smc_pnet_net_exit(struct net *net)
+{
+	/* flush pnet table */
+	smc_pnet_remove_by_pnetid(net, NULL);
+}
+
 void smc_pnet_exit(void)
 {
-	smc_pnet_flush(NULL, NULL);
 	unregister_netdevice_notifier(&smc_netdev_notifier);
 	genl_unregister_family(&smc_pnet_nl_family);
 }
@@ -521,7 +716,7 @@
 	int i, nest_lvl;
 
 	rtnl_lock();
-	nest_lvl = dev_get_nest_level(ndev);
+	nest_lvl = ndev->lower_level;
 	for (i = 0; i < nest_lvl; i++) {
 		struct list_head *lower = &ndev->adj_list.lower;
 
@@ -534,14 +729,75 @@
 	return ndev;
 }
 
+static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
+					      u8 *pnetid)
+{
+	struct smc_pnettable *pnettable;
+	struct net *net = dev_net(ndev);
+	struct smc_pnetentry *pnetelem;
+	struct smc_net *sn;
+	int rc = -ENOENT;
+
+	/* get pnettable for namespace */
+	sn = net_generic(net, smc_net_id);
+	pnettable = &sn->pnettable;
+
+	read_lock(&pnettable->lock);
+	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
+		if (ndev == pnetelem->ndev) {
+			/* get pnetid of netdev device */
+			memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
+			rc = 0;
+			break;
+		}
+	}
+	read_unlock(&pnettable->lock);
+	return rc;
+}
+
+/* if handshake network device belongs to a roce device, return its
+ * IB device and port
+ */
+static void smc_pnet_find_rdma_dev(struct net_device *netdev,
+				   struct smc_init_info *ini)
+{
+	struct smc_ib_device *ibdev;
+
+	spin_lock(&smc_ib_devices.lock);
+	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+		struct net_device *ndev;
+		int i;
+
+		for (i = 1; i <= SMC_MAX_PORTS; i++) {
+			if (!rdma_is_port_valid(ibdev->ibdev, i))
+				continue;
+			if (!ibdev->ibdev->ops.get_netdev)
+				continue;
+			ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i);
+			if (!ndev)
+				continue;
+			dev_put(ndev);
+			if (netdev == ndev &&
+			    smc_ib_port_active(ibdev, i) &&
+			    !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+						  ini->ib_gid, NULL)) {
+				ini->ib_dev = ibdev;
+				ini->ib_port = i;
+				break;
+			}
+		}
+	}
+	spin_unlock(&smc_ib_devices.lock);
+}
+
 /* Determine the corresponding IB device port based on the hardware PNETID.
  * Searching stops at the first matching active IB device port with vlan_id
  * configured.
+ * If nothing found, check pnetid table.
+ * If nothing found, try to use handshake device
  */
 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
-					 struct smc_ib_device **smcibdev,
-					 u8 *ibport, unsigned short vlan_id,
-					 u8 gid[])
+					 struct smc_init_info *ini)
 {
 	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
 	struct smc_ib_device *ibdev;
@@ -549,21 +805,23 @@
 
 	ndev = pnet_find_base_ndev(ndev);
 	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
-				   ndev_pnetid))
+				   ndev_pnetid) &&
+	    smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
+		smc_pnet_find_rdma_dev(ndev, ini);
 		return; /* pnetid could not be determined */
+	}
 
 	spin_lock(&smc_ib_devices.lock);
 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 		for (i = 1; i <= SMC_MAX_PORTS; i++) {
 			if (!rdma_is_port_valid(ibdev->ibdev, i))
 				continue;
-			if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
-				    SMC_MAX_PNETID_LEN) &&
+			if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
 			    smc_ib_port_active(ibdev, i) &&
-			    !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
-						  NULL))  {
-				*smcibdev = ibdev;
-				*ibport = i;
+			    !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+						  ini->ib_gid, NULL)) {
+				ini->ib_dev = ibdev;
+				ini->ib_port = i;
 				goto out;
 			}
 		}
@@ -573,76 +831,43 @@
 }
 
 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
-					struct smcd_dev **smcismdev)
+					struct smc_init_info *ini)
 {
 	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
 	struct smcd_dev *ismdev;
 
 	ndev = pnet_find_base_ndev(ndev);
 	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
-				   ndev_pnetid))
+				   ndev_pnetid) &&
+	    smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
 		return; /* pnetid could not be determined */
 
 	spin_lock(&smcd_dev_list.lock);
 	list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
-		if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
-			*smcismdev = ismdev;
+		if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
+			ini->ism_dev = ismdev;
 			break;
 		}
 	}
 	spin_unlock(&smcd_dev_list.lock);
 }
 
-/* Lookup of coupled ib_device via SMC pnet table */
-static void smc_pnet_find_roce_by_table(struct net_device *netdev,
-					struct smc_ib_device **smcibdev,
-					u8 *ibport, unsigned short vlan_id,
-					u8 gid[])
-{
-	struct smc_pnetentry *pnetelem;
-
-	read_lock(&smc_pnettable.lock);
-	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
-		if (netdev == pnetelem->ndev) {
-			if (smc_ib_port_active(pnetelem->smcibdev,
-					       pnetelem->ib_port) &&
-			    !smc_ib_determine_gid(pnetelem->smcibdev,
-						  pnetelem->ib_port, vlan_id,
-						  gid, NULL)) {
-				*smcibdev = pnetelem->smcibdev;
-				*ibport = pnetelem->ib_port;
-			}
-			break;
-		}
-	}
-	read_unlock(&smc_pnettable.lock);
-}
-
 /* PNET table analysis for a given sock:
  * determine ib_device and port belonging to used internal TCP socket
  * ethernet interface.
  */
-void smc_pnet_find_roce_resource(struct sock *sk,
-				 struct smc_ib_device **smcibdev, u8 *ibport,
-				 unsigned short vlan_id, u8 gid[])
+void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
 {
 	struct dst_entry *dst = sk_dst_get(sk);
 
-	*smcibdev = NULL;
-	*ibport = 0;
-
+	ini->ib_dev = NULL;
+	ini->ib_port = 0;
 	if (!dst)
 		goto out;
 	if (!dst->dev)
 		goto out_rel;
 
-	/* if possible, lookup via hardware-defined pnetid */
-	smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid);
-	if (*smcibdev)
-		goto out_rel;
-
-	/* lookup via SMC PNET table */
-	smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid);
+	smc_pnet_find_roce_by_pnetid(dst->dev, ini);
 
 out_rel:
 	dst_release(dst);
@@ -650,18 +875,17 @@
 	return;
 }
 
-void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
+void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
 {
 	struct dst_entry *dst = sk_dst_get(sk);
 
-	*smcismdev = NULL;
+	ini->ism_dev = NULL;
 	if (!dst)
 		goto out;
 	if (!dst->dev)
 		goto out_rel;
 
-	/* if possible, lookup via hardware-defined pnetid */
-	smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
+	smc_pnet_find_ism_by_pnetid(dst->dev, ini);
 
 out_rel:
 	dst_release(dst);
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 8ff7776..4564e4d 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -18,6 +18,17 @@
 
 struct smc_ib_device;
 struct smcd_dev;
+struct smc_init_info;
+
+/**
+ * struct smc_pnettable - SMC PNET table anchor
+ * @lock: Lock for list action
+ * @pnetlist: List of PNETIDs
+ */
+struct smc_pnettable {
+	rwlock_t lock;
+	struct list_head pnetlist;
+};
 
 static inline int smc_pnetid_by_dev_port(struct device *dev,
 					 unsigned short port, u8 *pnetid)
@@ -30,11 +41,10 @@
 }
 
 int smc_pnet_init(void) __init;
+int smc_pnet_net_init(struct net *net);
 void smc_pnet_exit(void);
-int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
-void smc_pnet_find_roce_resource(struct sock *sk,
-				 struct smc_ib_device **smcibdev, u8 *ibport,
-				 unsigned short vlan_id, u8 gid[]);
-void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
+void smc_pnet_net_exit(struct net *net);
+void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
+void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
 
 #endif
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index bbcf0fe..97e8369 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -136,7 +136,6 @@
 }
 
 static const struct pipe_buf_operations smc_pipe_ops = {
-	.can_merge = 0,
 	.confirm = generic_pipe_buf_confirm,
 	.release = smc_rx_pipe_buf_release,
 	.steal = smc_rx_pipe_buf_nosteal,
@@ -212,8 +211,7 @@
 	rc = sk_wait_event(sk, timeo,
 			   sk->sk_err ||
 			   sk->sk_shutdown & RCV_SHUTDOWN ||
-			   fcrit(conn) ||
-			   smc_cdc_rxed_any_close_or_senddone(conn),
+			   fcrit(conn),
 			   &wait);
 	remove_wait_queue(sk_sleep(sk), &wait);
 	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
@@ -263,6 +261,18 @@
 	return -EAGAIN;
 }
 
+static bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
+{
+	struct smc_connection *conn = &smc->conn;
+
+	if (smc_rx_data_available(conn))
+		return true;
+	else if (conn->urg_state == SMC_URG_VALID)
+		/* we received a single urgent Byte - skip */
+		smc_rx_update_cons(smc, 0);
+	return false;
+}
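
The helper is deliberately called a second time after RCV_SHUTDOWN or a peer
abort is seen: smc_cdc_msg_recv_action() may deliver the connection's final
bytes between the first availability check and the shutdown check, and without
the recheck those bytes would be thrown away. The shape of the race in a
minimal two-thread model (C11 atomics; the kernel relies on its own barriers,
this only illustrates the ordering):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int bytes_to_rcv;
	static atomic_int shut_down;

	static void *producer(void *arg)  /* smc_cdc_msg_recv_action() analogue */
	{
		(void)arg;
		atomic_store(&bytes_to_rcv, 42);	/* final data... */
		atomic_store(&shut_down, 1);		/* ...then shutdown */
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, producer, NULL);
		for (;;) {
			if (atomic_load(&bytes_to_rcv))
				break;			/* "goto copy" */
			if (atomic_load(&shut_down)) {
				/* data may have landed after the first check */
				if (atomic_load(&bytes_to_rcv))
					break;		/* recheck: "goto copy" */
				puts("no data, honoring shutdown");
				break;
			}
		}
		if (atomic_load(&bytes_to_rcv))
			printf("read %d bytes first\n",
			       atomic_load(&bytes_to_rcv));
		pthread_join(t, NULL);
		return 0;
	}
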
+
 /* smc_rx_recvmsg - receive data from RMBE
  * @msg:	copy data to receive buffer
  * @pipe:	copy data to pipe if set - indicates splice() call
@@ -304,16 +314,18 @@
 		if (read_done >= target || (pipe && read_done))
 			break;
 
-		if (atomic_read(&conn->bytes_to_rcv))
+		if (smc_rx_recvmsg_data_available(smc))
 			goto copy;
-		else if (conn->urg_state == SMC_URG_VALID)
-			/* we received a single urgent Byte - skip */
-			smc_rx_update_cons(smc, 0);
 
 		if (sk->sk_shutdown & RCV_SHUTDOWN ||
-		    smc_cdc_rxed_any_close_or_senddone(conn) ||
-		    conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+		    conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) {
+			/* smc_cdc_msg_recv_action() could have run after
+			 * above smc_rx_recvmsg_data_available()
+			 */
+			if (smc_rx_recvmsg_data_available(smc))
+				goto copy;
 			break;
+		}
 
 		if (read_done) {
 			if (sk->sk_err ||
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index d8366ed..6c8f09c 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -24,10 +24,11 @@
 #include "smc.h"
 #include "smc_wr.h"
 #include "smc_cdc.h"
+#include "smc_close.h"
 #include "smc_ism.h"
 #include "smc_tx.h"
 
-#define SMC_TX_WORK_DELAY	HZ
+#define SMC_TX_WORK_DELAY	0
 #define SMC_TX_CORK_DELAY	(HZ >> 2)	/* 250 ms */
 
 /***************************** sndbuf producer *******************************/
@@ -75,13 +76,11 @@
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 	struct smc_connection *conn = &smc->conn;
 	struct sock *sk = &smc->sk;
-	bool noblock;
 	long timeo;
 	int rc = 0;
 
 	/* similar to sk_stream_wait_memory */
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
-	noblock = timeo ? false : true;
 	add_wait_queue(sk_sleep(sk), &wait);
 	while (1) {
 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -96,8 +95,8 @@
 			break;
 		}
 		if (!timeo) {
-			if (noblock)
-				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+			/* ensure EPOLLOUT is subsequently generated */
+			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 			rc = -EAGAIN;
 			break;
 		}
@@ -165,12 +164,11 @@
 			conn->local_tx_ctrl.prod_flags.urg_data_pending = 1;
 
 		if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) {
+			if (send_done)
+				return send_done;
 			rc = smc_tx_wait(smc, msg->msg_flags);
-			if (rc) {
-				if (send_done)
-					return send_done;
+			if (rc)
 				goto out_err;
-			}
 			continue;
 		}
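
Returning send_done before smc_tx_wait() gives smc_tx_sendmsg() conventional
short-write semantics: once some bytes are queued, a full send buffer reports
the partial count instead of blocking (or failing with -EAGAIN) and losing the
progress. That matches what send(2) callers must do anyway, as in this
userspace sketch:

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <errno.h>
	#include <stddef.h>

	/* send all of buf, tolerating short writes; returns 0 or -errno */
	static int send_all(int fd, const char *buf, size_t len)
	{
		size_t done = 0;

		while (done < len) {
			ssize_t n = send(fd, buf + done, len - done, 0);

			if (n < 0) {
				if (errno == EINTR)
					continue;	/* interrupted: retry */
				return -errno;	/* incl. EAGAIN if nonblocking */
			}
			done += (size_t)n;	/* short write: keep looping */
		}
		return 0;
	}
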
 
@@ -267,27 +265,23 @@
 
 /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
 static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
-			     int num_sges, struct ib_sge sges[])
+			     int num_sges, struct ib_rdma_wr *rdma_wr)
 {
 	struct smc_link_group *lgr = conn->lgr;
-	struct ib_rdma_wr rdma_wr;
 	struct smc_link *link;
 	int rc;
 
-	memset(&rdma_wr, 0, sizeof(rdma_wr));
 	link = &lgr->lnk[SMC_SINGLE_LINK];
-	rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link);
-	rdma_wr.wr.sg_list = sges;
-	rdma_wr.wr.num_sge = num_sges;
-	rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
-	rdma_wr.remote_addr =
+	rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
+	rdma_wr->wr.num_sge = num_sges;
+	rdma_wr->remote_addr =
 		lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
 		/* RMBE within RMB */
 		conn->tx_off +
 		/* offset within RMBE */
 		peer_rmbe_offset;
-	rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
-	rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL);
+	rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
+	rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
 	if (rc) {
 		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
 		smc_lgr_terminate(lgr);
@@ -314,24 +308,25 @@
 /* SMC-R helper for smc_tx_rdma_writes() */
 static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
 			       size_t src_off, size_t src_len,
-			       size_t dst_off, size_t dst_len)
+			       size_t dst_off, size_t dst_len,
+			       struct smc_rdma_wr *wr_rdma_buf)
 {
 	dma_addr_t dma_addr =
 		sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
-	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
 	int src_len_sum = src_len, dst_len_sum = dst_len;
-	struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
 	int sent_count = src_off;
 	int srcchunk, dstchunk;
 	int num_sges;
 	int rc;
 
 	for (dstchunk = 0; dstchunk < 2; dstchunk++) {
+		struct ib_sge *sge =
+			wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list;
+
 		num_sges = 0;
 		for (srcchunk = 0; srcchunk < 2; srcchunk++) {
-			sges[srcchunk].addr = dma_addr + src_off;
-			sges[srcchunk].length = src_len;
-			sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
+			sge[srcchunk].addr = dma_addr + src_off;
+			sge[srcchunk].length = src_len;
 			num_sges++;
 
 			src_off += src_len;
@@ -344,7 +339,8 @@
 			src_len = dst_len - src_len; /* remainder */
 			src_len_sum += src_len;
 		}
-		rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
+		rc = smc_tx_rdma_write(conn, dst_off, num_sges,
+				       &wr_rdma_buf->wr_tx_rdma[dstchunk]);
 		if (rc)
 			return rc;
 		if (dst_len_sum == len)
@@ -403,7 +399,8 @@
 /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
  * usable snd_wnd as max transmit
  */
-static int smc_tx_rdma_writes(struct smc_connection *conn)
+static int smc_tx_rdma_writes(struct smc_connection *conn,
+			      struct smc_rdma_wr *wr_rdma_buf)
 {
 	size_t len, src_len, dst_off, dst_len; /* current chunk values */
 	union smc_host_cursor sent, prep, prod, cons;
@@ -464,7 +461,7 @@
 					 dst_off, dst_len);
 	else
 		rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
-					 dst_off, dst_len);
+					 dst_off, dst_len, wr_rdma_buf);
 	if (rc)
 		return rc;
 
@@ -484,32 +481,31 @@
  */
 static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 {
-	struct smc_cdc_producer_flags *pflags;
+	struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
+	struct smc_rdma_wr *wr_rdma_buf;
 	struct smc_cdc_tx_pend *pend;
 	struct smc_wr_buf *wr_buf;
 	int rc;
 
-	spin_lock_bh(&conn->send_lock);
-	rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
+	rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend);
 	if (rc < 0) {
 		if (rc == -EBUSY) {
 			struct smc_sock *smc =
 				container_of(conn, struct smc_sock, conn);
 
-			if (smc->sk.sk_err == ECONNABORTED) {
-				rc = sock_error(&smc->sk);
-				goto out_unlock;
-			}
+			if (smc->sk.sk_err == ECONNABORTED)
+				return sock_error(&smc->sk);
 			rc = 0;
 			if (conn->alert_token_local) /* connection healthy */
 				mod_delayed_work(system_wq, &conn->tx_work,
 						 SMC_TX_WORK_DELAY);
 		}
-		goto out_unlock;
+		return rc;
 	}
 
-	if (!conn->local_tx_ctrl.prod_flags.urg_data_present) {
-		rc = smc_tx_rdma_writes(conn);
+	spin_lock_bh(&conn->send_lock);
+	if (!pflags->urg_data_present) {
+		rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
 		if (rc) {
 			smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
 					   (struct smc_wr_tx_pend_priv *)pend);
@@ -518,7 +514,6 @@
 	}
 
 	rc = smc_cdc_msg_send(conn, wr_buf, pend);
-	pflags = &conn->local_tx_ctrl.prod_flags;
 	if (!rc && pflags->urg_data_present) {
 		pflags->urg_data_pending = 0;
 		pflags->urg_data_present = 0;
@@ -536,7 +531,7 @@
 
 	spin_lock_bh(&conn->send_lock);
 	if (!pflags->urg_data_present)
-		rc = smc_tx_rdma_writes(conn);
+		rc = smc_tx_rdma_writes(conn, NULL);
 	if (!rc)
 		rc = smcd_cdc_msg_send(conn);
 
@@ -557,6 +552,12 @@
 	else
 		rc = smcr_tx_sndbuf_nonempty(conn);
 
+	if (!rc) {
+		/* trigger socket release if connection is closing */
+		struct smc_sock *smc = container_of(conn, struct smc_sock,
+						    conn);
+		smc_close_wake_tx_prepared(smc);
+	}
 	return rc;
 }
 
@@ -598,7 +599,8 @@
 	if (to_confirm > conn->rmbe_update_limit) {
 		smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn);
 		sender_free = conn->rmb_desc->len -
-			      smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);
+			      smc_curs_diff_large(conn->rmb_desc->len,
+						  &cfed, &prod);
 	}
 
 	if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
@@ -612,9 +614,6 @@
 					      SMC_TX_WORK_DELAY);
 			return;
 		}
-		smc_curs_copy(&conn->rx_curs_confirmed,
-			      &conn->local_tx_ctrl.cons, conn);
-		conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
 	}
 	if (conn->local_rx_ctrl.prod_flags.write_blocked &&
 	    !atomic_read(&conn->bytes_to_rcv))
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 3c458d2..253aa75 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -160,6 +160,7 @@
  * @link:		Pointer to smc_link used to later send the message.
  * @handler:		Send completion handler function pointer.
  * @wr_buf:		Out value returns pointer to message buffer.
+ * @wr_rdma_buf:	Out value returns pointer to rdma work request.
  * @wr_pend_priv:	Out value returns pointer serving as handler context.
  *
  * Return: 0 on success, or -errno on error.
@@ -167,6 +168,7 @@
 int smc_wr_tx_get_free_slot(struct smc_link *link,
 			    smc_wr_tx_handler handler,
 			    struct smc_wr_buf **wr_buf,
+			    struct smc_rdma_wr **wr_rdma_buf,
 			    struct smc_wr_tx_pend_priv **wr_pend_priv)
 {
 	struct smc_wr_tx_pend *wr_pend;
@@ -204,6 +206,8 @@
 	wr_ib = &link->wr_tx_ibs[idx];
 	wr_ib->wr_id = wr_id;
 	*wr_buf = &link->wr_tx_bufs[idx];
+	if (wr_rdma_buf)
+		*wr_rdma_buf = &link->wr_tx_rdmas[idx];
 	*wr_pend_priv = &wr_pend->priv;
 	return 0;
 }
@@ -215,12 +219,14 @@
 
 	pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
 	if (pend->idx < link->wr_tx_cnt) {
+		u32 idx = pend->idx;
+
 		/* clear the full struct smc_wr_tx_pend including .priv */
-		memset(&link->wr_tx_pends[pend->idx], 0,
-		       sizeof(link->wr_tx_pends[pend->idx]));
-		memset(&link->wr_tx_bufs[pend->idx], 0,
-		       sizeof(link->wr_tx_bufs[pend->idx]));
-		test_and_clear_bit(pend->idx, link->wr_tx_mask);
+		memset(&link->wr_tx_pends[idx], 0,
+		       sizeof(link->wr_tx_pends[idx]));
+		memset(&link->wr_tx_bufs[idx], 0,
+		       sizeof(link->wr_tx_bufs[idx]));
+		test_and_clear_bit(idx, link->wr_tx_mask);
 		return 1;
 	}
 
@@ -463,12 +469,26 @@
 			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
 		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
 		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
+		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
+			lnk->roce_pd->local_dma_lkey;
+		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
+			lnk->roce_pd->local_dma_lkey;
+		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
+			lnk->roce_pd->local_dma_lkey;
+		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
+			lnk->roce_pd->local_dma_lkey;
 		lnk->wr_tx_ibs[i].next = NULL;
 		lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
 		lnk->wr_tx_ibs[i].num_sge = 1;
 		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
 		lnk->wr_tx_ibs[i].send_flags =
 			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
+		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
+		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
+			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
+		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
+			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
 	}
 	for (i = 0; i < lnk->wr_rx_cnt; i++) {
 		lnk->wr_rx_sges[i].addr =
@@ -519,8 +539,12 @@
 	lnk->wr_tx_mask = NULL;
 	kfree(lnk->wr_tx_sges);
 	lnk->wr_tx_sges = NULL;
+	kfree(lnk->wr_tx_rdma_sges);
+	lnk->wr_tx_rdma_sges = NULL;
 	kfree(lnk->wr_rx_sges);
 	lnk->wr_rx_sges = NULL;
+	kfree(lnk->wr_tx_rdmas);
+	lnk->wr_tx_rdmas = NULL;
 	kfree(lnk->wr_rx_ibs);
 	lnk->wr_rx_ibs = NULL;
 	kfree(lnk->wr_tx_ibs);
@@ -550,10 +574,20 @@
 				  GFP_KERNEL);
 	if (!link->wr_rx_ibs)
 		goto no_mem_wr_tx_ibs;
+	link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
+				    sizeof(link->wr_tx_rdmas[0]),
+				    GFP_KERNEL);
+	if (!link->wr_tx_rdmas)
+		goto no_mem_wr_rx_ibs;
+	link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
+					sizeof(link->wr_tx_rdma_sges[0]),
+					GFP_KERNEL);
+	if (!link->wr_tx_rdma_sges)
+		goto no_mem_wr_tx_rdmas;
 	link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
 				   GFP_KERNEL);
 	if (!link->wr_tx_sges)
-		goto no_mem_wr_rx_ibs;
+		goto no_mem_wr_tx_rdma_sges;
 	link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
 				   sizeof(link->wr_rx_sges[0]),
 				   GFP_KERNEL);
@@ -577,6 +611,10 @@
 	kfree(link->wr_rx_sges);
 no_mem_wr_tx_sges:
 	kfree(link->wr_tx_sges);
+no_mem_wr_tx_rdma_sges:
+	kfree(link->wr_tx_rdma_sges);
+no_mem_wr_tx_rdmas:
+	kfree(link->wr_tx_rdmas);
 no_mem_wr_rx_ibs:
 	kfree(link->wr_rx_ibs);
 no_mem_wr_tx_ibs:
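
The two new allocations slot into the usual goto-unwind ladder: every kcalloc()
failure jumps to a label that frees, in reverse order, exactly what was already
allocated, so extending the chain only meant splicing two labels into the
middle. The pattern in a standalone sketch (names abbreviated, sizes made up):

	#include <errno.h>
	#include <stdlib.h>

	struct link_bufs {
		void *tx_ibs, *tx_rdmas, *tx_rdma_sges, *tx_sges;
	};

	static int alloc_link_bufs(struct link_bufs *l, size_t n)
	{
		l->tx_ibs = calloc(n, 64);
		if (!l->tx_ibs)
			goto no_mem;
		l->tx_rdmas = calloc(n, 64);
		if (!l->tx_rdmas)
			goto no_tx_ibs;
		l->tx_rdma_sges = calloc(n, 64);
		if (!l->tx_rdma_sges)
			goto no_tx_rdmas;
		l->tx_sges = calloc(n, 64);
		if (!l->tx_sges)
			goto no_tx_rdma_sges;
		return 0;

	no_tx_rdma_sges:		/* unwind in reverse order */
		free(l->tx_rdma_sges);
	no_tx_rdmas:
		free(l->tx_rdmas);
	no_tx_ibs:
		free(l->tx_ibs);
	no_mem:
		return -ENOMEM;
	}
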
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 1d85bb1..09bf32f 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -85,6 +85,7 @@
 
 int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler,
 			    struct smc_wr_buf **wr_buf,
+			    struct smc_rdma_wr **wrs,
 			    struct smc_wr_tx_pend_priv **wr_pend_priv);
 int smc_wr_tx_put_slot(struct smc_link *link,
 		       struct smc_wr_tx_pend_priv *wr_pend_priv);
diff --git a/net/socket.c b/net/socket.c
index 390a8ec..d7a1060 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NET		An implementation of the SOCKET network access protocol.
  *
@@ -45,13 +46,6 @@
  *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
  *					protocol-independent
  *
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
- *
  *	This module is effectively the top level interface to the BSD socket
  *	paradigm.
  *
@@ -79,6 +73,7 @@
 #include <linux/module.h>
 #include <linux/highmem.h>
 #include <linux/mount.h>
+#include <linux/pseudo_fs.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/compat.h>
@@ -90,6 +85,7 @@
 #include <linux/slab.h>
 #include <linux/xattr.h>
 #include <linux/nospec.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -239,20 +235,13 @@
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {
 	struct socket_alloc *ei;
-	struct socket_wq *wq;
 
 	ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
-	wq = kmalloc(sizeof(*wq), GFP_KERNEL);
-	if (!wq) {
-		kmem_cache_free(sock_inode_cachep, ei);
-		return NULL;
-	}
-	init_waitqueue_head(&wq->wait);
-	wq->fasync_list = NULL;
-	wq->flags = 0;
-	ei->socket.wq = wq;
+	init_waitqueue_head(&ei->socket.wq.wait);
+	ei->socket.wq.fasync_list = NULL;
+	ei->socket.wq.flags = 0;
 
 	ei->socket.state = SS_UNCONNECTED;
 	ei->socket.flags = 0;
@@ -263,12 +252,11 @@
 	return &ei->vfs_inode;
 }
 
-static void sock_destroy_inode(struct inode *inode)
+static void sock_free_inode(struct inode *inode)
 {
 	struct socket_alloc *ei;
 
 	ei = container_of(inode, struct socket_alloc, vfs_inode);
-	kfree_rcu(ei->socket.wq, rcu);
 	kmem_cache_free(sock_inode_cachep, ei);
 }
 
@@ -293,7 +281,7 @@
 
 static const struct super_operations sockfs_ops = {
 	.alloc_inode	= sock_alloc_inode,
-	.destroy_inode	= sock_destroy_inode,
+	.free_inode	= sock_free_inode,
 	.statfs		= simple_statfs,
 };
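Moving sockfs from ->destroy_inode to ->free_inode relies on ->free_inode being called from the inode's RCU callback: a filesystem whose teardown is pure memory freeing no longer needs its own kfree_rcu()/call_rcu() handling, which is also what allows the once separately allocated socket_wq to become a field embedded in struct socket. A sketch of the pairing for a hypothetical cache-backed pseudo filesystem:

#include <linux/fs.h>
#include <linux/slab.h>

/* Hypothetical inode container. */
struct demo_inode {
	int payload;
	struct inode vfs_inode;
};

static struct kmem_cache *demo_inode_cachep;

static struct inode *demo_alloc_inode(struct super_block *sb)
{
	struct demo_inode *di;

	di = kmem_cache_alloc(demo_inode_cachep, GFP_KERNEL);
	if (!di)
		return NULL;
	return &di->vfs_inode;
}

/* Invoked from the inode's RCU callback, so plain freeing is all that
 * is needed; no explicit RCU deferral in the filesystem itself. */
static void demo_free_inode(struct inode *inode)
{
	kmem_cache_free(demo_inode_cachep,
			container_of(inode, struct demo_inode, vfs_inode));
}

static const struct super_operations demo_sops = {
	.alloc_inode	= demo_alloc_inode,
	.free_inode	= demo_free_inode,
	.statfs		= simple_statfs,
};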
 
@@ -351,19 +339,22 @@
 	NULL
 };
 
-static struct dentry *sockfs_mount(struct file_system_type *fs_type,
-			 int flags, const char *dev_name, void *data)
+static int sockfs_init_fs_context(struct fs_context *fc)
 {
-	return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
-				  sockfs_xattr_handlers,
-				  &sockfs_dentry_operations, SOCKFS_MAGIC);
+	struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
+	if (!ctx)
+		return -ENOMEM;
+	ctx->ops = &sockfs_ops;
+	ctx->dops = &sockfs_dentry_operations;
+	ctx->xattr = sockfs_xattr_handlers;
+	return 0;
 }
 
 static struct vfsmount *sock_mnt __read_mostly;
 
 static struct file_system_type sock_fs_type = {
 	.name =		"sockfs",
-	.mount =	sockfs_mount,
+	.init_fs_context = sockfs_init_fs_context,
 	.kill_sb =	kill_anon_super,
 };
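sockfs_init_fs_context() above is the fs_context replacement for mount_pseudo_xattr(): init_pseudo() (from the new <linux/pseudo_fs.h>) wires up default pseudo-fs behaviour and returns a pseudo_fs_context whose ops/dops/xattr fields the caller may override. A sketch for a hypothetical pseudo filesystem; demofs and its magic number are invented:

#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/pseudo_fs.h>

#define DEMOFS_MAGIC 0x64656d6f	/* hypothetical magic number */

static const struct super_operations demofs_ops = {
	.statfs = simple_statfs,
};

static int demofs_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, DEMOFS_MAGIC);

	if (!ctx)
		return -ENOMEM;
	ctx->ops = &demofs_ops;	/* dops/xattr left NULL for the defaults */
	return 0;
}

static struct file_system_type demofs_type = {
	.name		 = "demofs",
	.init_fs_context = demofs_init_fs_context,
	.kill_sb	 = kill_anon_super,
};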
 
@@ -384,6 +375,18 @@
  *	but we take care of internal coherence yet.
  */
 
+/**
+ *	sock_alloc_file - Bind a &socket to a &file
+ *	@sock: socket
+ *	@flags: file status flags
+ *	@dname: protocol name
+ *
+ *	Returns the &file bound to @sock, implicitly storing it
+ *	in sock->file. If @dname is %NULL, it defaults to "".
+ *	On failure the return is an ERR pointer (see linux/err.h).
+ *	This function uses GFP_KERNEL internally.
+ */
+
 struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 {
 	struct file *file;
@@ -415,7 +418,7 @@
 	}
 
 	newfile = sock_alloc_file(sock, flags, NULL);
-	if (likely(!IS_ERR(newfile))) {
+	if (!IS_ERR(newfile)) {
 		fd_install(fd, newfile);
 		return fd;
 	}
@@ -424,6 +427,14 @@
 	return PTR_ERR(newfile);
 }
 
+/**
+ *	sock_from_file - Return the &socket bound to @file.
+ *	@file: file
+ *	@err: pointer to an error code return
+ *
+ *	On failure returns %NULL and assigns -ENOTSOCK to @err.
+ */
+
 struct socket *sock_from_file(struct file *file, int *err)
 {
 	if (file->f_op == &socket_file_ops)
@@ -532,11 +543,11 @@
 };
 
 /**
- *	sock_alloc	-	allocate a socket
+ *	sock_alloc - allocate a socket
  *
  *	Allocate a new inode and socket object. The two are bound together
  *	and initialised. The socket is then returned. If we are out of inodes
- *	NULL is returned.
+ *	NULL is returned. This function uses GFP_KERNEL internally.
  */
 
 struct socket *sock_alloc(void)
@@ -561,7 +572,7 @@
 EXPORT_SYMBOL(sock_alloc);
 
 /**
- *	sock_release	-	close a socket
+ *	sock_release - close a socket
  *	@sock: socket to close
  *
  *	The socket is released from the protocol stack if it has a release
@@ -577,13 +588,14 @@
 		if (inode)
 			inode_lock(inode);
 		sock->ops->release(sock);
+		sock->sk = NULL;
 		if (inode)
 			inode_unlock(inode);
 		sock->ops = NULL;
 		module_put(owner);
 	}
 
-	if (sock->wq->fasync_list)
+	if (sock->wq.fasync_list)
 		pr_err("%s: fasync list not empty!\n", __func__);
 
 	if (!sock->file) {
@@ -616,13 +628,27 @@
 }
 EXPORT_SYMBOL(__sock_tx_timestamp);
 
+INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
+					   size_t));
+INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
+					    size_t));
 static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
 {
-	int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
+	int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
+				     inet_sendmsg, sock, msg,
+				     msg_data_left(msg));
 	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }
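INDIRECT_CALL_INET() is the IPv4/IPv6 flavour of the indirect_call_wrapper helpers: when retpolines are enabled it compares the function pointer against the named candidates and makes a direct, branch-predictable call on a match, paying the indirect-branch cost only in the fallback; otherwise it compiles down to a plain indirect call. A generic sketch with the two-candidate form and hypothetical callbacks:

#include <linux/indirect_call_wrapper.h>

static int fast_path(int x) { return x + 1; }
static int slow_path(int x) { return x + 2; }

static int dispatch(int (*fn)(int), int x)
{
	/* With CONFIG_RETPOLINE this expands roughly to:
	 *   if (fn == fast_path) return fast_path(x);
	 *   if (fn == slow_path) return slow_path(x);
	 *   return fn(x);
	 * and to a plain fn(x) otherwise. */
	return INDIRECT_CALL_2(fn, fast_path, slow_path, x);
}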
 
+/**
+ *	sock_sendmsg - send a message through @sock
+ *	@sock: socket
+ *	@msg: message to send
+ *
+ *	Sends @msg through @sock, passing through LSM.
+ *	Returns the number of bytes sent, or an error code.
+ */
 int sock_sendmsg(struct socket *sock, struct msghdr *msg)
 {
 	int err = security_socket_sendmsg(sock, msg,
@@ -632,14 +658,39 @@
 }
 EXPORT_SYMBOL(sock_sendmsg);
 
+/**
+ *	kernel_sendmsg - send a message through @sock (kernel-space)
+ *	@sock: socket
+ *	@msg: message header
+ *	@vec: kernel vec
+ *	@num: vec array length
+ *	@size: total message data size
+ *
+ *	Builds the message data with @vec and sends it through @sock.
+ *	Returns the number of bytes sent, or an error code.
+ */
+
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size)
 {
-	iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+	iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
 	return sock_sendmsg(sock, msg);
 }
 EXPORT_SYMBOL(kernel_sendmsg);
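Dropping ITER_KVEC from the iov_iter_kvec() flags reflects an iov_iter API change: the iterator type is now implied by the constructor, so callers pass only the data direction. Usage is otherwise unchanged; a hedged sketch of an in-kernel sender over an already-connected socket (demo_send is hypothetical):

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/uio.h>

/* Send a short buffer over a connected kernel socket; 'sock' is assumed
 * to come from sock_create_kern() followed by kernel_connect(). */
static int demo_send(struct socket *sock, void *buf, size_t len)
{
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
	struct kvec vec = { .iov_base = buf, .iov_len = len };

	/* kernel_sendmsg() populates msg.msg_iter from the kvec. */
	return kernel_sendmsg(sock, &msg, &vec, 1, len);
}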
 
+/**
+ *	kernel_sendmsg_locked - send a message through @sock (kernel-space)
+ *	@sk: sock
+ *	@msg: message header
+ *	@vec: output s/g array
+ *	@num: output s/g array length
+ *	@size: total message data size
+ *
+ *	Builds the message data with @vec and sends it through @sock.
+ *	Returns the number of bytes sent, or an error code.
+ *	Caller must hold @sk.
+ */
+
 int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
 			  struct kvec *vec, size_t num, size_t size)
 {
@@ -648,7 +699,7 @@
 	if (!sock->ops->sendmsg_locked)
 		return sock_no_sendmsg_locked(sk, msg, size);
 
-	iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+	iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
 
 	return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
 }
@@ -669,7 +720,7 @@
  * before the software timestamp is received, a hardware TX timestamp may be
  * returned only if there is no software TX timestamp. Ignore false software
  * timestamps, which may be made in the __sock_recv_timestamp() call when the
- * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
+ * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
  * hardware timestamp.
  */
 static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
@@ -705,7 +756,9 @@
 	struct sk_buff *skb)
 {
 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
-	struct scm_timestamping tss;
+	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
+	struct scm_timestamping_internal tss;
+
 	int empty = 1, false_tstamp = 0;
 	struct skb_shared_hwtstamps *shhwtstamps =
 		skb_hwtstamps(skb);
@@ -719,34 +772,54 @@
 
 	if (need_software_tstamp) {
 		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
-			struct timeval tv;
-			skb_get_timestamp(skb, &tv);
-			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
-				 sizeof(tv), &tv);
+			if (new_tstamp) {
+				struct __kernel_sock_timeval tv;
+
+				skb_get_new_timestamp(skb, &tv);
+				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
+					 sizeof(tv), &tv);
+			} else {
+				struct __kernel_old_timeval tv;
+
+				skb_get_timestamp(skb, &tv);
+				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
+					 sizeof(tv), &tv);
+			}
 		} else {
-			struct timespec ts;
-			skb_get_timestampns(skb, &ts);
-			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
-				 sizeof(ts), &ts);
+			if (new_tstamp) {
+				struct __kernel_timespec ts;
+
+				skb_get_new_timestampns(skb, &ts);
+				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
+					 sizeof(ts), &ts);
+			} else {
+				struct timespec ts;
+
+				skb_get_timestampns(skb, &ts);
+				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
+					 sizeof(ts), &ts);
+			}
 		}
 	}
 
 	memset(&tss, 0, sizeof(tss));
 	if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
-	    ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
+	    ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
 		empty = 0;
 	if (shhwtstamps &&
 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
 	    !skb_is_swtx_tstamp(skb, false_tstamp) &&
-	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
+	    ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
 		empty = 0;
 		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
 		    !skb_is_err_queue(skb))
 			put_ts_pktinfo(msg, skb);
 	}
 	if (!empty) {
-		put_cmsg(msg, SOL_SOCKET,
-			 SCM_TIMESTAMPING, sizeof(tss), &tss);
+		if (sock_flag(sk, SOCK_TSTAMP_NEW))
+			put_cmsg_scm_timestamping64(msg, &tss);
+		else
+			put_cmsg_scm_timestamping(msg, &tss);
 
 		if (skb_is_err_queue(skb) && skb->len &&
 		    SKB_EXT_ERR(skb)->opt_stats)
@@ -788,12 +861,27 @@
 }
 EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
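The rework above selects the _OLD or _NEW cmsg layout from SOCK_TSTAMP_NEW, which tracks the sockopt variant userspace used, so y2038-safe callers receive 64-bit timestamps while old binaries keep the legacy layout. Structurally, userspace code is unchanged; a sketch of reading an rx timestamp from the control message, assuming the libc maps SO_TIMESTAMPNS/SCM_TIMESTAMPNS to the variant matching its time_t:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <time.h>

static void recv_with_tstamp(int fd)
{
	char data[256], cbuf[CMSG_SPACE(sizeof(struct timespec))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	int on = 1;

	/* Ask for software rx timestamps as a cmsg. */
	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on));
	if (recvmsg(fd, &msg, 0) < 0)
		return;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_TIMESTAMPNS) {
			struct timespec ts;

			memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts));
			printf("rx at %lld.%09ld\n",
			       (long long)ts.tv_sec, ts.tv_nsec);
		}
	}
}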
 
+INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
+					   size_t, int));
+INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
+					    size_t, int));
 static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 				     int flags)
 {
-	return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
+	return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
+				  inet_recvmsg, sock, msg, msg_data_left(msg),
+				  flags);
 }
 
+/**
+ *	sock_recvmsg - receive a message from @sock
+ *	@sock: socket
+ *	@msg: message to receive
+ *	@flags: message flags
+ *
+ *	Receives @msg from @sock, passing through LSM. Returns the total number
+ *	of bytes received, or an error.
+ */
 int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
 {
 	int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
@@ -803,27 +891,28 @@
 EXPORT_SYMBOL(sock_recvmsg);
 
 /**
- * kernel_recvmsg - Receive a message from a socket (kernel space)
- * @sock:       The socket to receive the message from
- * @msg:        Received message
- * @vec:        Input s/g array for message data
- * @num:        Size of input s/g array
- * @size:       Number of bytes to read
- * @flags:      Message flags (MSG_DONTWAIT, etc...)
+ *	kernel_recvmsg - Receive a message from a socket (kernel space)
+ *	@sock: The socket to receive the message from
+ *	@msg: Received message
+ *	@vec: Input s/g array for message data
+ *	@num: Size of input s/g array
+ *	@size: Number of bytes to read
+ *	@flags: Message flags (MSG_DONTWAIT, etc...)
  *
- * On return the msg structure contains the scatter/gather array passed in the
- * vec argument. The array is modified so that it consists of the unfilled
- * portion of the original array.
+ *	On return the msg structure contains the scatter/gather array passed in the
+ *	vec argument. The array is modified so that it consists of the unfilled
+ *	portion of the original array.
  *
- * The returned value is the total number of bytes received, or an error.
+ *	The returned value is the total number of bytes received, or an error.
  */
+
 int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size, int flags)
 {
 	mm_segment_t oldfs = get_fs();
 	int result;
 
-	iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
+	iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
 	set_fs(KERNEL_DS);
 	result = sock_recvmsg(sock, msg, flags);
 	set_fs(oldfs);
@@ -853,7 +942,7 @@
 	struct socket *sock = file->private_data;
 
 	if (unlikely(!sock->ops->splice_read))
-		return -EINVAL;
+		return generic_file_splice_read(file, ppos, pipe, len, flags);
 
 	return sock->ops->splice_read(sock, ppos, pipe, len, flags);
 }
@@ -941,8 +1030,7 @@
 EXPORT_SYMBOL(dlci_ioctl_set);
 
 static long sock_do_ioctl(struct net *net, struct socket *sock,
-			  unsigned int cmd, unsigned long arg,
-			  unsigned int ifreq_size)
+			  unsigned int cmd, unsigned long arg)
 {
 	int err;
 	void __user *argp = (void __user *)arg;
@@ -968,11 +1056,11 @@
 	} else {
 		struct ifreq ifr;
 		bool need_copyout;
-		if (copy_from_user(&ifr, argp, ifreq_size))
+		if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
 			return -EFAULT;
 		err = dev_ioctl(net, cmd, &ifr, &need_copyout);
 		if (!err && need_copyout)
-			if (copy_to_user(argp, &ifr, ifreq_size))
+			if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
 				return -EFAULT;
 	}
 	return err;
@@ -983,6 +1071,13 @@
  *	what to do with it - that's up to the protocol still.
  */
 
+/**
+ *	get_net_ns - increment the refcount of the network namespace
+ *	@ns: common namespace (net)
+ *
+ *	Returns the net's common namespace.
+ */
+
 struct ns_common *get_net_ns(struct ns_common *ns)
 {
 	return &get_net(container_of(ns, struct net, ns))->ns;
@@ -1070,14 +1165,46 @@
 
 			err = open_related_ns(&net->ns, get_net_ns);
 			break;
+		case SIOCGSTAMP_OLD:
+		case SIOCGSTAMPNS_OLD:
+			if (!sock->ops->gettstamp) {
+				err = -ENOIOCTLCMD;
+				break;
+			}
+			err = sock->ops->gettstamp(sock, argp,
+						   cmd == SIOCGSTAMP_OLD,
+						   !IS_ENABLED(CONFIG_64BIT));
+			break;
+		case SIOCGSTAMP_NEW:
+		case SIOCGSTAMPNS_NEW:
+			if (!sock->ops->gettstamp) {
+				err = -ENOIOCTLCMD;
+				break;
+			}
+			err = sock->ops->gettstamp(sock, argp,
+						   cmd == SIOCGSTAMP_NEW,
+						   false);
+			break;
 		default:
-			err = sock_do_ioctl(net, sock, cmd, arg,
-					    sizeof(struct ifreq));
+			err = sock_do_ioctl(net, sock, cmd, arg);
 			break;
 		}
 	return err;
 }
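SIOCGSTAMP(NS) handling moves out of sock_do_ioctl() into the per-family ->gettstamp() op, split into _OLD/_NEW pairs for y2038; the final argument tells gettstamp whether the _OLD layout carries 32-bit time, hence the !IS_ENABLED(CONFIG_64BIT) on the native path. A userspace sketch exercising it, assuming the uapi headers resolve SIOCGSTAMP to the matching _OLD/_NEW command:

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <linux/sockios.h>

/* Print the timestamp of the last packet received on 'fd'. */
static void print_last_rx_stamp(int fd)
{
	struct timeval tv;

	if (ioctl(fd, SIOCGSTAMP, &tv) == 0)
		printf("last rx: %lld.%06ld\n",
		       (long long)tv.tv_sec, (long)tv.tv_usec);
}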
 
+/**
+ *	sock_create_lite - creates a socket
+ *	@family: protocol family (AF_INET, ...)
+ *	@type: communication type (SOCK_STREAM, ...)
+ *	@protocol: protocol (0, ...)
+ *	@res: new socket
+ *
+ *	Creates a new socket and assigns it to @res, passing through LSM.
+ *	The new socket initialization is not complete, see kernel_accept().
+ *	Returns 0 or an error. On failure @res is set to %NULL.
+ *	This function internally uses GFP_KERNEL.
+ */
+
 int sock_create_lite(int family, int type, int protocol, struct socket **res)
 {
 	int err;
@@ -1157,13 +1284,12 @@
 {
 	struct socket *sock = filp->private_data;
 	struct sock *sk = sock->sk;
-	struct socket_wq *wq;
+	struct socket_wq *wq = &sock->wq;
 
 	if (sk == NULL)
 		return -EINVAL;
 
 	lock_sock(sk);
-	wq = sock->wq;
 	fasync_helper(fd, filp, on, &wq->fasync_list);
 
 	if (!wq->fasync_list)
@@ -1203,6 +1329,21 @@
 }
 EXPORT_SYMBOL(sock_wake_async);
 
+/**
+ *	__sock_create - creates a socket
+ *	@net: net namespace
+ *	@family: protocol family (AF_INET, ...)
+ *	@type: communication type (SOCK_STREAM, ...)
+ *	@protocol: protocol (0, ...)
+ *	@res: new socket
+ *	@kern: boolean for kernel space sockets
+ *
+ *	Creates a new socket and assigns it to @res, passing through LSM.
+ *	Returns 0 or an error. On failure @res is set to %NULL. @kern must
+ *	be set to true if the socket resides in kernel space.
+ *	This function internally uses GFP_KERNEL.
+ */
+
 int __sock_create(struct net *net, int family, int type, int protocol,
 			 struct socket **res, int kern)
 {
@@ -1312,12 +1453,35 @@
 }
 EXPORT_SYMBOL(__sock_create);
 
+/**
+ *	sock_create - creates a socket
+ *	@family: protocol family (AF_INET, ...)
+ *	@type: communication type (SOCK_STREAM, ...)
+ *	@protocol: protocol (0, ...)
+ *	@res: new socket
+ *
+ *	A wrapper around __sock_create().
+ *	Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
 int sock_create(int family, int type, int protocol, struct socket **res)
 {
 	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
 }
 EXPORT_SYMBOL(sock_create);
 
+/**
+ *	sock_create_kern - creates a socket (kernel space)
+ *	@net: net namespace
+ *	@family: protocol family (AF_INET, ...)
+ *	@type: communication type (SOCK_STREAM, ...)
+ *	@protocol: protocol (0, ...)
+ *	@res: new socket
+ *
+ *	A wrapper around __sock_create().
+ *	Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
 int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
 {
 	return __sock_create(net, family, type, protocol, res, 1);
@@ -1475,7 +1639,7 @@
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (sock) {
 		err = move_addr_to_kernel(umyaddr, addrlen, &address);
-		if (err >= 0) {
+		if (!err) {
 			err = security_socket_bind(sock,
 						   (struct sockaddr *)&address,
 						   addrlen);
@@ -1881,6 +2045,8 @@
 static int __sys_setsockopt(int fd, int level, int optname,
 			    char __user *optval, int optlen)
 {
+	mm_segment_t oldfs = get_fs();
+	char *kernel_optval = NULL;
 	int err, fput_needed;
 	struct socket *sock;
 
@@ -1893,6 +2059,22 @@
 		if (err)
 			goto out_put;
 
+		err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
+						     &optname, optval, &optlen,
+						     &kernel_optval);
+
+		if (err < 0) {
+			goto out_put;
+		} else if (err > 0) {
+			err = 0;
+			goto out_put;
+		}
+
+		if (kernel_optval) {
+			set_fs(KERNEL_DS);
+			optval = (char __user __force *)kernel_optval;
+		}
+
 		if (level == SOL_SOCKET)
 			err =
 			    sock_setsockopt(sock, level, optname, optval,
@@ -1901,6 +2083,11 @@
 			err =
 			    sock->ops->setsockopt(sock, level, optname, optval,
 						  optlen);
+
+		if (kernel_optval) {
+			set_fs(oldfs);
+			kfree(kernel_optval);
+		}
 out_put:
 		fput_light(sock->file, fput_needed);
 	}
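BPF_CGROUP_RUN_PROG_SETSOCKOPT() lets cgroup-attached BPF programs veto or rewrite the option before the kernel proper sees it: a negative return from the macro aborts the syscall, a positive one short-circuits it with success, and a rewritten value comes back via kernel_optval, which is why the code briefly switches to KERNEL_DS. A sketch of the BPF side, assuming a libbpf-style build; the policy itself is invented:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* From the program's point of view: return 1 to let the setsockopt()
 * proceed, 0 to reject it with EPERM. */
SEC("cgroup/setsockopt")
int deny_ip_ttl(struct bpf_sockopt *ctx)
{
	/* Hypothetical policy: forbid changing IP_TTL
	 * (level 0 == IPPROTO_IP, optname 2 == IP_TTL). */
	if (ctx->level == 0 && ctx->optname == 2)
		return 0;
	return 1;
}

char _license[] SEC("license") = "GPL";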
@@ -1923,6 +2110,7 @@
 {
 	int err, fput_needed;
 	struct socket *sock;
+	int max_optlen;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (sock != NULL) {
@@ -1930,6 +2118,8 @@
 		if (err)
 			goto out_put;
 
+		max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
+
 		if (level == SOL_SOCKET)
 			err =
 			    sock_getsockopt(sock, level, optname, optval,
@@ -1938,6 +2128,10 @@
 			err =
 			    sock->ops->getsockopt(sock, level, optname, optval,
 						  optlen);
+
+		err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
+						     optval, optlen,
+						     max_optlen, err);
 out_put:
 		fput_light(sock->file, fput_needed);
 	}
@@ -2032,20 +2226,16 @@
 
 	kmsg->msg_iocb = NULL;
 
-	return import_iovec(save_addr ? READ : WRITE,
+	err = import_iovec(save_addr ? READ : WRITE,
 			    msg.msg_iov, msg.msg_iovlen,
 			    UIO_FASTIOV, iov, &kmsg->msg_iter);
+	return err < 0 ? err : 0;
 }
 
-static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
-			 struct msghdr *msg_sys, unsigned int flags,
-			 struct used_address *used_address,
-			 unsigned int allowed_msghdr_flags)
+static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
+			   unsigned int flags, struct used_address *used_address,
+			   unsigned int allowed_msghdr_flags)
 {
-	struct compat_msghdr __user *msg_compat =
-	    (struct compat_msghdr __user *)msg;
-	struct sockaddr_storage address;
-	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 	unsigned char ctl[sizeof(struct cmsghdr) + 20]
 				__aligned(sizeof(__kernel_size_t));
 	/* 20 is size of ipv6_pktinfo */
@@ -2053,19 +2243,10 @@
 	int ctl_len;
 	ssize_t err;
 
-	msg_sys->msg_name = &address;
-
-	if (MSG_CMSG_COMPAT & flags)
-		err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
-	else
-		err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
-	if (err < 0)
-		return err;
-
 	err = -ENOBUFS;
 
 	if (msg_sys->msg_controllen > INT_MAX)
-		goto out_freeiov;
+		goto out;
 	flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
 	ctl_len = msg_sys->msg_controllen;
 	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
@@ -2073,7 +2254,7 @@
 		    cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
 						     sizeof(ctl));
 		if (err)
-			goto out_freeiov;
+			goto out;
 		ctl_buf = msg_sys->msg_control;
 		ctl_len = msg_sys->msg_controllen;
 	} else if (ctl_len) {
@@ -2082,7 +2263,7 @@
 		if (ctl_len > sizeof(ctl)) {
 			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
 			if (ctl_buf == NULL)
-				goto out_freeiov;
+				goto out;
 		}
 		err = -EFAULT;
 		/*
@@ -2128,7 +2309,47 @@
 out_freectl:
 	if (ctl_buf != ctl)
 		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
-out_freeiov:
+out:
+	return err;
+}
+
+static int sendmsg_copy_msghdr(struct msghdr *msg,
+			       struct user_msghdr __user *umsg, unsigned flags,
+			       struct iovec **iov)
+{
+	int err;
+
+	if (flags & MSG_CMSG_COMPAT) {
+		struct compat_msghdr __user *msg_compat;
+
+		msg_compat = (struct compat_msghdr __user *) umsg;
+		err = get_compat_msghdr(msg, msg_compat, NULL, iov);
+	} else {
+		err = copy_msghdr_from_user(msg, umsg, NULL, iov);
+	}
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
+			 struct msghdr *msg_sys, unsigned int flags,
+			 struct used_address *used_address,
+			 unsigned int allowed_msghdr_flags)
+{
+	struct sockaddr_storage address;
+	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+	ssize_t err;
+
+	msg_sys->msg_name = &address;
+
+	err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
+	if (err < 0)
+		return err;
+
+	err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
+				allowed_msghdr_flags);
 	kfree(iov);
 	return err;
 }
@@ -2136,6 +2357,28 @@
 /*
  *	BSD sendmsg interface
  */
+long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *umsg,
+			unsigned int flags)
+{
+	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+	struct sockaddr_storage address;
+	struct msghdr msg = { .msg_name = &address };
+	ssize_t err;
+
+	err = sendmsg_copy_msghdr(&msg, umsg, flags, &iov);
+	if (err)
+		return err;
+	/* disallow ancillary data requests from this path */
+	if (msg.msg_control || msg.msg_controllen) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = ____sys_sendmsg(sock, &msg, flags, NULL, 0);
+out:
+	kfree(iov);
+	return err;
+}
 
 long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
 		   bool forbid_cmsg_compat)
@@ -2240,33 +2483,41 @@
 	return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
 }
 
-static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
-			 struct msghdr *msg_sys, unsigned int flags, int nosec)
+static int recvmsg_copy_msghdr(struct msghdr *msg,
+			       struct user_msghdr __user *umsg, unsigned flags,
+			       struct sockaddr __user **uaddr,
+			       struct iovec **iov)
+{
+	ssize_t err;
+
+	if (MSG_CMSG_COMPAT & flags) {
+		struct compat_msghdr __user *msg_compat;
+
+		msg_compat = (struct compat_msghdr __user *) umsg;
+		err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
+	} else {
+		err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
+	}
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
+			   struct user_msghdr __user *msg,
+			   struct sockaddr __user *uaddr,
+			   unsigned int flags, int nosec)
 {
 	struct compat_msghdr __user *msg_compat =
-	    (struct compat_msghdr __user *)msg;
-	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov = iovstack;
+					(struct compat_msghdr __user *) msg;
+	int __user *uaddr_len = COMPAT_NAMELEN(msg);
+	struct sockaddr_storage addr;
 	unsigned long cmsg_ptr;
 	int len;
 	ssize_t err;
 
-	/* kernel mode address */
-	struct sockaddr_storage addr;
-
-	/* user mode address pointers */
-	struct sockaddr __user *uaddr;
-	int __user *uaddr_len = COMPAT_NAMELEN(msg);
-
 	msg_sys->msg_name = &addr;
-
-	if (MSG_CMSG_COMPAT & flags)
-		err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
-	else
-		err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
-	if (err < 0)
-		return err;
-
 	cmsg_ptr = (unsigned long)msg_sys->msg_control;
 	msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
 
@@ -2277,7 +2528,7 @@
 		flags |= MSG_DONTWAIT;
 	err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
 	if (err < 0)
-		goto out_freeiov;
+		goto out;
 	len = err;
 
 	if (uaddr != NULL) {
@@ -2285,12 +2536,12 @@
 					msg_sys->msg_namelen, uaddr,
 					uaddr_len);
 		if (err < 0)
-			goto out_freeiov;
+			goto out;
 	}
 	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
 			 COMPAT_FLAGS(msg));
 	if (err)
-		goto out_freeiov;
+		goto out;
 	if (MSG_CMSG_COMPAT & flags)
 		err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
 				 &msg_compat->msg_controllen);
@@ -2298,10 +2549,25 @@
 		err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
 				 &msg->msg_controllen);
 	if (err)
-		goto out_freeiov;
+		goto out;
 	err = len;
+out:
+	return err;
+}
 
-out_freeiov:
+static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
+			 struct msghdr *msg_sys, unsigned int flags, int nosec)
+{
+	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+	/* user mode address pointers */
+	struct sockaddr __user *uaddr;
+	ssize_t err;
+
+	err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
+	if (err < 0)
+		return err;
+
+	err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
 	kfree(iov);
 	return err;
 }
@@ -2310,6 +2576,30 @@
  *	BSD recvmsg interface
  */
 
+long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *umsg,
+			unsigned int flags)
+{
+	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+	struct sockaddr_storage address;
+	struct msghdr msg = { .msg_name = &address };
+	struct sockaddr __user *uaddr;
+	ssize_t err;
+
+	err = recvmsg_copy_msghdr(&msg, umsg, flags, &uaddr, &iov);
+	if (err)
+		return err;
+	/* disallow ancillary data requests from this path */
+	if (msg.msg_control || msg.msg_controllen) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = ____sys_recvmsg(sock, &msg, umsg, uaddr, flags, 0);
+out:
+	kfree(iov);
+	return err;
+}
+
 long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
 		   bool forbid_cmsg_compat)
 {
@@ -2341,8 +2631,9 @@
  *     Linux recvmmsg interface
  */
 
-int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
-		   unsigned int flags, struct timespec *timeout)
+static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
+			  unsigned int vlen, unsigned int flags,
+			  struct timespec64 *timeout)
 {
 	int fput_needed, err, datagrams;
 	struct socket *sock;
@@ -2407,8 +2698,7 @@
 
 		if (timeout) {
 			ktime_get_ts64(&timeout64);
-			*timeout = timespec64_to_timespec(
-					timespec64_sub(end_time, timeout64));
+			*timeout = timespec64_sub(end_time, timeout64);
 			if (timeout->tv_sec < 0) {
 				timeout->tv_sec = timeout->tv_nsec = 0;
 				break;
@@ -2452,26 +2742,32 @@
 	return datagrams;
 }
 
-static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
-			   unsigned int vlen, unsigned int flags,
-			   struct timespec __user *timeout)
+int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
+		   unsigned int vlen, unsigned int flags,
+		   struct __kernel_timespec __user *timeout,
+		   struct old_timespec32 __user *timeout32)
 {
 	int datagrams;
-	struct timespec timeout_sys;
+	struct timespec64 timeout_sys;
 
-	if (flags & MSG_CMSG_COMPAT)
-		return -EINVAL;
-
-	if (!timeout)
-		return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
-
-	if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
+	if (timeout && get_timespec64(&timeout_sys, timeout))
 		return -EFAULT;
 
-	datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
+	if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
+		return -EFAULT;
 
-	if (datagrams > 0 &&
-	    copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
+	if (!timeout && !timeout32)
+		return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
+
+	datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
+
+	if (datagrams <= 0)
+		return datagrams;
+
+	if (timeout && put_timespec64(&timeout_sys, timeout))
+		datagrams = -EFAULT;
+
+	if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
 		datagrams = -EFAULT;
 
 	return datagrams;
@@ -2479,11 +2775,26 @@
 
 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
 		unsigned int, vlen, unsigned int, flags,
-		struct timespec __user *, timeout)
+		struct __kernel_timespec __user *, timeout)
 {
-	return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
+	if (flags & MSG_CMSG_COMPAT)
+		return -EINVAL;
+
+	return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
 }
 
+#ifdef CONFIG_COMPAT_32BIT_TIME
+SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
+		unsigned int, vlen, unsigned int, flags,
+		struct old_timespec32 __user *, timeout)
+{
+	if (flags & MSG_CMSG_COMPAT)
+		return -EINVAL;
+
+	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
+}
+#endif
+
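__sys_recvmmsg() now takes both a __kernel_timespec and an old_timespec32 pointer so the native and time32 syscalls share one implementation, with the remaining time written back whenever datagrams were received. Userspace is unaffected; a sketch of a batched receive (buffer sizes and the MSG_WAITFORONE choice are arbitrary):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <time.h>

static void drain(int fd)
{
	char bufs[8][1500];
	struct iovec iov[8];
	struct mmsghdr msgs[8];
	/* The kernel updates this in place with the time left. */
	struct timespec timeout = { .tv_sec = 1, .tv_nsec = 0 };
	int i, n;

	for (i = 0; i < 8; i++) {
		iov[i] = (struct iovec){ bufs[i], sizeof(bufs[i]) };
		msgs[i] = (struct mmsghdr){
			.msg_hdr = { .msg_iov = &iov[i], .msg_iovlen = 1 },
		};
	}
	n = recvmmsg(fd, msgs, 8, MSG_WAITFORONE, &timeout);
	if (n > 0)
		for (i = 0; i < n; i++)
			printf("msg %d: %u bytes\n", i, msgs[i].msg_len);
}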
 #ifdef __ARCH_WANT_SYS_SOCKETCALL
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
@@ -2602,8 +2913,15 @@
 				    a[2], true);
 		break;
 	case SYS_RECVMMSG:
-		err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2],
-				      a[3], (struct timespec __user *)a[4]);
+		if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
+			err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
+					     a[2], a[3],
+					     (struct __kernel_timespec __user *)a[4],
+					     NULL);
+		else
+			err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
+					     a[2], a[3], NULL,
+					     (struct old_timespec32 __user *)a[4]);
 		break;
 	case SYS_ACCEPT4:
 		err = __sys_accept4(a0, (struct sockaddr __user *)a1,
@@ -2744,40 +3062,6 @@
 #endif				/* CONFIG_PROC_FS */
 
 #ifdef CONFIG_COMPAT
-static int do_siocgstamp(struct net *net, struct socket *sock,
-			 unsigned int cmd, void __user *up)
-{
-	mm_segment_t old_fs = get_fs();
-	struct timeval ktv;
-	int err;
-
-	set_fs(KERNEL_DS);
-	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv,
-			    sizeof(struct compat_ifreq));
-	set_fs(old_fs);
-	if (!err)
-		err = compat_put_timeval(&ktv, up);
-
-	return err;
-}
-
-static int do_siocgstampns(struct net *net, struct socket *sock,
-			   unsigned int cmd, void __user *up)
-{
-	mm_segment_t old_fs = get_fs();
-	struct timespec kts;
-	int err;
-
-	set_fs(KERNEL_DS);
-	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts,
-			    sizeof(struct compat_ifreq));
-	set_fs(old_fs);
-	if (!err)
-		err = compat_put_timespec(&kts, up);
-
-	return err;
-}
-
 static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 {
 	struct compat_ifconf ifc32;
@@ -2966,6 +3250,54 @@
 	return dev_ioctl(net, cmd, &ifreq, NULL);
 }
 
+static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
+			      unsigned int cmd,
+			      struct compat_ifreq __user *uifr32)
+{
+	struct ifreq __user *uifr;
+	int err;
+
+	/* Handle the fact that while struct ifreq has the same *layout* on
+	 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
+	 * which are handled elsewhere, it still has different *size* due to
+	 * ifreq::ifru_ifmap (which is 16 bytes on 32-bit, 24 bytes on 64-bit,
+	 * resulting in struct ifreq being 32 and 40 bytes respectively).
+	 * As a result, if the struct happens to be at the end of a page and
+	 * the next page isn't readable/writable, we get a fault. To prevent
+	 * that, copy back and forth to the full size.
+	 */
+
+	uifr = compat_alloc_user_space(sizeof(*uifr));
+	if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
+		return -EFAULT;
+
+	err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
+
+	if (!err) {
+		switch (cmd) {
+		case SIOCGIFFLAGS:
+		case SIOCGIFMETRIC:
+		case SIOCGIFMTU:
+		case SIOCGIFMEM:
+		case SIOCGIFHWADDR:
+		case SIOCGIFINDEX:
+		case SIOCGIFADDR:
+		case SIOCGIFBRDADDR:
+		case SIOCGIFDSTADDR:
+		case SIOCGIFNETMASK:
+		case SIOCGIFPFLAGS:
+		case SIOCGIFTXQLEN:
+		case SIOCGMIIPHY:
+		case SIOCGMIIREG:
+		case SIOCGIFNAME:
+			if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
+				err = -EFAULT;
+			break;
+		}
+	}
+	return err;
+}
+
 static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
 			struct compat_ifreq __user *uifr32)
 {
@@ -3081,8 +3413,7 @@
 	}
 
 	set_fs(KERNEL_DS);
-	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r,
-			    sizeof(struct compat_ifreq));
+	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
 	set_fs(old_fs);
 
 out:
@@ -3130,10 +3461,13 @@
 	case SIOCADDRT:
 	case SIOCDELRT:
 		return routing_ioctl(net, sock, cmd, argp);
-	case SIOCGSTAMP:
-		return do_siocgstamp(net, sock, cmd, argp);
-	case SIOCGSTAMPNS:
-		return do_siocgstampns(net, sock, cmd, argp);
+	case SIOCGSTAMP_OLD:
+	case SIOCGSTAMPNS_OLD:
+		if (!sock->ops->gettstamp)
+			return -ENOIOCTLCMD;
+		return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
+					    !COMPAT_USE_64BIT_TIME);
+
 	case SIOCBONDSLAVEINFOQUERY:
 	case SIOCBONDINFOQUERY:
 	case SIOCSHWTSTAMP:
@@ -3151,6 +3485,8 @@
 	case SIOCADDDLCI:
 	case SIOCDELDLCI:
 	case SIOCGSKNS:
+	case SIOCGSTAMP_NEW:
+	case SIOCGSTAMPNS_NEW:
 		return sock_ioctl(file, cmd, arg);
 
 	case SIOCGIFFLAGS:
@@ -3182,21 +3518,22 @@
 	case SIOCSIFTXQLEN:
 	case SIOCBRADDIF:
 	case SIOCBRDELIF:
+	case SIOCGIFNAME:
 	case SIOCSIFNAME:
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
-	case SIOCSARP:
-	case SIOCGARP:
-	case SIOCDARP:
-	case SIOCATMARK:
 	case SIOCBONDENSLAVE:
 	case SIOCBONDRELEASE:
 	case SIOCBONDSETHWADDR:
 	case SIOCBONDCHANGEACTIVE:
-	case SIOCGIFNAME:
-		return sock_do_ioctl(net, sock, cmd, arg,
-				     sizeof(struct compat_ifreq));
+		return compat_ifreq_ioctl(net, sock, cmd, argp);
+
+	case SIOCSARP:
+	case SIOCGARP:
+	case SIOCDARP:
+	case SIOCATMARK:
+		return sock_do_ioctl(net, sock, cmd, arg);
 	}
 
 	return -ENOIOCTLCMD;
@@ -3227,18 +3564,46 @@
 }
 #endif
 
+/**
+ *	kernel_bind - bind an address to a socket (kernel space)
+ *	@sock: socket
+ *	@addr: address
+ *	@addrlen: length of address
+ *
+ *	Returns 0 or an error.
+ */
+
 int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
 {
 	return sock->ops->bind(sock, addr, addrlen);
 }
 EXPORT_SYMBOL(kernel_bind);
 
+/**
+ *	kernel_listen - move socket to listening state (kernel space)
+ *	@sock: socket
+ *	@backlog: pending connections queue size
+ *
+ *	Returns 0 or an error.
+ */
+
 int kernel_listen(struct socket *sock, int backlog)
 {
 	return sock->ops->listen(sock, backlog);
 }
 EXPORT_SYMBOL(kernel_listen);
 
+/**
+ *	kernel_accept - accept a connection (kernel space)
+ *	@sock: listening socket
+ *	@newsock: new connected socket
+ *	@flags: flags
+ *
+ *	@flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
+ *	If it fails, @newsock is guaranteed to be %NULL.
+ *	Returns 0 or an error.
+ */
+
 int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
 {
 	struct sock *sk = sock->sk;
@@ -3264,6 +3629,19 @@
 }
 EXPORT_SYMBOL(kernel_accept);
 
+/**
+ *	kernel_connect - connect a socket (kernel space)
+ *	@sock: socket
+ *	@addr: address
+ *	@addrlen: address length
+ *	@flags: flags (O_NONBLOCK, ...)
+ *
+ *	For datagram sockets, @addr is the address to which datagrams are sent
+ *	by default, and the only address from which datagrams are received.
+ *	For stream sockets, attempts to connect to @addr.
+ *	Returns 0 or an error code.
+ */
+
 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 		   int flags)
 {
@@ -3271,18 +3649,48 @@
 }
 EXPORT_SYMBOL(kernel_connect);
 
+/**
+ *	kernel_getsockname - get the address to which the socket is bound (kernel space)
+ *	@sock: socket
+ *	@addr: address holder
+ *
+ *	Fills the @addr pointer with the address to which the socket is bound.
+ *	Returns 0 or an error code.
+ */
+
 int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
 {
 	return sock->ops->getname(sock, addr, 0);
 }
 EXPORT_SYMBOL(kernel_getsockname);
 
+/**
+ *	kernel_getpeername - get the address to which the socket is connected (kernel space)
+ *	@sock: socket
+ *	@addr: address holder
+ *
+ *	Fills the @addr pointer with the address to which the socket is connected.
+ *	Returns 0 or an error code.
+ */
+
 int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
 {
 	return sock->ops->getname(sock, addr, 1);
 }
 EXPORT_SYMBOL(kernel_getpeername);
 
+/**
+ *	kernel_getsockopt - get a socket option (kernel space)
+ *	@sock: socket
+ *	@level: API level (SOL_SOCKET, ...)
+ *	@optname: option tag
+ *	@optval: option value
+ *	@optlen: option length
+ *
+ *	Assigns the option length to @optlen.
+ *	Returns 0 or an error.
+ */
+
 int kernel_getsockopt(struct socket *sock, int level, int optname,
 			char *optval, int *optlen)
 {
@@ -3305,6 +3713,17 @@
 }
 EXPORT_SYMBOL(kernel_getsockopt);
 
+/**
+ *	kernel_setsockopt - set a socket option (kernel space)
+ *	@sock: socket
+ *	@level: API level (SOL_SOCKET, ...)
+ *	@optname: option tag
+ *	@optval: option value
+ *	@optlen: option length
+ *
+ *	Returns 0 or an error.
+ */
+
 int kernel_setsockopt(struct socket *sock, int level, int optname,
 			char *optval, unsigned int optlen)
 {
@@ -3325,6 +3744,17 @@
 }
 EXPORT_SYMBOL(kernel_setsockopt);
 
+/**
+ *	kernel_sendpage - send a &page through a socket (kernel space)
+ *	@sock: socket
+ *	@page: page
+ *	@offset: page offset
+ *	@size: total size in bytes
+ *	@flags: flags (MSG_DONTWAIT, ...)
+ *
+ *	Returns the total amount sent in bytes or an error.
+ */
+
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 		    size_t size, int flags)
 {
@@ -3335,6 +3765,18 @@
 }
 EXPORT_SYMBOL(kernel_sendpage);
 
+/**
+ *	kernel_sendpage_locked - send a &page through the locked sock (kernel space)
+ *	@sk: sock
+ *	@page: page
+ *	@offset: page offset
+ *	@size: total size in bytes
+ *	@flags: flags (MSG_DONTWAIT, ...)
+ *
+ *	Returns the total amount sent in bytes or an error.
+ *	Caller must hold @sk.
+ */
+
 int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
 			   size_t size, int flags)
 {
@@ -3348,17 +3790,30 @@
 }
 EXPORT_SYMBOL(kernel_sendpage_locked);
 
+/**
+ *	kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
+ *	@sock: socket
+ *	@how: connection part
+ *
+ *	Returns 0 or an error.
+ */
+
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
 {
 	return sock->ops->shutdown(sock, how);
 }
 EXPORT_SYMBOL(kernel_sock_shutdown);
 
-/* This routine returns the IP overhead imposed by a socket i.e.
- * the length of the underlying IP header, depending on whether
- * this is an IPv4 or IPv6 socket and the length from IP options turned
- * on at the socket. Assumes that the caller has a lock on the socket.
+/**
+ *	kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
+ *	@sk: socket
+ *
+ *	This routine returns the IP overhead imposed by a socket i.e.
+ *	the length of the underlying IP header, depending on whether
+ *	this is an IPv4 or IPv6 socket and the length from IP options turned
+ *	on at the socket. Assumes that the caller has a lock on the socket.
  */
+
 u32 kernel_sock_ip_overhead(struct sock *sk)
 {
 	struct inet_sock *inet;
diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig
index 6cff3f6..e6146c2 100644
--- a/net/strparser/Kconfig
+++ b/net/strparser/Kconfig
@@ -1,4 +1,3 @@
-
+# SPDX-License-Identifier: GPL-2.0-only
 config STREAM_PARSER
-	tristate
-	default n
+	def_bool n
diff --git a/net/strparser/Makefile b/net/strparser/Makefile
index 858a126..9313191 100644
--- a/net/strparser/Makefile
+++ b/net/strparser/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_STREAM_PARSER) += strparser.o
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index da1a676..b3815c1 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Stream Parser
  *
  * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
  */
 
 #include <linux/bpf.h>
@@ -14,7 +11,8 @@
 #include <linux/file.h>
 #include <linux/in.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/init.h>
 #include <linux/net.h>
 #include <linux/netdevice.h>
 #include <linux/poll.h>
@@ -140,13 +138,11 @@
 			/* We are going to append to the frags_list of head.
 			 * Need to unshare the frag_list.
 			 */
-			if (skb_has_frag_list(head)) {
-				err = skb_unclone(head, GFP_ATOMIC);
-				if (err) {
-					STRP_STATS_INCR(strp->stats.mem_fail);
-					desc->error = err;
-					return 0;
-				}
+			err = skb_unclone(head, GFP_ATOMIC);
+			if (err) {
+				STRP_STATS_INCR(strp->stats.mem_fail);
+				desc->error = err;
+				return 0;
 			}
 
 			if (unlikely(skb_shinfo(head)->frag_list)) {
@@ -161,18 +157,14 @@
 					return 0;
 				}
 
-				skb = alloc_skb(0, GFP_ATOMIC);
+				skb = alloc_skb_for_msg(head);
 				if (!skb) {
 					STRP_STATS_INCR(strp->stats.mem_fail);
 					desc->error = -ENOMEM;
 					return 0;
 				}
-				skb->len = head->len;
-				skb->data_len = head->len;
-				skb->truesize = head->truesize;
-				*_strp_msg(skb) = *_strp_msg(head);
+
 				strp->skb_nextp = &head->next;
-				skb_shinfo(skb)->frag_list = head;
 				strp->skb_head = skb;
 				head = skb;
 			} else {
@@ -299,7 +291,7 @@
 			break;
 		}
 
-		/* Positive extra indicates ore bytes than needed for the
+		/* Positive extra indicates more bytes than needed for the
 		 * message
 		 */
 
@@ -547,17 +539,12 @@
 }
 EXPORT_SYMBOL_GPL(strp_check_rcv);
 
-static int __init strp_mod_init(void)
+static int __init strp_dev_init(void)
 {
 	strp_wq = create_singlethread_workqueue("kstrp");
+	if (unlikely(!strp_wq))
+		return -ENOMEM;
 
 	return 0;
 }
-
-static void __exit strp_mod_exit(void)
-{
-	destroy_workqueue(strp_wq);
-}
-module_init(strp_mod_init);
-module_exit(strp_mod_exit);
-MODULE_LICENSE("GPL");
+device_initcall(strp_dev_init);
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index ac09ca8..3bcf985 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config SUNRPC
 	tristate
 	depends on MULTIUSER
@@ -34,6 +35,22 @@
 
 	  If unsure, say Y.
 
+config SUNRPC_DISABLE_INSECURE_ENCTYPES
+	bool "Secure RPC: Disable insecure Kerberos encryption types"
+	depends on RPCSEC_GSS_KRB5
+	default n
+	help
+	  Choose Y here to disable the use of deprecated encryption types
+	  with the Kerberos version 5 GSS-API mechanism (RFC 1964). The
+	  deprecated encryption types include DES-CBC-MD5, DES-CBC-CRC,
+	  and DES-CBC-MD4. These types were deprecated by RFC 6649 because
+	  they were found to be insecure.
+
+	  N is the default because many sites have deployed KDCs and
+	  keytabs that contain only these deprecated encryption types.
+	  Choosing Y prevents the use of known-insecure encryption types
+	  but might result in compatibility problems.
+
 config SUNRPC_DEBUG
 	bool "RPC: Enable dprintk debugging"
 	depends on SUNRPC && SYSCTL
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 090658c..9488600 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -9,7 +9,7 @@
 obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
 
 sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
-	    auth.o auth_null.o auth_unix.o auth_generic.o \
+	    auth.o auth_null.o auth_unix.o \
 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
 	    addr.o rpcb_clnt.o timer.o xdr.o \
 	    sunrpc_syms.o cache.o rpc_pipe.o \
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 2e0a6f9..d024af4 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2009, Oracle.  All rights reserved.
  *
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 305ecea..cdb05b4 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/auth.c
  *
@@ -17,9 +18,7 @@
 #include <linux/sunrpc/gss_api.h>
 #include <linux/spinlock.h>
 
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY	RPCDBG_AUTH
-#endif
+#include <trace/events/sunrpc.h>
 
 #define RPC_CREDCACHE_DEFAULT_HASHBITS	(4)
 struct rpc_cred_cache {
@@ -30,16 +29,32 @@
 
 static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
 
-static DEFINE_SPINLOCK(rpc_authflavor_lock);
-static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
-	&authnull_ops,		/* AUTH_NULL */
-	&authunix_ops,		/* AUTH_UNIX */
+static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
+	[RPC_AUTH_NULL] = (const struct rpc_authops __force __rcu *)&authnull_ops,
+	[RPC_AUTH_UNIX] = (const struct rpc_authops __force __rcu *)&authunix_ops,
 	NULL,			/* others can be loadable modules */
 };
 
 static LIST_HEAD(cred_unused);
 static unsigned long number_cred_unused;
 
+static struct cred machine_cred = {
+	.usage = ATOMIC_INIT(1),
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	.magic = CRED_MAGIC,
+#endif
+};
+
+/*
+ * Return the machine_cred pointer to be used whenever
+ * a generic machine credential is needed.
+ */
+const struct cred *rpc_machine_cred(void)
+{
+	return &machine_cred;
+}
+EXPORT_SYMBOL_GPL(rpc_machine_cred);
+
 #define MAX_HASHTABLE_BITS (14)
 static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
 {
@@ -93,39 +108,65 @@
 int
 rpcauth_register(const struct rpc_authops *ops)
 {
+	const struct rpc_authops *old;
 	rpc_authflavor_t flavor;
-	int ret = -EPERM;
 
 	if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
 		return -EINVAL;
-	spin_lock(&rpc_authflavor_lock);
-	if (auth_flavors[flavor] == NULL) {
-		auth_flavors[flavor] = ops;
-		ret = 0;
-	}
-	spin_unlock(&rpc_authflavor_lock);
-	return ret;
+	old = cmpxchg((const struct rpc_authops ** __force)&auth_flavors[flavor], NULL, ops);
+	if (old == NULL || old == ops)
+		return 0;
+	return -EPERM;
 }
 EXPORT_SYMBOL_GPL(rpcauth_register);
 
 int
 rpcauth_unregister(const struct rpc_authops *ops)
 {
+	const struct rpc_authops *old;
 	rpc_authflavor_t flavor;
-	int ret = -EPERM;
 
 	if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
 		return -EINVAL;
-	spin_lock(&rpc_authflavor_lock);
-	if (auth_flavors[flavor] == ops) {
-		auth_flavors[flavor] = NULL;
-		ret = 0;
-	}
-	spin_unlock(&rpc_authflavor_lock);
-	return ret;
+
+	old = cmpxchg((const struct rpc_authops ** __force)&auth_flavors[flavor], ops, NULL);
+	if (old == ops || old == NULL)
+		return 0;
+	return -EPERM;
 }
 EXPORT_SYMBOL_GPL(rpcauth_unregister);
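Replacing rpc_authflavor_lock with cmpxchg() makes registration a single atomic claim or release of the flavor slot, which in turn lets readers use RCU (rcu_dereference() plus try_module_get()) instead of taking a spinlock. The bare registry pattern, with a hypothetical ops table:

#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/rcupdate.h>

struct demo_ops;	/* hypothetical; only pointers are needed here */

#define MAX_SLOTS 8
static const struct demo_ops __rcu *slots[MAX_SLOTS];

static int demo_register(unsigned int idx, const struct demo_ops *ops)
{
	const struct demo_ops *old;

	if (idx >= MAX_SLOTS)
		return -EINVAL;
	/* Claim the slot only if it is currently empty. */
	old = cmpxchg((const struct demo_ops ** __force)&slots[idx], NULL, ops);
	return (old == NULL || old == ops) ? 0 : -EPERM;
}

static int demo_unregister(unsigned int idx, const struct demo_ops *ops)
{
	const struct demo_ops *old;

	if (idx >= MAX_SLOTS)
		return -EINVAL;
	/* Release the slot only if we still own it. */
	old = cmpxchg((const struct demo_ops ** __force)&slots[idx], ops, NULL);
	return (old == ops || old == NULL) ? 0 : -EPERM;
}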
 
+static const struct rpc_authops *
+rpcauth_get_authops(rpc_authflavor_t flavor)
+{
+	const struct rpc_authops *ops;
+
+	if (flavor >= RPC_AUTH_MAXFLAVOR)
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(auth_flavors[flavor]);
+	if (ops == NULL) {
+		rcu_read_unlock();
+		request_module("rpc-auth-%u", flavor);
+		rcu_read_lock();
+		ops = rcu_dereference(auth_flavors[flavor]);
+		if (ops == NULL)
+			goto out;
+	}
+	if (!try_module_get(ops->owner))
+		ops = NULL;
+out:
+	rcu_read_unlock();
+	return ops;
+}
+
+static void
+rpcauth_put_authops(const struct rpc_authops *ops)
+{
+	module_put(ops->owner);
+}
+
 /**
  * rpcauth_get_pseudoflavor - check if security flavor is supported
  * @flavor: a security flavor
@@ -138,25 +179,16 @@
 rpc_authflavor_t
 rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info)
 {
-	const struct rpc_authops *ops;
+	const struct rpc_authops *ops = rpcauth_get_authops(flavor);
 	rpc_authflavor_t pseudoflavor;
 
-	ops = auth_flavors[flavor];
-	if (ops == NULL)
-		request_module("rpc-auth-%u", flavor);
-	spin_lock(&rpc_authflavor_lock);
-	ops = auth_flavors[flavor];
-	if (ops == NULL || !try_module_get(ops->owner)) {
-		spin_unlock(&rpc_authflavor_lock);
+	if (!ops)
 		return RPC_AUTH_MAXFLAVOR;
-	}
-	spin_unlock(&rpc_authflavor_lock);
-
 	pseudoflavor = flavor;
 	if (ops->info2flavor != NULL)
 		pseudoflavor = ops->info2flavor(info);
 
-	module_put(ops->owner);
+	rpcauth_put_authops(ops);
 	return pseudoflavor;
 }
 EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor);
@@ -176,25 +208,15 @@
 	const struct rpc_authops *ops;
 	int result;
 
-	if (flavor >= RPC_AUTH_MAXFLAVOR)
-		return -EINVAL;
-
-	ops = auth_flavors[flavor];
+	ops = rpcauth_get_authops(flavor);
 	if (ops == NULL)
-		request_module("rpc-auth-%u", flavor);
-	spin_lock(&rpc_authflavor_lock);
-	ops = auth_flavors[flavor];
-	if (ops == NULL || !try_module_get(ops->owner)) {
-		spin_unlock(&rpc_authflavor_lock);
 		return -ENOENT;
-	}
-	spin_unlock(&rpc_authflavor_lock);
 
 	result = -ENOENT;
 	if (ops->flavor2info != NULL)
 		result = ops->flavor2info(pseudoflavor, info);
 
-	module_put(ops->owner);
+	rpcauth_put_authops(ops);
 	return result;
 }
 EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
@@ -212,15 +234,13 @@
 int
 rpcauth_list_flavors(rpc_authflavor_t *array, int size)
 {
-	rpc_authflavor_t flavor;
-	int result = 0;
+	const struct rpc_authops *ops;
+	rpc_authflavor_t flavor, pseudos[4];
+	int i, len, result = 0;
 
-	spin_lock(&rpc_authflavor_lock);
+	rcu_read_lock();
 	for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) {
-		const struct rpc_authops *ops = auth_flavors[flavor];
-		rpc_authflavor_t pseudos[4];
-		int i, len;
-
+		ops = rcu_dereference(auth_flavors[flavor]);
 		if (result >= size) {
 			result = -ENOMEM;
 			break;
@@ -245,9 +265,7 @@
 			array[result++] = pseudos[i];
 		}
 	}
-	spin_unlock(&rpc_authflavor_lock);
-
-	dprintk("RPC:       %s returns %d\n", __func__, result);
+	rcu_read_unlock();
 	return result;
 }
 EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
@@ -255,25 +273,17 @@
 struct rpc_auth *
 rpcauth_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
-	struct rpc_auth		*auth;
+	struct rpc_auth	*auth = ERR_PTR(-EINVAL);
 	const struct rpc_authops *ops;
-	u32			flavor = pseudoflavor_to_flavor(args->pseudoflavor);
+	u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
 
-	auth = ERR_PTR(-EINVAL);
-	if (flavor >= RPC_AUTH_MAXFLAVOR)
+	ops = rpcauth_get_authops(flavor);
+	if (ops == NULL)
 		goto out;
 
-	if ((ops = auth_flavors[flavor]) == NULL)
-		request_module("rpc-auth-%u", flavor);
-	spin_lock(&rpc_authflavor_lock);
-	ops = auth_flavors[flavor];
-	if (ops == NULL || !try_module_get(ops->owner)) {
-		spin_unlock(&rpc_authflavor_lock);
-		goto out;
-	}
-	spin_unlock(&rpc_authflavor_lock);
 	auth = ops->create(args, clnt);
-	module_put(ops->owner);
+
+	rpcauth_put_authops(ops);
 	if (IS_ERR(auth))
 		return auth;
 	if (clnt->cl_auth)
@@ -288,32 +298,37 @@
 void
 rpcauth_release(struct rpc_auth *auth)
 {
-	if (!atomic_dec_and_test(&auth->au_count))
+	if (!refcount_dec_and_test(&auth->au_count))
 		return;
 	auth->au_ops->destroy(auth);
 }
 
 static DEFINE_SPINLOCK(rpc_credcache_lock);
 
-static void
+/*
+ * On success, the caller is responsible for freeing the reference
+ * held by the hashtable
+ */
+static bool
 rpcauth_unhash_cred_locked(struct rpc_cred *cred)
 {
+	if (!test_and_clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+		return false;
 	hlist_del_rcu(&cred->cr_hash);
-	smp_mb__before_atomic();
-	clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
+	return true;
 }
 
-static int
+static bool
 rpcauth_unhash_cred(struct rpc_cred *cred)
 {
 	spinlock_t *cache_lock;
-	int ret;
+	bool ret;
 
+	if (!test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+		return false;
 	cache_lock = &cred->cr_auth->au_credcache->lock;
 	spin_lock(cache_lock);
-	ret = atomic_read(&cred->cr_count) == 0;
-	if (ret)
-		rpcauth_unhash_cred_locked(cred);
+	ret = rpcauth_unhash_cred_locked(cred);
 	spin_unlock(cache_lock);
 	return ret;
 }
@@ -345,29 +360,6 @@
 }
 EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
 
-/*
- * Setup a credential key lifetime timeout notification
- */
-int
-rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
-{
-	if (!cred->cr_auth->au_ops->key_timeout)
-		return 0;
-	return cred->cr_auth->au_ops->key_timeout(auth, cred);
-}
-EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
-
-bool
-rpcauth_cred_key_to_expire(struct rpc_auth *auth, struct rpc_cred *cred)
-{
-	if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT)
-		return false;
-	if (!cred->cr_ops->crkey_to_expire)
-		return false;
-	return cred->cr_ops->crkey_to_expire(cred);
-}
-EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
-
 char *
 rpcauth_stringify_acceptor(struct rpc_cred *cred)
 {
@@ -392,6 +384,44 @@
 	}
 }
 
+static void
+rpcauth_lru_add_locked(struct rpc_cred *cred)
+{
+	if (!list_empty(&cred->cr_lru))
+		return;
+	number_cred_unused++;
+	list_add_tail(&cred->cr_lru, &cred_unused);
+}
+
+static void
+rpcauth_lru_add(struct rpc_cred *cred)
+{
+	if (!list_empty(&cred->cr_lru))
+		return;
+	spin_lock(&rpc_credcache_lock);
+	rpcauth_lru_add_locked(cred);
+	spin_unlock(&rpc_credcache_lock);
+}
+
+static void
+rpcauth_lru_remove_locked(struct rpc_cred *cred)
+{
+	if (list_empty(&cred->cr_lru))
+		return;
+	number_cred_unused--;
+	list_del_init(&cred->cr_lru);
+}
+
+static void
+rpcauth_lru_remove(struct rpc_cred *cred)
+{
+	if (list_empty(&cred->cr_lru))
+		return;
+	spin_lock(&rpc_credcache_lock);
+	rpcauth_lru_remove_locked(cred);
+	spin_unlock(&rpc_credcache_lock);
+}
+
 /*
  * Clear the RPC credential cache, and delete those credentials
  * that are not referenced.
@@ -411,13 +441,10 @@
 		head = &cache->hashtable[i];
 		while (!hlist_empty(head)) {
 			cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
-			get_rpccred(cred);
-			if (!list_empty(&cred->cr_lru)) {
-				list_del(&cred->cr_lru);
-				number_cred_unused--;
-			}
-			list_add_tail(&cred->cr_lru, &free);
 			rpcauth_unhash_cred_locked(cred);
+			/* Note: We now hold a reference to cred */
+			rpcauth_lru_remove_locked(cred);
+			list_add_tail(&cred->cr_lru, &free);
 		}
 	}
 	spin_unlock(&cache->lock);
@@ -451,7 +478,6 @@
 static long
 rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
 {
-	spinlock_t *cache_lock;
 	struct rpc_cred *cred, *next;
 	unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
 	long freed = 0;
@@ -460,32 +486,24 @@
 
 		if (nr_to_scan-- == 0)
 			break;
+		if (refcount_read(&cred->cr_count) > 1) {
+			rpcauth_lru_remove_locked(cred);
+			continue;
+		}
 		/*
 		 * Enforce a 60 second garbage collection moratorium
 		 * Note that the cred_unused list must be time-ordered.
 		 */
-		if (time_in_range(cred->cr_expire, expired, jiffies) &&
-		    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
-			freed = SHRINK_STOP;
-			break;
-		}
-
-		list_del_init(&cred->cr_lru);
-		number_cred_unused--;
-		freed++;
-		if (atomic_read(&cred->cr_count) != 0)
+		if (!time_in_range(cred->cr_expire, expired, jiffies))
+			continue;
+		if (!rpcauth_unhash_cred(cred))
 			continue;
 
-		cache_lock = &cred->cr_auth->au_credcache->lock;
-		spin_lock(cache_lock);
-		if (atomic_read(&cred->cr_count) == 0) {
-			get_rpccred(cred);
-			list_add_tail(&cred->cr_lru, free);
-			rpcauth_unhash_cred_locked(cred);
-		}
-		spin_unlock(cache_lock);
+		rpcauth_lru_remove_locked(cred);
+		freed++;
+		list_add_tail(&cred->cr_lru, free);
 	}
-	return freed;
+	return freed ? freed : SHRINK_STOP;
 }
 
 static unsigned long
@@ -560,29 +578,15 @@
 	hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
 		if (!entry->cr_ops->crmatch(acred, entry, flags))
 			continue;
-		if (flags & RPCAUTH_LOOKUP_RCU) {
-			if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
-			    !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
-				cred = entry;
-			break;
-		}
-		spin_lock(&cache->lock);
-		if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
-			spin_unlock(&cache->lock);
-			continue;
-		}
 		cred = get_rpccred(entry);
-		spin_unlock(&cache->lock);
-		break;
+		if (cred)
+			break;
 	}
 	rcu_read_unlock();
 
 	if (cred != NULL)
 		goto found;
 
-	if (flags & RPCAUTH_LOOKUP_RCU)
-		return ERR_PTR(-ECHILD);
-
 	new = auth->au_ops->crcreate(auth, acred, flags, gfp);
 	if (IS_ERR(new)) {
 		cred = new;
@@ -594,11 +598,13 @@
 		if (!entry->cr_ops->crmatch(acred, entry, flags))
 			continue;
 		cred = get_rpccred(entry);
-		break;
+		if (cred)
+			break;
 	}
 	if (cred == NULL) {
 		cred = new;
 		set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
+		refcount_inc(&cred->cr_count);
 		hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
 	} else
 		list_add_tail(&new->cr_lru, &free);
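
In both lookup loops get_rpccred() is now allowed to fail: with cr_count converted to refcount_t, a count of zero means the cred is already being torn down, so the loop must skip it instead of resurrecting it. A sketch of the presumed shape of the new get_rpccred(), assuming it is built on refcount_inc_not_zero() (its real definition lives in the sunrpc headers, not in this hunk):

#include <linux/refcount.h>
#include <linux/sunrpc/auth.h>

static inline struct rpc_cred *get_rpccred_sketch(struct rpc_cred *cred)
{
        /* Fails once the count has reached zero, so a lookup can never
         * hand back a cred that a concurrent put is destroying. */
        if (cred != NULL && refcount_inc_not_zero(&cred->cr_count))
                return cred;
        return NULL;
}

This is why the callers above changed from an unconditional break to "if (cred) break;".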
@@ -627,13 +633,8 @@
 	struct rpc_cred *ret;
 	const struct cred *cred = current_cred();
 
-	dprintk("RPC:       looking up %s cred\n",
-		auth->au_ops->au_name);
-
 	memset(&acred, 0, sizeof(acred));
-	acred.uid = cred->fsuid;
-	acred.gid = cred->fsgid;
-	acred.group_info = cred->group_info;
+	acred.cred = cred;
 	ret = auth->au_ops->lookup_cred(auth, &acred, flags);
 	return ret;
 }
@@ -645,34 +646,40 @@
 {
 	INIT_HLIST_NODE(&cred->cr_hash);
 	INIT_LIST_HEAD(&cred->cr_lru);
-	atomic_set(&cred->cr_count, 1);
+	refcount_set(&cred->cr_count, 1);
 	cred->cr_auth = auth;
+	cred->cr_flags = 0;
 	cred->cr_ops = ops;
 	cred->cr_expire = jiffies;
-	cred->cr_uid = acred->uid;
+	cred->cr_cred = get_cred(acred->cred);
 }
 EXPORT_SYMBOL_GPL(rpcauth_init_cred);
 
-struct rpc_cred *
-rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
-{
-	dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
-			cred->cr_auth->au_ops->au_name, cred);
-	return get_rpccred(cred);
-}
-EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
-
 static struct rpc_cred *
 rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
 {
 	struct rpc_auth *auth = task->tk_client->cl_auth;
 	struct auth_cred acred = {
-		.uid = GLOBAL_ROOT_UID,
-		.gid = GLOBAL_ROOT_GID,
+		.cred = get_task_cred(&init_task),
+	};
+	struct rpc_cred *ret;
+
+	ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
+	put_cred(acred.cred);
+	return ret;
+}
+
+static struct rpc_cred *
+rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags)
+{
+	struct rpc_auth *auth = task->tk_client->cl_auth;
+	struct auth_cred acred = {
+		.principal = task->tk_client->cl_principal,
+		.cred = init_task.cred,
 	};
 
-	dprintk("RPC: %5u looking up %s cred\n",
-		task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
+	if (!acred.principal)
+		return NULL;
 	return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
 }
 
@@ -681,24 +688,37 @@
 {
 	struct rpc_auth *auth = task->tk_client->cl_auth;
 
-	dprintk("RPC: %5u looking up %s cred\n",
-		task->tk_pid, auth->au_ops->au_name);
 	return rpcauth_lookupcred(auth, lookupflags);
 }
 
 static int
-rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
+rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
-	struct rpc_cred *new;
+	struct rpc_cred *new = NULL;
 	int lookupflags = 0;
+	struct rpc_auth *auth = task->tk_client->cl_auth;
+	struct auth_cred acred = {
+		.cred = cred,
+	};
 
 	if (flags & RPC_TASK_ASYNC)
 		lookupflags |= RPCAUTH_LOOKUP_NEW;
-	if (cred != NULL)
-		new = cred->cr_ops->crbind(task, cred, lookupflags);
-	else if (flags & RPC_TASK_ROOTCREDS)
+	if (task->tk_op_cred)
+		/* Task must use exactly this rpc_cred */
+		new = get_rpccred(task->tk_op_cred);
+	else if (cred != NULL && cred != &machine_cred)
+		new = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
+	else if (cred == &machine_cred)
+		new = rpcauth_bind_machine_cred(task, lookupflags);
+
+	/* If machine cred couldn't be bound, try a root cred */
+	if (new)
+		;
+	else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS))
 		new = rpcauth_bind_root_cred(task, lookupflags);
+	else if (flags & RPC_TASK_NULLCREDS)
+		new = authnull_ops.lookup_cred(NULL, NULL, 0);
 	else
 		new = rpcauth_bind_new_cred(task, lookupflags);
 	if (IS_ERR(new))
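
Condensed from the branches above, the rewritten rpcauth_bindcred() chooses the rpc_cred in a fixed order:

/* Cred selection order in the rewritten rpcauth_bindcred():
 *
 *  1. task->tk_op_cred            - a pinned rpc_cred, used verbatim
 *  2. cred != &machine_cred       - look up an rpc_cred for the
 *                                   caller-supplied struct cred
 *  3. cred == &machine_cred       - bind the client's machine
 *                                   principal, if one is configured
 *  4. root-cred fallback          - when 3 found no principal, or
 *                                   RPC_TASK_ROOTCREDS was requested
 *  5. RPC_TASK_NULLCREDS          - an AUTH_NULL cred
 *  6. otherwise                   - the current task's credentials,
 *                                   via rpcauth_bind_new_cred()
 */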
@@ -713,108 +733,138 @@
 {
 	if (cred == NULL)
 		return;
-	/* Fast path for unhashed credentials */
-	if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) {
-		if (atomic_dec_and_test(&cred->cr_count))
-			cred->cr_ops->crdestroy(cred);
-		return;
+	rcu_read_lock();
+	if (refcount_dec_and_test(&cred->cr_count))
+		goto destroy;
+	if (refcount_read(&cred->cr_count) != 1 ||
+	    !test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+		goto out;
+	if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
+		cred->cr_expire = jiffies;
+		rpcauth_lru_add(cred);
+		/* Race breaker */
+		if (unlikely(!test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags)))
+			rpcauth_lru_remove(cred);
+	} else if (rpcauth_unhash_cred(cred)) {
+		rpcauth_lru_remove(cred);
+		if (refcount_dec_and_test(&cred->cr_count))
+			goto destroy;
 	}
-
-	if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
-		return;
-	if (!list_empty(&cred->cr_lru)) {
-		number_cred_unused--;
-		list_del_init(&cred->cr_lru);
-	}
-	if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
-		if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
-			cred->cr_expire = jiffies;
-			list_add_tail(&cred->cr_lru, &cred_unused);
-			number_cred_unused++;
-			goto out_nodestroy;
-		}
-		if (!rpcauth_unhash_cred(cred)) {
-			/* We were hashed and someone looked us up... */
-			goto out_nodestroy;
-		}
-	}
-	spin_unlock(&rpc_credcache_lock);
-	cred->cr_ops->crdestroy(cred);
+out:
+	rcu_read_unlock();
 	return;
-out_nodestroy:
-	spin_unlock(&rpc_credcache_lock);
+destroy:
+	rcu_read_unlock();
+	cred->cr_ops->crdestroy(cred);
 }
 EXPORT_SYMBOL_GPL(put_rpccred);
 
-__be32 *
-rpcauth_marshcred(struct rpc_task *task, __be32 *p)
+/**
+ * rpcauth_marshcred - Append RPC credential to end of @xdr
+ * @task: controlling RPC task
+ * @xdr: xdr_stream containing initial portion of RPC Call header
+ *
+ * On success, an appropriate verifier is added to @xdr, @xdr is
+ * updated to point past the verifier, and zero is returned.
+ * Otherwise, @xdr is in an undefined state and a negative errno
+ * is returned.
+ */
+int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct rpc_cred	*cred = task->tk_rqstp->rq_cred;
+	const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
 
-	dprintk("RPC: %5u marshaling %s cred %p\n",
-		task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
-
-	return cred->cr_ops->crmarshal(task, p);
+	return ops->crmarshal(task, xdr);
 }
 
-__be32 *
-rpcauth_checkverf(struct rpc_task *task, __be32 *p)
+/**
+ * rpcauth_wrap_req_encode - XDR encode the RPC procedure
+ * @task: controlling RPC task
+ * @xdr: stream where on-the-wire bytes are to be marshalled
+ *
+ * On success, @xdr contains the encoded and wrapped message.
+ * Otherwise, @xdr is in an undefined state.
+ */
+int rpcauth_wrap_req_encode(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct rpc_cred	*cred = task->tk_rqstp->rq_cred;
+	kxdreproc_t encode = task->tk_msg.rpc_proc->p_encode;
 
-	dprintk("RPC: %5u validating %s cred %p\n",
-		task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
-
-	return cred->cr_ops->crvalidate(task, p);
-}
-
-static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
-				   __be32 *data, void *obj)
-{
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
-	encode(rqstp, &xdr, obj);
-}
-
-int
-rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
-		__be32 *data, void *obj)
-{
-	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
-
-	dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
-			task->tk_pid, cred->cr_ops->cr_name, cred);
-	if (cred->cr_ops->crwrap_req)
-		return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
-	/* By default, we encode the arguments normally. */
-	rpcauth_wrap_req_encode(encode, rqstp, data, obj);
+	encode(task->tk_rqstp, xdr, task->tk_msg.rpc_argp);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(rpcauth_wrap_req_encode);
 
-static int
-rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
-			  __be32 *data, void *obj)
+/**
+ * rpcauth_wrap_req - XDR encode and wrap the RPC procedure
+ * @task: controlling RPC task
+ * @xdr: stream where on-the-wire bytes are to be marshalled
+ *
+ * On success, @xdr contains the encoded and wrapped message,
+ * and zero is returned. Otherwise, @xdr is in an undefined
+ * state and a negative errno is returned.
+ */
+int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct xdr_stream xdr;
+	const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
-	return decode(rqstp, &xdr, obj);
+	return ops->crwrap_req(task, xdr);
 }
 
+/**
+ * rpcauth_checkverf - Validate verifier in RPC Reply header
+ * @task: controlling RPC task
+ * @xdr: xdr_stream containing RPC Reply header
+ *
+ * On success, @xdr is updated to point past the verifier and
+ * zero is returned. Otherwise, @xdr is in an undefined state
+ * and a negative errno is returned.
+ */
 int
-rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
-		__be32 *data, void *obj)
+rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr)
+{
+	const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
+
+	return ops->crvalidate(task, xdr);
+}
+
+/**
+ * rpcauth_unwrap_resp_decode - Invoke XDR decode function
+ * @task: controlling RPC task
+ * @xdr: stream where the Reply message resides
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
+int
+rpcauth_unwrap_resp_decode(struct rpc_task *task, struct xdr_stream *xdr)
+{
+	kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
+
+	return decode(task->tk_rqstp, xdr, task->tk_msg.rpc_resp);
+}
+EXPORT_SYMBOL_GPL(rpcauth_unwrap_resp_decode);
+
+/**
+ * rpcauth_unwrap_resp - Invoke unwrap and decode function for the cred
+ * @task: controlling RPC task
+ * @xdr: stream where the Reply message resides
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
+int
+rpcauth_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
+{
+	const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
+
+	return ops->crunwrap_resp(task, xdr);
+}
+
+bool
+rpcauth_xmit_need_reencode(struct rpc_task *task)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 
-	dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
-			task->tk_pid, cred->cr_ops->cr_name, cred);
-	if (cred->cr_ops->crunwrap_resp)
-		return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
-						   data, obj);
-	/* By default, we decode the arguments normally. */
-	return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
+	if (!cred || !cred->cr_ops->crneed_reencode)
+		return false;
+	return cred->cr_ops->crneed_reencode(task);
 }
 
 int
@@ -830,8 +880,6 @@
 			goto out;
 		cred = task->tk_rqstp->rq_cred;
 	}
-	dprintk("RPC: %5u refreshing %s cred %p\n",
-		task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
 
 	err = cred->cr_ops->crrefresh(task);
 out:
@@ -845,8 +893,6 @@
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 
-	dprintk("RPC: %5u invalidating %s cred %p\n",
-		task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
 	if (cred)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 }
@@ -873,15 +919,10 @@
 	err = rpc_init_authunix();
 	if (err < 0)
 		goto out1;
-	err = rpc_init_generic_auth();
-	if (err < 0)
-		goto out2;
 	err = register_shrinker(&rpc_cred_shrinker);
 	if (err < 0)
-		goto out3;
+		goto out2;
 	return 0;
-out3:
-	rpc_destroy_generic_auth();
 out2:
 	rpc_destroy_authunix();
 out1:
@@ -891,6 +932,5 @@
 void rpcauth_remove_module(void)
 {
 	rpc_destroy_authunix();
-	rpc_destroy_generic_auth();
 	unregister_shrinker(&rpc_cred_shrinker);
 }
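
Taken together, the auth.c changes complete the switch from raw __be32 pointer juggling to struct xdr_stream: crmarshal, crvalidate, crwrap_req and crunwrap_resp all consume a stream and report negative errnos, so a NULL return can no longer be confused with encoded data. A hedged sketch of a call site under the new contract (the real consumers are in the sunrpc client code; this wrapper is illustrative only):

/* Illustrative only: composes the two Call-side hooks the way a
 * transmit path would, under the new errno-returning contract. */
static int rpc_encode_header_and_body(struct rpc_task *task,
                                      struct xdr_stream *xdr)
{
        int status;

        status = rpcauth_marshcred(task, xdr);  /* credential + verifier */
        if (status < 0)
                return status;
        return rpcauth_wrap_req(task, xdr);     /* wrap/encode the body */
}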
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
deleted file mode 100644
index 1ac08dc..0000000
--- a/net/sunrpc/auth_generic.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Generic RPC credential
- *
- * Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com>
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/sunrpc/auth.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/sched.h>
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY	RPCDBG_AUTH
-#endif
-
-#define RPC_MACHINE_CRED_USERID		GLOBAL_ROOT_UID
-#define RPC_MACHINE_CRED_GROUPID	GLOBAL_ROOT_GID
-
-struct generic_cred {
-	struct rpc_cred gc_base;
-	struct auth_cred acred;
-};
-
-static struct rpc_auth generic_auth;
-static const struct rpc_credops generic_credops;
-
-/*
- * Public call interface
- */
-struct rpc_cred *rpc_lookup_cred(void)
-{
-	return rpcauth_lookupcred(&generic_auth, 0);
-}
-EXPORT_SYMBOL_GPL(rpc_lookup_cred);
-
-struct rpc_cred *
-rpc_lookup_generic_cred(struct auth_cred *acred, int flags, gfp_t gfp)
-{
-	return rpcauth_lookup_credcache(&generic_auth, acred, flags, gfp);
-}
-EXPORT_SYMBOL_GPL(rpc_lookup_generic_cred);
-
-struct rpc_cred *rpc_lookup_cred_nonblock(void)
-{
-	return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
-}
-EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock);
-
-/*
- * Public call interface for looking up machine creds.
- */
-struct rpc_cred *rpc_lookup_machine_cred(const char *service_name)
-{
-	struct auth_cred acred = {
-		.uid = RPC_MACHINE_CRED_USERID,
-		.gid = RPC_MACHINE_CRED_GROUPID,
-		.principal = service_name,
-		.machine_cred = 1,
-	};
-
-	dprintk("RPC:       looking up machine cred for service %s\n",
-			service_name);
-	return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0);
-}
-EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
-
-static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
-		struct rpc_cred *cred, int lookupflags)
-{
-	struct rpc_auth *auth = task->tk_client->cl_auth;
-	struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
-
-	return auth->au_ops->lookup_cred(auth, acred, lookupflags);
-}
-
-static int
-generic_hash_cred(struct auth_cred *acred, unsigned int hashbits)
-{
-	return hash_64(from_kgid(&init_user_ns, acred->gid) |
-		((u64)from_kuid(&init_user_ns, acred->uid) <<
-			(sizeof(gid_t) * 8)), hashbits);
-}
-
-/*
- * Lookup generic creds for current process
- */
-static struct rpc_cred *
-generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
-{
-	return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL);
-}
-
-static struct rpc_cred *
-generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
-{
-	struct generic_cred *gcred;
-
-	gcred = kmalloc(sizeof(*gcred), gfp);
-	if (gcred == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops);
-	gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
-
-	gcred->acred.uid = acred->uid;
-	gcred->acred.gid = acred->gid;
-	gcred->acred.group_info = acred->group_info;
-	gcred->acred.ac_flags = 0;
-	if (gcred->acred.group_info != NULL)
-		get_group_info(gcred->acred.group_info);
-	gcred->acred.machine_cred = acred->machine_cred;
-	gcred->acred.principal = acred->principal;
-
-	dprintk("RPC:       allocated %s cred %p for uid %d gid %d\n",
-			gcred->acred.machine_cred ? "machine" : "generic",
-			gcred,
-			from_kuid(&init_user_ns, acred->uid),
-			from_kgid(&init_user_ns, acred->gid));
-	return &gcred->gc_base;
-}
-
-static void
-generic_free_cred(struct rpc_cred *cred)
-{
-	struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
-
-	dprintk("RPC:       generic_free_cred %p\n", gcred);
-	if (gcred->acred.group_info != NULL)
-		put_group_info(gcred->acred.group_info);
-	kfree(gcred);
-}
-
-static void
-generic_free_cred_callback(struct rcu_head *head)
-{
-	struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu);
-	generic_free_cred(cred);
-}
-
-static void
-generic_destroy_cred(struct rpc_cred *cred)
-{
-	call_rcu(&cred->cr_rcu, generic_free_cred_callback);
-}
-
-static int
-machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flags)
-{
-	if (!gcred->acred.machine_cred ||
-	    gcred->acred.principal != acred->principal ||
-	    !uid_eq(gcred->acred.uid, acred->uid) ||
-	    !gid_eq(gcred->acred.gid, acred->gid))
-		return 0;
-	return 1;
-}
-
-/*
- * Match credentials against current process creds.
- */
-static int
-generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
-{
-	struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
-	int i;
-
-	if (acred->machine_cred)
-		return machine_cred_match(acred, gcred, flags);
-
-	if (!uid_eq(gcred->acred.uid, acred->uid) ||
-	    !gid_eq(gcred->acred.gid, acred->gid) ||
-	    gcred->acred.machine_cred != 0)
-		goto out_nomatch;
-
-	/* Optimisation in the case where pointers are identical... */
-	if (gcred->acred.group_info == acred->group_info)
-		goto out_match;
-
-	/* Slow path... */
-	if (gcred->acred.group_info->ngroups != acred->group_info->ngroups)
-		goto out_nomatch;
-	for (i = 0; i < gcred->acred.group_info->ngroups; i++) {
-		if (!gid_eq(gcred->acred.group_info->gid[i],
-				acred->group_info->gid[i]))
-			goto out_nomatch;
-	}
-out_match:
-	return 1;
-out_nomatch:
-	return 0;
-}
-
-int __init rpc_init_generic_auth(void)
-{
-	return rpcauth_init_credcache(&generic_auth);
-}
-
-void rpc_destroy_generic_auth(void)
-{
-	rpcauth_destroy_credcache(&generic_auth);
-}
-
-/*
- * Test the current time (now) against the underlying credential key expiry
- * minus a timeout and set up notification.
- *
- * The normal case:
- * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
- * the RPC_CRED_NOTIFY_TIMEOUT flag so that the underlying credential's
- * rpc_credops crmatch routine notifies this generic cred when its key
- * expiration comes within RPC_KEY_EXPIRE_TIMEO, and return 0.
- *
- * The error case:
- * If the underlying cred lookup fails, return -EACCES.
- *
- * The 'almost' error case:
- * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
- * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_EXPIRE_SOON bit
- * on the acred ac_flags and return 0.
- */
-static int
-generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
-{
-	struct auth_cred *acred = &container_of(cred, struct generic_cred,
-						gc_base)->acred;
-	struct rpc_cred *tcred;
-	int ret = 0;
-
-
-	/* Fast track for non crkey_timeout (no key) underlying credentials */
-	if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT)
-		return 0;
-
-	/* Fast track for the normal case */
-	if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
-		return 0;
-
-	/* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */
-	tcred = auth->au_ops->lookup_cred(auth, acred, 0);
-	if (IS_ERR(tcred))
-		return -EACCES;
-
-	/* Test for the almost error case */
-	ret = tcred->cr_ops->crkey_timeout(tcred);
-	if (ret != 0) {
-		set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
-		ret = 0;
-	} else {
-		/* In case underlying cred key has been reset */
-		if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
-					&acred->ac_flags))
-			dprintk("RPC:        UID %d Credential key reset\n",
-				from_kuid(&init_user_ns, tcred->cr_uid));
-		/* set up fasttrack for the normal case */
-		set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
-	}
-
-	put_rpccred(tcred);
-	return ret;
-}
-
-static const struct rpc_authops generic_auth_ops = {
-	.owner = THIS_MODULE,
-	.au_name = "Generic",
-	.hash_cred = generic_hash_cred,
-	.lookup_cred = generic_lookup_cred,
-	.crcreate = generic_create_cred,
-	.key_timeout = generic_key_timeout,
-};
-
-static struct rpc_auth generic_auth = {
-	.au_ops = &generic_auth_ops,
-	.au_count = ATOMIC_INIT(0),
-};
-
-static bool generic_key_to_expire(struct rpc_cred *cred)
-{
-	struct auth_cred *acred = &container_of(cred, struct generic_cred,
-						gc_base)->acred;
-	return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
-}
-
-static const struct rpc_credops generic_credops = {
-	.cr_name = "Generic cred",
-	.crdestroy = generic_destroy_cred,
-	.crbind = generic_bind_cred,
-	.crmatch = generic_match,
-	.crkey_to_expire = generic_key_to_expire,
-};
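
Deleting auth_generic.c is possible because an auth_cred no longer carries copied-out uid/gid/group_info fields: it simply points at a struct cred, which already aggregates all of them with its own reference counting. The whole translation layer this file implemented collapses into the two-line change seen in rpcauth_lookupcred() above:

/* Before: the pieces of the task credential were copied out. */
struct auth_cred acred_old = {
        .uid        = cred->fsuid,
        .gid        = cred->fsgid,
        .group_info = cred->group_info,
};

/* After: reference the VFS credential itself; rpcauth_init_cred()
 * then takes its own reference with get_cred(acred->cred). */
struct auth_cred acred_new = {
        .cred = cred,
};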
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index c374268..4a29f4c 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -7,7 +7,7 @@
 
 auth_rpcgss-y := auth_gss.o gss_generic_token.o \
 	gss_mech_switch.o svcauth_gss.o \
-	gss_rpc_upcall.o gss_rpc_xdr.o
+	gss_rpc_upcall.o gss_rpc_xdr.o trace.o
 
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
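
Adding trace.o is what gives the many dprintk() deletions in this patch a replacement: the trace_rpcgss_* calls introduced in auth_gss.c are static tracepoints declared in <trace/events/rpcgss.h>, and the new trace.c exists mainly to expand them once with CREATE_TRACE_POINTS. A sketch of that boilerplate, assuming the conventional layout of such a file:

// SPDX-License-Identifier: GPL-2.0
/* Sketch of net/sunrpc/auth_gss/trace.c: expands the tracepoint
 * definitions exactly once for the whole module. */
#define CREATE_TRACE_POINTS
#include <trace/events/rpcgss.h>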
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 8cb7d81..d75fddc 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
 /*
  * linux/net/sunrpc/auth_gss/auth_gss.c
  *
@@ -8,34 +9,8 @@
  *
  *  Dug Song       <dugsong@monkey.org>
  *  Andy Adamson   <andros@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -55,6 +30,8 @@
 
 #include "../netns.h"
 
+#include <trace/events/rpcgss.h>
+
 static const struct rpc_authops authgss_ops;
 
 static const struct rpc_credops gss_credops;
@@ -260,6 +237,7 @@
 	}
 	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
 	if (ret < 0) {
+		trace_rpcgss_import_ctx(ret);
 		p = ERR_PTR(ret);
 		goto err;
 	}
@@ -275,12 +253,9 @@
 	if (IS_ERR(p))
 		goto err;
 done:
-	dprintk("RPC:       %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
-		__func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
-		ctx->gc_acceptor.data);
-	return p;
+	trace_rpcgss_context(ctx->gc_expiry, now, timeout,
+			     ctx->gc_acceptor.len, ctx->gc_acceptor.data);
 err:
-	dprintk("RPC:       %s returns error %ld\n", __func__, -PTR_ERR(p));
 	return p;
 }
 
@@ -294,6 +269,7 @@
 struct gss_upcall_msg {
 	refcount_t count;
 	kuid_t	uid;
+	const char *service_name;
 	struct rpc_pipe_msg msg;
 	struct list_head list;
 	struct gss_auth *auth;
@@ -341,6 +317,7 @@
 		gss_put_ctx(gss_msg->ctx);
 	rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
 	gss_put_auth(gss_msg->auth);
+	kfree_const(gss_msg->service_name);
 	kfree(gss_msg);
 }
 
@@ -354,10 +331,8 @@
 		if (auth && pos->auth->service != auth->service)
 			continue;
 		refcount_inc(&pos->count);
-		dprintk("RPC:       %s found msg %p\n", __func__, pos);
 		return pos;
 	}
-	dprintk("RPC:       %s found nothing\n", __func__);
 	return NULL;
 }
 
@@ -437,9 +412,12 @@
 	gss_release_msg(gss_msg);
 }
 
-static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
+static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg,
+			      const struct cred *cred)
 {
-	uid_t uid = from_kuid(&init_user_ns, gss_msg->uid);
+	struct user_namespace *userns = cred->user_ns;
+
+	uid_t uid = from_kuid_munged(userns, gss_msg->uid);
 	memcpy(gss_msg->databuf, &uid, sizeof(uid));
 	gss_msg->msg.data = gss_msg->databuf;
 	gss_msg->msg.len = sizeof(uid);
@@ -447,17 +425,31 @@
 	BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf));
 }
 
+static ssize_t
+gss_v0_upcall(struct file *file, struct rpc_pipe_msg *msg,
+		char __user *buf, size_t buflen)
+{
+	struct gss_upcall_msg *gss_msg = container_of(msg,
+						      struct gss_upcall_msg,
+						      msg);
+	if (msg->copied == 0)
+		gss_encode_v0_msg(gss_msg, file->f_cred);
+	return rpc_pipe_generic_upcall(file, msg, buf, buflen);
+}
+
 static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 				const char *service_name,
-				const char *target_name)
+				const char *target_name,
+				const struct cred *cred)
 {
+	struct user_namespace *userns = cred->user_ns;
 	struct gss_api_mech *mech = gss_msg->auth->mech;
 	char *p = gss_msg->databuf;
 	size_t buflen = sizeof(gss_msg->databuf);
 	int len;
 
-	len = scnprintf(p, buflen, "mech=%s uid=%d ", mech->gm_name,
-			from_kuid(&init_user_ns, gss_msg->uid));
+	len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
+			from_kuid_munged(userns, gss_msg->uid));
 	buflen -= len;
 	p += len;
 	gss_msg->msg.len = len;
@@ -467,7 +459,7 @@
 	 * identity that we are authenticating to.
 	 */
 	if (target_name) {
-		len = scnprintf(p, buflen, "target=%s ", target_name);
+		len = scnprintf(p, buflen, " target=%s", target_name);
 		buflen -= len;
 		p += len;
 		gss_msg->msg.len += len;
@@ -487,11 +479,11 @@
 		char *c = strchr(service_name, '@');
 
 		if (!c)
-			len = scnprintf(p, buflen, "service=%s ",
+			len = scnprintf(p, buflen, " service=%s",
 					service_name);
 		else
 			len = scnprintf(p, buflen,
-					"service=%.*s srchost=%s ",
+					" service=%.*s srchost=%s",
 					(int)(c - service_name),
 					service_name, c + 1);
 		buflen -= len;
@@ -500,17 +492,17 @@
 	}
 
 	if (mech->gm_upcall_enctypes) {
-		len = scnprintf(p, buflen, "enctypes=%s ",
+		len = scnprintf(p, buflen, " enctypes=%s",
 				mech->gm_upcall_enctypes);
 		buflen -= len;
 		p += len;
 		gss_msg->msg.len += len;
 	}
+	trace_rpcgss_upcall_msg(gss_msg->databuf);
 	len = scnprintf(p, buflen, "\n");
 	if (len == 0)
 		goto out_overflow;
 	gss_msg->msg.len += len;
-
 	gss_msg->msg.data = gss_msg->databuf;
 	return 0;
 out_overflow:
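
Moving the separator to the front of each optional field (" target=..." rather than "target=... ") means the assembled line never carries a trailing blank before the newline that terminates it. Pieced together from the scnprintf() calls above, a v1 upcall line might look like this (all values illustrative):

mech=krb5 uid=1000 target=nfs@server.example.com service=nfs srchost=client.example.com enctypes=18,17,16,23,3,1,2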
@@ -518,6 +510,25 @@
 	return -ENOMEM;
 }
 
+static ssize_t
+gss_v1_upcall(struct file *file, struct rpc_pipe_msg *msg,
+		char __user *buf, size_t buflen)
+{
+	struct gss_upcall_msg *gss_msg = container_of(msg,
+						      struct gss_upcall_msg,
+						      msg);
+	int err;
+	if (msg->copied == 0) {
+		err = gss_encode_v1_msg(gss_msg,
+					gss_msg->service_name,
+					gss_msg->auth->target_name,
+					file->f_cred);
+		if (err)
+			return err;
+	}
+	return rpc_pipe_generic_upcall(file, msg, buf, buflen);
+}
+
 static struct gss_upcall_msg *
 gss_alloc_msg(struct gss_auth *gss_auth,
 		kuid_t uid, const char *service_name)
@@ -540,16 +551,14 @@
 	refcount_set(&gss_msg->count, 1);
 	gss_msg->uid = uid;
 	gss_msg->auth = gss_auth;
-	switch (vers) {
-	case 0:
-		gss_encode_v0_msg(gss_msg);
-		break;
-	default:
-		err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
-		if (err)
-			goto err_put_pipe_version;
-	}
 	kref_get(&gss_auth->kref);
+	if (service_name) {
+		gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS);
+		if (!gss_msg->service_name) {
+			err = -ENOMEM;
+			goto err_put_pipe_version;
+		}
+	}
 	return gss_msg;
 err_put_pipe_version:
 	put_pipe_version(gss_auth->net);
@@ -565,7 +574,7 @@
 	struct gss_cred *gss_cred = container_of(cred,
 			struct gss_cred, gc_base);
 	struct gss_upcall_msg *gss_new, *gss_msg;
-	kuid_t uid = cred->cr_uid;
+	kuid_t uid = cred->cr_cred->fsuid;
 
 	gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal);
 	if (IS_ERR(gss_new))
@@ -603,16 +612,15 @@
 	struct rpc_pipe *pipe;
 	int err = 0;
 
-	dprintk("RPC: %5u %s for uid %u\n",
-		task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid));
 	gss_msg = gss_setup_upcall(gss_auth, cred);
 	if (PTR_ERR(gss_msg) == -EAGAIN) {
 		/* XXX: warning on the first, under the assumption we
 		 * shouldn't normally hit this case on a refresh. */
 		warn_gssd();
-		task->tk_timeout = 15*HZ;
-		rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
-		return -EAGAIN;
+		rpc_sleep_on_timeout(&pipe_version_rpc_waitqueue,
+				task, NULL, jiffies + (15 * HZ));
+		err = -EAGAIN;
+		goto out;
 	}
 	if (IS_ERR(gss_msg)) {
 		err = PTR_ERR(gss_msg);
@@ -623,7 +631,6 @@
 	if (gss_cred->gc_upcall != NULL)
 		rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
 	else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
-		task->tk_timeout = 0;
 		gss_cred->gc_upcall = gss_msg;
 		/* gss_upcall_callback will release the reference to gss_upcall_msg */
 		refcount_inc(&gss_msg->count);
@@ -635,9 +642,8 @@
 	spin_unlock(&pipe->lock);
 	gss_release_msg(gss_msg);
 out:
-	dprintk("RPC: %5u %s for uid %u result %d\n",
-		task->tk_pid, __func__,
-		from_kuid(&init_user_ns, cred->cr_uid),	err);
+	trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
+					     cred->cr_cred->fsuid), err);
 	return err;
 }
 
@@ -652,14 +658,13 @@
 	DEFINE_WAIT(wait);
 	int err;
 
-	dprintk("RPC:       %s for uid %u\n",
-		__func__, from_kuid(&init_user_ns, cred->cr_uid));
 retry:
 	err = 0;
 	/* if gssd is down, just skip upcalling altogether */
 	if (!gssd_running(net)) {
 		warn_gssd();
-		return -EACCES;
+		err = -EACCES;
+		goto out;
 	}
 	gss_msg = gss_setup_upcall(gss_auth, cred);
 	if (PTR_ERR(gss_msg) == -EAGAIN) {
@@ -700,8 +705,8 @@
 	finish_wait(&gss_msg->waitqueue, &wait);
 	gss_release_msg(gss_msg);
 out:
-	dprintk("RPC:       %s for uid %u result %d\n",
-		__func__, from_kuid(&init_user_ns, cred->cr_uid), err);
+	trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
+					     cred->cr_cred->fsuid), err);
 	return err;
 }
 
@@ -737,7 +742,7 @@
 		goto err;
 	}
 
-	uid = make_kuid(&init_user_ns, id);
+	uid = make_kuid(current_user_ns(), id);
 	if (!uid_valid(uid)) {
 		err = -EINVAL;
 		goto err;
@@ -794,7 +799,6 @@
 err:
 	kfree(buf);
 out:
-	dprintk("RPC:       %s returning %zd\n", __func__, err);
 	return err;
 }
 
@@ -863,8 +867,6 @@
 	struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
 
 	if (msg->errno < 0) {
-		dprintk("RPC:       %s releasing msg %p\n",
-			__func__, gss_msg);
 		refcount_inc(&gss_msg->count);
 		gss_unhash_msg(gss_msg);
 		if (msg->errno == -ETIMEDOUT)
@@ -1024,8 +1026,6 @@
 	struct rpc_auth * auth;
 	int err = -ENOMEM; /* XXX? */
 
-	dprintk("RPC:       creating GSS authenticator for client %p\n", clnt);
-
 	if (!try_module_get(THIS_MODULE))
 		return ERR_PTR(err);
 	if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
@@ -1041,10 +1041,8 @@
 	gss_auth->net = get_net(rpc_net_ns(clnt));
 	err = -EINVAL;
 	gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
-	if (!gss_auth->mech) {
-		dprintk("RPC:       Pseudoflavor %d not found!\n", flavor);
+	if (!gss_auth->mech)
 		goto err_put_net;
-	}
 	gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
 	if (gss_auth->service == 0)
 		goto err_put_mech;
@@ -1053,12 +1051,14 @@
 	auth = &gss_auth->rpc_auth;
 	auth->au_cslack = GSS_CRED_SLACK >> 2;
 	auth->au_rslack = GSS_VERF_SLACK >> 2;
+	auth->au_verfsize = GSS_VERF_SLACK >> 2;
+	auth->au_ralign = GSS_VERF_SLACK >> 2;
 	auth->au_flags = 0;
 	auth->au_ops = &authgss_ops;
 	auth->au_flavor = flavor;
 	if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor))
 		auth->au_flags |= RPCAUTH_AUTH_DATATOUCH;
-	atomic_set(&auth->au_count, 1);
+	refcount_set(&auth->au_count, 1);
 	kref_init(&gss_auth->kref);
 
 	err = rpcauth_init_credcache(auth);
@@ -1099,6 +1099,7 @@
 	kfree(gss_auth);
 out_dec:
 	module_put(THIS_MODULE);
+	trace_rpcgss_createauth(flavor, err);
 	return ERR_PTR(err);
 }
 
@@ -1135,9 +1136,6 @@
 	struct gss_auth *gss_auth = container_of(auth,
 			struct gss_auth, rpc_auth);
 
-	dprintk("RPC:       destroying GSS authenticator %p flavor %d\n",
-			auth, auth->au_flavor);
-
 	if (hash_hashed(&gss_auth->hash)) {
 		spin_lock(&gss_auth_hash_lock);
 		hash_del(&gss_auth->hash);
@@ -1187,7 +1185,7 @@
 			if (strcmp(gss_auth->target_name, args->target_name))
 				continue;
 		}
-		if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
+		if (!refcount_inc_not_zero(&gss_auth->rpc_auth.au_count))
 			continue;
 		goto out;
 	}
@@ -1239,36 +1237,59 @@
 	return &gss_auth->rpc_auth;
 }
 
+static struct gss_cred *
+gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+{
+	struct gss_cred *new;
+
+	/* Make a copy of the cred so that we can reference count it */
+	new = kzalloc(sizeof(*gss_cred), GFP_NOFS);
+	if (new) {
+		struct auth_cred acred = {
+			.cred = gss_cred->gc_base.cr_cred,
+		};
+		struct gss_cl_ctx *ctx =
+			rcu_dereference_protected(gss_cred->gc_ctx, 1);
+
+		rpcauth_init_cred(&new->gc_base, &acred,
+				&gss_auth->rpc_auth,
+				&gss_nullops);
+		new->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+		new->gc_service = gss_cred->gc_service;
+		new->gc_principal = gss_cred->gc_principal;
+		kref_get(&gss_auth->kref);
+		rcu_assign_pointer(new->gc_ctx, ctx);
+		gss_get_ctx(ctx);
+	}
+	return new;
+}
+
 /*
- * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
+ * gss_send_destroy_context will cause the RPCSEC_GSS to send a NULL RPC call
  * to the server with the GSS control procedure field set to
  * RPC_GSS_PROC_DESTROY. This should normally cause the server to release
  * all RPCSEC_GSS state associated with that context.
  */
-static int
-gss_destroying_context(struct rpc_cred *cred)
+static void
+gss_send_destroy_context(struct rpc_cred *cred)
 {
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
 	struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
 	struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
+	struct gss_cred *new;
 	struct rpc_task *task;
 
-	if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
-		return 0;
+	new = gss_dup_cred(gss_auth, gss_cred);
+	if (new) {
+		ctx->gc_proc = RPC_GSS_PROC_DESTROY;
 
-	ctx->gc_proc = RPC_GSS_PROC_DESTROY;
-	cred->cr_ops = &gss_nullops;
+		task = rpc_call_null(gss_auth->client, &new->gc_base,
+				RPC_TASK_ASYNC|RPC_TASK_SOFT);
+		if (!IS_ERR(task))
+			rpc_put_task(task);
 
-	/* Take a reference to ensure the cred will be destroyed either
-	 * by the RPC call or by the put_rpccred() below */
-	get_rpccred(cred);
-
-	task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC|RPC_TASK_SOFT);
-	if (!IS_ERR(task))
-		rpc_put_task(task);
-
-	put_rpccred(cred);
-	return 1;
+		put_rpccred(&new->gc_base);
+	}
 }
 
 /* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure
@@ -1277,8 +1298,6 @@
 static void
 gss_do_free_ctx(struct gss_cl_ctx *ctx)
 {
-	dprintk("RPC:       %s\n", __func__);
-
 	gss_delete_sec_context(&ctx->gc_gss_ctx);
 	kfree(ctx->gc_wire_ctx.data);
 	kfree(ctx->gc_acceptor.data);
@@ -1301,7 +1320,6 @@
 static void
 gss_free_cred(struct gss_cred *gss_cred)
 {
-	dprintk("RPC:       %s cred=%p\n", __func__, gss_cred);
 	kfree(gss_cred);
 }
 
@@ -1320,6 +1338,7 @@
 	struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
 
 	RCU_INIT_POINTER(gss_cred->gc_ctx, NULL);
+	put_cred(cred->cr_cred);
 	call_rcu(&cred->cr_rcu, gss_free_cred_callback);
 	if (ctx)
 		gss_put_ctx(ctx);
@@ -1330,15 +1349,15 @@
 gss_destroy_cred(struct rpc_cred *cred)
 {
 
-	if (gss_destroying_context(cred))
-		return;
+	if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0)
+		gss_send_destroy_context(cred);
 	gss_destroy_nullcred(cred);
 }
 
 static int
 gss_hash_cred(struct auth_cred *acred, unsigned int hashbits)
 {
-	return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits);
+	return hash_64(from_kuid(&init_user_ns, acred->cred->fsuid), hashbits);
 }
 
 /*
@@ -1357,10 +1376,6 @@
 	struct gss_cred	*cred = NULL;
 	int err = -ENOMEM;
 
-	dprintk("RPC:       %s for uid %d, flavor %d\n",
-		__func__, from_kuid(&init_user_ns, acred->uid),
-		auth->au_flavor);
-
 	if (!(cred = kzalloc(sizeof(*cred), gfp)))
 		goto out_err;
 
@@ -1371,14 +1386,11 @@
 	 */
 	cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
 	cred->gc_service = gss_auth->service;
-	cred->gc_principal = NULL;
-	if (acred->machine_cred)
-		cred->gc_principal = acred->principal;
+	cred->gc_principal = acred->principal;
 	kref_get(&gss_auth->kref);
 	return &cred->gc_base;
 
 out_err:
-	dprintk("RPC:       %s failed with error %d\n", __func__, err);
 	return ERR_PTR(err);
 }
 
@@ -1495,85 +1507,93 @@
 		if (gss_cred->gc_principal == NULL)
 			return 0;
 		ret = strcmp(acred->principal, gss_cred->gc_principal) == 0;
-		goto check_expire;
-	}
-	if (gss_cred->gc_principal != NULL)
-		return 0;
-	ret = uid_eq(rc->cr_uid, acred->uid);
-
-check_expire:
-	if (ret == 0)
-		return ret;
-
-	/* Notify acred users of GSS context expiration timeout */
-	if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) &&
-	    (gss_key_timeout(rc) != 0)) {
-		/* test will now be done from generic cred */
-		test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
-		/* tell NFS layer that key will expire soon */
-		set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+	} else {
+		if (gss_cred->gc_principal != NULL)
+			return 0;
+		ret = uid_eq(rc->cr_cred->fsuid, acred->cred->fsuid);
 	}
 	return ret;
 }
 
 /*
-* Marshal credentials.
-* Maybe we should keep a cached credential for performance reasons.
-*/
-static __be32 *
-gss_marshal(struct rpc_task *task, __be32 *p)
+ * Marshal credentials.
+ *
+ * The expensive part is computing the verifier. We can't cache a
+ * pre-computed version of the verifier because the seqno, which
+ * is different every time, is included in the MIC.
+ */
+static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_cred *cred = req->rq_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
 						 gc_base);
 	struct gss_cl_ctx	*ctx = gss_cred_get_ctx(cred);
-	__be32		*cred_len;
+	__be32		*p, *cred_len;
 	u32             maj_stat = 0;
 	struct xdr_netobj mic;
 	struct kvec	iov;
 	struct xdr_buf	verf_buf;
+	int status;
 
-	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+	/* Credential */
 
-	*p++ = htonl(RPC_AUTH_GSS);
+	p = xdr_reserve_space(xdr, 7 * sizeof(*p) +
+			      ctx->gc_wire_ctx.len);
+	if (!p)
+		goto marshal_failed;
+	*p++ = rpc_auth_gss;
 	cred_len = p++;
 
 	spin_lock(&ctx->gc_seq_lock);
-	req->rq_seqno = ctx->gc_seq++;
+	req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ;
 	spin_unlock(&ctx->gc_seq_lock);
+	if (req->rq_seqno == MAXSEQ)
+		goto expired;
+	trace_rpcgss_seqno(task);
 
-	*p++ = htonl((u32) RPC_GSS_VERSION);
-	*p++ = htonl((u32) ctx->gc_proc);
-	*p++ = htonl((u32) req->rq_seqno);
-	*p++ = htonl((u32) gss_cred->gc_service);
+	*p++ = cpu_to_be32(RPC_GSS_VERSION);
+	*p++ = cpu_to_be32(ctx->gc_proc);
+	*p++ = cpu_to_be32(req->rq_seqno);
+	*p++ = cpu_to_be32(gss_cred->gc_service);
 	p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
-	*cred_len = htonl((p - (cred_len + 1)) << 2);
+	*cred_len = cpu_to_be32((p - (cred_len + 1)) << 2);
+
+	/* Verifier */
 
 	/* We compute the checksum for the verifier over the xdr-encoded bytes
 	 * starting with the xid and ending at the end of the credential: */
-	iov.iov_base = xprt_skip_transport_header(req->rq_xprt,
-					req->rq_snd_buf.head[0].iov_base);
+	iov.iov_base = req->rq_snd_buf.head[0].iov_base;
 	iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
 	xdr_buf_from_iov(&iov, &verf_buf);
 
-	/* set verifier flavor*/
-	*p++ = htonl(RPC_AUTH_GSS);
-
+	p = xdr_reserve_space(xdr, sizeof(*p));
+	if (!p)
+		goto marshal_failed;
+	*p++ = rpc_auth_gss;
 	mic.data = (u8 *)(p + 1);
 	maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
-	if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
-		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
-	} else if (maj_stat != 0) {
-		printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
-		goto out_put_ctx;
-	}
-	p = xdr_encode_opaque(p, NULL, mic.len);
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		goto expired;
+	else if (maj_stat != 0)
+		goto bad_mic;
+	if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
+		goto marshal_failed;
+	status = 0;
+out:
 	gss_put_ctx(ctx);
-	return p;
-out_put_ctx:
-	gss_put_ctx(ctx);
-	return NULL;
+	return status;
+expired:
+	clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+	status = -EKEYEXPIRED;
+	goto out;
+marshal_failed:
+	status = -EMSGSIZE;
+	goto out;
+bad_mic:
+	trace_rpcgss_get_mic(task, maj_stat);
+	status = -EIO;
+	goto out;
 }
 
 static int gss_renew_cred(struct rpc_task *task)
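
One subtlety in the rewritten gss_marshal() is the sequence-number clamp:

/* RFC 2203 caps seq_num at MAXSEQ (0x80000000). Clamping gc_seq and
 * returning -EKEYEXPIRED makes an exhausted context look like an
 * expired one, so the normal refresh path negotiates a new context
 * instead of reusing sequence numbers. */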
@@ -1584,9 +1604,8 @@
 						 gc_base);
 	struct rpc_auth *auth = oldcred->cr_auth;
 	struct auth_cred acred = {
-		.uid = oldcred->cr_uid,
+		.cred = oldcred->cr_cred,
 		.principal = gss_cred->gc_principal,
-		.machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0),
 	};
 	struct rpc_cred *new;
 
@@ -1648,116 +1667,105 @@
 	return 0;
 }
 
-static __be32 *
-gss_validate(struct rpc_task *task, __be32 *p)
+static int
+gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-	__be32		*seq = NULL;
+	__be32		*p, *seq = NULL;
 	struct kvec	iov;
 	struct xdr_buf	verf_buf;
 	struct xdr_netobj mic;
-	u32		flav,len;
-	u32		maj_stat;
-	__be32		*ret = ERR_PTR(-EIO);
+	u32		len, maj_stat;
+	int		status;
 
-	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+	p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+	if (!p)
+		goto validate_failed;
+	if (*p++ != rpc_auth_gss)
+		goto validate_failed;
+	len = be32_to_cpup(p);
+	if (len > RPC_MAX_AUTH_SIZE)
+		goto validate_failed;
+	p = xdr_inline_decode(xdr, len);
+	if (!p)
+		goto validate_failed;
 
-	flav = ntohl(*p++);
-	if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
-		goto out_bad;
-	if (flav != RPC_AUTH_GSS)
-		goto out_bad;
 	seq = kmalloc(4, GFP_NOFS);
 	if (!seq)
-		goto out_bad;
-	*seq = htonl(task->tk_rqstp->rq_seqno);
+		goto validate_failed;
+	*seq = cpu_to_be32(task->tk_rqstp->rq_seqno);
 	iov.iov_base = seq;
 	iov.iov_len = 4;
 	xdr_buf_from_iov(&iov, &verf_buf);
 	mic.data = (u8 *)p;
 	mic.len = len;
-
-	ret = ERR_PTR(-EACCES);
 	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
-	if (maj_stat) {
-		dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
-			task->tk_pid, __func__, maj_stat);
-		goto out_bad;
-	}
+	if (maj_stat)
+		goto bad_mic;
+
 	/* We leave it to unwrap to calculate au_rslack. For now we just
 	 * calculate the length of the verifier: */
 	cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
+	status = 0;
+out:
 	gss_put_ctx(ctx);
-	dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
-			task->tk_pid, __func__);
 	kfree(seq);
-	return p + XDR_QUADLEN(len);
-out_bad:
-	gss_put_ctx(ctx);
-	dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
-		PTR_ERR(ret));
-	kfree(seq);
-	return ret;
+	return status;
+
+validate_failed:
+	status = -EIO;
+	goto out;
+bad_mic:
+	trace_rpcgss_verify_mic(task, maj_stat);
+	status = -EACCES;
+	goto out;
 }
 
-static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
-				__be32 *p, void *obj)
+static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+			      struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
-	encode(rqstp, &xdr, obj);
-}
-
-static inline int
-gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		   kxdreproc_t encode, struct rpc_rqst *rqstp,
-		   __be32 *p, void *obj)
-{
-	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
-	struct xdr_buf	integ_buf;
-	__be32          *integ_len = NULL;
+	struct rpc_rqst *rqstp = task->tk_rqstp;
+	struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf;
 	struct xdr_netobj mic;
-	u32		offset;
-	__be32		*q;
-	struct kvec	*iov;
-	u32             maj_stat = 0;
-	int		status = -EIO;
+	__be32 *p, *integ_len;
+	u32 offset, maj_stat;
 
+	p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+	if (!p)
+		goto wrap_failed;
 	integ_len = p++;
+	*p = cpu_to_be32(rqstp->rq_seqno);
+
+	if (rpcauth_wrap_req_encode(task, xdr))
+		goto wrap_failed;
+
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
-	*p++ = htonl(rqstp->rq_seqno);
-
-	gss_wrap_req_encode(encode, rqstp, p, obj);
-
 	if (xdr_buf_subsegment(snd_buf, &integ_buf,
 				offset, snd_buf->len - offset))
-		return status;
-	*integ_len = htonl(integ_buf.len);
+		goto wrap_failed;
+	*integ_len = cpu_to_be32(integ_buf.len);
 
-	/* guess whether we're in the head or the tail: */
-	if (snd_buf->page_len || snd_buf->tail[0].iov_len)
-		iov = snd_buf->tail;
-	else
-		iov = snd_buf->head;
-	p = iov->iov_base + iov->iov_len;
+	p = xdr_reserve_space(xdr, 0);
+	if (!p)
+		goto wrap_failed;
 	mic.data = (u8 *)(p + 1);
-
 	maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
-	status = -EIO; /* XXX? */
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	else if (maj_stat)
-		return status;
-	q = xdr_encode_opaque(p, NULL, mic.len);
-
-	offset = (u8 *)q - (u8 *)p;
-	iov->iov_len += offset;
-	snd_buf->len += offset;
+		goto bad_mic;
+	/* Check that the trailing MIC fit in the buffer, after the fact */
+	if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
+		goto wrap_failed;
 	return 0;
+wrap_failed:
+	return -EMSGSIZE;
+bad_mic:
+	trace_rpcgss_get_mic(task, maj_stat);
+	return -EIO;
 }
 
 static void
@@ -1808,61 +1816,62 @@
 	return -EAGAIN;
 }
 
-static inline int
-gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		  kxdreproc_t encode, struct rpc_rqst *rqstp,
-		  __be32 *p, void *obj)
+static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+			     struct rpc_task *task, struct xdr_stream *xdr)
 {
+	struct rpc_rqst *rqstp = task->tk_rqstp;
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
-	u32		offset;
-	u32             maj_stat;
+	u32		pad, offset, maj_stat;
 	int		status;
-	__be32		*opaque_len;
+	__be32		*p, *opaque_len;
 	struct page	**inpages;
 	int		first;
-	int		pad;
 	struct kvec	*iov;
-	char		*tmp;
 
+	status = -EIO;
+	p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+	if (!p)
+		goto wrap_failed;
 	opaque_len = p++;
-	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
-	*p++ = htonl(rqstp->rq_seqno);
+	*p = cpu_to_be32(rqstp->rq_seqno);
 
-	gss_wrap_req_encode(encode, rqstp, p, obj);
+	if (rpcauth_wrap_req_encode(task, xdr))
+		goto wrap_failed;
 
 	status = alloc_enc_pages(rqstp);
-	if (status)
-		return status;
+	if (unlikely(status))
+		goto wrap_failed;
 	first = snd_buf->page_base >> PAGE_SHIFT;
 	inpages = snd_buf->pages + first;
 	snd_buf->pages = rqstp->rq_enc_pages;
 	snd_buf->page_base -= first << PAGE_SHIFT;
 	/*
-	 * Give the tail its own page, in case we need extra space in the
-	 * head when wrapping:
+	 * Move the tail into its own page, in case gss_wrap needs
+	 * more space in the head when wrapping.
 	 *
-	 * call_allocate() allocates twice the slack space required
-	 * by the authentication flavor to rq_callsize.
-	 * For GSS, slack is GSS_CRED_SLACK.
+	 * Still... Why can't gss_wrap just slide the tail down?
 	 */
 	if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
+		char *tmp;
+
 		tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
 		memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
 		snd_buf->tail[0].iov_base = tmp;
 	}
+	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
 	/* slack space should prevent this ever happening: */
-	BUG_ON(snd_buf->len > snd_buf->buflen);
-	status = -EIO;
+	if (unlikely(snd_buf->len > snd_buf->buflen))
+		goto wrap_failed;
 	/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
 	 * done anyway, so it's safe to put the request on the wire: */
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	else if (maj_stat)
-		return status;
+		goto bad_wrap;
 
-	*opaque_len = htonl(snd_buf->len - offset);
-	/* guess whether we're in the head or the tail: */
+	*opaque_len = cpu_to_be32(snd_buf->len - offset);
+	/* guess whether the pad goes into the head or the tail: */
 	if (snd_buf->page_len || snd_buf->tail[0].iov_len)
 		iov = snd_buf->tail;
 	else
@@ -1874,158 +1883,229 @@
 	snd_buf->len += pad;
 
 	return 0;
+wrap_failed:
+	return status;
+bad_wrap:
+	trace_rpcgss_wrap(task, maj_stat);
+	return -EIO;
 }
 
-static int
-gss_wrap_req(struct rpc_task *task,
-	     kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
+static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
 			gc_base);
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-	int             status = -EIO;
+	int status;
 
-	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+	status = -EIO;
 	if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
 		/* The spec seems a little ambiguous here, but I think that not
 		 * wrapping context destruction requests makes the most sense.
 		 */
-		gss_wrap_req_encode(encode, rqstp, p, obj);
-		status = 0;
+		status = rpcauth_wrap_req_encode(task, xdr);
 		goto out;
 	}
 	switch (gss_cred->gc_service) {
 	case RPC_GSS_SVC_NONE:
-		gss_wrap_req_encode(encode, rqstp, p, obj);
-		status = 0;
+		status = rpcauth_wrap_req_encode(task, xdr);
 		break;
 	case RPC_GSS_SVC_INTEGRITY:
-		status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj);
+		status = gss_wrap_req_integ(cred, ctx, task, xdr);
 		break;
 	case RPC_GSS_SVC_PRIVACY:
-		status = gss_wrap_req_priv(cred, ctx, encode, rqstp, p, obj);
+		status = gss_wrap_req_priv(cred, ctx, task, xdr);
 		break;
+	default:
+		status = -EIO;
 	}
 out:
 	gss_put_ctx(ctx);
-	dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status);
 	return status;
 }
 
-static inline int
-gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		struct rpc_rqst *rqstp, __be32 **p)
+static int
+gss_unwrap_resp_auth(struct rpc_cred *cred)
 {
-	struct xdr_buf	*rcv_buf = &rqstp->rq_rcv_buf;
-	struct xdr_buf integ_buf;
-	struct xdr_netobj mic;
-	u32 data_offset, mic_offset;
-	u32 integ_len;
-	u32 maj_stat;
-	int status = -EIO;
+	struct rpc_auth *auth = cred->cr_auth;
 
-	integ_len = ntohl(*(*p)++);
+	auth->au_rslack = auth->au_verfsize;
+	auth->au_ralign = auth->au_verfsize;
+	return 0;
+}
+
+static int
+gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
+		      struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
+		      struct xdr_stream *xdr)
+{
+	struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
+	u32 data_offset, mic_offset, integ_len, maj_stat;
+	struct rpc_auth *auth = cred->cr_auth;
+	struct xdr_netobj mic;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+	if (unlikely(!p))
+		goto unwrap_failed;
+	integ_len = be32_to_cpup(p++);
 	if (integ_len & 3)
-		return status;
-	data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+		goto unwrap_failed;
+	data_offset = (u8 *)(p) - (u8 *)rcv_buf->head[0].iov_base;
 	mic_offset = integ_len + data_offset;
 	if (mic_offset > rcv_buf->len)
-		return status;
-	if (ntohl(*(*p)++) != rqstp->rq_seqno)
-		return status;
+		goto unwrap_failed;
+	if (be32_to_cpup(p) != rqstp->rq_seqno)
+		goto bad_seqno;
 
-	if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
-				mic_offset - data_offset))
-		return status;
-
-	if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
-		return status;
-
+	if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
+		goto unwrap_failed;
+	if (xdr_buf_read_mic(rcv_buf, &mic, mic_offset))
+		goto unwrap_failed;
 	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	if (maj_stat != GSS_S_COMPLETE)
-		return status;
+		goto bad_mic;
+
+	auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len);
+	auth->au_ralign = auth->au_verfsize + 2;
 	return 0;
+unwrap_failed:
+	trace_rpcgss_unwrap_failed(task);
+	return -EIO;
+bad_seqno:
+	trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(p));
+	return -EIO;
+bad_mic:
+	trace_rpcgss_verify_mic(task, maj_stat);
+	return -EIO;
 }
 
-static inline int
-gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		struct rpc_rqst *rqstp, __be32 **p)
+static int
+gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
+		     struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
+		     struct xdr_stream *xdr)
 {
-	struct xdr_buf  *rcv_buf = &rqstp->rq_rcv_buf;
-	u32 offset;
-	u32 opaque_len;
-	u32 maj_stat;
-	int status = -EIO;
+	struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
+	struct kvec *head = rqstp->rq_rcv_buf.head;
+	struct rpc_auth *auth = cred->cr_auth;
+	unsigned int savedlen = rcv_buf->len;
+	u32 offset, opaque_len, maj_stat;
+	__be32 *p;
 
-	opaque_len = ntohl(*(*p)++);
-	offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+	p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+	if (unlikely(!p))
+		goto unwrap_failed;
+	opaque_len = be32_to_cpup(p++);
+	offset = (u8 *)(p) - (u8 *)head->iov_base;
 	if (offset + opaque_len > rcv_buf->len)
-		return status;
-	/* remove padding: */
+		goto unwrap_failed;
 	rcv_buf->len = offset + opaque_len;
 
 	maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	if (maj_stat != GSS_S_COMPLETE)
-		return status;
-	if (ntohl(*(*p)++) != rqstp->rq_seqno)
-		return status;
+		goto bad_unwrap;
+	/* gss_unwrap decrypted the sequence number */
+	if (be32_to_cpup(p++) != rqstp->rq_seqno)
+		goto bad_seqno;
 
+	/* gss_unwrap redacts the opaque blob from the head iovec.
+	 * rcv_buf has changed, thus the stream needs to be reset.
+	 */
+	xdr_init_decode(xdr, rcv_buf, p, rqstp);
+
+	auth->au_rslack = auth->au_verfsize + 2 +
+			  XDR_QUADLEN(savedlen - rcv_buf->len);
+	auth->au_ralign = auth->au_verfsize + 2 +
+			  XDR_QUADLEN(savedlen - rcv_buf->len);
 	return 0;
+unwrap_failed:
+	trace_rpcgss_unwrap_failed(task);
+	return -EIO;
+bad_seqno:
+	trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p));
+	return -EIO;
+bad_unwrap:
+	trace_rpcgss_unwrap(task, maj_stat);
+	return -EIO;
+}
+
+static bool
+gss_seq_is_newer(u32 new, u32 old)
+{
+	return (s32)(new - old) > 0;
+}
+
+static bool
+gss_xmit_need_reencode(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_cred *cred = req->rq_cred;
+	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
+	u32 win, seq_xmit = 0;
+	bool ret = true;
+
+	if (!ctx)
+		goto out;
+
+	if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
+		goto out_ctx;
+
+	seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
+	while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
+		u32 tmp = seq_xmit;
+
+		seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
+		if (seq_xmit == tmp) {
+			ret = false;
+			goto out_ctx;
+		}
+	}
+
+	win = ctx->gc_win;
+	if (win > 0)
+		ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
+
+out_ctx:
+	gss_put_ctx(ctx);
+out:
+	trace_rpcgss_need_reencode(task, seq_xmit, ret);
+	return ret;
 }
 
 static int
-gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
-		      __be32 *p, void *obj)
+gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	return decode(rqstp, &xdr, obj);
-}
-
-static int
-gss_unwrap_resp(struct rpc_task *task,
-		kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
-{
-	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+	struct rpc_rqst *rqstp = task->tk_rqstp;
+	struct rpc_cred *cred = rqstp->rq_cred;
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
 			gc_base);
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-	__be32		*savedp = p;
-	struct kvec	*head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head;
-	int		savedlen = head->iov_len;
-	int             status = -EIO;
+	int status = -EIO;
 
 	if (ctx->gc_proc != RPC_GSS_PROC_DATA)
 		goto out_decode;
 	switch (gss_cred->gc_service) {
 	case RPC_GSS_SVC_NONE:
+		status = gss_unwrap_resp_auth(cred);
 		break;
 	case RPC_GSS_SVC_INTEGRITY:
-		status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
-		if (status)
-			goto out;
+		status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr);
 		break;
 	case RPC_GSS_SVC_PRIVACY:
-		status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
-		if (status)
-			goto out;
+		status = gss_unwrap_resp_priv(task, cred, ctx, rqstp, xdr);
 		break;
 	}
-	/* take into account extra slack for integrity and privacy cases: */
-	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
-						+ (savedlen - head->iov_len);
+	if (status)
+		goto out;
+
 out_decode:
-	status = gss_unwrap_req_decode(decode, rqstp, p, obj);
+	status = rpcauth_unwrap_resp_decode(task, xdr);
 out:
 	gss_put_ctx(ctx);
-	dprintk("RPC: %5u %s returning %d\n",
-		task->tk_pid, __func__, status);
 	return status;
 }
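
The switch on gc_service above is the client-side dispatch for the three RFC 2203 protection levels; for reference, the mapping to the familiar mount options (informational sketch only):

    /*   RPC_GSS_SVC_NONE       sec=krb5    header verifier only, body in clear
     *   RPC_GSS_SVC_INTEGRITY  sec=krb5i   body covered by a MIC checksum
     *   RPC_GSS_SVC_PRIVACY    sec=krb5p   body encrypted, gss_unwrap() above
     */
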
 
@@ -2047,7 +2127,6 @@
 	.cr_name		= "AUTH_GSS",
 	.crdestroy		= gss_destroy_cred,
 	.cr_init		= gss_cred_init,
-	.crbind			= rpcauth_generic_bind_cred,
 	.crmatch		= gss_match,
 	.crmarshal		= gss_marshal,
 	.crrefresh		= gss_refresh,
@@ -2056,12 +2135,12 @@
 	.crunwrap_resp		= gss_unwrap_resp,
 	.crkey_timeout		= gss_key_timeout,
 	.crstringify_acceptor	= gss_stringify_acceptor,
+	.crneed_reencode	= gss_xmit_need_reencode,
 };
 
 static const struct rpc_credops gss_nullops = {
 	.cr_name		= "AUTH_GSS",
 	.crdestroy		= gss_destroy_nullcred,
-	.crbind			= rpcauth_generic_bind_cred,
 	.crmatch		= gss_match,
 	.crmarshal		= gss_marshal,
 	.crrefresh		= gss_refresh_null,
@@ -2072,7 +2151,7 @@
 };
 
 static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
-	.upcall		= rpc_pipe_generic_upcall,
+	.upcall		= gss_v0_upcall,
 	.downcall	= gss_pipe_downcall,
 	.destroy_msg	= gss_pipe_destroy_msg,
 	.open_pipe	= gss_pipe_open_v0,
@@ -2080,7 +2159,7 @@
 };
 
 static const struct rpc_pipe_ops gss_upcall_ops_v1 = {
-	.upcall		= rpc_pipe_generic_upcall,
+	.upcall		= gss_v1_upcall,
 	.downcall	= gss_pipe_downcall,
 	.destroy_msg	= gss_pipe_destroy_msg,
 	.open_pipe	= gss_pipe_open_v1,
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 0220e1c..6f2d30d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -53,7 +53,7 @@
 
 u32
 krb5_encrypt(
-	struct crypto_skcipher *tfm,
+	struct crypto_sync_skcipher *tfm,
 	void * iv,
 	void * in,
 	void * out,
@@ -62,24 +62,24 @@
 	u32 ret = -EINVAL;
 	struct scatterlist sg[1];
 	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
-	if (length % crypto_skcipher_blocksize(tfm) != 0)
+	if (length % crypto_sync_skcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+	if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
 		dprintk("RPC:       gss_k5encrypt: tfm iv size too large %d\n",
-			crypto_skcipher_ivsize(tfm));
+			crypto_sync_skcipher_ivsize(tfm));
 		goto out;
 	}
 
 	if (iv)
-		memcpy(local_iv, iv, crypto_skcipher_ivsize(tfm));
+		memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm));
 
 	memcpy(out, in, length);
 	sg_init_one(sg, out, length);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, length, local_iv);
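
All of these call sites convert from the generic skcipher API to the crypto_sync_skcipher wrappers: a sync tfm is guaranteed to complete synchronously and never to be backed by an async implementation, which is what makes SYNC_SKCIPHER_REQUEST_ON_STACK() legal. The whole call pattern in one place, as a hedged sketch (example_encrypt_block() is not a kernel symbol, and the data buffer must be linear, non-stack memory):

    #include <crypto/skcipher.h>
    #include <linux/err.h>
    #include <linux/scatterlist.h>

    static int example_encrypt_block(const u8 *key, unsigned int keylen,
    				 u8 *buf, unsigned int len, u8 *iv)
    {
    	struct crypto_sync_skcipher *tfm;
    	struct scatterlist sg[1];
    	int err;

    	tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0);
    	if (IS_ERR(tfm))
    		return PTR_ERR(tfm);
    	err = crypto_sync_skcipher_setkey(tfm, key, keylen);
    	if (!err) {
    		/* Stack allocation is safe: the tfm cannot be async. */
    		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

    		skcipher_request_set_sync_tfm(req, tfm);
    		skcipher_request_set_callback(req, 0, NULL, NULL);
    		sg_init_one(sg, buf, len);	/* buf must not be on the stack */
    		skcipher_request_set_crypt(req, sg, sg, len, iv);
    		err = crypto_skcipher_encrypt(req);
    		skcipher_request_zero(req);
    	}
    	crypto_free_sync_skcipher(tfm);
    	return err;
    }
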
 
@@ -92,7 +92,7 @@
 
 u32
 krb5_decrypt(
-     struct crypto_skcipher *tfm,
+     struct crypto_sync_skcipher *tfm,
      void * iv,
      void * in,
      void * out,
@@ -101,23 +101,23 @@
 	u32 ret = -EINVAL;
 	struct scatterlist sg[1];
 	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
-	if (length % crypto_skcipher_blocksize(tfm) != 0)
+	if (length % crypto_sync_skcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+	if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
 		dprintk("RPC:       gss_k5decrypt: tfm iv size too large %d\n",
-			crypto_skcipher_ivsize(tfm));
+			crypto_sync_skcipher_ivsize(tfm));
 		goto out;
 	}
 	if (iv)
-		memcpy(local_iv,iv, crypto_skcipher_ivsize(tfm));
+		memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm));
 
 	memcpy(out, in, length);
 	sg_init_one(sg, out, length);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, length, local_iv);
 
@@ -466,7 +466,8 @@
 {
 	struct encryptor_desc *desc = data;
 	struct xdr_buf *outbuf = desc->outbuf;
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
+	struct crypto_sync_skcipher *tfm =
+		crypto_sync_skcipher_reqtfm(desc->req);
 	struct page *in_page;
 	int thislen = desc->fraglen + sg->length;
 	int fraglen, ret;
@@ -492,7 +493,7 @@
 	desc->fraglen += sg->length;
 	desc->pos += sg->length;
 
-	fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
+	fraglen = thislen & (crypto_sync_skcipher_blocksize(tfm) - 1);
 	thislen -= fraglen;
 
 	if (thislen == 0)
@@ -526,16 +527,16 @@
 }
 
 int
-gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
+gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
 		    int offset, struct page **pages)
 {
 	int ret;
 	struct encryptor_desc desc;
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
-	BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
+	BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 
 	memset(desc.iv, 0, sizeof(desc.iv));
@@ -567,7 +568,8 @@
 {
 	struct decryptor_desc *desc = data;
 	int thislen = desc->fraglen + sg->length;
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
+	struct crypto_sync_skcipher *tfm =
+		crypto_sync_skcipher_reqtfm(desc->req);
 	int fraglen, ret;
 
 	/* Worst case is 4 fragments: head, end of page 1, start
@@ -578,7 +580,7 @@
 	desc->fragno++;
 	desc->fraglen += sg->length;
 
-	fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
+	fraglen = thislen & (crypto_sync_skcipher_blocksize(tfm) - 1);
 	thislen -= fraglen;
 
 	if (thislen == 0)
@@ -608,17 +610,17 @@
 }
 
 int
-gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
+gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
 		    int offset)
 {
 	int ret;
 	struct decryptor_desc desc;
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
 	/* XXXJBF: */
-	BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
+	BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 
 	memset(desc.iv, 0, sizeof(desc.iv));
@@ -672,12 +674,12 @@
 }
 
 static u32
-gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
+gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf,
 		   u32 offset, u8 *iv, struct page **pages, int encrypt)
 {
 	u32 ret;
 	struct scatterlist sg[1];
-	SKCIPHER_REQUEST_ON_STACK(req, cipher);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, cipher);
 	u8 *data;
 	struct page **save_pages;
 	u32 len = buf->len - offset;
@@ -706,7 +708,7 @@
 
 	sg_init_one(sg, data, len);
 
-	skcipher_request_set_tfm(req, cipher);
+	skcipher_request_set_sync_tfm(req, cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, len, iv);
 
@@ -735,7 +737,7 @@
 	struct xdr_netobj hmac;
 	u8 *cksumkey;
 	u8 *ecptr;
-	struct crypto_skcipher *cipher, *aux_cipher;
+	struct crypto_sync_skcipher *cipher, *aux_cipher;
 	int blocksize;
 	struct page **save_pages;
 	int nblocks, nbytes;
@@ -754,7 +756,7 @@
 		cksumkey = kctx->acceptor_integ;
 		usage = KG_USAGE_ACCEPTOR_SEAL;
 	}
-	blocksize = crypto_skcipher_blocksize(cipher);
+	blocksize = crypto_sync_skcipher_blocksize(cipher);
 
 	/* hide the gss token header and insert the confounder */
 	offset += GSS_KRB5_TOK_HDR_LEN;
@@ -807,7 +809,7 @@
 	memset(desc.iv, 0, sizeof(desc.iv));
 
 	if (cbcbytes) {
-		SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
 
 		desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
 		desc.fragno = 0;
@@ -816,7 +818,7 @@
 		desc.outbuf = buf;
 		desc.req = req;
 
-		skcipher_request_set_tfm(req, aux_cipher);
+		skcipher_request_set_sync_tfm(req, aux_cipher);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 
 		sg_init_table(desc.infrags, 4);
@@ -855,7 +857,7 @@
 	struct xdr_buf subbuf;
 	u32 ret = 0;
 	u8 *cksum_key;
-	struct crypto_skcipher *cipher, *aux_cipher;
+	struct crypto_sync_skcipher *cipher, *aux_cipher;
 	struct xdr_netobj our_hmac_obj;
 	u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
 	u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
@@ -874,7 +876,7 @@
 		cksum_key = kctx->initiator_integ;
 		usage = KG_USAGE_INITIATOR_SEAL;
 	}
-	blocksize = crypto_skcipher_blocksize(cipher);
+	blocksize = crypto_sync_skcipher_blocksize(cipher);
 
 
 	/* create a segment skipping the header and leaving out the checksum */
@@ -891,13 +893,13 @@
 	memset(desc.iv, 0, sizeof(desc.iv));
 
 	if (cbcbytes) {
-		SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
 
 		desc.fragno = 0;
 		desc.fraglen = 0;
 		desc.req = req;
 
-		skcipher_request_set_tfm(req, aux_cipher);
+		skcipher_request_set_sync_tfm(req, aux_cipher);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 
 		sg_init_table(desc.frags, 4);
@@ -946,7 +948,8 @@
  * Set the key of the given cipher.
  */
 int
-krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
+krb5_rc4_setup_seq_key(struct krb5_ctx *kctx,
+		       struct crypto_sync_skcipher *cipher,
 		       unsigned char *cksum)
 {
 	struct crypto_shash *hmac;
@@ -974,7 +977,6 @@
 	}
 
 	desc->tfm = hmac;
-	desc->flags = 0;
 
 	/* Compute intermediate Kseq from session key */
 	err = crypto_shash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
@@ -994,7 +996,7 @@
 	if (err)
 		goto out_err;
 
-	err = crypto_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
+	err = crypto_sync_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
 	if (err)
 		goto out_err;
 
@@ -1012,7 +1014,8 @@
  * Set the key of cipher kctx->enc.
  */
 int
-krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
+krb5_rc4_setup_enc_key(struct krb5_ctx *kctx,
+		       struct crypto_sync_skcipher *cipher,
 		       s32 seqnum)
 {
 	struct crypto_shash *hmac;
@@ -1041,7 +1044,6 @@
 	}
 
 	desc->tfm = hmac;
-	desc->flags = 0;
 
 	/* Compute intermediate Kcrypt from session key */
 	for (i = 0; i < kctx->gk5e->keylength; i++)
@@ -1069,7 +1071,8 @@
 	if (err)
 		goto out_err;
 
-	err = crypto_skcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
+	err = crypto_sync_skcipher_setkey(cipher, Kcrypt,
+					  kctx->gk5e->keylength);
 	if (err)
 		goto out_err;
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index f7fe2d2..3b7f721 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -147,7 +147,7 @@
 	size_t blocksize, keybytes, keylength, n;
 	unsigned char *inblockdata, *outblockdata, *rawkey;
 	struct xdr_netobj inblock, outblock;
-	struct crypto_skcipher *cipher;
+	struct crypto_sync_skcipher *cipher;
 	u32 ret = EINVAL;
 
 	blocksize = gk5e->blocksize;
@@ -157,11 +157,10 @@
 	if ((inkey->len != keylength) || (outkey->len != keylength))
 		goto err_return;
 
-	cipher = crypto_alloc_skcipher(gk5e->encrypt_name, 0,
-				       CRYPTO_ALG_ASYNC);
+	cipher = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(cipher))
 		goto err_return;
-	if (crypto_skcipher_setkey(cipher, inkey->data, inkey->len))
+	if (crypto_sync_skcipher_setkey(cipher, inkey->data, inkey->len))
 		goto err_return;
 
 	/* allocate and set up buffers */
@@ -229,16 +228,13 @@
 	ret = 0;
 
 err_free_raw:
-	memset(rawkey, 0, keybytes);
-	kfree(rawkey);
+	kzfree(rawkey);
 err_free_out:
-	memset(outblockdata, 0, blocksize);
-	kfree(outblockdata);
+	kzfree(outblockdata);
 err_free_in:
-	memset(inblockdata, 0, blocksize);
-	kfree(inblockdata);
+	kzfree(inblockdata);
 err_free_cipher:
-	crypto_free_skcipher(cipher);
+	crypto_free_sync_skcipher(cipher);
 err_return:
 	return ret;
 }
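
The open-coded memset()+kfree() pairs collapse into kzfree(), which zeroes the whole allocation before freeing it so intermediate key material does not linger in the heap. Roughly what the helper does, per mm/slab_common.c of this era (a sketch; in later kernels kzfree() was renamed kfree_sensitive() and switched to memzero_explicit()):

    #include <linux/slab.h>
    #include <linux/string.h>

    static void example_kzfree(const void *p)
    {
    	size_t ks;
    	void *mem = (void *)p;

    	if (unlikely(ZERO_OR_NULL_PTR(mem)))
    		return;
    	ks = ksize(mem);	/* zero the full slab object, not just
    				 * the originally requested length */
    	memset(mem, 0, ks);
    	kfree(mem);
    }
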
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7bb2514..6e5d6d2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
 /*
  *  linux/net/sunrpc/gss_krb5_mech.c
  *
@@ -6,32 +7,6 @@
  *
  *  Andy Adamson <andros@umich.edu>
  *  J. Bruce Fields <bfields@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <crypto/hash.h>
@@ -53,6 +28,7 @@
 static struct gss_api_mech gss_kerberos_mech;	/* forward declaration */
 
 static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
+#ifndef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
 	/*
 	 * DES (All DES enctypes are mapped to the same gss functionality)
 	 */
@@ -74,6 +50,7 @@
 	  .cksumlength = 8,
 	  .keyed_cksum = 0,
 	},
+#endif	/* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
 	/*
 	 * RC4-HMAC
 	 */
@@ -218,7 +195,7 @@
 
 static inline const void *
 get_key(const void *p, const void *end,
-	struct krb5_ctx *ctx, struct crypto_skcipher **res)
+	struct krb5_ctx *ctx, struct crypto_sync_skcipher **res)
 {
 	struct xdr_netobj	key;
 	int			alg;
@@ -246,15 +223,14 @@
 	if (IS_ERR(p))
 		goto out_err;
 
-	*res = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
-							CRYPTO_ALG_ASYNC);
+	*res = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(*res)) {
 		printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
 			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
 		*res = NULL;
 		goto out_err_free_key;
 	}
-	if (crypto_skcipher_setkey(*res, key.data, key.len)) {
+	if (crypto_sync_skcipher_setkey(*res, key.data, key.len)) {
 		printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
 			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
 		goto out_err_free_tfm;
@@ -264,7 +240,7 @@
 	return p;
 
 out_err_free_tfm:
-	crypto_free_skcipher(*res);
+	crypto_free_sync_skcipher(*res);
 out_err_free_key:
 	kfree(key.data);
 	p = ERR_PTR(-EINVAL);
@@ -275,6 +251,7 @@
 static int
 gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
 {
+	u32 seq_send;
 	int tmp;
 
 	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
@@ -316,9 +293,10 @@
 	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
 	if (IS_ERR(p))
 		goto out_err;
-	p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
+	p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send));
 	if (IS_ERR(p))
 		goto out_err;
+	atomic_set(&ctx->seq_send, seq_send);
 	p = simple_get_netobj(p, end, &ctx->mech_used);
 	if (IS_ERR(p))
 		goto out_err;
@@ -336,30 +314,30 @@
 	return 0;
 
 out_err_free_key2:
-	crypto_free_skcipher(ctx->seq);
+	crypto_free_sync_skcipher(ctx->seq);
 out_err_free_key1:
-	crypto_free_skcipher(ctx->enc);
+	crypto_free_sync_skcipher(ctx->enc);
 out_err_free_mech:
 	kfree(ctx->mech_used.data);
 out_err:
 	return PTR_ERR(p);
 }
 
-static struct crypto_skcipher *
+static struct crypto_sync_skcipher *
 context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
 {
-	struct crypto_skcipher *cp;
+	struct crypto_sync_skcipher *cp;
 
-	cp = crypto_alloc_skcipher(cname, 0, CRYPTO_ALG_ASYNC);
+	cp = crypto_alloc_sync_skcipher(cname, 0, 0);
 	if (IS_ERR(cp)) {
 		dprintk("gss_kerberos_mech: unable to initialize "
 			"crypto algorithm %s\n", cname);
 		return NULL;
 	}
-	if (crypto_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
+	if (crypto_sync_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
 		dprintk("gss_kerberos_mech: error setting key for "
 			"crypto algorithm %s\n", cname);
-		crypto_free_skcipher(cp);
+		crypto_free_sync_skcipher(cp);
 		return NULL;
 	}
 	return cp;
@@ -413,9 +391,9 @@
 	return 0;
 
 out_free_enc:
-	crypto_free_skcipher(ctx->enc);
+	crypto_free_sync_skcipher(ctx->enc);
 out_free_seq:
-	crypto_free_skcipher(ctx->seq);
+	crypto_free_sync_skcipher(ctx->seq);
 out_err:
 	return -EINVAL;
 }
@@ -460,7 +438,6 @@
 	}
 
 	desc->tfm = hmac;
-	desc->flags = 0;
 
 	err = crypto_shash_digest(desc, sigkeyconstant, slen, ctx->cksum);
 	kzfree(desc);
@@ -469,17 +446,15 @@
 	/*
 	 * allocate hash, and skciphers for data and seqnum encryption
 	 */
-	ctx->enc = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
-					 CRYPTO_ALG_ASYNC);
+	ctx->enc = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(ctx->enc)) {
 		err = PTR_ERR(ctx->enc);
 		goto out_err_free_hmac;
 	}
 
-	ctx->seq = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
-					 CRYPTO_ALG_ASYNC);
+	ctx->seq = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(ctx->seq)) {
-		crypto_free_skcipher(ctx->enc);
+		crypto_free_sync_skcipher(ctx->enc);
 		err = PTR_ERR(ctx->seq);
 		goto out_err_free_hmac;
 	}
@@ -591,7 +566,7 @@
 			context_v2_alloc_cipher(ctx, "cbc(aes)",
 						ctx->acceptor_seal);
 		if (ctx->acceptor_enc_aux == NULL) {
-			crypto_free_skcipher(ctx->initiator_enc_aux);
+			crypto_free_sync_skcipher(ctx->initiator_enc_aux);
 			goto out_free_acceptor_enc;
 		}
 	}
@@ -599,9 +574,9 @@
 	return 0;
 
 out_free_acceptor_enc:
-	crypto_free_skcipher(ctx->acceptor_enc);
+	crypto_free_sync_skcipher(ctx->acceptor_enc);
 out_free_initiator_enc:
-	crypto_free_skcipher(ctx->initiator_enc);
+	crypto_free_sync_skcipher(ctx->initiator_enc);
 out_err:
 	return -EINVAL;
 }
@@ -610,6 +585,7 @@
 gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
 		gfp_t gfp_mask)
 {
+	u64 seq_send64;
 	int keylen;
 
 	p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
@@ -620,14 +596,15 @@
 	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
 	if (IS_ERR(p))
 		goto out_err;
-	p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
+	p = simple_get_bytes(p, end, &seq_send64, sizeof(seq_send64));
 	if (IS_ERR(p))
 		goto out_err;
+	atomic64_set(&ctx->seq_send64, seq_send64);
 	/* set seq_send for use by "older" enctypes */
-	ctx->seq_send = ctx->seq_send64;
-	if (ctx->seq_send64 != ctx->seq_send) {
-		dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
-			(unsigned long)ctx->seq_send64, ctx->seq_send);
+	atomic_set(&ctx->seq_send, seq_send64);
+	if (seq_send64 != atomic_read(&ctx->seq_send)) {
+		dprintk("%s: seq_send64 %llx, seq_send %x overflow?\n", __func__,
+			seq_send64, atomic_read(&ctx->seq_send));
 		p = ERR_PTR(-EINVAL);
 		goto out_err;
 	}
@@ -713,12 +690,12 @@
 gss_delete_sec_context_kerberos(void *internal_ctx) {
 	struct krb5_ctx *kctx = internal_ctx;
 
-	crypto_free_skcipher(kctx->seq);
-	crypto_free_skcipher(kctx->enc);
-	crypto_free_skcipher(kctx->acceptor_enc);
-	crypto_free_skcipher(kctx->initiator_enc);
-	crypto_free_skcipher(kctx->acceptor_enc_aux);
-	crypto_free_skcipher(kctx->initiator_enc_aux);
+	crypto_free_sync_skcipher(kctx->seq);
+	crypto_free_sync_skcipher(kctx->enc);
+	crypto_free_sync_skcipher(kctx->acceptor_enc);
+	crypto_free_sync_skcipher(kctx->initiator_enc);
+	crypto_free_sync_skcipher(kctx->acceptor_enc_aux);
+	crypto_free_sync_skcipher(kctx->initiator_enc_aux);
 	kfree(kctx->mech_used.data);
 	kfree(kctx);
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index eaad9bc..48fe4a5 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -63,13 +63,12 @@
 #include <linux/sunrpc/gss_krb5.h>
 #include <linux/random.h>
 #include <linux/crypto.h>
+#include <linux/atomic.h>
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 # define RPCDBG_FACILITY        RPCDBG_AUTH
 #endif
 
-DEFINE_SPINLOCK(krb5_seq_lock);
-
 static void *
 setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
 {
@@ -154,9 +153,7 @@
 
 	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
-	spin_lock(&krb5_seq_lock);
-	seq_send = ctx->seq_send++;
-	spin_unlock(&krb5_seq_lock);
+	seq_send = atomic_fetch_inc(&ctx->seq_send);
 
 	if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
 			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
@@ -174,7 +171,6 @@
 				       .data = cksumdata};
 	void *krb5_hdr;
 	s32 now;
-	u64 seq_send;
 	u8 *cksumkey;
 	unsigned int cksum_usage;
 	__be64 seq_send_be64;
@@ -185,11 +181,7 @@
 
 	/* Set up the sequence number. Now 64-bits in clear
 	 * text and w/o direction indicator */
-	spin_lock(&krb5_seq_lock);
-	seq_send = ctx->seq_send64++;
-	spin_unlock(&krb5_seq_lock);
-
-	seq_send_be64 = cpu_to_be64(seq_send);
+	seq_send_be64 = cpu_to_be64(atomic64_fetch_inc(&ctx->seq_send64));
 	memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
 
 	if (ctx->initiate) {
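
krb5_seq_lock existed only to make the seq_send/seq_send64 post-increment atomic; with the counters converted to atomic_t/atomic64_t, atomic_fetch_inc() returns the pre-increment value in a single read-modify-write and the global lock can go away. Reduced to its essence (a userspace C11 sketch of the same transformation):

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint64_t seq_send64;

    /* Was:  spin_lock(&krb5_seq_lock);
     *       seq = ctx->seq_send64++;
     *       spin_unlock(&krb5_seq_lock);
     * Now one atomic fetch-and-increment, usable from any context: */
    static uint64_t next_seqno(void)
    {
    	return atomic_fetch_add(&seq_send64, 1);
    }
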
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index c8b9082..5071051 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -43,16 +43,19 @@
 krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
 		      unsigned char *cksum, unsigned char *buf)
 {
-	struct crypto_skcipher *cipher;
-	unsigned char plain[8];
+	struct crypto_sync_skcipher *cipher;
+	unsigned char *plain;
 	s32 code;
 
 	dprintk("RPC:       %s:\n", __func__);
-	cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-				       CRYPTO_ALG_ASYNC);
+	cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
+	plain = kmalloc(8, GFP_NOFS);
+	if (!plain) {
+		code = -ENOMEM;
+		goto out;
+	}
+
 	plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);
 	plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
 	plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
@@ -68,22 +71,28 @@
 
 	code = krb5_encrypt(cipher, cksum, plain, buf, 8);
 out:
-	crypto_free_skcipher(cipher);
+	kfree(plain);
+	crypto_free_sync_skcipher(cipher);
 	return code;
 }
 s32
 krb5_make_seq_num(struct krb5_ctx *kctx,
-		struct crypto_skcipher *key,
+		struct crypto_sync_skcipher *key,
 		int direction,
 		u32 seqnum,
 		unsigned char *cksum, unsigned char *buf)
 {
-	unsigned char plain[8];
+	unsigned char *plain;
+	s32 code;
 
 	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
 		return krb5_make_rc4_seq_num(kctx, direction, seqnum,
 					     cksum, buf);
 
+	plain = kmalloc(8, GFP_NOFS);
+	if (!plain)
+		return -ENOMEM;
+
 	plain[0] = (unsigned char) (seqnum & 0xff);
 	plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
 	plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
@@ -94,20 +103,21 @@
 	plain[6] = direction;
 	plain[7] = direction;
 
-	return krb5_encrypt(key, cksum, plain, buf, 8);
+	code = krb5_encrypt(key, cksum, plain, buf, 8);
+	kfree(plain);
+	return code;
 }
 
 static s32
 krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
 		     unsigned char *buf, int *direction, s32 *seqnum)
 {
-	struct crypto_skcipher *cipher;
-	unsigned char plain[8];
+	struct crypto_sync_skcipher *cipher;
+	unsigned char *plain;
 	s32 code;
 
 	dprintk("RPC:       %s:\n", __func__);
-	cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-				       CRYPTO_ALG_ASYNC);
+	cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
@@ -115,22 +125,30 @@
 	if (code)
 		goto out;
 
+	plain = kmalloc(8, GFP_NOFS);
+	if (!plain) {
+		code = -ENOMEM;
+		goto out;
+	}
+
 	code = krb5_decrypt(cipher, cksum, buf, plain, 8);
 	if (code)
-		goto out;
+		goto out_plain;
 
 	if ((plain[4] != plain[5]) || (plain[4] != plain[6])
 				   || (plain[4] != plain[7])) {
 		code = (s32)KG_BAD_SEQ;
-		goto out;
+		goto out_plain;
 	}
 
 	*direction = plain[4];
 
 	*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
 					(plain[2] << 8) | (plain[3]));
+out_plain:
+	kfree(plain);
 out:
-	crypto_free_skcipher(cipher);
+	crypto_free_sync_skcipher(cipher);
 	return code;
 }
 
@@ -141,26 +159,33 @@
 	       int *direction, u32 *seqnum)
 {
 	s32 code;
-	unsigned char plain[8];
-	struct crypto_skcipher *key = kctx->seq;
+	unsigned char *plain;
+	struct crypto_sync_skcipher *key = kctx->seq;
 
 	dprintk("RPC:       krb5_get_seq_num:\n");
 
 	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
 		return krb5_get_rc4_seq_num(kctx, cksum, buf,
 					    direction, seqnum);
+	plain = kmalloc(8, GFP_NOFS);
+	if (!plain)
+		return -ENOMEM;
 
 	if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
-		return code;
+		goto out;
 
 	if ((plain[4] != plain[5]) || (plain[4] != plain[6]) ||
-	    (plain[4] != plain[7]))
-		return (s32)KG_BAD_SEQ;
+	    (plain[4] != plain[7])) {
+		code = (s32)KG_BAD_SEQ;
+		goto out;
+	}
 
 	*direction = plain[4];
 
 	*seqnum = ((plain[0]) |
 		   (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
 
-	return 0;
+out:
+	kfree(plain);
+	return code;
 }
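
The 8-byte plaintext buffers move from the stack to kmalloc() because krb5_encrypt()/krb5_decrypt() wrap them in a scatterlist, and with CONFIG_VMAP_STACK the stack can live in vmalloc space, where sg_init_one()'s virt-to-page translation is invalid. For reference, the block being built is just the sequence number plus a repeated direction byte; a sketch of the equivalent constructor (example_seq_plaintext() is illustrative, not a kernel symbol):

    #include <linux/slab.h>
    #include <linux/string.h>

    static u8 *example_seq_plaintext(u32 seqnum, u8 direction)
    {
    	u8 *plain = kmalloc(8, GFP_NOFS);	/* heap, hence sg-safe */

    	if (!plain)
    		return NULL;
    	plain[0] = seqnum & 0xff;		/* LSB-first byte order,    */
    	plain[1] = (seqnum >> 8) & 0xff;	/* as in krb5_make_seq_num() */
    	plain[2] = (seqnum >> 16) & 0xff;
    	plain[3] = (seqnum >> 24) & 0xff;
    	memset(plain + 4, direction, 4);	/* 0x00 initiator, 0xff acceptor */
    	return plain;
    }
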
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 39a2e67..14a0aff 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -174,7 +174,7 @@
 
 	now = get_seconds();
 
-	blocksize = crypto_skcipher_blocksize(kctx->enc);
+	blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
 	gss_krb5_add_padding(buf, offset, blocksize);
 	BUG_ON((buf->len - offset) % blocksize);
 	plainlen = conflen + buf->len - offset;
@@ -228,9 +228,7 @@
 
 	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
-	spin_lock(&krb5_seq_lock);
-	seq_send = kctx->seq_send++;
-	spin_unlock(&krb5_seq_lock);
+	seq_send = atomic_fetch_inc(&kctx->seq_send);
 
 	/* XXX would probably be more efficient to compute checksum
 	 * and encrypt at the same time: */
@@ -239,10 +237,10 @@
 		return GSS_S_FAILURE;
 
 	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
-		struct crypto_skcipher *cipher;
+		struct crypto_sync_skcipher *cipher;
 		int err;
-		cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-					       CRYPTO_ALG_ASYNC);
+		cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name,
+						    0, 0);
 		if (IS_ERR(cipher))
 			return GSS_S_FAILURE;
 
@@ -250,7 +248,7 @@
 
 		err = gss_encrypt_xdr_buf(cipher, buf,
 					  offset + headlen - conflen, pages);
-		crypto_free_skcipher(cipher);
+		crypto_free_sync_skcipher(cipher);
 		if (err)
 			return GSS_S_FAILURE;
 	} else {
@@ -327,18 +325,18 @@
 		return GSS_S_BAD_SIG;
 
 	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
-		struct crypto_skcipher *cipher;
+		struct crypto_sync_skcipher *cipher;
 		int err;
 
-		cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-					       CRYPTO_ALG_ASYNC);
+		cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name,
+						    0, 0);
 		if (IS_ERR(cipher))
 			return GSS_S_FAILURE;
 
 		krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
 
 		err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
-		crypto_free_skcipher(cipher);
+		crypto_free_sync_skcipher(cipher);
 		if (err)
 			return GSS_S_DEFECTIVE_TOKEN;
 	} else {
@@ -371,7 +369,7 @@
 	/* Copy the data back to the right position.  XXX: Would probably be
 	 * better to copy and encrypt at the same time. */
 
-	blocksize = crypto_skcipher_blocksize(kctx->enc);
+	blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
 	data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
 					conflen;
 	orig_start = buf->head[0].iov_base + offset;
@@ -477,9 +475,7 @@
 	*be16ptr++ = 0;
 
 	be64ptr = (__be64 *)be16ptr;
-	spin_lock(&krb5_seq_lock);
-	*be64ptr = cpu_to_be64(kctx->seq_send64++);
-	spin_unlock(&krb5_seq_lock);
+	*be64ptr = cpu_to_be64(atomic64_fetch_inc(&kctx->seq_send64));
 
 	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
 	if (err)
@@ -574,14 +570,16 @@
 	 */
 	movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
 	movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
-	BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
-							buf->head[0].iov_len);
+	if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+	    buf->head[0].iov_len)
+		return GSS_S_FAILURE;
 	memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
 	buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
 	buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
 
 	/* Trim off the trailing "extra count" and checksum blob */
-	xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
+	buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip;
+
 	return GSS_S_COMPLETE;
 }
 
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 5fec3ab..8206009 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
 /*
  *  linux/net/sunrpc/gss_mech_switch.c
  *
@@ -5,32 +6,6 @@
  *  All rights reserved.
  *
  *  J. Bruce Fields   <bfields@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/types.h>
@@ -117,7 +92,7 @@
 	if (status)
 		return status;
 	spin_lock(&registered_mechs_lock);
-	list_add(&gm->gm_list, &registered_mechs);
+	list_add_rcu(&gm->gm_list, &registered_mechs);
 	spin_unlock(&registered_mechs_lock);
 	dprintk("RPC:       registered gss mechanism %s\n", gm->gm_name);
 	return 0;
@@ -132,7 +107,7 @@
 void gss_mech_unregister(struct gss_api_mech *gm)
 {
 	spin_lock(&registered_mechs_lock);
-	list_del(&gm->gm_list);
+	list_del_rcu(&gm->gm_list);
 	spin_unlock(&registered_mechs_lock);
 	dprintk("RPC:       unregistered gss mechanism %s\n", gm->gm_name);
 	gss_mech_free(gm);
@@ -151,15 +126,15 @@
 {
 	struct gss_api_mech	*pos, *gm = NULL;
 
-	spin_lock(&registered_mechs_lock);
-	list_for_each_entry(pos, &registered_mechs, gm_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
 		if (0 == strcmp(name, pos->gm_name)) {
 			if (try_module_get(pos->gm_owner))
 				gm = pos;
 			break;
 		}
 	}
-	spin_unlock(&registered_mechs_lock);
+	rcu_read_unlock();
 	return gm;
 
 }
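
The registered-mechanisms list now follows the canonical RCU split: writers still serialize on registered_mechs_lock but publish with list_add_rcu()/list_del_rcu(), while lookups traverse under rcu_read_lock() and pin the result (here via try_module_get()) before dropping the read lock. The shape of the pattern, as a generic kernel sketch with illustrative names:

    #include <linux/rculist.h>
    #include <linux/spinlock.h>

    struct example_item {
    	struct list_head list;
    	int id;
    };

    static LIST_HEAD(example_list);
    static DEFINE_SPINLOCK(example_lock);

    static void example_add(struct example_item *item)
    {
    	spin_lock(&example_lock);	/* writers still serialize */
    	list_add_rcu(&item->list, &example_list);
    	spin_unlock(&example_lock);
    }

    static struct example_item *example_find(int id)
    {
    	struct example_item *pos, *found = NULL;

    	rcu_read_lock();		/* readers take no lock at all */
    	list_for_each_entry_rcu(pos, &example_list, list) {
    		if (pos->id == id) {
    			found = pos;	/* pin a real reference here,
    					 * before rcu_read_unlock() */
    			break;
    		}
    	}
    	rcu_read_unlock();
    	return found;
    }
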
@@ -186,8 +161,8 @@
 	dprintk("RPC:       %s(%s)\n", __func__, buf);
 	request_module("rpc-auth-gss-%s", buf);
 
-	spin_lock(&registered_mechs_lock);
-	list_for_each_entry(pos, &registered_mechs, gm_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
 		if (obj->len == pos->gm_oid.len) {
 			if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
 				if (try_module_get(pos->gm_owner))
@@ -196,7 +171,7 @@
 			}
 		}
 	}
-	spin_unlock(&registered_mechs_lock);
+	rcu_read_unlock();
 	return gm;
 }
 
@@ -216,15 +191,15 @@
 {
 	struct gss_api_mech *gm = NULL, *pos;
 
-	spin_lock(&registered_mechs_lock);
-	list_for_each_entry(pos, &registered_mechs, gm_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
 		if (!mech_supports_pseudoflavor(pos, pseudoflavor))
 			continue;
 		if (try_module_get(pos->gm_owner))
 			gm = pos;
 		break;
 	}
-	spin_unlock(&registered_mechs_lock);
+	rcu_read_unlock();
 	return gm;
 }
 
@@ -244,7 +219,7 @@
 
 /**
  * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
- * @array: array to fill in
+ * @array_ptr: array to fill in
- * @size: size of "array"
+ * @size: size of @array_ptr
  *
  * Returns the number of array items filled in, or a negative errno.
@@ -257,8 +232,8 @@
 	struct gss_api_mech *pos = NULL;
 	int j, i = 0;
 
-	spin_lock(&registered_mechs_lock);
-	list_for_each_entry(pos, &registered_mechs, gm_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
 		for (j = 0; j < pos->gm_pf_num; j++) {
 			if (i >= size) {
-				spin_unlock(&registered_mechs_lock);
+				rcu_read_unlock();
@@ -267,7 +242,7 @@
 			array_ptr[i++] = pos->gm_pfs[j].pseudoflavor;
 		}
 	}
-	spin_unlock(&registered_mechs_lock);
+	rcu_read_unlock();
 	return i;
 }
 
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 73dcda0..0349f45 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  *  linux/net/sunrpc/gss_rpc_upcall.c
  *
  *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/types.h>
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h
index 1e542ad..31e9634 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.h
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  *  linux/net/sunrpc/gss_rpc_upcall.h
  *
  *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef _GSS_RPC_UPCALL_H
@@ -45,4 +32,5 @@
 void init_gssp_clnt(struct sunrpc_net *);
 int set_gssp_clnt(struct net *);
 void clear_gssp_clnt(struct sunrpc_net *);
+
 #endif /* _GSS_RPC_UPCALL_H */
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 444380f..2ff7b70 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * GSS Proxy upcall module
  *
  *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/sunrpc/svcauth.h>
@@ -784,6 +771,7 @@
 	xdr_inline_pages(&req->rq_rcv_buf,
 		PAGE_SIZE/2 /* pretty arbitrary */,
 		arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
+	req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
 done:
 	if (err)
 		dprintk("RPC:       gssx_enc_accept_sec_context: %d\n", err);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 146c310..3f17411 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * GSS Proxy upcall module
  *
  *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef _LINUX_GSS_RPC_XDR_H
@@ -262,6 +249,4 @@
 #define GSSX_ARG_wrap_size_limit_sz 0
 #define GSSX_RES_wrap_size_limit_sz 0
 
-
-
 #endif /* _LINUX_GSS_RPC_XDR_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 860f2a1..8be2f20 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Neil Brown <neilb@cse.unsw.edu.au>
  * J. Bruce Fields <bfields@umich.edu>
@@ -76,6 +77,7 @@
 	struct xdr_netobj	in_handle, in_token;
 	struct xdr_netobj	out_handle, out_token;
 	int			major_status, minor_status;
+	struct rcu_head		rcu_head;
 };
 
 static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old);
@@ -89,11 +91,19 @@
 	kfree(rsii->out_token.data);
 }
 
+static void rsi_free_rcu(struct rcu_head *head)
+{
+	struct rsi *rsii = container_of(head, struct rsi, rcu_head);
+
+	rsi_free(rsii);
+	kfree(rsii);
+}
+
 static void rsi_put(struct kref *ref)
 {
 	struct rsi *rsii = container_of(ref, struct rsi, h.ref);
-	rsi_free(rsii);
-	kfree(rsii);
+
+	call_rcu(&rsii->rcu_head, rsi_free_rcu);
 }
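
rsi_put() (and rsc_put() below) defer the final free through call_rcu(), which is what permits the switch to sunrpc_cache_lookup_rcu(): a lookup walking the cache chain under rcu_read_lock() can no longer race with the entry's memory being reused. The deferred-free idiom in isolation (a sketch with illustrative names):

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct example_entry {
    	/* ... payload readers may still be touching ... */
    	struct rcu_head rcu_head;
    };

    static void example_free_rcu(struct rcu_head *head)
    {
    	struct example_entry *e =
    		container_of(head, struct example_entry, rcu_head);

    	kfree(e);	/* runs only after every pre-existing
    			 * rcu_read_lock() section has ended */
    }

    static void example_release(struct example_entry *e)
    {
    	/* Tear down anything RCU readers never dereference right away,
    	 * then hand the envelope to the grace-period machinery. */
    	call_rcu(&e->rcu_head, example_free_rcu);
    }
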
 
 static inline int rsi_hash(struct rsi *item)
@@ -282,7 +292,7 @@
 	struct cache_head *ch;
 	int hash = rsi_hash(item);
 
-	ch = sunrpc_cache_lookup(cd, &item->h, hash);
+	ch = sunrpc_cache_lookup_rcu(cd, &item->h, hash);
 	if (ch)
 		return container_of(ch, struct rsi, h);
 	else
@@ -330,6 +340,7 @@
 	struct svc_cred		cred;
 	struct gss_svc_seq_data	seqdata;
 	struct gss_ctx		*mechctx;
+	struct rcu_head		rcu_head;
 };
 
 static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old);
@@ -343,12 +354,22 @@
 	free_svc_cred(&rsci->cred);
 }
 
+static void rsc_free_rcu(struct rcu_head *head)
+{
+	struct rsc *rsci = container_of(head, struct rsc, rcu_head);
+
+	kfree(rsci->handle.data);
+	kfree(rsci);
+}
+
 static void rsc_put(struct kref *ref)
 {
 	struct rsc *rsci = container_of(ref, struct rsc, h.ref);
 
-	rsc_free(rsci);
-	kfree(rsci);
+	if (rsci->mechctx)
+		gss_delete_sec_context(&rsci->mechctx);
+	free_svc_cred(&rsci->cred);
+	call_rcu(&rsci->rcu_head, rsc_free_rcu);
 }
 
 static inline int
@@ -453,12 +474,12 @@
 		 * treatment so are checked for validity here.)
 		 */
 		/* uid */
-		rsci.cred.cr_uid = make_kuid(&init_user_ns, id);
+		rsci.cred.cr_uid = make_kuid(current_user_ns(), id);
 
 		/* gid */
 		if (get_int(&mesg, &id))
 			goto out;
-		rsci.cred.cr_gid = make_kgid(&init_user_ns, id);
+		rsci.cred.cr_gid = make_kgid(current_user_ns(), id);
 
 		/* number of additional gid's */
 		if (get_int(&mesg, &N))
@@ -476,7 +497,7 @@
 			kgid_t kgid;
 			if (get_int(&mesg, &id))
 				goto out;
-			kgid = make_kgid(&init_user_ns, id);
+			kgid = make_kgid(current_user_ns(), id);
 			if (!gid_valid(kgid))
 				goto out;
 			rsci.cred.cr_group_info->gid[i] = kgid;
@@ -542,7 +563,7 @@
 	struct cache_head *ch;
 	int hash = rsc_hash(item);
 
-	ch = sunrpc_cache_lookup(cd, &item->h, hash);
+	ch = sunrpc_cache_lookup_rcu(cd, &item->h, hash);
 	if (ch)
 		return container_of(ch, struct rsc, h);
 	else
@@ -876,7 +897,7 @@
 	if (svc_getnl(&buf->head[0]) != seq)
 		goto out;
 	/* trim off the mic and padding at the end before returning */
-	xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
+	buf->len -= 4 + round_up_to_quad(mic.len);
 	stat = 0;
 out:
 	kfree(mic.data);
@@ -1122,7 +1143,7 @@
 	struct kvec *resv = &rqstp->rq_res.head[0];
 	struct rsi *rsip, rsikey;
 	int ret;
-	struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
+	struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
 
 	memset(&rsikey, 0, sizeof(rsikey));
 	ret = gss_read_verf(gc, argv, authp,
@@ -1233,7 +1254,7 @@
 	uint64_t handle;
 	int status;
 	int ret;
-	struct net *net = rqstp->rq_xprt->xpt_net;
+	struct net *net = SVC_NET(rqstp);
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
 	memset(&ud, 0, sizeof(ud));
@@ -1424,7 +1445,7 @@
 	__be32		*rpcstart;
 	__be32		*reject_stat = resv->iov_base + resv->iov_len;
 	int		ret;
-	struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
+	struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
 
 	dprintk("RPC:       svcauth_gss: argv->iov_len = %zd\n",
 			argv->iov_len);
@@ -1714,7 +1735,7 @@
 	struct rpc_gss_wire_cred *gc = &gsd->clcred;
 	struct xdr_buf *resbuf = &rqstp->rq_res;
 	int stat = -EINVAL;
-	struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
+	struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
 
 	if (gc->gc_proc != RPC_GSS_PROC_DATA)
 		goto out;
@@ -1764,14 +1785,21 @@
 }
 
 static void
-svcauth_gss_domain_release(struct auth_domain *dom)
+svcauth_gss_domain_release_rcu(struct rcu_head *head)
 {
+	struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head);
 	struct gss_domain *gd = container_of(dom, struct gss_domain, h);
 
 	kfree(dom->name);
 	kfree(gd);
 }
 
+static void
+svcauth_gss_domain_release(struct auth_domain *dom)
+{
+	call_rcu(&dom->rcu_head, svcauth_gss_domain_release_rcu);
+}
+
 static struct auth_ops svcauthops_gss = {
 	.name		= "rpcsec_gss",
 	.owner		= THIS_MODULE,
diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c
new file mode 100644
index 0000000..5576f1e
--- /dev/null
+++ b/net/sunrpc/auth_gss/trace.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, 2019 Oracle. All rights reserved.
+ */
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/gss_err.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/rpcgss.h>
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 4b48228..41a633a 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -21,7 +21,7 @@
 static struct rpc_auth *
 nul_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
-	atomic_inc(&null_auth.au_count);
+	refcount_inc(&null_auth.au_count);
 	return &null_auth;
 }
 
@@ -36,8 +36,6 @@
 static struct rpc_cred *
 nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 {
-	if (flags & RPCAUTH_LOOKUP_RCU)
-		return &null_cred;
 	return get_rpccred(&null_cred);
 }
 
@@ -61,15 +59,21 @@
 /*
  * Marshal credential.
  */
-static __be32 *
-nul_marshal(struct rpc_task *task, __be32 *p)
+static int
+nul_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	*p++ = htonl(RPC_AUTH_NULL);
-	*p++ = 0;
-	*p++ = htonl(RPC_AUTH_NULL);
-	*p++ = 0;
+	__be32 *p;
 
-	return p;
+	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+	if (!p)
+		return -EMSGSIZE;
+	/* Credential */
+	*p++ = rpc_auth_null;
+	*p++ = xdr_zero;
+	/* Verifier */
+	*p++ = rpc_auth_null;
+	*p   = xdr_zero;
+	return 0;
 }
 
 /*
@@ -82,25 +86,19 @@
 	return 0;
 }
 
-static __be32 *
-nul_validate(struct rpc_task *task, __be32 *p)
+static int
+nul_validate(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	rpc_authflavor_t	flavor;
-	u32			size;
+	__be32 *p;
 
-	flavor = ntohl(*p++);
-	if (flavor != RPC_AUTH_NULL) {
-		printk("RPC: bad verf flavor: %u\n", flavor);
-		return ERR_PTR(-EIO);
-	}
-
-	size = ntohl(*p++);
-	if (size != 0) {
-		printk("RPC: bad verf size: %u\n", size);
-		return ERR_PTR(-EIO);
-	}
-
-	return p;
+	p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+	if (!p)
+		return -EIO;
+	if (*p++ != rpc_auth_null)
+		return -EIO;
+	if (*p != xdr_zero)
+		return -EIO;
+	return 0;
 }
 
 const struct rpc_authops authnull_ops = {
@@ -116,21 +114,23 @@
 struct rpc_auth null_auth = {
 	.au_cslack	= NUL_CALLSLACK,
 	.au_rslack	= NUL_REPLYSLACK,
-	.au_flags	= RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
+	.au_verfsize	= NUL_REPLYSLACK,
+	.au_ralign	= NUL_REPLYSLACK,
 	.au_ops		= &authnull_ops,
 	.au_flavor	= RPC_AUTH_NULL,
-	.au_count	= ATOMIC_INIT(0),
+	.au_count	= REFCOUNT_INIT(1),
 };
 
 static
 const struct rpc_credops null_credops = {
 	.cr_name	= "AUTH_NULL",
 	.crdestroy	= nul_destroy_cred,
-	.crbind		= rpcauth_generic_bind_cred,
 	.crmatch	= nul_match,
 	.crmarshal	= nul_marshal,
+	.crwrap_req	= rpcauth_wrap_req_encode,
 	.crrefresh	= nul_refresh,
 	.crvalidate	= nul_validate,
+	.crunwrap_resp	= rpcauth_unwrap_resp_decode,
 };
 
 static
@@ -138,6 +138,6 @@
 	.cr_lru		= LIST_HEAD_INIT(null_cred.cr_lru),
 	.cr_auth	= &null_auth,
 	.cr_ops		= &null_credops,
-	.cr_count	= ATOMIC_INIT(1),
+	.cr_count	= REFCOUNT_INIT(2),
 	.cr_flags	= 1UL << RPCAUTH_CRED_UPTODATE,
 };
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 185e56d..e7df1f7 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -11,16 +11,11 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/mempool.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/auth.h>
 #include <linux/user_namespace.h>
 
-struct unx_cred {
-	struct rpc_cred		uc_base;
-	kgid_t			uc_gid;
-	kgid_t			uc_gids[UNX_NGROUPS];
-};
-#define uc_uid			uc_base.cr_uid
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 # define RPCDBG_FACILITY	RPCDBG_AUTH
@@ -28,29 +23,18 @@
 
 static struct rpc_auth		unix_auth;
 static const struct rpc_credops	unix_credops;
+static mempool_t		*unix_pool;
 
 static struct rpc_auth *
 unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
-	dprintk("RPC:       creating UNIX authenticator for client %p\n",
-			clnt);
-	atomic_inc(&unix_auth.au_count);
+	refcount_inc(&unix_auth.au_count);
 	return &unix_auth;
 }
 
 static void
 unx_destroy(struct rpc_auth *auth)
 {
-	dprintk("RPC:       destroying UNIX authenticator %p\n", auth);
-	rpcauth_clear_credcache(auth->au_credcache);
-}
-
-static int
-unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
-{
-	return hash_64(from_kgid(&init_user_ns, acred->gid) |
-		((u64)from_kuid(&init_user_ns, acred->uid) <<
-			(sizeof(gid_t) * 8)), hashbits);
 }
 
 /*
@@ -59,52 +43,20 @@
 static struct rpc_cred *
 unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 {
-	return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
-}
+	struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS);
 
-static struct rpc_cred *
-unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
-{
-	struct unx_cred	*cred;
-	unsigned int groups = 0;
-	unsigned int i;
-
-	dprintk("RPC:       allocating UNIX cred for uid %d gid %d\n",
-			from_kuid(&init_user_ns, acred->uid),
-			from_kgid(&init_user_ns, acred->gid));
-
-	if (!(cred = kmalloc(sizeof(*cred), gfp)))
-		return ERR_PTR(-ENOMEM);
-
-	rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
-	cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
-
-	if (acred->group_info != NULL)
-		groups = acred->group_info->ngroups;
-	if (groups > UNX_NGROUPS)
-		groups = UNX_NGROUPS;
-
-	cred->uc_gid = acred->gid;
-	for (i = 0; i < groups; i++)
-		cred->uc_gids[i] = acred->group_info->gid[i];
-	if (i < UNX_NGROUPS)
-		cred->uc_gids[i] = INVALID_GID;
-
-	return &cred->uc_base;
-}
-
-static void
-unx_free_cred(struct unx_cred *unx_cred)
-{
-	dprintk("RPC:       unx_free_cred %p\n", unx_cred);
-	kfree(unx_cred);
+	rpcauth_init_cred(ret, acred, auth, &unix_credops);
+	ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+	return ret;
 }
 
 static void
 unx_free_cred_callback(struct rcu_head *head)
 {
-	struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu);
-	unx_free_cred(unx_cred);
+	struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu);
+
+	put_cred(rpc_cred->cr_cred);
+	mempool_free(rpc_cred, unix_pool);
 }
 
 static void
@@ -114,30 +66,32 @@
 }
 
 /*
- * Match credentials against current process creds.
- * The root_override argument takes care of cases where the caller may
- * request root creds (e.g. for NFS swapping).
+ * Match credentials against the current auth_cred.
  */
 static int
-unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
+unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
 {
-	struct unx_cred	*cred = container_of(rcred, struct unx_cred, uc_base);
 	unsigned int groups = 0;
 	unsigned int i;
 
+	if (cred->cr_cred == acred->cred)
+		return 1;
 
-	if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid))
+	if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid))
 		return 0;
 
-	if (acred->group_info != NULL)
-		groups = acred->group_info->ngroups;
+	if (acred->cred->group_info != NULL)
+		groups = acred->cred->group_info->ngroups;
 	if (groups > UNX_NGROUPS)
 		groups = UNX_NGROUPS;
-	for (i = 0; i < groups ; i++)
-		if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i]))
-			return 0;
-	if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups]))
+	if (cred->cr_cred->group_info == NULL)
+		return groups == 0;
+	if (groups != cred->cr_cred->group_info->ngroups)
 		return 0;
+
+	for (i = 0; i < groups ; i++)
+		if (!gid_eq(cred->cr_cred->group_info->gid[i], acred->cred->group_info->gid[i]))
+			return 0;
 	return 1;
 }
 
@@ -145,35 +99,56 @@
  * Marshal credentials.
  * Maybe we should keep a cached credential for performance reasons.
  */
-static __be32 *
-unx_marshal(struct rpc_task *task, __be32 *p)
+static int
+unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
-	struct unx_cred	*cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
-	__be32		*base, *hold;
+	struct rpc_cred	*cred = task->tk_rqstp->rq_cred;
+	__be32		*p, *cred_len, *gidarr_len;
 	int		i;
+	struct group_info *gi = cred->cr_cred->group_info;
+	struct user_namespace *userns = clnt->cl_cred ?
+		clnt->cl_cred->user_ns : &init_user_ns;
 
-	*p++ = htonl(RPC_AUTH_UNIX);
-	base = p++;
-	*p++ = htonl(jiffies/HZ);
+	/* Credential */
 
-	/*
-	 * Copy the UTS nodename captured when the client was created.
-	 */
-	p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
+	p = xdr_reserve_space(xdr, 3 * sizeof(*p));
+	if (!p)
+		goto marshal_failed;
+	*p++ = rpc_auth_unix;
+	cred_len = p++;
+	*p++ = xdr_zero;	/* stamp */
+	if (xdr_stream_encode_opaque(xdr, clnt->cl_nodename,
+				     clnt->cl_nodelen) < 0)
+		goto marshal_failed;
+	p = xdr_reserve_space(xdr, 3 * sizeof(*p));
+	if (!p)
+		goto marshal_failed;
+	*p++ = cpu_to_be32(from_kuid_munged(userns, cred->cr_cred->fsuid));
+	*p++ = cpu_to_be32(from_kgid_munged(userns, cred->cr_cred->fsgid));
 
-	*p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
-	*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
-	hold = p++;
-	for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++)
-		*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
-	*hold = htonl(p - hold - 1);		/* gid array length */
-	*base = htonl((p - base - 1) << 2);	/* cred length */
+	gidarr_len = p++;
+	if (gi)
+		for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
+			*p++ = cpu_to_be32(from_kgid_munged(userns, gi->gid[i]));
+	*gidarr_len = cpu_to_be32(p - gidarr_len - 1);
+	*cred_len = cpu_to_be32((p - cred_len - 1) << 2);
+	p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);
+	if (!p)
+		goto marshal_failed;
 
-	*p++ = htonl(RPC_AUTH_NULL);
-	*p++ = htonl(0);
+	/* Verifier */
 
-	return p;
+	p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+	if (!p)
+		goto marshal_failed;
+	*p++ = rpc_auth_null;
+	*p   = xdr_zero;
+
+	return 0;
+
+marshal_failed:
+	return -EMSGSIZE;
 }
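
unx_marshal() now emits the AUTH_UNIX body through the xdr_stream, back-patching two length words (cred_len and gidarr_len) once the variable-length tail is known; the trailing xdr_reserve_space() then advances the stream over the gid words written past the second three-word reservation, relying on the caller having sized the send buffer for the worst case. What lands on the wire (RFC 5531 AUTH_SYS; informational layout sketch, all fields 4-byte XDR words):

    /*   flavor        AUTH_SYS (1)
     *   cred_len      back-patched byte length of everything below
     *   stamp         0
     *   machinename   opaque string (clnt->cl_nodename, padded to 4 bytes)
     *   uid, gid      translated via from_k{u,g}id_munged() against the
     *                 client's user namespace
     *   gids_len      back-patched count of supplementary gids (max 16)
     *   gids[]        gids_len words
     *
     *   verifier      AUTH_NULL (0), length 0
     */
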
 
 /*
@@ -186,39 +161,46 @@
 	return 0;
 }
 
-static __be32 *
-unx_validate(struct rpc_task *task, __be32 *p)
+static int
+unx_validate(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	rpc_authflavor_t	flavor;
-	u32			size;
+	struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
+	__be32 *p;
+	u32 size;
 
-	flavor = ntohl(*p++);
-	if (flavor != RPC_AUTH_NULL &&
-	    flavor != RPC_AUTH_UNIX &&
-	    flavor != RPC_AUTH_SHORT) {
-		printk("RPC: bad verf flavor: %u\n", flavor);
-		return ERR_PTR(-EIO);
+	p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+	if (!p)
+		return -EIO;
+	switch (*p++) {
+	case rpc_auth_null:
+	case rpc_auth_unix:
+	case rpc_auth_short:
+		break;
+	default:
+		return -EIO;
 	}
+	size = be32_to_cpup(p);
+	if (size > RPC_MAX_AUTH_SIZE)
+		return -EIO;
+	p = xdr_inline_decode(xdr, size);
+	if (!p)
+		return -EIO;
 
-	size = ntohl(*p++);
-	if (size > RPC_MAX_AUTH_SIZE) {
-		printk("RPC: giant verf size: %u\n", size);
-		return ERR_PTR(-EIO);
-	}
-	task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
-	p += (size >> 2);
-
-	return p;
+	auth->au_verfsize = XDR_QUADLEN(size) + 2;
+	auth->au_rslack = XDR_QUADLEN(size) + 2;
+	auth->au_ralign = XDR_QUADLEN(size) + 2;
+	return 0;
 }
 
 int __init rpc_init_authunix(void)
 {
-	return rpcauth_init_credcache(&unix_auth);
+	unix_pool = mempool_create_kmalloc_pool(16, sizeof(struct rpc_cred));
+	return unix_pool ? 0 : -ENOMEM;
 }
 
 void rpc_destroy_authunix(void)
 {
-	rpcauth_destroy_credcache(&unix_auth);
+	mempool_destroy(unix_pool);
 }
 
 const struct rpc_authops authunix_ops = {
@@ -227,28 +209,27 @@
 	.au_name	= "UNIX",
 	.create		= unx_create,
 	.destroy	= unx_destroy,
-	.hash_cred	= unx_hash_cred,
 	.lookup_cred	= unx_lookup_cred,
-	.crcreate	= unx_create_cred,
 };
 
 static
 struct rpc_auth		unix_auth = {
 	.au_cslack	= UNX_CALLSLACK,
 	.au_rslack	= NUL_REPLYSLACK,
-	.au_flags	= RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
+	.au_verfsize	= NUL_REPLYSLACK,
 	.au_ops		= &authunix_ops,
 	.au_flavor	= RPC_AUTH_UNIX,
-	.au_count	= ATOMIC_INIT(0),
+	.au_count	= REFCOUNT_INIT(1),
 };
 
 static
 const struct rpc_credops unix_credops = {
 	.cr_name	= "AUTH_UNIX",
 	.crdestroy	= unx_destroy_cred,
-	.crbind		= rpcauth_generic_bind_cred,
 	.crmatch	= unx_match,
 	.crmarshal	= unx_marshal,
+	.crwrap_req	= rpcauth_wrap_req_encode,
 	.crrefresh	= unx_refresh,
 	.crvalidate	= unx_validate,
+	.crunwrap_resp	= rpcauth_unwrap_resp_decode,
 };
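
The auth_unix.c hunks above move AUTH_UNIX marshalling from raw __be32
pointer arithmetic onto struct xdr_stream. A minimal sketch of the
reserve-then-fill contract the new unx_marshal() depends on:
xdr_reserve_space() and cpu_to_be32() are the real kernel APIs, while
encode_two_words() is only an illustrative helper:

	static int encode_two_words(struct xdr_stream *xdr, u32 a, u32 b)
	{
		__be32 *p = xdr_reserve_space(xdr, 2 * sizeof(*p));

		if (!p)			/* stream exhausted */
			return -EMSGSIZE;
		*p++ = cpu_to_be32(a);	/* words land in network byte order */
		*p   = cpu_to_be32(b);
		return 0;
	}

Every reservation can fail, which is why unx_marshal() now funnels all
failures to the marshal_failed label instead of assuming the send
buffer is large enough.
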
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 3c15a99..195b40c 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -31,25 +31,20 @@
 #define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
+#define BC_MAX_SLOTS	64U
+
+unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt)
+{
+	return BC_MAX_SLOTS;
+}
+
 /*
  * Helper routines that track the number of preallocation elements
  * on the transport.
  */
 static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
 {
-	return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots);
-}
-
-static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
-{
-	atomic_add(n, &xprt->bc_free_slots);
-	xprt->bc_alloc_count += n;
-}
-
-static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
-{
-	atomic_sub(n, &xprt->bc_free_slots);
-	return xprt->bc_alloc_count -= n;
+	return xprt->bc_alloc_count < xprt->bc_alloc_max;
 }
 
 /*
@@ -91,7 +86,6 @@
 		return NULL;
 
 	req->rq_xprt = xprt;
-	INIT_LIST_HEAD(&req->rq_list);
 	INIT_LIST_HEAD(&req->rq_bc_list);
 
 	/* Preallocate one XDR receive buffer */
@@ -146,6 +140,9 @@
 
 	dprintk("RPC:       setup backchannel transport\n");
 
+	if (min_reqs > BC_MAX_SLOTS)
+		min_reqs = BC_MAX_SLOTS;
+
 	/*
 	 * We use a temporary list to keep track of the preallocated
 	 * buffers.  Once we're done building the list we splice it
@@ -173,7 +170,9 @@
 	 */
 	spin_lock(&xprt->bc_pa_lock);
 	list_splice(&tmp_list, &xprt->bc_pa_list);
-	xprt_inc_alloc_count(xprt, min_reqs);
+	xprt->bc_alloc_count += min_reqs;
+	xprt->bc_alloc_max += min_reqs;
+	atomic_add(min_reqs, &xprt->bc_slot_count);
 	spin_unlock(&xprt->bc_pa_lock);
 
 	dprintk("RPC:       setup backchannel transport done\n");
@@ -198,7 +197,7 @@
 /**
  * xprt_destroy_backchannel - Destroys the backchannel preallocated structures.
 * @xprt:	the transport holding the preallocated structures
- * @max_reqs	the maximum number of preallocated structures to destroy
+ * @max_reqs:	the maximum number of preallocated structures to destroy
  *
  * Since these structures may have been allocated by multiple calls
  * to xprt_setup_backchannel, we only destroy up to the maximum number
@@ -221,11 +220,13 @@
 		goto out;
 
 	spin_lock_bh(&xprt->bc_pa_lock);
-	xprt_dec_alloc_count(xprt, max_reqs);
+	xprt->bc_alloc_max -= min(max_reqs, xprt->bc_alloc_max);
 	list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
 		dprintk("RPC:        req=%p\n", req);
 		list_del(&req->rq_bc_pa_list);
 		xprt_free_allocation(req);
+		xprt->bc_alloc_count--;
+		atomic_dec(&xprt->bc_slot_count);
 		if (--max_reqs == 0)
 			break;
 	}
@@ -236,30 +237,30 @@
 		list_empty(&xprt->bc_pa_list) ? "true" : "false");
 }
 
-static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
+static struct rpc_rqst *xprt_get_bc_request(struct rpc_xprt *xprt, __be32 xid,
+		struct rpc_rqst *new)
 {
 	struct rpc_rqst *req = NULL;
 
 	dprintk("RPC:       allocate a backchannel request\n");
-	if (atomic_read(&xprt->bc_free_slots) <= 0)
-		goto not_found;
 	if (list_empty(&xprt->bc_pa_list)) {
-		req = xprt_alloc_bc_req(xprt, GFP_ATOMIC);
-		if (!req)
+		if (!new)
 			goto not_found;
-		list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+		if (atomic_read(&xprt->bc_slot_count) >= BC_MAX_SLOTS)
+			goto not_found;
+		list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list);
 		xprt->bc_alloc_count++;
+		atomic_inc(&xprt->bc_slot_count);
 	}
 	req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
 				rq_bc_pa_list);
 	req->rq_reply_bytes_recvd = 0;
-	req->rq_bytes_sent = 0;
 	memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
 			sizeof(req->rq_private_buf));
 	req->rq_xid = xid;
 	req->rq_connect_cookie = xprt->connect_cookie;
-not_found:
 	dprintk("RPC:       backchannel req=%p\n", req);
+not_found:
 	return req;
 }
 
@@ -293,6 +294,7 @@
 	if (xprt_need_to_requeue(xprt)) {
 		list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
 		xprt->bc_alloc_count++;
+		atomic_inc(&xprt->bc_slot_count);
 		req = NULL;
 	}
 	spin_unlock_bh(&xprt->bc_pa_lock);
@@ -305,8 +307,8 @@
 		 */
 		dprintk("RPC:       Last session removed req=%p\n", req);
 		xprt_free_allocation(req);
-		return;
 	}
+	xprt_put(xprt);
 }
 
 /*
@@ -322,18 +324,27 @@
  */
 struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
 {
-	struct rpc_rqst *req;
+	struct rpc_rqst *req, *new = NULL;
 
-	spin_lock(&xprt->bc_pa_lock);
-	list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
-		if (req->rq_connect_cookie != xprt->connect_cookie)
-			continue;
-		if (req->rq_xid == xid)
-			goto found;
-	}
-	req = xprt_alloc_bc_request(xprt, xid);
+	do {
+		spin_lock(&xprt->bc_pa_lock);
+		list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
+			if (req->rq_connect_cookie != xprt->connect_cookie)
+				continue;
+			if (req->rq_xid == xid)
+				goto found;
+		}
+		req = xprt_get_bc_request(xprt, xid, new);
 found:
-	spin_unlock(&xprt->bc_pa_lock);
+		spin_unlock(&xprt->bc_pa_lock);
+		if (new) {
+			if (req != new)
+				xprt_free_allocation(new);
+			break;
+		} else if (req)
+			break;
+		new = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+	} while (new);
 	return req;
 }
 
@@ -350,13 +361,14 @@
 
 	spin_lock(&xprt->bc_pa_lock);
 	list_del(&req->rq_bc_pa_list);
-	xprt_dec_alloc_count(xprt, 1);
+	xprt->bc_alloc_count--;
 	spin_unlock(&xprt->bc_pa_lock);
 
 	req->rq_private_buf.len = copied;
 	set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
 
 	dprintk("RPC:       add callback request to list\n");
+	xprt_get(xprt);
 	spin_lock(&bc_serv->sv_cb_lock);
 	list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
 	wake_up(&bc_serv->sv_cb_waitq);
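
The reworked xprt_lookup_bc_request() above is the interesting part of
this file: the spare request is now allocated with GFP_KERNEL outside
bc_pa_lock, the lookup is retried, and the spare is freed if another
caller won the race. A condensed sketch of that allocate-outside-the-lock
pattern, where struct item and find_locked() are illustrative stand-ins
for the rpc_rqst machinery:

	struct item { int key; struct list_head node; };

	static struct item *lookup_or_insert(struct list_head *head,
					     spinlock_t *lock, int key)
	{
		struct item *it, *new = NULL;

		for (;;) {
			spin_lock(lock);
			it = find_locked(head, key);	/* assumed helper */
			if (!it && new) {
				list_add_tail(&new->node, head);
				it = new;
				new = NULL;
			}
			spin_unlock(lock);
			if (it || new)
				break;
			/* GFP_KERNEL may sleep, so allocate unlocked */
			new = kmalloc(sizeof(*new), GFP_KERNEL);
			if (!new)
				break;
			new->key = key;
		}
		kfree(new);	/* drops the spare if we lost the race */
		return it;
	}

This is why the old GFP_ATOMIC allocation under the lock could go away.
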
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 109fbe5..a349094 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sunrpc/cache.c
  *
@@ -5,9 +6,6 @@
  * used by sunrpc clients and servers.
  *
  * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au>
- *
- * Released under terms in GPL version 2.  See COPYING.
- *
  */
 
 #include <linux/types.h>
@@ -40,6 +38,7 @@
 
 static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
 static void cache_revisit_request(struct cache_head *item);
+static bool cache_listeners_exist(struct cache_detail *detail);
 
 static void cache_init(struct cache_head *h, struct cache_detail *detail)
 {
@@ -54,28 +53,39 @@
 	h->last_refresh = now;
 }
 
-struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
-				       struct cache_head *key, int hash)
+static inline int cache_is_valid(struct cache_head *h);
+static void cache_fresh_locked(struct cache_head *head, time_t expiry,
+				struct cache_detail *detail);
+static void cache_fresh_unlocked(struct cache_head *head,
+				struct cache_detail *detail);
+
+static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
+						struct cache_head *key,
+						int hash)
 {
-	struct cache_head *new = NULL, *freeme = NULL, *tmp = NULL;
-	struct hlist_head *head;
+	struct hlist_head *head = &detail->hash_table[hash];
+	struct cache_head *tmp;
 
-	head = &detail->hash_table[hash];
-
-	read_lock(&detail->hash_lock);
-
-	hlist_for_each_entry(tmp, head, cache_list) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tmp, head, cache_list) {
 		if (detail->match(tmp, key)) {
 			if (cache_is_expired(detail, tmp))
-				/* This entry is expired, we will discard it. */
-				break;
-			cache_get(tmp);
-			read_unlock(&detail->hash_lock);
+				continue;
+			tmp = cache_get_rcu(tmp);
+			rcu_read_unlock();
 			return tmp;
 		}
 	}
-	read_unlock(&detail->hash_lock);
-	/* Didn't find anything, insert an empty entry */
+	rcu_read_unlock();
+	return NULL;
+}
+
+static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
+						 struct cache_head *key,
+						 int hash)
+{
+	struct cache_head *new, *tmp, *freeme = NULL;
+	struct hlist_head *head = &detail->hash_table[hash];
 
 	new = detail->alloc();
 	if (!new)
@@ -87,35 +97,51 @@
 	cache_init(new, detail);
 	detail->init(new, key);
 
-	write_lock(&detail->hash_lock);
+	spin_lock(&detail->hash_lock);
 
 	/* check if entry appeared while we slept */
-	hlist_for_each_entry(tmp, head, cache_list) {
+	hlist_for_each_entry_rcu(tmp, head, cache_list) {
 		if (detail->match(tmp, key)) {
 			if (cache_is_expired(detail, tmp)) {
-				hlist_del_init(&tmp->cache_list);
+				hlist_del_init_rcu(&tmp->cache_list);
 				detail->entries --;
+				if (cache_is_valid(tmp) == -EAGAIN)
+					set_bit(CACHE_NEGATIVE, &tmp->flags);
+				cache_fresh_locked(tmp, 0, detail);
 				freeme = tmp;
 				break;
 			}
 			cache_get(tmp);
-			write_unlock(&detail->hash_lock);
+			spin_unlock(&detail->hash_lock);
 			cache_put(new, detail);
 			return tmp;
 		}
 	}
 
-	hlist_add_head(&new->cache_list, head);
+	hlist_add_head_rcu(&new->cache_list, head);
 	detail->entries++;
 	cache_get(new);
-	write_unlock(&detail->hash_lock);
+	spin_unlock(&detail->hash_lock);
 
-	if (freeme)
+	if (freeme) {
+		cache_fresh_unlocked(freeme, detail);
 		cache_put(freeme, detail);
+	}
 	return new;
 }
-EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
 
+struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail,
+					   struct cache_head *key, int hash)
+{
+	struct cache_head *ret;
+
+	ret = sunrpc_cache_find_rcu(detail, key, hash);
+	if (ret)
+		return ret;
+	/* Didn't find anything, insert an empty entry */
+	return sunrpc_cache_add_entry(detail, key, hash);
+}
+EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu);
 
 static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
 
@@ -151,18 +177,18 @@
 	struct cache_head *tmp;
 
 	if (!test_bit(CACHE_VALID, &old->flags)) {
-		write_lock(&detail->hash_lock);
+		spin_lock(&detail->hash_lock);
 		if (!test_bit(CACHE_VALID, &old->flags)) {
 			if (test_bit(CACHE_NEGATIVE, &new->flags))
 				set_bit(CACHE_NEGATIVE, &old->flags);
 			else
 				detail->update(old, new);
 			cache_fresh_locked(old, new->expiry_time, detail);
-			write_unlock(&detail->hash_lock);
+			spin_unlock(&detail->hash_lock);
 			cache_fresh_unlocked(old, detail);
 			return old;
 		}
-		write_unlock(&detail->hash_lock);
+		spin_unlock(&detail->hash_lock);
 	}
 	/* We need to insert a new entry */
 	tmp = detail->alloc();
@@ -173,7 +199,7 @@
 	cache_init(tmp, detail);
 	detail->init(tmp, old);
 
-	write_lock(&detail->hash_lock);
+	spin_lock(&detail->hash_lock);
 	if (test_bit(CACHE_NEGATIVE, &new->flags))
 		set_bit(CACHE_NEGATIVE, &tmp->flags);
 	else
@@ -183,7 +209,7 @@
 	cache_get(tmp);
 	cache_fresh_locked(tmp, new->expiry_time, detail);
 	cache_fresh_locked(old, 0, detail);
-	write_unlock(&detail->hash_lock);
+	spin_unlock(&detail->hash_lock);
 	cache_fresh_unlocked(tmp, detail);
 	cache_fresh_unlocked(old, detail);
 	cache_put(old, detail);
@@ -223,7 +249,7 @@
 {
 	int rv;
 
-	write_lock(&detail->hash_lock);
+	spin_lock(&detail->hash_lock);
 	rv = cache_is_valid(h);
 	if (rv == -EAGAIN) {
 		set_bit(CACHE_NEGATIVE, &h->flags);
@@ -231,7 +257,7 @@
 				   detail);
 		rv = -ENOENT;
 	}
-	write_unlock(&detail->hash_lock);
+	spin_unlock(&detail->hash_lock);
 	cache_fresh_unlocked(h, detail);
 	return rv;
 }
@@ -279,7 +305,8 @@
 				cache_fresh_unlocked(h, detail);
 				break;
 			}
-		}
+		} else if (!cache_listeners_exist(detail))
+			rv = try_to_negate_entry(detail, h);
 	}
 
 	if (rv == -EAGAIN) {
@@ -341,12 +368,12 @@
 
 void sunrpc_init_cache_detail(struct cache_detail *cd)
 {
-	rwlock_init(&cd->hash_lock);
+	spin_lock_init(&cd->hash_lock);
 	INIT_LIST_HEAD(&cd->queue);
 	spin_lock(&cache_list_lock);
 	cd->nextcheck = 0;
 	cd->entries = 0;
-	atomic_set(&cd->readers, 0);
+	atomic_set(&cd->writers, 0);
 	cd->last_close = 0;
 	cd->last_warn = -1;
 	list_add(&cd->others, &cache_list);
@@ -361,11 +388,11 @@
 {
 	cache_purge(cd);
 	spin_lock(&cache_list_lock);
-	write_lock(&cd->hash_lock);
+	spin_lock(&cd->hash_lock);
 	if (current_detail == cd)
 		current_detail = NULL;
 	list_del_init(&cd->others);
-	write_unlock(&cd->hash_lock);
+	spin_unlock(&cd->hash_lock);
 	spin_unlock(&cache_list_lock);
 	if (list_empty(&cache_list)) {
 		/* module must be being unloaded so it's safe to kill the worker */
@@ -422,7 +449,7 @@
 		struct hlist_head *head;
 		struct hlist_node *tmp;
 
-		write_lock(&current_detail->hash_lock);
+		spin_lock(&current_detail->hash_lock);
 
 		/* Ok, now to clean this strand */
 
@@ -433,13 +460,13 @@
 			if (!cache_is_expired(current_detail, ch))
 				continue;
 
-			hlist_del_init(&ch->cache_list);
+			hlist_del_init_rcu(&ch->cache_list);
 			current_detail->entries--;
 			rv = 1;
 			break;
 		}
 
-		write_unlock(&current_detail->hash_lock);
+		spin_unlock(&current_detail->hash_lock);
 		d = current_detail;
 		if (!ch)
 			current_index ++;
@@ -494,9 +521,9 @@
 	struct hlist_node *tmp = NULL;
 	int i = 0;
 
-	write_lock(&detail->hash_lock);
+	spin_lock(&detail->hash_lock);
 	if (!detail->entries) {
-		write_unlock(&detail->hash_lock);
+		spin_unlock(&detail->hash_lock);
 		return;
 	}
 
@@ -504,17 +531,17 @@
 	for (i = 0; i < detail->hash_size; i++) {
 		head = &detail->hash_table[i];
 		hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
-			hlist_del_init(&ch->cache_list);
+			hlist_del_init_rcu(&ch->cache_list);
 			detail->entries--;
 
 			set_bit(CACHE_CLEANED, &ch->flags);
-			write_unlock(&detail->hash_lock);
+			spin_unlock(&detail->hash_lock);
 			cache_fresh_unlocked(ch, detail);
 			cache_put(ch, detail);
-			write_lock(&detail->hash_lock);
+			spin_lock(&detail->hash_lock);
 		}
 	}
-	write_unlock(&detail->hash_lock);
+	spin_unlock(&detail->hash_lock);
 }
 EXPORT_SYMBOL_GPL(cache_purge);
 
@@ -1002,11 +1029,13 @@
 		}
 		rp->offset = 0;
 		rp->q.reader = 1;
-		atomic_inc(&cd->readers);
+
 		spin_lock(&queue_lock);
 		list_add(&rp->q.list, &cd->queue);
 		spin_unlock(&queue_lock);
 	}
+	if (filp->f_mode & FMODE_WRITE)
+		atomic_inc(&cd->writers);
 	filp->private_data = rp;
 	return 0;
 }
@@ -1035,8 +1064,10 @@
 		filp->private_data = NULL;
 		kfree(rp);
 
+	}
+	if (filp->f_mode & FMODE_WRITE) {
+		atomic_dec(&cd->writers);
 		cd->last_close = seconds_since_boot();
-		atomic_dec(&cd->readers);
 	}
 	module_put(cd->owner);
 	return 0;
@@ -1144,7 +1175,7 @@
 
 static bool cache_listeners_exist(struct cache_detail *detail)
 {
-	if (atomic_read(&detail->readers))
+	if (atomic_read(&detail->writers))
 		return true;
 	if (detail->last_close == 0)
 		/* This cache was never opened */
@@ -1289,21 +1320,19 @@
  * get a header, then pass each real item in the cache
  */
 
-void *cache_seq_start(struct seq_file *m, loff_t *pos)
-	__acquires(cd->hash_lock)
+static void *__cache_seq_start(struct seq_file *m, loff_t *pos)
 {
 	loff_t n = *pos;
 	unsigned int hash, entry;
 	struct cache_head *ch;
 	struct cache_detail *cd = m->private;
 
-	read_lock(&cd->hash_lock);
 	if (!n--)
 		return SEQ_START_TOKEN;
 	hash = n >> 32;
 	entry = n & ((1LL<<32) - 1);
 
-	hlist_for_each_entry(ch, &cd->hash_table[hash], cache_list)
+	hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list)
 		if (!entry--)
 			return ch;
 	n &= ~((1LL<<32) - 1);
@@ -1315,12 +1344,12 @@
 	if (hash >= cd->hash_size)
 		return NULL;
 	*pos = n+1;
-	return hlist_entry_safe(cd->hash_table[hash].first,
+	return hlist_entry_safe(rcu_dereference_raw(
+				hlist_first_rcu(&cd->hash_table[hash])),
 				struct cache_head, cache_list);
 }
-EXPORT_SYMBOL_GPL(cache_seq_start);
 
-void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
+static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
 {
 	struct cache_head *ch = p;
 	int hash = (*pos >> 32);
@@ -1333,7 +1362,8 @@
 		*pos += 1LL<<32;
 	} else {
 		++*pos;
-		return hlist_entry_safe(ch->cache_list.next,
+		return hlist_entry_safe(rcu_dereference_raw(
+					hlist_next_rcu(&ch->cache_list)),
 					struct cache_head, cache_list);
 	}
 	*pos &= ~((1LL<<32) - 1);
@@ -1345,18 +1375,31 @@
 	if (hash >= cd->hash_size)
 		return NULL;
 	++*pos;
-	return hlist_entry_safe(cd->hash_table[hash].first,
+	return hlist_entry_safe(rcu_dereference_raw(
+				hlist_first_rcu(&cd->hash_table[hash])),
 				struct cache_head, cache_list);
 }
-EXPORT_SYMBOL_GPL(cache_seq_next);
 
-void cache_seq_stop(struct seq_file *m, void *p)
-	__releases(cd->hash_lock)
+void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos)
+	__acquires(RCU)
 {
-	struct cache_detail *cd = m->private;
-	read_unlock(&cd->hash_lock);
+	rcu_read_lock();
+	return __cache_seq_start(m, pos);
 }
-EXPORT_SYMBOL_GPL(cache_seq_stop);
+EXPORT_SYMBOL_GPL(cache_seq_start_rcu);
+
+void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos)
+{
+	return cache_seq_next(file, p, pos);
+}
+EXPORT_SYMBOL_GPL(cache_seq_next_rcu);
+
+void cache_seq_stop_rcu(struct seq_file *m, void *p)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(cache_seq_stop_rcu);
 
 static int c_show(struct seq_file *m, void *p)
 {
@@ -1384,9 +1427,9 @@
 }
 
 static const struct seq_operations cache_content_op = {
-	.start	= cache_seq_start,
-	.next	= cache_seq_next,
-	.stop	= cache_seq_stop,
+	.start	= cache_seq_start_rcu,
+	.next	= cache_seq_next_rcu,
+	.stop	= cache_seq_stop_rcu,
 	.show	= c_show,
 };
 
@@ -1481,6 +1524,9 @@
 	cd->nextcheck = now;
 	cache_flush();
 
+	if (cd->flush)
+		cd->flush();
+
 	*ppos += count;
 	return count;
 }
@@ -1844,13 +1890,13 @@
 
 void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
 {
-	write_lock(&cd->hash_lock);
+	spin_lock(&cd->hash_lock);
 	if (!hlist_unhashed(&h->cache_list)){
-		hlist_del_init(&h->cache_list);
+		hlist_del_init_rcu(&h->cache_list);
 		cd->entries--;
-		write_unlock(&cd->hash_lock);
+		spin_unlock(&cd->hash_lock);
 		cache_put(h, cd);
 	} else
-		write_unlock(&cd->hash_lock);
+		spin_unlock(&cd->hash_lock);
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_unhash);
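
The cache.c conversion above is the standard RCU-protected hlist
recipe: readers traverse locklessly under rcu_read_lock(), while
updaters serialize on the new hash_lock spinlock and use the _rcu list
primitives. A sketch of the pairing, with match() standing in for
detail->match(); for this to be safe the cache's release path must
defer the actual free past an RCU grace period:

	/* Reader: the reference must be taken inside the RCU section,
	 * via a kref_get_unless_zero()-style helper such as
	 * cache_get_rcu() above, before the pointer escapes. */
	rcu_read_lock();
	hlist_for_each_entry_rcu(tmp, head, cache_list) {
		if (match(tmp, key) && cache_get_rcu(tmp)) {
			rcu_read_unlock();
			return tmp;
		}
	}
	rcu_read_unlock();
	return NULL;

	/* Updater: excludes other updaters only; readers keep running,
	 * so unlink with the RCU-aware primitive. */
	spin_lock(&detail->hash_lock);
	hlist_del_init_rcu(&h->cache_list);
	spin_unlock(&detail->hash_lock);
	cache_put(h, detail);
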
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8ea2f5f..f7f7856 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  linux/net/sunrpc/clnt.c
  *
@@ -61,24 +62,24 @@
 static void	call_reserve(struct rpc_task *task);
 static void	call_reserveresult(struct rpc_task *task);
 static void	call_allocate(struct rpc_task *task);
+static void	call_encode(struct rpc_task *task);
 static void	call_decode(struct rpc_task *task);
 static void	call_bind(struct rpc_task *task);
 static void	call_bind_status(struct rpc_task *task);
 static void	call_transmit(struct rpc_task *task);
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static void	call_bc_transmit(struct rpc_task *task);
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 static void	call_status(struct rpc_task *task);
 static void	call_transmit_status(struct rpc_task *task);
 static void	call_refresh(struct rpc_task *task);
 static void	call_refreshresult(struct rpc_task *task);
-static void	call_timeout(struct rpc_task *task);
 static void	call_connect(struct rpc_task *task);
 static void	call_connect_status(struct rpc_task *task);
 
-static __be32	*rpc_encode_header(struct rpc_task *task);
-static __be32	*rpc_verify_header(struct rpc_task *task);
+static int	rpc_encode_header(struct rpc_task *task,
+				  struct xdr_stream *xdr);
+static int	rpc_decode_header(struct rpc_task *task,
+				  struct xdr_stream *xdr);
 static int	rpc_ping(struct rpc_clnt *clnt);
+static void	rpc_check_timeout(struct rpc_task *task);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
 {
@@ -394,6 +395,7 @@
 	if (err)
 		goto out_no_clid;
 
+	clnt->cl_cred	  = get_cred(args->cred);
 	clnt->cl_procinfo = version->procs;
 	clnt->cl_maxproc  = version->nrprocs;
 	clnt->cl_prog     = args->prognumber ? : program->number;
@@ -439,6 +441,7 @@
 out_no_path:
 	rpc_free_iostats(clnt->cl_metrics);
 out_no_stats:
+	put_cred(clnt->cl_cred);
 	rpc_free_clid(clnt);
 out_no_clid:
 	kfree(clnt);
@@ -484,8 +487,11 @@
 	}
 
 	clnt->cl_softrtry = 1;
-	if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
+	if (args->flags & (RPC_CLNT_CREATE_HARDRTRY|RPC_CLNT_CREATE_SOFTERR)) {
 		clnt->cl_softrtry = 0;
+		if (args->flags & RPC_CLNT_CREATE_SOFTERR)
+			clnt->cl_softerr = 1;
+	}
 
 	if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
 		clnt->cl_autobind = 1;
@@ -522,6 +528,8 @@
 		.bc_xprt = args->bc_xprt,
 	};
 	char servername[48];
+	struct rpc_clnt *clnt;
+	int i;
 
 	if (args->bc_xprt) {
 		WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
@@ -584,7 +592,15 @@
 	if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
 		xprt->resvport = 0;
 
-	return rpc_create_xprt(args, xprt);
+	clnt = rpc_create_xprt(args, xprt);
+	if (IS_ERR(clnt) || args->nconnect <= 1)
+		return clnt;
+
+	for (i = 0; i < args->nconnect - 1; i++) {
+		if (rpc_clnt_add_xprt(clnt, &xprtargs, NULL, NULL) < 0)
+			break;
+	}
+	return clnt;
 }
 EXPORT_SYMBOL_GPL(rpc_create);
 
@@ -623,9 +639,11 @@
 	/* Turn off autobind on clones */
 	new->cl_autobind = 0;
 	new->cl_softrtry = clnt->cl_softrtry;
+	new->cl_softerr = clnt->cl_softerr;
 	new->cl_noretranstimeo = clnt->cl_noretranstimeo;
 	new->cl_discrtry = clnt->cl_discrtry;
 	new->cl_chatty = clnt->cl_chatty;
+	new->cl_principal = clnt->cl_principal;
 	return new;
 
 out_err:
@@ -647,6 +665,7 @@
 		.prognumber	= clnt->cl_prog,
 		.version	= clnt->cl_vers,
 		.authflavor	= clnt->cl_auth->au_flavor,
+		.cred		= clnt->cl_cred,
 	};
 	return __rpc_clone_client(&args, clnt);
 }
@@ -668,6 +687,7 @@
 		.prognumber	= clnt->cl_prog,
 		.version	= clnt->cl_vers,
 		.authflavor	= flavor,
+		.cred		= clnt->cl_cred,
 	};
 	return __rpc_clone_client(&args, clnt);
 }
@@ -826,17 +846,8 @@
 	 * Spin lock all_tasks to prevent changes...
 	 */
 	spin_lock(&clnt->cl_lock);
-	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
-		if (!RPC_IS_ACTIVATED(rovr))
-			continue;
-		if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
-			rovr->tk_flags |= RPC_TASK_KILLED;
-			rpc_exit(rovr, -EIO);
-			if (RPC_IS_QUEUED(rovr))
-				rpc_wake_up_queued_task(rovr->tk_waitqueue,
-							rovr);
-		}
-	}
+	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
+		rpc_signal_task(rovr);
 	spin_unlock(&clnt->cl_lock);
 }
 EXPORT_SYMBOL_GPL(rpc_killall_tasks);
@@ -884,6 +895,7 @@
 	xprt_put(rcu_dereference_raw(clnt->cl_xprt));
 	xprt_iter_destroy(&clnt->cl_xpi);
 	rpciod_down();
+	put_cred(clnt->cl_cred);
 	rpc_free_clid(clnt);
 	kfree(clnt);
 	return parent;
@@ -948,6 +960,7 @@
 		.prognumber	= program->number,
 		.version	= vers,
 		.authflavor	= old->cl_auth->au_flavor,
+		.cred		= old->cl_cred,
 	};
 	struct rpc_clnt *clnt;
 	int err;
@@ -965,13 +978,46 @@
 }
 EXPORT_SYMBOL_GPL(rpc_bind_new_program);
 
+struct rpc_xprt *
+rpc_task_get_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+	struct rpc_xprt_switch *xps;
+
+	if (!xprt)
+		return NULL;
+	rcu_read_lock();
+	xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+	atomic_long_inc(&xps->xps_queuelen);
+	rcu_read_unlock();
+	atomic_long_inc(&xprt->queuelen);
+
+	return xprt;
+}
+
+static void
+rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+	struct rpc_xprt_switch *xps;
+
+	atomic_long_dec(&xprt->queuelen);
+	rcu_read_lock();
+	xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+	atomic_long_dec(&xps->xps_queuelen);
+	rcu_read_unlock();
+
+	xprt_put(xprt);
+}
+
 void rpc_task_release_transport(struct rpc_task *task)
 {
 	struct rpc_xprt *xprt = task->tk_xprt;
 
 	if (xprt) {
 		task->tk_xprt = NULL;
-		xprt_put(xprt);
+		if (task->tk_client)
+			rpc_task_release_xprt(task->tk_client, xprt);
+		else
+			xprt_put(xprt);
 	}
 }
 EXPORT_SYMBOL_GPL(rpc_task_release_transport);
@@ -980,6 +1026,7 @@
 {
 	struct rpc_clnt *clnt = task->tk_client;
 
+	rpc_task_release_transport(task);
 	if (clnt != NULL) {
 		/* Remove from client task list */
 		spin_lock(&clnt->cl_lock);
@@ -989,14 +1036,34 @@
 
 		rpc_release_client(clnt);
 	}
-	rpc_task_release_transport(task);
+}
+
+static struct rpc_xprt *
+rpc_task_get_first_xprt(struct rpc_clnt *clnt)
+{
+	struct rpc_xprt *xprt;
+
+	rcu_read_lock();
+	xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+	rcu_read_unlock();
+	return rpc_task_get_xprt(clnt, xprt);
+}
+
+static struct rpc_xprt *
+rpc_task_get_next_xprt(struct rpc_clnt *clnt)
+{
+	return rpc_task_get_xprt(clnt, xprt_iter_get_next(&clnt->cl_xpi));
 }
 
 static
 void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
 {
-	if (!task->tk_xprt)
-		task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
+	if (task->tk_xprt)
+		return;
+	if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
+		task->tk_xprt = rpc_task_get_first_xprt(clnt);
+	else
+		task->tk_xprt = rpc_task_get_next_xprt(clnt);
 }
 
 static
@@ -1009,6 +1076,8 @@
 		atomic_inc(&clnt->cl_count);
 		if (clnt->cl_softrtry)
 			task->tk_flags |= RPC_TASK_SOFT;
+		if (clnt->cl_softerr)
+			task->tk_flags |= RPC_TASK_TIMEOUT;
 		if (clnt->cl_noretranstimeo)
 			task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
 		if (atomic_read(&clnt->cl_swapper))
@@ -1028,7 +1097,7 @@
 		task->tk_msg.rpc_argp = msg->rpc_argp;
 		task->tk_msg.rpc_resp = msg->rpc_resp;
 		if (msg->rpc_cred != NULL)
-			task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred);
+			task->tk_msg.rpc_cred = get_cred(msg->rpc_cred);
 	}
 }
 
@@ -1129,6 +1198,8 @@
 EXPORT_SYMBOL_GPL(rpc_call_async);
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static void call_bc_encode(struct rpc_task *task);
+
 /**
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
@@ -1137,10 +1208,10 @@
 struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 {
 	struct rpc_task *task;
-	struct xdr_buf *xbufp = &req->rq_snd_buf;
 	struct rpc_task_setup task_setup_data = {
 		.callback_ops = &rpc_default_ops,
-		.flags = RPC_TASK_SOFTCONN,
+		.flags = RPC_TASK_SOFTCONN |
+			RPC_TASK_NO_RETRANS_TIMEOUT,
 	};
 
 	dprintk("RPC: rpc_run_bc_task req= %p\n", req);
@@ -1148,16 +1219,9 @@
 	 * Create an rpc_task to send the data
 	 */
 	task = rpc_new_task(&task_setup_data);
-	task->tk_rqstp = req;
+	xprt_init_bc_request(req, task);
 
-	/*
-	 * Set up the xdr_buf length.
-	 * This also indicates that the buffer is XDR encoded already.
-	 */
-	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
-			xbufp->tail[0].iov_len;
-
-	task->tk_action = call_bc_transmit;
+	task->tk_action = call_bc_encode;
 	atomic_inc(&task->tk_count);
 	WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
 	rpc_execute(task);
@@ -1167,6 +1231,29 @@
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
+/**
+ * rpc_prepare_reply_pages - Prepare to receive a reply data payload into pages
+ * @req: RPC request to prepare
+ * @pages: vector of struct page pointers
+ * @base: offset in first page where receive should start, in bytes
+ * @len: expected size of the upper layer data payload, in bytes
+ * @hdrsize: expected size of upper layer reply header, in XDR words
+ *
+ */
+void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
+			     unsigned int base, unsigned int len,
+			     unsigned int hdrsize)
+{
+	/* Subtract one to force an extra word of buffer space for the
+	 * payload's XDR pad to fall into the rcv_buf's tail iovec.
+	 */
+	hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1;
+
+	xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
+	trace_rpc_reply_pages(req);
+}
+EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages);
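
A quick worked example of the arithmetic above: the offset handed to
xdr_inline_pages() is (hdrsize + RPC_REPHDRSIZE + au_ralign - 1) << 2
bytes. With AUTH_UNIX and a zero-length NULL verifier, unx_validate()
(earlier in this patch) sets au_ralign = XDR_QUADLEN(0) + 2 = 2, so the
payload is expected (hdrsize + RPC_REPHDRSIZE + 1) words into the
reply, and the subtracted word is what lets the payload's XDR pad fall
into the tail iovec.
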
+
 void
 rpc_call_start(struct rpc_task *task)
 {
@@ -1439,6 +1526,19 @@
 }
 EXPORT_SYMBOL_GPL(rpc_max_bc_payload);
 
+unsigned int rpc_num_bc_slots(struct rpc_clnt *clnt)
+{
+	struct rpc_xprt *xprt;
+	unsigned int ret;
+
+	rcu_read_lock();
+	xprt = rcu_dereference(clnt->cl_xprt);
+	ret = xprt->ops->bc_num_slots(xprt);
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_num_bc_slots);
+
 /**
  * rpc_force_rebind - force transport to check that remote port is unchanged
  * @clnt: client to rebind
@@ -1454,22 +1554,14 @@
 }
 EXPORT_SYMBOL_GPL(rpc_force_rebind);
 
-/*
- * Restart an (async) RPC call from the call_prepare state.
- * Usually called from within the exit handler.
- */
-int
-rpc_restart_call_prepare(struct rpc_task *task)
+static int
+__rpc_restart_call(struct rpc_task *task, void (*action)(struct rpc_task *))
 {
-	if (RPC_ASSASSINATED(task))
-		return 0;
-	task->tk_action = call_start;
 	task->tk_status = 0;
-	if (task->tk_ops->rpc_call_prepare != NULL)
-		task->tk_action = rpc_prepare_task;
+	task->tk_rpc_status = 0;
+	task->tk_action = action;
 	return 1;
 }
-EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
 
 /*
  * Restart an (async) RPC call. Usually called from within the
@@ -1478,14 +1570,23 @@
 int
 rpc_restart_call(struct rpc_task *task)
 {
-	if (RPC_ASSASSINATED(task))
-		return 0;
-	task->tk_action = call_start;
-	task->tk_status = 0;
-	return 1;
+	return __rpc_restart_call(task, call_start);
 }
 EXPORT_SYMBOL_GPL(rpc_restart_call);
 
+/*
+ * Restart an (async) RPC call from the call_prepare state.
+ * Usually called from within the exit handler.
+ */
+int
+rpc_restart_call_prepare(struct rpc_task *task)
+{
+	if (task->tk_ops->rpc_call_prepare != NULL)
+		return __rpc_restart_call(task, rpc_prepare_task);
+	return rpc_restart_call(task);
+}
+EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
+
 const char
 *rpc_proc_name(const struct rpc_task *task)
 {
@@ -1500,6 +1601,19 @@
 		return "no proc";
 }
 
+static void
+__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status)
+{
+	task->tk_rpc_status = rpc_status;
+	rpc_exit(task, tk_status);
+}
+
+static void
+rpc_call_rpcerror(struct rpc_task *task, int status)
+{
+	__rpc_call_rpcerror(task, status, status);
+}
+
 /*
  * 0.  Initial state
  *
@@ -1558,14 +1672,13 @@
 	task->tk_status = 0;
 	if (status >= 0) {
 		if (task->tk_rqstp) {
-			xprt_request_init(task);
 			task->tk_action = call_refresh;
 			return;
 		}
 
 		printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
 				__func__, status);
-		rpc_exit(task, -EIO);
+		rpc_call_rpcerror(task, -EIO);
 		return;
 	}
 
@@ -1593,7 +1706,7 @@
 				__func__, status);
 		break;
 	}
-	rpc_exit(task, status);
+	rpc_call_rpcerror(task, status);
 }
 
 /*
@@ -1661,7 +1774,7 @@
 	}
 	dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
 				task->tk_pid, __func__, status);
-	rpc_exit(task, status);
+	rpc_call_rpcerror(task, status);
 }
 
 /*
@@ -1671,7 +1784,7 @@
 static void
 call_allocate(struct rpc_task *task)
 {
-	unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
+	const struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 	const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
@@ -1680,7 +1793,7 @@
 	dprint_status(task);
 
 	task->tk_status = 0;
-	task->tk_action = call_bind;
+	task->tk_action = call_encode;
 
 	if (req->rq_buffer)
 		return;
@@ -1696,9 +1809,15 @@
 	 * and reply headers, and convert both values
 	 * to byte sizes.
 	 */
-	req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen;
+	req->rq_callsize = RPC_CALLHDRSIZE + (auth->au_cslack << 1) +
+			   proc->p_arglen;
 	req->rq_callsize <<= 2;
-	req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen;
+	/*
+	 * Note: the reply buffer must at minimum allocate enough space
+	 * for the 'struct accepted_reply' from RFC5531.
+	 */
+	req->rq_rcvsize = RPC_REPHDRSIZE + auth->au_rslack +
+			max_t(size_t, proc->p_replen, 2);
 	req->rq_rcvsize <<= 2;
 
 	status = xprt->ops->buf_alloc(task);
@@ -1706,7 +1825,7 @@
 	if (status == 0)
 		return;
 	if (status != -ENOMEM) {
-		rpc_exit(task, status);
+		rpc_call_rpcerror(task, status);
 		return;
 	}
 
@@ -1718,33 +1837,23 @@
 		return;
 	}
 
-	rpc_exit(task, -ERESTARTSYS);
+	rpc_call_rpcerror(task, -ERESTARTSYS);
 }
 
-static inline int
+static int
 rpc_task_need_encode(struct rpc_task *task)
 {
-	return task->tk_rqstp->rq_snd_buf.len == 0;
+	return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0 &&
+		(!(task->tk_flags & RPC_TASK_SENT) ||
+		 !(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) ||
+		 xprt_request_need_retransmit(task));
 }
 
-static inline void
-rpc_task_force_reencode(struct rpc_task *task)
-{
-	task->tk_rqstp->rq_snd_buf.len = 0;
-	task->tk_rqstp->rq_bytes_sent = 0;
-}
-
-/*
- * 3.	Encode arguments of an RPC call
- */
 static void
 rpc_xdr_encode(struct rpc_task *task)
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
-	kxdreproc_t	encode;
-	__be32		*p;
-
-	dprint_status(task);
+	struct xdr_stream xdr;
 
 	xdr_buf_init(&req->rq_snd_buf,
 		     req->rq_buffer,
@@ -1753,19 +1862,82 @@
 		     req->rq_rbuffer,
 		     req->rq_rcvsize);
 
-	p = rpc_encode_header(task);
-	if (p == NULL) {
-		printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n");
-		rpc_exit(task, -EIO);
+	req->rq_reply_bytes_recvd = 0;
+	req->rq_snd_buf.head[0].iov_len = 0;
+	xdr_init_encode(&xdr, &req->rq_snd_buf,
+			req->rq_snd_buf.head[0].iov_base, req);
+	xdr_free_bvec(&req->rq_snd_buf);
+	if (rpc_encode_header(task, &xdr))
+		return;
+
+	task->tk_status = rpcauth_wrap_req(task, &xdr);
+}
+
+/*
+ * 3.	Encode arguments of an RPC call
+ */
+static void
+call_encode(struct rpc_task *task)
+{
+	if (!rpc_task_need_encode(task))
+		goto out;
+	dprint_status(task);
+	/* Dequeue task from the receive queue while we're encoding */
+	xprt_request_dequeue_xprt(task);
+	/* Encode here so that rpcsec_gss can use correct sequence number. */
+	rpc_xdr_encode(task);
+	/* Did the encode result in an error condition? */
+	if (task->tk_status != 0) {
+		/* Was the error nonfatal? */
+		switch (task->tk_status) {
+		case -EAGAIN:
+		case -ENOMEM:
+			rpc_delay(task, HZ >> 4);
+			break;
+		case -EKEYEXPIRED:
+			if (!task->tk_cred_retry) {
+				rpc_exit(task, task->tk_status);
+			} else {
+				task->tk_action = call_refresh;
+				task->tk_cred_retry--;
+				dprintk("RPC: %5u %s: retry refresh creds\n",
+					task->tk_pid, __func__);
+			}
+			break;
+		default:
+			rpc_call_rpcerror(task, task->tk_status);
+		}
 		return;
 	}
 
-	encode = task->tk_msg.rpc_proc->p_encode;
-	if (encode == NULL)
-		return;
+	/* Add task to reply queue before transmission to avoid races */
+	if (rpc_reply_expected(task))
+		xprt_request_enqueue_receive(task);
+	xprt_request_enqueue_transmit(task);
+out:
+	task->tk_action = call_transmit;
+	/* Check that the connection is OK */
+	if (!xprt_bound(task->tk_xprt))
+		task->tk_action = call_bind;
+	else if (!xprt_connected(task->tk_xprt))
+		task->tk_action = call_connect;
+}
 
-	task->tk_status = rpcauth_wrap_req(task, encode, req, p,
-			task->tk_msg.rpc_argp);
+/*
+ * Helpers to check if the task was already transmitted, and
+ * to take action when that is the case.
+ */
+static bool
+rpc_task_transmitted(struct rpc_task *task)
+{
+	return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+}
+
+static void
+rpc_task_handle_transmitted(struct rpc_task *task)
+{
+	xprt_end_transmit(task);
+	task->tk_action = call_transmit_status;
 }
 
 /*
@@ -1776,14 +1948,23 @@
 {
 	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
 
+	if (rpc_task_transmitted(task)) {
+		rpc_task_handle_transmitted(task);
+		return;
+	}
+
+	if (xprt_bound(xprt)) {
+		task->tk_action = call_connect;
+		return;
+	}
+
 	dprint_status(task);
 
-	task->tk_action = call_connect;
-	if (!xprt_bound(xprt)) {
-		task->tk_action = call_bind_status;
-		task->tk_timeout = xprt->bind_timeout;
-		xprt->ops->rpcbind(task);
-	}
+	task->tk_action = call_bind_status;
+	if (!xprt_prepare_transmit(task))
+		return;
+
+	xprt->ops->rpcbind(task);
 }
 
 /*
@@ -1792,16 +1973,23 @@
 static void
 call_bind_status(struct rpc_task *task)
 {
+	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
 	int status = -EIO;
 
-	if (task->tk_status >= 0) {
-		dprint_status(task);
-		task->tk_status = 0;
-		task->tk_action = call_connect;
+	if (rpc_task_transmitted(task)) {
+		rpc_task_handle_transmitted(task);
 		return;
 	}
 
+	dprint_status(task);
 	trace_rpc_bind_status(task);
+	if (task->tk_status >= 0)
+		goto out_next;
+	if (xprt_bound(xprt)) {
+		task->tk_status = 0;
+		goto out_next;
+	}
+
 	switch (task->tk_status) {
 	case -ENOMEM:
 		dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid);
@@ -1820,6 +2008,11 @@
 		task->tk_rebind_retry--;
 		rpc_delay(task, 3*HZ);
 		goto retry_timeout;
+	case -ENOBUFS:
+		rpc_delay(task, HZ >> 2);
+		goto retry_timeout;
+	case -EAGAIN:
+		goto retry_timeout;
 	case -ETIMEDOUT:
 		dprintk("RPC: %5u rpcbind request timed out\n",
 				task->tk_pid);
@@ -1841,7 +2034,6 @@
 	case -ENETDOWN:
 	case -EHOSTUNREACH:
 	case -ENETUNREACH:
-	case -ENOBUFS:
 	case -EPIPE:
 		dprintk("RPC: %5u remote rpcbind unreachable: %d\n",
 				task->tk_pid, task->tk_status);
@@ -1856,12 +2048,15 @@
 				task->tk_pid, -task->tk_status);
 	}
 
-	rpc_exit(task, status);
+	rpc_call_rpcerror(task, status);
 	return;
-
+out_next:
+	task->tk_action = call_connect;
+	return;
 retry_timeout:
 	task->tk_status = 0;
-	task->tk_action = call_timeout;
+	task->tk_action = call_bind;
+	rpc_check_timeout(task);
 }
 
 /*
@@ -1872,21 +2067,30 @@
 {
 	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
 
+	if (rpc_task_transmitted(task)) {
+		rpc_task_handle_transmitted(task);
+		return;
+	}
+
+	if (xprt_connected(xprt)) {
+		task->tk_action = call_transmit;
+		return;
+	}
+
 	dprintk("RPC: %5u call_connect xprt %p %s connected\n",
 			task->tk_pid, xprt,
 			(xprt_connected(xprt) ? "is" : "is not"));
 
-	task->tk_action = call_transmit;
-	if (!xprt_connected(xprt)) {
-		task->tk_action = call_connect_status;
-		if (task->tk_status < 0)
-			return;
-		if (task->tk_flags & RPC_TASK_NOCONNECT) {
-			rpc_exit(task, -ENOTCONN);
-			return;
-		}
-		xprt_connect(task);
+	task->tk_action = call_connect_status;
+	if (task->tk_status < 0)
+		return;
+	if (task->tk_flags & RPC_TASK_NOCONNECT) {
+		rpc_call_rpcerror(task, -ENOTCONN);
+		return;
 	}
+	if (!xprt_prepare_transmit(task))
+		return;
+	xprt_connect(task);
 }
 
 /*
@@ -1895,12 +2099,27 @@
 static void
 call_connect_status(struct rpc_task *task)
 {
+	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
 	struct rpc_clnt *clnt = task->tk_client;
 	int status = task->tk_status;
 
-	dprint_status(task);
+	if (rpc_task_transmitted(task)) {
+		rpc_task_handle_transmitted(task);
+		return;
+	}
 
+	dprint_status(task);
 	trace_rpc_connect_status(task);
+
+	if (task->tk_status == 0) {
+		clnt->cl_stats->netreconn++;
+		goto out_next;
+	}
+	if (xprt_connected(xprt)) {
+		task->tk_status = 0;
+		goto out_next;
+	}
+
 	task->tk_status = 0;
 	switch (status) {
 	case -ECONNREFUSED:
@@ -1909,8 +2128,7 @@
 			break;
 		if (clnt->cl_autobind) {
 			rpc_force_rebind(clnt);
-			task->tk_action = call_bind;
-			return;
+			goto out_retry;
 		}
 		/* fall through */
 	case -ECONNRESET:
@@ -1918,8 +2136,6 @@
 	case -ENETDOWN:
 	case -ENETUNREACH:
 	case -EHOSTUNREACH:
-	case -EADDRINUSE:
-	case -ENOBUFS:
 	case -EPIPE:
 		xprt_conditional_disconnect(task->tk_rqstp->rq_xprt,
 					    task->tk_rqstp->rq_connect_cookie);
@@ -1928,17 +2144,24 @@
 		/* retry with existing socket, after a delay */
 		rpc_delay(task, 3*HZ);
 		/* fall through */
+	case -EADDRINUSE:
+	case -ENOTCONN:
 	case -EAGAIN:
-		/* Check for timeouts before looping back to call_bind */
 	case -ETIMEDOUT:
-		task->tk_action = call_timeout;
-		return;
-	case 0:
-		clnt->cl_stats->netreconn++;
-		task->tk_action = call_transmit;
-		return;
+		goto out_retry;
+	case -ENOBUFS:
+		rpc_delay(task, HZ >> 2);
+		goto out_retry;
 	}
-	rpc_exit(task, status);
+	rpc_call_rpcerror(task, status);
+	return;
+out_next:
+	task->tk_action = call_transmit;
+	return;
+out_retry:
+	/* Check for timeouts before looping back to call_bind */
+	task->tk_action = call_bind;
+	rpc_check_timeout(task);
 }
 
 /*
@@ -1947,43 +2170,25 @@
 static void
 call_transmit(struct rpc_task *task)
 {
-	int is_retrans = RPC_WAS_SENT(task);
+	if (rpc_task_transmitted(task)) {
+		rpc_task_handle_transmitted(task);
+		return;
+	}
 
 	dprint_status(task);
 
-	task->tk_action = call_status;
-	if (task->tk_status < 0)
-		return;
+	task->tk_action = call_transmit_status;
 	if (!xprt_prepare_transmit(task))
 		return;
-	task->tk_action = call_transmit_status;
-	/* Encode here so that rpcsec_gss can use correct sequence number. */
-	if (rpc_task_need_encode(task)) {
-		rpc_xdr_encode(task);
-		/* Did the encode result in an error condition? */
-		if (task->tk_status != 0) {
-			/* Was the error nonfatal? */
-			if (task->tk_status == -EAGAIN)
-				rpc_delay(task, HZ >> 4);
-			else
-				rpc_exit(task, task->tk_status);
+	task->tk_status = 0;
+	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+		if (!xprt_connected(task->tk_xprt)) {
+			task->tk_status = -ENOTCONN;
 			return;
 		}
+		xprt_transmit(task);
 	}
-	xprt_transmit(task);
-	if (task->tk_status < 0)
-		return;
-	if (is_retrans)
-		task->tk_client->cl_stats->rpcretrans++;
-	/*
-	 * On success, ensure that we call xprt_end_transmit() before sleeping
-	 * in order to allow access to the socket to other RPC requests.
-	 */
-	call_transmit_status(task);
-	if (rpc_reply_expected(task))
-		return;
-	task->tk_action = rpc_exit_task;
-	rpc_wake_up_queued_task(&task->tk_rqstp->rq_xprt->pending, task);
+	xprt_end_transmit(task);
 }
 
 /*
@@ -1998,20 +2203,19 @@
 	 * Common case: success.  Force the compiler to put this
 	 * test first.
 	 */
-	if (task->tk_status == 0) {
-		xprt_end_transmit(task);
-		rpc_task_force_reencode(task);
+	if (rpc_task_transmitted(task)) {
+		task->tk_status = 0;
+		xprt_request_wait_receive(task);
 		return;
 	}
 
 	switch (task->tk_status) {
-	case -EAGAIN:
-	case -ENOBUFS:
-		break;
 	default:
 		dprint_status(task);
-		xprt_end_transmit(task);
-		rpc_task_force_reencode(task);
+		break;
+	case -EBADMSG:
+		task->tk_status = 0;
+		task->tk_action = call_encode;
 		break;
 		/*
 		 * Special cases: if we've been waiting on the
@@ -2019,6 +2223,14 @@
 		 * socket just returned a connection error,
 		 * then hold onto the transport lock.
 		 */
+	case -ENOBUFS:
+		rpc_delay(task, HZ>>2);
+		/* fall through */
+	case -EBADSLT:
+	case -EAGAIN:
+		task->tk_action = call_transmit;
+		task->tk_status = 0;
+		break;
 	case -ECONNREFUSED:
 	case -EHOSTDOWN:
 	case -ENETDOWN:
@@ -2026,12 +2238,11 @@
 	case -ENETUNREACH:
 	case -EPERM:
 		if (RPC_IS_SOFTCONN(task)) {
-			xprt_end_transmit(task);
 			if (!task->tk_msg.rpc_proc->p_proc)
 				trace_xprt_ping(task->tk_xprt,
 						task->tk_status);
-			rpc_exit(task, task->tk_status);
-			break;
+			rpc_call_rpcerror(task, task->tk_status);
+			return;
 		}
 		/* fall through */
 	case -ECONNRESET:
@@ -2039,11 +2250,24 @@
 	case -EADDRINUSE:
 	case -ENOTCONN:
 	case -EPIPE:
-		rpc_task_force_reencode(task);
+		task->tk_action = call_bind;
+		task->tk_status = 0;
+		break;
 	}
+	rpc_check_timeout(task);
 }
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static void call_bc_transmit(struct rpc_task *task);
+static void call_bc_transmit_status(struct rpc_task *task);
+
+static void
+call_bc_encode(struct rpc_task *task)
+{
+	xprt_request_enqueue_transmit(task);
+	task->tk_action = call_bc_transmit;
+}
+
 /*
  * 5b.	Send the backchannel RPC reply.  On error, drop the reply.  In
  * addition, disconnect on connectivity errors.
@@ -2051,26 +2275,26 @@
 static void
 call_bc_transmit(struct rpc_task *task)
 {
+	task->tk_action = call_bc_transmit_status;
+	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+		if (!xprt_prepare_transmit(task))
+			return;
+		task->tk_status = 0;
+		xprt_transmit(task);
+	}
+	xprt_end_transmit(task);
+}
+
+static void
+call_bc_transmit_status(struct rpc_task *task)
+{
 	struct rpc_rqst *req = task->tk_rqstp;
 
-	if (!xprt_prepare_transmit(task))
-		goto out_retry;
+	if (rpc_task_transmitted(task))
+		task->tk_status = 0;
 
-	if (task->tk_status < 0) {
-		printk(KERN_NOTICE "RPC: Could not send backchannel reply "
-			"error: %d\n", task->tk_status);
-		goto out_done;
-	}
-	if (req->rq_connect_cookie != req->rq_xprt->connect_cookie)
-		req->rq_bytes_sent = 0;
-
-	xprt_transmit(task);
-
-	if (task->tk_status == -EAGAIN)
-		goto out_nospace;
-
-	xprt_end_transmit(task);
 	dprint_status(task);
+
 	switch (task->tk_status) {
 	case 0:
 		/* Success */
@@ -2084,6 +2308,14 @@
 	case -ENOTCONN:
 	case -EPIPE:
 		break;
+	case -ENOBUFS:
+		rpc_delay(task, HZ>>2);
+		/* fall through */
+	case -EBADSLT:
+	case -EAGAIN:
+		task->tk_status = 0;
+		task->tk_action = call_bc_transmit;
+		return;
 	case -ETIMEDOUT:
 		/*
 		 * Problem reaching the server.  Disconnect and let the
@@ -2102,19 +2334,11 @@
 		 * We were unable to reply and will have to drop the
 		 * request.  The server should reconnect and retransmit.
 		 */
-		WARN_ON_ONCE(task->tk_status == -EAGAIN);
 		printk(KERN_NOTICE "RPC: Could not send backchannel reply "
 			"error: %d\n", task->tk_status);
 		break;
 	}
-	rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
-out_done:
 	task->tk_action = rpc_exit_task;
-	return;
-out_nospace:
-	req->rq_connect_cookie = req->rq_xprt->connect_cookie;
-out_retry:
-	task->tk_status = 0;
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
@@ -2125,15 +2349,11 @@
 call_status(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
-	struct rpc_rqst	*req = task->tk_rqstp;
 	int		status;
 
 	if (!task->tk_msg.rpc_proc->p_proc)
 		trace_xprt_ping(task->tk_xprt, task->tk_status);
 
-	if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
-		task->tk_status = req->rq_reply_bytes_recvd;
-
 	dprint_status(task);
 
 	status = task->tk_status;
@@ -2150,10 +2370,8 @@
 	case -EHOSTUNREACH:
 	case -ENETUNREACH:
 	case -EPERM:
-		if (RPC_IS_SOFTCONN(task)) {
-			rpc_exit(task, status);
-			break;
-		}
+		if (RPC_IS_SOFTCONN(task))
+			goto out_exit;
 		/*
 		 * Delay any retries for 3 seconds, then handle as if it
 		 * were a timeout.
@@ -2161,79 +2379,90 @@
 		rpc_delay(task, 3*HZ);
 		/* fall through */
 	case -ETIMEDOUT:
-		task->tk_action = call_timeout;
 		break;
 	case -ECONNREFUSED:
 	case -ECONNRESET:
 	case -ECONNABORTED:
+	case -ENOTCONN:
 		rpc_force_rebind(clnt);
-		/* fall through */
+		break;
 	case -EADDRINUSE:
 		rpc_delay(task, 3*HZ);
 		/* fall through */
 	case -EPIPE:
-	case -ENOTCONN:
-		task->tk_action = call_bind;
-		break;
-	case -ENOBUFS:
-		rpc_delay(task, HZ>>2);
-		/* fall through */
 	case -EAGAIN:
-		task->tk_action = call_transmit;
 		break;
 	case -EIO:
 		/* shutdown or soft timeout */
-		rpc_exit(task, status);
-		break;
+		goto out_exit;
 	default:
 		if (clnt->cl_chatty)
 			printk("%s: RPC call returned error %d\n",
 			       clnt->cl_program->name, -status);
-		rpc_exit(task, status);
+		goto out_exit;
 	}
+	task->tk_action = call_encode;
+	rpc_check_timeout(task);
+	return;
+out_exit:
+	rpc_call_rpcerror(task, status);
 }
 
-/*
- * 6a.	Handle RPC timeout
- * 	We do not release the request slot, so we keep using the
- *	same XID for all retransmits.
- */
+static bool
+rpc_check_connected(const struct rpc_rqst *req)
+{
+	/* No allocated request or transport? return true */
+	if (!req || !req->rq_xprt)
+		return true;
+	return xprt_connected(req->rq_xprt);
+}
+
 static void
-call_timeout(struct rpc_task *task)
+rpc_check_timeout(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 
-	if (xprt_adjust_timeout(task->tk_rqstp) == 0) {
-		dprintk("RPC: %5u call_timeout (minor)\n", task->tk_pid);
-		goto retry;
-	}
+	if (xprt_adjust_timeout(task->tk_rqstp) == 0)
+		return;
 
 	dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid);
 	task->tk_timeouts++;
 
-	if (RPC_IS_SOFTCONN(task)) {
-		rpc_exit(task, -ETIMEDOUT);
+	if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) {
+		rpc_call_rpcerror(task, -ETIMEDOUT);
 		return;
 	}
+
 	if (RPC_IS_SOFT(task)) {
+		/*
+	 * Once a "no retrans timeout" soft task (a.k.a. NFSv4) has
+		 * been sent, it should time out only if the transport
+		 * connection gets terminally broken.
+		 */
+		if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) &&
+		    rpc_check_connected(task->tk_rqstp))
+			return;
+
 		if (clnt->cl_chatty) {
-			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
+			pr_notice_ratelimited(
+				"%s: server %s not responding, timed out\n",
 				clnt->cl_program->name,
 				task->tk_xprt->servername);
 		}
 		if (task->tk_flags & RPC_TASK_TIMEOUT)
-			rpc_exit(task, -ETIMEDOUT);
+			rpc_call_rpcerror(task, -ETIMEDOUT);
 		else
-			rpc_exit(task, -EIO);
+			__rpc_call_rpcerror(task, -EIO, -ETIMEDOUT);
 		return;
 	}
 
 	if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
 		task->tk_flags |= RPC_CALL_MAJORSEEN;
 		if (clnt->cl_chatty) {
-			printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
-			clnt->cl_program->name,
-			task->tk_xprt->servername);
+			pr_notice_ratelimited(
+				"%s: server %s not responding, still trying\n",
+				clnt->cl_program->name,
+				task->tk_xprt->servername);
 		}
 	}
 	rpc_force_rebind(clnt);
@@ -2242,10 +2471,6 @@
 	 * event? RFC2203 requires the server to drop all such requests.
 	 */
 	rpcauth_invalcred(task);
-
-retry:
-	task->tk_action = call_bind;
-	task->tk_status = 0;
 }
 
 /*
@@ -2256,14 +2481,19 @@
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
-	kxdrdproc_t	decode = task->tk_msg.rpc_proc->p_decode;
-	__be32		*p;
+	struct xdr_stream xdr;
+	int err;
 
 	dprint_status(task);
 
+	if (!task->tk_msg.rpc_proc->p_decode) {
+		task->tk_action = rpc_exit_task;
+		return;
+	}
+
 	if (task->tk_flags & RPC_CALL_MAJORSEEN) {
 		if (clnt->cl_chatty) {
-			printk(KERN_NOTICE "%s: server %s OK\n",
+			pr_notice_ratelimited("%s: server %s OK\n",
 				clnt->cl_program->name,
 				task->tk_xprt->servername);
 		}
@@ -2275,229 +2505,200 @@
 	 * before it changed req->rq_reply_bytes_recvd.
 	 */
 	smp_rmb();
+
+	/*
+	 * Did we ever call xprt_complete_rqst()? If not, we should assume
+	 * the message is incomplete.
+	 */
+	err = -EAGAIN;
+	if (!req->rq_reply_bytes_recvd)
+		goto out;
+
 	req->rq_rcv_buf.len = req->rq_private_buf.len;
 
 	/* Check that the softirq receive buffer is valid */
 	WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
 				sizeof(req->rq_rcv_buf)) != 0);
 
-	if (req->rq_rcv_buf.len < 12) {
-		if (!RPC_IS_SOFT(task)) {
-			task->tk_action = call_bind;
-			goto out_retry;
-		}
-		dprintk("RPC:       %s: too small RPC reply size (%d bytes)\n",
-				clnt->cl_program->name, task->tk_status);
-		task->tk_action = call_timeout;
-		goto out_retry;
-	}
-
-	p = rpc_verify_header(task);
-	if (IS_ERR(p)) {
-		if (p == ERR_PTR(-EAGAIN))
-			goto out_retry;
+	xdr_init_decode(&xdr, &req->rq_rcv_buf,
+			req->rq_rcv_buf.head[0].iov_base, req);
+	err = rpc_decode_header(task, &xdr);
+out:
+	switch (err) {
+	case 0:
+		task->tk_action = rpc_exit_task;
+		task->tk_status = rpcauth_unwrap_resp(task, &xdr);
+		dprintk("RPC: %5u %s result %d\n",
+			task->tk_pid, __func__, task->tk_status);
 		return;
-	}
-
-	task->tk_action = rpc_exit_task;
-
-	if (decode) {
-		task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
-						      task->tk_msg.rpc_resp);
-	}
-	dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
-			task->tk_status);
-	return;
-out_retry:
-	task->tk_status = 0;
-	/* Note: rpc_verify_header() may have freed the RPC slot */
-	if (task->tk_rqstp == req) {
-		req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
+	case -EAGAIN:
+		task->tk_status = 0;
 		if (task->tk_client->cl_discrtry)
 			xprt_conditional_disconnect(req->rq_xprt,
-					req->rq_connect_cookie);
+						    req->rq_connect_cookie);
+		task->tk_action = call_encode;
+		rpc_check_timeout(task);
+		break;
+	case -EKEYREJECTED:
+		task->tk_action = call_reserve;
+		rpc_check_timeout(task);
+		rpcauth_invalcred(task);
+		/* Ensure we obtain a new XID if we retry! */
+		xprt_release(task);
 	}
 }
 
-static __be32 *
-rpc_encode_header(struct rpc_task *task)
+static int
+rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_clnt *clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
-	__be32		*p = req->rq_svec[0].iov_base;
+	__be32 *p;
+	int error;
 
-	/* FIXME: check buffer size? */
+	error = -EMSGSIZE;
+	p = xdr_reserve_space(xdr, RPC_CALLHDRSIZE << 2);
+	if (!p)
+		goto out_fail;
+	*p++ = req->rq_xid;
+	*p++ = rpc_call;
+	*p++ = cpu_to_be32(RPC_VERSION);
+	*p++ = cpu_to_be32(clnt->cl_prog);
+	*p++ = cpu_to_be32(clnt->cl_vers);
+	*p   = cpu_to_be32(task->tk_msg.rpc_proc->p_proc);
 
-	p = xprt_skip_transport_header(req->rq_xprt, p);
-	*p++ = req->rq_xid;		/* XID */
-	*p++ = htonl(RPC_CALL);		/* CALL */
-	*p++ = htonl(RPC_VERSION);	/* RPC version */
-	*p++ = htonl(clnt->cl_prog);	/* program number */
-	*p++ = htonl(clnt->cl_vers);	/* program version */
-	*p++ = htonl(task->tk_msg.rpc_proc->p_proc);	/* procedure */
-	p = rpcauth_marshcred(task, p);
-	req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p);
-	return p;
+	error = rpcauth_marshcred(task, xdr);
+	if (error < 0)
+		goto out_fail;
+	return 0;
+out_fail:
+	trace_rpc_bad_callhdr(task);
+	rpc_call_rpcerror(task, error);
+	return error;
 }
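
For reference, the six fixed words that rpc_encode_header() reserves
(as RPC_CALLHDRSIZE << 2 bytes) map onto the RFC 5531 call header; the
AUTH_UNIX credential and verifier that follow are bounded by au_cslack.
Annotation only, widths in 32-bit XDR words:

	/*
	 * +-----+----------+-------------+------+------+------+---------
	 * | xid | CALL (0) | rpcvers (2) | prog | vers | proc | cred ...
	 * +-----+----------+-------------+------+------+------+---------
	 */
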
 
-static __be32 *
-rpc_verify_header(struct rpc_task *task)
+static noinline int
+rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_clnt *clnt = task->tk_client;
-	struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
-	int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
-	__be32	*p = iov->iov_base;
-	u32 n;
-	int error = -EACCES;
+	int error;
+	__be32 *p;
 
-	if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
-		/* RFC-1014 says that the representation of XDR data must be a
-		 * multiple of four bytes
-		 * - if it isn't pointer subtraction in the NFS client may give
-		 *   undefined results
-		 */
-		dprintk("RPC: %5u %s: XDR representation not a multiple of"
-		       " 4 bytes: 0x%x\n", task->tk_pid, __func__,
-		       task->tk_rqstp->rq_rcv_buf.len);
-		error = -EIO;
-		goto out_err;
-	}
-	if ((len -= 3) < 0)
-		goto out_overflow;
+	/* RFC-1014 says that the representation of XDR data must be a
+	 * multiple of four bytes
+	 * - if it isn't pointer subtraction in the NFS client may give
+	 *   undefined results
+	 */
+	if (task->tk_rqstp->rq_rcv_buf.len & 3)
+		goto out_unparsable;
 
-	p += 1; /* skip XID */
-	if ((n = ntohl(*p++)) != RPC_REPLY) {
-		dprintk("RPC: %5u %s: not an RPC reply: %x\n",
-			task->tk_pid, __func__, n);
-		error = -EIO;
-		goto out_garbage;
-	}
+	p = xdr_inline_decode(xdr, 3 * sizeof(*p));
+	if (!p)
+		goto out_unparsable;
+	p++;	/* skip XID */
+	if (*p++ != rpc_reply)
+		goto out_unparsable;
+	if (*p++ != rpc_msg_accepted)
+		goto out_msg_denied;
 
-	if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
-		if (--len < 0)
-			goto out_overflow;
-		switch ((n = ntohl(*p++))) {
-		case RPC_AUTH_ERROR:
-			break;
-		case RPC_MISMATCH:
-			dprintk("RPC: %5u %s: RPC call version mismatch!\n",
-				task->tk_pid, __func__);
-			error = -EPROTONOSUPPORT;
-			goto out_err;
-		default:
-			dprintk("RPC: %5u %s: RPC call rejected, "
-				"unknown error: %x\n",
-				task->tk_pid, __func__, n);
-			error = -EIO;
-			goto out_err;
-		}
-		if (--len < 0)
-			goto out_overflow;
-		switch ((n = ntohl(*p++))) {
-		case RPC_AUTH_REJECTEDCRED:
-		case RPC_AUTH_REJECTEDVERF:
-		case RPCSEC_GSS_CREDPROBLEM:
-		case RPCSEC_GSS_CTXPROBLEM:
-			if (!task->tk_cred_retry)
-				break;
-			task->tk_cred_retry--;
-			dprintk("RPC: %5u %s: retry stale creds\n",
-					task->tk_pid, __func__);
-			rpcauth_invalcred(task);
-			/* Ensure we obtain a new XID! */
-			xprt_release(task);
-			task->tk_action = call_reserve;
-			goto out_retry;
-		case RPC_AUTH_BADCRED:
-		case RPC_AUTH_BADVERF:
-			/* possibly garbled cred/verf? */
-			if (!task->tk_garb_retry)
-				break;
-			task->tk_garb_retry--;
-			dprintk("RPC: %5u %s: retry garbled creds\n",
-					task->tk_pid, __func__);
-			task->tk_action = call_bind;
-			goto out_retry;
-		case RPC_AUTH_TOOWEAK:
-			printk(KERN_NOTICE "RPC: server %s requires stronger "
-			       "authentication.\n",
-			       task->tk_xprt->servername);
-			break;
-		default:
-			dprintk("RPC: %5u %s: unknown auth error: %x\n",
-					task->tk_pid, __func__, n);
-			error = -EIO;
-		}
-		dprintk("RPC: %5u %s: call rejected %d\n",
-				task->tk_pid, __func__, n);
-		goto out_err;
-	}
-	p = rpcauth_checkverf(task, p);
-	if (IS_ERR(p)) {
-		error = PTR_ERR(p);
-		dprintk("RPC: %5u %s: auth check failed with %d\n",
-				task->tk_pid, __func__, error);
-		goto out_garbage;		/* bad verifier, retry */
-	}
-	len = p - (__be32 *)iov->iov_base - 1;
-	if (len < 0)
-		goto out_overflow;
-	switch ((n = ntohl(*p++))) {
-	case RPC_SUCCESS:
-		return p;
-	case RPC_PROG_UNAVAIL:
-		dprintk("RPC: %5u %s: program %u is unsupported "
-				"by server %s\n", task->tk_pid, __func__,
-				(unsigned int)clnt->cl_prog,
-				task->tk_xprt->servername);
+	error = rpcauth_checkverf(task, xdr);
+	if (error)
+		goto out_verifier;
+
+	p = xdr_inline_decode(xdr, sizeof(*p));
+	if (!p)
+		goto out_unparsable;
+	switch (*p) {
+	case rpc_success:
+		return 0;
+	case rpc_prog_unavail:
+		trace_rpc__prog_unavail(task);
 		error = -EPFNOSUPPORT;
 		goto out_err;
-	case RPC_PROG_MISMATCH:
-		dprintk("RPC: %5u %s: program %u, version %u unsupported "
-				"by server %s\n", task->tk_pid, __func__,
-				(unsigned int)clnt->cl_prog,
-				(unsigned int)clnt->cl_vers,
-				task->tk_xprt->servername);
+	case rpc_prog_mismatch:
+		trace_rpc__prog_mismatch(task);
 		error = -EPROTONOSUPPORT;
 		goto out_err;
-	case RPC_PROC_UNAVAIL:
-		dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
-				"version %u on server %s\n",
-				task->tk_pid, __func__,
-				rpc_proc_name(task),
-				clnt->cl_prog, clnt->cl_vers,
-				task->tk_xprt->servername);
+	case rpc_proc_unavail:
+		trace_rpc__proc_unavail(task);
 		error = -EOPNOTSUPP;
 		goto out_err;
-	case RPC_GARBAGE_ARGS:
-		dprintk("RPC: %5u %s: server saw garbage\n",
-				task->tk_pid, __func__);
-		break;			/* retry */
+	case rpc_garbage_args:
+	case rpc_system_err:
+		trace_rpc__garbage_args(task);
+		error = -EIO;
+		break;
 	default:
-		dprintk("RPC: %5u %s: server accept status: %x\n",
-				task->tk_pid, __func__, n);
-		/* Also retry */
+		goto out_unparsable;
 	}
 
 out_garbage:
 	clnt->cl_stats->rpcgarbage++;
 	if (task->tk_garb_retry) {
 		task->tk_garb_retry--;
-		dprintk("RPC: %5u %s: retrying\n",
-				task->tk_pid, __func__);
-		task->tk_action = call_bind;
-out_retry:
-		return ERR_PTR(-EAGAIN);
+		task->tk_action = call_encode;
+		return -EAGAIN;
 	}
 out_err:
-	rpc_exit(task, error);
-	dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
-			__func__, error);
-	return ERR_PTR(error);
-out_overflow:
-	dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid,
-			__func__);
+	rpc_call_rpcerror(task, error);
+	return error;
+
+out_unparsable:
+	trace_rpc__unparsable(task);
+	error = -EIO;
 	goto out_garbage;
+
+out_verifier:
+	trace_rpc_bad_verifier(task);
+	goto out_garbage;
+
+out_msg_denied:
+	error = -EACCES;
+	p = xdr_inline_decode(xdr, sizeof(*p));
+	if (!p)
+		goto out_unparsable;
+	switch (*p++) {
+	case rpc_auth_error:
+		break;
+	case rpc_mismatch:
+		trace_rpc__mismatch(task);
+		error = -EPROTONOSUPPORT;
+		goto out_err;
+	default:
+		goto out_unparsable;
+	}
+
+	p = xdr_inline_decode(xdr, sizeof(*p));
+	if (!p)
+		goto out_unparsable;
+	switch (*p++) {
+	case rpc_autherr_rejectedcred:
+	case rpc_autherr_rejectedverf:
+	case rpcsec_gsserr_credproblem:
+	case rpcsec_gsserr_ctxproblem:
+		if (!task->tk_cred_retry)
+			break;
+		task->tk_cred_retry--;
+		trace_rpc__stale_creds(task);
+		return -EKEYREJECTED;
+	case rpc_autherr_badcred:
+	case rpc_autherr_badverf:
+		/* possibly garbled cred/verf? */
+		if (!task->tk_garb_retry)
+			break;
+		task->tk_garb_retry--;
+		trace_rpc__bad_creds(task);
+		task->tk_action = call_encode;
+		return -EAGAIN;
+	case rpc_autherr_tooweak:
+		trace_rpc__auth_tooweak(task);
+		pr_warn("RPC: server %s requires stronger authentication.\n",
+			task->tk_xprt->servername);
+		break;
+	default:
+		goto out_unparsable;
+	}
+	goto out_err;
 }
 
 static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
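The rewrite above replaces raw pointer arithmetic and a hand-maintained word count with xdr_inline_decode(), which hands back a pointer only when the stream can supply the requested words and returns NULL otherwise, so every decode failure funnels into the single out_unparsable label. A stand-alone sketch of that cursor pattern, with hypothetical names (this shows the shape of the check, not the kernel's struct xdr_stream):

#include <stddef.h>
#include <stdint.h>

struct xdr_cursor {			/* hypothetical stand-in */
	const uint32_t *p;		/* next unread 32-bit XDR word */
	const uint32_t *end;		/* one past the last valid word */
};

/* Hand out 'nwords' words and advance, or return NULL on overrun. */
static const uint32_t *cursor_inline_decode(struct xdr_cursor *c, size_t nwords)
{
	const uint32_t *p = c->p;

	if ((size_t)(c->end - c->p) < nwords)
		return NULL;
	c->p += nwords;
	return p;
}

Because the cursor can fail in only one way, the decoder no longer needs the per-branch length bookkeeping that the deleted "if ((len -= 3) < 0)" checks performed.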
@@ -2522,9 +2723,8 @@
 		.rpc_proc = &rpcproc_null,
 	};
 	int err;
-	msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0);
-	err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN);
-	put_rpccred(msg.rpc_cred);
+	err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
+			    RPC_TASK_NULLCREDS);
 	return err;
 }
 
@@ -2535,15 +2735,15 @@
 {
 	struct rpc_message msg = {
 		.rpc_proc = &rpcproc_null,
-		.rpc_cred = cred,
 	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = clnt,
 		.rpc_xprt = xprt,
 		.rpc_message = &msg,
+		.rpc_op_cred = cred,
 		.callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
 		.callback_data = data,
-		.flags = flags,
+		.flags = flags | RPC_TASK_NULLCREDS,
 	};
 
 	return rpc_run_task(&task_setup_data);
@@ -2594,7 +2794,6 @@
 		void *dummy)
 {
 	struct rpc_cb_add_xprt_calldata *data;
-	struct rpc_cred *cred;
 	struct rpc_task *task;
 
 	data = kmalloc(sizeof(*data), GFP_NOFS);
@@ -2602,15 +2801,18 @@
 		return -ENOMEM;
 	data->xps = xprt_switch_get(xps);
 	data->xprt = xprt_get(xprt);
+	if (rpc_xprt_switch_has_addr(data->xps, (struct sockaddr *)&xprt->addr)) {
+		rpc_cb_add_xprt_release(data);
+		goto success;
+	}
 
-	cred = authnull_ops.lookup_cred(NULL, NULL, 0);
-	task = rpc_call_null_helper(clnt, xprt, cred,
-			RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC,
+	task = rpc_call_null_helper(clnt, xprt, NULL,
+			RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS,
 			&rpc_cb_add_xprt_call_ops, data);
-	put_rpccred(cred);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	rpc_put_task(task);
+success:
 	return 1;
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
@@ -2638,7 +2840,6 @@
 				     struct rpc_xprt *xprt,
 				     void *data)
 {
-	struct rpc_cred *cred;
 	struct rpc_task *task;
 	struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data;
 	int status = -EADDRINUSE;
@@ -2650,11 +2851,9 @@
 		goto out_err;
 
 	/* Test the connection */
-	cred = authnull_ops.lookup_cred(NULL, NULL, 0);
-	task = rpc_call_null_helper(clnt, xprt, cred,
-				    RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+	task = rpc_call_null_helper(clnt, xprt, NULL,
+				    RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
 				    NULL, NULL);
-	put_rpccred(cred);
 	if (IS_ERR(task)) {
 		status = PTR_ERR(task);
 		goto out_err;
@@ -2668,6 +2867,9 @@
 	/* rpc_xprt_switch and rpc_xprt are dereferenced by add_xprt_test() */
 	xtest->add_xprt_test(clnt, xprt, xtest->data);
 
+	xprt_put(xprt);
+	xprt_switch_put(xps);
+
 	/* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
 	return 1;
 out_err:
@@ -2712,6 +2914,7 @@
 	xprt = xprt_iter_xprt(&clnt->cl_xpi);
 	if (xps == NULL || xprt == NULL) {
 		rcu_read_unlock();
+		xprt_switch_put(xps);
 		return -EAGAIN;
 	}
 	resvport = xprt->resvport;
@@ -2827,7 +3030,7 @@
 
 	printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
 		task->tk_pid, task->tk_flags, task->tk_status,
-		clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
+		clnt, task->tk_rqstp, rpc_task_timeout(task), task->tk_ops,
 		clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
 		task->tk_action, rpc_waitq);
 }
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 45a0333..fd9bca2 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/**
+/*
  * debugfs interface for sunrpc
  *
  * (c) 2014 Jeff Layton <jlayton@primarydata.com>
@@ -11,7 +11,6 @@
 #include "netns.h"
 
 static struct dentry *topdir;
-static struct dentry *rpc_fault_dir;
 static struct dentry *rpc_clnt_dir;
 static struct dentry *rpc_xprt_dir;
 
@@ -33,7 +32,7 @@
 
 	seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n",
 		task->tk_pid, task->tk_flags, task->tk_status,
-		clnt->cl_clid, xid, task->tk_timeout, task->tk_ops,
+		clnt->cl_clid, xid, rpc_task_timeout(task), task->tk_ops,
 		clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
 		task->tk_action, rpc_waitq);
 	return 0;
@@ -118,16 +117,37 @@
 	.release	= tasks_release,
 };
 
+static int do_xprt_debugfs(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *numv)
+{
+	int len;
+	char name[24]; /* enough for "../../rpc_xprt/" + 8 hex digits + NULL */
+	char link[9]; /* enough for 8 hex digits + NULL */
+	int *nump = numv;
+
+	if (IS_ERR_OR_NULL(xprt->debugfs))
+		return 0;
+	len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
+		       xprt->debugfs->d_name.name);
+	if (len > sizeof(name))
+		return -1;
+	if (*nump == 0)
+		strcpy(link, "xprt");
+	else {
+		len = snprintf(link, sizeof(link), "xprt%d", *nump);
+		if (len > sizeof(link))
+			return -1;
+	}
+	debugfs_create_symlink(link, clnt->cl_debugfs, name);
+	(*nump)++;
+	return 0;
+}
+
 void
 rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
 {
 	int len;
-	char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */
-	struct rpc_xprt *xprt;
-
-	/* Already registered? */
-	if (clnt->cl_debugfs || !rpc_clnt_dir)
-		return;
+	char name[9]; /* enough for 8 hex digits + NULL */
+	int xprtnum = 0;
 
 	len = snprintf(name, sizeof(name), "%x", clnt->cl_clid);
 	if (len >= sizeof(name))
@@ -135,35 +155,12 @@
 
 	/* make the per-client dir */
 	clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir);
-	if (!clnt->cl_debugfs)
-		return;
 
 	/* make tasks file */
-	if (!debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs,
-				 clnt, &tasks_fops))
-		goto out_err;
+	debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs, clnt,
+			    &tasks_fops);
 
-	rcu_read_lock();
-	xprt = rcu_dereference(clnt->cl_xprt);
-	/* no "debugfs" dentry? Don't bother with the symlink. */
-	if (!xprt->debugfs) {
-		rcu_read_unlock();
-		return;
-	}
-	len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
-			xprt->debugfs->d_name.name);
-	rcu_read_unlock();
-
-	if (len >= sizeof(name))
-		goto out_err;
-
-	if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name))
-		goto out_err;
-
-	return;
-out_err:
-	debugfs_remove_recursive(clnt->cl_debugfs);
-	clnt->cl_debugfs = NULL;
+	rpc_clnt_iterate_for_each_xprt(clnt, do_xprt_debugfs, &xprtnum);
 }
 
 void
@@ -226,9 +223,6 @@
 	static atomic_t	cur_id;
 	char		name[9]; /* 8 hex digits + NULL term */
 
-	if (!rpc_xprt_dir)
-		return;
-
 	id = (unsigned int)atomic_inc_return(&cur_id);
 
 	len = snprintf(name, sizeof(name), "%x", id);
@@ -237,15 +231,10 @@
 
 	/* make the per-client dir */
 	xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir);
-	if (!xprt->debugfs)
-		return;
 
 	/* make tasks file */
-	if (!debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs,
-				 xprt, &xprt_info_fops)) {
-		debugfs_remove_recursive(xprt->debugfs);
-		xprt->debugfs = NULL;
-	}
+	debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs, xprt,
+			    &xprt_info_fops);
 
 	atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
 }
@@ -308,28 +297,11 @@
 	.release	= fault_release,
 };
 
-static struct dentry *
-inject_fault_dir(struct dentry *topdir)
-{
-	struct dentry *faultdir;
-
-	faultdir = debugfs_create_dir("inject_fault", topdir);
-	if (!faultdir)
-		return NULL;
-
-	if (!debugfs_create_file("disconnect", S_IFREG | 0400, faultdir,
-				 NULL, &fault_disconnect_fops))
-		return NULL;
-
-	return faultdir;
-}
-
 void __exit
 sunrpc_debugfs_exit(void)
 {
 	debugfs_remove_recursive(topdir);
 	topdir = NULL;
-	rpc_fault_dir = NULL;
 	rpc_clnt_dir = NULL;
 	rpc_xprt_dir = NULL;
 }
@@ -337,26 +309,16 @@
 void __init
 sunrpc_debugfs_init(void)
 {
-	topdir = debugfs_create_dir("sunrpc", NULL);
-	if (!topdir)
-		return;
+	struct dentry *rpc_fault_dir;
 
-	rpc_fault_dir = inject_fault_dir(topdir);
-	if (!rpc_fault_dir)
-		goto out_remove;
+	topdir = debugfs_create_dir("sunrpc", NULL);
 
 	rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
-	if (!rpc_clnt_dir)
-		goto out_remove;
 
 	rpc_xprt_dir = debugfs_create_dir("rpc_xprt", topdir);
-	if (!rpc_xprt_dir)
-		goto out_remove;
 
-	return;
-out_remove:
-	debugfs_remove_recursive(topdir);
-	topdir = NULL;
-	rpc_fault_dir = NULL;
-	rpc_clnt_dir = NULL;
+	rpc_fault_dir = debugfs_create_dir("inject_fault", topdir);
+
+	debugfs_create_file("disconnect", S_IFREG | 0400, rpc_fault_dir, NULL,
+			    &fault_disconnect_fops);
 }
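The deleted error handling above reflects a kernel-wide cleanup: debugfs creation failures are not fatal, the create helpers accept a NULL or ERR_PTR parent and degrade gracefully, and debugfs_remove_recursive() tolerates NULL and ERR_PTR values. Only code that actually dereferences a dentry, as do_xprt_debugfs() does to read d_name, still guards with IS_ERR_OR_NULL(). A minimal sketch of the resulting style; the mydrv names are hypothetical and mydrv_stats_fops is assumed to be a file_operations defined elsewhere:

static struct dentry *mydrv_dir;

static void mydrv_debugfs_init(void)
{
	mydrv_dir = debugfs_create_dir("mydrv", NULL);
	/* no error check: a failed dir is still a safe parent */
	debugfs_create_file("stats", 0400, mydrv_dir, NULL, &mydrv_stats_fops);
}

static void mydrv_debugfs_exit(void)
{
	debugfs_remove_recursive(mydrv_dir);	/* NULL/ERR_PTR tolerated */
	mydrv_dir = NULL;
}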
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 4fda18d..b71a39d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * net/sunrpc/rpc_pipe.c
  *
@@ -13,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/pagemap.h>
 #include <linux/mount.h>
+#include <linux/fs_context.h>
 #include <linux/namei.h>
 #include <linux/fsnotify.h>
 #include <linux/kernel.h>
@@ -202,18 +204,11 @@
 }
 
 static void
-rpc_i_callback(struct rcu_head *head)
+rpc_free_inode(struct inode *inode)
 {
-	struct inode *inode = container_of(head, struct inode, i_rcu);
 	kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
 }
 
-static void
-rpc_destroy_inode(struct inode *inode)
-{
-	call_rcu(&inode->i_rcu, rpc_i_callback);
-}
-
 static int
 rpc_pipe_open(struct inode *inode, struct file *filp)
 {
@@ -604,6 +599,8 @@
 
 	dget(dentry);
 	ret = simple_rmdir(dir, dentry);
+	if (!ret)
+		fsnotify_rmdir(dir, dentry);
 	d_delete(dentry);
 	dput(dentry);
 	return ret;
@@ -615,6 +612,8 @@
 
 	dget(dentry);
 	ret = simple_unlink(dir, dentry);
+	if (!ret)
+		fsnotify_unlink(dir, dentry);
 	d_delete(dentry);
 	dput(dentry);
 	return ret;
@@ -1123,7 +1122,7 @@
  */
 static const struct super_operations s_ops = {
 	.alloc_inode	= rpc_alloc_inode,
-	.destroy_inode	= rpc_destroy_inode,
+	.free_inode	= rpc_free_inode,
 	.statfs		= simple_statfs,
 };
 
@@ -1266,7 +1265,7 @@
  * that this file will be there and have a certain format.
  */
 static int
-rpc_show_dummy_info(struct seq_file *m, void *v)
+rpc_dummy_info_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "RPC server: %s\n", utsname()->nodename);
 	seq_printf(m, "service: foo (1) version 0\n");
@@ -1275,25 +1274,12 @@
 	seq_printf(m, "port: 0\n");
 	return 0;
 }
-
-static int
-rpc_dummy_info_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, rpc_show_dummy_info, NULL);
-}
-
-static const struct file_operations rpc_dummy_info_operations = {
-	.owner		= THIS_MODULE,
-	.open		= rpc_dummy_info_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info);
 
 static const struct rpc_filelist gssd_dummy_info_file[] = {
 	[0] = {
 		.name = "info",
-		.i_fop = &rpc_dummy_info_operations,
+		.i_fop = &rpc_dummy_info_fops,
 		.mode = S_IFREG | 0400,
 	},
 };
@@ -1367,11 +1353,11 @@
 }
 
 static int
-rpc_fill_super(struct super_block *sb, void *data, int silent)
+rpc_fill_super(struct super_block *sb, struct fs_context *fc)
 {
 	struct inode *inode;
 	struct dentry *root, *gssd_dentry;
-	struct net *net = get_net(sb->s_fs_info);
+	struct net *net = sb->s_fs_info;
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 	int err;
 
@@ -1428,12 +1414,28 @@
 }
 EXPORT_SYMBOL_GPL(gssd_running);
 
-static struct dentry *
-rpc_mount(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *data)
+static int rpc_fs_get_tree(struct fs_context *fc)
 {
-	struct net *net = current->nsproxy->net_ns;
-	return mount_ns(fs_type, flags, data, net, net->user_ns, rpc_fill_super);
+	return get_tree_keyed(fc, rpc_fill_super, get_net(fc->net_ns));
+}
+
+static void rpc_fs_free_fc(struct fs_context *fc)
+{
+	if (fc->s_fs_info)
+		put_net(fc->s_fs_info);
+}
+
+static const struct fs_context_operations rpc_fs_context_ops = {
+	.free		= rpc_fs_free_fc,
+	.get_tree	= rpc_fs_get_tree,
+};
+
+static int rpc_init_fs_context(struct fs_context *fc)
+{
+	put_user_ns(fc->user_ns);
+	fc->user_ns = get_user_ns(fc->net_ns->user_ns);
+	fc->ops = &rpc_fs_context_ops;
+	return 0;
 }
 
 static void rpc_kill_sb(struct super_block *sb)
@@ -1461,7 +1463,7 @@
 static struct file_system_type rpc_pipe_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "rpc_pipefs",
-	.mount		= rpc_mount,
+	.init_fs_context = rpc_init_fs_context,
 	.kill_sb	= rpc_kill_sb,
 };
 MODULE_ALIAS_FS("rpc_pipefs");
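The .mount to init_fs_context conversion keys the per-network-namespace superblock on the struct net: get_tree_keyed() either finds an existing superblock whose s_fs_info matches the key or creates one and transfers the key into it. The reference choreography is the subtle part: rpc_fs_get_tree() takes the net reference the superblock will own, rpc_fs_free_fc() drops it only if fc->s_fs_info is still set (meaning the key was never handed over), and rpc_fill_super() no longer takes a reference of its own. A trimmed sketch with hypothetical examplefs names; examplefs_fill_super is assumed to have the new int (*)(struct super_block *, struct fs_context *) signature seen above:

static int examplefs_get_tree(struct fs_context *fc)
{
	/* on success the superblock steals the key from fc->s_fs_info */
	return get_tree_keyed(fc, examplefs_fill_super, get_net(fc->net_ns));
}

static void examplefs_free_fc(struct fs_context *fc)
{
	if (fc->s_fs_info)		/* still set: key never handed over */
		put_net(fc->s_fs_info);
}

static const struct fs_context_operations examplefs_context_ops = {
	.free		= examplefs_free_fc,
	.get_tree	= examplefs_get_tree,
};

static int examplefs_init_fs_context(struct fs_context *fc)
{
	fc->ops = &examplefs_context_ops;
	return 0;
}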
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c7872bc..4a020b6 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * In-kernel rpcbind client supporting versions 2, 3, and 4 of the rpcbind
  * protocol
@@ -240,6 +241,7 @@
 		.program	= &rpcb_program,
 		.version	= RPCBVERS_2,
 		.authflavor	= RPC_AUTH_NULL,
+		.cred		= current_cred(),
 		/*
 		 * We turn off the idle timeout to prevent the kernel
 		 * from automatically disconnecting the socket.
@@ -299,6 +301,7 @@
 		.program	= &rpcb_program,
 		.version	= RPCBVERS_2,
 		.authflavor	= RPC_AUTH_UNIX,
+		.cred		= current_cred(),
 		.flags		= RPC_CLNT_CREATE_NOPING,
 	};
 	struct rpc_clnt *clnt, *clnt4;
@@ -358,7 +361,8 @@
 static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
 				    const char *hostname,
 				    struct sockaddr *srvaddr, size_t salen,
-				    int proto, u32 version)
+				    int proto, u32 version,
+				    const struct cred *cred)
 {
 	struct rpc_create_args args = {
 		.net		= net,
@@ -370,6 +374,7 @@
 		.program	= &rpcb_program,
 		.version	= version,
 		.authflavor	= RPC_AUTH_UNIX,
+		.cred		= cred,
 		.flags		= (RPC_CLNT_CREATE_NOPING |
 					RPC_CLNT_CREATE_NONPRIVPORT),
 	};
@@ -694,7 +699,8 @@
 
 	/* Put self on the wait queue to ensure we get notified if
 	 * some other task is already attempting to bind the port */
-	rpc_sleep_on(&xprt->binding, task, NULL);
+	rpc_sleep_on_timeout(&xprt->binding, task,
+			NULL, jiffies + xprt->bind_timeout);
 
 	if (xprt_test_and_set_binding(xprt)) {
 		dprintk("RPC: %5u %s: waiting for another binder\n",
@@ -744,7 +750,8 @@
 	rpcb_clnt = rpcb_create(xprt->xprt_net,
 				clnt->cl_nodename,
 				xprt->servername, sap, salen,
-				xprt->prot, bind_version);
+				xprt->prot, bind_version,
+				clnt->cl_cred);
 	if (IS_ERR(rpcb_clnt)) {
 		status = PTR_ERR(rpcb_clnt);
 		dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
@@ -752,7 +759,7 @@
 		goto bailout_nofree;
 	}
 
-	map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
+	map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS);
 	if (!map) {
 		status = -ENOMEM;
 		dprintk("RPC: %5u %s: no memory available\n",
@@ -770,7 +777,13 @@
 	case RPCBVERS_4:
 	case RPCBVERS_3:
 		map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
-		map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC);
+		map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS);
+		if (!map->r_addr) {
+			status = -ENOMEM;
+			dprintk("RPC: %5u %s: no memory available\n",
+				task->tk_pid, __func__);
+			goto bailout_free_args;
+		}
 		map->r_owner = "";
 		break;
 	case RPCBVERS_2:
@@ -793,6 +806,8 @@
 	rpc_put_task(child);
 	return;
 
+bailout_free_args:
+	kfree(map);
 bailout_release_client:
 	rpc_release_client(rpcb_clnt);
 bailout_nofree:
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3fe5d60..360afe1 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/sched.c
  *
@@ -19,8 +20,10 @@
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
 #include <linux/freezer.h>
+#include <linux/sched/mm.h>
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/metrics.h>
 
 #include "sunrpc.h"
 
@@ -44,7 +47,7 @@
 
 static void			rpc_async_schedule(struct work_struct *);
 static void			 rpc_release_task(struct rpc_task *task);
-static void __rpc_queue_timer_fn(struct timer_list *t);
+static void __rpc_queue_timer_fn(struct work_struct *);
 
 /*
  * RPC tasks sit here while waiting for conditions to improve.
@@ -56,6 +59,21 @@
  */
 struct workqueue_struct *rpciod_workqueue __read_mostly;
 struct workqueue_struct *xprtiod_workqueue __read_mostly;
+EXPORT_SYMBOL_GPL(xprtiod_workqueue);
+
+unsigned long
+rpc_task_timeout(const struct rpc_task *task)
+{
+	unsigned long timeout = READ_ONCE(task->tk_timeout);
+
+	if (timeout != 0) {
+		unsigned long now = jiffies;
+		if (time_before(now, timeout))
+			return timeout - now;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rpc_task_timeout);
 
 /*
  * Disable the timer for a given RPC task. Should be called with
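With this series tk_timeout changes meaning: it used to hold a relative duration and now holds an absolute expiry in jiffies (see __rpc_add_timer() below), with 0 doubling as "no timeout". rpc_task_timeout() recovers the remaining time with a wraparound-safe comparison. A stand-alone, runnable illustration of the arithmetic, using the same signed-subtraction trick as the kernel's time_before():

#include <stdio.h>

typedef unsigned long ticks_t;

/* signed subtraction survives counter wraparound, like time_before() */
#define ticks_before(a, b) ((long)((a) - (b)) < 0)

static ticks_t remaining(ticks_t deadline, ticks_t now)
{
	if (deadline != 0 && ticks_before(now, deadline))
		return deadline - now;
	return 0;			/* no timeout, or already expired */
}

int main(void)
{
	ticks_t now = (ticks_t)-5;			/* 5 ticks before wrap */
	printf("%lu\n", remaining(now + 10, now));	/* prints 10 */
	printf("%lu\n", remaining(now - 10, now));	/* expired: prints 0 */
	return 0;
}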
@@ -65,71 +83,101 @@
 static void
 __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 {
-	if (task->tk_timeout == 0)
+	if (list_empty(&task->u.tk_wait.timer_list))
 		return;
 	dprintk("RPC: %5u disabling timer\n", task->tk_pid);
 	task->tk_timeout = 0;
 	list_del(&task->u.tk_wait.timer_list);
 	if (list_empty(&queue->timer_list.list))
-		del_timer(&queue->timer_list.timer);
+		cancel_delayed_work(&queue->timer_list.dwork);
 }
 
 static void
 rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
 {
+	unsigned long now = jiffies;
 	queue->timer_list.expires = expires;
-	mod_timer(&queue->timer_list.timer, expires);
+	if (time_before_eq(expires, now))
+		expires = 0;
+	else
+		expires -= now;
+	mod_delayed_work(rpciod_workqueue, &queue->timer_list.dwork, expires);
 }
 
 /*
  * Set up a timer for the current task.
  */
 static void
-__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
+__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
+		unsigned long timeout)
 {
-	if (!task->tk_timeout)
-		return;
-
 	dprintk("RPC: %5u setting alarm for %u ms\n",
-		task->tk_pid, jiffies_to_msecs(task->tk_timeout));
+		task->tk_pid, jiffies_to_msecs(timeout - jiffies));
 
-	task->u.tk_wait.expires = jiffies + task->tk_timeout;
-	if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires))
-		rpc_set_queue_timer(queue, task->u.tk_wait.expires);
+	task->tk_timeout = timeout;
+	if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires))
+		rpc_set_queue_timer(queue, timeout);
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
-static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
-{
-	struct list_head *q = &queue->tasks[queue->priority];
-	struct rpc_task *task;
-
-	if (!list_empty(q)) {
-		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
-		if (task->tk_owner == queue->owner)
-			list_move_tail(&task->u.tk_wait.list, q);
-	}
-}
-
 static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
 {
 	if (queue->priority != priority) {
-		/* Fairness: rotate the list when changing priority */
-		rpc_rotate_queue_owner(queue);
 		queue->priority = priority;
+		queue->nr = 1U << priority;
 	}
 }
 
-static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-	queue->owner = pid;
-	queue->nr = RPC_BATCH_COUNT;
-}
-
 static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
 {
 	rpc_set_waitqueue_priority(queue, queue->maxpriority);
-	rpc_set_waitqueue_owner(queue, 0);
+}
+
+/*
+ * Add a request to a queue list
+ */
+static void
+__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
+{
+	struct rpc_task *t;
+
+	list_for_each_entry(t, q, u.tk_wait.list) {
+		if (t->tk_owner == task->tk_owner) {
+			list_add_tail(&task->u.tk_wait.links,
+					&t->u.tk_wait.links);
+			/* Cache the queue head in task->u.tk_wait.list */
+			task->u.tk_wait.list.next = q;
+			task->u.tk_wait.list.prev = NULL;
+			return;
+		}
+	}
+	INIT_LIST_HEAD(&task->u.tk_wait.links);
+	list_add_tail(&task->u.tk_wait.list, q);
+}
+
+/*
+ * Remove request from a queue list
+ */
+static void
+__rpc_list_dequeue_task(struct rpc_task *task)
+{
+	struct list_head *q;
+	struct rpc_task *t;
+
+	if (task->u.tk_wait.list.prev == NULL) {
+		list_del(&task->u.tk_wait.links);
+		return;
+	}
+	if (!list_empty(&task->u.tk_wait.links)) {
+		t = list_first_entry(&task->u.tk_wait.links,
+				struct rpc_task,
+				u.tk_wait.links);
+		/* Assume __rpc_list_enqueue_task() cached the queue head */
+		q = t->u.tk_wait.list.next;
+		list_add_tail(&t->u.tk_wait.list, q);
+		list_del(&task->u.tk_wait.links);
+	}
+	list_del(&task->u.tk_wait.list);
 }
 
 /*
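__rpc_list_enqueue_task() above groups tasks that share tk_owner: only the first task of a group sits on the queue proper, and later arrivals hang off its u.tk_wait.links. Because such a hidden task is not physically linked into the queue, its u.tk_wait.list is reused as scratch space: .next caches the queue head, so dequeueing the visible task can promote the next sibling onto the correct list without searching, and .prev == NULL marks the node as hidden. The sentinel idea in miniature, with hypothetical stand-alone types:

struct node {
	struct node *next, *prev;	/* a real list node iff prev != NULL */
};

/* hidden nodes are queued logically but not physically linked */
static int node_is_hidden(const struct node *n)
{
	return n->prev == NULL;
}

/* only meaningful for hidden nodes: .next was overloaded as a cache
 * of the head of the queue this node's group belongs to */
static struct node *node_queue_head(const struct node *n)
{
	return n->next;
}

This is also why a later hunk makes __rpc_remove_wait_queue() call plain list_del() only for non-priority queues: on a priority queue the task may be hidden and must leave through __rpc_list_dequeue_task().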
@@ -139,22 +187,9 @@
 		struct rpc_task *task,
 		unsigned char queue_priority)
 {
-	struct list_head *q;
-	struct rpc_task *t;
-
-	INIT_LIST_HEAD(&task->u.tk_wait.links);
 	if (unlikely(queue_priority > queue->maxpriority))
 		queue_priority = queue->maxpriority;
-	if (queue_priority > queue->priority)
-		rpc_set_waitqueue_priority(queue, queue_priority);
-	q = &queue->tasks[queue_priority];
-	list_for_each_entry(t, q, u.tk_wait.list) {
-		if (t->tk_owner == task->tk_owner) {
-			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
-			return;
-		}
-	}
-	list_add_tail(&task->u.tk_wait.list, q);
+	__rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
 }
 
 /*
@@ -173,6 +208,7 @@
 	if (RPC_IS_QUEUED(task))
 		return;
 
+	INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
 	if (RPC_IS_PRIORITY(queue))
 		__rpc_add_wait_queue_priority(queue, task, queue_priority);
 	else if (RPC_IS_SWAPPER(task))
@@ -194,13 +230,7 @@
  */
 static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
 {
-	struct rpc_task *t;
-
-	if (!list_empty(&task->u.tk_wait.links)) {
-		t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
-		list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
-		list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
-	}
+	__rpc_list_dequeue_task(task);
 }
 
 /*
@@ -212,7 +242,8 @@
 	__rpc_disable_timer(queue, task);
 	if (RPC_IS_PRIORITY(queue))
 		__rpc_remove_wait_queue_priority(task);
-	list_del(&task->u.tk_wait.list);
+	else
+		list_del(&task->u.tk_wait.list);
 	queue->qlen--;
 	dprintk("RPC: %5u removed from queue %p \"%s\"\n",
 			task->tk_pid, queue, rpc_qname(queue));
@@ -228,7 +259,8 @@
 	queue->maxpriority = nr_queues - 1;
 	rpc_reset_waitqueue_priority(queue);
 	queue->qlen = 0;
-	timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
+	queue->timer_list.expires = 0;
+	INIT_DEFERRABLE_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn);
 	INIT_LIST_HEAD(&queue->timer_list.list);
 	rpc_assign_waitqueue_name(queue, qname);
 }
@@ -247,7 +279,7 @@
 
 void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
 {
-	del_timer_sync(&queue->timer_list.timer);
+	cancel_delayed_work_sync(&queue->timer_list.dwork);
 }
 EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
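Replacing the queue's timer_list with a deferrable delayed work item moves timeout processing out of softirq into process context, which is what lets the spin_lock_bh() calls on queue->lock throughout this file relax to plain spin_lock(); "deferrable" additionally lets an idle CPU postpone the wakeup, which is harmless for RPC timeouts. The conversion pattern in isolation, with hypothetical myqueue names; the absolute expiry stays in the structure, and the relative delay is recomputed whenever the work is armed:

struct myqueue {
	unsigned long expires;		/* absolute, in jiffies */
	struct delayed_work dwork;
};

static void myqueue_timer_fn(struct work_struct *work)
{
	struct myqueue *q = container_of(work, struct myqueue, dwork.work);

	/* ... expire entries under q's lock, re-arm if any remain ... */
}

static void myqueue_init(struct myqueue *q)
{
	INIT_DEFERRABLE_WORK(&q->dwork, myqueue_timer_fn);
}

static void myqueue_arm(struct myqueue *q, unsigned long expires)
{
	unsigned long now = jiffies;

	q->expires = expires;
	mod_delayed_work(system_wq, &q->dwork,
			 time_after(expires, now) ? expires - now : 0);
}

Teardown becomes cancel_delayed_work_sync(), the delayed-work analogue of del_timer_sync(): it waits for a running callback to finish.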
 
@@ -352,7 +384,6 @@
  */
 static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
 		struct rpc_task *task,
-		rpc_action action,
 		unsigned char queue_priority)
 {
 	dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
@@ -362,48 +393,101 @@
 
 	__rpc_add_wait_queue(q, task, queue_priority);
 
-	WARN_ON_ONCE(task->tk_callback != NULL);
-	task->tk_callback = action;
-	__rpc_add_timer(q, task);
 }
 
+static void __rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
+		struct rpc_task *task, unsigned long timeout,
+		unsigned char queue_priority)
+{
+	if (time_is_after_jiffies(timeout)) {
+		__rpc_sleep_on_priority(q, task, queue_priority);
+		__rpc_add_timer(q, task, timeout);
+	} else
+		task->tk_status = -ETIMEDOUT;
+}
+
+static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action)
+{
+	if (action && !WARN_ON_ONCE(task->tk_callback != NULL))
+		task->tk_callback = action;
+}
+
+static bool rpc_sleep_check_activated(struct rpc_task *task)
+{
+	/* We shouldn't ever put an inactive task to sleep */
+	if (WARN_ON_ONCE(!RPC_IS_ACTIVATED(task))) {
+		task->tk_status = -EIO;
+		rpc_put_task_async(task);
+		return false;
+	}
+	return true;
+}
+
+void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task,
+				rpc_action action, unsigned long timeout)
+{
+	if (!rpc_sleep_check_activated(task))
+		return;
+
+	rpc_set_tk_callback(task, action);
+
+	/*
+	 * Protect the queue operations.
+	 */
+	spin_lock(&q->lock);
+	__rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority);
+	spin_unlock(&q->lock);
+}
+EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout);
+
 void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 				rpc_action action)
 {
-	/* We shouldn't ever put an inactive task to sleep */
-	WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
-	if (!RPC_IS_ACTIVATED(task)) {
-		task->tk_status = -EIO;
-		rpc_put_task_async(task);
+	if (!rpc_sleep_check_activated(task))
 		return;
-	}
 
+	rpc_set_tk_callback(task, action);
+
+	WARN_ON_ONCE(task->tk_timeout != 0);
 	/*
 	 * Protect the queue operations.
 	 */
-	spin_lock_bh(&q->lock);
-	__rpc_sleep_on_priority(q, task, action, task->tk_priority);
-	spin_unlock_bh(&q->lock);
+	spin_lock(&q->lock);
+	__rpc_sleep_on_priority(q, task, task->tk_priority);
+	spin_unlock(&q->lock);
 }
 EXPORT_SYMBOL_GPL(rpc_sleep_on);
 
-void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
-		rpc_action action, int priority)
+void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
+		struct rpc_task *task, unsigned long timeout, int priority)
 {
-	/* We shouldn't ever put an inactive task to sleep */
-	WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
-	if (!RPC_IS_ACTIVATED(task)) {
-		task->tk_status = -EIO;
-		rpc_put_task_async(task);
+	if (!rpc_sleep_check_activated(task))
 		return;
-	}
 
+	priority -= RPC_PRIORITY_LOW;
 	/*
 	 * Protect the queue operations.
 	 */
-	spin_lock_bh(&q->lock);
-	__rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
-	spin_unlock_bh(&q->lock);
+	spin_lock(&q->lock);
+	__rpc_sleep_on_priority_timeout(q, task, timeout, priority);
+	spin_unlock(&q->lock);
+}
+EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout);
+
+void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
+		int priority)
+{
+	if (!rpc_sleep_check_activated(task))
+		return;
+
+	WARN_ON_ONCE(task->tk_timeout != 0);
+	priority -= RPC_PRIORITY_LOW;
+	/*
+	 * Protect the queue operations.
+	 */
+	spin_lock(&q->lock);
+	__rpc_sleep_on_priority(q, task, priority);
+	spin_unlock(&q->lock);
 }
 EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
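The timeout-taking variants above accept an absolute deadline rather than a pre-loaded tk_timeout, and a deadline already in the past fails the sleep immediately with -ETIMEDOUT instead of queueing the task; the plain variants now WARN if tk_timeout is set. Conversion at a call site looks like rpc_delay() further down, e.g. (queue and task being the caller's wait queue and rpc_task):

	/* sleep on 'queue', giving up after five seconds */
	rpc_sleep_on_timeout(&queue, task, NULL, jiffies + 5 * HZ);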
 
@@ -440,34 +524,31 @@
 /*
  * Wake up a queued task while the queue lock is being held
  */
-static void rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
-		struct rpc_wait_queue *queue, struct rpc_task *task)
+static struct rpc_task *
+rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
+		struct rpc_wait_queue *queue, struct rpc_task *task,
+		bool (*action)(struct rpc_task *, void *), void *data)
 {
 	if (RPC_IS_QUEUED(task)) {
 		smp_rmb();
-		if (task->tk_waitqueue == queue)
-			__rpc_do_wake_up_task_on_wq(wq, queue, task);
+		if (task->tk_waitqueue == queue) {
+			if (action == NULL || action(task, data)) {
+				__rpc_do_wake_up_task_on_wq(wq, queue, task);
+				return task;
+			}
+		}
 	}
+	return NULL;
 }
 
 /*
  * Wake up a queued task while the queue lock is being held
  */
-static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue,
+					  struct rpc_task *task)
 {
-	rpc_wake_up_task_on_wq_queue_locked(rpciod_workqueue, queue, task);
-}
-
-/*
- * Wake up a task on a specific queue
- */
-void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
-		struct rpc_wait_queue *queue,
-		struct rpc_task *task)
-{
-	spin_lock_bh(&queue->lock);
-	rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
-	spin_unlock_bh(&queue->lock);
+	rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
+						   task, NULL, NULL);
 }
 
 /*
@@ -475,12 +556,48 @@
  */
 void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
 {
-	spin_lock_bh(&queue->lock);
+	if (!RPC_IS_QUEUED(task))
+		return;
+	spin_lock(&queue->lock);
 	rpc_wake_up_task_queue_locked(queue, task);
-	spin_unlock_bh(&queue->lock);
+	spin_unlock(&queue->lock);
 }
 EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
 
+static bool rpc_task_action_set_status(struct rpc_task *task, void *status)
+{
+	task->tk_status = *(int *)status;
+	return true;
+}
+
+static void
+rpc_wake_up_task_queue_set_status_locked(struct rpc_wait_queue *queue,
+		struct rpc_task *task, int status)
+{
+	rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
+			task, rpc_task_action_set_status, &status);
+}
+
+/**
+ * rpc_wake_up_queued_task_set_status - wake up a task and set task->tk_status
+ * @queue: pointer to rpc_wait_queue
+ * @task: pointer to rpc_task
+ * @status: integer error value
+ *
+ * If @task is queued on @queue, then it is woken up, and @task->tk_status is
+ * set to the value of @status.
+ */
+void
+rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue,
+		struct rpc_task *task, int status)
+{
+	if (!RPC_IS_QUEUED(task))
+		return;
+	spin_lock(&queue->lock);
+	rpc_wake_up_task_queue_set_status_locked(queue, task, status);
+	spin_unlock(&queue->lock);
+}
+
 /*
  * Wake up the next task on a priority queue.
  */
@@ -493,17 +610,9 @@
 	 * Service a batch of tasks from a single owner.
 	 */
 	q = &queue->tasks[queue->priority];
-	if (!list_empty(q)) {
-		task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
-		if (queue->owner == task->tk_owner) {
-			if (--queue->nr)
-				goto out;
-			list_move_tail(&task->u.tk_wait.list, q);
-		}
-		/*
-		 * Check if we need to switch queues.
-		 */
-		goto new_owner;
+	if (!list_empty(q) && --queue->nr) {
+		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+		goto out;
 	}
 
 	/*
@@ -515,7 +624,7 @@
 		else
 			q = q - 1;
 		if (!list_empty(q)) {
-			task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+			task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
 			goto new_queue;
 		}
 	} while (q != &queue->tasks[queue->priority]);
@@ -525,8 +634,6 @@
 
 new_queue:
 	rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_owner:
-	rpc_set_waitqueue_owner(queue, task->tk_owner);
 out:
 	return task;
 }
@@ -551,15 +658,12 @@
 
 	dprintk("RPC:       wake_up_first(%p \"%s\")\n",
 			queue, rpc_qname(queue));
-	spin_lock_bh(&queue->lock);
+	spin_lock(&queue->lock);
 	task = __rpc_find_next_queued(queue);
-	if (task != NULL) {
-		if (func(task, data))
-			rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
-		else
-			task = NULL;
-	}
-	spin_unlock_bh(&queue->lock);
+	if (task != NULL)
+		task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue,
+				task, func, data);
+	spin_unlock(&queue->lock);
 
 	return task;
 }
@@ -598,7 +702,7 @@
 {
 	struct list_head *head;
 
-	spin_lock_bh(&queue->lock);
+	spin_lock(&queue->lock);
 	head = &queue->tasks[queue->maxpriority];
 	for (;;) {
 		while (!list_empty(head)) {
@@ -612,7 +716,7 @@
 			break;
 		head--;
 	}
-	spin_unlock_bh(&queue->lock);
+	spin_unlock(&queue->lock);
 }
 EXPORT_SYMBOL_GPL(rpc_wake_up);
 
@@ -627,7 +731,7 @@
 {
 	struct list_head *head;
 
-	spin_lock_bh(&queue->lock);
+	spin_lock(&queue->lock);
 	head = &queue->tasks[queue->maxpriority];
 	for (;;) {
 		while (!list_empty(head)) {
@@ -642,20 +746,22 @@
 			break;
 		head--;
 	}
-	spin_unlock_bh(&queue->lock);
+	spin_unlock(&queue->lock);
 }
 EXPORT_SYMBOL_GPL(rpc_wake_up_status);
 
-static void __rpc_queue_timer_fn(struct timer_list *t)
+static void __rpc_queue_timer_fn(struct work_struct *work)
 {
-	struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer);
+	struct rpc_wait_queue *queue = container_of(work,
+			struct rpc_wait_queue,
+			timer_list.dwork.work);
 	struct rpc_task *task, *n;
 	unsigned long expires, now, timeo;
 
 	spin_lock(&queue->lock);
 	expires = now = jiffies;
 	list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) {
-		timeo = task->u.tk_wait.expires;
+		timeo = task->tk_timeout;
 		if (time_after_eq(now, timeo)) {
 			dprintk("RPC: %5u timeout\n", task->tk_pid);
 			task->tk_status = -ETIMEDOUT;
@@ -681,8 +787,7 @@
  */
 void rpc_delay(struct rpc_task *task, unsigned long delay)
 {
-	task->tk_timeout = delay;
-	rpc_sleep_on(&delay_queue, task, __rpc_atrun);
+	rpc_sleep_on_timeout(&delay_queue, task, __rpc_atrun, jiffies + delay);
 }
 EXPORT_SYMBOL_GPL(rpc_delay);
 
@@ -710,8 +815,7 @@
 rpc_reset_task_statistics(struct rpc_task *task)
 {
 	task->tk_timeouts = 0;
-	task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT);
-
+	task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_SENT);
 	rpc_init_task_statistics(task);
 }
 
@@ -721,10 +825,13 @@
 void rpc_exit_task(struct rpc_task *task)
 {
 	task->tk_action = NULL;
+	if (task->tk_ops->rpc_count_stats)
+		task->tk_ops->rpc_count_stats(task, task->tk_calldata);
+	else if (task->tk_client)
+		rpc_count_iostats(task, task->tk_client->cl_metrics);
 	if (task->tk_ops->rpc_call_done != NULL) {
 		task->tk_ops->rpc_call_done(task, task->tk_calldata);
 		if (task->tk_action != NULL) {
-			WARN_ON(RPC_ASSASSINATED(task));
 			/* Always release the RPC slot and buffer memory */
 			xprt_release(task);
 			rpc_reset_task_statistics(task);
@@ -732,12 +839,24 @@
 	}
 }
 
+void rpc_signal_task(struct rpc_task *task)
+{
+	struct rpc_wait_queue *queue;
+
+	if (!RPC_IS_ACTIVATED(task))
+		return;
+	set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
+	smp_mb__after_atomic();
+	queue = READ_ONCE(task->tk_waitqueue);
+	if (queue)
+		rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
+}
+
 void rpc_exit(struct rpc_task *task, int status)
 {
 	task->tk_status = status;
 	task->tk_action = rpc_exit_task;
-	if (RPC_IS_QUEUED(task))
-		rpc_wake_up_queued_task(task->tk_waitqueue, task);
+	rpc_wake_up_queued_task(task->tk_waitqueue, task);
 }
 EXPORT_SYMBOL_GPL(rpc_exit);
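rpc_signal_task() supersedes the RPC_TASK_KILLED flag (note that rpc_reset_task_statistics() above stops clearing it). The memory ordering is the important part: the SIGNALLED bit must be visible before the signaller samples tk_waitqueue, so a task concurrently going to sleep either observes the bit in __rpc_execute() or is found on the queue and woken with -ERESTARTSYS. The publish-then-wake idiom in isolation; every name here is hypothetical:

struct worker {
	unsigned long state;		/* bit 0: "signalled" */
	void *waitq;			/* set while queued, else NULL */
};

static void wake_queued(void *q, int status)
{
	/* stand-in for rpc_wake_up_queued_task_set_status() */
}

static void signal_worker(struct worker *w)
{
	void *q;

	set_bit(0, &w->state);		/* 1: publish the flag...      */
	smp_mb__after_atomic();		/* 2: ...before the next read  */
	q = READ_ONCE(w->waitq);	/* 3: sample the wait queue    */
	if (q)
		wake_queued(q, -ERESTARTSYS);
}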
 
@@ -788,6 +907,15 @@
 		 */
 		if (!RPC_IS_QUEUED(task))
 			continue;
+
+		/*
+		 * Signalled tasks should exit rather than sleep.
+		 */
+		if (RPC_SIGNALLED(task)) {
+			task->tk_rpc_status = -ERESTARTSYS;
+			rpc_exit(task, -ERESTARTSYS);
+		}
+
 		/*
 		 * The queue->lock protects against races with
 		 * rpc_make_runnable().
@@ -798,13 +926,13 @@
 		 * rpc_task pointer may still be dereferenced.
 		 */
 		queue = task->tk_waitqueue;
-		spin_lock_bh(&queue->lock);
+		spin_lock(&queue->lock);
 		if (!RPC_IS_QUEUED(task)) {
-			spin_unlock_bh(&queue->lock);
+			spin_unlock(&queue->lock);
 			continue;
 		}
 		rpc_clear_running(task);
-		spin_unlock_bh(&queue->lock);
+		spin_unlock(&queue->lock);
 		if (task_is_async)
 			return;
 
@@ -813,7 +941,7 @@
 		status = out_of_line_wait_on_bit(&task->tk_runstate,
 				RPC_TASK_QUEUED, rpc_wait_bit_killable,
 				TASK_KILLABLE);
-		if (status == -ERESTARTSYS) {
+		if (status < 0) {
 			/*
 			 * When a sync task receives a signal, it exits with
 			 * -ERESTARTSYS. In order to catch any callbacks that
@@ -821,7 +949,8 @@
 			 * break the loop here, but go around once more.
 			 */
 			dprintk("RPC: %5u got signal\n", task->tk_pid);
-			task->tk_flags |= RPC_TASK_KILLED;
+			set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
+			task->tk_rpc_status = -ERESTARTSYS;
 			rpc_exit(task, -ERESTARTSYS);
 		}
 		dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
@@ -854,7 +983,10 @@
 
 static void rpc_async_schedule(struct work_struct *work)
 {
+	unsigned int pflags = memalloc_nofs_save();
+
 	__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
+	memalloc_nofs_restore(pflags);
 }
 
 /**
@@ -873,16 +1005,13 @@
  * Most requests are 'small' (under 2KiB) and can be serviced from a
  * mempool, ensuring that NFS reads and writes can always proceed,
  * and that there is good locality of reference for these buffers.
- *
- * In order to avoid memory starvation triggering more writebacks of
- * NFS requests, we avoid using GFP_KERNEL.
  */
 int rpc_malloc(struct rpc_task *task)
 {
 	struct rpc_rqst *rqst = task->tk_rqstp;
 	size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
 	struct rpc_buffer *buf;
-	gfp_t gfp = GFP_NOIO | __GFP_NOWARN;
+	gfp_t gfp = GFP_NOFS;
 
 	if (RPC_IS_SWAPPER(task))
 		gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@@ -947,7 +1076,10 @@
 	/* Initialize workqueue for async tasks */
 	task->tk_workqueue = task_setup_data->workqueue;
 
-	task->tk_xprt = xprt_get(task_setup_data->rpc_xprt);
+	task->tk_xprt = rpc_task_get_xprt(task_setup_data->rpc_client,
+			xprt_get(task_setup_data->rpc_xprt));
+
+	task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred);
 
 	if (task->tk_ops->rpc_call_prepare != NULL)
 		task->tk_action = rpc_prepare_task;
@@ -961,7 +1093,7 @@
 static struct rpc_task *
 rpc_alloc_task(void)
 {
-	return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
+	return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
 }
 
 /*
@@ -1006,6 +1138,7 @@
 {
 	unsigned short tk_flags = task->tk_flags;
 
+	put_rpccred(task->tk_op_cred);
 	rpc_release_calldata(task->tk_ops, task->tk_calldata);
 
 	if (tk_flags & RPC_TASK_DYNAMIC) {
@@ -1016,14 +1149,17 @@
 
 static void rpc_async_release(struct work_struct *work)
 {
+	unsigned int pflags = memalloc_nofs_save();
+
 	rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
+	memalloc_nofs_restore(pflags);
 }
 
 static void rpc_release_resources_task(struct rpc_task *task)
 {
 	xprt_release(task);
 	if (task->tk_msg.rpc_cred) {
-		put_rpccred(task->tk_msg.rpc_cred);
+		put_cred(task->tk_msg.rpc_cred);
 		task->tk_msg.rpc_cred = NULL;
 	}
 	rpc_task_release_client(task);
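The memalloc_nofs_save()/restore() pairs added to rpc_async_schedule() and rpc_async_release() make the filesystem-reclaim restriction scope-based: while the scope is active, any allocation in any callee, even one passing GFP_KERNEL, is implicitly clamped so it cannot recurse into filesystem reclaim. That complements the GFP_NOIO to GFP_NOFS moves above, since the constraint no longer depends on every callee threading the right gfp flags. The shape of the pattern; do_the_work() is hypothetical:

#include <linux/sched/mm.h>

void do_the_work(struct work_struct *work);	/* hypothetical */

static void my_work_fn(struct work_struct *work)
{
	unsigned int pflags = memalloc_nofs_save();

	do_the_work(work);	/* may use GFP_KERNEL; the scope clamps it */
	memalloc_nofs_restore(pflags);
}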
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index f217c34..1a864f1 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/socklib.c
  *
@@ -26,7 +27,8 @@
  * Possibly called several times to iterate over an sk_buff and copy
  * data out of it.
  */
-size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
+static size_t
+xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
 {
 	if (len > desc->count)
 		len = desc->count;
@@ -36,7 +38,6 @@
 	desc->offset += len;
 	return len;
 }
-EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
 
 /**
  * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
@@ -69,7 +70,8 @@
  * @copy_actor: virtual method for copying data
  *
  */
-ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
+static ssize_t
+xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
 {
 	struct page	**ppage = xdr->pages;
 	unsigned int	len, pglen = xdr->page_len;
@@ -104,8 +106,8 @@
 
 		/* ACL likes to be lazy in allocating pages - ACLs
 		 * are small by default but can get huge. */
-		if (unlikely(*ppage == NULL)) {
-			*ppage = alloc_page(GFP_ATOMIC);
+		if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
+			*ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
 			if (unlikely(*ppage == NULL)) {
 				if (copied == 0)
 					copied = -ENOMEM;
@@ -140,7 +142,6 @@
 out:
 	return copied;
 }
-EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
 
 /**
  * csum_partial_copy_to_xdr - checksum and copy data
@@ -175,7 +176,7 @@
 		return -1;
 	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
 	    !skb->csum_complete_sw)
-		netdev_rx_csum_fault(skb->dev);
+		netdev_rx_csum_fault(skb->dev, skb);
 	return 0;
 no_checksum:
 	if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
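The alloc_page() path above is now opt-in: receive pages are allocated lazily only when the xdr_buf is explicitly marked XDRBUF_SPARSE_PAGES, and with GFP_NOWAIT | __GFP_NOWARN because this can run from the network receive path where sleeping is not allowed. Callers that need guaranteed pages must pre-allocate them; a caller that wants lazy filling opts in roughly like this (setup-time code, as the NFSv3 ACL decoder does):

	req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;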
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 71166b3..7c74197 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/stats.c
  *
@@ -176,6 +177,8 @@
 
 	execute = ktime_sub(now, task->tk_start);
 	op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
+	if (task->tk_status < 0)
+		op_metrics->om_error_status++;
 
 	spin_unlock(&op_metrics->om_lock);
 
@@ -218,13 +221,14 @@
 	a->om_queue = ktime_add(a->om_queue, b->om_queue);
 	a->om_rtt = ktime_add(a->om_rtt, b->om_rtt);
 	a->om_execute = ktime_add(a->om_execute, b->om_execute);
+	a->om_error_status += b->om_error_status;
 }
 
 static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
 			       int op, const struct rpc_procinfo *procs)
 {
 	_print_name(seq, op, procs);
-	seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
+	seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n",
 		   stats->om_ops,
 		   stats->om_ntrans,
 		   stats->om_timeouts,
@@ -232,12 +236,20 @@
 		   stats->om_bytes_recv,
 		   ktime_to_ms(stats->om_queue),
 		   ktime_to_ms(stats->om_rtt),
-		   ktime_to_ms(stats->om_execute));
+		   ktime_to_ms(stats->om_execute),
+		   stats->om_error_status);
+}
+
+static int do_print_stats(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *seqv)
+{
+	struct seq_file *seq = seqv;
+
+	xprt->ops->print_stats(xprt, seq);
+	return 0;
 }
 
 void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
 {
-	struct rpc_xprt *xprt;
 	unsigned int op, maxproc = clnt->cl_maxproc;
 
 	if (!clnt->cl_metrics)
@@ -247,11 +259,7 @@
 	seq_printf(seq, "p/v: %u/%u (%s)\n",
 			clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name);
 
-	rcu_read_lock();
-	xprt = rcu_dereference(clnt->cl_xprt);
-	if (xprt)
-		xprt->ops->print_stats(xprt, seq);
-	rcu_read_unlock();
+	rpc_clnt_iterate_for_each_xprt(clnt, do_print_stats, seq);
 
 	seq_printf(seq, "\tper-op statistics\n");
 	for (op = 0; op < maxproc; op++) {
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 56f9eff..f9edaa9 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/sunrpc_syms.c
  *
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d13e05f..d11b705 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/svc.c
  *
@@ -993,6 +994,58 @@
 	return error;
 }
 
+int svc_rpcbind_set_version(struct net *net,
+			    const struct svc_program *progp,
+			    u32 version, int family,
+			    unsigned short proto,
+			    unsigned short port)
+{
+	dprintk("svc: svc_register(%sv%d, %s, %u, %u)\n",
+		progp->pg_name, version,
+		proto == IPPROTO_UDP?  "udp" : "tcp",
+		port, family);
+
+	return __svc_register(net, progp->pg_name, progp->pg_prog,
+				version, family, proto, port);
+
+}
+EXPORT_SYMBOL_GPL(svc_rpcbind_set_version);
+
+int svc_generic_rpcbind_set(struct net *net,
+			    const struct svc_program *progp,
+			    u32 version, int family,
+			    unsigned short proto,
+			    unsigned short port)
+{
+	const struct svc_version *vers = progp->pg_vers[version];
+	int error;
+
+	if (vers == NULL)
+		return 0;
+
+	if (vers->vs_hidden) {
+		dprintk("svc: svc_register(%sv%d, %s, %u, %u)"
+			" (but not telling portmap)\n",
+			progp->pg_name, version,
+			proto == IPPROTO_UDP?  "udp" : "tcp",
+			port, family);
+		return 0;
+	}
+
+	/*
+	 * Don't register a UDP port if we need congestion
+	 * control.
+	 */
+	if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
+		return 0;
+
+	error = svc_rpcbind_set_version(net, progp, version,
+					family, proto, port);
+
+	return (vers->vs_rpcb_optnl) ? 0 : error;
+}
+EXPORT_SYMBOL_GPL(svc_generic_rpcbind_set);
+
 /**
  * svc_register - register an RPC service with the local portmapper
  * @serv: svc_serv struct for the service to register
@@ -1008,7 +1061,6 @@
 		 const unsigned short port)
 {
 	struct svc_program	*progp;
-	const struct svc_version *vers;
 	unsigned int		i;
 	int			error = 0;
 
@@ -1018,37 +1070,9 @@
 
 	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
 		for (i = 0; i < progp->pg_nvers; i++) {
-			vers = progp->pg_vers[i];
-			if (vers == NULL)
-				continue;
 
-			dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
-					progp->pg_name,
-					i,
-					proto == IPPROTO_UDP?  "udp" : "tcp",
-					port,
-					family,
-					vers->vs_hidden ?
-					" (but not telling portmap)" : "");
-
-			if (vers->vs_hidden)
-				continue;
-
-			/*
-			 * Don't register a UDP port if we need congestion
-			 * control.
-			 */
-			if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
-				continue;
-
-			error = __svc_register(net, progp->pg_name, progp->pg_prog,
-						i, family, proto, port);
-
-			if (vers->vs_rpcb_optnl) {
-				error = 0;
-				continue;
-			}
-
+			error = progp->pg_rpcbind_set(net, progp, i,
+					family, proto, port);
 			if (error < 0) {
 				printk(KERN_WARNING "svc: failed to register "
 					"%sv%u RPC service (errno %d).\n",
@@ -1144,6 +1168,114 @@
 static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
 #endif
 
+__be32
+svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err)
+{
+	set_bit(RQ_AUTHERR, &rqstp->rq_flags);
+	return auth_err;
+}
+EXPORT_SYMBOL_GPL(svc_return_autherr);
+
+static __be32
+svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp)
+{
+	if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags))
+		return *statp;
+	return rpc_auth_ok;
+}
+
+static int
+svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+{
+	struct kvec *argv = &rqstp->rq_arg.head[0];
+	struct kvec *resv = &rqstp->rq_res.head[0];
+	const struct svc_procedure *procp = rqstp->rq_procinfo;
+
+	/*
+	 * Decode arguments
+	 * XXX: why do we ignore the return value?
+	 */
+	if (procp->pc_decode &&
+	    !procp->pc_decode(rqstp, argv->iov_base)) {
+		*statp = rpc_garbage_args;
+		return 1;
+	}
+
+	*statp = procp->pc_func(rqstp);
+
+	if (*statp == rpc_drop_reply ||
+	    test_bit(RQ_DROPME, &rqstp->rq_flags))
+		return 0;
+
+	if (test_bit(RQ_AUTHERR, &rqstp->rq_flags))
+		return 1;
+
+	if (*statp != rpc_success)
+		return 1;
+
+	/* Encode reply */
+	if (procp->pc_encode &&
+	    !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) {
+		dprintk("svc: failed to encode reply\n");
+		/* serv->sv_stats->rpcsystemerr++; */
+		*statp = rpc_system_err;
+	}
+	return 1;
+}
+
+__be32
+svc_generic_init_request(struct svc_rqst *rqstp,
+		const struct svc_program *progp,
+		struct svc_process_info *ret)
+{
+	const struct svc_version *versp = NULL;	/* compiler food */
+	const struct svc_procedure *procp = NULL;
+
+	if (rqstp->rq_vers >= progp->pg_nvers )
+		goto err_bad_vers;
+	versp = progp->pg_vers[rqstp->rq_vers];
+	if (!versp)
+		goto err_bad_vers;
+
+	/*
+	 * Some protocol versions (namely NFSv4) require some form of
+	 * congestion control.  (See RFC 7530 section 3.1 paragraph 2)
+	 * In other words, UDP is not allowed. We mark those when setting
+	 * up the svc_xprt, and verify that here.
+	 *
+	 * The spec is not very clear about what error should be returned
+	 * when someone tries to access a server that is listening on UDP
+	 * for lower versions. RPC_PROG_MISMATCH seems to be the closest
+	 * fit.
+	 */
+	if (versp->vs_need_cong_ctrl && rqstp->rq_xprt &&
+	    !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
+		goto err_bad_vers;
+
+	if (rqstp->rq_proc >= versp->vs_nproc)
+		goto err_bad_proc;
+	rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc];
+	if (!procp)
+		goto err_bad_proc;
+
+	/* Initialize storage for argp and resp */
+	memset(rqstp->rq_argp, 0, procp->pc_argsize);
+	memset(rqstp->rq_resp, 0, procp->pc_ressize);
+
+	/* Bump per-procedure stats counter */
+	versp->vs_count[rqstp->rq_proc]++;
+
+	ret->dispatch = versp->vs_dispatch;
+	return rpc_success;
+err_bad_vers:
+	ret->mismatch.lovers = progp->pg_lovers;
+	ret->mismatch.hivers = progp->pg_hivers;
+	return rpc_prog_mismatch;
+err_bad_proc:
+	return rpc_proc_unavail;
+}
+EXPORT_SYMBOL_GPL(svc_generic_init_request);
+
 /*
  * Common routine for processing the RPC request.
  */
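svc_process_common() below stops open-coding version/procedure validation and dispatch selection, calling through two new per-program hooks instead; a service with no special needs simply points them at the generic helpers defined above. A hedged sketch of the wiring — the exampled program, its number, and its version table are made up, and unrelated fields are omitted:

static struct svc_program example_program = {
	.pg_prog		= 400500,	/* made-up program number */
	.pg_nvers		= EXAMPLE_NRVERS,
	.pg_vers		= example_versions,
	.pg_name		= "exampled",
	.pg_init_request	= svc_generic_init_request,
	.pg_rpcbind_set		= svc_generic_rpcbind_set,
};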
@@ -1151,11 +1283,11 @@
 svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 {
 	struct svc_program	*progp;
-	const struct svc_version *versp = NULL;	/* compiler food */
 	const struct svc_procedure *procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
+	struct svc_process_info process;
 	__be32			*statp;
-	u32			prog, vers, proc;
+	u32			prog, vers;
 	__be32			auth_stat, rpc_stat;
 	int			auth_res;
 	__be32			*reply_statp;
@@ -1171,9 +1303,6 @@
 	set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
 	clear_bit(RQ_DROPME, &rqstp->rq_flags);
 
-	/* Setup reply header */
-	rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
-
 	svc_putu32(resv, rqstp->rq_xid);
 
 	vers = svc_getnl(argv);
@@ -1190,8 +1319,8 @@
 	svc_putnl(resv, 0);		/* ACCEPT */
 
 	rqstp->rq_prog = prog = svc_getnl(argv);	/* program number */
-	rqstp->rq_vers = vers = svc_getnl(argv);	/* version number */
-	rqstp->rq_proc = proc = svc_getnl(argv);	/* procedure number */
+	rqstp->rq_vers = svc_getnl(argv);	/* version number */
+	rqstp->rq_proc = svc_getnl(argv);	/* procedure number */
 
 	for (progp = serv->sv_program; progp; progp = progp->pg_next)
 		if (prog == progp->pg_prog)
@@ -1229,29 +1358,22 @@
 	if (progp == NULL)
 		goto err_bad_prog;
 
-	if (vers >= progp->pg_nvers ||
-	  !(versp = progp->pg_vers[vers]))
+	rpc_stat = progp->pg_init_request(rqstp, progp, &process);
+	switch (rpc_stat) {
+	case rpc_success:
+		break;
+	case rpc_prog_unavail:
+		goto err_bad_prog;
+	case rpc_prog_mismatch:
 		goto err_bad_vers;
-
-	/*
-	 * Some protocol versions (namely NFSv4) require some form of
-	 * congestion control.  (See RFC 7530 section 3.1 paragraph 2)
-	 * In other words, UDP is not allowed. We mark those when setting
-	 * up the svc_xprt, and verify that here.
-	 *
-	 * The spec is not very clear about what error should be returned
-	 * when someone tries to access a server that is listening on UDP
-	 * for lower versions. RPC_PROG_MISMATCH seems to be the closest
-	 * fit.
-	 */
-	if (versp->vs_need_cong_ctrl &&
-	    !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
-		goto err_bad_vers;
-
-	procp = versp->vs_proc + proc;
-	if (proc >= versp->vs_nproc || !procp->pc_func)
+	case rpc_proc_unavail:
 		goto err_bad_proc;
-	rqstp->rq_procinfo = procp;
+	}
+
+	procp = rqstp->rq_procinfo;
+	/* Should this check go into the dispatcher? */
+	if (!procp || !procp->pc_func)
+		goto err_bad_proc;
 
 	/* Syntactic check complete */
 	serv->sv_stats->rpccnt++;
@@ -1261,13 +1383,6 @@
 	statp = resv->iov_base +resv->iov_len;
 	svc_putnl(resv, RPC_SUCCESS);
 
-	/* Bump per-procedure stats counter */
-	versp->vs_count[proc]++;
-
-	/* Initialize storage for argp and resp */
-	memset(rqstp->rq_argp, 0, procp->pc_argsize);
-	memset(rqstp->rq_resp, 0, procp->pc_ressize);
-
 	/* un-reserve some of the out-queue now that we have a
 	 * better idea of reply size
 	 */
@@ -1275,43 +1390,18 @@
 		svc_reserve_auth(rqstp, procp->pc_xdrressize<<2);
 
 	/* Call the function that processes the request. */
-	if (!versp->vs_dispatch) {
-		/*
-		 * Decode arguments
-		 * XXX: why do we ignore the return value?
-		 */
-		if (procp->pc_decode &&
-		    !procp->pc_decode(rqstp, argv->iov_base))
+	if (!process.dispatch) {
+		if (!svc_generic_dispatch(rqstp, statp))
+			goto release_dropit;
+		if (*statp == rpc_garbage_args)
 			goto err_garbage;
-
-		*statp = procp->pc_func(rqstp);
-
-		/* Encode reply */
-		if (*statp == rpc_drop_reply ||
-		    test_bit(RQ_DROPME, &rqstp->rq_flags)) {
-			if (procp->pc_release)
-				procp->pc_release(rqstp);
-			goto dropit;
-		}
-		if (*statp == rpc_autherr_badcred) {
-			if (procp->pc_release)
-				procp->pc_release(rqstp);
-			goto err_bad_auth;
-		}
-		if (*statp == rpc_success && procp->pc_encode &&
-		    !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) {
-			dprintk("svc: failed to encode reply\n");
-			/* serv->sv_stats->rpcsystemerr++; */
-			*statp = rpc_system_err;
-		}
+		auth_stat = svc_get_autherr(rqstp, statp);
+		if (auth_stat != rpc_auth_ok)
+			goto err_release_bad_auth;
 	} else {
 		dprintk("svc: calling dispatcher\n");
-		if (!versp->vs_dispatch(rqstp, statp)) {
-			/* Release reply info */
-			if (procp->pc_release)
-				procp->pc_release(rqstp);
-			goto dropit;
-		}
+		if (!process.dispatch(rqstp, statp))
+			goto release_dropit; /* Release reply info */
 	}
 
 	/* Check RPC status result */
@@ -1330,13 +1420,16 @@
 		goto close;
 	return 1;		/* Caller can now send it */
 
+release_dropit:
+	if (procp->pc_release)
+		procp->pc_release(rqstp);
  dropit:
 	svc_authorise(rqstp);	/* doesn't hurt to call this twice */
 	dprintk("svc: svc_process dropit\n");
 	return 0;
 
  close:
-	if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
+	if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
 		svc_close_xprt(rqstp->rq_xprt);
 	dprintk("svc: svc_process close\n");
 	return 0;
@@ -1354,6 +1447,9 @@
 	svc_putnl(resv, 2);
 	goto sendit;
 
+err_release_bad_auth:
+	if (procp->pc_release)
+		procp->pc_release(rqstp);
 err_bad_auth:
 	dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
 	serv->sv_stats->rpcbadauth++;
@@ -1372,16 +1468,16 @@
 
 err_bad_vers:
 	svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
-		       vers, prog, progp->pg_name);
+		       rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
 
 	serv->sv_stats->rpcbadfmt++;
 	svc_putnl(resv, RPC_PROG_MISMATCH);
-	svc_putnl(resv, progp->pg_lovers);
-	svc_putnl(resv, progp->pg_hivers);
+	svc_putnl(resv, process.mismatch.lovers);
+	svc_putnl(resv, process.mismatch.hivers);
 	goto sendit;
 
 err_bad_proc:
-	svc_printk(rqstp, "unknown procedure (%d)\n", proc);
+	svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
 
 	serv->sv_stats->rpcbadfmt++;
 	svc_putnl(resv, RPC_PROC_UNAVAIL);
@@ -1431,6 +1527,10 @@
 		goto out_drop;
 	}
 
+	/* Reserve space for the record marker */
+	if (rqstp->rq_prot == IPPROTO_TCP)
+		svc_putnl(resv, 0);
+
 	/* Returns 1 for send, 0 for drop */
 	if (likely(svc_process_common(rqstp, argv, resv)))
 		return svc_send(rqstp);
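Reserving a zero word here replaces the old ->xpo_prep_reply_hdr() callback (deleted from svcsock.c later in this patch). The placeholder is overwritten at send time with the RPC-over-TCP record marker; a hedged sketch of that fixup, with buffer access abbreviated:

	/* RFC 5531 record marking: MSB = last fragment, low 31 bits = length */
	__be32 marker = cpu_to_be32(0x80000000 | (rqstp->rq_res.len - 4));
	memcpy(rqstp->rq_res.head[0].iov_base, &marker, sizeof(marker));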
@@ -1459,10 +1559,10 @@
 	dprintk("svc: %s(%p)\n", __func__, req);
 
 	/* Build the svc_rqst used by the common processing routine */
-	rqstp->rq_xprt = serv->sv_bc_xprt;
 	rqstp->rq_xid = req->rq_xid;
 	rqstp->rq_prot = req->rq_xprt->prot;
 	rqstp->rq_server = serv;
+	rqstp->rq_bc_net = req->rq_xprt->xprt_net;
 
 	rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
 	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
@@ -1495,13 +1595,13 @@
 	/* Parse and execute the bc call */
 	proc_error = svc_process_common(rqstp, argv, resv);
 
-	atomic_inc(&req->rq_xprt->bc_free_slots);
+	atomic_dec(&req->rq_xprt->bc_slot_count);
 	if (!proc_error) {
 		/* Processing error: drop the request */
 		xprt_free_bc_request(req);
-		return 0;
+		error = -EINVAL;
+		goto out;
 	}
-
 	/* Finally, send the reply synchronously */
 	memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
 	task = rpc_run_bc_task(req);
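Dropped backchannel requests now surface a negative errno instead of silently returning 0. A usage sketch under that assumption (caller and variable names hypothetical):

	error = bc_svc_process(serv, req, rqstp);
	if (error < 0)
		dprintk("RPC: bc request dropped, error %d\n", error);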
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 83ccd02..de3c077 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/svc_xprt.c
  *
@@ -34,7 +35,7 @@
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
  * 6 minutes
- *   http://www.connectathon.org/talks96/nfstcp.pdf
+ *   http://nfsv4bat.org/Documents/ConnectAThon/1996/nfstcp.pdf
  */
 static int svc_conn_age_period = 6*60;
 
@@ -136,6 +137,7 @@
 	struct module *owner = xprt->xpt_class->xcl_owner;
 	if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
 		svcauth_unix_info_release(xprt);
+	put_cred(xprt->xpt_cred);
 	put_net(xprt->xpt_net);
 	/* See comment on corresponding get in xs_setup_bc_tcp(): */
 	if (xprt->xpt_bc_xprt)
@@ -171,7 +173,6 @@
 	mutex_init(&xprt->xpt_mutex);
 	spin_lock_init(&xprt->xpt_lock);
 	set_bit(XPT_BUSY, &xprt->xpt_flags);
-	rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
 	xprt->xpt_net = get_net(net);
 	strcpy(xprt->xpt_remotebuf, "uninitialized");
 }
@@ -253,7 +254,8 @@
 
 static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 			    struct net *net, const int family,
-			    const unsigned short port, int flags)
+			    const unsigned short port, int flags,
+			    const struct cred *cred)
 {
 	struct svc_xprt_class *xcl;
 
@@ -274,6 +276,7 @@
 			module_put(xcl->xcl_owner);
 			return PTR_ERR(newxprt);
 		}
+		newxprt->xpt_cred = get_cred(cred);
 		svc_add_new_perm_xprt(serv, newxprt);
 		newport = svc_xprt_local_port(newxprt);
 		return newport;
@@ -287,19 +290,20 @@
 
 int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 		    struct net *net, const int family,
-		    const unsigned short port, int flags)
+		    const unsigned short port, int flags,
+		    const struct cred *cred)
 {
 	int err;
 
 	dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
-	err = _svc_create_xprt(serv, xprt_name, net, family, port, flags);
+	err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
 	if (err == -EPROTONOSUPPORT) {
 		request_module("svc%s", xprt_name);
-		err = _svc_create_xprt(serv, xprt_name, net, family, port, flags);
+		err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
 	}
-	if (err)
+	if (err < 0)
 		dprintk("svc: transport %s not found, err %d\n",
-			xprt_name, err);
+			xprt_name, -err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(svc_create_xprt);
@@ -358,15 +362,29 @@
 	struct svc_xprt	*xprt = rqstp->rq_xprt;
 	if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
 		atomic_dec(&xprt->xpt_nr_rqsts);
+		smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
 		svc_xprt_enqueue(xprt);
 	}
 }
 
-static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
+static bool svc_xprt_ready(struct svc_xprt *xprt)
 {
-	if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
+	unsigned long xpt_flags;
+
+	/*
+	 * If another cpu has recently updated xpt_flags,
+	 * sk_sock->flags, xpt_reserved, or xpt_nr_rqsts, we need to
+	 * know about it; otherwise it's possible that both that cpu and
+	 * this one could call svc_xprt_enqueue() without either
+	 * svc_xprt_enqueue() recognizing that the conditions below
+	 * are satisfied, and we could stall indefinitely:
+	 */
+	smp_rmb();
+	xpt_flags = READ_ONCE(xprt->xpt_flags);
+
+	if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
 		return true;
-	if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
+	if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
 		if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
 		    svc_xprt_slots_in_range(xprt))
 			return true;
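The smp_rmb() here pairs with the smp_wmb() calls added before svc_xprt_enqueue() in this patch (svc_xprt_release_slot() above, svc_reserve() below): the writer publishes its counter update before waking anyone, and the reader observes it before re-testing the wakeup conditions, closing the lost-wakeup window described in the comment. Reduced to its essentials:

	/* writer side (e.g. svc_xprt_release_slot()) */
	atomic_dec(&xprt->xpt_nr_rqsts);
	smp_wmb();			/* publish the update... */
	svc_xprt_enqueue(xprt);

	/* reader side (svc_xprt_ready()) */
	smp_rmb();			/* ...before these loads observe it */
	xpt_flags = READ_ONCE(xprt->xpt_flags);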
@@ -382,7 +400,7 @@
 	struct svc_rqst	*rqstp = NULL;
 	int cpu;
 
-	if (!svc_xprt_has_something_to_do(xprt))
+	if (!svc_xprt_ready(xprt))
 		return;
 
 	/* Mark transport as busy. It will remain in this state until
@@ -469,13 +487,14 @@
  */
 void svc_reserve(struct svc_rqst *rqstp, int space)
 {
+	struct svc_xprt *xprt = rqstp->rq_xprt;
+
 	space += rqstp->rq_res.head[0].iov_len;
 
-	if (space < rqstp->rq_reserved) {
-		struct svc_xprt *xprt = rqstp->rq_xprt;
+	if (xprt && space < rqstp->rq_reserved) {
 		atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
 		rqstp->rq_reserved = space;
-
+		smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
 		svc_xprt_enqueue(xprt);
 	}
 }
@@ -768,9 +787,10 @@
 		__module_get(xprt->xpt_class->xcl_owner);
 		svc_check_conn_limits(xprt->xpt_server);
 		newxpt = xprt->xpt_ops->xpo_accept(xprt);
-		if (newxpt)
+		if (newxpt) {
+			newxpt->xpt_cred = get_cred(xprt->xpt_cred);
 			svc_add_new_temp_xprt(serv, newxpt);
-		else
+		} else
 			module_put(xprt->xpt_class->xcl_owner);
 	} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
 		/* XPT_DATA|XPT_DEFERRED case: */
@@ -895,7 +915,6 @@
 	else
 		len = xprt->xpt_ops->xpo_sendto(rqstp);
 	mutex_unlock(&xprt->xpt_mutex);
-	rpc_wake_up(&xprt->xpt_bc_pending);
 	trace_svc_send(rqstp, len);
 	svc_xprt_release(rqstp);
 
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index bb8db3c..550b214 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/svcauth.c
  *
@@ -27,12 +28,32 @@
 extern struct auth_ops svcauth_null;
 extern struct auth_ops svcauth_unix;
 
-static DEFINE_SPINLOCK(authtab_lock);
-static struct auth_ops	*authtab[RPC_AUTH_MAXFLAVOR] = {
-	[0] = &svcauth_null,
-	[1] = &svcauth_unix,
+static struct auth_ops __rcu *authtab[RPC_AUTH_MAXFLAVOR] = {
+	[RPC_AUTH_NULL] = (struct auth_ops __force __rcu *)&svcauth_null,
+	[RPC_AUTH_UNIX] = (struct auth_ops __force __rcu *)&svcauth_unix,
 };
 
+static struct auth_ops *
+svc_get_auth_ops(rpc_authflavor_t flavor)
+{
+	struct auth_ops		*aops;
+
+	if (flavor >= RPC_AUTH_MAXFLAVOR)
+		return NULL;
+	rcu_read_lock();
+	aops = rcu_dereference(authtab[flavor]);
+	if (aops != NULL && !try_module_get(aops->owner))
+		aops = NULL;
+	rcu_read_unlock();
+	return aops;
+}
+
+static void
+svc_put_auth_ops(struct auth_ops *aops)
+{
+	module_put(aops->owner);
+}
+
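The spinlock-protected table becomes RCU-managed: readers dereference the slot under rcu_read_lock() and pin the owning module with try_module_get() before leaving the critical section, so a concurrently unregistering flavor is either not seen at all or cannot be unloaded while in use. A try_module_get() failure (module already mid-exit) is treated the same as an empty slot.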
 int
 svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
 {
@@ -45,14 +66,11 @@
 
 	dprintk("svc: svc_authenticate (%d)\n", flavor);
 
-	spin_lock(&authtab_lock);
-	if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor]) ||
-	    !try_module_get(aops->owner)) {
-		spin_unlock(&authtab_lock);
+	aops = svc_get_auth_ops(flavor);
+	if (aops == NULL) {
 		*authp = rpc_autherr_badcred;
 		return SVC_DENIED;
 	}
-	spin_unlock(&authtab_lock);
 
 	rqstp->rq_auth_slack = 0;
 	init_svc_cred(&rqstp->rq_cred);
@@ -82,7 +100,7 @@
 
 	if (aops) {
 		rv = aops->release(rqstp);
-		module_put(aops->owner);
+		svc_put_auth_ops(aops);
 	}
 	return rv;
 }
@@ -90,13 +108,14 @@
 int
 svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops)
 {
+	struct auth_ops *old;
 	int rv = -EINVAL;
-	spin_lock(&authtab_lock);
-	if (flavor < RPC_AUTH_MAXFLAVOR && authtab[flavor] == NULL) {
-		authtab[flavor] = aops;
-		rv = 0;
+
+	if (flavor < RPC_AUTH_MAXFLAVOR) {
+		old = cmpxchg((struct auth_ops ** __force)&authtab[flavor], NULL, aops);
+		if (old == NULL || old == aops)
+			rv = 0;
 	}
-	spin_unlock(&authtab_lock);
 	return rv;
 }
 EXPORT_SYMBOL_GPL(svc_auth_register);
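With the lock gone, registration is a single atomic cmpxchg(): the slot is claimed only if it is still NULL, and re-registering the identical ops is tolerated as a no-op. A hedged caller sketch (the ops name is illustrative):

	/* e.g. from a module init function */
	err = svc_auth_register(RPC_AUTH_GSS, &my_gss_auth_ops);
	if (err)
		return err;	/* slot already claimed by different ops */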
@@ -104,10 +123,8 @@
 void
 svc_auth_unregister(rpc_authflavor_t flavor)
 {
-	spin_lock(&authtab_lock);
 	if (flavor < RPC_AUTH_MAXFLAVOR)
-		authtab[flavor] = NULL;
-	spin_unlock(&authtab_lock);
+		rcu_assign_pointer(authtab[flavor], NULL);
 }
 EXPORT_SYMBOL_GPL(svc_auth_unregister);
 
@@ -127,10 +144,11 @@
 static DEFINE_SPINLOCK(auth_domain_lock);
 
 static void auth_domain_release(struct kref *kref)
+	__releases(&auth_domain_lock)
 {
 	struct auth_domain *dom = container_of(kref, struct auth_domain, ref);
 
-	hlist_del(&dom->hash);
+	hlist_del_rcu(&dom->hash);
 	dom->flavour->domain_release(dom);
 	spin_unlock(&auth_domain_lock);
 }
@@ -159,7 +177,7 @@
 		}
 	}
 	if (new)
-		hlist_add_head(&new->hash, head);
+		hlist_add_head_rcu(&new->hash, head);
 	spin_unlock(&auth_domain_lock);
 	return new;
 }
@@ -167,6 +185,21 @@
 
 struct auth_domain *auth_domain_find(char *name)
 {
-	return auth_domain_lookup(name, NULL);
+	struct auth_domain *hp;
+	struct hlist_head *head;
+
+	head = &auth_domain_table[hash_str(name, DN_HASHBITS)];
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(hp, head, hash) {
+		if (strcmp(hp->name, name)==0) {
+			if (!kref_get_unless_zero(&hp->ref))
+				hp = NULL;
+			rcu_read_unlock();
+			return hp;
+		}
+	}
+	rcu_read_unlock();
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(auth_domain_find);
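auth_domain_find() is now a lockless fast path: it walks the hash chain under rcu_read_lock() and takes the reference with kref_get_unless_zero(), so an entry whose refcount has already dropped to zero — and is merely waiting for auth_domain_release() to unlink it — is reported as a miss rather than resurrected.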
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index af7f28f..5c04ba7 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/module.h>
@@ -37,20 +38,26 @@
 extern struct auth_ops svcauth_null;
 extern struct auth_ops svcauth_unix;
 
-static void svcauth_unix_domain_release(struct auth_domain *dom)
+static void svcauth_unix_domain_release_rcu(struct rcu_head *head)
 {
+	struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head);
 	struct unix_domain *ud = container_of(dom, struct unix_domain, h);
 
 	kfree(dom->name);
 	kfree(ud);
 }
 
+static void svcauth_unix_domain_release(struct auth_domain *dom)
+{
+	call_rcu(&dom->rcu_head, svcauth_unix_domain_release_rcu);
+}
+
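Deferring the free by one RCU grace period is what makes the lockless walk in auth_domain_find() safe: the writer unlinks the node with hlist_del_rcu(), and the memory is reclaimed only after every pre-existing reader has left its read-side critical section. The general shape of the pattern:

	hlist_del_rcu(&dom->hash);		/* writer unlinks */
	call_rcu(&dom->rcu_head, release_cb);	/* free after grace period */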
 struct auth_domain *unix_domain_find(char *name)
 {
 	struct auth_domain *rv;
 	struct unix_domain *new = NULL;
 
-	rv = auth_domain_lookup(name, NULL);
+	rv = auth_domain_find(name);
 	while(1) {
 		if (rv) {
 			if (new && rv != &new->h)
@@ -91,6 +98,7 @@
 	char			m_class[8]; /* e.g. "nfsd" */
 	struct in6_addr		m_addr;
 	struct unix_domain	*m_client;
+	struct rcu_head		m_rcu;
 };
 
 static void ip_map_put(struct kref *kref)
@@ -101,7 +109,7 @@
 	if (test_bit(CACHE_VALID, &item->flags) &&
 	    !test_bit(CACHE_NEGATIVE, &item->flags))
 		auth_domain_put(&im->m_client->h);
-	kfree(im);
+	kfree_rcu(im, m_rcu);
 }
 
 static inline int hash_ip6(const struct in6_addr *ip)
@@ -280,9 +288,9 @@
 
 	strcpy(ip.m_class, class);
 	ip.m_addr = *addr;
-	ch = sunrpc_cache_lookup(cd, &ip.h,
-				 hash_str(class, IP_HASHBITS) ^
-				 hash_ip6(addr));
+	ch = sunrpc_cache_lookup_rcu(cd, &ip.h,
+				     hash_str(class, IP_HASHBITS) ^
+				     hash_ip6(addr));
 
 	if (ch)
 		return container_of(ch, struct ip_map, h);
@@ -412,6 +420,7 @@
 	struct cache_head	h;
 	kuid_t			uid;
 	struct group_info	*gi;
+	struct rcu_head		rcu;
 };
 
 static int unix_gid_hash(kuid_t uid)
@@ -426,7 +435,7 @@
 	if (test_bit(CACHE_VALID, &item->flags) &&
 	    !test_bit(CACHE_NEGATIVE, &item->flags))
 		put_group_info(ug->gi);
-	kfree(ug);
+	kfree_rcu(ug, rcu);
 }
 
 static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew)
@@ -492,7 +501,7 @@
 	rv = get_int(&mesg, &id);
 	if (rv)
 		return -EINVAL;
-	uid = make_kuid(&init_user_ns, id);
+	uid = make_kuid(current_user_ns(), id);
 	ug.uid = uid;
 
 	expiry = get_expiry(&mesg);
@@ -514,7 +523,7 @@
 		err = -EINVAL;
 		if (rv)
 			goto out;
-		kgid = make_kgid(&init_user_ns, gid);
+		kgid = make_kgid(current_user_ns(), gid);
 		if (!gid_valid(kgid))
 			goto out;
 		ug.gi->gid[i] = kgid;
@@ -547,7 +556,7 @@
 			 struct cache_detail *cd,
 			 struct cache_head *h)
 {
-	struct user_namespace *user_ns = &init_user_ns;
+	struct user_namespace *user_ns = m->file->f_cred->user_ns;
 	struct unix_gid *ug;
 	int i;
 	int glen;
@@ -619,7 +628,7 @@
 	struct cache_head *ch;
 
 	ug.uid = uid;
-	ch = sunrpc_cache_lookup(cd, &ug.h, unix_gid_hash(uid));
+	ch = sunrpc_cache_lookup_rcu(cd, &ug.h, unix_gid_hash(uid));
 	if (ch)
 		return container_of(ch, struct unix_gid, h);
 	else
@@ -788,6 +797,7 @@
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
 	struct svc_cred	*cred = &rqstp->rq_cred;
+	struct user_namespace *userns;
 	u32		slen, i;
 	int		len   = argv->iov_len;
 
@@ -808,8 +818,10 @@
 	 * (export-specific) anonymous id by nfsd_setuser.
 	 * Supplementary gid's will be left alone.
 	 */
-	cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */
-	cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */
+	userns = (rqstp->rq_xprt && rqstp->rq_xprt->xpt_cred) ?
+		rqstp->rq_xprt->xpt_cred->user_ns : &init_user_ns;
+	cred->cr_uid = make_kuid(userns, svc_getnl(argv)); /* uid */
+	cred->cr_gid = make_kgid(userns, svc_getnl(argv)); /* gid */
 	slen = svc_getnl(argv);			/* gids length */
 	if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0)
 		goto badcred;
@@ -817,7 +829,7 @@
 	if (cred->cr_group_info == NULL)
 		return SVC_CLOSE;
 	for (i = 0; i < slen; i++) {
-		kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
+		kgid_t kgid = make_kgid(userns, svc_getnl(argv));
 		cred->cr_group_info->gid[i] = kgid;
 	}
 	groups_sort(cred->cr_group_info);
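AUTH_UNIX credential decoding now honors the user namespace of whoever created the listening transport, falling back to init_user_ns. A worked illustration, assuming a container whose uid_map reads "0 100000 65536":

	kuid_t k = make_kuid(userns, 0);
	/* k == KUIDT_INIT(100000): an on-the-wire uid of 0 maps to the
	 * container owner's host uid, not to global root. */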
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5445145..2934dd7 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/svcsock.c
  *
@@ -70,13 +71,6 @@
 static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
 					  struct net *, struct sockaddr *,
 					  int, int);
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
-					     struct net *, struct sockaddr *,
-					     int, int);
-static void svc_bc_sock_free(struct svc_xprt *xprt);
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key svc_key[2];
 static struct lock_class_key svc_slock_key[2];
@@ -325,68 +319,47 @@
 /*
  * Generic recvfrom routine.
  */
-static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
-			int buflen)
+static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
+			    unsigned int nr, size_t buflen, unsigned int base)
 {
 	struct svc_sock *svsk =
 		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
-	struct msghdr msg = {
-		.msg_flags	= MSG_DONTWAIT,
-	};
-	int len;
+	struct msghdr msg = { NULL };
+	ssize_t len;
 
 	rqstp->rq_xprt_hlen = 0;
 
 	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
-	len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags);
+	iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen);
+	if (base != 0) {
+		iov_iter_advance(&msg.msg_iter, base);
+		buflen -= base;
+	}
+	len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
 	/* If we read a full record, then assume there may be more
 	 * data to read (stream based sockets only!)
 	 */
 	if (len == buflen)
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 
-	dprintk("svc: socket %p recvfrom(%p, %zu) = %d\n",
+	dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n",
 		svsk, iov[0].iov_base, iov[0].iov_len, len);
 	return len;
 }
 
-static int svc_partial_recvfrom(struct svc_rqst *rqstp,
-				struct kvec *iov, int nr,
-				int buflen, unsigned int base)
-{
-	size_t save_iovlen;
-	void *save_iovbase;
-	unsigned int i;
-	int ret;
-
-	if (base == 0)
-		return svc_recvfrom(rqstp, iov, nr, buflen);
-
-	for (i = 0; i < nr; i++) {
-		if (iov[i].iov_len > base)
-			break;
-		base -= iov[i].iov_len;
-	}
-	save_iovlen = iov[i].iov_len;
-	save_iovbase = iov[i].iov_base;
-	iov[i].iov_len -= base;
-	iov[i].iov_base += base;
-	ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen);
-	iov[i].iov_len = save_iovlen;
-	iov[i].iov_base = save_iovbase;
-	return ret;
-}
-
 /*
  * Set socket snd and rcv buffer lengths
  */
-static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
-				unsigned int rcv)
+static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
 {
+	unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg;
+	struct socket *sock = svsk->sk_sock;
+
+	nreqs = min(nreqs, INT_MAX / 2 / max_mesg);
+
 	lock_sock(sock->sk);
-	sock->sk->sk_sndbuf = snd * 2;
-	sock->sk->sk_rcvbuf = rcv * 2;
+	sock->sk->sk_sndbuf = nreqs * max_mesg * 2;
+	sock->sk->sk_rcvbuf = nreqs * max_mesg * 2;
 	sock->sk->sk_write_space(sock->sk);
 	release_sock(sock->sk);
 }
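Two simplifications land in this hunk: svc_recvfrom() absorbs the old svc_partial_recvfrom() by letting iov_iter_advance() skip the already-received prefix instead of hand-editing kvec entries, and svc_sock_setbufsize() now takes a request count and clamps it so the buffer-size product cannot overflow. The clamp arithmetic, spelled out:

	/* nreqs <= INT_MAX / 2 / max_mesg  implies
	 * nreqs * max_mesg * 2 <= INT_MAX, so the sk_sndbuf/sk_rcvbuf
	 * assignments cannot wrap a signed int. */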
@@ -548,9 +521,7 @@
 	     * provides an upper bound on the number of threads
 	     * which will access the socket.
 	     */
-	    svc_sock_setbufsize(svsk->sk_sock,
-				(serv->sv_nrthreads+3) * serv->sv_max_mesg,
-				(serv->sv_nrthreads+3) * serv->sv_max_mesg);
+	    svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
 
 	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 	skb = NULL;
@@ -574,7 +545,7 @@
 		/* Don't enable netstamp, sunrpc doesn't
 		   need that much accuracy */
 	}
-	svsk->sk_sk->sk_stamp = skb->tstamp;
+	sock_write_timestamp(svsk->sk_sk, skb->tstamp);
 	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
 
 	len  = skb->len;
@@ -642,10 +613,6 @@
 	return error;
 }
 
-static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
-{
-}
-
 static int svc_udp_has_wspace(struct svc_xprt *xprt)
 {
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
@@ -689,7 +656,6 @@
 	.xpo_release_rqst = svc_release_udp_skb,
 	.xpo_detach = svc_sock_detach,
 	.xpo_free = svc_sock_free,
-	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
 	.xpo_has_wspace = svc_udp_has_wspace,
 	.xpo_accept = svc_udp_accept,
 	.xpo_secure_port = svc_sock_secure_port,
@@ -718,9 +684,7 @@
 	 * receive and respond to one request.
 	 * svc_udp_recvfrom will re-adjust if necessary
 	 */
-	svc_sock_setbufsize(svsk->sk_sock,
-			    3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
-			    3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
+	svc_sock_setbufsize(svsk, 3);
 
 	/* data might have come in before data_ready set up */
 	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -962,7 +926,8 @@
 		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
 		iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
 		iov.iov_len  = want;
-		if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
+		len = svc_recvfrom(rqstp, &iov, 1, want, 0);
+		if (len < 0)
 			goto error;
 		svsk->sk_tcplen += len;
 
@@ -1004,7 +969,7 @@
 
 	if (!bc_xprt)
 		return -EAGAIN;
-	spin_lock(&bc_xprt->recv_lock);
+	spin_lock(&bc_xprt->queue_lock);
 	req = xprt_lookup_rqst(bc_xprt, xid);
 	if (!req)
 		goto unlock_notfound;
@@ -1022,7 +987,7 @@
 	memcpy(dst->iov_base, src->iov_base, src->iov_len);
 	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
 	rqstp->rq_arg.len = 0;
-	spin_unlock(&bc_xprt->recv_lock);
+	spin_unlock(&bc_xprt->queue_lock);
 	return 0;
 unlock_notfound:
 	printk(KERN_NOTICE
@@ -1031,7 +996,7 @@
 		__func__, ntohl(calldir),
 		bc_xprt, ntohl(xid));
 unlock_eagain:
-	spin_unlock(&bc_xprt->recv_lock);
+	spin_unlock(&bc_xprt->queue_lock);
 	return -EAGAIN;
 }
 
@@ -1088,14 +1053,13 @@
 
 	vec = rqstp->rq_vec;
 
-	pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
-						svsk->sk_datalen + want);
+	pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want);
 
 	rqstp->rq_respages = &rqstp->rq_pages[pnum];
 	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
 	/* Now receive data */
-	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
+	len = svc_recvfrom(rqstp, vec, pnum, base + want, base);
 	if (len >= 0) {
 		svsk->sk_tcplen += len;
 		svsk->sk_datalen += len;
@@ -1195,17 +1159,6 @@
 	return sent;
 }
 
-/*
- * Setup response header. TCP has a 4B record length field.
- */
-static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
-{
-	struct kvec *resv = &rqstp->rq_res.head[0];
-
-	/* tcp needs a space for the record length... */
-	svc_putnl(resv, 0);
-}
-
 static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
 				       struct net *net,
 				       struct sockaddr *sa, int salen,
@@ -1214,58 +1167,6 @@
 	return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
 }
 
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
-					     struct net *, struct sockaddr *,
-					     int, int);
-static void svc_bc_sock_free(struct svc_xprt *xprt);
-
-static struct svc_xprt *svc_bc_tcp_create(struct svc_serv *serv,
-				       struct net *net,
-				       struct sockaddr *sa, int salen,
-				       int flags)
-{
-	return svc_bc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
-}
-
-static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
-{
-}
-
-static const struct svc_xprt_ops svc_tcp_bc_ops = {
-	.xpo_create = svc_bc_tcp_create,
-	.xpo_detach = svc_bc_tcp_sock_detach,
-	.xpo_free = svc_bc_sock_free,
-	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
-	.xpo_secure_port = svc_sock_secure_port,
-};
-
-static struct svc_xprt_class svc_tcp_bc_class = {
-	.xcl_name = "tcp-bc",
-	.xcl_owner = THIS_MODULE,
-	.xcl_ops = &svc_tcp_bc_ops,
-	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
-};
-
-static void svc_init_bc_xprt_sock(void)
-{
-	svc_reg_xprt_class(&svc_tcp_bc_class);
-}
-
-static void svc_cleanup_bc_xprt_sock(void)
-{
-	svc_unreg_xprt_class(&svc_tcp_bc_class);
-}
-#else /* CONFIG_SUNRPC_BACKCHANNEL */
-static void svc_init_bc_xprt_sock(void)
-{
-}
-
-static void svc_cleanup_bc_xprt_sock(void)
-{
-}
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-
 static const struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_create = svc_tcp_create,
 	.xpo_recvfrom = svc_tcp_recvfrom,
@@ -1273,7 +1174,6 @@
 	.xpo_release_rqst = svc_release_skb,
 	.xpo_detach = svc_tcp_sock_detach,
 	.xpo_free = svc_sock_free,
-	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
 	.xpo_has_wspace = svc_tcp_has_wspace,
 	.xpo_accept = svc_tcp_accept,
 	.xpo_secure_port = svc_sock_secure_port,
@@ -1292,14 +1192,12 @@
 {
 	svc_reg_xprt_class(&svc_tcp_class);
 	svc_reg_xprt_class(&svc_udp_class);
-	svc_init_bc_xprt_sock();
 }
 
 void svc_cleanup_xprt_sock(void)
 {
 	svc_unreg_xprt_class(&svc_tcp_class);
 	svc_unreg_xprt_class(&svc_udp_class);
-	svc_cleanup_bc_xprt_sock();
 }
 
 static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
@@ -1435,13 +1333,14 @@
  * @fd: file descriptor of the new listener
  * @name_return: pointer to buffer to fill in with name of listener
  * @len: size of the buffer
+ * @cred: credential
  *
  * Fills in socket name and returns positive length of name if successful.
  * Name is terminated with '\n'.  On error, returns a negative errno
  * value.
  */
 int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
-		const size_t len)
+		const size_t len, const struct cred *cred)
 {
 	int err = 0;
 	struct socket *so = sockfd_lookup(fd, &err);
@@ -1474,6 +1373,7 @@
 	salen = kernel_getsockname(svsk->sk_sock, sin);
 	if (salen >= 0)
 		svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
+	svsk->sk_xprt.xpt_cred = get_cred(cred);
 	svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
 	return svc_one_sock_name(svsk, name_return, len);
 out:
@@ -1620,45 +1520,3 @@
 		sock_release(svsk->sk_sock);
 	kfree(svsk);
 }
-
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-/*
- * Create a back channel svc_xprt which shares the fore channel socket.
- */
-static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
-					     int protocol,
-					     struct net *net,
-					     struct sockaddr *sin, int len,
-					     int flags)
-{
-	struct svc_sock *svsk;
-	struct svc_xprt *xprt;
-
-	if (protocol != IPPROTO_TCP) {
-		printk(KERN_WARNING "svc: only TCP sockets"
-			" supported on shared back channel\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
-	if (!svsk)
-		return ERR_PTR(-ENOMEM);
-
-	xprt = &svsk->sk_xprt;
-	svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);
-	set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
-
-	serv->sv_bc_xprt = xprt;
-
-	return xprt;
-}
-
-/*
- * Free a back channel svc_sock.
- */
-static void svc_bc_sock_free(struct svc_xprt *xprt)
-{
-	if (xprt)
-		kfree(container_of(xprt, struct svc_sock, sk_xprt));
-}
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 8c39364..d75f17b 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/sysctl.c
  *
@@ -89,7 +90,7 @@
 	left = *lenp;
 
 	if (write) {
-		if (!access_ok(VERIFY_READ, buffer, left))
+		if (!access_ok(buffer, left))
 			return -EFAULT;
 		p = buffer;
 		while (left && __get_user(c, p) >= 0 && isspace(c))
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
index 08881d0..81ae35b 100644
--- a/net/sunrpc/timer.c
+++ b/net/sunrpc/timer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/timer.c
  *
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index ac16f50..14ba9e7 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/net/sunrpc/xdr.c
  *
@@ -15,6 +16,8 @@
 #include <linux/errno.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/msg_prot.h>
+#include <linux/bvec.h>
+#include <trace/events/sunrpc.h>
 
 /*
  * XDR functions for basic NFS types
@@ -128,6 +131,48 @@
 }
 EXPORT_SYMBOL_GPL(xdr_terminate_string);
 
+size_t
+xdr_buf_pagecount(struct xdr_buf *buf)
+{
+	if (!buf->page_len)
+		return 0;
+	return (buf->page_base + buf->page_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+}
+
+int
+xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp)
+{
+	size_t i, n = xdr_buf_pagecount(buf);
+
+	if (n != 0 && buf->bvec == NULL) {
+		buf->bvec = kmalloc_array(n, sizeof(buf->bvec[0]), gfp);
+		if (!buf->bvec)
+			return -ENOMEM;
+		for (i = 0; i < n; i++) {
+			buf->bvec[i].bv_page = buf->pages[i];
+			buf->bvec[i].bv_len = PAGE_SIZE;
+			buf->bvec[i].bv_offset = 0;
+		}
+	}
+	return 0;
+}
+
+void
+xdr_free_bvec(struct xdr_buf *buf)
+{
+	kfree(buf->bvec);
+	buf->bvec = NULL;
+}
+
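These helpers let stream transports hand an xdr_buf's page section to the socket layer as a bio_vec array. A hedged usage sketch — msg setup is abbreviated, and real callers also account for a nonzero page_base by advancing the iterator:

	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };

	if (xdr_alloc_bvec(xdr, GFP_KERNEL) < 0)
		return -ENOMEM;
	iov_iter_bvec(&msg.msg_iter, WRITE, xdr->bvec,
		      xdr_buf_pagecount(xdr), xdr->page_len);
	/* ... sock_sendmsg(sock, &msg) ... */
	xdr_free_bvec(xdr);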
+/**
+ * xdr_inline_pages - Prepare receive buffer for a large reply
+ * @xdr: xdr_buf into which reply will be placed
+ * @offset: expected offset where data payload will start, in bytes
+ * @pages: vector of struct page pointers
+ * @base: offset in first page where receive should start, in bytes
+ * @len: expected size of the upper layer data payload, in bytes
+ *
+ */
 void
 xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
 		 struct page **pages, unsigned int base, unsigned int len)
@@ -145,6 +190,8 @@
 
 	tail->iov_base = buf + offset;
 	tail->iov_len = buflen - offset;
+	if ((xdr->page_len & 3) == 0)
+		tail->iov_len -= sizeof(__be32);
 
 	xdr->buflen += len;
 }
@@ -312,13 +359,15 @@
  * 'len' bytes. The extra data is not lost, but is instead
  * moved into the inlined pages and/or the tail.
  */
-static void
+static unsigned int
 xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
 {
 	struct kvec *head, *tail;
 	size_t copy, offs;
 	unsigned int pglen = buf->page_len;
+	unsigned int result;
 
+	result = 0;
 	tail = buf->tail;
 	head = buf->head;
 
@@ -332,6 +381,7 @@
 			copy = tail->iov_len - len;
 			memmove((char *)tail->iov_base + len,
 					tail->iov_base, copy);
+			result += copy;
 		}
 		/* Copy from the inlined pages into the tail */
 		copy = len;
@@ -342,11 +392,13 @@
 			copy = 0;
 		else if (copy > tail->iov_len - offs)
 			copy = tail->iov_len - offs;
-		if (copy != 0)
+		if (copy != 0) {
 			_copy_from_pages((char *)tail->iov_base + offs,
 					buf->pages,
 					buf->page_base + pglen + offs - len,
 					copy);
+			result += copy;
+		}
 		/* Do we also need to copy data from the head into the tail ? */
 		if (len > pglen) {
 			offs = copy = len - pglen;
@@ -356,6 +408,7 @@
 					(char *)head->iov_base +
 					head->iov_len - offs,
 					copy);
+			result += copy;
 		}
 	}
 	/* Now handle pages */
@@ -371,12 +424,15 @@
 		_copy_to_pages(buf->pages, buf->page_base,
 				(char *)head->iov_base + head->iov_len - len,
 				copy);
+		result += copy;
 	}
 	head->iov_len -= len;
 	buf->buflen -= len;
 	/* Have we truncated the message? */
 	if (buf->len > buf->buflen)
 		buf->len = buf->buflen;
+
+	return result;
 }
 
 /**
@@ -388,14 +444,16 @@
  * 'len' bytes. The extra data is not lost, but is instead
  * moved into the tail.
  */
-static void
+static unsigned int
 xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
 {
 	struct kvec *tail;
 	size_t copy;
 	unsigned int pglen = buf->page_len;
 	unsigned int tailbuf_len;
+	unsigned int result;
 
+	result = 0;
 	tail = buf->tail;
 	BUG_ON (len > pglen);
 
@@ -413,18 +471,22 @@
 		if (tail->iov_len > len) {
 			char *p = (char *)tail->iov_base + len;
 			memmove(p, tail->iov_base, tail->iov_len - len);
+			result += tail->iov_len - len;
 		} else
 			copy = tail->iov_len;
 		/* Copy from the inlined pages into the tail */
 		_copy_from_pages((char *)tail->iov_base,
 				buf->pages, buf->page_base + pglen - len,
 				copy);
+		result += copy;
 	}
 	buf->page_len -= len;
 	buf->buflen -= len;
 	/* Have we truncated the message? */
 	if (buf->len > buf->buflen)
 		buf->len = buf->buflen;
+
+	return result;
 }
 
 void
@@ -449,6 +511,7 @@
  * @xdr: pointer to xdr_stream struct
  * @buf: pointer to XDR buffer in which to encode data
  * @p: current pointer inside XDR buffer
+ * @rqst: pointer to controlling rpc_rqst, for debugging
  *
  * Note: at the moment the RPC client only passes the length of our
  *	 scratch buffer in the xdr_buf's header kvec. Previously this
@@ -457,7 +520,8 @@
  *	 of the buffer length, and takes care of adjusting the kvec
  *	 length for us.
  */
-void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
+void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+		     struct rpc_rqst *rqst)
 {
 	struct kvec *iov = buf->head;
 	int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
@@ -479,6 +543,7 @@
 		buf->len += len;
 		iov->iov_len += len;
 	}
+	xdr->rqst = rqst;
 }
 EXPORT_SYMBOL_GPL(xdr_init_encode);
 
@@ -495,7 +560,7 @@
  * required at the end of encoding, or any other time when the xdr_buf
  * data might be read.
  */
-void xdr_commit_encode(struct xdr_stream *xdr)
+inline void xdr_commit_encode(struct xdr_stream *xdr)
 {
 	int shift = xdr->scratch.iov_len;
 	void *page;
@@ -517,9 +582,9 @@
 	int frag1bytes, frag2bytes;
 
 	if (nbytes > PAGE_SIZE)
-		return NULL; /* Bigger buffers require special handling */
+		goto out_overflow; /* Bigger buffers require special handling */
 	if (xdr->buf->len + nbytes > xdr->buf->buflen)
-		return NULL; /* Sorry, we're totally out of space */
+		goto out_overflow; /* Sorry, we're totally out of space */
 	frag1bytes = (xdr->end - xdr->p) << 2;
 	frag2bytes = nbytes - frag1bytes;
 	if (xdr->iov)
@@ -548,6 +613,9 @@
 	xdr->buf->page_len += frag2bytes;
 	xdr->buf->len += nbytes;
 	return p;
+out_overflow:
+	trace_rpc_xdr_overflow(xdr, nbytes);
+	return NULL;
 }
 
 /**
@@ -785,8 +853,10 @@
  * @xdr: pointer to xdr_stream struct
  * @buf: pointer to XDR buffer from which to decode data
  * @p: current pointer inside XDR buffer
+ * @rqst: pointer to controlling rpc_rqst, for debugging
  */
-void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
+void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+		     struct rpc_rqst *rqst)
 {
 	xdr->buf = buf;
 	xdr->scratch.iov_base = NULL;
@@ -802,6 +872,7 @@
 		xdr->nwords -= p - xdr->p;
 		xdr->p = p;
 	}
+	xdr->rqst = rqst;
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 
@@ -820,7 +891,7 @@
 	buf->page_len =  len;
 	buf->buflen =  len;
 	buf->len = len;
-	xdr_init_decode(xdr, buf, NULL);
+	xdr_init_decode(xdr, buf, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
 
@@ -862,20 +933,23 @@
 	size_t cplen = (char *)xdr->end - (char *)xdr->p;
 
 	if (nbytes > xdr->scratch.iov_len)
-		return NULL;
+		goto out_overflow;
 	p = __xdr_inline_decode(xdr, cplen);
 	if (p == NULL)
 		return NULL;
 	memcpy(cpdest, p, cplen);
+	if (!xdr_set_next_buffer(xdr))
+		goto out_overflow;
 	cpdest += cplen;
 	nbytes -= cplen;
-	if (!xdr_set_next_buffer(xdr))
-		return NULL;
 	p = __xdr_inline_decode(xdr, nbytes);
 	if (p == NULL)
 		return NULL;
 	memcpy(cpdest, p, nbytes);
 	return xdr->scratch.iov_base;
+out_overflow:
+	trace_rpc_xdr_overflow(xdr, nbytes);
+	return NULL;
 }
 
 /**
@@ -892,14 +966,17 @@
 {
 	__be32 *p;
 
-	if (nbytes == 0)
+	if (unlikely(nbytes == 0))
 		return xdr->p;
 	if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
-		return NULL;
+		goto out_overflow;
 	p = __xdr_inline_decode(xdr, nbytes);
 	if (p != NULL)
 		return p;
 	return xdr_copy_to_scratch(xdr, nbytes);
+out_overflow:
+	trace_rpc_xdr_overflow(xdr, nbytes);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(xdr_inline_decode);
 
@@ -909,13 +986,17 @@
 	struct kvec *iov;
 	unsigned int nwords = XDR_QUADLEN(len);
 	unsigned int cur = xdr_stream_pos(xdr);
+	unsigned int copied, offset;
 
 	if (xdr->nwords == 0)
 		return 0;
+
 	/* Realign pages to current pointer position */
-	iov  = buf->head;
+	iov = buf->head;
 	if (iov->iov_len > cur) {
-		xdr_shrink_bufhead(buf, iov->iov_len - cur);
+		offset = iov->iov_len - cur;
+		copied = xdr_shrink_bufhead(buf, offset);
+		trace_rpc_xdr_alignment(xdr, offset, copied);
 		xdr->nwords = XDR_QUADLEN(buf->len - cur);
 	}
 
@@ -927,7 +1008,9 @@
 		len = buf->page_len;
 	else if (nwords < xdr->nwords) {
 		/* Truncate page data and move it into the tail */
-		xdr_shrink_pagelen(buf, buf->page_len - len);
+		offset = buf->page_len - len;
+		copied = xdr_shrink_pagelen(buf, offset);
+		trace_rpc_xdr_alignment(xdr, offset, copied);
 		xdr->nwords = XDR_QUADLEN(buf->len - cur);
 	}
 	return len;
@@ -1068,47 +1151,6 @@
 }
 EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
 
-/**
- * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
- * @buf: buf to be trimmed
- * @len: number of bytes to reduce "buf" by
- *
- * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
- * that it's possible that we'll trim less than that amount if the xdr_buf is
- * too small, or if (for instance) it's all in the head and the parser has
- * already read too far into it.
- */
-void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
-{
-	size_t cur;
-	unsigned int trim = len;
-
-	if (buf->tail[0].iov_len) {
-		cur = min_t(size_t, buf->tail[0].iov_len, trim);
-		buf->tail[0].iov_len -= cur;
-		trim -= cur;
-		if (!trim)
-			goto fix_len;
-	}
-
-	if (buf->page_len) {
-		cur = min_t(unsigned int, buf->page_len, trim);
-		buf->page_len -= cur;
-		trim -= cur;
-		if (!trim)
-			goto fix_len;
-	}
-
-	if (buf->head[0].iov_len) {
-		cur = min_t(size_t, buf->head[0].iov_len, trim);
-		buf->head[0].iov_len -= cur;
-		trim -= cur;
-	}
-fix_len:
-	buf->len -= (len - trim);
-}
-EXPORT_SYMBOL_GPL(xdr_buf_trim);
-
 static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
 {
 	unsigned int this_len;
@@ -1194,43 +1236,60 @@
 }
 EXPORT_SYMBOL_GPL(xdr_encode_word);
 
-/* If the netobj starting offset bytes from the start of xdr_buf is contained
- * entirely in the head or the tail, set object to point to it; otherwise
- * try to find space for it at the end of the tail, copy it there, and
- * set obj to point to it. */
-int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset)
+/**
+ * xdr_buf_read_mic() - obtain the address of the GSS mic from xdr buf
+ * @buf: pointer to buffer containing a mic
+ * @mic: on success, returns the address of the mic
+ * @offset: the offset in buf where mic may be found
+ *
+ * This function may modify the xdr buf if the mic is found to be straddling
+ * a boundary between head, pages, and tail.  On success the mic can be read
+ * from the address returned.  There is no need to free the mic.
+ *
+ * Return: Success returns 0, otherwise an integer error.
+ */
+int xdr_buf_read_mic(struct xdr_buf *buf, struct xdr_netobj *mic, unsigned int offset)
 {
 	struct xdr_buf subbuf;
+	unsigned int boundary;
 
-	if (xdr_decode_word(buf, offset, &obj->len))
+	if (xdr_decode_word(buf, offset, &mic->len))
 		return -EFAULT;
-	if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
+	offset += 4;
+
+	/* Is the mic partially in the head? */
+	boundary = buf->head[0].iov_len;
+	if (offset < boundary && (offset + mic->len) > boundary)
+		xdr_shift_buf(buf, boundary - offset);
+
+	/* Is the mic partially in the pages? */
+	boundary += buf->page_len;
+	if (offset < boundary && (offset + mic->len) > boundary)
+		xdr_shrink_pagelen(buf, boundary - offset);
+
+	if (xdr_buf_subsegment(buf, &subbuf, offset, mic->len))
 		return -EFAULT;
 
-	/* Is the obj contained entirely in the head? */
-	obj->data = subbuf.head[0].iov_base;
-	if (subbuf.head[0].iov_len == obj->len)
+	/* Is the mic contained entirely in the head? */
+	mic->data = subbuf.head[0].iov_base;
+	if (subbuf.head[0].iov_len == mic->len)
 		return 0;
-	/* ..or is the obj contained entirely in the tail? */
-	obj->data = subbuf.tail[0].iov_base;
-	if (subbuf.tail[0].iov_len == obj->len)
+	/* ..or is the mic contained entirely in the tail? */
+	mic->data = subbuf.tail[0].iov_base;
+	if (subbuf.tail[0].iov_len == mic->len)
 		return 0;
 
-	/* use end of tail as storage for obj:
-	 * (We don't copy to the beginning because then we'd have
-	 * to worry about doing a potentially overlapping copy.
-	 * This assumes the object is at most half the length of the
-	 * tail.) */
-	if (obj->len > buf->buflen - buf->len)
+	/* Find a contiguous area in @buf to hold all of @mic */
+	if (mic->len > buf->buflen - buf->len)
 		return -ENOMEM;
 	if (buf->tail[0].iov_len != 0)
-		obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
+		mic->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
 	else
-		obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
-	__read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
+		mic->data = buf->head[0].iov_base + buf->head[0].iov_len;
+	__read_bytes_from_xdr_buf(&subbuf, mic->data, mic->len);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(xdr_buf_read_netobj);
+EXPORT_SYMBOL_GPL(xdr_buf_read_mic);
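The rename from xdr_buf_read_netobj() reflects the function's single user (GSS MIC verification), and the rewrite adds two straddle checks: a MIC that begins before the head/pages or pages/tail boundary and ends after it is first shifted so that xdr_buf_subsegment() can return it contiguously. The straddle predicate used for both boundaries:

	/* crosses iff it starts before and ends after the boundary */
	offset < boundary && offset + mic->len > boundary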
 
 /* Returns 0 on success, or else a negative error code. */
 static int
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d066aae..41df4c5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  linux/net/sunrpc/xprt.c
  *
@@ -49,6 +50,7 @@
 #include <linux/sunrpc/metrics.h>
 #include <linux/sunrpc/bc_xprt.h>
 #include <linux/rcupdate.h>
+#include <linux/sched/mm.h>
 
 #include <trace/events/sunrpc.h>
 
@@ -67,14 +69,20 @@
  */
 static void	 xprt_init(struct rpc_xprt *xprt, struct net *net);
 static __be32	xprt_alloc_xid(struct rpc_xprt *xprt);
-static void	xprt_connect_status(struct rpc_task *task);
-static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
-static void     __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
 static void	 xprt_destroy(struct rpc_xprt *xprt);
 
 static DEFINE_SPINLOCK(xprt_list_lock);
 static LIST_HEAD(xprt_list);
 
+static unsigned long xprt_request_timeout(const struct rpc_rqst *req)
+{
+	unsigned long timeout = jiffies + req->rq_timeout;
+
+	if (time_before(timeout, req->rq_majortimeo))
+		return timeout;
+	return req->rq_majortimeo;
+}
+
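The helper is effectively a clamped sleep deadline:

	/* timeout = min(jiffies + req->rq_timeout, req->rq_majortimeo) */

so a task queued on xprt->sending never sleeps past the request's major (total) timeout, however short the per-try timeout is.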
 /**
  * xprt_register_transport - register a transport implementation
  * @transport: transport to register
@@ -171,6 +179,17 @@
 }
 EXPORT_SYMBOL_GPL(xprt_load_transport);
 
+static void xprt_clear_locked(struct rpc_xprt *xprt)
+{
+	xprt->snd_task = NULL;
+	if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
+		smp_mb__before_atomic();
+		clear_bit(XPRT_LOCKED, &xprt->state);
+		smp_mb__after_atomic();
+	} else
+		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+}
+
 /**
  * xprt_reserve_xprt - serialize write access to transports
  * @task: task that is requesting access to the transport
@@ -183,44 +202,56 @@
 int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
-	int priority;
 
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
 		if (task == xprt->snd_task)
 			return 1;
 		goto out_sleep;
 	}
+	if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+		goto out_unlock;
 	xprt->snd_task = task;
-	if (req != NULL)
-		req->rq_ntrans++;
 
 	return 1;
 
+out_unlock:
+	xprt_clear_locked(xprt);
 out_sleep:
 	dprintk("RPC: %5u failed to lock transport %p\n",
 			task->tk_pid, xprt);
-	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
-	if (req == NULL)
-		priority = RPC_PRIORITY_LOW;
-	else if (!req->rq_ntrans)
-		priority = RPC_PRIORITY_NORMAL;
+	if  (RPC_IS_SOFT(task))
+		rpc_sleep_on_timeout(&xprt->sending, task, NULL,
+				xprt_request_timeout(req));
 	else
-		priority = RPC_PRIORITY_HIGH;
-	rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
+		rpc_sleep_on(&xprt->sending, task, NULL);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
 
-static void xprt_clear_locked(struct rpc_xprt *xprt)
+static bool
+xprt_need_congestion_window_wait(struct rpc_xprt *xprt)
 {
-	xprt->snd_task = NULL;
-	if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
-		smp_mb__before_atomic();
-		clear_bit(XPRT_LOCKED, &xprt->state);
-		smp_mb__after_atomic();
-	} else
-		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+	return test_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
+static void
+xprt_set_congestion_window_wait(struct rpc_xprt *xprt)
+{
+	if (!list_empty(&xprt->xmit_queue)) {
+		/* Peek at head of queue to see if it can make progress */
+		if (list_first_entry(&xprt->xmit_queue, struct rpc_rqst,
+					rq_xmit)->rq_cong)
+			return;
+	}
+	set_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
+static void
+xprt_test_and_clear_congestion_window_wait(struct rpc_xprt *xprt)
+{
+	if (!RPCXPRT_CONGESTED(xprt))
+		clear_bit(XPRT_CWND_WAIT, &xprt->state);
 }
 
 /*
@@ -230,11 +261,11 @@
  * Same as xprt_reserve_xprt, but Van Jacobson congestion control is
  * integrated into the decision of whether a request is allowed to be
  * woken up and given access to the transport.
+ * Note that the lock is only granted if we know there are free slots.
  */
 int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
-	int priority;
 
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
 		if (task == xprt->snd_task)
@@ -245,25 +276,22 @@
 		xprt->snd_task = task;
 		return 1;
 	}
-	if (__xprt_get_cong(xprt, task)) {
+	if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+		goto out_unlock;
+	if (!xprt_need_congestion_window_wait(xprt)) {
 		xprt->snd_task = task;
-		req->rq_ntrans++;
 		return 1;
 	}
+out_unlock:
 	xprt_clear_locked(xprt);
 out_sleep:
-	if (req)
-		__xprt_put_cong(xprt, req);
 	dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
-	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
-	if (req == NULL)
-		priority = RPC_PRIORITY_LOW;
-	else if (!req->rq_ntrans)
-		priority = RPC_PRIORITY_NORMAL;
+	if (RPC_IS_SOFT(task))
+		rpc_sleep_on_timeout(&xprt->sending, task, NULL,
+				xprt_request_timeout(req));
 	else
-		priority = RPC_PRIORITY_HIGH;
-	rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
+		rpc_sleep_on(&xprt->sending, task, NULL);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -272,21 +300,19 @@
 {
 	int retval;
 
-	spin_lock_bh(&xprt->transport_lock);
+	if (test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == task)
+		return 1;
+	spin_lock(&xprt->transport_lock);
 	retval = xprt->ops->reserve_xprt(xprt, task);
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 	return retval;
 }
 
 static bool __xprt_lock_write_func(struct rpc_task *task, void *data)
 {
 	struct rpc_xprt *xprt = data;
-	struct rpc_rqst *req;
 
-	req = task->tk_rqstp;
 	xprt->snd_task = task;
-	if (req)
-		req->rq_ntrans++;
 	return true;
 }
 
@@ -294,53 +320,30 @@
 {
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
 		return;
-
+	if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+		goto out_unlock;
 	if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
 				__xprt_lock_write_func, xprt))
 		return;
+out_unlock:
 	xprt_clear_locked(xprt);
 }
 
-static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data)
-{
-	struct rpc_xprt *xprt = data;
-	struct rpc_rqst *req;
-
-	req = task->tk_rqstp;
-	if (req == NULL) {
-		xprt->snd_task = task;
-		return true;
-	}
-	if (__xprt_get_cong(xprt, task)) {
-		xprt->snd_task = task;
-		req->rq_ntrans++;
-		return true;
-	}
-	return false;
-}
-
 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
 {
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
 		return;
-	if (RPCXPRT_CONGESTED(xprt))
+	if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+		goto out_unlock;
+	if (xprt_need_congestion_window_wait(xprt))
 		goto out_unlock;
 	if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
-				__xprt_lock_write_cong_func, xprt))
+				__xprt_lock_write_func, xprt))
 		return;
 out_unlock:
 	xprt_clear_locked(xprt);
 }
 
-static void xprt_task_clear_bytes_sent(struct rpc_task *task)
-{
-	if (task != NULL) {
-		struct rpc_rqst *req = task->tk_rqstp;
-		if (req != NULL)
-			req->rq_bytes_sent = 0;
-	}
-}
-
 /**
  * xprt_release_xprt - allow other requests to use a transport
  * @xprt: transport with other tasks potentially waiting
@@ -351,7 +354,6 @@
 void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	if (xprt->snd_task == task) {
-		xprt_task_clear_bytes_sent(task);
 		xprt_clear_locked(xprt);
 		__xprt_lock_write_next(xprt);
 	}
@@ -369,7 +371,6 @@
 void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	if (xprt->snd_task == task) {
-		xprt_task_clear_bytes_sent(task);
 		xprt_clear_locked(xprt);
 		__xprt_lock_write_next_cong(xprt);
 	}
@@ -378,9 +379,11 @@
 
 static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-	spin_lock_bh(&xprt->transport_lock);
+	if (xprt->snd_task != task)
+		return;
+	spin_lock(&xprt->transport_lock);
 	xprt->ops->release_xprt(xprt, task);
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 }
 
 /*
@@ -388,16 +391,16 @@
  * overflowed. Put the task to sleep if this is the case.
  */
 static int
-__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
+__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
-
 	if (req->rq_cong)
 		return 1;
 	dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n",
-			task->tk_pid, xprt->cong, xprt->cwnd);
-	if (RPCXPRT_CONGESTED(xprt))
+			req->rq_task->tk_pid, xprt->cong, xprt->cwnd);
+	if (RPCXPRT_CONGESTED(xprt)) {
+		xprt_set_congestion_window_wait(xprt);
 		return 0;
+	}
 	req->rq_cong = 1;
 	xprt->cong += RPC_CWNDSCALE;
 	return 1;
@@ -414,10 +417,32 @@
 		return;
 	req->rq_cong = 0;
 	xprt->cong -= RPC_CWNDSCALE;
+	xprt_test_and_clear_congestion_window_wait(xprt);
 	__xprt_lock_write_next_cong(xprt);
 }
 
 /**
+ * xprt_request_get_cong - Request congestion control credits
+ * @xprt: pointer to transport
+ * @req: pointer to RPC request
+ *
+ * Useful for transports that require congestion control.
+ */
+bool
+xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	bool ret = false;
+
+	if (req->rq_cong)
+		return true;
+	spin_lock(&xprt->transport_lock);
+	ret = __xprt_get_cong(xprt, req) != 0;
+	spin_unlock(&xprt->transport_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xprt_request_get_cong);
+
+/**
  * xprt_release_rqst_cong - housekeeping when request is complete
  * @task: RPC request that recently completed
  *
@@ -431,6 +456,26 @@
 }
 EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
 
+static void xprt_clear_congestion_window_wait_locked(struct rpc_xprt *xprt)
+{
+	if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state))
+		__xprt_lock_write_next_cong(xprt);
+}
+
+/*
+ * Clear the congestion window wait flag and wake up the next
+ * entry on xprt->sending
+ */
+static void
+xprt_clear_congestion_window_wait(struct rpc_xprt *xprt)
+{
+	if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) {
+		spin_lock(&xprt->transport_lock);
+		__xprt_lock_write_next_cong(xprt);
+		spin_unlock(&xprt->transport_lock);
+	}
+}
+
 /**
  * xprt_adjust_cwnd - adjust transport congestion window
  * @xprt: pointer to xprt
@@ -488,89 +533,87 @@
 
 /**
  * xprt_wait_for_buffer_space - wait for transport output buffer to clear
- * @task: task to be put to sleep
- * @action: function pointer to be executed after wait
+ * @xprt: transport
  *
  * Note that we only set the timer for the case of RPC_IS_SOFT(), since
  * we don't in general want to force a socket disconnection due to
  * an incomplete RPC call transmission.
  */
-void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action)
+void xprt_wait_for_buffer_space(struct rpc_xprt *xprt)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
-	struct rpc_xprt *xprt = req->rq_xprt;
-
-	task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
-	rpc_sleep_on(&xprt->pending, task, action);
+	set_bit(XPRT_WRITE_SPACE, &xprt->state);
 }
 EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
 
+static bool
+xprt_clear_write_space_locked(struct rpc_xprt *xprt)
+{
+	if (test_and_clear_bit(XPRT_WRITE_SPACE, &xprt->state)) {
+		__xprt_lock_write_next(xprt);
+		dprintk("RPC:       write space: waking waiting task on "
+				"xprt %p\n", xprt);
+		return true;
+	}
+	return false;
+}
+
 /**
  * xprt_write_space - wake the task waiting for transport output buffer space
  * @xprt: transport with waiting tasks
  *
  * Can be called in a soft IRQ context, so xprt_write_space never sleeps.
  */
-void xprt_write_space(struct rpc_xprt *xprt)
+bool xprt_write_space(struct rpc_xprt *xprt)
 {
-	spin_lock_bh(&xprt->transport_lock);
-	if (xprt->snd_task) {
-		dprintk("RPC:       write space: waking waiting task on "
-				"xprt %p\n", xprt);
-		rpc_wake_up_queued_task_on_wq(xprtiod_workqueue,
-				&xprt->pending, xprt->snd_task);
-	}
-	spin_unlock_bh(&xprt->transport_lock);
+	bool ret;
+
+	if (!test_bit(XPRT_WRITE_SPACE, &xprt->state))
+		return false;
+	spin_lock(&xprt->transport_lock);
+	ret = xprt_clear_write_space_locked(xprt);
+	spin_unlock(&xprt->transport_lock);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(xprt_write_space);
 
-/**
- * xprt_set_retrans_timeout_def - set a request's retransmit timeout
- * @task: task whose timeout is to be set
- *
- * Set a request's retransmit timeout based on the transport's
- * default timeout parameters.  Used by transports that don't adjust
- * the retransmit timeout based on round-trip time estimation.
- */
-void xprt_set_retrans_timeout_def(struct rpc_task *task)
+static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)
 {
-	task->tk_timeout = task->tk_rqstp->rq_timeout;
+	s64 delta = ktime_to_ns(ktime_get() - abstime);
+	return likely(delta >= 0) ?
+		jiffies - nsecs_to_jiffies(delta) :
+		jiffies + nsecs_to_jiffies(-delta);
 }
-EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
 
-/**
- * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
- * @task: task whose timeout is to be set
- *
- * Set a request's retransmit timeout using the RTT estimator.
- */
-void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
+static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
 {
-	int timer = task->tk_msg.rpc_proc->p_timer;
-	struct rpc_clnt *clnt = task->tk_client;
-	struct rpc_rtt *rtt = clnt->cl_rtt;
-	struct rpc_rqst *req = task->tk_rqstp;
-	unsigned long max_timeout = clnt->cl_timeout->to_maxval;
+	const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
+	unsigned long majortimeo = req->rq_timeout;
 
-	task->tk_timeout = rpc_calc_rto(rtt, timer);
-	task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
-	if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
-		task->tk_timeout = max_timeout;
+	if (to->to_exponential)
+		majortimeo <<= to->to_retries;
+	else
+		majortimeo += to->to_increment * to->to_retries;
+	if (majortimeo > to->to_maxval || majortimeo == 0)
+		majortimeo = to->to_maxval;
+	return majortimeo;
 }
-EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
 
 static void xprt_reset_majortimeo(struct rpc_rqst *req)
 {
-	const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
+	req->rq_majortimeo += xprt_calc_majortimeo(req);
+}
 
-	req->rq_majortimeo = req->rq_timeout;
-	if (to->to_exponential)
-		req->rq_majortimeo <<= to->to_retries;
+static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
+{
+	unsigned long time_init;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (likely(xprt && xprt_connected(xprt)))
+		time_init = jiffies;
 	else
-		req->rq_majortimeo += to->to_increment * to->to_retries;
-	if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0)
-		req->rq_majortimeo = to->to_maxval;
-	req->rq_majortimeo += jiffies;
+		time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
+	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
+	req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
 }
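xprt_calc_majortimeo() centralizes the old inline arithmetic. A worked example with to_initval = 60s and to_retries = 2: exponential backoff gives 60 << 2 = 240s, linear backoff with to_increment = 60s gives 60 + 60*2 = 180s, and either result is capped at to_maxval. xprt_init_majortimeo() then anchors the deadline at connect time — or at the task's start time for an unconnected transport — instead of unconditionally at the current jiffies.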
 
 /**
@@ -597,9 +640,9 @@
 		req->rq_retries = 0;
 		xprt_reset_majortimeo(req);
 		/* Reset the RTT counters == "slow start" */
-		spin_lock_bh(&xprt->transport_lock);
+		spin_lock(&xprt->transport_lock);
 		rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
-		spin_unlock_bh(&xprt->transport_lock);
+		spin_unlock(&xprt->transport_lock);
 		status = -ETIMEDOUT;
 	}
 
@@ -614,11 +657,13 @@
 {
 	struct rpc_xprt *xprt =
 		container_of(work, struct rpc_xprt, task_cleanup);
+	unsigned int pflags = memalloc_nofs_save();
 
 	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 	xprt->ops->close(xprt);
 	xprt_release_write(xprt, NULL);
 	wake_up_bit(&xprt->state, XPRT_LOCKED);
+	memalloc_nofs_restore(pflags);
 }
 
 /**
@@ -629,10 +674,12 @@
 void xprt_disconnect_done(struct rpc_xprt *xprt)
 {
 	dprintk("RPC:       disconnected transport %p\n", xprt);
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->transport_lock);
 	xprt_clear_connected(xprt);
-	xprt_wake_pending_tasks(xprt, -EAGAIN);
-	spin_unlock_bh(&xprt->transport_lock);
+	xprt_clear_write_space_locked(xprt);
+	xprt_clear_congestion_window_wait_locked(xprt);
+	xprt_wake_pending_tasks(xprt, -ENOTCONN);
+	spin_unlock(&xprt->transport_lock);
 }
 EXPORT_SYMBOL_GPL(xprt_disconnect_done);
 
@@ -644,16 +691,34 @@
 void xprt_force_disconnect(struct rpc_xprt *xprt)
 {
 	/* Don't race with the test_bit() in xprt_clear_locked() */
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->transport_lock);
 	set_bit(XPRT_CLOSE_WAIT, &xprt->state);
 	/* Try to schedule an autoclose RPC call */
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
 		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
-	xprt_wake_pending_tasks(xprt, -EAGAIN);
-	spin_unlock_bh(&xprt->transport_lock);
+	else if (xprt->snd_task)
+		rpc_wake_up_queued_task_set_status(&xprt->pending,
+				xprt->snd_task, -ENOTCONN);
+	spin_unlock(&xprt->transport_lock);
 }
 EXPORT_SYMBOL_GPL(xprt_force_disconnect);
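Previously every pending task was woken with -EAGAIN. Now, when the autoclose work cannot be scheduled because another task already holds XPRT_LOCKED, only that lock holder is woken, with -ENOTCONN, so it notices the forced disconnect promptly instead of retrying against a dead connection.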
 
+static unsigned int
+xprt_connect_cookie(struct rpc_xprt *xprt)
+{
+	return READ_ONCE(xprt->connect_cookie);
+}
+
+static bool
+xprt_request_retransmit_after_disconnect(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	return req->rq_connect_cookie != xprt_connect_cookie(xprt) ||
+		!xprt_connected(xprt);
+}
+
 /**
  * xprt_conditional_disconnect - force a transport to disconnect
  * @xprt: transport to disconnect
@@ -668,7 +733,7 @@
 void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
 {
 	/* Don't race with the test_bit() in xprt_clear_locked() */
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->transport_lock);
 	if (cookie != xprt->connect_cookie)
 		goto out;
 	if (test_bit(XPRT_CLOSING, &xprt->state))
@@ -679,7 +744,7 @@
 		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
 	xprt_wake_pending_tasks(xprt, -EAGAIN);
 out:
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 }
 
 static bool
@@ -692,7 +757,8 @@
 xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
 	__must_hold(&xprt->transport_lock)
 {
-	if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+	xprt->last_used = jiffies;
+	if (RB_EMPTY_ROOT(&xprt->recv_queue) && xprt_has_timer(xprt))
 		mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
 }
 
@@ -701,18 +767,13 @@
 {
 	struct rpc_xprt *xprt = from_timer(xprt, t, timer);
 
-	spin_lock(&xprt->transport_lock);
-	if (!list_empty(&xprt->recv))
-		goto out_abort;
+	if (!RB_EMPTY_ROOT(&xprt->recv_queue))
+		return;
 	/* Reset xprt->last_used to avoid connect/autodisconnect cycling */
 	xprt->last_used = jiffies;
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
-		goto out_abort;
-	spin_unlock(&xprt->transport_lock);
+		return;
 	queue_work(xprtiod_workqueue, &xprt->task_cleanup);
-	return;
-out_abort:
-	spin_unlock(&xprt->transport_lock);
 }
 
 bool xprt_lock_connect(struct rpc_xprt *xprt,
@@ -721,22 +782,21 @@
 {
 	bool ret = false;
 
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->transport_lock);
 	if (!test_bit(XPRT_LOCKED, &xprt->state))
 		goto out;
 	if (xprt->snd_task != task)
 		goto out;
-	xprt_task_clear_bytes_sent(task);
 	xprt->snd_task = cookie;
 	ret = true;
 out:
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 	return ret;
 }
 
 void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
 {
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->transport_lock);
 	if (xprt->snd_task != cookie)
 		goto out;
 	if (!test_bit(XPRT_LOCKED, &xprt->state))
@@ -745,7 +805,7 @@
 	xprt->ops->release_xprt(xprt, NULL);
 	xprt_schedule_autodisconnect(xprt);
 out:
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 	wake_up_bit(&xprt->state, XPRT_LOCKED);
 }
 
@@ -772,10 +832,9 @@
 		xprt->ops->close(xprt);
 
 	if (!xprt_connected(xprt)) {
-		task->tk_rqstp->rq_bytes_sent = 0;
-		task->tk_timeout = task->tk_rqstp->rq_timeout;
 		task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
-		rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
+		rpc_sleep_on_timeout(&xprt->pending, task, NULL,
+				xprt_request_timeout(task->tk_rqstp));
 
 		if (test_bit(XPRT_CLOSING, &xprt->state))
 			return;
@@ -794,38 +853,105 @@
 	xprt_release_write(xprt, task);
 }
 
-static void xprt_connect_status(struct rpc_task *task)
+/**
+ * xprt_reconnect_delay - compute the wait before scheduling a connect
+ * @xprt: transport instance
+ *
+ */
+unsigned long xprt_reconnect_delay(const struct rpc_xprt *xprt)
 {
-	struct rpc_xprt	*xprt = task->tk_rqstp->rq_xprt;
+	unsigned long start, now = jiffies;
 
-	if (task->tk_status == 0) {
-		xprt->stat.connect_count++;
-		xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
-		dprintk("RPC: %5u xprt_connect_status: connection established\n",
-				task->tk_pid);
-		return;
-	}
+	start = xprt->stat.connect_start + xprt->reestablish_timeout;
+	if (time_after(start, now))
+		return start - now;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xprt_reconnect_delay);
 
-	switch (task->tk_status) {
-	case -ECONNREFUSED:
-	case -ECONNRESET:
-	case -ECONNABORTED:
-	case -ENETUNREACH:
-	case -EHOSTUNREACH:
-	case -EPIPE:
-	case -EAGAIN:
-		dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
-		break;
-	case -ETIMEDOUT:
-		dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
-				"out\n", task->tk_pid);
-		break;
-	default:
-		dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
-				"server %s\n", task->tk_pid, -task->tk_status,
-				xprt->servername);
-		task->tk_status = -EIO;
+/**
+ * xprt_reconnect_backoff - compute the new re-establish timeout
+ * @xprt: transport instance
+ * @init_to: initial reestablish timeout
+ *
+ */
+void xprt_reconnect_backoff(struct rpc_xprt *xprt, unsigned long init_to)
+{
+	xprt->reestablish_timeout <<= 1;
+	if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
+		xprt->reestablish_timeout = xprt->max_reconnect_timeout;
+	if (xprt->reestablish_timeout < init_to)
+		xprt->reestablish_timeout = init_to;
+}
+EXPORT_SYMBOL_GPL(xprt_reconnect_backoff);
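
A sketch of the doubling-with-clamp policy above; the numbers are an example only. Starting from 3 seconds with a 300-second cap, successive calls yield 6, 12, 24, ..., 300, 300, ... between connection attempts:

static unsigned long backoff_next(unsigned long cur, unsigned long init_to,
				  unsigned long max_to)
{
	cur <<= 1;		/* double the reestablish timeout */
	if (cur > max_to)
		cur = max_to;	/* never exceed the cap */
	if (cur < init_to)
		cur = init_to;	/* never go below the initial value */
	return cur;
}
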
+
+enum xprt_xid_rb_cmp {
+	XID_RB_EQUAL,
+	XID_RB_LEFT,
+	XID_RB_RIGHT,
+};
+
+static enum xprt_xid_rb_cmp
+xprt_xid_cmp(__be32 xid1, __be32 xid2)
+{
+	if (xid1 == xid2)
+		return XID_RB_EQUAL;
+	if ((__force u32)xid1 < (__force u32)xid2)
+		return XID_RB_LEFT;
+	return XID_RB_RIGHT;
+}
+
+static struct rpc_rqst *
+xprt_request_rb_find(struct rpc_xprt *xprt, __be32 xid)
+{
+	struct rb_node *n = xprt->recv_queue.rb_node;
+	struct rpc_rqst *req;
+
+	while (n != NULL) {
+		req = rb_entry(n, struct rpc_rqst, rq_recv);
+		switch (xprt_xid_cmp(xid, req->rq_xid)) {
+		case XID_RB_LEFT:
+			n = n->rb_left;
+			break;
+		case XID_RB_RIGHT:
+			n = n->rb_right;
+			break;
+		case XID_RB_EQUAL:
+			return req;
+		}
 	}
+	return NULL;
+}
+
+static void
+xprt_request_rb_insert(struct rpc_xprt *xprt, struct rpc_rqst *new)
+{
+	struct rb_node **p = &xprt->recv_queue.rb_node;
+	struct rb_node *n = NULL;
+	struct rpc_rqst *req;
+
+	while (*p != NULL) {
+		n = *p;
+		req = rb_entry(n, struct rpc_rqst, rq_recv);
+		switch (xprt_xid_cmp(new->rq_xid, req->rq_xid)) {
+		case XID_RB_LEFT:
+			p = &n->rb_left;
+			break;
+		case XID_RB_RIGHT:
+			p = &n->rb_right;
+			break;
+		case XID_RB_EQUAL:
+			WARN_ON_ONCE(new != req);
+			return;
+		}
+	}
+	rb_link_node(&new->rq_recv, n, p);
+	rb_insert_color(&new->rq_recv, &xprt->recv_queue);
+}
+
+static void
+xprt_request_rb_remove(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	rb_erase(&req->rq_recv, &xprt->recv_queue);
 }
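
The receive queue moves from a linear list to an XID-keyed red-black tree, making reply matching O(log n) instead of O(n). The shape of the search, modelled on a plain binary search tree with user-space types (the kernel version uses the rbtree API and __be32 keys):

#include <stdint.h>
#include <stddef.h>

struct node {
	uint32_t xid;
	struct node *left, *right;
};

static struct node *bst_find(struct node *n, uint32_t xid)
{
	while (n) {
		if (xid == n->xid)
			return n;
		/* ordering is on the raw 32-bit value; XIDs are opaque */
		n = (xid < n->xid) ? n->left : n->right;
	}
	return NULL;
}
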
 
 /**
@@ -833,18 +959,18 @@
  * @xprt: transport on which the original request was transmitted
  * @xid: RPC XID of incoming reply
  *
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
  */
 struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 {
 	struct rpc_rqst *entry;
 
-	list_for_each_entry(entry, &xprt->recv, rq_list)
-		if (entry->rq_xid == xid) {
-			trace_xprt_lookup_rqst(xprt, xid, 0);
-			entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
-			return entry;
-		}
+	entry = xprt_request_rb_find(xprt, xid);
+	if (entry != NULL) {
+		trace_xprt_lookup_rqst(xprt, xid, 0);
+		entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
+		return entry;
+	}
 
 	dprintk("RPC:       xprt_lookup_rqst did not find xid %08x\n",
 			ntohl(xid));
@@ -854,16 +980,22 @@
 }
 EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
 
+static bool
+xprt_is_pinned_rqst(struct rpc_rqst *req)
+{
+	return atomic_read(&req->rq_pin) != 0;
+}
+
 /**
  * xprt_pin_rqst - Pin a request on the transport receive list
  * @req: Request to pin
  *
  * Caller must ensure this is atomic with the call to xprt_lookup_rqst()
- * so should be holding the xprt transport lock.
+ * so should be holding xprt->queue_lock.
  */
 void xprt_pin_rqst(struct rpc_rqst *req)
 {
-	set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
+	atomic_inc(&req->rq_pin);
 }
 EXPORT_SYMBOL_GPL(xprt_pin_rqst);
 
@@ -871,38 +1003,88 @@
  * xprt_unpin_rqst - Unpin a request on the transport receive list
  * @req: Request to unpin
  *
- * Caller should be holding the xprt transport lock.
+ * Caller should be holding xprt->queue_lock.
  */
 void xprt_unpin_rqst(struct rpc_rqst *req)
 {
-	struct rpc_task *task = req->rq_task;
-
-	clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
-	if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
-		wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
+	if (!test_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate)) {
+		atomic_dec(&req->rq_pin);
+		return;
+	}
+	if (atomic_dec_and_test(&req->rq_pin))
+		wake_up_var(&req->rq_pin);
 }
 EXPORT_SYMBOL_GPL(xprt_unpin_rqst);
 
 static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
-__must_hold(&req->rq_xprt->recv_lock)
 {
-	struct rpc_task *task = req->rq_task;
+	wait_var_event(&req->rq_pin, !xprt_is_pinned_rqst(req));
+}
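
Request pinning is now an atomic counter plus wait_var_event()/wake_up_var(); the RPC_TASK_MSG_PIN_WAIT bit lets the common unpin skip the wakeup entirely. A rough pthread analogue, illustrative only (the kernel's fast path takes no lock at all):

#include <pthread.h>

struct pin {
	int count;			/* models atomic rq_pin */
	int waiter;			/* models RPC_TASK_MSG_PIN_WAIT */
	pthread_mutex_t lock;
	pthread_cond_t cv;
};

static void unpin(struct pin *p)
{
	pthread_mutex_lock(&p->lock);
	if (--p->count == 0 && p->waiter)
		pthread_cond_signal(&p->cv);	/* models wake_up_var() */
	pthread_mutex_unlock(&p->lock);
}

static void wait_unpinned(struct pin *p)
{
	pthread_mutex_lock(&p->lock);
	p->waiter = 1;
	while (p->count != 0)			/* models wait_var_event() */
		pthread_cond_wait(&p->cv, &p->lock);
	p->waiter = 0;
	pthread_mutex_unlock(&p->lock);
}
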
 
-	if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
-		spin_unlock(&req->rq_xprt->recv_lock);
-		set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
-		wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
-				TASK_UNINTERRUPTIBLE);
-		clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
-		spin_lock(&req->rq_xprt->recv_lock);
-	}
+static bool
+xprt_request_data_received(struct rpc_task *task)
+{
+	return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+		READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) != 0;
+}
+
+static bool
+xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req)
+{
+	return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+		READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) == 0;
+}
+
+/**
+ * xprt_request_enqueue_receive - Add a request to the receive queue
+ * @task: RPC task
+ *
+ */
+void
+xprt_request_enqueue_receive(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (!xprt_request_need_enqueue_receive(task, req))
+		return;
+
+	xprt_request_prepare(task->tk_rqstp);
+	spin_lock(&xprt->queue_lock);
+
+	/* Update the softirq receive buffer */
+	memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+			sizeof(req->rq_private_buf));
+
+	/* Add request to the receive list */
+	xprt_request_rb_insert(xprt, req);
+	set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
+	spin_unlock(&xprt->queue_lock);
+
+	/* Turn off autodisconnect */
+	del_singleshot_timer_sync(&xprt->timer);
+}
+
+/**
+ * xprt_request_dequeue_receive_locked - Remove a request from the receive queue
+ * @task: RPC task
+ *
+ * Caller must hold xprt->queue_lock.
+ */
+static void
+xprt_request_dequeue_receive_locked(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+
+	if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
+		xprt_request_rb_remove(req->rq_xprt, req);
 }
 
 /**
  * xprt_update_rtt - Update RPC RTT statistics
  * @task: RPC request that recently completed
  *
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
  */
 void xprt_update_rtt(struct rpc_task *task)
 {
@@ -924,7 +1106,7 @@
  * @task: RPC request that recently completed
  * @copied: actual number of bytes received from the transport
  *
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
  */
 void xprt_complete_rqst(struct rpc_task *task, int copied)
 {
@@ -937,12 +1119,12 @@
 
 	xprt->stat.recvs++;
 
-	list_del_init(&req->rq_list);
 	req->rq_private_buf.len = copied;
 	/* Ensure all writes are done before we update */
 	/* req->rq_reply_bytes_recvd */
 	smp_wmb();
 	req->rq_reply_bytes_recvd = copied;
+	xprt_request_dequeue_receive_locked(task);
 	rpc_wake_up_queued_task(&xprt->pending, task);
 }
 EXPORT_SYMBOL_GPL(xprt_complete_rqst);
@@ -964,6 +1146,249 @@
 }
 
 /**
+ * xprt_wait_for_reply_request_def - wait for reply
+ * @task: pointer to rpc_task
+ *
+ * Set a request's retransmit timeout based on the transport's
+ * default timeout parameters, and put the task to sleep on the
+ * pending queue.  Used by transports that don't adjust the
+ * retransmit timeout based on round-trip time estimation.
+ */
+void xprt_wait_for_reply_request_def(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+
+	rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
+			xprt_request_timeout(req));
+}
+EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def);
+
+/**
+ * xprt_wait_for_reply_request_rtt - wait for reply using RTT estimator
+ * @task: pointer to rpc_task
+ *
+ * Set a request's retransmit timeout using the RTT estimator,
+ * and put the task to sleep on the pending queue.
+ */
+void xprt_wait_for_reply_request_rtt(struct rpc_task *task)
+{
+	int timer = task->tk_msg.rpc_proc->p_timer;
+	struct rpc_clnt *clnt = task->tk_client;
+	struct rpc_rtt *rtt = clnt->cl_rtt;
+	struct rpc_rqst *req = task->tk_rqstp;
+	unsigned long max_timeout = clnt->cl_timeout->to_maxval;
+	unsigned long timeout;
+
+	timeout = rpc_calc_rto(rtt, timer);
+	timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
+	if (timeout > max_timeout || timeout == 0)
+		timeout = max_timeout;
+	rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
+			jiffies + timeout);
+}
+EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_rtt);
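
A worked example of the estimator-based timeout above: assuming an estimated RTO of 200 jiffies for the operation class, one prior timeout recorded for it, and one retransmit so far, the wait is 200 << (1 + 1) = 800 jiffies, clamped to to_maxval:

static unsigned long rtt_timeout(unsigned long rto, unsigned int ntimeo,
				 unsigned int retries, unsigned long maxval)
{
	/* back off exponentially in both recorded timeouts and retries */
	unsigned long timeout = rto << (ntimeo + retries);

	if (timeout > maxval || timeout == 0)
		timeout = maxval;
	return timeout;
}
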
+
+/**
+ * xprt_request_wait_receive - wait for the reply to an RPC request
+ * @task: RPC task about to send a request
+ *
+ */
+void xprt_request_wait_receive(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (!test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
+		return;
+	/*
+	 * Sleep on the pending queue if we're expecting a reply.
+	 * The spinlock ensures atomicity between the test of
+	 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
+	 */
+	spin_lock(&xprt->queue_lock);
+	if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
+		xprt->ops->wait_for_reply_request(task);
+		/*
+		 * Send an extra queue wakeup call if the
+		 * connection was dropped in case the call to
+		 * rpc_sleep_on() raced.
+		 */
+		if (xprt_request_retransmit_after_disconnect(task))
+			rpc_wake_up_queued_task_set_status(&xprt->pending,
+					task, -ENOTCONN);
+	}
+	spin_unlock(&xprt->queue_lock);
+}
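
The function above uses the classic check/lock/re-check pattern: the unlocked test is only an optimization, and the locked re-test makes the decision to sleep atomic with reply delivery. In miniature, with hypothetical names (note the callback enqueues the waiter, it does not block inline):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static void wait_for_reply(volatile bool *need_recv,
			   void (*enqueue_waiter)(void))
{
	if (!*need_recv)
		return;			/* cheap unlocked fast path */
	pthread_mutex_lock(&queue_lock);
	if (*need_recv)			/* decision atomic w.r.t. wakeups */
		enqueue_waiter();
	pthread_mutex_unlock(&queue_lock);
}
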
+
+static bool
+xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req)
+{
+	return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+}
+
+/**
+ * xprt_request_enqueue_transmit - queue a task for transmission
+ * @task: pointer to rpc_task
+ *
+ * Add a task to the transmission queue.
+ */
+void
+xprt_request_enqueue_transmit(struct rpc_task *task)
+{
+	struct rpc_rqst *pos, *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (xprt_request_need_enqueue_transmit(task, req)) {
+		req->rq_bytes_sent = 0;
+		spin_lock(&xprt->queue_lock);
+		/*
+		 * Requests that carry congestion control credits are added
+		 * to the head of the list to avoid starvation issues.
+		 */
+		if (req->rq_cong) {
+			xprt_clear_congestion_window_wait(xprt);
+			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+				if (pos->rq_cong)
+					continue;
+				/* Note: req is added _before_ pos */
+				list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+				INIT_LIST_HEAD(&req->rq_xmit2);
+				trace_xprt_enq_xmit(task, 1);
+				goto out;
+			}
+		} else if (RPC_IS_SWAPPER(task)) {
+			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+				if (pos->rq_cong || pos->rq_bytes_sent)
+					continue;
+				if (RPC_IS_SWAPPER(pos->rq_task))
+					continue;
+				/* Note: req is added _before_ pos */
+				list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+				INIT_LIST_HEAD(&req->rq_xmit2);
+				trace_xprt_enq_xmit(task, 2);
+				goto out;
+			}
+		} else if (!req->rq_seqno) {
+			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+				if (pos->rq_task->tk_owner != task->tk_owner)
+					continue;
+				list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
+				INIT_LIST_HEAD(&req->rq_xmit);
+				trace_xprt_enq_xmit(task, 3);
+				goto out;
+			}
+		}
+		list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
+		INIT_LIST_HEAD(&req->rq_xmit2);
+		trace_xprt_enq_xmit(task, 4);
+out:
+		set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+		spin_unlock(&xprt->queue_lock);
+	}
+}
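
The congestion-credit branch above inserts ahead of the first non-credited request while keeping FIFO order within each class. The same insertion on a plain circular doubly linked list with a sentinel head (illustrative types; the kernel uses list_add_tail() on struct list_head):

#include <stdbool.h>

struct ent { bool privileged; struct ent *prev, *next; };

/* head is a sentinel node whose next/prev point into the ring */
static void enqueue(struct ent *head, struct ent *new)
{
	struct ent *pos;

	for (pos = head->next; pos != head; pos = pos->next) {
		if (new->privileged && !pos->privileged)
			break;		/* insert before first unprivileged */
	}
	/* link 'new' immediately before 'pos' (list_add_tail semantics) */
	new->prev = pos->prev;
	new->next = pos;
	pos->prev->next = new;
	pos->prev = new;
}
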
+
+/**
+ * xprt_request_dequeue_transmit_locked - remove a task from the transmission queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmission queue
+ * Caller must hold xprt->queue_lock
+ */
+static void
+xprt_request_dequeue_transmit_locked(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+
+	if (!test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+		return;
+	if (!list_empty(&req->rq_xmit)) {
+		list_del(&req->rq_xmit);
+		if (!list_empty(&req->rq_xmit2)) {
+			struct rpc_rqst *next = list_first_entry(&req->rq_xmit2,
+					struct rpc_rqst, rq_xmit2);
+			list_del(&req->rq_xmit2);
+			list_add_tail(&next->rq_xmit, &next->rq_xprt->xmit_queue);
+		}
+	} else
+		list_del(&req->rq_xmit2);
+}
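
The two list heads give a two-level queue: rq_xmit links a request into the transport's xmit_queue, while rq_xmit2 hangs same-owner followers off a queued leader. Dequeueing a leader promotes its first follower to the tail of the main queue, roughly:

/*
 * Before:  xmit_queue:  A -- B -- C        (linked via rq_xmit)
 *                            |
 *                            B2 -- B3      (same-owner batch via rq_xmit2)
 *
 * After dequeueing B:   A -- C -- B2       (B3 still batched behind B2)
 */
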
+
+/**
+ * xprt_request_dequeue_transmit - remove a task from the transmission queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmission queue
+ */
+static void
+xprt_request_dequeue_transmit(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	spin_lock(&xprt->queue_lock);
+	xprt_request_dequeue_transmit_locked(task);
+	spin_unlock(&xprt->queue_lock);
+}
+
+/**
+ * xprt_request_dequeue_xprt - remove a task from the transmit+receive queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmit and receive queues, and ensure that
+ * it is not pinned by the receive work item.
+ */
+void
+xprt_request_dequeue_xprt(struct rpc_task *task)
+{
+	struct rpc_rqst	*req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
+	    test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
+	    xprt_is_pinned_rqst(req)) {
+		spin_lock(&xprt->queue_lock);
+		xprt_request_dequeue_transmit_locked(task);
+		xprt_request_dequeue_receive_locked(task);
+		while (xprt_is_pinned_rqst(req)) {
+			set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+			spin_unlock(&xprt->queue_lock);
+			xprt_wait_on_pinned_rqst(req);
+			spin_lock(&xprt->queue_lock);
+			clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+		}
+		spin_unlock(&xprt->queue_lock);
+	}
+}
+
+/**
+ * xprt_request_prepare - prepare an encoded request for transport
+ * @req: pointer to rpc_rqst
+ *
+ * Calls into the transport layer to do whatever is needed to prepare
+ * the request for transmission or reception.
+ */
+void
+xprt_request_prepare(struct rpc_rqst *req)
+{
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (xprt->ops->prepare_request)
+		xprt->ops->prepare_request(req);
+}
+
+/**
+ * xprt_request_need_retransmit - Test if a task needs retransmission
+ * @task: pointer to rpc_task
+ *
+ * Test for whether a connection breakage requires the task to retransmit
+ */
+bool
+xprt_request_need_retransmit(struct rpc_task *task)
+{
+	return xprt_request_retransmit_after_disconnect(task);
+}
+
+/**
  * xprt_prepare_transmit - reserve the transport before sending a request
  * @task: RPC task about to send a request
  *
@@ -972,32 +1397,18 @@
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
 	struct rpc_xprt	*xprt = req->rq_xprt;
-	bool ret = false;
 
 	dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid);
 
-	spin_lock_bh(&xprt->transport_lock);
-	if (!req->rq_bytes_sent) {
-		if (req->rq_reply_bytes_recvd) {
-			task->tk_status = req->rq_reply_bytes_recvd;
-			goto out_unlock;
-		}
-		if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT)
-		    && xprt_connected(xprt)
-		    && req->rq_connect_cookie == xprt->connect_cookie) {
-			xprt->ops->set_retrans_timeout(task);
-			rpc_sleep_on(&xprt->pending, task, xprt_timer);
-			goto out_unlock;
-		}
+	if (!xprt_lock_write(xprt, task)) {
+		/* Race breaker: another task may have transmitted this request */
+		if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+			rpc_wake_up_queued_task_set_status(&xprt->sending,
+					task, 0);
+		return false;
 	}
-	if (!xprt->ops->reserve_xprt(xprt, task)) {
-		task->tk_status = -EAGAIN;
-		goto out_unlock;
-	}
-	ret = true;
-out_unlock:
-	spin_unlock_bh(&xprt->transport_lock);
-	return ret;
+	return true;
 }
 
 void xprt_end_transmit(struct rpc_task *task)
@@ -1006,81 +1417,115 @@
 }
 
 /**
- * xprt_transmit - send an RPC request on a transport
- * @task: controlling RPC task
+ * xprt_request_transmit - send an RPC request on a transport
+ * @req: pointer to request to transmit
+ * @snd_task: RPC task that owns the transport lock
  *
- * We have to copy the iovec because sendmsg fiddles with its contents.
+ * This performs the transmission of a single request.
+ * Note that if @req is not the request currently owned by @snd_task,
+ * the caller must already hold a pin on it.
+ * Returns '0' on success.
  */
-void xprt_transmit(struct rpc_task *task)
+static int
+xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 {
-	struct rpc_rqst	*req = task->tk_rqstp;
-	struct rpc_xprt	*xprt = req->rq_xprt;
+	struct rpc_xprt *xprt = req->rq_xprt;
+	struct rpc_task *task = req->rq_task;
 	unsigned int connect_cookie;
+	int is_retrans = RPC_WAS_SENT(task);
 	int status;
 
-	dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
-
-	if (!req->rq_reply_bytes_recvd) {
-		if (list_empty(&req->rq_list) && rpc_reply_expected(task)) {
-			/*
-			 * Add to the list only if we're expecting a reply
-			 */
-			/* Update the softirq receive buffer */
-			memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
-					sizeof(req->rq_private_buf));
-			/* Add request to the receive list */
-			spin_lock(&xprt->recv_lock);
-			list_add_tail(&req->rq_list, &xprt->recv);
-			spin_unlock(&xprt->recv_lock);
-			xprt_reset_majortimeo(req);
-			/* Turn off autodisconnect */
-			del_singleshot_timer_sync(&xprt->timer);
+	if (!req->rq_bytes_sent) {
+		if (xprt_request_data_received(task)) {
+			status = 0;
+			goto out_dequeue;
 		}
-	} else if (!req->rq_bytes_sent)
-		return;
+		/* Verify that our message lies in the RPCSEC_GSS window */
+		if (rpcauth_xmit_need_reencode(task)) {
+			status = -EBADMSG;
+			goto out_dequeue;
+		}
+		if (RPC_SIGNALLED(task)) {
+			status = -ERESTARTSYS;
+			goto out_dequeue;
+		}
+	}
+
+	/*
+	 * Update req->rq_ntrans before transmitting to avoid races with
+	 * xprt_update_rtt(), which needs to know that it is recording a
+	 * reply to the first transmission.
+	 */
+	req->rq_ntrans++;
 
 	connect_cookie = xprt->connect_cookie;
-	status = xprt->ops->send_request(task);
-	trace_xprt_transmit(xprt, req->rq_xid, status);
+	status = xprt->ops->send_request(req);
 	if (status != 0) {
-		task->tk_status = status;
-		return;
+		req->rq_ntrans--;
+		trace_xprt_transmit(req, status);
+		return status;
 	}
+
+	if (is_retrans)
+		task->tk_client->cl_stats->rpcretrans++;
+
 	xprt_inject_disconnect(xprt);
 
-	dprintk("RPC: %5u xmit complete\n", task->tk_pid);
 	task->tk_flags |= RPC_TASK_SENT;
-	spin_lock_bh(&xprt->transport_lock);
-
-	xprt->ops->set_retrans_timeout(task);
+	spin_lock(&xprt->transport_lock);
 
 	xprt->stat.sends++;
 	xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
 	xprt->stat.bklog_u += xprt->backlog.qlen;
 	xprt->stat.sending_u += xprt->sending.qlen;
 	xprt->stat.pending_u += xprt->pending.qlen;
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 
 	req->rq_connect_cookie = connect_cookie;
-	if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
-		/*
-		 * Sleep on the pending queue if we're expecting a reply.
-		 * The spinlock ensures atomicity between the test of
-		 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
-		 */
-		spin_lock(&xprt->recv_lock);
-		if (!req->rq_reply_bytes_recvd) {
-			rpc_sleep_on(&xprt->pending, task, xprt_timer);
-			/*
-			 * Send an extra queue wakeup call if the
-			 * connection was dropped in case the call to
-			 * rpc_sleep_on() raced.
-			 */
-			if (!xprt_connected(xprt))
-				xprt_wake_pending_tasks(xprt, -ENOTCONN);
-		}
-		spin_unlock(&xprt->recv_lock);
+out_dequeue:
+	trace_xprt_transmit(req, status);
+	xprt_request_dequeue_transmit(task);
+	rpc_wake_up_queued_task_set_status(&xprt->sending, task, status);
+	return status;
+}
+
+/**
+ * xprt_transmit - send an RPC request on a transport
+ * @task: controlling RPC task
+ *
+ * Attempts to drain the transmit queue. On exit, either the transport
+ * signalled an error that needs to be handled before transmission can
+ * resume, or @task finished transmitting and detected that it already
+ * received a reply.
+ */
+void
+xprt_transmit(struct rpc_task *task)
+{
+	struct rpc_rqst *next, *req = task->tk_rqstp;
+	struct rpc_xprt	*xprt = req->rq_xprt;
+	int status;
+
+	spin_lock(&xprt->queue_lock);
+	while (!list_empty(&xprt->xmit_queue)) {
+		next = list_first_entry(&xprt->xmit_queue,
+				struct rpc_rqst, rq_xmit);
+		xprt_pin_rqst(next);
+		spin_unlock(&xprt->queue_lock);
+		status = xprt_request_transmit(next, task);
+		if (status == -EBADMSG && next != req)
+			status = 0;
+		cond_resched();
+		spin_lock(&xprt->queue_lock);
+		xprt_unpin_rqst(next);
+		if (status == 0) {
+			if (!xprt_request_data_received(task) ||
+			    test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+				continue;
+		} else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+			task->tk_status = status;
+		break;
 	}
+	spin_unlock(&xprt->queue_lock);
 }
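
The loop above follows a pin / drop-lock / work / re-lock / unpin discipline so the (sleepable) send runs without queue_lock held while the request stays alive. A compilable skeleton of the pattern, with placeholder types that stand in for the rpc_rqst machinery:

#include <pthread.h>

struct item { struct item *next; int pins; };
struct queue { struct item *head; pthread_mutex_t lock; };

static void process(struct item *it) { (void)it; /* may sleep */ }

static void drain(struct queue *q)
{
	pthread_mutex_lock(&q->lock);
	while (q->head) {
		struct item *it = q->head;

		it->pins++;			/* pin: survives the unlock */
		pthread_mutex_unlock(&q->lock);
		process(it);			/* blocking work, lock dropped */
		pthread_mutex_lock(&q->lock);
		it->pins--;			/* unpin under the lock */
		if (q->head == it)		/* nobody dequeued it for us */
			q->head = it->next;
	}
	pthread_mutex_unlock(&q->lock);
}
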
 
 static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
@@ -1177,20 +1622,6 @@
 }
 EXPORT_SYMBOL_GPL(xprt_alloc_slot);
 
-void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
-{
-	/* Note: grabbing the xprt_lock_write() ensures that we throttle
-	 * new slot allocation if the transport is congested (i.e. when
-	 * reconnecting a stream transport or when out of socket write
-	 * buffer space).
-	 */
-	if (xprt_lock_write(xprt, task)) {
-		xprt_alloc_slot(xprt, task);
-		xprt_release_write(xprt, task);
-	}
-}
-EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
-
 void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
 	spin_lock(&xprt->reserve_lock);
@@ -1257,6 +1688,60 @@
 }
 EXPORT_SYMBOL_GPL(xprt_free);
 
+static void
+xprt_init_connect_cookie(struct rpc_rqst *req, struct rpc_xprt *xprt)
+{
+	req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1;
+}
+
+static __be32
+xprt_alloc_xid(struct rpc_xprt *xprt)
+{
+	__be32 xid;
+
+	spin_lock(&xprt->reserve_lock);
+	xid = (__force __be32)xprt->xid++;
+	spin_unlock(&xprt->reserve_lock);
+	return xid;
+}
+
+static void
+xprt_init_xid(struct rpc_xprt *xprt)
+{
+	xprt->xid = prandom_u32();
+}
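
XIDs start at a random value and increment under reserve_lock, so concurrent reservers never hand out the same XID. Roughly, ignoring the __be32 wire-order cast and with random() standing in for prandom_u32():

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

static pthread_mutex_t reserve_lock = PTHREAD_MUTEX_INITIALIZER;
static uint32_t next_xid;

static void xid_init(void)
{
	next_xid = (uint32_t)random();	/* random start hinders guessing */
}

static uint32_t xid_alloc(void)
{
	uint32_t xid;

	pthread_mutex_lock(&reserve_lock);
	xid = next_xid++;
	pthread_mutex_unlock(&reserve_lock);
	return xid;
}
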
+
+static void
+xprt_request_init(struct rpc_task *task)
+{
+	struct rpc_xprt *xprt = task->tk_xprt;
+	struct rpc_rqst	*req = task->tk_rqstp;
+
+	req->rq_task	= task;
+	req->rq_xprt    = xprt;
+	req->rq_buffer  = NULL;
+	req->rq_xid	= xprt_alloc_xid(xprt);
+	xprt_init_connect_cookie(req, xprt);
+	req->rq_snd_buf.len = 0;
+	req->rq_snd_buf.buflen = 0;
+	req->rq_rcv_buf.len = 0;
+	req->rq_rcv_buf.buflen = 0;
+	req->rq_snd_buf.bvec = NULL;
+	req->rq_rcv_buf.bvec = NULL;
+	req->rq_release_snd_buf = NULL;
+	xprt_init_majortimeo(task, req);
+	dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
+			req, ntohl(req->rq_xid));
+}
+
+static void
+xprt_do_reserve(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+	xprt->ops->alloc_slot(xprt, task);
+	if (task->tk_rqstp != NULL)
+		xprt_request_init(task);
+}
+
 /**
  * xprt_reserve - allocate an RPC request slot
  * @task: RPC task requesting a slot allocation
@@ -1273,10 +1758,9 @@
 	if (task->tk_rqstp != NULL)
 		return;
 
-	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
 	if (!xprt_throttle_congested(xprt, task))
-		xprt->ops->alloc_slot(xprt, task);
+		xprt_do_reserve(xprt, task);
 }
 
 /**
@@ -1296,47 +1780,8 @@
 	if (task->tk_rqstp != NULL)
 		return;
 
-	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
-	xprt->ops->alloc_slot(xprt, task);
-}
-
-static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
-{
-	__be32 xid;
-
-	spin_lock(&xprt->reserve_lock);
-	xid = (__force __be32)xprt->xid++;
-	spin_unlock(&xprt->reserve_lock);
-	return xid;
-}
-
-static inline void xprt_init_xid(struct rpc_xprt *xprt)
-{
-	xprt->xid = prandom_u32();
-}
-
-void xprt_request_init(struct rpc_task *task)
-{
-	struct rpc_xprt *xprt = task->tk_xprt;
-	struct rpc_rqst	*req = task->tk_rqstp;
-
-	INIT_LIST_HEAD(&req->rq_list);
-	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
-	req->rq_task	= task;
-	req->rq_xprt    = xprt;
-	req->rq_buffer  = NULL;
-	req->rq_xid	= xprt_alloc_xid(xprt);
-	req->rq_connect_cookie = xprt->connect_cookie - 1;
-	req->rq_bytes_sent = 0;
-	req->rq_snd_buf.len = 0;
-	req->rq_snd_buf.buflen = 0;
-	req->rq_rcv_buf.len = 0;
-	req->rq_rcv_buf.buflen = 0;
-	req->rq_release_snd_buf = NULL;
-	xprt_reset_majortimeo(req);
-	dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
-			req, ntohl(req->rq_xid));
+	xprt_do_reserve(xprt, task);
 }
 
 /**
@@ -1352,33 +1797,24 @@
 	if (req == NULL) {
 		if (task->tk_client) {
 			xprt = task->tk_xprt;
-			if (xprt->snd_task == task)
-				xprt_release_write(xprt, task);
+			xprt_release_write(xprt, task);
 		}
 		return;
 	}
 
 	xprt = req->rq_xprt;
-	if (task->tk_ops->rpc_count_stats != NULL)
-		task->tk_ops->rpc_count_stats(task, task->tk_calldata);
-	else if (task->tk_client)
-		rpc_count_iostats(task, task->tk_client->cl_metrics);
-	spin_lock(&xprt->recv_lock);
-	if (!list_empty(&req->rq_list)) {
-		list_del_init(&req->rq_list);
-		xprt_wait_on_pinned_rqst(req);
-	}
-	spin_unlock(&xprt->recv_lock);
-	spin_lock_bh(&xprt->transport_lock);
+	xprt_request_dequeue_xprt(task);
+	spin_lock(&xprt->transport_lock);
 	xprt->ops->release_xprt(xprt, task);
 	if (xprt->ops->release_request)
 		xprt->ops->release_request(task);
-	xprt->last_used = jiffies;
 	xprt_schedule_autodisconnect(xprt);
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 	if (req->rq_buffer)
 		xprt->ops->buf_free(task);
 	xprt_inject_disconnect(xprt);
+	xdr_free_bvec(&req->rq_rcv_buf);
+	xdr_free_bvec(&req->rq_snd_buf);
 	if (req->rq_cred != NULL)
 		put_rpccred(req->rq_cred);
 	task->tk_rqstp = NULL;
@@ -1392,16 +1828,35 @@
 		xprt_free_bc_request(req);
 }
 
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
+void
+xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
+{
+	struct xdr_buf *xbufp = &req->rq_snd_buf;
+
+	task->tk_rqstp = req;
+	req->rq_task = task;
+	xprt_init_connect_cookie(req, req->rq_xprt);
+	/*
+	 * Set up the xdr_buf length.
+	 * This also indicates that the buffer is XDR encoded already.
+	 */
+	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
+		xbufp->tail[0].iov_len;
+}
+#endif
+
 static void xprt_init(struct rpc_xprt *xprt, struct net *net)
 {
 	kref_init(&xprt->kref);
 
 	spin_lock_init(&xprt->transport_lock);
 	spin_lock_init(&xprt->reserve_lock);
-	spin_lock_init(&xprt->recv_lock);
+	spin_lock_init(&xprt->queue_lock);
 
 	INIT_LIST_HEAD(&xprt->free);
-	INIT_LIST_HEAD(&xprt->recv);
+	xprt->recv_queue = RB_ROOT;
+	INIT_LIST_HEAD(&xprt->xmit_queue);
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	spin_lock_init(&xprt->bc_pa_lock);
 	INIT_LIST_HEAD(&xprt->bc_pa_list);
@@ -1414,7 +1869,7 @@
 
 	rpc_init_wait_queue(&xprt->binding, "xprt_binding");
 	rpc_init_wait_queue(&xprt->pending, "xprt_pending");
-	rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
+	rpc_init_wait_queue(&xprt->sending, "xprt_sending");
 	rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
 
 	xprt_init_xid(xprt);
@@ -1488,6 +1943,11 @@
 	rpc_destroy_wait_queue(&xprt->backlog);
 	kfree(xprt->servername);
 	/*
+	 * Destroy any existing back channel
+	 */
+	xprt_destroy_backchannel(xprt, UINT_MAX);
+
+	/*
 	 * Tear down transport state and free the rpc_xprt
 	 */
 	xprt->ops->destroy(xprt);
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index e2d64c7..78c075a 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -19,7 +19,7 @@
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/xprtmultipath.h>
 
-typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head,
+typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps,
 		const struct rpc_xprt *cur);
 
 static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular;
@@ -36,6 +36,7 @@
 	if (xps->xps_nxprts == 0)
 		xps->xps_net = xprt->xprt_net;
 	xps->xps_nxprts++;
+	xps->xps_nactive++;
 }
 
 /**
@@ -51,8 +52,7 @@
 	if (xprt == NULL)
 		return;
 	spin_lock(&xps->xps_lock);
-	if ((xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) &&
-	    !rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
+	if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
 		xprt_switch_add_xprt_locked(xps, xprt);
 	spin_unlock(&xps->xps_lock);
 }
@@ -62,6 +62,7 @@
 {
 	if (unlikely(xprt == NULL))
 		return;
+	xps->xps_nactive--;
 	xps->xps_nxprts--;
 	if (xps->xps_nxprts == 0)
 		xps->xps_net = NULL;
@@ -102,7 +103,9 @@
 	if (xps != NULL) {
 		spin_lock_init(&xps->xps_lock);
 		kref_init(&xps->xps_kref);
-		xps->xps_nxprts = 0;
+		xps->xps_nxprts = xps->xps_nactive = 0;
+		atomic_long_set(&xps->xps_queuelen, 0);
+		xps->xps_net = NULL;
 		INIT_LIST_HEAD(&xps->xps_xprt_list);
 		xps->xps_iter_ops = &rpc_xprt_iter_singular;
 		xprt_switch_add_xprt_locked(xps, xprt);
@@ -193,9 +196,21 @@
 }
 
 static
+bool xprt_is_active(const struct rpc_xprt *xprt)
+{
+	return kref_read(&xprt->kref) != 0;
+}
+
+static
 struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head)
 {
-	return list_first_or_null_rcu(head, struct rpc_xprt, xprt_switch);
+	struct rpc_xprt *pos;
+
+	list_for_each_entry_rcu(pos, head, xprt_switch) {
+		if (xprt_is_active(pos))
+			return pos;
+	}
+	return NULL;
 }
 
 static
@@ -213,9 +228,12 @@
 		const struct rpc_xprt *cur)
 {
 	struct rpc_xprt *pos;
+	bool found = false;
 
 	list_for_each_entry_rcu(pos, head, xprt_switch) {
 		if (cur == pos)
+			found = true;
+		if (found && xprt_is_active(pos))
 			return pos;
 	}
 	return NULL;
@@ -260,9 +278,12 @@
 		const struct rpc_xprt *cur)
 {
 	struct rpc_xprt *pos, *prev = NULL;
+	bool found = false;
 
 	list_for_each_entry_rcu(pos, head, xprt_switch) {
 		if (cur == prev)
+			found = true;
+		if (found && xprt_is_active(pos))
 			return pos;
 		prev = pos;
 	}
@@ -270,22 +291,15 @@
 }
 
 static
-struct rpc_xprt *xprt_switch_set_next_cursor(struct list_head *head,
+struct rpc_xprt *xprt_switch_set_next_cursor(struct rpc_xprt_switch *xps,
 		struct rpc_xprt **cursor,
 		xprt_switch_find_xprt_t find_next)
 {
-	struct rpc_xprt *cur, *pos, *old;
+	struct rpc_xprt *pos, *old;
 
-	cur = READ_ONCE(*cursor);
-	for (;;) {
-		old = cur;
-		pos = find_next(head, old);
-		if (pos == NULL)
-			break;
-		cur = cmpxchg_relaxed(cursor, old, pos);
-		if (cur == old)
-			break;
-	}
+	old = smp_load_acquire(cursor);
+	pos = find_next(xps, old);
+	smp_store_release(cursor, pos);
 	return pos;
 }
 
@@ -297,13 +311,11 @@
 
 	if (xps == NULL)
 		return NULL;
-	return xprt_switch_set_next_cursor(&xps->xps_xprt_list,
-			&xpi->xpi_cursor,
-			find_next);
+	return xprt_switch_set_next_cursor(xps, &xpi->xpi_cursor, find_next);
 }
 
 static
-struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head,
+struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head,
 		const struct rpc_xprt *cur)
 {
 	struct rpc_xprt *ret;
@@ -315,6 +327,31 @@
 }
 
 static
+struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct rpc_xprt_switch *xps,
+		const struct rpc_xprt *cur)
+{
+	struct list_head *head = &xps->xps_xprt_list;
+	struct rpc_xprt *xprt;
+	unsigned int nactive;
+
+	for (;;) {
+		unsigned long xprt_queuelen, xps_queuelen;
+
+		xprt = __xprt_switch_find_next_entry_roundrobin(head, cur);
+		if (!xprt)
+			break;
+		xprt_queuelen = atomic_long_read(&xprt->queuelen);
+		xps_queuelen = atomic_long_read(&xps->xps_queuelen);
+		nactive = READ_ONCE(xps->xps_nactive);
+		/* Exit loop if xprt_queuelen <= average queue length */
+		if (xprt_queuelen * nactive <= xps_queuelen)
+			break;
+		cur = xprt;
+	}
+	return xprt;
+}
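
The skip test above is "queue length above the mean", written without a division: xprt_queuelen * nactive <= xps_queuelen is equivalent to xprt_queuelen <= xps_queuelen / nactive. With 3 active transports and 12 requests queued in total, a transport holding 5 (5 * 3 = 15 > 12) is skipped, while one holding 4 (4 * 3 = 12 <= 12) is accepted:

#include <stdbool.h>

static bool at_or_below_mean(unsigned long xprt_queuelen,
			     unsigned long total_queuelen,
			     unsigned int nactive)
{
	return xprt_queuelen * nactive <= total_queuelen;
}
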
+
+static
 struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi)
 {
 	return xprt_iter_next_entry_multiple(xpi,
@@ -322,9 +359,17 @@
 }
 
 static
+struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps,
+		const struct rpc_xprt *cur)
+{
+	return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur);
+}
+
+static
 struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
 {
-	return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry);
+	return xprt_iter_next_entry_multiple(xpi,
+			xprt_switch_find_next_entry_all);
 }
 
 /*
@@ -383,7 +428,7 @@
 /**
  * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch
  * @xpi: pointer to rpc_xprt_iter
- * @xps: pointer to a new rpc_xprt_switch or NULL
+ * @newswitch: pointer to a new rpc_xprt_switch or NULL
  *
  * Swaps out the existing xpi->xpi_xpswitch with a new value.
  */
@@ -401,7 +446,7 @@
 
 /**
  * xprt_iter_destroy - Destroys the xprt iterator
- * @xpi pointer to rpc_xprt_iter
+ * @xpi: pointer to rpc_xprt_iter
  */
 void xprt_iter_destroy(struct rpc_xprt_iter *xpi)
 {
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 8bf19e1..8ed0377 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,8 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
 
-rpcrdma-y := transport.o rpc_rdma.o verbs.o \
-	fmr_ops.o frwr_ops.o \
+rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
 	svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
 	svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
 	module.o
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 90adeff..b458bf5 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -5,7 +5,6 @@
  * Support for backward direction RPCs on RPC/RDMA.
  */
 
-#include <linux/module.h>
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svc_xprt.h>
@@ -20,59 +19,6 @@
 
 #undef RPCRDMA_BACKCHANNEL_DEBUG
 
-static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
-				 struct rpc_rqst *rqst)
-{
-	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
-
-	spin_lock(&buf->rb_reqslock);
-	list_del(&req->rl_all);
-	spin_unlock(&buf->rb_reqslock);
-
-	rpcrdma_destroy_req(req);
-}
-
-static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
-				 unsigned int count)
-{
-	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-	struct rpc_rqst *rqst;
-	unsigned int i;
-
-	for (i = 0; i < (count << 1); i++) {
-		struct rpcrdma_regbuf *rb;
-		struct rpcrdma_req *req;
-		size_t size;
-
-		req = rpcrdma_create_req(r_xprt);
-		if (IS_ERR(req))
-			return PTR_ERR(req);
-		rqst = &req->rl_slot;
-
-		rqst->rq_xprt = xprt;
-		INIT_LIST_HEAD(&rqst->rq_list);
-		INIT_LIST_HEAD(&rqst->rq_bc_list);
-		__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
-		spin_lock_bh(&xprt->bc_pa_lock);
-		list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
-		spin_unlock_bh(&xprt->bc_pa_lock);
-
-		size = r_xprt->rx_data.inline_rsize;
-		rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
-		if (IS_ERR(rb))
-			goto out_fail;
-		req->rl_sendbuf = rb;
-		xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
-			     min_t(size_t, size, PAGE_SIZE));
-	}
-	return 0;
-
-out_fail:
-	rpcrdma_bc_free_rqst(r_xprt, rqst);
-	return -ENOMEM;
-}
-
 /**
  * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
  * @xprt: transport associated with these backchannel resources
@@ -83,55 +29,10 @@
 int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-	int rc;
 
-	/* The backchannel reply path returns each rpc_rqst to the
-	 * bc_pa_list _after_ the reply is sent. If the server is
-	 * faster than the client, it can send another backward
-	 * direction request before the rpc_rqst is returned to the
-	 * list. The client rejects the request in this case.
-	 *
-	 * Twice as many rpc_rqsts are prepared to ensure there is
-	 * always an rpc_rqst available as soon as a reply is sent.
-	 */
-	if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
-		goto out_err;
-
-	rc = rpcrdma_bc_setup_reqs(r_xprt, reqs);
-	if (rc)
-		goto out_free;
-
-	r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
-	request_module("svcrdma");
+	r_xprt->rx_buf.rb_bc_srv_max_requests = RPCRDMA_BACKWARD_WRS >> 1;
 	trace_xprtrdma_cb_setup(r_xprt, reqs);
 	return 0;
-
-out_free:
-	xprt_rdma_bc_destroy(xprt, reqs);
-
-out_err:
-	pr_err("RPC:       %s: setup backchannel transport failed\n", __func__);
-	return -ENOMEM;
-}
-
-/**
- * xprt_rdma_bc_up - Create transport endpoint for backchannel service
- * @serv: server endpoint
- * @net: network namespace
- *
- * The "xprt" is an implied argument: it supplies the name of the
- * backchannel transport class.
- *
- * Returns zero on success, negative errno on failure
- */
-int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
-{
-	int ret;
-
-	ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0);
-	if (ret < 0)
-		return ret;
-	return 0;
 }
 
 /**
@@ -143,14 +44,19 @@
 size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
 	size_t maxmsg;
 
-	maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize);
+	maxmsg = min_t(unsigned int, ep->rep_inline_send, ep->rep_inline_recv);
 	maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE);
 	return maxmsg - RPCRDMA_HDRLEN_MIN;
 }
 
+unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt)
+{
+	return RPCRDMA_BACKWARD_WRS >> 1;
+}
+
 static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
@@ -159,7 +65,7 @@
 
 	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
 	xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
-			req->rl_rdmabuf->rg_base);
+			rdmab_data(req->rl_rdmabuf), rqst);
 
 	p = xdr_reserve_space(&req->rl_stream, 28);
 	if (unlikely(!p))
@@ -194,18 +100,21 @@
  */
 int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
 {
-	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+	struct rpc_xprt *xprt = rqst->rq_xprt;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	int rc;
 
-	if (!xprt_connected(rqst->rq_xprt))
-		goto drop_connection;
+	if (!xprt_connected(xprt))
+		return -ENOTCONN;
+
+	if (!xprt_request_get_cong(xprt, rqst))
+		return -EBADSLT;
 
 	rc = rpcrdma_bc_marshal_reply(rqst);
 	if (rc < 0)
 		goto failed_marshal;
 
-	rpcrdma_post_recvs(r_xprt, true);
 	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
 		goto drop_connection;
 	return 0;
@@ -214,7 +123,7 @@
 	if (rc != -ENOTCONN)
 		return rc;
 drop_connection:
-	xprt_disconnect_done(rqst->rq_xprt);
+	xprt_rdma_close(xprt);
 	return -ENOTCONN;
 }
 
@@ -225,19 +134,18 @@
  */
 void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
 {
-	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	struct rpc_rqst *rqst, *tmp;
 
-	spin_lock_bh(&xprt->bc_pa_lock);
+	spin_lock(&xprt->bc_pa_lock);
 	list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
 		list_del(&rqst->rq_bc_pa_list);
-		spin_unlock_bh(&xprt->bc_pa_lock);
+		spin_unlock(&xprt->bc_pa_lock);
 
-		rpcrdma_bc_free_rqst(r_xprt, rqst);
+		rpcrdma_req_destroy(rpcr_to_rdmar(rqst));
 
-		spin_lock_bh(&xprt->bc_pa_lock);
+		spin_lock(&xprt->bc_pa_lock);
 	}
-	spin_unlock_bh(&xprt->bc_pa_lock);
+	spin_unlock(&xprt->bc_pa_lock);
 }
 
 /**
@@ -249,15 +157,50 @@
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	struct rpc_xprt *xprt = rqst->rq_xprt;
 
-	dprintk("RPC:       %s: freeing rqst %p (req %p)\n",
-		__func__, rqst, req);
-
 	rpcrdma_recv_buffer_put(req->rl_reply);
 	req->rl_reply = NULL;
 
-	spin_lock_bh(&xprt->bc_pa_lock);
+	spin_lock(&xprt->bc_pa_lock);
 	list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
-	spin_unlock_bh(&xprt->bc_pa_lock);
+	spin_unlock(&xprt->bc_pa_lock);
+	xprt_put(xprt);
+}
+
+static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+	struct rpcrdma_req *req;
+	struct rpc_rqst *rqst;
+	size_t size;
+
+	spin_lock(&xprt->bc_pa_lock);
+	rqst = list_first_entry_or_null(&xprt->bc_pa_list, struct rpc_rqst,
+					rq_bc_pa_list);
+	if (!rqst)
+		goto create_req;
+	list_del(&rqst->rq_bc_pa_list);
+	spin_unlock(&xprt->bc_pa_lock);
+	return rqst;
+
+create_req:
+	spin_unlock(&xprt->bc_pa_lock);
+
+	/* Set a limit to prevent a remote peer from overrunning our resources.
+	 */
+	if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS)
+		return NULL;
+
+	size = min_t(size_t, r_xprt->rx_ep.rep_inline_recv, PAGE_SIZE);
+	req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
+	if (!req)
+		return NULL;
+
+	xprt->bc_alloc_count++;
+	rqst = &req->rl_slot;
+	rqst->rq_xprt = xprt;
+	__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
+	xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(req->rl_sendbuf), size);
+	return rqst;
 }
 
 /**
@@ -291,20 +234,11 @@
 	pr_info("RPC:       %s: %*ph\n", __func__, size, p);
 #endif
 
-	/* Grab a free bc rqst */
-	spin_lock(&xprt->bc_pa_lock);
-	if (list_empty(&xprt->bc_pa_list)) {
-		spin_unlock(&xprt->bc_pa_lock);
+	rqst = rpcrdma_bc_rqst_get(r_xprt);
+	if (!rqst)
 		goto out_overflow;
-	}
-	rqst = list_first_entry(&xprt->bc_pa_list,
-				struct rpc_rqst, rq_bc_pa_list);
-	list_del(&rqst->rq_bc_pa_list);
-	spin_unlock(&xprt->bc_pa_lock);
 
-	/* Prepare rqst */
 	rqst->rq_reply_bytes_recvd = 0;
-	rqst->rq_bytes_sent = 0;
 	rqst->rq_xid = *p;
 
 	rqst->rq_private_buf.len = size;
@@ -326,6 +260,7 @@
 
 	/* Queue rqst for ULP's callback service */
 	bc_serv = xprt->bc_serv;
+	xprt_get(xprt);
 	spin_lock(&bc_serv->sv_cb_lock);
 	list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
 	spin_unlock(&bc_serv->sv_cb_lock);
@@ -337,7 +272,7 @@
 
 out_overflow:
 	pr_warn("RPC/RDMA backchannel overflow\n");
-	xprt_disconnect_done(xprt);
+	xprt_force_disconnect(xprt);
 	/* This receive buffer gets reposted automatically
 	 * when the connection is re-established.
 	 */
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
deleted file mode 100644
index 0f7c465..0000000
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ /dev/null
@@ -1,348 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2015, 2017 Oracle.  All rights reserved.
- * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
- */
-
-/* Lightweight memory registration using Fast Memory Regions (FMR).
- * Referred to sometimes as MTHCAFMR mode.
- *
- * FMR uses synchronous memory registration and deregistration.
- * FMR registration is known to be fast, but FMR deregistration
- * can take tens of usecs to complete.
- */
-
-/* Normal operation
- *
- * A Memory Region is prepared for RDMA READ or WRITE using the
- * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
- * finished, the Memory Region is unmapped using the ib_unmap_fmr
- * verb (fmr_op_unmap).
- */
-
-#include <linux/sunrpc/svc_rdma.h>
-
-#include "xprt_rdma.h"
-#include <trace/events/rpcrdma.h>
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY	RPCDBG_TRANS
-#endif
-
-/* Maximum scatter/gather per FMR */
-#define RPCRDMA_MAX_FMR_SGES	(64)
-
-/* Access mode of externally registered pages */
-enum {
-	RPCRDMA_FMR_ACCESS_FLAGS	= IB_ACCESS_REMOTE_WRITE |
-					  IB_ACCESS_REMOTE_READ,
-};
-
-bool
-fmr_is_supported(struct rpcrdma_ia *ia)
-{
-	if (!ia->ri_device->alloc_fmr) {
-		pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n",
-			ia->ri_device->name);
-		return false;
-	}
-	return true;
-}
-
-static int
-fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
-{
-	static struct ib_fmr_attr fmr_attr = {
-		.max_pages	= RPCRDMA_MAX_FMR_SGES,
-		.max_maps	= 1,
-		.page_shift	= PAGE_SHIFT
-	};
-
-	mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
-				       sizeof(u64), GFP_KERNEL);
-	if (!mr->fmr.fm_physaddrs)
-		goto out_free;
-
-	mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
-			    sizeof(*mr->mr_sg), GFP_KERNEL);
-	if (!mr->mr_sg)
-		goto out_free;
-
-	sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
-
-	mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
-				     &fmr_attr);
-	if (IS_ERR(mr->fmr.fm_mr))
-		goto out_fmr_err;
-
-	INIT_LIST_HEAD(&mr->mr_list);
-	return 0;
-
-out_fmr_err:
-	dprintk("RPC:       %s: ib_alloc_fmr returned %ld\n", __func__,
-		PTR_ERR(mr->fmr.fm_mr));
-
-out_free:
-	kfree(mr->mr_sg);
-	kfree(mr->fmr.fm_physaddrs);
-	return -ENOMEM;
-}
-
-static int
-__fmr_unmap(struct rpcrdma_mr *mr)
-{
-	LIST_HEAD(l);
-	int rc;
-
-	list_add(&mr->fmr.fm_mr->list, &l);
-	rc = ib_unmap_fmr(&l);
-	list_del(&mr->fmr.fm_mr->list);
-	return rc;
-}
-
-static void
-fmr_op_release_mr(struct rpcrdma_mr *mr)
-{
-	LIST_HEAD(unmap_list);
-	int rc;
-
-	kfree(mr->fmr.fm_physaddrs);
-	kfree(mr->mr_sg);
-
-	/* In case this one was left mapped, try to unmap it
-	 * to prevent dealloc_fmr from failing with EBUSY
-	 */
-	rc = __fmr_unmap(mr);
-	if (rc)
-		pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
-		       mr, rc);
-
-	rc = ib_dealloc_fmr(mr->fmr.fm_mr);
-	if (rc)
-		pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
-		       mr, rc);
-
-	kfree(mr);
-}
-
-/* Reset of a single FMR.
- */
-static void
-fmr_op_recover_mr(struct rpcrdma_mr *mr)
-{
-	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-	int rc;
-
-	/* ORDER: invalidate first */
-	rc = __fmr_unmap(mr);
-	if (rc)
-		goto out_release;
-
-	/* ORDER: then DMA unmap */
-	rpcrdma_mr_unmap_and_put(mr);
-
-	r_xprt->rx_stats.mrs_recovered++;
-	return;
-
-out_release:
-	pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
-	r_xprt->rx_stats.mrs_orphaned++;
-
-	trace_xprtrdma_dma_unmap(mr);
-	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
-			mr->mr_sg, mr->mr_nents, mr->mr_dir);
-
-	spin_lock(&r_xprt->rx_buf.rb_mrlock);
-	list_del(&mr->mr_all);
-	spin_unlock(&r_xprt->rx_buf.rb_mrlock);
-
-	fmr_op_release_mr(mr);
-}
-
-/* On success, sets:
- *	ep->rep_attr.cap.max_send_wr
- *	ep->rep_attr.cap.max_recv_wr
- *	cdata->max_requests
- *	ia->ri_max_segs
- */
-static int
-fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
-	    struct rpcrdma_create_data_internal *cdata)
-{
-	int max_qp_wr;
-
-	max_qp_wr = ia->ri_device->attrs.max_qp_wr;
-	max_qp_wr -= RPCRDMA_BACKWARD_WRS;
-	max_qp_wr -= 1;
-	if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
-		return -ENOMEM;
-	if (cdata->max_requests > max_qp_wr)
-		cdata->max_requests = max_qp_wr;
-	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
-	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
-	ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
-	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
-	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
-	ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
-
-	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
-				RPCRDMA_MAX_FMR_SGES);
-	return 0;
-}
-
-/* FMR mode conveys up to 64 pages of payload per chunk segment.
- */
-static size_t
-fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
-{
-	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-		     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
-}
-
-/* Use the ib_map_phys_fmr() verb to register a memory region
- * for remote access via RDMA READ or RDMA WRITE.
- */
-static struct rpcrdma_mr_seg *
-fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
-	   int nsegs, bool writing, struct rpcrdma_mr **out)
-{
-	struct rpcrdma_mr_seg *seg1 = seg;
-	int len, pageoff, i, rc;
-	struct rpcrdma_mr *mr;
-	u64 *dma_pages;
-
-	mr = rpcrdma_mr_get(r_xprt);
-	if (!mr)
-		return ERR_PTR(-EAGAIN);
-
-	pageoff = offset_in_page(seg1->mr_offset);
-	seg1->mr_offset -= pageoff;	/* start of page */
-	seg1->mr_len += pageoff;
-	len = -pageoff;
-	if (nsegs > RPCRDMA_MAX_FMR_SGES)
-		nsegs = RPCRDMA_MAX_FMR_SGES;
-	for (i = 0; i < nsegs;) {
-		if (seg->mr_page)
-			sg_set_page(&mr->mr_sg[i],
-				    seg->mr_page,
-				    seg->mr_len,
-				    offset_in_page(seg->mr_offset));
-		else
-			sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
-				   seg->mr_len);
-		len += seg->mr_len;
-		++seg;
-		++i;
-		/* Check for holes */
-		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
-		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
-			break;
-	}
-	mr->mr_dir = rpcrdma_data_dir(writing);
-
-	mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
-				     mr->mr_sg, i, mr->mr_dir);
-	if (!mr->mr_nents)
-		goto out_dmamap_err;
-	trace_xprtrdma_dma_map(mr);
-
-	for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
-		dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
-	rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
-			     dma_pages[0]);
-	if (rc)
-		goto out_maperr;
-
-	mr->mr_handle = mr->fmr.fm_mr->rkey;
-	mr->mr_length = len;
-	mr->mr_offset = dma_pages[0] + pageoff;
-
-	*out = mr;
-	return seg;
-
-out_dmamap_err:
-	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
-	       mr->mr_sg, i);
-	rpcrdma_mr_put(mr);
-	return ERR_PTR(-EIO);
-
-out_maperr:
-	pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
-	       len, (unsigned long long)dma_pages[0],
-	       pageoff, mr->mr_nents, rc);
-	rpcrdma_mr_unmap_and_put(mr);
-	return ERR_PTR(-EIO);
-}
-
-/* Post Send WR containing the RPC Call message.
- */
-static int
-fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
-{
-	return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, NULL);
-}
-
-/* Invalidate all memory regions that were registered for "req".
- *
- * Sleeps until it is safe for the host CPU to access the
- * previously mapped memory regions.
- *
- * Caller ensures that @mrs is not empty before the call. This
- * function empties the list.
- */
-static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
-{
-	struct rpcrdma_mr *mr;
-	LIST_HEAD(unmap_list);
-	int rc;
-
-	/* ORDER: Invalidate all of the req's MRs first
-	 *
-	 * ib_unmap_fmr() is slow, so use a single call instead
-	 * of one call per mapped FMR.
-	 */
-	list_for_each_entry(mr, mrs, mr_list) {
-		dprintk("RPC:       %s: unmapping fmr %p\n",
-			__func__, &mr->fmr);
-		trace_xprtrdma_localinv(mr);
-		list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
-	}
-	r_xprt->rx_stats.local_inv_needed++;
-	rc = ib_unmap_fmr(&unmap_list);
-	if (rc)
-		goto out_reset;
-
-	/* ORDER: Now DMA unmap all of the req's MRs, and return
-	 * them to the free MW list.
-	 */
-	while (!list_empty(mrs)) {
-		mr = rpcrdma_mr_pop(mrs);
-		list_del(&mr->fmr.fm_mr->list);
-		rpcrdma_mr_unmap_and_put(mr);
-	}
-
-	return;
-
-out_reset:
-	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
-
-	while (!list_empty(mrs)) {
-		mr = rpcrdma_mr_pop(mrs);
-		list_del(&mr->fmr.fm_mr->list);
-		fmr_op_recover_mr(mr);
-	}
-}
-
-const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
-	.ro_map				= fmr_op_map,
-	.ro_send			= fmr_op_send,
-	.ro_unmap_sync			= fmr_op_unmap_sync,
-	.ro_recover_mr			= fmr_op_recover_mr,
-	.ro_open			= fmr_op_open,
-	.ro_maxpages			= fmr_op_maxpages,
-	.ro_init_mr			= fmr_op_init_mr,
-	.ro_release_mr			= fmr_op_release_mr,
-	.ro_displayname			= "fmr",
-	.ro_send_w_inv_ok		= 0,
-};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 1bb00dd..30065a2 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -7,67 +7,37 @@
 /* Lightweight memory registration using Fast Registration Work
  * Requests (FRWR).
  *
- * FRWR features ordered asynchronous registration and deregistration
- * of arbitrarily sized memory regions. This is the fastest and safest
+ * FRWR features ordered asynchronous registration and invalidation
+ * of arbitrarily-sized memory regions. This is the fastest and safest
  * but most complex memory registration mode.
  */
 
 /* Normal operation
  *
- * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
- * Work Request (frwr_op_map). When the RDMA operation is finished, this
+ * A Memory Region is prepared for RDMA Read or Write using a FAST_REG
+ * Work Request (frwr_map). When the RDMA operation is finished, this
  * Memory Region is invalidated using a LOCAL_INV Work Request
- * (frwr_op_unmap_sync).
+ * (frwr_unmap_async and frwr_unmap_sync).
  *
- * Typically these Work Requests are not signaled, and neither are RDMA
- * SEND Work Requests (with the exception of signaling occasionally to
- * prevent provider work queue overflows). This greatly reduces HCA
+ * Typically FAST_REG Work Requests are not signaled, and neither are
+ * RDMA Send Work Requests (with the exception of signaling occasionally
+ * to prevent provider work queue overflows). This greatly reduces HCA
  * interrupt workload.
- *
- * As an optimization, frwr_op_unmap marks MRs INVALID before the
- * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
- * rb_mrs immediately so that no work (like managing a linked list
- * under a spinlock) is needed in the completion upcall.
- *
- * But this means that frwr_op_map() can occasionally encounter an MR
- * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
- * ordering prevents a subsequent FAST_REG WR from executing against
- * that MR while it is still being invalidated.
  */
 
 /* Transport recovery
  *
- * ->op_map and the transport connect worker cannot run at the same
- * time, but ->op_unmap can fire while the transport connect worker
- * is running. Thus MR recovery is handled in ->op_map, to guarantee
- * that recovered MRs are owned by a sending RPC, and not one where
- * ->op_unmap could fire at the same time transport reconnect is
- * being done.
+ * frwr_map and frwr_unmap_* cannot run at the same time the transport
+ * connect worker is running. The connect worker holds the transport
+ * send lock, just as ->send_request does. This prevents frwr_map and
+ * the connect worker from running concurrently. When a connection is
+ * closed, the Receive completion queue is drained before the connect
+ * worker is allowed to get control. This prevents frwr_unmap and the
+ * connect worker from running concurrently.
  *
- * When the underlying transport disconnects, MRs are left in one of
- * four states:
- *
- * INVALID:	The MR was not in use before the QP entered ERROR state.
- *
- * VALID:	The MR was registered before the QP entered ERROR state.
- *
- * FLUSHED_FR:	The MR was being registered when the QP entered ERROR
- *		state, and the pending WR was flushed.
- *
- * FLUSHED_LI:	The MR was being invalidated when the QP entered ERROR
- *		state, and the pending WR was flushed.
- *
- * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
- * with ib_dereg_mr and then are re-initialized. Because MR recovery
- * allocates fresh resources, it is deferred to a workqueue, and the
- * recovered MRs are placed back on the rb_mrs list when recovery is
- * complete. frwr_op_map allocates another MR for the current RPC while
- * the broken MR is reset.
- *
- * To ensure that frwr_op_map doesn't encounter an MR that is marked
- * INVALID but that is about to be flushed due to a previous transport
- * disconnect, the transport connect worker attempts to drain all
- * pending send queue WRs before the transport is reconnected.
+ * When the underlying transport disconnects, MRs that are in flight
+ * are flushed and are likely unusable. Thus all flushed MRs are
+ * destroyed. New MRs are created on demand.
  */
 
 #include <linux/sunrpc/rpc_rdma.h>
@@ -80,10 +50,15 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
-bool
-frwr_is_supported(struct rpcrdma_ia *ia)
+/**
+ * frwr_is_supported - Check if device supports FRWR
+ * @device: interface adapter to check
+ *
+ * Returns true if device supports FRWR, otherwise false
+ */
+bool frwr_is_supported(struct ib_device *device)
 {
-	struct ib_device_attr *attrs = &ia->ri_device->attrs;
+	struct ib_device_attr *attrs = &device->attrs;
 
 	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
 		goto out_not_supported;
@@ -93,142 +68,172 @@
 
 out_not_supported:
 	pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
-		ia->ri_device->name);
+		device->name);
 	return false;
 }
 
-static int
-frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
-{
-	unsigned int depth = ia->ri_max_frwr_depth;
-	struct rpcrdma_frwr *frwr = &mr->frwr;
-	int rc;
-
-	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
-	if (IS_ERR(frwr->fr_mr))
-		goto out_mr_err;
-
-	mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
-	if (!mr->mr_sg)
-		goto out_list_err;
-
-	INIT_LIST_HEAD(&mr->mr_list);
-	sg_init_table(mr->mr_sg, depth);
-	init_completion(&frwr->fr_linv_done);
-	return 0;
-
-out_mr_err:
-	rc = PTR_ERR(frwr->fr_mr);
-	dprintk("RPC:       %s: ib_alloc_mr status %i\n",
-		__func__, rc);
-	return rc;
-
-out_list_err:
-	rc = -ENOMEM;
-	dprintk("RPC:       %s: sg allocation failure\n",
-		__func__);
-	ib_dereg_mr(frwr->fr_mr);
-	return rc;
-}
-
-static void
-frwr_op_release_mr(struct rpcrdma_mr *mr)
+/**
+ * frwr_release_mr - Destroy one MR
+ * @mr: MR allocated by frwr_init_mr
+ *
+ */
+void frwr_release_mr(struct rpcrdma_mr *mr)
 {
 	int rc;
 
 	rc = ib_dereg_mr(mr->frwr.fr_mr);
 	if (rc)
-		pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
-		       mr, rc);
+		trace_xprtrdma_frwr_dereg(mr, rc);
 	kfree(mr->mr_sg);
 	kfree(mr);
 }
 
-static int
-__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
+static void frwr_mr_recycle(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
 {
-	struct rpcrdma_frwr *frwr = &mr->frwr;
-	int rc;
+	trace_xprtrdma_mr_recycle(mr);
 
-	rc = ib_dereg_mr(frwr->fr_mr);
-	if (rc) {
-		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
-			rc, mr);
-		return rc;
+	if (mr->mr_dir != DMA_NONE) {
+		trace_xprtrdma_mr_unmap(mr);
+		ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
+				mr->mr_sg, mr->mr_nents, mr->mr_dir);
+		mr->mr_dir = DMA_NONE;
 	}
 
-	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
-				  ia->ri_max_frwr_depth);
-	if (IS_ERR(frwr->fr_mr)) {
-		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
-			PTR_ERR(frwr->fr_mr), mr);
-		return PTR_ERR(frwr->fr_mr);
-	}
+	spin_lock(&r_xprt->rx_buf.rb_lock);
+	list_del(&mr->mr_all);
+	r_xprt->rx_stats.mrs_recycled++;
+	spin_unlock(&r_xprt->rx_buf.rb_lock);
 
-	dprintk("RPC:       %s: recovered FRWR %p\n", __func__, frwr);
-	frwr->fr_state = FRWR_IS_INVALID;
-	return 0;
+	frwr_release_mr(mr);
 }
 
-/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
+/* MRs are dynamically allocated, so simply clean up and release the MR.
+ * A replacement MR will subsequently be allocated on demand.
  */
 static void
-frwr_op_recover_mr(struct rpcrdma_mr *mr)
+frwr_mr_recycle_worker(struct work_struct *work)
 {
-	enum rpcrdma_frwr_state state = mr->frwr.fr_state;
-	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	int rc;
+	struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr,
+					     mr_recycle);
 
-	rc = __frwr_mr_reset(ia, mr);
-	if (state != FRWR_FLUSHED_LI) {
-		trace_xprtrdma_dma_unmap(mr);
-		ib_dma_unmap_sg(ia->ri_device,
-				mr->mr_sg, mr->mr_nents, mr->mr_dir);
-	}
-	if (rc)
-		goto out_release;
-
-	rpcrdma_mr_put(mr);
-	r_xprt->rx_stats.mrs_recovered++;
-	return;
-
-out_release:
-	pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
-	r_xprt->rx_stats.mrs_orphaned++;
-
-	spin_lock(&r_xprt->rx_buf.rb_mrlock);
-	list_del(&mr->mr_all);
-	spin_unlock(&r_xprt->rx_buf.rb_mrlock);
-
-	frwr_op_release_mr(mr);
+	frwr_mr_recycle(mr->mr_xprt, mr);
 }
 
-/* On success, sets:
+/**
+ * frwr_recycle - Discard MRs
+ * @req: request whose registered MRs should be discarded
+ *
+ * Used after a reconnect. These MRs could still be in flight; there
+ * is no way to tell. The safe thing to do is release them.
+ */
+void frwr_recycle(struct rpcrdma_req *req)
+{
+	struct rpcrdma_mr *mr;
+
+	while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
+		frwr_mr_recycle(mr->mr_xprt, mr);
+}
+
+/**
+ * frwr_reset - Place MRs back on the free list
+ * @req: request to reset
+ *
+ * Used after a failed marshal. For FRWR, this means the MRs
+ * don't have to be fully released and recreated.
+ *
+ * NB: This is safe only as long as none of @req's MRs are
+ * involved with an ongoing asynchronous FAST_REG or LOCAL_INV
+ * Work Request.
+ */
+void frwr_reset(struct rpcrdma_req *req)
+{
+	struct rpcrdma_mr *mr;
+
+	while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
+		rpcrdma_mr_put(mr);
+}
+
+/**
+ * frwr_init_mr - Initialize one MR
+ * @ia: interface adapter
+ * @mr: generic MR to prepare for FRWR
+ *
+ * Returns zero if successful. Otherwise a negative errno
+ * is returned.
+ */
+int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
+{
+	unsigned int depth = ia->ri_max_frwr_depth;
+	struct scatterlist *sg;
+	struct ib_mr *frmr;
+	int rc;
+
+	/* NB: ib_alloc_mr and device drivers typically allocate
+	 *     memory with GFP_KERNEL.
+	 */
+	frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
+	if (IS_ERR(frmr))
+		goto out_mr_err;
+
+	sg = kcalloc(depth, sizeof(*sg), GFP_NOFS);
+	if (!sg)
+		goto out_list_err;
+
+	mr->frwr.fr_mr = frmr;
+	mr->mr_dir = DMA_NONE;
+	INIT_LIST_HEAD(&mr->mr_list);
+	INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
+	init_completion(&mr->frwr.fr_linv_done);
+
+	sg_init_table(sg, depth);
+	mr->mr_sg = sg;
+	return 0;
+
+out_mr_err:
+	rc = PTR_ERR(frmr);
+	trace_xprtrdma_frwr_alloc(mr, rc);
+	return rc;
+
+out_list_err:
+	ib_dereg_mr(frmr);
+	return -ENOMEM;
+}
+
+/**
+ * frwr_open - Prepare an endpoint for use with FRWR
+ * @ia: interface adapter this endpoint will use
+ * @ep: endpoint to prepare
+ *
+ * On success, sets:
  *	ep->rep_attr.cap.max_send_wr
  *	ep->rep_attr.cap.max_recv_wr
- *	cdata->max_requests
+ *	ep->rep_max_requests
  *	ia->ri_max_segs
  *
  * And these FRWR-related fields:
  *	ia->ri_max_frwr_depth
  *	ia->ri_mrtype
+ *
+ * On failure, a negative errno is returned.
  */
-static int
-frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
-	     struct rpcrdma_create_data_internal *cdata)
+int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
 {
-	struct ib_device_attr *attrs = &ia->ri_device->attrs;
+	struct ib_device_attr *attrs = &ia->ri_id->device->attrs;
 	int max_qp_wr, depth, delta;
 
 	ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
 	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
 		ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
 
-	ia->ri_max_frwr_depth =
-			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-			      attrs->max_fast_reg_page_list_len);
-	dprintk("RPC:       %s: device's max FR page list len = %u\n",
+	/* Quirk: Some devices advertise a large max_fast_reg_page_list_len
+	 * capability, but perform optimally when the MRs are not larger
+	 * than a page.
+	 */
+	if (attrs->max_sge_rd > 1)
+		ia->ri_max_frwr_depth = attrs->max_sge_rd;
+	else
+		ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len;
+	if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS)
+		ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS;
+	dprintk("RPC:       %s: max FR page list depth = %u\n",
 		__func__, ia->ri_max_frwr_depth);
 
 	/* Add room for frwr register and invalidate WRs.
@@ -253,145 +258,78 @@
 		} while (delta > 0);
 	}
 
-	max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+	max_qp_wr = ia->ri_id->device->attrs.max_qp_wr;
 	max_qp_wr -= RPCRDMA_BACKWARD_WRS;
 	max_qp_wr -= 1;
 	if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
 		return -ENOMEM;
-	if (cdata->max_requests > max_qp_wr)
-		cdata->max_requests = max_qp_wr;
-	ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
+	if (ep->rep_max_requests > max_qp_wr)
+		ep->rep_max_requests = max_qp_wr;
+	ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
 	if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
-		cdata->max_requests = max_qp_wr / depth;
-		if (!cdata->max_requests)
+		ep->rep_max_requests = max_qp_wr / depth;
+		if (!ep->rep_max_requests)
 			return -EINVAL;
-		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
-					       depth;
+		ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
 	}
 	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
 	ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
-	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+	ep->rep_attr.cap.max_recv_wr = ep->rep_max_requests;
 	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
 	ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
 
-	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
-				ia->ri_max_frwr_depth);
+	ia->ri_max_segs =
+		DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth);
+	/* Reply chunks require segments for head and tail buffers */
+	ia->ri_max_segs += 2;
+	if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
+		ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
 	return 0;
 }
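
For reference, the segment accounting above reduces to a few lines of
arithmetic. A minimal userspace sketch, using illustrative constants
rather than the kernel's real values:

	#include <stdio.h>

	/* Illustrative values; the real ones live in xprt_rdma.h. */
	#define RPCRDMA_MAX_DATA_SEGS	256
	#define RPCRDMA_MAX_HDR_SEGS	16
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	/* Mirrors the frwr_open calculation: enough MRs to cover the
	 * largest payload, plus two segments for the Reply chunk's
	 * head and tail buffers, clamped to what a transport header
	 * can describe.
	 */
	static unsigned int max_segs(unsigned int max_frwr_depth)
	{
		unsigned int segs;

		segs = DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, max_frwr_depth);
		segs += 2;
		if (segs > RPCRDMA_MAX_HDR_SEGS)
			segs = RPCRDMA_MAX_HDR_SEGS;
		return segs;
	}

	int main(void)
	{
		printf("depth 128 -> %u\n", max_segs(128));	/* 4 */
		printf("depth 16  -> %u\n", max_segs(16));	/* 16, clamped */
		return 0;
	}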
 
-/* FRWR mode conveys a list of pages per chunk segment. The
+/**
+ * frwr_maxpages - Compute size of largest payload
+ * @r_xprt: transport
+ *
+ * Returns maximum size of an RPC message, in pages.
+ *
+ * FRWR mode conveys a list of pages per chunk segment. The
  * maximum length of that list is the FRWR page list depth.
  */
-static size_t
-frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
 	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
-}
-
-static void
-__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
-{
-	if (wc->status != IB_WC_WR_FLUSH_ERR)
-		pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
-		       wr, ib_wc_status_msg(wc->status),
-		       wc->status, wc->vendor_err);
+		     (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
 }
 
 /**
- * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
- * @cq:	completion queue (ignored)
- * @wc:	completed WR
+ * frwr_map - Register a memory region
+ * @r_xprt: controlling transport
+ * @seg: memory region co-ordinates
+ * @nsegs: number of segments remaining
+ * @writing: true when RDMA Write will be used
+ * @xid: XID of RPC using the registered memory
+ * @mr: MR to fill in
  *
- */
-static void
-frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct rpcrdma_frwr *frwr =
-			container_of(cqe, struct rpcrdma_frwr, fr_cqe);
-
-	/* WARNING: Only wr_cqe and status are reliable at this point */
-	if (wc->status != IB_WC_SUCCESS) {
-		frwr->fr_state = FRWR_FLUSHED_FR;
-		__frwr_sendcompletion_flush(wc, "fastreg");
-	}
-	trace_xprtrdma_wc_fastreg(wc, frwr);
-}
-
-/**
- * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
- * @cq:	completion queue (ignored)
- * @wc:	completed WR
- *
- */
-static void
-frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
-						 fr_cqe);
-
-	/* WARNING: Only wr_cqe and status are reliable at this point */
-	if (wc->status != IB_WC_SUCCESS) {
-		frwr->fr_state = FRWR_FLUSHED_LI;
-		__frwr_sendcompletion_flush(wc, "localinv");
-	}
-	trace_xprtrdma_wc_li(wc, frwr);
-}
-
-/**
- * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
- * @cq:	completion queue (ignored)
- * @wc:	completed WR
- *
- * Awaken anyone waiting for an MR to finish being fenced.
- */
-static void
-frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
-						 fr_cqe);
-
-	/* WARNING: Only wr_cqe and status are reliable at this point */
-	if (wc->status != IB_WC_SUCCESS) {
-		frwr->fr_state = FRWR_FLUSHED_LI;
-		__frwr_sendcompletion_flush(wc, "localinv");
-	}
-	complete(&frwr->fr_linv_done);
-	trace_xprtrdma_wc_li_wake(wc, frwr);
-}
-
-/* Post a REG_MR Work Request to register a memory region
+ * Prepare a REG_MR Work Request to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
+ *
+ * Returns the next segment or a negative errno pointer.
+ * On success, @mr is filled in.
  */
-static struct rpcrdma_mr_seg *
-frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
-	    int nsegs, bool writing, struct rpcrdma_mr **out)
+struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
+				struct rpcrdma_mr_seg *seg,
+				int nsegs, bool writing, __be32 xid,
+				struct rpcrdma_mr *mr)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
-	struct rpcrdma_frwr *frwr;
-	struct rpcrdma_mr *mr;
-	struct ib_mr *ibmr;
 	struct ib_reg_wr *reg_wr;
+	struct ib_mr *ibmr;
 	int i, n;
 	u8 key;
 
-	mr = NULL;
-	do {
-		if (mr)
-			rpcrdma_mr_defer_recovery(mr);
-		mr = rpcrdma_mr_get(r_xprt);
-		if (!mr)
-			return ERR_PTR(-EAGAIN);
-	} while (mr->frwr.fr_state != FRWR_IS_INVALID);
-	frwr = &mr->frwr;
-	frwr->fr_state = FRWR_IS_VALID;
-
 	if (nsegs > ia->ri_max_frwr_depth)
 		nsegs = ia->ri_max_frwr_depth;
 	for (i = 0; i < nsegs;) {
@@ -406,7 +344,7 @@
 
 		++seg;
 		++i;
-		if (holes_ok)
+		if (ia->ri_mrtype == IB_MR_TYPE_SG_GAPS)
 			continue;
 		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
 		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
@@ -414,20 +352,22 @@
 	}
 	mr->mr_dir = rpcrdma_data_dir(writing);
 
-	mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
+	mr->mr_nents =
+		ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
 	if (!mr->mr_nents)
 		goto out_dmamap_err;
-	trace_xprtrdma_dma_map(mr);
 
-	ibmr = frwr->fr_mr;
+	ibmr = mr->frwr.fr_mr;
 	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
 	if (unlikely(n != mr->mr_nents))
 		goto out_mapmr_err;
 
+	ibmr->iova &= 0x00000000ffffffff;
+	ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32;
 	key = (u8)(ibmr->rkey & 0x000000FF);
 	ib_update_fast_reg_key(ibmr, ++key);
 
-	reg_wr = &frwr->fr_regwr;
+	reg_wr = &mr->frwr.fr_regwr;
 	reg_wr->mr = ibmr;
 	reg_wr->key = ibmr->rkey;
 	reg_wr->access = writing ?
@@ -437,32 +377,49 @@
 	mr->mr_handle = ibmr->rkey;
 	mr->mr_length = ibmr->length;
 	mr->mr_offset = ibmr->iova;
+	trace_xprtrdma_mr_map(mr);
 
-	*out = mr;
 	return seg;
 
 out_dmamap_err:
-	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
-	       mr->mr_sg, i);
-	frwr->fr_state = FRWR_IS_INVALID;
-	rpcrdma_mr_put(mr);
+	mr->mr_dir = DMA_NONE;
+	trace_xprtrdma_frwr_sgerr(mr, i);
 	return ERR_PTR(-EIO);
 
 out_mapmr_err:
-	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
-	       frwr->fr_mr, n, mr->mr_nents);
-	rpcrdma_mr_defer_recovery(mr);
+	trace_xprtrdma_frwr_maperr(mr, n);
 	return ERR_PTR(-EIO);
 }
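
The two iova lines in frwr_map above are worth a second look: they keep
the low 32 bits of the registered address and stamp the RPC's XID into
the high 32 bits, so a completion or Remote Invalidation can later be
matched to the RPC that owned the MR. A standalone sketch of just the
bit manipulation (function name is illustrative):

	#include <stdint.h>
	#include <stdio.h>

	/* Replace the upper half of a 64-bit iova with a 32-bit XID,
	 * as frwr_map does before ib_update_fast_reg_key().
	 */
	static uint64_t stamp_xid(uint64_t iova, uint32_t xid)
	{
		iova &= 0x00000000ffffffffULL;
		iova |= (uint64_t)xid << 32;
		return iova;
	}

	int main(void)
	{
		uint64_t iova = stamp_xid(0xdeadbeefcafef00dULL, 0x12345678);

		/* Prints 0x12345678cafef00d */
		printf("iova = 0x%016llx\n", (unsigned long long)iova);
		return 0;
	}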
 
-/* Post Send WR containing the RPC Call message.
+/**
+ * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
  *
- * For FRMR, chain any FastReg WRs to the Send WR. Only a
+ */
+static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_frwr *frwr =
+		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	trace_xprtrdma_wc_fastreg(wc, frwr);
+	/* The MR will get recycled when the associated req is retransmitted */
+}
+
+/**
+ * frwr_send - post Send WR containing the RPC Call message
+ * @ia: interface adapter
+ * @req: Prepared RPC Call
+ *
+ * For FRWR, chain any FastReg WRs to the Send WR. Only a
  * single ib_post_send call is needed to register memory
  * and then post the Send WR.
+ *
+ * Returns the result of ib_post_send.
  */
-static int
-frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
 {
 	struct ib_send_wr *post_wr;
 	struct rpcrdma_mr *mr;
@@ -484,45 +441,94 @@
 	}
 
 	/* If ib_post_send fails, the next ->send_request for
-	 * @req will queue these MWs for recovery.
+	 * @req will queue these MRs for recovery.
 	 */
 	return ib_post_send(ia->ri_id->qp, post_wr, NULL);
 }
 
-/* Handle a remotely invalidated mr on the @mrs list
+/**
+ * frwr_reminv - handle a remotely invalidated MR on the @mrs list
+ * @rep: Received reply
+ * @mrs: list of MRs to check
+ *
  */
-static void
-frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
+void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
 {
 	struct rpcrdma_mr *mr;
 
 	list_for_each_entry(mr, mrs, mr_list)
 		if (mr->mr_handle == rep->rr_inv_rkey) {
 			list_del_init(&mr->mr_list);
-			trace_xprtrdma_remoteinv(mr);
-			mr->frwr.fr_state = FRWR_IS_INVALID;
-			rpcrdma_mr_unmap_and_put(mr);
+			trace_xprtrdma_mr_remoteinv(mr);
+			rpcrdma_mr_put(mr);
 			break;	/* only one invalidated MR per RPC */
 		}
 }
 
-/* Invalidate all memory regions that were registered for "req".
+static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
+{
+	if (wc->status != IB_WC_SUCCESS)
+		rpcrdma_mr_recycle(mr);
+	else
+		rpcrdma_mr_put(mr);
+}
+
+/**
+ * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
  *
- * Sleeps until it is safe for the host CPU to access the
- * previously mapped memory regions.
- *
- * Caller ensures that @mrs is not empty before the call. This
- * function empties the list.
  */
-static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
+static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_frwr *frwr =
+		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	trace_xprtrdma_wc_li(wc, frwr);
+	__frwr_release_mr(wc, mr);
+}
+
+/**
+ * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
+ *
+ * Awaken anyone waiting for an MR to finish being fenced.
+ */
+static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_frwr *frwr =
+		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	trace_xprtrdma_wc_li_wake(wc, frwr);
+	__frwr_release_mr(wc, mr);
+	complete(&frwr->fr_linv_done);
+}
+
+/**
+ * frwr_unmap_sync - invalidate memory regions that were registered for @req
+ * @r_xprt: controlling transport instance
+ * @req: rpcrdma_req with a non-empty list of MRs to process
+ *
+ * Sleeps until it is safe for the host CPU to access the previously mapped
+ * memory regions. This guarantees that registered MRs are properly fenced
+ * from the server before the RPC consumer accesses the data in them. It
+ * also ensures proper Send flow control: waking the next RPC waits until
+ * this RPC has relinquished all its Send Queue entries.
+ */
+void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 {
 	struct ib_send_wr *first, **prev, *last;
 	const struct ib_send_wr *bad_wr;
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_frwr *frwr;
 	struct rpcrdma_mr *mr;
-	int count, rc;
+	int rc;
 
 	/* ORDER: Invalidate all of the MRs first
 	 *
@@ -530,33 +536,31 @@
 	 * a single ib_post_send() call.
 	 */
 	frwr = NULL;
-	count = 0;
 	prev = &first;
-	list_for_each_entry(mr, mrs, mr_list) {
-		mr->frwr.fr_state = FRWR_IS_INVALID;
+	while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
+		trace_xprtrdma_mr_localinv(mr);
+		r_xprt->rx_stats.local_inv_needed++;
 
 		frwr = &mr->frwr;
-		trace_xprtrdma_localinv(mr);
-
 		frwr->fr_cqe.done = frwr_wc_localinv;
 		last = &frwr->fr_invwr;
-		memset(last, 0, sizeof(*last));
+		last->next = NULL;
 		last->wr_cqe = &frwr->fr_cqe;
+		last->sg_list = NULL;
+		last->num_sge = 0;
 		last->opcode = IB_WR_LOCAL_INV;
+		last->send_flags = IB_SEND_SIGNALED;
 		last->ex.invalidate_rkey = mr->mr_handle;
-		count++;
 
 		*prev = last;
 		prev = &last->next;
 	}
-	if (!frwr)
-		goto unmap;
 
 	/* Strong send queue ordering guarantees that when the
 	 * last WR in the chain completes, all WRs in the chain
 	 * are complete.
 	 */
-	last->send_flags = IB_SEND_SIGNALED;
 	frwr->fr_cqe.done = frwr_wc_localinv_wake;
 	reinit_completion(&frwr->fr_linv_done);
 
@@ -564,52 +568,128 @@
 	 * replaces the QP. The RPC reply handler won't call us
 	 * unless ri_id->qp is a valid pointer.
 	 */
-	r_xprt->rx_stats.local_inv_needed++;
 	bad_wr = NULL;
-	rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
+	rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+	trace_xprtrdma_post_send(req, rc);
+
+	/* The final LOCAL_INV WR in the chain is supposed to
+	 * do the wake. If it was never posted, the wake will
+	 * not happen, so don't wait in that case.
+	 */
 	if (bad_wr != first)
 		wait_for_completion(&frwr->fr_linv_done);
-	if (rc)
-		goto reset_mrs;
+	if (!rc)
+		return;
 
-	/* ORDER: Now DMA unmap all of the MRs, and return
-	 * them to the free MR list.
-	 */
-unmap:
-	while (!list_empty(mrs)) {
-		mr = rpcrdma_mr_pop(mrs);
-		rpcrdma_mr_unmap_and_put(mr);
-	}
-	return;
-
-reset_mrs:
-	pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
-
-	/* Find and reset the MRs in the LOCAL_INV WRs that did not
-	 * get posted.
+	/* Recycle MRs in the LOCAL_INV chain that did not get posted.
 	 */
 	while (bad_wr) {
 		frwr = container_of(bad_wr, struct rpcrdma_frwr,
 				    fr_invwr);
 		mr = container_of(frwr, struct rpcrdma_mr, frwr);
-
-		__frwr_mr_reset(ia, mr);
-
 		bad_wr = bad_wr->next;
+
+		list_del_init(&mr->mr_list);
+		rpcrdma_mr_recycle(mr);
 	}
-	goto unmap;
 }
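
The WR chain in frwr_unmap_sync is built with the "pointer to the
previous next pointer" idiom, which avoids a special case for the first
element and leaves "last" pointing at the WR whose completion does the
wake. A compact userspace sketch of the idiom (the struct and its
fields are stand-ins, not the ib_send_wr layout):

	#include <stdio.h>

	struct wr {
		struct wr *next;
		int rkey;
	};

	int main(void)
	{
		struct wr pool[3], *first, *last = NULL, **prev = &first;
		const struct wr *w;
		int i;

		/* Each iteration links a new WR onto the tail; *prev is
		 * either &first or the previous WR's next field.
		 */
		for (i = 0; i < 3; i++) {
			last = &pool[i];
			last->next = NULL;
			last->rkey = 0x100 + i;
			*prev = last;
			prev = &last->next;
		}

		/* Send queue ordering means the completion of "last"
		 * implies all earlier WRs in the chain completed too.
		 */
		for (w = first; w; w = w->next)
			printf("LOCAL_INV rkey=0x%x%s\n", w->rkey,
			       w->next ? "" : " (wakes waiter)");
		return 0;
	}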
 
-const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
-	.ro_map				= frwr_op_map,
-	.ro_send			= frwr_op_send,
-	.ro_reminv			= frwr_op_reminv,
-	.ro_unmap_sync			= frwr_op_unmap_sync,
-	.ro_recover_mr			= frwr_op_recover_mr,
-	.ro_open			= frwr_op_open,
-	.ro_maxpages			= frwr_op_maxpages,
-	.ro_init_mr			= frwr_op_init_mr,
-	.ro_release_mr			= frwr_op_release_mr,
-	.ro_displayname			= "frwr",
-	.ro_send_w_inv_ok		= RPCRDMA_CMP_F_SND_W_INV_OK,
-};
+/**
+ * frwr_wc_localinv_done - Invoked by RDMA provider for a signaled LOCAL_INV WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
+ *
+ */
+static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_frwr *frwr =
+		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+	struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	trace_xprtrdma_wc_li_done(wc, frwr);
+	__frwr_release_mr(wc, mr);
+
+	/* Ensure @rep is generated before __frwr_release_mr */
+	smp_rmb();
+	rpcrdma_complete_rqst(rep);
+}
+
+/**
+ * frwr_unmap_async - invalidate memory regions that were registered for @req
+ * @r_xprt: controlling transport instance
+ * @req: rpcrdma_req with a non-empty list of MRs to process
+ *
+ * This guarantees that registered MRs are properly fenced from the
+ * server before the RPC consumer accesses the data in them. It also
+ * ensures proper Send flow control: waking the next RPC waits until
+ * this RPC has relinquished all its Send Queue entries.
+ */
+void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+	struct ib_send_wr *first, *last, **prev;
+	const struct ib_send_wr *bad_wr;
+	struct rpcrdma_frwr *frwr;
+	struct rpcrdma_mr *mr;
+	int rc;
+
+	/* Chain the LOCAL_INV Work Requests and post them with
+	 * a single ib_post_send() call.
+	 */
+	frwr = NULL;
+	prev = &first;
+	while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
+		trace_xprtrdma_mr_localinv(mr);
+		r_xprt->rx_stats.local_inv_needed++;
+
+		frwr = &mr->frwr;
+		frwr->fr_cqe.done = frwr_wc_localinv;
+		last = &frwr->fr_invwr;
+		last->next = NULL;
+		last->wr_cqe = &frwr->fr_cqe;
+		last->sg_list = NULL;
+		last->num_sge = 0;
+		last->opcode = IB_WR_LOCAL_INV;
+		last->send_flags = IB_SEND_SIGNALED;
+		last->ex.invalidate_rkey = mr->mr_handle;
+
+		*prev = last;
+		prev = &last->next;
+	}
+
+	/* Strong send queue ordering guarantees that when the
+	 * last WR in the chain completes, all WRs in the chain
+	 * are complete. The last completion will wake up the
+	 * RPC waiter.
+	 */
+	frwr->fr_cqe.done = frwr_wc_localinv_done;
+
+	/* Transport disconnect drains the receive CQ before it
+	 * replaces the QP. The RPC reply handler won't call us
+	 * unless ri_id->qp is a valid pointer.
+	 */
+	bad_wr = NULL;
+	rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+	trace_xprtrdma_post_send(req, rc);
+	if (!rc)
+		return;
+
+	/* Recycle MRs in the LOCAL_INV chain that did not get posted.
+	 */
+	while (bad_wr) {
+		frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
+		mr = container_of(frwr, struct rpcrdma_mr, frwr);
+		bad_wr = bad_wr->next;
+
+		rpcrdma_mr_recycle(mr);
+	}
+
+	/* The final LOCAL_INV WR in the chain is supposed to
+	 * do the wake. If it was never posted, the wake will
+	 * not happen, so wake here in that case.
+	 */
+	rpcrdma_complete_rqst(req->rl_reply);
+}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c8ae983..b86b5fd 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -71,7 +71,6 @@
 	size = RPCRDMA_HDRLEN_MIN;
 
 	/* Maximum Read list size */
-	maxsegs += 2;	/* segment for head and tail buffers */
 	size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
 
 	/* Minimal Read chunk size */
@@ -97,7 +96,6 @@
 	size = RPCRDMA_HDRLEN_MIN;
 
 	/* Maximum Write list size */
-	maxsegs += 2;	/* segment for head and tail buffers */
 	size = sizeof(__be32);		/* segment count */
 	size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
 	size += sizeof(__be32);	/* list discriminator */
@@ -107,16 +105,23 @@
 	return size;
 }
 
+/**
+ * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
+ * @r_xprt: transport instance to initialize
+ *
+ * The max_inline fields contain the maximum size of an RPC message
+ * so the marshaling code doesn't have to repeat this calculation
+ * for every RPC.
+ */
 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
 {
-	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	unsigned int maxsegs = ia->ri_max_segs;
+	unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs;
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
 
-	ia->ri_max_inline_write = cdata->inline_wsize -
-				  rpcrdma_max_call_header_size(maxsegs);
-	ia->ri_max_inline_read = cdata->inline_rsize -
-				 rpcrdma_max_reply_header_size(maxsegs);
+	ep->rep_max_inline_send =
+		ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
+	ep->rep_max_inline_recv =
+		ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
 }
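
In other words, each inline threshold is the negotiated inline buffer
size minus the worst-case transport header. A trivial sketch with
made-up numbers:

	#include <stdio.h>

	int main(void)
	{
		unsigned int inline_send = 4096; /* assumed rep_inline_send */
		unsigned int call_hdr_max = 140; /* assumed header maximum */

		/* Largest Call payload that still fits inline. */
		printf("max inline send = %u\n", inline_send - call_hdr_max);
		return 0;
	}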
 
 /* The client can send a request inline as long as the RPCRDMA header
@@ -133,7 +138,7 @@
 	struct xdr_buf *xdr = &rqst->rq_snd_buf;
 	unsigned int count, remaining, offset;
 
-	if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
+	if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
 		return false;
 
 	if (xdr->page_len) {
@@ -161,9 +166,21 @@
 static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 				   struct rpc_rqst *rqst)
 {
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
+}
 
-	return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
+/* The client is required to provide a Reply chunk if the maximum
+ * size of the non-payload part of the RPC Reply is larger than
+ * the inline threshold.
+ */
+static bool
+rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
+			  const struct rpc_rqst *rqst)
+{
+	const struct xdr_buf *buf = &rqst->rq_rcv_buf;
+
+	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
+		r_xprt->rx_ep.rep_max_inline_recv;
 }
 
 /* Split @vec on page boundaries into SGEs. FMR registers pages, not
@@ -220,11 +237,12 @@
 	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
 	page_base = offset_in_page(xdrbuf->page_base);
 	while (len) {
-		if (unlikely(!*ppages)) {
-			/* XXX: Certain upper layer operations do
-			 *	not provide receive buffer pages.
-			 */
-			*ppages = alloc_page(GFP_ATOMIC);
+		/* ACL likes to be lazy in allocating pages - ACLs
+		 * are small by default but can get huge.
+		 */
+		if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
+			if (!*ppages)
+				*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
 			if (!*ppages)
 				return -ENOBUFS;
 		}
@@ -324,6 +342,32 @@
 	return 0;
 }
 
+static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
+						 struct rpcrdma_req *req,
+						 struct rpcrdma_mr_seg *seg,
+						 int nsegs, bool writing,
+						 struct rpcrdma_mr **mr)
+{
+	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
+	if (!*mr) {
+		*mr = rpcrdma_mr_get(r_xprt);
+		if (!*mr)
+			goto out_getmr_err;
+		trace_xprtrdma_mr_get(req);
+		(*mr)->mr_req = req;
+	}
+
+	rpcrdma_mr_push(*mr, &req->rl_registered);
+	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);
+
+out_getmr_err:
+	trace_xprtrdma_nomrs(req);
+	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
+	if (r_xprt->rx_ep.rep_connected != -ENODEV)
+		schedule_work(&r_xprt->rx_buf.rb_refresh_worker);
+	return ERR_PTR(-EAGAIN);
+}
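
rpcrdma_mr_prepare implements a two-level allocation: the request's
private rl_free_mrs list is consulted first, then the transport-wide
pool, and a miss parks the RPC and kicks the refresh worker. A minimal
userspace model of the lookup order (types and names are stand-ins):

	#include <stdio.h>

	struct mr { struct mr *next; };
	struct pool { struct mr *head; };

	static struct mr *pool_pop(struct pool *p)
	{
		struct mr *mr = p->head;

		if (mr)
			p->head = mr->next;
		return mr;
	}

	/* Try the per-request cache, then the shared pool; NULL maps
	 * to the -EAGAIN path (wait for buffer space, refresh pool).
	 */
	static struct mr *mr_prepare(struct pool *req_free,
				     struct pool *xprt_pool)
	{
		struct mr *mr = pool_pop(req_free);

		return mr ? mr : pool_pop(xprt_pool);
	}

	int main(void)
	{
		struct mr a = { NULL }, b = { NULL };
		struct pool req_free = { &a }, xprt_pool = { &b };

		printf("%s\n", mr_prepare(&req_free, &xprt_pool) == &a ?
		       "per-request cache hit" : "?");
		printf("%s\n", mr_prepare(&req_free, &xprt_pool) == &b ?
		       "transport pool fallback" : "?");
		printf("%s\n", mr_prepare(&req_free, &xprt_pool) ?
		       "?" : "dry: take the -EAGAIN path");
		return 0;
	}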
+
 /* Register and XDR encode the Read list. Supports encoding a list of read
  * segments that belong to a single read chunk.
  *
@@ -338,9 +382,10 @@
  *
  * Only a single @pos value is currently supported.
  */
-static noinline int
-rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-			 struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype)
+static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
+				    struct rpcrdma_req *req,
+				    struct rpc_rqst *rqst,
+				    enum rpcrdma_chunktype rtype)
 {
 	struct xdr_stream *xdr = &req->rl_stream;
 	struct rpcrdma_mr_seg *seg;
@@ -348,6 +393,9 @@
 	unsigned int pos;
 	int nsegs;
 
+	if (rtype == rpcrdma_noch)
+		goto done;
+
 	pos = rqst->rq_snd_buf.head[0].iov_len;
 	if (rtype == rpcrdma_areadch)
 		pos = 0;
@@ -358,21 +406,20 @@
 		return nsegs;
 
 	do {
-		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
-						   false, &mr);
+		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
 		if (IS_ERR(seg))
 			return PTR_ERR(seg);
-		rpcrdma_mr_push(mr, &req->rl_registered);
 
 		if (encode_read_segment(xdr, mr, pos) < 0)
 			return -EMSGSIZE;
 
-		trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
+		trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
 		r_xprt->rx_stats.read_chunk_count++;
 		nsegs -= mr->mr_nents;
 	} while (nsegs);
 
-	return 0;
+done:
+	return encode_item_not_present(xdr);
 }
 
 /* Register and XDR encode the Write list. Supports encoding a list
@@ -390,9 +437,10 @@
  *
  * Only a single Write chunk is currently supported.
  */
-static noinline int
-rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-			  struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
+static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
+				     struct rpcrdma_req *req,
+				     struct rpc_rqst *rqst,
+				     enum rpcrdma_chunktype wtype)
 {
 	struct xdr_stream *xdr = &req->rl_stream;
 	struct rpcrdma_mr_seg *seg;
@@ -400,6 +448,9 @@
 	int nsegs, nchunks;
 	__be32 *segcount;
 
+	if (wtype != rpcrdma_writech)
+		goto done;
+
 	seg = req->rl_segments;
 	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
 				     rqst->rq_rcv_buf.head[0].iov_len,
@@ -416,16 +467,14 @@
 
 	nchunks = 0;
 	do {
-		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
-						   true, &mr);
+		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
 		if (IS_ERR(seg))
 			return PTR_ERR(seg);
-		rpcrdma_mr_push(mr, &req->rl_registered);
 
 		if (encode_rdma_segment(xdr, mr) < 0)
 			return -EMSGSIZE;
 
-		trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
+		trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
 		r_xprt->rx_stats.write_chunk_count++;
 		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
 		nchunks++;
@@ -435,7 +484,8 @@
 	/* Update count of segments in this Write chunk */
 	*segcount = cpu_to_be32(nchunks);
 
-	return 0;
+done:
+	return encode_item_not_present(xdr);
 }
 
 /* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -450,9 +500,10 @@
  * Returns zero on success, or a negative errno if a failure occurred.
  * @xdr is advanced to the next position in the stream.
  */
-static noinline int
-rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-			   struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
+static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
+				      struct rpcrdma_req *req,
+				      struct rpc_rqst *rqst,
+				      enum rpcrdma_chunktype wtype)
 {
 	struct xdr_stream *xdr = &req->rl_stream;
 	struct rpcrdma_mr_seg *seg;
@@ -460,6 +511,9 @@
 	int nsegs, nchunks;
 	__be32 *segcount;
 
+	if (wtype != rpcrdma_replych)
+		return encode_item_not_present(xdr);
+
 	seg = req->rl_segments;
 	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
 	if (nsegs < 0)
@@ -474,16 +528,14 @@
 
 	nchunks = 0;
 	do {
-		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
-						   true, &mr);
+		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
 		if (IS_ERR(seg))
 			return PTR_ERR(seg);
-		rpcrdma_mr_push(mr, &req->rl_registered);
 
 		if (encode_rdma_segment(xdr, mr) < 0)
 			return -EMSGSIZE;
 
-		trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
+		trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
 		r_xprt->rx_stats.reply_chunk_count++;
 		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
 		nchunks++;
@@ -496,51 +548,57 @@
 	return 0;
 }
 
+static void rpcrdma_sendctx_done(struct kref *kref)
+{
+	struct rpcrdma_req *req =
+		container_of(kref, struct rpcrdma_req, rl_kref);
+	struct rpcrdma_rep *rep = req->rl_reply;
+
+	rpcrdma_complete_rqst(rep);
+	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
+}
+
 /**
- * rpcrdma_unmap_sendctx - DMA-unmap Send buffers
+ * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
  * @sc: sendctx containing SGEs to unmap
  *
  */
-void
-rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
+void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
 {
-	struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
 	struct ib_sge *sge;
-	unsigned int count;
+
+	if (!sc->sc_unmap_count)
+		return;
 
 	/* The first two SGEs contain the transport header and
 	 * the inline buffer. These are always left mapped so
 	 * they can be cheaply re-used.
 	 */
-	sge = &sc->sc_sges[2];
-	for (count = sc->sc_unmap_count; count; ++sge, --count)
-		ib_dma_unmap_page(ia->ri_device,
-				  sge->addr, sge->length, DMA_TO_DEVICE);
+	for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
+	     ++sge, --sc->sc_unmap_count)
+		ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
+				  DMA_TO_DEVICE);
 
-	if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
-		smp_mb__after_atomic();
-		wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
-	}
+	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
 }
 
 /* Prepare an SGE for the RPC-over-RDMA transport header.
  */
-static bool
-rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
-			u32 len)
+static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
+				    struct rpcrdma_req *req, u32 len)
 {
 	struct rpcrdma_sendctx *sc = req->rl_sendctx;
 	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
 	struct ib_sge *sge = sc->sc_sges;
 
-	if (!rpcrdma_dma_map_regbuf(ia, rb))
+	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
 		goto out_regbuf;
 	sge->addr = rdmab_addr(rb);
 	sge->length = len;
 	sge->lkey = rdmab_lkey(rb);
 
-	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
-				      sge->length, DMA_TO_DEVICE);
+	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
+				      DMA_TO_DEVICE);
 	sc->sc_wr.num_sge++;
 	return true;
 
@@ -552,23 +610,23 @@
 /* Prepare the Send SGEs. The head and tail iovec, and each entry
  * in the page list, gets its own SGE.
  */
-static bool
-rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
-			 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
+static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
+				     struct rpcrdma_req *req,
+				     struct xdr_buf *xdr,
+				     enum rpcrdma_chunktype rtype)
 {
 	struct rpcrdma_sendctx *sc = req->rl_sendctx;
 	unsigned int sge_no, page_base, len, remaining;
 	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
-	struct ib_device *device = ia->ri_device;
 	struct ib_sge *sge = sc->sc_sges;
-	u32 lkey = ia->ri_pd->local_dma_lkey;
 	struct page *page, **ppages;
 
 	/* The head iovec is straightforward, as it is already
 	 * DMA-mapped. Sync the content that has changed.
 	 */
-	if (!rpcrdma_dma_map_regbuf(ia, rb))
+	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
 		goto out_regbuf;
+	sc->sc_device = rdmab_device(rb);
 	sge_no = 1;
 	sge[sge_no].addr = rdmab_addr(rb);
 	sge[sge_no].length = xdr->head[0].iov_len;
@@ -615,13 +673,14 @@
 				goto out_mapping_overflow;
 
 			len = min_t(u32, PAGE_SIZE - page_base, remaining);
-			sge[sge_no].addr = ib_dma_map_page(device, *ppages,
-							   page_base, len,
-							   DMA_TO_DEVICE);
-			if (ib_dma_mapping_error(device, sge[sge_no].addr))
+			sge[sge_no].addr =
+				ib_dma_map_page(rdmab_device(rb), *ppages,
+						page_base, len, DMA_TO_DEVICE);
+			if (ib_dma_mapping_error(rdmab_device(rb),
+						 sge[sge_no].addr))
 				goto out_mapping_err;
 			sge[sge_no].length = len;
-			sge[sge_no].lkey = lkey;
+			sge[sge_no].lkey = rdmab_lkey(rb);
 
 			sc->sc_unmap_count++;
 			ppages++;
@@ -642,20 +701,20 @@
 
 map_tail:
 		sge_no++;
-		sge[sge_no].addr = ib_dma_map_page(device, page,
-						   page_base, len,
-						   DMA_TO_DEVICE);
-		if (ib_dma_mapping_error(device, sge[sge_no].addr))
+		sge[sge_no].addr =
+			ib_dma_map_page(rdmab_device(rb), page, page_base, len,
+					DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr))
 			goto out_mapping_err;
 		sge[sge_no].length = len;
-		sge[sge_no].lkey = lkey;
+		sge[sge_no].lkey = rdmab_lkey(rb);
 		sc->sc_unmap_count++;
 	}
 
 out:
 	sc->sc_wr.num_sge += sge_no;
 	if (sc->sc_unmap_count)
-		__set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+		kref_get(&req->rl_kref);
 	return true;
 
 out_regbuf:
@@ -663,13 +722,13 @@
 	return false;
 
 out_mapping_overflow:
-	rpcrdma_unmap_sendctx(sc);
+	rpcrdma_sendctx_unmap(sc);
 	pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
 	return false;
 
 out_mapping_err:
-	rpcrdma_unmap_sendctx(sc);
-	pr_err("rpcrdma: Send mapping error\n");
+	rpcrdma_sendctx_unmap(sc);
+	trace_xprtrdma_dma_maperr(sge[sge_no].addr);
 	return false;
 }
 
@@ -688,22 +747,28 @@
 			  struct rpcrdma_req *req, u32 hdrlen,
 			  struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
 {
-	req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
+	int ret;
+
+	ret = -EAGAIN;
+	req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
 	if (!req->rl_sendctx)
-		return -EAGAIN;
+		goto err;
 	req->rl_sendctx->sc_wr.num_sge = 0;
 	req->rl_sendctx->sc_unmap_count = 0;
 	req->rl_sendctx->sc_req = req;
-	__clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+	kref_init(&req->rl_kref);
 
-	if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
-		return -EIO;
-
+	ret = -EIO;
+	if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
+		goto err;
 	if (rtype != rpcrdma_areadch)
-		if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
-			return -EIO;
-
+		if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype))
+			goto err;
 	return 0;
+
+err:
+	trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
+	return ret;
 }
 
 /**
@@ -736,8 +801,8 @@
 	int ret;
 
 	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
-	xdr_init_encode(xdr, &req->rl_hdrbuf,
-			req->rl_rdmabuf->rg_base);
+	xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
+			rqst);
 
 	/* Fixed header fields */
 	ret = -EMSGSIZE;
@@ -766,7 +831,8 @@
 	 */
 	if (rpcrdma_results_inline(r_xprt, rqst))
 		wtype = rpcrdma_noch;
-	else if (ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ)
+	else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
+		 rpcrdma_nonpayload_inline(r_xprt, rqst))
 		wtype = rpcrdma_writech;
 	else
 		wtype = rpcrdma_replych;
@@ -801,12 +867,7 @@
 	 * chunks. Very likely the connection has been replaced,
 	 * so these registrations are invalid and unusable.
 	 */
-	while (unlikely(!list_empty(&req->rl_registered))) {
-		struct rpcrdma_mr *mr;
-
-		mr = rpcrdma_mr_pop(&req->rl_registered);
-		rpcrdma_mr_defer_recovery(mr);
-	}
+	frwr_recycle(req);
 
 	/* This implementation supports the following combinations
 	 * of chunk lists in one RPC-over-RDMA Call message:
@@ -830,49 +891,28 @@
 	 * send a Call message with a Position Zero Read chunk and a
 	 * regular Read chunk at the same time.
 	 */
-	if (rtype != rpcrdma_noch) {
-		ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
-		if (ret)
-			goto out_err;
-	}
-	ret = encode_item_not_present(xdr);
+	ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
+	if (ret)
+		goto out_err;
+	ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
+	if (ret)
+		goto out_err;
+	ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
 	if (ret)
 		goto out_err;
 
-	if (wtype == rpcrdma_writech) {
-		ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
-		if (ret)
-			goto out_err;
-	}
-	ret = encode_item_not_present(xdr);
-	if (ret)
-		goto out_err;
-
-	if (wtype != rpcrdma_replych)
-		ret = encode_item_not_present(xdr);
-	else
-		ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
-	if (ret)
-		goto out_err;
-
-	trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
-
-	ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
+	ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
 					&rqst->rq_snd_buf, rtype);
 	if (ret)
 		goto out_err;
+
+	trace_xprtrdma_marshal(req, rtype, wtype);
 	return 0;
 
 out_err:
-	switch (ret) {
-	case -EAGAIN:
-		xprt_wait_for_buffer_space(rqst->rq_task, NULL);
-		break;
-	case -ENOBUFS:
-		break;
-	default:
-		r_xprt->rx_stats.failed_marshal_count++;
-	}
+	trace_xprtrdma_marshal_failed(rqst, ret);
+	r_xprt->rx_stats.failed_marshal_count++;
+	frwr_reset(req);
 	return ret;
 }
 
@@ -1190,17 +1230,20 @@
 		p = xdr_inline_decode(xdr, 2 * sizeof(*p));
 		if (!p)
 			break;
-		dprintk("RPC: %5u: %s: server reports version error (%u-%u)\n",
-			rqst->rq_task->tk_pid, __func__,
-			be32_to_cpup(p), be32_to_cpu(*(p + 1)));
+		dprintk("RPC:       %s: server reports "
+			"version error (%u-%u), xid %08x\n", __func__,
+			be32_to_cpup(p), be32_to_cpu(*(p + 1)),
+			be32_to_cpu(rep->rr_xid));
 		break;
 	case err_chunk:
-		dprintk("RPC: %5u: %s: server reports header decoding error\n",
-			rqst->rq_task->tk_pid, __func__);
+		dprintk("RPC:       %s: server reports "
+			"header decoding error, xid %08x\n", __func__,
+			be32_to_cpu(rep->rr_xid));
 		break;
 	default:
-		dprintk("RPC: %5u: %s: server reports unrecognized error %d\n",
-			rqst->rq_task->tk_pid, __func__, be32_to_cpup(p));
+		dprintk("RPC:       %s: server reports "
+			"unrecognized error %d, xid %08x\n", __func__,
+			be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
 	}
 
 	r_xprt->rx_stats.bad_reply_count++;
@@ -1216,11 +1259,8 @@
 	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
 	struct rpc_rqst *rqst = rep->rr_rqst;
-	unsigned long cwnd;
 	int status;
 
-	xprt->reestablish_timeout = 0;
-
 	switch (rep->rr_proc) {
 	case rdma_msg:
 		status = rpcrdma_decode_msg(r_xprt, rep, rqst);
@@ -1238,15 +1278,10 @@
 		goto out_badheader;
 
 out:
-	spin_lock(&xprt->recv_lock);
-	cwnd = xprt->cwnd;
-	xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
-	if (xprt->cwnd > cwnd)
-		xprt_release_rqst_cong(rqst->rq_task);
-
+	spin_lock(&xprt->queue_lock);
 	xprt_complete_rqst(rqst->rq_task, status);
 	xprt_unpin_rqst(rqst);
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 	return;
 
 /* If the incoming reply terminated a pending RPC, the next
@@ -1256,56 +1291,20 @@
 out_badheader:
 	trace_xprtrdma_reply_hdr(rep);
 	r_xprt->rx_stats.bad_reply_count++;
-	status = -EIO;
 	goto out;
 }
 
-void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+static void rpcrdma_reply_done(struct kref *kref)
 {
-	/* Invalidate and unmap the data payloads before waking
-	 * the waiting application. This guarantees the memory
-	 * regions are properly fenced from the server before the
-	 * application accesses the data. It also ensures proper
-	 * send flow control: waking the next RPC waits until this
-	 * RPC has relinquished all its Send Queue entries.
-	 */
-	if (!list_empty(&req->rl_registered))
-		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
-						    &req->rl_registered);
+	struct rpcrdma_req *req =
+		container_of(kref, struct rpcrdma_req, rl_kref);
 
-	/* Ensure that any DMA mapped pages associated with
-	 * the Send of the RPC Call have been unmapped before
-	 * allowing the RPC to complete. This protects argument
-	 * memory not controlled by the RPC client from being
-	 * re-used before we're done with it.
-	 */
-	if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
-		r_xprt->rx_stats.reply_waits_for_send++;
-		out_of_line_wait_on_bit(&req->rl_flags,
-					RPCRDMA_REQ_F_TX_RESOURCES,
-					bit_wait,
-					TASK_UNINTERRUPTIBLE);
-	}
+	rpcrdma_complete_rqst(req->rl_reply);
 }
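
The rl_kref conversion replaces the old RPCRDMA_REQ_F_TX_RESOURCES
wait-on-bit: the Send path holds one reference while its SGEs remain
DMA-mapped, the reply path holds the other, and whichever side drops
the last reference completes the RPC. The same last-one-out pattern in
a self-contained C11 sketch (names are illustrative):

	#include <stdatomic.h>
	#include <stdio.h>

	struct req {
		atomic_int refs;	/* stands in for rl_kref */
	};

	static void complete_rqst(struct req *req)
	{
		printf("RPC completed exactly once\n");
	}

	/* The final put runs the completion, no matter whether the
	 * Send unmap or the reply handler finishes last.
	 */
	static void ref_put(struct req *req)
	{
		if (atomic_fetch_sub(&req->refs, 1) == 1)
			complete_rqst(req);
	}

	int main(void)
	{
		struct req req;

		atomic_init(&req.refs, 1);	/* kref_init at marshal */
		atomic_fetch_add(&req.refs, 1);	/* Send maps extra SGEs */

		ref_put(&req);	/* Send completion unmapped the SGEs */
		ref_put(&req);	/* reply arrival: last put completes */
		return 0;
	}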
 
-/* Reply handling runs in the poll worker thread. Anything that
- * might wait is deferred to a separate workqueue.
- */
-void rpcrdma_deferred_completion(struct work_struct *work)
-{
-	struct rpcrdma_rep *rep =
-			container_of(work, struct rpcrdma_rep, rr_work);
-	struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
-	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
-
-	trace_xprtrdma_defer_cmp(rep);
-	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
-		r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
-	rpcrdma_release_rqst(r_xprt, req);
-	rpcrdma_complete_rqst(rep);
-}
-
-/* Process received RPC/RDMA messages.
+/**
+ * rpcrdma_reply_handler - Process received RPC/RDMA messages
+ * @rep: Incoming rpcrdma_rep object to process
  *
  * Errors must result in the RPC task either being awakened, or
  * allowed to timeout, to discover the errors at that time.
@@ -1320,14 +1319,15 @@
 	u32 credits;
 	__be32 *p;
 
-	--buf->rb_posted_receives;
-
-	if (rep->rr_hdrbuf.head[0].iov_len == 0)
-		goto out_badstatus;
+	/* Any data means we had a useful conversation, so
+	 * we don't need to delay the next reconnect.
+	 */
+	if (xprt->reestablish_timeout)
+		xprt->reestablish_timeout = 0;
 
 	/* Fixed transport header fields */
 	xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
-			rep->rr_hdrbuf.head[0].iov_base);
+			rep->rr_hdrbuf.head[0].iov_base, NULL);
 	p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
 	if (unlikely(!p))
 		goto out_shortreply;
@@ -1345,51 +1345,55 @@
 	/* Match incoming rpcrdma_rep to an rpcrdma_req to
 	 * get context for handling any incoming chunks.
 	 */
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
 	if (!rqst)
 		goto out_norqst;
 	xprt_pin_rqst(rqst);
+	spin_unlock(&xprt->queue_lock);
 
 	if (credits == 0)
 		credits = 1;	/* don't deadlock */
 	else if (credits > buf->rb_max_requests)
 		credits = buf->rb_max_requests;
-	buf->rb_credits = credits;
-
-	spin_unlock(&xprt->recv_lock);
+	if (buf->rb_credits != credits) {
+		spin_lock(&xprt->transport_lock);
+		buf->rb_credits = credits;
+		xprt->cwnd = credits << RPC_CWNDSHIFT;
+		spin_unlock(&xprt->transport_lock);
+	}
 
 	req = rpcr_to_rdmar(rqst);
+	if (req->rl_reply) {
+		trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
+		rpcrdma_recv_buffer_put(req->rl_reply);
+	}
 	req->rl_reply = rep;
 	rep->rr_rqst = rqst;
-	clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
 
 	trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
 
-	rpcrdma_post_recvs(r_xprt, false);
-	queue_work(rpcrdma_receive_wq, &rep->rr_work);
+	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
+		frwr_reminv(rep, &req->rl_registered);
+	if (!list_empty(&req->rl_registered))
+		frwr_unmap_async(r_xprt, req);
+		/* LocalInv completion will complete the RPC */
+	else
+		kref_put(&req->rl_kref, rpcrdma_reply_done);
 	return;
 
 out_badversion:
 	trace_xprtrdma_reply_vers(rep);
-	goto repost;
+	goto out;
 
-/* The RPC transaction has already been terminated, or the header
- * is corrupt.
- */
 out_norqst:
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 	trace_xprtrdma_reply_rqst(rep);
-	goto repost;
+	goto out;
 
 out_shortreply:
 	trace_xprtrdma_reply_short(rep);
 
-/* If no pending RPC transaction was matched, post a replacement
- * receive buffer before returning.
- */
-repost:
-	rpcrdma_post_recvs(r_xprt, false);
-out_badstatus:
+out:
 	rpcrdma_recv_buffer_put(rep);
 }
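
The credit update in rpcrdma_reply_handler clamps the server's grant
before it becomes the congestion window: never zero (which would
deadlock the transport) and never more than the local request count. A
small sketch of the clamp, with an assumed shift value:

	#include <stdio.h>

	#define RPC_CWNDSHIFT 8	/* illustrative; not the kernel's value */

	static unsigned long grant_to_cwnd(unsigned int credits,
					   unsigned int max_requests)
	{
		if (credits == 0)
			credits = 1;		/* don't deadlock */
		else if (credits > max_requests)
			credits = max_requests;
		return (unsigned long)credits << RPC_CWNDSHIFT;
	}

	int main(void)
	{
		printf("cwnd(0, 128)   = %lu\n", grant_to_cwnd(0, 128));
		printf("cwnd(300, 128) = %lu\n", grant_to_cwnd(300, 128));
		return 0;
	}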
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 134bef6..97bca50 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -73,8 +73,6 @@
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
-struct workqueue_struct *svc_rdma_wq;
-
 /*
  * This function implements reading and resetting an atomic_t stat
  * variable through read/write to a proc file. Any write to the file
@@ -230,14 +228,10 @@
 void svc_rdma_cleanup(void)
 {
 	dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
-	destroy_workqueue(svc_rdma_wq);
 	if (svcrdma_table_header) {
 		unregister_sysctl_table(svcrdma_table_header);
 		svcrdma_table_header = NULL;
 	}
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-	svc_unreg_xprt_class(&svc_rdma_bc_class);
-#endif
 	svc_unreg_xprt_class(&svc_rdma_class);
 }
 
@@ -249,18 +243,11 @@
 	dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
 	dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
 
-	svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
-	if (!svc_rdma_wq)
-		return -ENOMEM;
-
 	if (!svcrdma_table_header)
 		svcrdma_table_header =
 			register_sysctl_table(svcrdma_root_table);
 
 	/* Register RDMA with the SVC transport switch */
 	svc_reg_xprt_class(&svc_rdma_class);
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-	svc_reg_xprt_class(&svc_rdma_bc_class);
-#endif
 	return 0;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index b982766..d1fcc41 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -5,8 +5,6 @@
  * Support for backward direction RPCs on RPC/RDMA (server-side).
  */
 
-#include <linux/module.h>
-
 #include <linux/sunrpc/svc_rdma.h>
 
 #include "xprt_rdma.h"
@@ -32,7 +30,6 @@
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	struct kvec *dst, *src = &rcvbuf->head[0];
 	struct rpc_rqst *req;
-	unsigned long cwnd;
 	u32 credits;
 	size_t len;
 	__be32 xid;
@@ -56,7 +53,7 @@
 	if (src->iov_len < 24)
 		goto out_shortreply;
 
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	req = xprt_lookup_rqst(xprt, xid);
 	if (!req)
 		goto out_notfound;
@@ -66,6 +63,8 @@
 	if (dst->iov_len < len)
 		goto out_unlock;
 	memcpy(dst->iov_base, p, len);
+	xprt_pin_rqst(req);
+	spin_unlock(&xprt->queue_lock);
 
 	credits = be32_to_cpup(rdma_resp + 2);
 	if (credits == 0)
@@ -73,20 +72,18 @@
 	else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
 		credits = r_xprt->rx_buf.rb_bc_max_requests;
 
-	spin_lock_bh(&xprt->transport_lock);
-	cwnd = xprt->cwnd;
+	spin_lock(&xprt->transport_lock);
 	xprt->cwnd = credits << RPC_CWNDSHIFT;
-	if (xprt->cwnd > cwnd)
-		xprt_release_rqst_cong(req->rq_task);
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 
-
+	spin_lock(&xprt->queue_lock);
 	ret = 0;
 	xprt_complete_rqst(req->rq_task, rcvbuf->len);
+	xprt_unpin_rqst(req);
 	rcvbuf->len = 0;
 
 out_unlock:
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 out:
 	return ret;
 
@@ -203,11 +200,10 @@
 		svc_rdma_send_ctxt_put(rdma, ctxt);
 		goto drop_connection;
 	}
-	return rc;
+	return 0;
 
 drop_connection:
 	dprintk("svcrdma: failed to send bc call\n");
-	xprt_disconnect_done(xprt);
 	return -ENOTCONN;
 }
 
@@ -215,9 +211,8 @@
  * connection.
  */
 static int
-xprt_rdma_bc_send_request(struct rpc_task *task)
+xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
 {
-	struct rpc_rqst *rqst = task->tk_rqstp;
 	struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
 	struct svcxprt_rdma *rdma;
 	int ret;
@@ -225,17 +220,15 @@
 	dprintk("svcrdma: sending bc call with xid: %08x\n",
 		be32_to_cpu(rqst->rq_xid));
 
-	if (!mutex_trylock(&sxprt->xpt_mutex)) {
-		rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
-		if (!mutex_trylock(&sxprt->xpt_mutex))
-			return -EAGAIN;
-		rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
-	}
+	mutex_lock(&sxprt->xpt_mutex);
 
 	ret = -ENOTCONN;
 	rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
-	if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
+	if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
 		ret = rpcrdma_bc_send_request(rdma, rqst);
+		if (ret == -ENOTCONN)
+			svc_close_xprt(sxprt);
+	}
 
 	mutex_unlock(&sxprt->xpt_mutex);
 
@@ -257,7 +250,6 @@
 	dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
 
 	xprt_free(xprt);
-	module_put(THIS_MODULE);
 }
 
 static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
@@ -269,7 +261,7 @@
 	.buf_alloc		= xprt_rdma_bc_allocate,
 	.buf_free		= xprt_rdma_bc_free,
 	.send_request		= xprt_rdma_bc_send_request,
-	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
+	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
 	.close			= xprt_rdma_bc_close,
 	.destroy		= xprt_rdma_bc_put,
 	.print_stats		= xprt_rdma_print_stats
@@ -312,7 +304,6 @@
 	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
 
 	xprt->prot = XPRT_TRANSPORT_BC_RDMA;
-	xprt->tsh_size = 0;
 	xprt->ops = &xprt_rdma_bc_procs;
 
 	memcpy(&xprt->addr, args->dstaddr, args->addrlen);
@@ -329,20 +320,9 @@
 	args->bc_xprt->xpt_bc_xprt = xprt;
 	xprt->bc_xprt = args->bc_xprt;
 
-	if (!try_module_get(THIS_MODULE))
-		goto out_fail;
-
 	/* Final put for backchannel xprt is in __svc_rdma_free */
 	xprt_get(xprt);
 	return xprt;
-
-out_fail:
-	xprt_rdma_free_addresses(xprt);
-	args->bc_xprt->xpt_bc_xprt = NULL;
-	args->bc_xprt->xpt_bc_xps = NULL;
-	xprt_put(xprt);
-	xprt_free(xprt);
-	return ERR_PTR(-EINVAL);
 }
 
 struct xprt_class xprt_rdma_bc = {
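
The backchannel receive hunk above switches recv_lock to queue_lock and pins the rqst so it cannot be released while the lock is dropped for the congestion-window update. Below is a minimal userspace sketch of that pin-while-unlocked pattern, using pthreads; every name in it is invented for illustration and none of it is kernel API.

#include <pthread.h>
#include <stdio.h>

struct rqst {
	int pinned;		/* stands in for the xprt_pin_rqst() count */
	char payload[64];
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static void receive_reply(struct rqst *req, const char *data)
{
	pthread_mutex_lock(&queue_lock);
	snprintf(req->payload, sizeof(req->payload), "%s", data);
	req->pinned = 1;	/* keep req alive across the unlock */
	pthread_mutex_unlock(&queue_lock);

	/* ...credit and cwnd bookkeeping happens here, unlocked... */

	pthread_mutex_lock(&queue_lock);
	req->pinned = 0;	/* complete the request, drop the pin */
	pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
	struct rqst req = { 0 };

	receive_reply(&req, "backchannel reply");
	printf("%s\n", req.payload);
	return 0;
}
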
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index b24d5b8..96bccd3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -172,9 +172,10 @@
 void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
 {
 	struct svc_rdma_recv_ctxt *ctxt;
+	struct llist_node *node;
 
-	while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
-		list_del(&ctxt->rc_list);
+	while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
+		ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
 		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 	}
 }
@@ -183,21 +184,18 @@
 svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
 {
 	struct svc_rdma_recv_ctxt *ctxt;
+	struct llist_node *node;
 
-	spin_lock(&rdma->sc_recv_lock);
-	ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);
-	if (!ctxt)
+	node = llist_del_first(&rdma->sc_recv_ctxts);
+	if (!node)
 		goto out_empty;
-	list_del(&ctxt->rc_list);
-	spin_unlock(&rdma->sc_recv_lock);
+	ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
 
 out:
 	ctxt->rc_page_count = 0;
 	return ctxt;
 
 out_empty:
-	spin_unlock(&rdma->sc_recv_lock);
-
 	ctxt = svc_rdma_recv_ctxt_alloc(rdma);
 	if (!ctxt)
 		return NULL;
@@ -218,11 +216,9 @@
 	for (i = 0; i < ctxt->rc_page_count; i++)
 		put_page(ctxt->rc_pages[i]);
 
-	if (!ctxt->rc_temp) {
-		spin_lock(&rdma->sc_recv_lock);
-		list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
-		spin_unlock(&rdma->sc_recv_lock);
-	} else
+	if (!ctxt->rc_temp)
+		llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
+	else
 		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 }
 
@@ -272,11 +268,8 @@
 			return false;
 		ctxt->rc_temp = true;
 		ret = __svc_rdma_post_recv(rdma, ctxt);
-		if (ret) {
-			pr_err("svcrdma: failure posting recv buffers: %d\n",
-			       ret);
+		if (ret)
 			return false;
-		}
 	}
 	return true;
 }
@@ -314,17 +307,14 @@
 
 	spin_lock(&rdma->sc_rq_dto_lock);
 	list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
-	spin_unlock(&rdma->sc_rq_dto_lock);
+	/* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
 	set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
+	spin_unlock(&rdma->sc_rq_dto_lock);
 	if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
 		svc_xprt_enqueue(&rdma->sc_xprt);
 	goto out;
 
 flushed:
-	if (wc->status != IB_WC_WR_FLUSH_ERR)
-		pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
-		       ib_wc_status_msg(wc->status),
-		       wc->status, wc->vendor_err);
 post_err:
 	svc_rdma_recv_ctxt_put(rdma, ctxt);
 	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
@@ -485,6 +475,68 @@
 	return p;
 }
 
+/* RPC-over-RDMA Version One private extension: Remote Invalidation.
+ * Responder's choice: requester signals it can handle Send With
+ * Invalidate, and responder chooses one R_key to invalidate.
+ *
+ * If there is exactly one distinct R_key in the received transport
+ * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.
+ *
+ * Perform this operation while the received transport header is
+ * still in the CPU cache.
+ */
+static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
+				  struct svc_rdma_recv_ctxt *ctxt)
+{
+	__be32 inv_rkey, *p;
+	u32 i, segcount;
+
+	ctxt->rc_inv_rkey = 0;
+
+	if (!rdma->sc_snd_w_inv)
+		return;
+
+	inv_rkey = xdr_zero;
+	p = ctxt->rc_recv_buf;
+	p += rpcrdma_fixed_maxsz;
+
+	/* Read list */
+	while (*p++ != xdr_zero) {
+		p++;	/* position */
+		if (inv_rkey == xdr_zero)
+			inv_rkey = *p;
+		else if (inv_rkey != *p)
+			return;
+		p += 4;
+	}
+
+	/* Write list */
+	while (*p++ != xdr_zero) {
+		segcount = be32_to_cpup(p++);
+		for (i = 0; i < segcount; i++) {
+			if (inv_rkey == xdr_zero)
+				inv_rkey = *p;
+			else if (inv_rkey != *p)
+				return;
+			p += 4;
+		}
+	}
+
+	/* Reply chunk */
+	if (*p++ != xdr_zero) {
+		segcount = be32_to_cpup(p++);
+		for (i = 0; i < segcount; i++) {
+			if (inv_rkey == xdr_zero)
+				inv_rkey = *p;
+			else if (inv_rkey != *p)
+				return;
+			p += 4;
+		}
+	}
+
+	ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
+}
+
 /* On entry, xdr->head[0].iov_base points to first byte in the
  * RPC-over-RDMA header.
  *
@@ -746,6 +798,7 @@
 		svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
 		return ret;
 	}
+	svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
 
 	p += rpcrdma_fixed_maxsz;
 	if (*p != xdr_zero)
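
The recv_ctxt hunks above retire sc_recv_lock in favor of an intrusive lock-free list (the kernel's llist). A self-contained userspace analogue using C11 atomics is sketched below: push is safe from any context, while pop must stay single-consumer, which matches the receive path being the only consumer of the free list. The types and names are illustrative, not kernel API.

#include <stdatomic.h>
#include <stdio.h>

struct node {
	struct node *next;
	int id;
};

static _Atomic(struct node *) head;

static void push(struct node *n)		/* llist_add() analogue */
{
	n->next = atomic_load(&head);
	while (!atomic_compare_exchange_weak(&head, &n->next, n))
		;
}

static struct node *pop(void)	/* llist_del_first(): one consumer only */
{
	struct node *n = atomic_load(&head);

	while (n && !atomic_compare_exchange_weak(&head, &n, n->next))
		;
	return n;
}

int main(void)
{
	struct node a = { .id = 1 }, b = { .id = 2 };

	push(&a);
	push(&b);
	for (struct node *n; (n = pop()); )
		printf("got ctxt %d\n", n->id);
	return 0;
}
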
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index dc19517..48fe3b1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -64,8 +64,7 @@
 		spin_unlock(&rdma->sc_rw_ctxt_lock);
 	} else {
 		spin_unlock(&rdma->sc_rw_ctxt_lock);
-		ctxt = kmalloc(sizeof(*ctxt) +
-			       SG_CHUNK_SIZE * sizeof(struct scatterlist),
+		ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
 			       GFP_KERNEL);
 		if (!ctxt)
 			goto out;
@@ -74,7 +73,8 @@
 
 	ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
 	if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
-				   ctxt->rw_sg_table.sgl)) {
+				   ctxt->rw_sg_table.sgl,
+				   SG_CHUNK_SIZE)) {
 		kfree(ctxt);
 		ctxt = NULL;
 	}
@@ -85,7 +85,7 @@
 static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_rw_ctxt *ctxt)
 {
-	sg_free_table_chained(&ctxt->rw_sg_table, true);
+	sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
 
 	spin_lock(&rdma->sc_rw_ctxt_lock);
 	list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts);
@@ -213,13 +213,8 @@
 	atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
 	wake_up(&rdma->sc_send_wait);
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (unlikely(wc->status != IB_WC_SUCCESS))
 		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
-		if (wc->status != IB_WC_WR_FLUSH_ERR)
-			pr_err("svcrdma: write ctx: %s (%u/0x%x)\n",
-			       ib_wc_status_msg(wc->status),
-			       wc->status, wc->vendor_err);
-	}
 
 	svc_rdma_write_info_free(info);
 }
@@ -278,18 +273,15 @@
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
-		if (wc->status != IB_WC_WR_FLUSH_ERR)
-			pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
-			       ib_wc_status_msg(wc->status),
-			       wc->status, wc->vendor_err);
 		svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt);
 	} else {
 		spin_lock(&rdma->sc_rq_dto_lock);
 		list_add_tail(&info->ri_readctxt->rc_list,
 			      &rdma->sc_read_complete_q);
+		/* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
+		set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
 		spin_unlock(&rdma->sc_rq_dto_lock);
 
-		set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
 		svc_xprt_enqueue(&rdma->sc_xprt);
 	}
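
The kmalloc change above replaces an open-coded "header plus N elements" size with struct_size(), whose kernel implementation saturates on overflow instead of wrapping. A standalone sketch of the calculation follows; STRUCT_SIZE here is a plain stand-in without the overflow saturation, and the struct layout is invented for illustration.

#include <stdio.h>
#include <stdlib.h>

struct rw_ctxt {
	int nents;
	struct { void *addr; size_t len; } first_sgl[];	/* flexible array */
};

#define STRUCT_SIZE(type, member, n) \
	(sizeof(type) + (n) * sizeof(((type *)0)->member[0]))

int main(void)
{
	size_t n = 16;		/* stands in for SG_CHUNK_SIZE */
	struct rw_ctxt *ctxt = malloc(STRUCT_SIZE(struct rw_ctxt,
						  first_sgl, n));

	if (!ctxt)
		return 1;
	ctxt->nents = (int)n;
	printf("allocated %zu bytes for %d entries\n",
	       STRUCT_SIZE(struct rw_ctxt, first_sgl, n), ctxt->nents);
	free(ctxt);
	return 0;
}
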
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 8602a5f..6fdba72 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -272,10 +272,6 @@
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
 		svc_xprt_enqueue(&rdma->sc_xprt);
-		if (wc->status != IB_WC_WR_FLUSH_ERR)
-			pr_err("svcrdma: Send: %s (%u/0x%x)\n",
-			       ib_wc_status_msg(wc->status),
-			       wc->status, wc->vendor_err);
 	}
 
 	svc_xprt_put(&rdma->sc_xprt);
@@ -484,32 +480,6 @@
 		*reply = NULL;
 }
 
-/* RPC-over-RDMA Version One private extension: Remote Invalidation.
- * Responder's choice: requester signals it can handle Send With
- * Invalidate, and responder chooses one rkey to invalidate.
- *
- * Find a candidate rkey to invalidate when sending a reply.  Picks the
- * first R_key it finds in the chunk lists.
- *
- * Returns zero if RPC's chunk lists are empty.
- */
-static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
-				 __be32 *wr_lst, __be32 *rp_ch)
-{
-	__be32 *p;
-
-	p = rdma_argp + rpcrdma_fixed_maxsz;
-	if (*p != xdr_zero)
-		p += 2;
-	else if (wr_lst && be32_to_cpup(wr_lst + 1))
-		p = wr_lst + 2;
-	else if (rp_ch && be32_to_cpup(rp_ch + 1))
-		p = rp_ch + 2;
-	else
-		return 0;
-	return be32_to_cpup(p);
-}
-
 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_send_ctxt *ctxt,
 				 struct page *page,
@@ -563,6 +533,99 @@
 				      DMA_TO_DEVICE);
 }
 
+/* If the xdr_buf has more elements than the device can
+ * transmit in a single RDMA Send, then the reply will
+ * have to be copied into a bounce buffer.
+ */
+static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
+				    struct xdr_buf *xdr,
+				    __be32 *wr_lst)
+{
+	int elements;
+
+	/* xdr->head */
+	elements = 1;
+
+	/* xdr->pages */
+	if (!wr_lst) {
+		unsigned int remaining;
+		unsigned long pageoff;
+
+		pageoff = xdr->page_base & ~PAGE_MASK;
+		remaining = xdr->page_len;
+		while (remaining) {
+			++elements;
+			remaining -= min_t(u32, PAGE_SIZE - pageoff,
+					   remaining);
+			pageoff = 0;
+		}
+	}
+
+	/* xdr->tail */
+	if (xdr->tail[0].iov_len)
+		++elements;
+
+	/* assume 1 SGE is needed for the transport header */
+	return elements >= rdma->sc_max_send_sges;
+}
+
+/* The device is not capable of sending the reply directly.
+ * Assemble the elements of @xdr into the transport header
+ * buffer.
+ */
+static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
+				      struct svc_rdma_send_ctxt *ctxt,
+				      struct xdr_buf *xdr, __be32 *wr_lst)
+{
+	unsigned char *dst, *tailbase;
+	unsigned int taillen;
+
+	dst = ctxt->sc_xprt_buf;
+	dst += ctxt->sc_sges[0].length;
+
+	memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
+	dst += xdr->head[0].iov_len;
+
+	tailbase = xdr->tail[0].iov_base;
+	taillen = xdr->tail[0].iov_len;
+	if (wr_lst) {
+		u32 xdrpad;
+
+		xdrpad = xdr_padsize(xdr->page_len);
+		if (taillen && xdrpad) {
+			tailbase += xdrpad;
+			taillen -= xdrpad;
+		}
+	} else {
+		unsigned int len, remaining;
+		unsigned long pageoff;
+		struct page **ppages;
+
+		ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+		pageoff = xdr->page_base & ~PAGE_MASK;
+		remaining = xdr->page_len;
+		while (remaining) {
+			len = min_t(u32, PAGE_SIZE - pageoff, remaining);
+
+			memcpy(dst, page_address(*ppages++) + pageoff, len);
+			remaining -= len;
+			dst += len;
+			pageoff = 0;
+		}
+	}
+
+	if (taillen)
+		memcpy(dst, tailbase, taillen);
+
+	ctxt->sc_sges[0].length += xdr->len;
+	ib_dma_sync_single_for_device(rdma->sc_pd->device,
+				      ctxt->sc_sges[0].addr,
+				      ctxt->sc_sges[0].length,
+				      DMA_TO_DEVICE);
+
+	return 0;
+}
+
 /* svc_rdma_map_reply_msg - Map the buffer holding RPC message
  * @rdma: controlling transport
  * @ctxt: send_ctxt for the Send WR
@@ -585,8 +648,10 @@
 	u32 xdr_pad;
 	int ret;
 
-	if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
-		return -EIO;
+	if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst))
+		return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst);
+
+	++ctxt->sc_cur_sge_no;
 	ret = svc_rdma_dma_map_buf(rdma, ctxt,
 				   xdr->head[0].iov_base,
 				   xdr->head[0].iov_len);
@@ -617,8 +682,7 @@
 	while (remaining) {
 		len = min_t(u32, PAGE_SIZE - page_off, remaining);
 
-		if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
-			return -EIO;
+		++ctxt->sc_cur_sge_no;
 		ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
 					    page_off, len);
 		if (ret < 0)
@@ -632,8 +696,7 @@
 	len = xdr->tail[0].iov_len;
 tail:
 	if (len) {
-		if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
-			return -EIO;
+		++ctxt->sc_cur_sge_no;
 		ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
 		if (ret < 0)
 			return ret;
@@ -672,7 +735,7 @@
  *
  * RDMA Send is the last step of transmitting an RPC reply. Pages
  * involved in the earlier RDMA Writes are here transferred out
- * of the rqstp and into the ctxt's page array. These pages are
+ * of the rqstp and into the sctxt's page array. These pages are
  * DMA unmapped by each Write completion, but the subsequent Send
  * completion finally releases these pages.
  *
@@ -680,32 +743,31 @@
  * - The Reply's transport header will never be larger than a page.
  */
 static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
-				   struct svc_rdma_send_ctxt *ctxt,
-				   __be32 *rdma_argp,
+				   struct svc_rdma_send_ctxt *sctxt,
+				   struct svc_rdma_recv_ctxt *rctxt,
 				   struct svc_rqst *rqstp,
 				   __be32 *wr_lst, __be32 *rp_ch)
 {
 	int ret;
 
 	if (!rp_ch) {
-		ret = svc_rdma_map_reply_msg(rdma, ctxt,
+		ret = svc_rdma_map_reply_msg(rdma, sctxt,
 					     &rqstp->rq_res, wr_lst);
 		if (ret < 0)
 			return ret;
 	}
 
-	svc_rdma_save_io_pages(rqstp, ctxt);
+	svc_rdma_save_io_pages(rqstp, sctxt);
 
-	ctxt->sc_send_wr.opcode = IB_WR_SEND;
-	if (rdma->sc_snd_w_inv) {
-		ctxt->sc_send_wr.ex.invalidate_rkey =
-			svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
-		if (ctxt->sc_send_wr.ex.invalidate_rkey)
-			ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
+	if (rctxt->rc_inv_rkey) {
+		sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
+		sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
+	} else {
+		sctxt->sc_send_wr.opcode = IB_WR_SEND;
 	}
 	dprintk("svcrdma: posting Send WR with %u sge(s)\n",
-		ctxt->sc_send_wr.num_sge);
-	return svc_rdma_send(rdma, &ctxt->sc_send_wr);
+		sctxt->sc_send_wr.num_sge);
+	return svc_rdma_send(rdma, &sctxt->sc_send_wr);
 }
 
 /* Given the client-provided Write and Reply chunks, the server was not
@@ -741,10 +803,6 @@
 	return 0;
 }
 
-void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
-{
-}
-
 /**
  * svc_rdma_sendto - Transmit an RPC reply
  * @rqstp: processed RPC request, reply XDR already in ::rq_res
@@ -809,7 +867,7 @@
 	}
 
 	svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp));
-	ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp,
+	ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp,
 				      wr_lst, rp_ch);
 	if (ret < 0)
 		goto err1;
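
svc_rdma_pull_up_needed() above chooses between gathering the reply from its xdr_buf pieces and copying it into one contiguous buffer. The element count can be reproduced standalone, assuming 4KB pages; the function and parameter names below are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

#define XPAGE_SIZE 4096UL

static bool pull_up_needed(unsigned long page_base, unsigned int page_len,
			   unsigned int tail_len, int max_send_sges)
{
	int elements = 1;		/* head iovec */
	unsigned long pageoff = page_base & (XPAGE_SIZE - 1);
	unsigned int remaining = page_len;

	while (remaining) {		/* one SGE per page fragment */
		unsigned int len = XPAGE_SIZE - pageoff;

		if (len > remaining)
			len = remaining;
		remaining -= len;
		elements++;
		pageoff = 0;
	}
	if (tail_len)
		elements++;		/* tail iovec */

	/* ">=" because one more SGE is reserved for the transport header */
	return elements >= max_send_sges;
}

int main(void)
{
	/* 3 pages of payload starting at offset 100, 4-byte tail, 4 SGEs */
	printf("pull up? %s\n",
	       pull_up_needed(100, 3 * XPAGE_SIZE, 4, 4) ? "yes" : "no");
	return 0;
}
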
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 2848caf..145a361 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -85,7 +85,6 @@
 	.xpo_release_rqst = svc_rdma_release_rqst,
 	.xpo_detach = svc_rdma_detach,
 	.xpo_free = svc_rdma_free,
-	.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
 	.xpo_has_wspace = svc_rdma_has_wspace,
 	.xpo_accept = svc_rdma_accept,
 	.xpo_secure_port = svc_rdma_secure_port,
@@ -100,64 +99,6 @@
 	.xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
-					   struct sockaddr *, int, int);
-static void svc_rdma_bc_detach(struct svc_xprt *);
-static void svc_rdma_bc_free(struct svc_xprt *);
-
-static const struct svc_xprt_ops svc_rdma_bc_ops = {
-	.xpo_create = svc_rdma_bc_create,
-	.xpo_detach = svc_rdma_bc_detach,
-	.xpo_free = svc_rdma_bc_free,
-	.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
-	.xpo_secure_port = svc_rdma_secure_port,
-};
-
-struct svc_xprt_class svc_rdma_bc_class = {
-	.xcl_name = "rdma-bc",
-	.xcl_owner = THIS_MODULE,
-	.xcl_ops = &svc_rdma_bc_ops,
-	.xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
-};
-
-static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
-					   struct net *net,
-					   struct sockaddr *sa, int salen,
-					   int flags)
-{
-	struct svcxprt_rdma *cma_xprt;
-	struct svc_xprt *xprt;
-
-	cma_xprt = svc_rdma_create_xprt(serv, net);
-	if (!cma_xprt)
-		return ERR_PTR(-ENOMEM);
-	xprt = &cma_xprt->sc_xprt;
-
-	svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
-	set_bit(XPT_CONG_CTRL, &xprt->xpt_flags);
-	serv->sv_bc_xprt = xprt;
-
-	dprintk("svcrdma: %s(%p)\n", __func__, xprt);
-	return xprt;
-}
-
-static void svc_rdma_bc_detach(struct svc_xprt *xprt)
-{
-	dprintk("svcrdma: %s(%p)\n", __func__, xprt);
-}
-
-static void svc_rdma_bc_free(struct svc_xprt *xprt)
-{
-	struct svcxprt_rdma *rdma =
-		container_of(xprt, struct svcxprt_rdma, sc_xprt);
-
-	dprintk("svcrdma: %s(%p)\n", __func__, xprt);
-	if (xprt)
-		kfree(rdma);
-}
-#endif	/* CONFIG_SUNRPC_BACKCHANNEL */
-
 /* QP event handler */
 static void qp_event_handler(struct ib_event *event, void *context)
 {
@@ -199,14 +140,13 @@
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
-	INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
+	init_llist_head(&cma_xprt->sc_recv_ctxts);
 	INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
 	spin_lock_init(&cma_xprt->sc_send_lock);
-	spin_lock_init(&cma_xprt->sc_recv_lock);
 	spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
 	/*
@@ -270,9 +210,14 @@
 	/* Save client advertised inbound read limit for use later in accept. */
 	newxprt->sc_ord = param->initiator_depth;
 
-	/* Set the local and remote addresses in the transport */
 	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
 	svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+	/* The remote port is arbitrary and not under the control of the
+	 * client ULP. Set it to a fixed value so that the DRC continues
+	 * to be effective after a reconnect.
+	 */
+	rpc_set_port((struct sockaddr *)&newxprt->sc_xprt.xpt_remote, 0);
+
 	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
 	svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
 
@@ -280,9 +225,9 @@
 	 * Enqueue the new transport on the accept queue of the listening
 	 * transport
 	 */
-	spin_lock_bh(&listen_xprt->sc_lock);
+	spin_lock(&listen_xprt->sc_lock);
 	list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q);
-	spin_unlock_bh(&listen_xprt->sc_lock);
+	spin_unlock(&listen_xprt->sc_lock);
 
 	set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags);
 	svc_xprt_enqueue(&listen_xprt->sc_xprt);
@@ -449,13 +394,13 @@
 	struct ib_qp_init_attr qp_attr;
 	unsigned int ctxts, rq_depth;
 	struct ib_device *dev;
-	struct sockaddr *sap;
 	int ret = 0;
+	RPC_IFDEBUG(struct sockaddr *sap);
 
 	listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
 	clear_bit(XPT_CONN, &xprt->xpt_flags);
 	/* Get the next entry off the accept list */
-	spin_lock_bh(&listen_rdma->sc_lock);
+	spin_lock(&listen_rdma->sc_lock);
 	if (!list_empty(&listen_rdma->sc_accept_q)) {
 		newxprt = list_entry(listen_rdma->sc_accept_q.next,
 				     struct svcxprt_rdma, sc_accept_q);
@@ -463,7 +408,7 @@
 	}
 	if (!list_empty(&listen_rdma->sc_accept_q))
 		set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags);
-	spin_unlock_bh(&listen_rdma->sc_lock);
+	spin_unlock(&listen_rdma->sc_lock);
 	if (!newxprt)
 		return NULL;
 
@@ -475,13 +420,12 @@
 
 	/* Qualify the transport resource defaults with the
 	 * capabilities of this particular device */
-	newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
-	/* transport hdr, head iovec, one page list entry, tail iovec */
-	if (newxprt->sc_max_send_sges < 4) {
-		pr_err("svcrdma: too few Send SGEs available (%d)\n",
-		       newxprt->sc_max_send_sges);
-		goto errout;
-	}
+	/* Transport header, head iovec, tail iovec */
+	newxprt->sc_max_send_sges = 3;
+	/* Add one SGE per page list entry */
+	newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
+	if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
+		newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
 	newxprt->sc_max_req_size = svcrdma_max_req_size;
 	newxprt->sc_max_requests = svcrdma_max_requests;
 	newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
@@ -509,14 +453,14 @@
 		dprintk("svcrdma: error creating PD for connect request\n");
 		goto errout;
 	}
-	newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
-					0, IB_POLL_WORKQUEUE);
+	newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
+					    IB_POLL_WORKQUEUE);
 	if (IS_ERR(newxprt->sc_sq_cq)) {
 		dprintk("svcrdma: error creating SQ CQ for connect request\n");
 		goto errout;
 	}
-	newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth,
-					0, IB_POLL_WORKQUEUE);
+	newxprt->sc_rq_cq =
+		ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
 	if (IS_ERR(newxprt->sc_rq_cq)) {
 		dprintk("svcrdma: error creating RQ CQ for connect request\n");
 		goto errout;
@@ -585,6 +529,7 @@
 	if (ret)
 		goto errout;
 
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	dprintk("svcrdma: new connection %p accepted:\n", newxprt);
 	sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
 	dprintk("    local address   : %pIS:%u\n", sap, rpc_get_port(sap));
@@ -595,6 +540,7 @@
 	dprintk("    rdma_rw_ctxs    : %d\n", ctxts);
 	dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
 	dprintk("    ord             : %d\n", conn_param.initiator_depth);
+#endif
 
 	trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
 	return &newxprt->sc_xprt;
@@ -648,11 +594,6 @@
 	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
 		ib_drain_qp(rdma->sc_qp);
 
-	/* We should only be called from kref_put */
-	if (kref_read(&xprt->xpt_ref) != 0)
-		pr_err("svcrdma: sc_xprt still in use? (%d)\n",
-		       kref_read(&xprt->xpt_ref));
-
 	svc_rdma_flush_recv_queues(rdma);
 
 	/* Final put of backchannel client transport */
@@ -688,8 +629,9 @@
 {
 	struct svcxprt_rdma *rdma =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
 	INIT_WORK(&rdma->sc_work, __svc_rdma_free);
-	queue_work(svc_rdma_wq, &rdma->sc_work);
+	schedule_work(&rdma->sc_work);
 }
 
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)
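
The accept-path hunk above stops rejecting devices that advertise fewer than 4 Send SGEs; instead it provisions for a maximally scattered inline reply and clamps to the device limit, relying on the new pull-up path in svc_rdma_sendto.c when the clamp bites. The arithmetic, as a standalone sketch with illustrative values:

#include <stdio.h>

#define XPAGE_SIZE 4096u

static unsigned int max_send_sges(unsigned int max_req_size,
				  unsigned int dev_max_send_sge)
{
	/* transport header, head iovec, tail iovec */
	unsigned int sges = 3;

	/* plus one SGE per page list entry */
	sges += max_req_size / XPAGE_SIZE + 1;
	if (sges > dev_max_send_sge)
		sges = dev_max_send_sge;
	return sges;
}

int main(void)
{
	/* 4KB max request size on a device advertising 32 SGEs */
	printf("%u\n", max_send_sges(4096, 32));	/* prints 5 */
	return 0;
}
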
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 98cbc7b..160558b 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -68,9 +68,9 @@
  * tunables
  */
 
-static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
+unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
 unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
-static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 unsigned int xprt_rdma_memreg_strategy		= RPCRDMA_FRWR;
 int xprt_rdma_pad_optimize;
 
@@ -80,7 +80,6 @@
 static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
 static unsigned int min_inline_size = RPCRDMA_MIN_INLINE;
 static unsigned int max_inline_size = RPCRDMA_MAX_INLINE;
-static unsigned int zero;
 static unsigned int max_padding = PAGE_SIZE;
 static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
 static unsigned int max_memreg = RPCRDMA_LAST - 1;
@@ -122,7 +121,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 		.extra2		= &max_padding,
 	},
 	{
@@ -225,82 +224,70 @@
 		}
 }
 
-void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
-{
-	schedule_delayed_work(&ep->rep_connect_worker, 0);
-}
-
-void
-rpcrdma_connect_worker(struct work_struct *work)
-{
-	struct rpcrdma_ep *ep =
-		container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
-	struct rpcrdma_xprt *r_xprt =
-		container_of(ep, struct rpcrdma_xprt, rx_ep);
-	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-
-	spin_lock_bh(&xprt->transport_lock);
-	if (ep->rep_connected > 0) {
-		if (!xprt_test_and_set_connected(xprt))
-			xprt_wake_pending_tasks(xprt, 0);
-	} else {
-		if (xprt_test_and_clear_connected(xprt))
-			xprt_wake_pending_tasks(xprt, -ENOTCONN);
-	}
-	spin_unlock_bh(&xprt->transport_lock);
-}
-
+/**
+ * xprt_rdma_connect_worker - establish connection in the background
+ * @work: worker thread context
+ *
+ * Requester holds the xprt's send lock to prevent activity on this
+ * transport while a fresh connection is being established. RPC tasks
+ * sleep on the xprt's pending queue waiting for connect to complete.
+ */
 static void
 xprt_rdma_connect_worker(struct work_struct *work)
 {
 	struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
 						   rx_connect_worker.work);
 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-	int rc = 0;
-
-	xprt_clear_connected(xprt);
+	int rc;
 
 	rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
-	if (rc)
-		xprt_wake_pending_tasks(xprt, rc);
-
 	xprt_clear_connecting(xprt);
+	if (r_xprt->rx_ep.rep_connected > 0) {
+		if (!xprt_test_and_set_connected(xprt)) {
+			xprt->stat.connect_count++;
+			xprt->stat.connect_time += (long)jiffies -
+						   xprt->stat.connect_start;
+			xprt_wake_pending_tasks(xprt, -EAGAIN);
+		}
+	} else {
+		if (xprt_test_and_clear_connected(xprt))
+			xprt_wake_pending_tasks(xprt, rc);
+	}
 }
 
+/**
+ * xprt_rdma_inject_disconnect - inject a connection fault
+ * @xprt: transport context
+ *
+ * If @xprt is connected, disconnect it to simulate spurious connection
+ * loss.
+ */
 static void
 xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
 {
-	struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
-						   rx_xprt);
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 
-	trace_xprtrdma_inject_dsc(r_xprt);
+	trace_xprtrdma_op_inject_dsc(r_xprt);
 	rdma_disconnect(r_xprt->rx_ia.ri_id);
 }
 
-/*
- * xprt_rdma_destroy
+/**
+ * xprt_rdma_destroy - Full tear down of transport
+ * @xprt: doomed transport context
  *
- * Destroy the xprt.
- * Free all memory associated with the object, including its own.
- * NOTE: none of the *destroy methods free memory for their top-level
- * objects, even though they may have allocated it (they do free
- * private memory). It's up to the caller to handle it. In this
- * case (RDMA transport), all structure memory is inlined with the
- * struct rpcrdma_xprt.
+ * Caller guarantees there will be no more calls to us with
+ * this @xprt.
  */
 static void
 xprt_rdma_destroy(struct rpc_xprt *xprt)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 
-	trace_xprtrdma_destroy(r_xprt);
+	trace_xprtrdma_op_destroy(r_xprt);
 
 	cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
 
-	xprt_clear_connected(xprt);
-
-	rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
+	rpcrdma_ep_destroy(r_xprt);
 	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
 	rpcrdma_ia_close(&r_xprt->rx_ia);
 
@@ -310,6 +297,7 @@
 	module_put(THIS_MODULE);
 }
 
+/* 60 second timeout, no retries */
 static const struct rpc_timeout xprt_rdma_default_timeout = {
 	.to_initval = 60 * HZ,
 	.to_maxval = 60 * HZ,
@@ -323,33 +311,26 @@
 static struct rpc_xprt *
 xprt_setup_rdma(struct xprt_create *args)
 {
-	struct rpcrdma_create_data_internal cdata;
 	struct rpc_xprt *xprt;
 	struct rpcrdma_xprt *new_xprt;
-	struct rpcrdma_ep *new_ep;
 	struct sockaddr *sap;
 	int rc;
 
-	if (args->addrlen > sizeof(xprt->addr)) {
-		dprintk("RPC:       %s: address too large\n", __func__);
+	if (args->addrlen > sizeof(xprt->addr))
 		return ERR_PTR(-EBADF);
-	}
 
 	xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
-	if (xprt == NULL) {
-		dprintk("RPC:       %s: couldn't allocate rpcrdma_xprt\n",
-			__func__);
+	if (!xprt)
 		return ERR_PTR(-ENOMEM);
-	}
 
-	/* 60 second timeout, no retries */
 	xprt->timeout = &xprt_rdma_default_timeout;
+	xprt->connect_timeout = xprt->timeout->to_initval;
+	xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
 	xprt->bind_timeout = RPCRDMA_BIND_TO;
 	xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
 	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
 
 	xprt->resvport = 0;		/* privileged port not needed */
-	xprt->tsh_size = 0;		/* RPC-RDMA handles framing */
 	xprt->ops = &xprt_rdma_procs;
 
 	/*
@@ -367,40 +348,12 @@
 		xprt_set_bound(xprt);
 	xprt_rdma_format_addresses(xprt, sap);
 
-	cdata.max_requests = xprt_rdma_slot_table_entries;
-
-	cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
-	cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
-
-	cdata.inline_wsize = xprt_rdma_max_inline_write;
-	if (cdata.inline_wsize > cdata.wsize)
-		cdata.inline_wsize = cdata.wsize;
-
-	cdata.inline_rsize = xprt_rdma_max_inline_read;
-	if (cdata.inline_rsize > cdata.rsize)
-		cdata.inline_rsize = cdata.rsize;
-
-	/*
-	 * Create new transport instance, which includes initialized
-	 *  o ia
-	 *  o endpoint
-	 *  o buffers
-	 */
-
 	new_xprt = rpcx_to_rdmax(xprt);
-
 	rc = rpcrdma_ia_open(new_xprt);
 	if (rc)
 		goto out1;
 
-	/*
-	 * initialize and create ep
-	 */
-	new_xprt->rx_data = cdata;
-	new_ep = &new_xprt->rx_ep;
-
-	rc = rpcrdma_ep_create(&new_xprt->rx_ep,
-				&new_xprt->rx_ia, &new_xprt->rx_data);
+	rc = rpcrdma_ep_create(new_xprt);
 	if (rc)
 		goto out2;
 
@@ -411,7 +364,7 @@
 	INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
 			  xprt_rdma_connect_worker);
 
-	xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
+	xprt->max_payload = frwr_maxpages(new_xprt);
 	if (xprt->max_payload == 0)
 		goto out4;
 	xprt->max_payload <<= PAGE_SHIFT;
@@ -431,42 +384,45 @@
 	rpcrdma_buffer_destroy(&new_xprt->rx_buf);
 	rc = -ENODEV;
 out3:
-	rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+	rpcrdma_ep_destroy(new_xprt);
 out2:
 	rpcrdma_ia_close(&new_xprt->rx_ia);
 out1:
-	trace_xprtrdma_destroy(new_xprt);
+	trace_xprtrdma_op_destroy(new_xprt);
 	xprt_rdma_free_addresses(xprt);
 	xprt_free(xprt);
 	return ERR_PTR(rc);
 }
 
 /**
- * xprt_rdma_close - Close down RDMA connection
- * @xprt: generic transport to be closed
+ * xprt_rdma_close - close a transport connection
+ * @xprt: transport context
  *
- * Called during transport shutdown reconnect, or device
- * removal. Caller holds the transport's write lock.
+ * Called during autoclose or device removal.
+ *
+ * Caller holds @xprt's send lock to prevent activity on this
+ * transport while the connection is torn down.
  */
-static void
-xprt_rdma_close(struct rpc_xprt *xprt)
+void xprt_rdma_close(struct rpc_xprt *xprt)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
-	dprintk("RPC:       %s: closing xprt %p\n", __func__, xprt);
+	might_sleep();
+
+	trace_xprtrdma_op_close(r_xprt);
+
+	/* Prevent marshaling and sending of new requests */
+	xprt_clear_connected(xprt);
 
 	if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
-		xprt_clear_connected(xprt);
 		rpcrdma_ia_remove(ia);
-		return;
+		goto out;
 	}
+
 	if (ep->rep_connected == -ENODEV)
 		return;
-	if (ep->rep_connected > 0)
-		xprt->reestablish_timeout = 0;
-	xprt_disconnect_done(xprt);
 	rpcrdma_ep_disconnect(ep, ia);
 
 	/* Prepare @xprt for the next connection by reinitializing
@@ -474,6 +430,11 @@
 	 */
 	r_xprt->rx_buf.rb_credits = 1;
 	xprt->cwnd = RPC_CWNDSHIFT;
+
+out:
+	xprt->reestablish_timeout = 0;
+	++xprt->connect_cookie;
+	xprt_disconnect_done(xprt);
 }
 
 /**
@@ -525,25 +486,65 @@
 	xprt_force_disconnect(xprt);
 }
 
+/**
+ * xprt_rdma_set_connect_timeout - set timeouts for establishing a connection
+ * @xprt: controlling transport instance
+ * @connect_timeout: reconnect timeout after client disconnects
+ * @reconnect_timeout: reconnect timeout after server disconnects
+ *
+ */
+static void xprt_rdma_set_connect_timeout(struct rpc_xprt *xprt,
+					  unsigned long connect_timeout,
+					  unsigned long reconnect_timeout)
+{
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+	trace_xprtrdma_op_set_cto(r_xprt, connect_timeout, reconnect_timeout);
+
+	spin_lock(&xprt->transport_lock);
+
+	if (connect_timeout < xprt->connect_timeout) {
+		struct rpc_timeout to;
+		unsigned long initval;
+
+		to = *xprt->timeout;
+		initval = connect_timeout;
+		if (initval < RPCRDMA_INIT_REEST_TO << 1)
+			initval = RPCRDMA_INIT_REEST_TO << 1;
+		to.to_initval = initval;
+		to.to_maxval = initval;
+		r_xprt->rx_timeout = to;
+		xprt->timeout = &r_xprt->rx_timeout;
+		xprt->connect_timeout = connect_timeout;
+	}
+
+	if (reconnect_timeout < xprt->max_reconnect_timeout)
+		xprt->max_reconnect_timeout = reconnect_timeout;
+
+	spin_unlock(&xprt->transport_lock);
+}
+
+/**
+ * xprt_rdma_connect - schedule an attempt to reconnect
+ * @xprt: transport state
+ * @task: RPC scheduler context (unused)
+ *
+ */
 static void
 xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	unsigned long delay;
 
+	trace_xprtrdma_op_connect(r_xprt);
+
+	delay = 0;
 	if (r_xprt->rx_ep.rep_connected != 0) {
-		/* Reconnect */
-		schedule_delayed_work(&r_xprt->rx_connect_worker,
-				      xprt->reestablish_timeout);
-		xprt->reestablish_timeout <<= 1;
-		if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
-			xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
-		else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
-			xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
-	} else {
-		schedule_delayed_work(&r_xprt->rx_connect_worker, 0);
-		if (!RPC_IS_ASYNC(task))
-			flush_delayed_work(&r_xprt->rx_connect_worker);
+		delay = xprt_reconnect_delay(xprt);
+		xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
 	}
+	queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker,
+			   delay);
 }
 
 /**
@@ -569,6 +570,7 @@
 	return;
 
 out_sleep:
+	set_bit(XPRT_CONGESTED, &xprt->state);
 	rpc_sleep_on(&xprt->backlog, task, NULL);
 	task->tk_status = -EAGAIN;
 }
@@ -582,57 +584,24 @@
 static void
 xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
 {
+	struct rpcrdma_xprt *r_xprt =
+		container_of(xprt, struct rpcrdma_xprt, rx_xprt);
+
 	memset(rqst, 0, sizeof(*rqst));
-	rpcrdma_buffer_put(rpcr_to_rdmar(rqst));
-	rpc_wake_up_next(&xprt->backlog);
+	rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
+	if (unlikely(!rpc_wake_up_next(&xprt->backlog)))
+		clear_bit(XPRT_CONGESTED, &xprt->state);
 }
 
-static bool
-rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-		    size_t size, gfp_t flags)
+static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
+				 struct rpcrdma_regbuf *rb, size_t size,
+				 gfp_t flags)
 {
-	struct rpcrdma_regbuf *rb;
-
-	if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size)
-		return true;
-
-	rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
-	if (IS_ERR(rb))
-		return false;
-
-	rpcrdma_free_regbuf(req->rl_sendbuf);
-	r_xprt->rx_stats.hardway_register_count += size;
-	req->rl_sendbuf = rb;
-	return true;
-}
-
-/* The rq_rcv_buf is used only if a Reply chunk is necessary.
- * The decision to use a Reply chunk is made later in
- * rpcrdma_marshal_req. This buffer is registered at that time.
- *
- * Otherwise, the associated RPC Reply arrives in a separate
- * Receive buffer, arbitrarily chosen by the HCA. The buffer
- * allocated here for the RPC Reply is not utilized in that
- * case. See rpcrdma_inline_fixup.
- *
- * A regbuf is used here to remember the buffer size.
- */
-static bool
-rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-		    size_t size, gfp_t flags)
-{
-	struct rpcrdma_regbuf *rb;
-
-	if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size)
-		return true;
-
-	rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
-	if (IS_ERR(rb))
-		return false;
-
-	rpcrdma_free_regbuf(req->rl_recvbuf);
-	r_xprt->rx_stats.hardway_register_count += size;
-	req->rl_recvbuf = rb;
+	if (unlikely(rdmab_length(rb) < size)) {
+		if (!rpcrdma_regbuf_realloc(rb, size, flags))
+			return false;
+		r_xprt->rx_stats.hardway_register_count += size;
+	}
 	return true;
 }
 
@@ -644,13 +613,6 @@
  *        0:	Success; rq_buffer points to RPC buffer to use
  *   ENOMEM:	Out of memory, call again later
  *      EIO:	A permanent error occurred, do not retry
- *
- * The RDMA allocate/free functions need the task structure as a place
- * to hide the struct rpcrdma_req, which is necessary for the actual
- * send/recv sequence.
- *
- * xprt_rdma_allocate provides buffers that are already mapped for
- * DMA, and a local DMA lkey is provided for each.
  */
 static int
 xprt_rdma_allocate(struct rpc_task *task)
@@ -664,18 +626,20 @@
 	if (RPC_IS_SWAPPER(task))
 		flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
 
-	if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
+	if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
+				  flags))
 		goto out_fail;
-	if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
+	if (!rpcrdma_check_regbuf(r_xprt, req->rl_recvbuf, rqst->rq_rcvsize,
+				  flags))
 		goto out_fail;
 
-	rqst->rq_buffer = req->rl_sendbuf->rg_base;
-	rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
-	trace_xprtrdma_allocate(task, req);
+	rqst->rq_buffer = rdmab_data(req->rl_sendbuf);
+	rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf);
+	trace_xprtrdma_op_allocate(task, req);
 	return 0;
 
 out_fail:
-	trace_xprtrdma_allocate(task, NULL);
+	trace_xprtrdma_op_allocate(task, NULL);
 	return -ENOMEM;
 }
 
@@ -692,14 +656,21 @@
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 
-	if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
-		rpcrdma_release_rqst(r_xprt, req);
-	trace_xprtrdma_rpc_done(task, req);
+	trace_xprtrdma_op_free(task, req);
+
+	if (!list_empty(&req->rl_registered))
+		frwr_unmap_sync(r_xprt, req);
+
+	/* XXX: If the RPC is completing because of a signal and
+	 * not because a reply was received, we ought to ensure
+	 * that the Send completion has fired, so that memory
+	 * involved with the Send is not still visible to the NIC.
+	 */
 }
 
 /**
  * xprt_rdma_send_request - marshal and send an RPC request
- * @task: RPC task with an RPC message in rq_snd_buf
+ * @rqst: RPC message in rq_snd_buf
  *
  * Caller holds the transport's write lock.
  *
@@ -708,13 +679,14 @@
  *	%-ENOTCONN if the caller should reconnect and call again
  *	%-EAGAIN if the caller should call again
  *	%-ENOBUFS if the caller should call again after a delay
- *	%-EIO if a permanent error occurred and the request was not
- *		sent. Do not try to send this message again.
+ *	%-EMSGSIZE if encoding ran out of buffer space. The request
+ *		was not sent. Do not try to send this message again.
+ *	%-EIO if an I/O error occurred. The request was not sent.
+ *		Do not try to send this message again.
  */
 static int
-xprt_rdma_send_request(struct rpc_task *task)
+xprt_rdma_send_request(struct rpc_rqst *rqst)
 {
-	struct rpc_rqst *rqst = task->tk_rqstp;
 	struct rpc_xprt *xprt = rqst->rq_xprt;
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
@@ -726,7 +698,10 @@
 #endif	/* CONFIG_SUNRPC_BACKCHANNEL */
 
 	if (!xprt_connected(xprt))
-		goto drop_connection;
+		return -ENOTCONN;
+
+	if (!xprt_request_get_cong(xprt, rqst))
+		return -EBADSLT;
 
 	rc = rpcrdma_marshal_req(r_xprt, rqst);
 	if (rc < 0)
@@ -737,17 +712,15 @@
 		goto drop_connection;
 	rqst->rq_xtime = ktime_get();
 
-	__set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
 	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
 		goto drop_connection;
 
 	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
-	rqst->rq_bytes_sent = 0;
 
 	/* An RPC with no reply will throw off credit accounting,
 	 * so drop the connection to reset the credit grant.
 	 */
-	if (!rpc_reply_expected(task))
+	if (!rpc_reply_expected(rqst->rq_task))
 		goto drop_connection;
 	return 0;
 
@@ -755,8 +728,8 @@
 	if (rc != -ENOTCONN)
 		return rc;
 drop_connection:
-	xprt_disconnect_done(xprt);
-	return -ENOTCONN;	/* implies disconnect */
+	xprt_rdma_close(xprt);
+	return -ENOTCONN;
 }
 
 void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
@@ -772,7 +745,7 @@
 		   0,	/* need a local port? */
 		   xprt->stat.bind_count,
 		   xprt->stat.connect_count,
-		   xprt->stat.connect_time,
+		   xprt->stat.connect_time / HZ,
 		   idle_time,
 		   xprt->stat.sends,
 		   xprt->stat.recvs,
@@ -792,7 +765,7 @@
 		   r_xprt->rx_stats.bad_reply_count,
 		   r_xprt->rx_stats.nomsg_call_count);
 	seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
-		   r_xprt->rx_stats.mrs_recovered,
+		   r_xprt->rx_stats.mrs_recycled,
 		   r_xprt->rx_stats.mrs_orphaned,
 		   r_xprt->rx_stats.mrs_allocated,
 		   r_xprt->rx_stats.local_inv_needed,
@@ -821,7 +794,7 @@
 	.alloc_slot		= xprt_rdma_alloc_slot,
 	.free_slot		= xprt_rdma_free_slot,
 	.release_request	= xprt_release_rqst_cong,       /* ditto */
-	.set_retrans_timeout	= xprt_set_retrans_timeout_def, /* ditto */
+	.wait_for_reply_request	= xprt_wait_for_reply_request_def, /* ditto */
 	.timer			= xprt_rdma_timer,
 	.rpcbind		= rpcb_getport_async,	/* sunrpc/rpcb_clnt.c */
 	.set_port		= xprt_rdma_set_port,
@@ -831,14 +804,15 @@
 	.send_request		= xprt_rdma_send_request,
 	.close			= xprt_rdma_close,
 	.destroy		= xprt_rdma_destroy,
+	.set_connect_timeout	= xprt_rdma_set_connect_timeout,
 	.print_stats		= xprt_rdma_print_stats,
 	.enable_swap		= xprt_rdma_enable_swap,
 	.disable_swap		= xprt_rdma_disable_swap,
 	.inject_disconnect	= xprt_rdma_inject_disconnect,
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	.bc_setup		= xprt_rdma_bc_setup,
-	.bc_up			= xprt_rdma_bc_up,
 	.bc_maxpayload		= xprt_rdma_bc_maxpayload,
+	.bc_num_slots		= xprt_rdma_bc_max_slots,
 	.bc_free_rqst		= xprt_rdma_bc_free_rqst,
 	.bc_destroy		= xprt_rdma_bc_destroy,
 #endif
@@ -854,58 +828,31 @@
 
 void xprt_rdma_cleanup(void)
 {
-	int rc;
-
-	dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	if (sunrpc_table_header) {
 		unregister_sysctl_table(sunrpc_table_header);
 		sunrpc_table_header = NULL;
 	}
 #endif
-	rc = xprt_unregister_transport(&xprt_rdma);
-	if (rc)
-		dprintk("RPC:       %s: xprt_unregister returned %i\n",
-			__func__, rc);
 
-	rpcrdma_destroy_wq();
-
-	rc = xprt_unregister_transport(&xprt_rdma_bc);
-	if (rc)
-		dprintk("RPC:       %s: xprt_unregister(bc) returned %i\n",
-			__func__, rc);
+	xprt_unregister_transport(&xprt_rdma);
+	xprt_unregister_transport(&xprt_rdma_bc);
 }
 
 int xprt_rdma_init(void)
 {
 	int rc;
 
-	rc = rpcrdma_alloc_wq();
+	rc = xprt_register_transport(&xprt_rdma);
 	if (rc)
 		return rc;
 
-	rc = xprt_register_transport(&xprt_rdma);
-	if (rc) {
-		rpcrdma_destroy_wq();
-		return rc;
-	}
-
 	rc = xprt_register_transport(&xprt_rdma_bc);
 	if (rc) {
 		xprt_unregister_transport(&xprt_rdma);
-		rpcrdma_destroy_wq();
 		return rc;
 	}
 
-	dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
-
-	dprintk("Defaults:\n");
-	dprintk("\tSlots %d\n"
-		"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
-		xprt_rdma_slot_table_entries,
-		xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
-	dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
-
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	if (!sunrpc_table_header)
 		sunrpc_table_header = register_sysctl_table(sunrpc_table);
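
The stats hunk above starts reporting connect_time in seconds: the connect worker accumulates elapsed jiffies on each successful connect, and the display path divides the running total by HZ exactly once. A small sketch of that bookkeeping, with an illustrative HZ:

#include <stdio.h>

#define HZ 250				/* a common CONFIG_HZ value */

static unsigned long connect_time;	/* accumulated, in jiffies */

static void on_connected(unsigned long now, unsigned long connect_start)
{
	connect_time += now - connect_start;	/* jiffies, not seconds */
}

int main(void)
{
	on_connected(5000, 2500);	/* 2500 jiffies = 10s at HZ=250 */
	on_connected(9000, 8000);	/* 1000 jiffies = 4s */
	printf("connect_time: %lu s\n", connect_time / HZ);
	return 0;
}
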
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 956a5ea..3a90753 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -53,6 +53,7 @@
 #include <linux/slab.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc_rdma.h>
+#include <linux/log2.h>
 
 #include <asm-generic/barrier.h>
 #include <asm/bitops.h>
@@ -74,56 +75,52 @@
  * internal functions
  */
 static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
+static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf);
 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
 static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
-static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
-static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
+static void rpcrdma_mr_free(struct rpcrdma_mr *mr);
+static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
+		     gfp_t flags);
+static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
+static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
+static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
 
-struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
-
-int
-rpcrdma_alloc_wq(void)
+/* Wait for outstanding transport work to finish. ib_drain_qp
+ * handles the drains in the wrong order for us, so open code
+ * them here.
+ */
+static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
 {
-	struct workqueue_struct *recv_wq;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
-	recv_wq = alloc_workqueue("xprtrdma_receive",
-				  WQ_MEM_RECLAIM | WQ_HIGHPRI,
-				  0);
-	if (!recv_wq)
-		return -ENOMEM;
+	/* Flush Receives, then wait for deferred Reply work
+	 * to complete.
+	 */
+	ib_drain_rq(ia->ri_id->qp);
 
-	rpcrdma_receive_wq = recv_wq;
-	return 0;
+	/* Deferred Reply processing might have scheduled
+	 * local invalidations.
+	 */
+	ib_drain_sq(ia->ri_id->qp);
 }
 
-void
-rpcrdma_destroy_wq(void)
-{
-	struct workqueue_struct *wq;
-
-	if (rpcrdma_receive_wq) {
-		wq = rpcrdma_receive_wq;
-		rpcrdma_receive_wq = NULL;
-		destroy_workqueue(wq);
-	}
-}
-
+/**
+ * rpcrdma_qp_event_handler - Handle one QP event (error notification)
+ * @event: details of the event
+ * @context: ep that owns QP where event occurred
+ *
+ * Called from the RDMA provider (device driver) possibly in an interrupt
+ * context.
+ */
 static void
-rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
+rpcrdma_qp_event_handler(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 	struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
 						   rx_ep);
 
-	trace_xprtrdma_qp_error(r_xprt, event);
-	pr_err("rpcrdma: %s on device %s ep %p\n",
-	       ib_event_msg(event->event), event->device->name, context);
-
-	if (ep->rep_connected == 1) {
-		ep->rep_connected = -EIO;
-		rpcrdma_conn_func(ep);
-		wake_up_all(&ep->rep_connect_wait);
-	}
+	trace_xprtrdma_qp_event(r_xprt, event);
 }
 
 /**
@@ -141,11 +138,6 @@
 
 	/* WARNING: Only wr_cqe and status are reliable at this point */
 	trace_xprtrdma_wc_send(sc, wc);
-	if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
-		pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
-		       ib_wc_status_msg(wc->status),
-		       wc->status, wc->vendor_err);
-
 	rpcrdma_sendctx_put_locked(sc);
 }
 
@@ -161,11 +153,13 @@
 	struct ib_cqe *cqe = wc->wr_cqe;
 	struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
 					       rr_cqe);
+	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
 
-	/* WARNING: Only wr_id and status are reliable at this point */
+	/* WARNING: Only wr_cqe and status are reliable at this point */
 	trace_xprtrdma_wc_receive(wc);
+	--r_xprt->rx_ep.rep_receive_count;
 	if (wc->status != IB_WC_SUCCESS)
-		goto out_fail;
+		goto out_flushed;
 
 	/* status == SUCCESS means all fields in wc are trustworthy */
 	rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
@@ -176,24 +170,18 @@
 				   rdmab_addr(rep->rr_rdmabuf),
 				   wc->byte_len, DMA_FROM_DEVICE);
 
-out_schedule:
+	rpcrdma_post_recvs(r_xprt, false);
 	rpcrdma_reply_handler(rep);
 	return;
 
-out_fail:
-	if (wc->status != IB_WC_WR_FLUSH_ERR)
-		pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
-		       ib_wc_status_msg(wc->status),
-		       wc->status, wc->vendor_err);
-	rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0);
-	goto out_schedule;
+out_flushed:
+	rpcrdma_recv_buffer_put(rep);
 }
 
 static void
 rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
 			       struct rdma_conn_param *param)
 {
-	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
 	const struct rpcrdma_connect_private *pmsg = param->private_data;
 	unsigned int rsize, wsize;
 
@@ -210,89 +198,96 @@
 		wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
 	}
 
-	if (rsize < cdata->inline_rsize)
-		cdata->inline_rsize = rsize;
-	if (wsize < cdata->inline_wsize)
-		cdata->inline_wsize = wsize;
-	dprintk("RPC:       %s: max send %u, max recv %u\n",
-		__func__, cdata->inline_wsize, cdata->inline_rsize);
+	if (rsize < r_xprt->rx_ep.rep_inline_recv)
+		r_xprt->rx_ep.rep_inline_recv = rsize;
+	if (wsize < r_xprt->rx_ep.rep_inline_send)
+		r_xprt->rx_ep.rep_inline_send = wsize;
+	dprintk("RPC:       %s: max send %u, max recv %u\n", __func__,
+		r_xprt->rx_ep.rep_inline_send,
+		r_xprt->rx_ep.rep_inline_recv);
 	rpcrdma_set_max_header_sizes(r_xprt);
 }
 
+/**
+ * rpcrdma_cm_event_handler - Handle RDMA CM events
+ * @id: rdma_cm_id on which an event has occurred
+ * @event: details of the event
+ *
+ * Called with @id's mutex held. Returns 1 if caller should
+ * destroy @id, otherwise 0.
+ */
 static int
-rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
+rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
 {
-	struct rpcrdma_xprt *xprt = id->context;
-	struct rpcrdma_ia *ia = &xprt->rx_ia;
-	struct rpcrdma_ep *ep = &xprt->rx_ep;
-	int connstate = 0;
+	struct rpcrdma_xprt *r_xprt = id->context;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
 
-	trace_xprtrdma_conn_upcall(xprt, event);
+	might_sleep();
+
+	trace_xprtrdma_cm_event(r_xprt, event);
 	switch (event->event) {
 	case RDMA_CM_EVENT_ADDR_RESOLVED:
 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
 		ia->ri_async_rc = 0;
 		complete(&ia->ri_done);
-		break;
+		return 0;
 	case RDMA_CM_EVENT_ADDR_ERROR:
 		ia->ri_async_rc = -EPROTO;
 		complete(&ia->ri_done);
-		break;
+		return 0;
 	case RDMA_CM_EVENT_ROUTE_ERROR:
 		ia->ri_async_rc = -ENETUNREACH;
 		complete(&ia->ri_done);
-		break;
+		return 0;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 		pr_info("rpcrdma: removing device %s for %s:%s\n",
-			ia->ri_device->name,
-			rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
+			ia->ri_id->device->name,
+			rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
 #endif
 		set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
 		ep->rep_connected = -ENODEV;
-		xprt_force_disconnect(&xprt->rx_xprt);
+		xprt_force_disconnect(xprt);
 		wait_for_completion(&ia->ri_remove_done);
 
 		ia->ri_id = NULL;
-		ia->ri_device = NULL;
 		/* Return 1 to ensure the core destroys the id. */
 		return 1;
 	case RDMA_CM_EVENT_ESTABLISHED:
-		++xprt->rx_xprt.connect_cookie;
-		connstate = 1;
-		rpcrdma_update_connect_private(xprt, &event->param.conn);
-		goto connected;
+		++xprt->connect_cookie;
+		ep->rep_connected = 1;
+		rpcrdma_update_connect_private(r_xprt, &event->param.conn);
+		wake_up_all(&ep->rep_connect_wait);
+		break;
 	case RDMA_CM_EVENT_CONNECT_ERROR:
-		connstate = -ENOTCONN;
-		goto connected;
+		ep->rep_connected = -ENOTCONN;
+		goto disconnected;
 	case RDMA_CM_EVENT_UNREACHABLE:
-		connstate = -ENETUNREACH;
-		goto connected;
+		ep->rep_connected = -ENETUNREACH;
+		goto disconnected;
 	case RDMA_CM_EVENT_REJECTED:
 		dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
-			rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
+			rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
 			rdma_reject_msg(id, event->status));
-		connstate = -ECONNREFUSED;
+		ep->rep_connected = -ECONNREFUSED;
 		if (event->status == IB_CM_REJ_STALE_CONN)
-			connstate = -EAGAIN;
-		goto connected;
+			ep->rep_connected = -EAGAIN;
+		goto disconnected;
 	case RDMA_CM_EVENT_DISCONNECTED:
-		++xprt->rx_xprt.connect_cookie;
-		connstate = -ECONNABORTED;
-connected:
-		ep->rep_connected = connstate;
-		rpcrdma_conn_func(ep);
+		ep->rep_connected = -ECONNABORTED;
+disconnected:
+		xprt_force_disconnect(xprt);
 		wake_up_all(&ep->rep_connect_wait);
-		/*FALLTHROUGH*/
+		break;
 	default:
-		dprintk("RPC:       %s: %s:%s on %s/%s (ep 0x%p): %s\n",
-			__func__,
-			rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
-			ia->ri_device->name, ia->ri_ops->ro_displayname,
-			ep, rdma_event_msg(event->event));
 		break;
 	}
 
+	dprintk("RPC:       %s: %s:%s on %s/frwr: %s\n", __func__,
+		rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
+		ia->ri_id->device->name, rdma_event_msg(event->event));
 	return 0;
 }
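
The rewritten rpcrdma_cm_event_handler() above drops the intermediate connstate variable and records the outcome directly in ep->rep_connected, using 1 for connected and a negative errno for each failure mode. A toy dispatcher showing the same convention (the stale-connection -EAGAIN case is omitted); the event names are abbreviated stand-ins for the RDMA CM constants:

#include <errno.h>
#include <stdio.h>

enum cm_event {
	ESTABLISHED, CONNECT_ERROR, UNREACHABLE, REJECTED, DISCONNECTED
};

static int cm_event_to_connstate(enum cm_event ev)
{
	switch (ev) {
	case ESTABLISHED:
		return 1;		/* connected */
	case CONNECT_ERROR:
		return -ENOTCONN;
	case UNREACHABLE:
		return -ENETUNREACH;
	case REJECTED:
		return -ECONNREFUSED;
	case DISCONNECTED:
		return -ECONNABORTED;
	}
	return 0;
}

int main(void)
{
	printf("rep_connected = %d\n", cm_event_to_connstate(REJECTED));
	return 0;
}
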
 
@@ -308,24 +303,17 @@
 	init_completion(&ia->ri_done);
 	init_completion(&ia->ri_remove_done);
 
-	id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall,
+	id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
 			    xprt, RDMA_PS_TCP, IB_QPT_RC);
-	if (IS_ERR(id)) {
-		rc = PTR_ERR(id);
-		dprintk("RPC:       %s: rdma_create_id() failed %i\n",
-			__func__, rc);
+	if (IS_ERR(id))
 		return id;
-	}
 
 	ia->ri_async_rc = -ETIMEDOUT;
 	rc = rdma_resolve_addr(id, NULL,
 			       (struct sockaddr *)&xprt->rx_xprt.addr,
 			       RDMA_RESOLVE_TIMEOUT);
-	if (rc) {
-		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
-			__func__, rc);
+	if (rc)
 		goto out;
-	}
 	rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
 	if (rc < 0) {
 		trace_xprtrdma_conn_tout(xprt);
@@ -338,11 +326,8 @@
 
 	ia->ri_async_rc = -ETIMEDOUT;
 	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
-	if (rc) {
-		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
-			__func__, rc);
+	if (rc)
 		goto out;
-	}
 	rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
 	if (rc < 0) {
 		trace_xprtrdma_conn_tout(xprt);
@@ -381,9 +366,8 @@
 		rc = PTR_ERR(ia->ri_id);
 		goto out_err;
 	}
-	ia->ri_device = ia->ri_id->device;
 
-	ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
+	ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0);
 	if (IS_ERR(ia->ri_pd)) {
 		rc = PTR_ERR(ia->ri_pd);
 		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
@@ -392,20 +376,12 @@
 
 	switch (xprt_rdma_memreg_strategy) {
 	case RPCRDMA_FRWR:
-		if (frwr_is_supported(ia)) {
-			ia->ri_ops = &rpcrdma_frwr_memreg_ops;
+		if (frwr_is_supported(ia->ri_id->device))
 			break;
-		}
-		/*FALLTHROUGH*/
-	case RPCRDMA_MTHCAFMR:
-		if (fmr_is_supported(ia)) {
-			ia->ri_ops = &rpcrdma_fmr_memreg_ops;
-			break;
-		}
 		/*FALLTHROUGH*/
 	default:
 		pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
-		       ia->ri_device->name, xprt_rdma_memreg_strategy);
+		       ia->ri_id->device->name, xprt_rdma_memreg_strategy);
 		rc = -EINVAL;
 		goto out_err;
 	}
@@ -432,9 +408,8 @@
 	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	struct rpcrdma_req *req;
-	struct rpcrdma_rep *rep;
 
-	cancel_delayed_work_sync(&buf->rb_refresh_worker);
+	cancel_work_sync(&buf->rb_refresh_worker);
 
 	/* This is similar to rpcrdma_ep_destroy, but:
 	 * - Don't cancel the connect worker.
@@ -444,7 +419,7 @@
 	 *   connection is already gone.
 	 */
 	if (ia->ri_id->qp) {
-		ib_drain_qp(ia->ri_id->qp);
+		rpcrdma_xprt_drain(r_xprt);
 		rdma_destroy_qp(ia->ri_id);
 		ia->ri_id->qp = NULL;
 	}
@@ -456,12 +431,11 @@
 	/* The ULP is responsible for ensuring all DMA
 	 * mappings and MRs are gone.
 	 */
-	list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
-		rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
+	rpcrdma_reps_destroy(buf);
 	list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
-		rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
-		rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
-		rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
+		rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf);
+		rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
+		rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
 	}
 	rpcrdma_mrs_destroy(buf);
 	ib_dealloc_pd(ia->ri_pd);
@@ -487,7 +461,6 @@
 		rdma_destroy_id(ia->ri_id);
 	}
 	ia->ri_id = NULL;
-	ia->ri_device = NULL;
 
 	/* If the pd is still busy, xprtrdma missed freeing a resource */
 	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
@@ -495,19 +468,26 @@
 	ia->ri_pd = NULL;
 }
 
-/*
- * Create unconnected endpoint.
+/**
+ * rpcrdma_ep_create - Create unconnected endpoint
+ * @r_xprt: transport to instantiate
+ *
+ * Returns zero on success, or a negative errno.
  */
-int
-rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
-		  struct rpcrdma_create_data_internal *cdata)
+int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
 {
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
 	struct ib_cq *sendcq, *recvcq;
 	unsigned int max_sge;
 	int rc;
 
-	max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge,
+	ep->rep_max_requests = xprt_rdma_slot_table_entries;
+	ep->rep_inline_send = xprt_rdma_max_inline_write;
+	ep->rep_inline_recv = xprt_rdma_max_inline_read;
+
+	max_sge = min_t(unsigned int, ia->ri_id->device->attrs.max_send_sge,
 			RPCRDMA_MAX_SEND_SGES);
 	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
 		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
@@ -515,11 +495,11 @@
 	}
 	ia->ri_max_send_sges = max_sge;
 
-	rc = ia->ri_ops->ro_open(ia, ep, cdata);
+	rc = frwr_open(ia, ep);
 	if (rc)
 		return rc;
 
-	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
+	ep->rep_attr.event_handler = rpcrdma_qp_event_handler;
 	ep->rep_attr.qp_context = ep;
 	ep->rep_attr.srq = NULL;
 	ep->rep_attr.cap.max_send_sge = max_sge;
@@ -537,30 +517,24 @@
 		ep->rep_attr.cap.max_send_sge,
 		ep->rep_attr.cap.max_recv_sge);
 
-	/* set trigger for requesting send completion */
-	ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
-				   cdata->max_requests >> 2);
+	ep->rep_send_batch = ep->rep_max_requests >> 3;
 	ep->rep_send_count = ep->rep_send_batch;
 	init_waitqueue_head(&ep->rep_connect_wait);
-	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
+	ep->rep_receive_count = 0;
 
-	sendcq = ib_alloc_cq(ia->ri_device, NULL,
-			     ep->rep_attr.cap.max_send_wr + 1,
-			     1, IB_POLL_WORKQUEUE);
+	sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL,
+				 ep->rep_attr.cap.max_send_wr + 1,
+				 IB_POLL_WORKQUEUE);
 	if (IS_ERR(sendcq)) {
 		rc = PTR_ERR(sendcq);
-		dprintk("RPC:       %s: failed to create send CQ: %i\n",
-			__func__, rc);
 		goto out1;
 	}
 
-	recvcq = ib_alloc_cq(ia->ri_device, NULL,
-			     ep->rep_attr.cap.max_recv_wr + 1,
-			     0, IB_POLL_WORKQUEUE);
+	recvcq = ib_alloc_cq_any(ia->ri_id->device, NULL,
+				 ep->rep_attr.cap.max_recv_wr + 1,
+				 IB_POLL_WORKQUEUE);
 	if (IS_ERR(recvcq)) {
 		rc = PTR_ERR(recvcq);
-		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
-			__func__, rc);
 		goto out2;
 	}
 
@@ -573,16 +547,16 @@
 	/* Prepare RDMA-CM private message */
 	pmsg->cp_magic = rpcrdma_cmp_magic;
 	pmsg->cp_version = RPCRDMA_CMP_VERSION;
-	pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok;
-	pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
-	pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
+	pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
+	pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->rep_inline_send);
+	pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->rep_inline_recv);
 	ep->rep_remote_cma.private_data = pmsg;
 	ep->rep_remote_cma.private_data_len = sizeof(*pmsg);
 
 	/* Client offers RDMA Read but does not initiate */
 	ep->rep_remote_cma.initiator_depth = 0;
 	ep->rep_remote_cma.responder_resources =
-		min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);
+		min_t(int, U8_MAX, ia->ri_id->device->attrs.max_qp_rd_atom);
 
 	/* Limit transport retries so client can detect server
 	 * GID changes quickly. RPC layer handles re-establishing
@@ -605,17 +579,15 @@
 	return rc;
 }
 
-/*
- * rpcrdma_ep_destroy
+/**
+ * rpcrdma_ep_destroy - Disconnect and destroy endpoint.
+ * @r_xprt: transport instance to shut down
  *
- * Disconnect and destroy endpoint. After this, the only
- * valid operations on the ep are to free it (if dynamically
- * allocated) or re-create it.
  */
-void
-rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
 {
-	cancel_delayed_work_sync(&ep->rep_connect_worker);
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
 	if (ia->ri_id && ia->ri_id->qp) {
 		rpcrdma_ep_disconnect(ep, ia);
@@ -633,10 +605,10 @@
  * Unlike a normal reconnection, a fresh PD and a new set
  * of MRs and buffers is needed.
  */
-static int
-rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
-			 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
+				    struct ib_qp_init_attr *qp_init_attr)
 {
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	int rc, err;
 
 	trace_xprtrdma_reinsert(r_xprt);
@@ -646,14 +618,14 @@
 		goto out1;
 
 	rc = -ENOMEM;
-	err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
+	err = rpcrdma_ep_create(r_xprt);
 	if (err) {
 		pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
 		goto out2;
 	}
 
 	rc = -ENETUNREACH;
-	err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+	err = rdma_create_qp(ia->ri_id, ia->ri_pd, qp_init_attr);
 	if (err) {
 		pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
 		goto out3;
@@ -663,23 +635,23 @@
 	return 0;
 
 out3:
-	rpcrdma_ep_destroy(ep, ia);
+	rpcrdma_ep_destroy(r_xprt);
 out2:
 	rpcrdma_ia_close(ia);
 out1:
 	return rc;
 }
 
-static int
-rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
-		     struct rpcrdma_ia *ia)
+static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt,
+				struct ib_qp_init_attr *qp_init_attr)
 {
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rdma_cm_id *id, *old;
 	int err, rc;
 
 	trace_xprtrdma_reconnect(r_xprt);
 
-	rpcrdma_ep_disconnect(ep, ia);
+	rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia);
 
 	rc = -EHOSTUNREACH;
 	id = rpcrdma_create_id(r_xprt, ia);
@@ -696,17 +668,14 @@
 	 */
 	old = id;
 	rc = -ENETUNREACH;
-	if (ia->ri_device != id->device) {
+	if (ia->ri_id->device != id->device) {
 		pr_err("rpcrdma: can't reconnect on different device!\n");
 		goto out_destroy;
 	}
 
-	err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
-	if (err) {
-		dprintk("RPC:       %s: rdma_create_qp returned %d\n",
-			__func__, err);
+	err = rdma_create_qp(id, ia->ri_pd, qp_init_attr);
+	if (err)
 		goto out_destroy;
-	}
 
 	/* Atomically replace the transport's ID and QP. */
 	rc = 0;
@@ -728,41 +697,43 @@
 {
 	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
 						   rx_ia);
+	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+	struct ib_qp_init_attr qp_init_attr;
 	int rc;
 
 retry:
+	memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr));
 	switch (ep->rep_connected) {
 	case 0:
 		dprintk("RPC:       %s: connecting...\n", __func__);
-		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr);
 		if (rc) {
-			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
-				__func__, rc);
 			rc = -ENETUNREACH;
 			goto out_noupdate;
 		}
 		break;
 	case -ENODEV:
-		rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
+		rc = rpcrdma_ep_recreate_xprt(r_xprt, &qp_init_attr);
 		if (rc)
 			goto out_noupdate;
 		break;
 	default:
-		rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
+		rc = rpcrdma_ep_reconnect(r_xprt, &qp_init_attr);
 		if (rc)
 			goto out;
 	}
 
 	ep->rep_connected = 0;
+	xprt_clear_connected(xprt);
+
 	rpcrdma_post_recvs(r_xprt, true);
 
 	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
-	if (rc) {
-		dprintk("RPC:       %s: rdma_connect() failed with %i\n",
-				__func__, rc);
+	if (rc)
 		goto out;
-	}
 
+	if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
+		xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
 	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
 	if (ep->rep_connected <= 0) {
 		if (ep->rep_connected == -EAGAIN)
@@ -781,8 +752,10 @@
 	return rc;
 }
 
-/*
- * rpcrdma_ep_disconnect
+/**
+ * rpcrdma_ep_disconnect - Disconnect underlying transport
+ * @ep: endpoint to disconnect
+ * @ia: associated interface adapter
  *
  * This is separate from destroy to facilitate the ability
  * to reconnect without recreating the endpoint.
@@ -793,19 +766,20 @@
 void
 rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
+	struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
+						   rx_ep);
 	int rc;
 
+	/* returns without wait if ID is not connected */
 	rc = rdma_disconnect(ia->ri_id);
 	if (!rc)
-		/* returns without wait if not connected */
 		wait_event_interruptible(ep->rep_connect_wait,
 							ep->rep_connected != 1);
 	else
 		ep->rep_connected = rc;
-	trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
-					       rx_ep), rc);
+	trace_xprtrdma_disconnect(r_xprt, rc);
 
-	ib_drain_qp(ia->ri_id->qp);
+	rpcrdma_xprt_drain(r_xprt);
 }
 
 /* Fixed-size circular FIFO queue. This implementation is wait-free and
@@ -822,8 +796,8 @@
  */
 
 /* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
- * queue activity, and ib_drain_qp has flushed all remaining Send
- * requests.
+ * queue activity, and rpcrdma_xprt_drain has flushed all remaining
+ * Send requests.
  */
 static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
 {
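The comment opening above describes the sendctx ring: a fixed-size circular FIFO whose free-running head and tail indexes are advanced only by one (serialized) getter and one (serialized) putter, so no lock is needed between them. A minimal user-space model of that discipline — illustrative names, C11 atomics standing in for READ_ONCE/smp_store_release, and a deliberately non-power-of-two size, since the real queue is not guaranteed to be one:

    #include <stdatomic.h>
    #include <stddef.h>

    #define RING_ENTRIES 5                    /* need not be a power of two */

    struct ring {
        _Atomic unsigned long head;           /* advanced only by the getter */
        _Atomic unsigned long tail;           /* advanced only by the putter */
        void *slots[RING_ENTRIES];
    };

    static unsigned long ring_next(unsigned long idx)
    {
        return idx + 1 < RING_ENTRIES ? idx + 1 : 0;
    }

    /* Getter (cf. rpcrdma_sendctx_get_locked): NULL means queue empty. */
    static void *ring_get(struct ring *r)
    {
        unsigned long next = ring_next(atomic_load_explicit(&r->head,
                                                memory_order_relaxed));

        if (next == atomic_load_explicit(&r->tail, memory_order_acquire))
            return NULL;                      /* would overtake the putter */
        atomic_store_explicit(&r->head, next, memory_order_release);
        return r->slots[next];
    }

    /* Putter (cf. rpcrdma_sendctx_put_locked): release one slot. */
    static void ring_put(struct ring *r)
    {
        unsigned long next = ring_next(atomic_load_explicit(&r->tail,
                                                memory_order_relaxed));

        atomic_store_explicit(&r->tail, next, memory_order_release);
    }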
@@ -838,8 +812,7 @@
 {
 	struct rpcrdma_sendctx *sc;
 
-	sc = kzalloc(sizeof(*sc) +
-		     ia->ri_max_send_sges * sizeof(struct ib_sge),
+	sc = kzalloc(struct_size(sc, sc_sges, ia->ri_max_send_sges),
 		     GFP_KERNEL);
 	if (!sc)
 		return NULL;
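The struct_size() conversion above sizes a struct with a trailing flexible array member; unlike the open-coded sum it replaces, the kernel macro also saturates on multiplication overflow. A user-space sketch of the computation, with an illustrative stand-in type:

    #include <stdio.h>
    #include <stdlib.h>

    struct sendctx {
        unsigned int unmap_count;
        struct { void *addr; unsigned int length; } sges[]; /* flexible array */
    };

    int main(void)
    {
        size_t n = 4;
        /* What struct_size(sc, sges, n) computes, minus the kernel's
         * overflow saturation: the header plus n trailing elements. */
        size_t bytes = sizeof(struct sendctx) +
                       n * sizeof(((struct sendctx *)0)->sges[0]);
        struct sendctx *sc = calloc(1, bytes);

        printf("%zu bytes for %zu SGEs\n", bytes, n);
        free(sc);
        return 0;
    }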
@@ -872,18 +845,13 @@
 	for (i = 0; i <= buf->rb_sc_last; i++) {
 		sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
 		if (!sc)
-			goto out_destroy;
+			return -ENOMEM;
 
 		sc->sc_xprt = r_xprt;
 		buf->rb_sc_ctxs[i] = sc;
 	}
-	buf->rb_flags = 0;
 
 	return 0;
-
-out_destroy:
-	rpcrdma_sendctxs_destroy(buf);
-	return -ENOMEM;
 }
 
 /* The sendctx queue is not guaranteed to have a size that is a
@@ -898,20 +866,20 @@
 
 /**
  * rpcrdma_sendctx_get_locked - Acquire a send context
- * @buf: transport buffers from which to acquire an unused context
+ * @r_xprt: controlling transport instance
  *
  * Returns pointer to a free send completion context; or NULL if
  * the queue is empty.
  *
  * Usage: Called to acquire an SGE array before preparing a Send WR.
  *
- * The caller serializes calls to this function (per rpcrdma_buffer),
- * and provides an effective memory barrier that flushes the new value
+ * The caller serializes calls to this function (per transport), and
+ * provides an effective memory barrier that flushes the new value
  * of rb_sc_head.
  */
-struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt)
 {
-	struct rpcrdma_xprt *r_xprt;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	struct rpcrdma_sendctx *sc;
 	unsigned long next_head;
 
@@ -935,8 +903,7 @@
 	 * completions recently. This is a sign the Send Queue is
 	 * backing up. Cause the caller to pause and try again.
 	 */
-	set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
-	r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
+	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
 	r_xprt->rx_stats.empty_sendctx_q++;
 	return NULL;
 }
@@ -948,7 +915,7 @@
  * Usage: Called from Send completion to return a sendctx
  * to the queue.
  *
- * The caller serializes calls to this function (per rpcrdma_buffer).
+ * The caller serializes calls to this function (per transport).
  */
 static void
 rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
@@ -956,7 +923,7 @@
 	struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
 	unsigned long next_tail;
 
-	/* Unmap SGEs of previously completed by unsignaled
+	/* Unmap SGEs of previously completed but unsignaled
 	 * Sends by walking up the queue until @sc is found.
 	 */
 	next_tail = buf->rb_sc_tail;
@@ -964,50 +931,14 @@
 		next_tail = rpcrdma_sendctx_next(buf, next_tail);
 
 		/* ORDER: item must be accessed _before_ tail is updated */
-		rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
+		rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]);
 
 	} while (buf->rb_sc_ctxs[next_tail] != sc);
 
 	/* Paired with READ_ONCE */
 	smp_store_release(&buf->rb_sc_tail, next_tail);
 
-	if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) {
-		smp_mb__after_atomic();
-		xprt_write_space(&sc->sc_xprt->rx_xprt);
-	}
-}
-
-static void
-rpcrdma_mr_recovery_worker(struct work_struct *work)
-{
-	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
-						  rb_recovery_worker.work);
-	struct rpcrdma_mr *mr;
-
-	spin_lock(&buf->rb_recovery_lock);
-	while (!list_empty(&buf->rb_stale_mrs)) {
-		mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
-		spin_unlock(&buf->rb_recovery_lock);
-
-		trace_xprtrdma_recover_mr(mr);
-		mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
-
-		spin_lock(&buf->rb_recovery_lock);
-	}
-	spin_unlock(&buf->rb_recovery_lock);
-}
-
-void
-rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
-{
-	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-
-	spin_lock(&buf->rb_recovery_lock);
-	rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
-	spin_unlock(&buf->rb_recovery_lock);
-
-	schedule_delayed_work(&buf->rb_recovery_worker, 0);
+	xprt_write_space(&sc->sc_xprt->rx_xprt);
 }
 
 static void
@@ -1016,18 +947,16 @@
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	unsigned int count;
-	LIST_HEAD(free);
-	LIST_HEAD(all);
 
-	for (count = 0; count < 3; count++) {
+	for (count = 0; count < ia->ri_max_segs; count++) {
 		struct rpcrdma_mr *mr;
 		int rc;
 
-		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+		mr = kzalloc(sizeof(*mr), GFP_NOFS);
 		if (!mr)
 			break;
 
-		rc = ia->ri_ops->ro_init_mr(ia, mr);
+		rc = frwr_init_mr(ia, mr);
 		if (rc) {
 			kfree(mr);
 			break;
@@ -1035,143 +964,185 @@
 
 		mr->mr_xprt = r_xprt;
 
-		list_add(&mr->mr_list, &free);
-		list_add(&mr->mr_all, &all);
+		spin_lock(&buf->rb_lock);
+		list_add(&mr->mr_list, &buf->rb_mrs);
+		list_add(&mr->mr_all, &buf->rb_all_mrs);
+		spin_unlock(&buf->rb_lock);
 	}
 
-	spin_lock(&buf->rb_mrlock);
-	list_splice(&free, &buf->rb_mrs);
-	list_splice(&all, &buf->rb_all);
 	r_xprt->rx_stats.mrs_allocated += count;
-	spin_unlock(&buf->rb_mrlock);
 	trace_xprtrdma_createmrs(r_xprt, count);
-
-	xprt_write_space(&r_xprt->rx_xprt);
 }
 
 static void
 rpcrdma_mr_refresh_worker(struct work_struct *work)
 {
 	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
-						  rb_refresh_worker.work);
+						  rb_refresh_worker);
 	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
 						   rx_buf);
 
 	rpcrdma_mrs_create(r_xprt);
+	xprt_write_space(&r_xprt->rx_xprt);
 }
 
-struct rpcrdma_req *
-rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
+/**
+ * rpcrdma_req_create - Allocate an rpcrdma_req object
+ * @r_xprt: controlling r_xprt
+ * @size: initial size, in bytes, of send and receive buffers
+ * @flags: GFP flags passed to memory allocators
+ *
+ * Returns an allocated and fully initialized rpcrdma_req or NULL.
+ */
+struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
+				       gfp_t flags)
 {
 	struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
 	struct rpcrdma_regbuf *rb;
 	struct rpcrdma_req *req;
+	size_t maxhdrsize;
 
-	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	req = kzalloc(sizeof(*req), flags);
 	if (req == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto out1;
 
-	rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
-				  DMA_TO_DEVICE, GFP_KERNEL);
-	if (IS_ERR(rb)) {
-		kfree(req);
-		return ERR_PTR(-ENOMEM);
-	}
+	/* Compute maximum header buffer size in bytes */
+	maxhdrsize = rpcrdma_fixed_maxsz + 3 +
+		     r_xprt->rx_ia.ri_max_segs * rpcrdma_readchunk_maxsz;
+	maxhdrsize *= sizeof(__be32);
+	rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize),
+				  DMA_TO_DEVICE, flags);
+	if (!rb)
+		goto out2;
 	req->rl_rdmabuf = rb;
-	xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
-	req->rl_buffer = buffer;
-	INIT_LIST_HEAD(&req->rl_registered);
+	xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb));
 
-	spin_lock(&buffer->rb_reqslock);
+	req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags);
+	if (!req->rl_sendbuf)
+		goto out3;
+
+	req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags);
+	if (!req->rl_recvbuf)
+		goto out4;
+
+	INIT_LIST_HEAD(&req->rl_free_mrs);
+	INIT_LIST_HEAD(&req->rl_registered);
+	spin_lock(&buffer->rb_lock);
 	list_add(&req->rl_all, &buffer->rb_allreqs);
-	spin_unlock(&buffer->rb_reqslock);
+	spin_unlock(&buffer->rb_lock);
 	return req;
+
+out4:
+	kfree(req->rl_sendbuf);
+out3:
+	kfree(req->rl_rdmabuf);
+out2:
+	kfree(req);
+out1:
+	return NULL;
 }
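To make the header sizing above concrete — the constants here are illustrative stand-ins, not the authoritative values from rpc_rdma.c — a transport advertising 17 segments with a six-word Read-chunk entry would round its header regbuf up to 512 bytes:

    #include <stdio.h>
    #include <stdint.h>

    static size_t roundup_pow_of_two(size_t n)
    {
        size_t p = 1;

        while (p < n)
            p <<= 1;
        return p;
    }

    int main(void)
    {
        size_t fixed_maxsz = 4;      /* assumed fixed-header XDR words */
        size_t readchunk_maxsz = 6;  /* assumed words per Read list entry */
        size_t max_segs = 17;        /* assumed ri_max_segs */
        size_t words = fixed_maxsz + 3 + max_segs * readchunk_maxsz;
        size_t bytes = words * sizeof(uint32_t);   /* sizeof(__be32) */

        printf("maxhdrsize = %zu bytes, regbuf = %zu bytes\n",
               bytes, roundup_pow_of_two(bytes));
        return 0;
    }

With these numbers: (4 + 3 + 17 * 6) = 109 words, 436 bytes, rounded up to 512.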
 
-static int
-rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
+static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
+					      bool temp)
 {
-	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
-	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	struct rpcrdma_rep *rep;
-	int rc;
 
-	rc = -ENOMEM;
 	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
 	if (rep == NULL)
 		goto out;
 
-	rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
+	rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep.rep_inline_recv,
 					       DMA_FROM_DEVICE, GFP_KERNEL);
-	if (IS_ERR(rep->rr_rdmabuf)) {
-		rc = PTR_ERR(rep->rr_rdmabuf);
+	if (!rep->rr_rdmabuf)
 		goto out_free;
-	}
-	xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base,
-		     rdmab_length(rep->rr_rdmabuf));
 
+	xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
+		     rdmab_length(rep->rr_rdmabuf));
 	rep->rr_cqe.done = rpcrdma_wc_receive;
 	rep->rr_rxprt = r_xprt;
-	INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
 	rep->rr_recv_wr.next = NULL;
 	rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
 	rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
 	rep->rr_recv_wr.num_sge = 1;
 	rep->rr_temp = temp;
-
-	spin_lock(&buf->rb_lock);
-	list_add(&rep->rr_list, &buf->rb_recv_bufs);
-	spin_unlock(&buf->rb_lock);
-	return 0;
+	return rep;
 
 out_free:
 	kfree(rep);
 out:
-	dprintk("RPC:       %s: reply buffer %d alloc failed\n",
-		__func__, rc);
-	return rc;
+	return NULL;
 }
 
-int
-rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+	rpcrdma_regbuf_free(rep->rr_rdmabuf);
+	kfree(rep);
+}
+
+static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
+{
+	struct llist_node *node;
+
+	/* Calls to llist_del_first are required to be serialized */
+	node = llist_del_first(&buf->rb_free_reps);
+	if (!node)
+		return NULL;
+	return llist_entry(node, struct rpcrdma_rep, rr_node);
+}
+
+static void rpcrdma_rep_put(struct rpcrdma_buffer *buf,
+			    struct rpcrdma_rep *rep)
+{
+	if (!rep->rr_temp)
+		llist_add(&rep->rr_node, &buf->rb_free_reps);
+	else
+		rpcrdma_rep_destroy(rep);
+}
+
+static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
+{
+	struct rpcrdma_rep *rep;
+
+	while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
+		rpcrdma_rep_destroy(rep);
+}
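The rb_free_reps llist replaces a spinlock-protected list: llist_add may be called concurrently from any context, but llist_del_first callers must serialize among themselves — the contract noted in rpcrdma_rep_get_locked above. A minimal C11-atomics model of why (an illustration, not the kernel's llist implementation):

    #include <stdatomic.h>
    #include <stddef.h>

    struct node { struct node *next; };

    struct llhead { _Atomic(struct node *) first; };

    /* Safe from any number of concurrent callers. */
    static void ll_add(struct llhead *h, struct node *n)
    {
        struct node *first = atomic_load(&h->first);

        do {
            n->next = first;
        } while (!atomic_compare_exchange_weak(&h->first, &first, n));
    }

    /* Callers must serialize: two concurrent poppers could both read
     * first->next and race an ABA reinsertion of the same node. */
    static struct node *ll_del_first(struct llhead *h)
    {
        struct node *first = atomic_load(&h->first);

        do {
            if (!first)
                return NULL;
        } while (!atomic_compare_exchange_weak(&h->first, &first,
                                               first->next));
        return first;
    }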
+
+/**
+ * rpcrdma_buffer_create - Create initial set of req/rep objects
+ * @r_xprt: transport instance to (re)initialize
+ *
+ * Returns zero on success, otherwise a negative errno.
+ */
+int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	int i, rc;
 
-	buf->rb_max_requests = r_xprt->rx_data.max_requests;
+	buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
 	buf->rb_bc_srv_max_requests = 0;
-	spin_lock_init(&buf->rb_mrlock);
 	spin_lock_init(&buf->rb_lock);
-	spin_lock_init(&buf->rb_recovery_lock);
 	INIT_LIST_HEAD(&buf->rb_mrs);
-	INIT_LIST_HEAD(&buf->rb_all);
-	INIT_LIST_HEAD(&buf->rb_stale_mrs);
-	INIT_DELAYED_WORK(&buf->rb_refresh_worker,
-			  rpcrdma_mr_refresh_worker);
-	INIT_DELAYED_WORK(&buf->rb_recovery_worker,
-			  rpcrdma_mr_recovery_worker);
+	INIT_LIST_HEAD(&buf->rb_all_mrs);
+	INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker);
 
 	rpcrdma_mrs_create(r_xprt);
 
 	INIT_LIST_HEAD(&buf->rb_send_bufs);
 	INIT_LIST_HEAD(&buf->rb_allreqs);
-	spin_lock_init(&buf->rb_reqslock);
+
+	rc = -ENOMEM;
 	for (i = 0; i < buf->rb_max_requests; i++) {
 		struct rpcrdma_req *req;
 
-		req = rpcrdma_create_req(r_xprt);
-		if (IS_ERR(req)) {
-			dprintk("RPC:       %s: request buffer %d alloc"
-				" failed\n", __func__, i);
-			rc = PTR_ERR(req);
+		req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE,
+					 GFP_KERNEL);
+		if (!req)
 			goto out;
-		}
 		list_add(&req->rl_list, &buf->rb_send_bufs);
 	}
 
 	buf->rb_credits = 1;
-	buf->rb_posted_receives = 0;
-	INIT_LIST_HEAD(&buf->rb_recv_bufs);
+	init_llist_head(&buf->rb_free_reps);
 
 	rc = rpcrdma_sendctxs_create(r_xprt);
 	if (rc)
@@ -1183,19 +1154,23 @@
 	return rc;
 }
 
-static void
-rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
+/**
+ * rpcrdma_req_destroy - Destroy an rpcrdma_req object
+ * @req: unused object to be destroyed
+ *
+ * This function assumes that the caller prevents concurrent device
+ * unload and transport tear-down.
+ */
+void rpcrdma_req_destroy(struct rpcrdma_req *req)
 {
-	rpcrdma_free_regbuf(rep->rr_rdmabuf);
-	kfree(rep);
-}
+	list_del(&req->rl_all);
 
-void
-rpcrdma_destroy_req(struct rpcrdma_req *req)
-{
-	rpcrdma_free_regbuf(req->rl_recvbuf);
-	rpcrdma_free_regbuf(req->rl_sendbuf);
-	rpcrdma_free_regbuf(req->rl_rdmabuf);
+	while (!list_empty(&req->rl_free_mrs))
+		rpcrdma_mr_free(rpcrdma_mr_pop(&req->rl_free_mrs));
+
+	rpcrdma_regbuf_free(req->rl_recvbuf);
+	rpcrdma_regbuf_free(req->rl_sendbuf);
+	rpcrdma_regbuf_free(req->rl_rdmabuf);
 	kfree(req);
 }
 
@@ -1204,62 +1179,49 @@
 {
 	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
 						   rx_buf);
-	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
 	struct rpcrdma_mr *mr;
 	unsigned int count;
 
 	count = 0;
-	spin_lock(&buf->rb_mrlock);
-	while (!list_empty(&buf->rb_all)) {
-		mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
+	spin_lock(&buf->rb_lock);
+	while ((mr = list_first_entry_or_null(&buf->rb_all_mrs,
+					      struct rpcrdma_mr,
+					      mr_all)) != NULL) {
 		list_del(&mr->mr_all);
+		spin_unlock(&buf->rb_lock);
 
-		spin_unlock(&buf->rb_mrlock);
-
-		/* Ensure MW is not on any rl_registered list */
-		if (!list_empty(&mr->mr_list))
-			list_del(&mr->mr_list);
-
-		ia->ri_ops->ro_release_mr(mr);
+		frwr_release_mr(mr);
 		count++;
-		spin_lock(&buf->rb_mrlock);
+		spin_lock(&buf->rb_lock);
 	}
-	spin_unlock(&buf->rb_mrlock);
+	spin_unlock(&buf->rb_lock);
 	r_xprt->rx_stats.mrs_allocated = 0;
-
-	dprintk("RPC:       %s: released %u MRs\n", __func__, count);
 }
 
+/**
+ * rpcrdma_buffer_destroy - Release all hw resources
+ * @buf: root control block for resources
+ *
+ * ORDERING: relies on a prior rpcrdma_xprt_drain:
+ * - No more Send or Receive completions can occur
+ * - All MRs, reps, and reqs are returned to their free lists
+ */
 void
 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
-	cancel_delayed_work_sync(&buf->rb_recovery_worker);
-	cancel_delayed_work_sync(&buf->rb_refresh_worker);
+	cancel_work_sync(&buf->rb_refresh_worker);
 
 	rpcrdma_sendctxs_destroy(buf);
+	rpcrdma_reps_destroy(buf);
 
-	while (!list_empty(&buf->rb_recv_bufs)) {
-		struct rpcrdma_rep *rep;
-
-		rep = list_first_entry(&buf->rb_recv_bufs,
-				       struct rpcrdma_rep, rr_list);
-		list_del(&rep->rr_list);
-		rpcrdma_destroy_rep(rep);
-	}
-
-	spin_lock(&buf->rb_reqslock);
-	while (!list_empty(&buf->rb_allreqs)) {
+	while (!list_empty(&buf->rb_send_bufs)) {
 		struct rpcrdma_req *req;
 
-		req = list_first_entry(&buf->rb_allreqs,
-				       struct rpcrdma_req, rl_all);
-		list_del(&req->rl_all);
-
-		spin_unlock(&buf->rb_reqslock);
-		rpcrdma_destroy_req(req);
-		spin_lock(&buf->rb_reqslock);
+		req = list_first_entry(&buf->rb_send_bufs,
+				       struct rpcrdma_req, rl_list);
+		list_del(&req->rl_list);
+		rpcrdma_req_destroy(req);
 	}
-	spin_unlock(&buf->rb_reqslock);
 
 	rpcrdma_mrs_destroy(buf);
 }
@@ -1275,61 +1237,42 @@
 rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-	struct rpcrdma_mr *mr = NULL;
+	struct rpcrdma_mr *mr;
 
-	spin_lock(&buf->rb_mrlock);
-	if (!list_empty(&buf->rb_mrs))
-		mr = rpcrdma_mr_pop(&buf->rb_mrs);
-	spin_unlock(&buf->rb_mrlock);
-
-	if (!mr)
-		goto out_nomrs;
+	spin_lock(&buf->rb_lock);
+	mr = rpcrdma_mr_pop(&buf->rb_mrs);
+	spin_unlock(&buf->rb_lock);
 	return mr;
-
-out_nomrs:
-	trace_xprtrdma_nomrs(r_xprt);
-	if (r_xprt->rx_ep.rep_connected != -ENODEV)
-		schedule_delayed_work(&buf->rb_refresh_worker, 0);
-
-	/* Allow the reply handler and refresh worker to run */
-	cond_resched();
-
-	return NULL;
-}
-
-static void
-__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
-{
-	spin_lock(&buf->rb_mrlock);
-	rpcrdma_mr_push(mr, &buf->rb_mrs);
-	spin_unlock(&buf->rb_mrlock);
 }
 
 /**
- * rpcrdma_mr_put - Release an rpcrdma_mr object
- * @mr: object to release
+ * rpcrdma_mr_put - DMA unmap an MR and release it
+ * @mr: MR to release
  *
  */
-void
-rpcrdma_mr_put(struct rpcrdma_mr *mr)
-{
-	__rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
-}
-
-/**
- * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
- * @mr: object to release
- *
- */
-void
-rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
+void rpcrdma_mr_put(struct rpcrdma_mr *mr)
 {
 	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
 
-	trace_xprtrdma_dma_unmap(mr);
-	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
-			mr->mr_sg, mr->mr_nents, mr->mr_dir);
-	__rpcrdma_mr_put(&r_xprt->rx_buf, mr);
+	if (mr->mr_dir != DMA_NONE) {
+		trace_xprtrdma_mr_unmap(mr);
+		ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
+				mr->mr_sg, mr->mr_nents, mr->mr_dir);
+		mr->mr_dir = DMA_NONE;
+	}
+
+	rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
+}
+
+static void rpcrdma_mr_free(struct rpcrdma_mr *mr)
+{
+	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+
+	mr->mr_req = NULL;
+	spin_lock(&buf->rb_lock);
+	rpcrdma_mr_push(mr, &buf->rb_mrs);
+	spin_unlock(&buf->rb_lock);
 }
 
 /**
@@ -1354,105 +1297,112 @@
 
 /**
  * rpcrdma_buffer_put - Put request/reply buffers back into pool
+ * @buffers: buffer pool
  * @req: object to return
  *
  */
-void
-rpcrdma_buffer_put(struct rpcrdma_req *req)
+void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
 {
-	struct rpcrdma_buffer *buffers = req->rl_buffer;
-	struct rpcrdma_rep *rep = req->rl_reply;
-
+	if (req->rl_reply)
+		rpcrdma_rep_put(buffers, req->rl_reply);
 	req->rl_reply = NULL;
 
 	spin_lock(&buffers->rb_lock);
 	list_add(&req->rl_list, &buffers->rb_send_bufs);
-	if (rep) {
-		if (!rep->rr_temp) {
-			list_add(&rep->rr_list, &buffers->rb_recv_bufs);
-			rep = NULL;
-		}
-	}
 	spin_unlock(&buffers->rb_lock);
-	if (rep)
-		rpcrdma_destroy_rep(rep);
-}
-
-/*
- * Put reply buffers back into pool when not attached to
- * request. This happens in error conditions.
- */
-void
-rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
-{
-	struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
-
-	if (!rep->rr_temp) {
-		spin_lock(&buffers->rb_lock);
-		list_add(&rep->rr_list, &buffers->rb_recv_bufs);
-		spin_unlock(&buffers->rb_lock);
-	} else {
-		rpcrdma_destroy_rep(rep);
-	}
 }
 
 /**
- * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
- * @size: size of buffer to be allocated, in bytes
- * @direction: direction of data movement
- * @flags: GFP flags
+ * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list
+ * @rep: rep to release
  *
- * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
- * can be persistently DMA-mapped for I/O.
+ * Used after error conditions.
+ */
+void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
+{
+	rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep);
+}
+
+/* Returns a pointer to a rpcrdma_regbuf object, or NULL.
  *
  * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
  * receiving the payload of RDMA RECV operations. During Long Calls
- * or Replies they may be registered externally via ro_map.
+ * or Replies they may be registered externally via frwr_map.
  */
-struct rpcrdma_regbuf *
-rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
+static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
 		     gfp_t flags)
 {
 	struct rpcrdma_regbuf *rb;
 
-	rb = kmalloc(sizeof(*rb) + size, flags);
-	if (rb == NULL)
-		return ERR_PTR(-ENOMEM);
+	rb = kmalloc(sizeof(*rb), flags);
+	if (!rb)
+		return NULL;
+	rb->rg_data = kmalloc(size, flags);
+	if (!rb->rg_data) {
+		kfree(rb);
+		return NULL;
+	}
 
 	rb->rg_device = NULL;
 	rb->rg_direction = direction;
 	rb->rg_iov.length = size;
-
 	return rb;
 }
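A regbuf is now two allocations — a small fixed header owning a separately kmalloc'd rg_data — which is what lets rpcrdma_regbuf_realloc below swap the data without moving the header that SGEs and other structures point at. The shape, sketched with illustrative user-space types:

    #include <stdlib.h>
    #include <stdbool.h>

    struct regbuf {
        void *data;       /* cf. rg_data: separately allocated */
        size_t length;    /* cf. rg_iov.length */
    };

    /* On failure the regbuf is left untouched, mirroring the contract
     * documented for rpcrdma_regbuf_realloc(). */
    static bool regbuf_grow(struct regbuf *rb, size_t size)
    {
        void *buf = malloc(size);

        if (!buf)
            return false;
        free(rb->data);
        rb->data = buf;
        rb->length = size;
        return true;
    }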
 
 /**
- * __rpcrdma_map_regbuf - DMA-map a regbuf
- * @ia: controlling rpcrdma_ia
- * @rb: regbuf to be mapped
+ * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer
+ * @rb: regbuf to reallocate
+ * @size: size of buffer to be allocated, in bytes
+ * @flags: GFP flags
+ *
+ * Returns true if reallocation was successful. If false is
+ * returned, @rb is left untouched.
  */
-bool
-__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags)
 {
-	struct ib_device *device = ia->ri_device;
+	void *buf;
+
+	buf = kmalloc(size, flags);
+	if (!buf)
+		return false;
+
+	rpcrdma_regbuf_dma_unmap(rb);
+	kfree(rb->rg_data);
+
+	rb->rg_data = buf;
+	rb->rg_iov.length = size;
+	return true;
+}
+
+/**
+ * __rpcrdma_regbuf_dma_map - DMA-map a regbuf
+ * @r_xprt: controlling transport instance
+ * @rb: regbuf to be mapped
+ *
+ * Returns true if the buffer is now DMA mapped to @r_xprt's device
+ */
+bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+			      struct rpcrdma_regbuf *rb)
+{
+	struct ib_device *device = r_xprt->rx_ia.ri_id->device;
 
 	if (rb->rg_direction == DMA_NONE)
 		return false;
 
-	rb->rg_iov.addr = ib_dma_map_single(device,
-					    (void *)rb->rg_base,
-					    rdmab_length(rb),
-					    rb->rg_direction);
-	if (ib_dma_mapping_error(device, rdmab_addr(rb)))
+	rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb),
+					    rdmab_length(rb), rb->rg_direction);
+	if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
+		trace_xprtrdma_dma_maperr(rdmab_addr(rb));
 		return false;
+	}
 
 	rb->rg_device = device;
-	rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
+	rb->rg_iov.lkey = r_xprt->rx_ia.ri_pd->local_dma_lkey;
 	return true;
 }
 
-static void
-rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
+static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb)
 {
 	if (!rb)
 		return;
@@ -1460,26 +1410,27 @@
 	if (!rpcrdma_regbuf_is_mapped(rb))
 		return;
 
-	ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
-			    rdmab_length(rb), rb->rg_direction);
+	ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb),
+			    rb->rg_direction);
 	rb->rg_device = NULL;
 }
 
-/**
- * rpcrdma_free_regbuf - deregister and free registered buffer
- * @rb: regbuf to be deregistered and freed
- */
-void
-rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
+static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
 {
-	rpcrdma_dma_unmap_regbuf(rb);
+	rpcrdma_regbuf_dma_unmap(rb);
+	if (rb)
+		kfree(rb->rg_data);
 	kfree(rb);
 }
 
-/*
- * Prepost any receive buffer, then post send.
+/**
+ * rpcrdma_ep_post - Post WRs to a transport's Send Queue
+ * @ia: transport's device information
+ * @ep: transport's RDMA endpoint information
+ * @req: rpcrdma_req containing the Send WR to post
  *
- * Receive buffer is donated to hardware, reclaimed upon recv completion.
+ * Returns 0 if the post was successful, otherwise -ENOTCONN
+ * is returned.
  */
 int
 rpcrdma_ep_post(struct rpcrdma_ia *ia,
@@ -1489,8 +1440,7 @@
 	struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
 	int rc;
 
-	if (!ep->rep_send_count ||
-	    test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+	if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) {
 		send_wr->send_flags |= IB_SEND_SIGNALED;
 		ep->rep_send_count = ep->rep_send_batch;
 	} else {
@@ -1498,77 +1448,79 @@
 		--ep->rep_send_count;
 	}
 
-	rc = ia->ri_ops->ro_send(ia, req);
+	rc = frwr_send(ia, req);
 	trace_xprtrdma_post_send(req, rc);
 	if (rc)
 		return -ENOTCONN;
 	return 0;
 }
 
-/**
- * rpcrdma_post_recvs - Maybe post some Receive buffers
- * @r_xprt: controlling transport
- * @temp: when true, allocate temp rpcrdma_rep objects
- *
- */
-void
+static void
 rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
 {
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-	struct ib_recv_wr *wr, *bad_wr;
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct ib_recv_wr *i, *wr, *bad_wr;
+	struct rpcrdma_rep *rep;
 	int needed, count, rc;
 
-	needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
-	if (buf->rb_posted_receives > needed)
-		return;
-	needed -= buf->rb_posted_receives;
-
+	rc = 0;
 	count = 0;
+
+	needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
+	if (likely(ep->rep_receive_count > needed))
+		goto out;
+	needed -= ep->rep_receive_count;
+	if (!temp)
+		needed += RPCRDMA_MAX_RECV_BATCH;
+
+	/* fast path: all needed reps can be found on the free list */
 	wr = NULL;
 	while (needed) {
-		struct rpcrdma_regbuf *rb;
-		struct rpcrdma_rep *rep;
+		rep = rpcrdma_rep_get_locked(buf);
+		if (!rep)
+			rep = rpcrdma_rep_create(r_xprt, temp);
+		if (!rep)
+			break;
 
-		spin_lock(&buf->rb_lock);
-		rep = list_first_entry_or_null(&buf->rb_recv_bufs,
-					       struct rpcrdma_rep, rr_list);
-		if (likely(rep))
-			list_del(&rep->rr_list);
-		spin_unlock(&buf->rb_lock);
-		if (!rep) {
-			if (rpcrdma_create_rep(r_xprt, temp))
-				break;
-			continue;
-		}
-
-		rb = rep->rr_rdmabuf;
-		if (!rpcrdma_regbuf_is_mapped(rb)) {
-			if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) {
-				rpcrdma_recv_buffer_put(rep);
-				break;
-			}
-		}
-
-		trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
 		rep->rr_recv_wr.next = wr;
 		wr = &rep->rr_recv_wr;
-		++count;
 		--needed;
 	}
-	if (!count)
-		return;
+	if (!wr)
+		goto out;
+
+	for (i = wr; i; i = i->next) {
+		rep = container_of(i, struct rpcrdma_rep, rr_recv_wr);
+
+		if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
+			goto release_wrs;
+
+		trace_xprtrdma_post_recv(rep);
+		++count;
+	}
 
 	rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
 			  (const struct ib_recv_wr **)&bad_wr);
+out:
+	trace_xprtrdma_post_recvs(r_xprt, count, rc);
 	if (rc) {
-		for (wr = bad_wr; wr; wr = wr->next) {
+		for (wr = bad_wr; wr;) {
 			struct rpcrdma_rep *rep;
 
 			rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
+			wr = wr->next;
 			rpcrdma_recv_buffer_put(rep);
 			--count;
 		}
 	}
-	buf->rb_posted_receives += count;
-	trace_xprtrdma_post_recvs(r_xprt, count, rc);
+	ep->rep_receive_count += count;
+	return;
+
+release_wrs:
+	for (i = wr; i;) {
+		rep = container_of(i, struct rpcrdma_rep, rr_recv_wr);
+		i = i->next;
+		rpcrdma_recv_buffer_put(rep);
+	}
 }
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2ca14f7..65e6b0e 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -44,8 +44,10 @@
 
 #include <linux/wait.h> 		/* wait_queue_head_t, etc */
 #include <linux/spinlock.h> 		/* spinlock_t, etc */
-#include <linux/atomic.h>			/* atomic_t, etc */
+#include <linux/atomic.h>		/* atomic_t, etc */
+#include <linux/kref.h>			/* struct kref */
 #include <linux/workqueue.h>		/* struct work_struct */
+#include <linux/llist.h>
 
 #include <rdma/rdma_cm.h>		/* RDMA connection api */
 #include <rdma/ib_verbs.h>		/* RDMA verbs api */
@@ -66,23 +68,17 @@
  * Interface Adapter -- one per transport instance
  */
 struct rpcrdma_ia {
-	const struct rpcrdma_memreg_ops	*ri_ops;
-	struct ib_device	*ri_device;
 	struct rdma_cm_id 	*ri_id;
 	struct ib_pd		*ri_pd;
-	struct completion	ri_done;
-	struct completion	ri_remove_done;
 	int			ri_async_rc;
 	unsigned int		ri_max_segs;
 	unsigned int		ri_max_frwr_depth;
-	unsigned int		ri_max_inline_write;
-	unsigned int		ri_max_inline_read;
 	unsigned int		ri_max_send_sges;
 	bool			ri_implicit_roundup;
 	enum ib_mr_type		ri_mrtype;
 	unsigned long		ri_flags;
-	struct ib_qp_attr	ri_qp_attr;
-	struct ib_qp_init_attr	ri_qp_init_attr;
+	struct completion	ri_done;
+	struct completion	ri_remove_done;
 };
 
 enum {
@@ -96,84 +92,86 @@
 struct rpcrdma_ep {
 	unsigned int		rep_send_count;
 	unsigned int		rep_send_batch;
+	unsigned int		rep_max_inline_send;
+	unsigned int		rep_max_inline_recv;
 	int			rep_connected;
 	struct ib_qp_init_attr	rep_attr;
 	wait_queue_head_t 	rep_connect_wait;
 	struct rpcrdma_connect_private	rep_cm_private;
 	struct rdma_conn_param	rep_remote_cma;
-	struct delayed_work	rep_connect_worker;
+	unsigned int		rep_max_requests;	/* set by /proc */
+	unsigned int		rep_inline_send;	/* negotiated */
+	unsigned int		rep_inline_recv;	/* negotiated */
+	int			rep_receive_count;
 };
 
 /* Pre-allocate extra Work Requests for handling backward receives
  * and sends. This is a fixed value because the Work Queues are
- * allocated when the forward channel is set up.
+ * allocated when the forward channel is set up, long before the
+ * backchannel is provisioned. This value is two times
+ * NFS4_DEF_CB_SLOT_TABLE_SIZE.
  */
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
-#define RPCRDMA_BACKWARD_WRS		(8)
+#define RPCRDMA_BACKWARD_WRS (32)
 #else
-#define RPCRDMA_BACKWARD_WRS		(0)
+#define RPCRDMA_BACKWARD_WRS (0)
 #endif
 
 /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
- *
- * The below structure appears at the front of a large region of kmalloc'd
- * memory, which always starts on a good alignment boundary.
  */
 
 struct rpcrdma_regbuf {
 	struct ib_sge		rg_iov;
 	struct ib_device	*rg_device;
 	enum dma_data_direction	rg_direction;
-	__be32			rg_base[0] __attribute__ ((aligned(256)));
+	void			*rg_data;
 };
 
-static inline u64
-rdmab_addr(struct rpcrdma_regbuf *rb)
+static inline u64 rdmab_addr(struct rpcrdma_regbuf *rb)
 {
 	return rb->rg_iov.addr;
 }
 
-static inline u32
-rdmab_length(struct rpcrdma_regbuf *rb)
+static inline u32 rdmab_length(struct rpcrdma_regbuf *rb)
 {
 	return rb->rg_iov.length;
 }
 
-static inline u32
-rdmab_lkey(struct rpcrdma_regbuf *rb)
+static inline u32 rdmab_lkey(struct rpcrdma_regbuf *rb)
 {
 	return rb->rg_iov.lkey;
 }
 
-static inline struct ib_device *
-rdmab_device(struct rpcrdma_regbuf *rb)
+static inline struct ib_device *rdmab_device(struct rpcrdma_regbuf *rb)
 {
 	return rb->rg_device;
 }
 
+static inline void *rdmab_data(const struct rpcrdma_regbuf *rb)
+{
+	return rb->rg_data;
+}
+
 #define RPCRDMA_DEF_GFP		(GFP_NOIO | __GFP_NOWARN)
 
 /* To ensure a transport can always make forward progress,
  * the number of RDMA segments allowed in header chunk lists
- * is capped at 8. This prevents less-capable devices and
- * memory registrations from overrunning the Send buffer
- * while building chunk lists.
+ * is capped at 16. This prevents less-capable devices from
+ * overrunning the Send buffer while building chunk lists.
  *
  * Elements of the Read list take up more room than the
- * Write list or Reply chunk. 8 read segments means the Read
- * list (or Write list or Reply chunk) cannot consume more
- * than
+ * Write list or Reply chunk. 16 read segments means the
+ * chunk lists cannot consume more than
  *
- * ((8 + 2) * read segment size) + 1 XDR words, or 244 bytes.
+ * ((16 + 2) * read segment size) + 1 XDR words,
  *
- * And the fixed part of the header is another 24 bytes.
- *
- * The smallest inline threshold is 1024 bytes, ensuring that
- * at least 750 bytes are available for RPC messages.
+ * or about 400 bytes. The fixed part of the header is
+ * another 24 bytes. Thus when the inline threshold is
+ * 1024 bytes, at least 600 bytes are available for RPC
+ * message bodies.
  */
 enum {
-	RPCRDMA_MAX_HDR_SEGS = 8,
-	RPCRDMA_HDRBUF_SIZE = 256,
+	RPCRDMA_MAX_HDR_SEGS = 16,
 };
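Checking that arithmetic with an assumed read-segment size of five XDR words (position, handle, length, and a two-word offset — an illustration, not the authoritative encoding):

    ((16 + 2) * 5 + 1) words * 4 bytes/word = 364 bytes

which with the 24-byte fixed header comes to roughly 400 bytes, leaving about 600 bytes of a 1024-byte inline threshold for the RPC message body.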
 
 /*
@@ -200,14 +198,23 @@
 	bool			rr_temp;
 	struct rpcrdma_regbuf	*rr_rdmabuf;
 	struct rpcrdma_xprt	*rr_rxprt;
-	struct work_struct	rr_work;
+	struct rpc_rqst		*rr_rqst;
 	struct xdr_buf		rr_hdrbuf;
 	struct xdr_stream	rr_stream;
-	struct rpc_rqst		*rr_rqst;
-	struct list_head	rr_list;
+	struct llist_node	rr_node;
 	struct ib_recv_wr	rr_recv_wr;
 };
 
+/* To reduce the rate at which a transport invokes ib_post_recv
+ * (and thus the hardware doorbell rate), xprtrdma posts Receive
+ * WRs in batches.
+ *
+ * Setting this to zero disables Receive post batching.
+ */
+enum {
+	RPCRDMA_MAX_RECV_BATCH = 7,
+};
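A worked pass through the provisioning arithmetic in rpcrdma_post_recvs() (verbs.c), with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
        int credits = 32;   /* rb_credits: most recent credit grant */
        int bc_max = 0;     /* rb_bc_srv_max_requests */
        int posted = 20;    /* ep->rep_receive_count */
        int batch = 7;      /* RPCRDMA_MAX_RECV_BATCH */
        int needed = credits + (bc_max << 1);

        if (posted > needed) {
            puts("enough Receives already posted");
            return 0;
        }
        needed -= posted;
        needed += batch;    /* non-temp case: over-provision */
        printf("chain and post %d Receive WRs\n", needed);   /* 19 */
        return 0;
    }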
+
 /* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
  */
 struct rpcrdma_req;
@@ -215,46 +222,22 @@
 struct rpcrdma_sendctx {
 	struct ib_send_wr	sc_wr;
 	struct ib_cqe		sc_cqe;
+	struct ib_device	*sc_device;
 	struct rpcrdma_xprt	*sc_xprt;
 	struct rpcrdma_req	*sc_req;
 	unsigned int		sc_unmap_count;
 	struct ib_sge		sc_sges[];
 };
 
-/* Limit the number of SGEs that can be unmapped during one
- * Send completion. This caps the amount of work a single
- * completion can do before returning to the provider.
- *
- * Setting this to zero disables Send completion batching.
- */
-enum {
-	RPCRDMA_MAX_SEND_BATCH = 7,
-};
-
 /*
  * struct rpcrdma_mr - external memory region metadata
  *
  * An external memory region is any buffer or page that is registered
  * on the fly (ie, not pre-registered).
- *
- * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
- * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
- * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
- * track of registration metadata while each RPC is pending.
- * rpcrdma_deregister_external() uses this metadata to unmap and
- * release these resources when an RPC is complete.
  */
-enum rpcrdma_frwr_state {
-	FRWR_IS_INVALID,	/* ready to be used */
-	FRWR_IS_VALID,		/* in use */
-	FRWR_FLUSHED_FR,	/* flushed FASTREG WR */
-	FRWR_FLUSHED_LI,	/* flushed LOCALINV WR */
-};
-
 struct rpcrdma_frwr {
 	struct ib_mr			*fr_mr;
 	struct ib_cqe			fr_cqe;
-	enum rpcrdma_frwr_state		fr_state;
 	struct completion		fr_linv_done;
 	union {
 		struct ib_reg_wr	fr_regwr;
@@ -262,24 +245,19 @@
 	};
 };
 
-struct rpcrdma_fmr {
-	struct ib_fmr		*fm_mr;
-	u64			*fm_physaddrs;
-};
-
+struct rpcrdma_req;
 struct rpcrdma_mr {
 	struct list_head	mr_list;
+	struct rpcrdma_req	*mr_req;
 	struct scatterlist	*mr_sg;
 	int			mr_nents;
 	enum dma_data_direction	mr_dir;
-	union {
-		struct rpcrdma_fmr	fmr;
-		struct rpcrdma_frwr	frwr;
-	};
+	struct rpcrdma_frwr	frwr;
 	struct rpcrdma_xprt	*mr_xprt;
 	u32			mr_handle;
 	u32			mr_length;
 	u64			mr_offset;
+	struct work_struct	mr_recycle;
 	struct list_head	mr_all;
 };
 
@@ -337,7 +315,6 @@
 struct rpcrdma_req {
 	struct list_head	rl_list;
 	struct rpc_rqst		rl_slot;
-	struct rpcrdma_buffer	*rl_buffer;
 	struct rpcrdma_rep	*rl_reply;
 	struct xdr_stream	rl_stream;
 	struct xdr_buf		rl_hdrbuf;
@@ -347,18 +324,13 @@
 	struct rpcrdma_regbuf	*rl_recvbuf;	/* rq_rcv_buf */
 
 	struct list_head	rl_all;
-	unsigned long		rl_flags;
+	struct kref		rl_kref;
 
-	struct list_head	rl_registered;	/* registered segments */
+	struct list_head	rl_free_mrs;
+	struct list_head	rl_registered;
 	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
 };
 
-/* rl_flags */
-enum {
-	RPCRDMA_REQ_F_PENDING = 0,
-	RPCRDMA_REQ_F_TX_RESOURCES,
-};
-
 static inline struct rpcrdma_req *
 rpcr_to_rdmar(const struct rpc_rqst *rqst)
 {
@@ -368,7 +340,7 @@
 static inline void
 rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
 {
-	list_add_tail(&mr->mr_list, list);
+	list_add(&mr->mr_list, list);
 }
 
 static inline struct rpcrdma_mr *
@@ -376,8 +348,9 @@
 {
 	struct rpcrdma_mr *mr;
 
-	mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
-	list_del_init(&mr->mr_list);
+	mr = list_first_entry_or_null(list, struct rpcrdma_mr, mr_list);
+	if (mr)
+		list_del_init(&mr->mr_list);
 	return mr;
 }
 
@@ -388,53 +361,27 @@
  * One of these is associated with a transport instance
  */
 struct rpcrdma_buffer {
-	spinlock_t		rb_mrlock;	/* protect rb_mrs list */
+	spinlock_t		rb_lock;
+	struct list_head	rb_send_bufs;
 	struct list_head	rb_mrs;
-	struct list_head	rb_all;
 
 	unsigned long		rb_sc_head;
 	unsigned long		rb_sc_tail;
 	unsigned long		rb_sc_last;
 	struct rpcrdma_sendctx	**rb_sc_ctxs;
 
-	spinlock_t		rb_lock;	/* protect buf lists */
-	struct list_head	rb_send_bufs;
-	struct list_head	rb_recv_bufs;
-	unsigned long		rb_flags;
+	struct list_head	rb_allreqs;
+	struct list_head	rb_all_mrs;
+
+	struct llist_head	rb_free_reps;
+
 	u32			rb_max_requests;
 	u32			rb_credits;	/* most recent credit grant */
-	int			rb_posted_receives;
 
 	u32			rb_bc_srv_max_requests;
-	spinlock_t		rb_reqslock;	/* protect rb_allreqs */
-	struct list_head	rb_allreqs;
-
 	u32			rb_bc_max_requests;
 
-	spinlock_t		rb_recovery_lock; /* protect rb_stale_mrs */
-	struct list_head	rb_stale_mrs;
-	struct delayed_work	rb_recovery_worker;
-	struct delayed_work	rb_refresh_worker;
-};
-#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
-
-/* rb_flags */
-enum {
-	RPCRDMA_BUF_F_EMPTY_SCQ = 0,
-};
-
-/*
- * Internal structure for transport instance creation. This
- * exists primarily for modularity.
- *
- * This data should be set with mount options
- */
-struct rpcrdma_create_data_internal {
-	unsigned int	max_requests;	/* max requests (slots) in flight */
-	unsigned int	rsize;		/* mount rsize - max read hdr+data */
-	unsigned int	wsize;		/* mount wsize - max write hdr+data */
-	unsigned int	inline_rsize;	/* max non-rdma read data payload */
-	unsigned int	inline_wsize;	/* max non-rdma write data payload */
+	struct work_struct	rb_refresh_worker;
 };
 
 /*
@@ -452,7 +399,7 @@
 	unsigned long		hardway_register_count;
 	unsigned long		failed_marshal_count;
 	unsigned long		bad_reply_count;
-	unsigned long		mrs_recovered;
+	unsigned long		mrs_recycled;
 	unsigned long		mrs_orphaned;
 	unsigned long		mrs_allocated;
 	unsigned long		empty_sendctx_q;
@@ -467,36 +414,6 @@
 };
 
 /*
- * Per-registration mode operations
- */
-struct rpcrdma_xprt;
-struct rpcrdma_memreg_ops {
-	struct rpcrdma_mr_seg *
-			(*ro_map)(struct rpcrdma_xprt *,
-				  struct rpcrdma_mr_seg *, int, bool,
-				  struct rpcrdma_mr **);
-	int		(*ro_send)(struct rpcrdma_ia *ia,
-				   struct rpcrdma_req *req);
-	void		(*ro_reminv)(struct rpcrdma_rep *rep,
-				     struct list_head *mrs);
-	void		(*ro_unmap_sync)(struct rpcrdma_xprt *,
-					 struct list_head *);
-	void		(*ro_recover_mr)(struct rpcrdma_mr *mr);
-	int		(*ro_open)(struct rpcrdma_ia *,
-				   struct rpcrdma_ep *,
-				   struct rpcrdma_create_data_internal *);
-	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
-	int		(*ro_init_mr)(struct rpcrdma_ia *,
-				      struct rpcrdma_mr *);
-	void		(*ro_release_mr)(struct rpcrdma_mr *mr);
-	const char	*ro_displayname;
-	const int	ro_send_w_inv_ok;
-};
-
-extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
-extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
-
-/*
  * RPCRDMA transport -- encapsulates the structures above for
  * integration with RPC.
  *
@@ -511,13 +428,12 @@
 	struct rpcrdma_ia	rx_ia;
 	struct rpcrdma_ep	rx_ep;
 	struct rpcrdma_buffer	rx_buf;
-	struct rpcrdma_create_data_internal rx_data;
 	struct delayed_work	rx_connect_worker;
+	struct rpc_timeout	rx_timeout;
 	struct rpcrdma_stats	rx_stats;
 };
 
 #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
-#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
 
 static inline const char *
 rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
@@ -547,65 +463,72 @@
 int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
 void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
 void rpcrdma_ia_close(struct rpcrdma_ia *);
-bool frwr_is_supported(struct rpcrdma_ia *);
-bool fmr_is_supported(struct rpcrdma_ia *);
-
-extern struct workqueue_struct *rpcrdma_receive_wq;
 
 /*
  * Endpoint calls - xprtrdma/verbs.c
  */
-int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
-				struct rpcrdma_create_data_internal *);
-void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt);
+void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt);
 int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
-void rpcrdma_conn_func(struct rpcrdma_ep *ep);
 void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
 
 int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
 				struct rpcrdma_req *);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
 
 /*
  * Buffer calls - xprtrdma/verbs.c
  */
-struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
-void rpcrdma_destroy_req(struct rpcrdma_req *);
+struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
+				       gfp_t flags);
+void rpcrdma_req_destroy(struct rpcrdma_req *req);
 int rpcrdma_buffer_create(struct rpcrdma_xprt *);
 void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
-struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
 
 struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
 void rpcrdma_mr_put(struct rpcrdma_mr *mr);
-void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
-void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
+
+static inline void
+rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
+{
+	schedule_work(&mr->mr_recycle);
+}
 
 struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
-void rpcrdma_buffer_put(struct rpcrdma_req *);
+void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
+			struct rpcrdma_req *req);
 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
 
-struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
-					    gfp_t);
-bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
-void rpcrdma_free_regbuf(struct rpcrdma_regbuf *);
+bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
+			    gfp_t flags);
+bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+			      struct rpcrdma_regbuf *rb);
 
-static inline bool
-rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
+/**
+ * rpcrdma_regbuf_is_mapped - check if buffer is DMA mapped
+ * @rb: regbuf to check
+ * Returns true if the buffer is now mapped to rb->rg_device.
+ */
+static inline bool rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
 {
 	return rb->rg_device != NULL;
 }
 
-static inline bool
-rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+/**
+ * rpcrdma_regbuf_dma_map - DMA-map a regbuf
+ * @r_xprt: controlling transport instance
+ * @rb: regbuf to be mapped
+ *
+ * Returns true if the buffer is currently DMA mapped.
+ */
+static inline bool rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+					  struct rpcrdma_regbuf *rb)
 {
 	if (likely(rpcrdma_regbuf_is_mapped(rb)))
 		return true;
-	return __rpcrdma_dma_map_regbuf(ia, rb);
+	return __rpcrdma_regbuf_dma_map(r_xprt, rb);
 }
 
-int rpcrdma_alloc_wq(void);
-void rpcrdma_destroy_wq(void);
-
 /*
  * Wrappers for chunk registration, shared by read/write chunk code.
  */
@@ -616,6 +539,24 @@
 	return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 }
 
+/* Memory registration calls - xprtrdma/frwr_ops.c
+ */
+bool frwr_is_supported(struct ib_device *device);
+void frwr_recycle(struct rpcrdma_req *req);
+void frwr_reset(struct rpcrdma_req *req);
+int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
+int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
+void frwr_release_mr(struct rpcrdma_mr *mr);
+size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
+struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
+				struct rpcrdma_mr_seg *seg,
+				int nsegs, bool writing, __be32 xid,
+				struct rpcrdma_mr *mr);
+int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
+void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
+void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
+void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
+
 /*
  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
  */
@@ -632,14 +573,11 @@
 			      struct rpcrdma_req *req, u32 hdrlen,
 			      struct xdr_buf *xdr,
 			      enum rpcrdma_chunktype rtype);
-void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
+void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc);
 int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
 void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
 void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
-void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
-			  struct rpcrdma_req *req);
-void rpcrdma_deferred_completion(struct work_struct *work);
 
 static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
 {
@@ -649,10 +587,12 @@
 
 /* RPC/RDMA module init - xprtrdma/transport.c
  */
+extern unsigned int xprt_rdma_slot_table_entries;
 extern unsigned int xprt_rdma_max_inline_read;
+extern unsigned int xprt_rdma_max_inline_write;
 void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
 void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
-void rpcrdma_connect_worker(struct work_struct *work);
+void xprt_rdma_close(struct rpc_xprt *xprt);
 void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
 int xprt_rdma_init(void);
 void xprt_rdma_cleanup(void);
@@ -661,8 +601,8 @@
  */
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
-int xprt_rdma_bc_up(struct svc_serv *, struct net *);
 size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
+unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *);
 int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
 void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
 int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6b7539c..70e52f5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,13 +47,15 @@
 #include <net/checksum.h>
 #include <net/udp.h>
 #include <net/tcp.h>
+#include <linux/bvec.h>
+#include <linux/highmem.h>
+#include <linux/uio.h>
+#include <linux/sched/mm.h>
 
 #include <trace/events/sunrpc.h>
 
 #include "sunrpc.h"
 
-#define RPC_TCP_READ_CHUNK_SZ	(3*512*1024)
-
 static void xs_close(struct rpc_xprt *xprt);
 static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
 		struct socket *sock);
@@ -68,8 +70,6 @@
 static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
 static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
 
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-
 #define XS_TCP_LINGER_TO	(15U * HZ)
 static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
 
@@ -129,7 +129,7 @@
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &xprt_min_resvport_limit,
-		.extra2		= &xprt_max_resvport
+		.extra2		= &xprt_max_resvport_limit
 	},
 	{
 		.procname	= "max_resvport",
@@ -137,7 +137,7 @@
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xprt_min_resvport,
+		.extra1		= &xprt_min_resvport_limit,
 		.extra2		= &xprt_max_resvport_limit
 	},
 	{
@@ -159,8 +159,6 @@
 	{ },
 };
 
-#endif
-
 /*
  * Wait duration for a reply from the RPC portmapper.
  */
@@ -325,61 +323,480 @@
 		}
 }
 
-#define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
-
-static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
+static size_t
+xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
 {
-	struct msghdr msg = {
-		.msg_name	= addr,
-		.msg_namelen	= addrlen,
-		.msg_flags	= XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0),
-	};
-	struct kvec iov = {
-		.iov_base	= vec->iov_base + base,
-		.iov_len	= vec->iov_len - base,
-	};
+	size_t i, n;
 
-	if (iov.iov_len != 0)
-		return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
-	return kernel_sendmsg(sock, &msg, NULL, 0, 0);
+	if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES))
+		return want;
+	n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	for (i = 0; i < n; i++) {
+		if (buf->pages[i])
+			continue;
+		buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
+		if (!buf->pages[i]) {
+			i *= PAGE_SIZE;
+			return i > buf->page_base ? i - buf->page_base : 0;
+		}
+	}
+	return want;
 }
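XDRBUF_SPARSE_PAGES marks a receive buffer whose page array may still hold NULL slots; the helper above fills them on demand and, if allocation fails partway, reports how many usable payload bytes precede the missing page. The same arithmetic run in user space, assuming a 4 KiB page for illustration:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE (1UL << PAGE_SHIFT)

    int main(void)
    {
        size_t page_base = 100;   /* payload offset into the first page */
        size_t want = 10000;      /* bytes the caller hopes to receive */
        size_t n = (page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
        size_t i = 1;             /* pretend alloc_page() fails here */
        size_t usable = i * PAGE_SIZE > page_base ?
                        i * PAGE_SIZE - page_base : 0;

        printf("need %zu pages; failure at page %zu leaves %zu bytes\n",
               n, i, usable);     /* need 3; leaves 3996 */
        return 0;
    }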
 
-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p)
+static ssize_t
+xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
 {
-	ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
-			int offset, size_t size, int flags);
-	struct page **ppage;
-	unsigned int remainder;
+	ssize_t ret;
+
+	if (seek != 0)
+		iov_iter_advance(&msg->msg_iter, seek);
+	ret = sock_recvmsg(sock, msg, flags);
+	return ret > 0 ? ret + seek : ret;
+}
+
+static ssize_t
+xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
+		struct kvec *kvec, size_t count, size_t seek)
+{
+	iov_iter_kvec(&msg->msg_iter, READ, kvec, 1, count);
+	return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+static ssize_t
+xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
+		struct bio_vec *bvec, unsigned long nr, size_t count,
+		size_t seek)
+{
+	iov_iter_bvec(&msg->msg_iter, READ, bvec, nr, count);
+	return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+static ssize_t
+xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
+		size_t count)
+{
+	iov_iter_discard(&msg->msg_iter, READ, count);
+	return sock_recvmsg(sock, msg, flags);
+}
+
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+static void
+xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek)
+{
+	struct bvec_iter bi = {
+		.bi_size = count,
+	};
+	struct bio_vec bv;
+
+	bvec_iter_advance(bvec, &bi, seek & PAGE_MASK);
+	for_each_bvec(bv, bvec, bi, bi)
+		flush_dcache_page(bv.bv_page);
+}
+#else
+static inline void
+xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek)
+{
+}
+#endif
+
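+/*
+ * Fill @buf section by section (head kvec, page vector, tail kvec),
+ * resuming at offset @seek. @read returns the number of new bytes
+ * landed by this call; -EMSGSIZE means the record overflows @buf.
+ */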
+static ssize_t
+xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
+		struct xdr_buf *buf, size_t count, size_t seek, size_t *read)
+{
+	size_t want, seek_init = seek, offset = 0;
+	ssize_t ret;
+
+	want = min_t(size_t, count, buf->head[0].iov_len);
+	if (seek < want) {
+		ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
+		if (ret <= 0)
+			goto sock_err;
+		offset += ret;
+		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+			goto out;
+		if (ret != want)
+			goto out;
+		seek = 0;
+	} else {
+		seek -= want;
+		offset += want;
+	}
+
+	want = xs_alloc_sparse_pages(buf,
+			min_t(size_t, count - offset, buf->page_len),
+			GFP_KERNEL);
+	if (seek < want) {
+		ret = xs_read_bvec(sock, msg, flags, buf->bvec,
+				xdr_buf_pagecount(buf),
+				want + buf->page_base,
+				seek + buf->page_base);
+		if (ret <= 0)
+			goto sock_err;
+		xs_flush_bvec(buf->bvec, ret, seek + buf->page_base);
+		offset += ret - buf->page_base;
+		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+			goto out;
+		if (ret != want)
+			goto out;
+		seek = 0;
+	} else {
+		seek -= want;
+		offset += want;
+	}
+
+	want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+	if (seek < want) {
+		ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
+		if (ret <= 0)
+			goto sock_err;
+		offset += ret;
+		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+			goto out;
+		if (ret != want)
+			goto out;
+	} else if (offset < seek_init)
+		offset = seek_init;
+	ret = -EMSGSIZE;
+out:
+	*read = offset - seek_init;
+	return ret;
+sock_err:
+	offset += seek;
+	goto out;
+}
+
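+/*
+ * On the first pass over a record, replay the bytes that were consumed
+ * while parsing the header (XID and call direction) into the head of
+ * the receive buffer.
+ */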
+static void
+xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf)
+{
+	if (!transport->recv.copied) {
+		if (buf->head[0].iov_len >= transport->recv.offset)
+			memcpy(buf->head[0].iov_base,
+					&transport->recv.xid,
+					transport->recv.offset);
+		transport->recv.copied = transport->recv.offset;
+	}
+}
+
+static bool
+xs_read_stream_request_done(struct sock_xprt *transport)
+{
+	return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
+}
+
+static void
+xs_read_stream_check_eor(struct sock_xprt *transport,
+		struct msghdr *msg)
+{
+	if (xs_read_stream_request_done(transport))
+		msg->msg_flags |= MSG_EOR;
+}
+
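+/*
+ * Read the remainder of the current record into the request's private
+ * buffer, advancing recv.offset and recv.copied as data lands. A zero
+ * return from the socket maps to -ESHUTDOWN; -EFAULT and -EMSGSIZE
+ * mark the reply as truncated via MSG_TRUNC.
+ */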
+static ssize_t
+xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
+		int flags, struct rpc_rqst *req)
+{
+	struct xdr_buf *buf = &req->rq_private_buf;
+	size_t want, uninitialized_var(read);
+	ssize_t uninitialized_var(ret);
+
+	xs_read_header(transport, buf);
+
+	want = transport->recv.len - transport->recv.offset;
+	if (want != 0) {
+		ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
+				transport->recv.copied + want,
+				transport->recv.copied,
+				&read);
+		transport->recv.offset += read;
+		transport->recv.copied += read;
+	}
+
+	if (transport->recv.offset == transport->recv.len)
+		xs_read_stream_check_eor(transport, msg);
+
+	if (want == 0)
+		return 0;
+
+	switch (ret) {
+	default:
+		break;
+	case -EFAULT:
+	case -EMSGSIZE:
+		msg->msg_flags |= MSG_TRUNC;
+		return read;
+	case 0:
+		return -ESHUTDOWN;
+	}
+	return ret < 0 ? ret : read;
+}
+
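+/*
+ * A new message starts with the record marker, XID and call direction
+ * (12 bytes); continuation fragments carry only the 4-byte marker.
+ */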
+static size_t
+xs_read_stream_headersize(bool isfrag)
+{
+	if (isfrag)
+		return sizeof(__be32);
+	return 3 * sizeof(__be32);
+}
+
+static ssize_t
+xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg,
+		int flags, size_t want, size_t seek)
+{
+	struct kvec kvec = {
+		.iov_base = &transport->recv.fraghdr,
+		.iov_len = want,
+	};
+	return xs_read_kvec(transport->sock, msg, flags, &kvec, want, seek);
+}
+
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct rpc_rqst *req;
+	ssize_t ret;
+
+	/* Look up and lock the request corresponding to the given XID */
+	req = xprt_lookup_bc_request(xprt, transport->recv.xid);
+	if (!req) {
+		printk(KERN_WARNING "Callback slot table overflowed\n");
+		return -ESHUTDOWN;
+	}
+	if (transport->recv.copied && !req->rq_private_buf.len)
+		return -ESHUTDOWN;
+
+	ret = xs_read_stream_request(transport, msg, flags, req);
+	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+		xprt_complete_bc_request(req, transport->recv.copied);
+	else
+		req->rq_private_buf.len = transport->recv.copied;
+
+	return ret;
+}
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+	return -ESHUTDOWN;
+}
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+static ssize_t
+xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct rpc_rqst *req;
+	ssize_t ret = 0;
+
+	/* Look up and lock the request corresponding to the given XID */
+	spin_lock(&xprt->queue_lock);
+	req = xprt_lookup_rqst(xprt, transport->recv.xid);
+	if (!req || (transport->recv.copied && !req->rq_private_buf.len)) {
+		msg->msg_flags |= MSG_TRUNC;
+		goto out;
+	}
+	xprt_pin_rqst(req);
+	spin_unlock(&xprt->queue_lock);
+
+	ret = xs_read_stream_request(transport, msg, flags, req);
+
+	spin_lock(&xprt->queue_lock);
+	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+		xprt_complete_rqst(req->rq_task, transport->recv.copied);
+	else
+		req->rq_private_buf.len = transport->recv.copied;
+	xprt_unpin_rqst(req);
+out:
+	spin_unlock(&xprt->queue_lock);
+	return ret;
+}
+
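+/*
+ * Per-record receive state machine: read the fragment header (plus XID
+ * and call direction at the start of a message), dispatch on RPC_CALL
+ * vs. RPC_REPLY, discard the tail of truncated records, and reset the
+ * recv state once a record has been consumed in full.
+ */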
+static ssize_t
+xs_read_stream(struct sock_xprt *transport, int flags)
+{
+	struct msghdr msg = { 0 };
+	size_t want, read = 0;
+	ssize_t ret = 0;
+
+	if (transport->recv.len == 0) {
+		want = xs_read_stream_headersize(transport->recv.copied != 0);
+		ret = xs_read_stream_header(transport, &msg, flags, want,
+				transport->recv.offset);
+		if (ret <= 0)
+			goto out_err;
+		transport->recv.offset = ret;
+		if (transport->recv.offset != want)
+			return transport->recv.offset;
+		transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
+			RPC_FRAGMENT_SIZE_MASK;
+		transport->recv.offset -= sizeof(transport->recv.fraghdr);
+		read = ret;
+	}
+
+	switch (be32_to_cpu(transport->recv.calldir)) {
+	default:
+		msg.msg_flags |= MSG_TRUNC;
+		break;
+	case RPC_CALL:
+		ret = xs_read_stream_call(transport, &msg, flags);
+		break;
+	case RPC_REPLY:
+		ret = xs_read_stream_reply(transport, &msg, flags);
+	}
+	if (msg.msg_flags & MSG_TRUNC) {
+		transport->recv.calldir = cpu_to_be32(-1);
+		transport->recv.copied = -1;
+	}
+	if (ret < 0)
+		goto out_err;
+	read += ret;
+	if (transport->recv.offset < transport->recv.len) {
+		if (!(msg.msg_flags & MSG_TRUNC))
+			return read;
+		msg.msg_flags = 0;
+		ret = xs_read_discard(transport->sock, &msg, flags,
+				transport->recv.len - transport->recv.offset);
+		if (ret <= 0)
+			goto out_err;
+		transport->recv.offset += ret;
+		read += ret;
+		if (transport->recv.offset != transport->recv.len)
+			return read;
+	}
+	if (xs_read_stream_request_done(transport)) {
+		trace_xs_stream_read_request(transport);
+		transport->recv.copied = 0;
+	}
+	transport->recv.offset = 0;
+	transport->recv.len = 0;
+	return read;
+out_err:
+	return ret != 0 ? ret : -ESHUTDOWN;
+}
+
+static __poll_t xs_poll_socket(struct sock_xprt *transport)
+{
+	return transport->sock->ops->poll(transport->file, transport->sock,
+			NULL);
+}
+
+static bool xs_poll_socket_readable(struct sock_xprt *transport)
+{
+	__poll_t events = xs_poll_socket(transport);
+
+	return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP);
+}
+
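+/*
+ * Re-queue the receive worker if the socket still signals readable
+ * data after a receive pass has drained everything it could.
+ */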
+static void xs_poll_check_readable(struct sock_xprt *transport)
+{
+	clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+	if (!xs_poll_socket_readable(transport))
+		return;
+	if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+		queue_work(xprtiod_workqueue, &transport->recv_worker);
+}
+
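+/*
+ * Pull complete records off the socket until a read would block. An
+ * -ESHUTDOWN from xs_read_stream() tears the connection down; any
+ * other exit rechecks the socket for readable data.
+ */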
+static void xs_stream_data_receive(struct sock_xprt *transport)
+{
+	size_t read = 0;
+	ssize_t ret = 0;
+
+	mutex_lock(&transport->recv_mutex);
+	if (transport->sock == NULL)
+		goto out;
+	for (;;) {
+		ret = xs_read_stream(transport, MSG_DONTWAIT);
+		if (ret < 0)
+			break;
+		read += ret;
+		cond_resched();
+	}
+	if (ret == -ESHUTDOWN)
+		kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+	else
+		xs_poll_check_readable(transport);
+out:
+	mutex_unlock(&transport->recv_mutex);
+	trace_xs_stream_read_data(&transport->xprt, ret, read);
+}
+
+static void xs_stream_data_receive_workfn(struct work_struct *work)
+{
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, recv_worker);
+	unsigned int pflags = memalloc_nofs_save();
+
+	xs_stream_data_receive(transport);
+	memalloc_nofs_restore(pflags);
+}
+
+static void
+xs_stream_reset_connect(struct sock_xprt *transport)
+{
+	transport->recv.offset = 0;
+	transport->recv.len = 0;
+	transport->recv.copied = 0;
+	transport->xmit.offset = 0;
+}
+
+static void
+xs_stream_start_connect(struct sock_xprt *transport)
+{
+	transport->xprt.stat.connect_count++;
+	transport->xprt.stat.connect_start = jiffies;
+}
+
+#define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
+
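+/* @seek skips over bytes already queued by an earlier partial send. */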
+static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek)
+{
+	if (seek)
+		iov_iter_advance(&msg->msg_iter, seek);
+	return sock_sendmsg(sock, msg);
+}
+
+static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek)
+{
+	iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
+	return xs_sendmsg(sock, msg, seek);
+}
+
+static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base)
+{
 	int err;
 
-	remainder = xdr->page_len - base;
-	base += xdr->page_base;
-	ppage = xdr->pages + (base >> PAGE_SHIFT);
-	base &= ~PAGE_MASK;
-	do_sendpage = sock->ops->sendpage;
-	if (!zerocopy)
-		do_sendpage = sock_no_sendpage;
-	for(;;) {
-		unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
-		int flags = XS_SENDMSG_FLAGS;
+	err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+	if (err < 0)
+		return err;
 
-		remainder -= len;
-		if (more)
-			flags |= MSG_MORE;
-		if (remainder != 0)
-			flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE;
-		err = do_sendpage(sock, *ppage, base, len, flags);
-		if (remainder == 0 || err != len)
-			break;
-		*sent_p += err;
-		ppage++;
-		base = 0;
-	}
-	if (err > 0) {
-		*sent_p += err;
-		err = 0;
-	}
-	return err;
+	iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
+			xdr_buf_pagecount(xdr),
+			xdr->page_len + xdr->page_base);
+	return xs_sendmsg(sock, msg, base + xdr->page_base);
+}
+
+#define xs_record_marker_len() sizeof(rpc_fraghdr)
+
+/* Common case:
+ *  - stream transport
+ *  - sending from byte 0 of the message
+ *  - the message is wholly contained in @xdr's head iovec
+ */
+static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
+		rpc_fraghdr marker, struct kvec *vec, size_t base)
+{
+	struct kvec iov[2] = {
+		[0] = {
+			.iov_base	= &marker,
+			.iov_len	= sizeof(marker)
+		},
+		[1] = *vec,
+	};
+	size_t len = iov[0].iov_len + iov[1].iov_len;
+
+	iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len);
+	return xs_sendmsg(sock, msg, base);
 }
 
 /**
@@ -389,49 +806,60 @@
  * @addrlen: UDP only -- length of destination address
  * @xdr: buffer containing this request
  * @base: starting position in the buffer
- * @zerocopy: true if it is safe to use sendpage()
+ * @rm: stream record marker field
  * @sent_p: return the total number of bytes successfully queued for sending
  *
  */
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p)
 {
-	unsigned int remainder = xdr->len - base;
+	struct msghdr msg = {
+		.msg_name = addr,
+		.msg_namelen = addrlen,
+		.msg_flags = XS_SENDMSG_FLAGS | MSG_MORE,
+	};
+	unsigned int rmsize = rm ? sizeof(rm) : 0;
+	unsigned int remainder = rmsize + xdr->len - base;
+	unsigned int want;
 	int err = 0;
-	int sent = 0;
 
 	if (unlikely(!sock))
 		return -ENOTSOCK;
 
-	if (base != 0) {
-		addr = NULL;
-		addrlen = 0;
-	}
-
-	if (base < xdr->head[0].iov_len || addr != NULL) {
-		unsigned int len = xdr->head[0].iov_len - base;
+	want = xdr->head[0].iov_len + rmsize;
+	if (base < want) {
+		unsigned int len = want - base;
 		remainder -= len;
-		err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
+		if (remainder == 0)
+			msg.msg_flags &= ~MSG_MORE;
+		if (rmsize)
+			err = xs_send_rm_and_kvec(sock, &msg, rm,
+					&xdr->head[0], base);
+		else
+			err = xs_send_kvec(sock, &msg, &xdr->head[0], base);
 		if (remainder == 0 || err != len)
 			goto out;
 		*sent_p += err;
 		base = 0;
 	} else
-		base -= xdr->head[0].iov_len;
+		base -= want;
 
 	if (base < xdr->page_len) {
 		unsigned int len = xdr->page_len - base;
 		remainder -= len;
-		err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent);
-		*sent_p += sent;
-		if (remainder == 0 || sent != len)
+		if (remainder == 0)
+			msg.msg_flags &= ~MSG_MORE;
+		err = xs_send_pagedata(sock, &msg, xdr, base);
+		if (remainder == 0 || err != len)
 			goto out;
+		*sent_p += err;
 		base = 0;
 	} else
 		base -= xdr->page_len;
 
 	if (base >= xdr->tail[0].iov_len)
 		return 0;
-	err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
+	msg.msg_flags &= ~MSG_MORE;
+	err = xs_send_kvec(sock, &msg, &xdr->tail[0], base);
 out:
 	if (err > 0) {
 		*sent_p += err;
@@ -440,42 +868,35 @@
 	return err;
 }
 
-static void xs_nospace_callback(struct rpc_task *task)
-{
-	struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
-
-	transport->inet->sk_write_pending--;
-}
-
 /**
- * xs_nospace - place task on wait queue if transmit was incomplete
- * @task: task to put to sleep
+ * xs_nospace - handle an incomplete transmission
+ * @req: pointer to RPC request
  *
  */
-static int xs_nospace(struct rpc_task *task)
+static int xs_nospace(struct rpc_rqst *req)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct sock *sk = transport->inet;
 	int ret = -EAGAIN;
 
 	dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
-			task->tk_pid, req->rq_slen - req->rq_bytes_sent,
+			req->rq_task->tk_pid,
+			req->rq_slen - transport->xmit.offset,
 			req->rq_slen);
 
 	/* Protect against races with write_space */
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->transport_lock);
 
 	/* Don't race with disconnect */
 	if (xprt_connected(xprt)) {
 		/* wait for more buffer space */
 		sk->sk_write_pending++;
-		xprt_wait_for_buffer_space(task, xs_nospace_callback);
+		xprt_wait_for_buffer_space(xprt);
 	} else
 		ret = -ENOTCONN;
 
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 
 	/* Race breaker in case memory is freed before above code is called */
 	if (ret == -EAGAIN) {
@@ -491,19 +912,37 @@
 	return ret;
 }
 
-/*
- * Construct a stream transport record marker in @buf.
- */
-static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
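+/*
+ * (Re)allocate the bio_vec array backing the receive buffer before the
+ * request is transmitted, so the stream receive path can land page
+ * data through the buffer's bvec array without allocating itself.
+ */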
+static void
+xs_stream_prepare_request(struct rpc_rqst *req)
 {
-	u32 reclen = buf->len - sizeof(rpc_fraghdr);
-	rpc_fraghdr *base = buf->head[0].iov_base;
-	*base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
+	xdr_free_bvec(&req->rq_rcv_buf);
+	req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
+}
+
+/*
+ * Determine if the previous message in the stream was aborted before it
+ * could complete transmission.
+ */
+static bool
+xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
+{
+	return transport->xmit.offset != 0 && req->rq_bytes_sent == 0;
+}
+
+/*
+ * Return the stream record marker field for a record of length < 2^31-1
+ */
+static rpc_fraghdr
+xs_stream_record_marker(struct xdr_buf *xdr)
+{
+	if (!xdr->len)
+		return 0;
+	return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len);
 }
 
 /**
  * xs_local_send_request - write an RPC request to an AF_LOCAL socket
- * @task: RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
  *
  * Return values:
  *        0:	The request has been sent
@@ -512,35 +951,41 @@
  * ENOTCONN:	Caller needs to invoke connect logic then call again
 *    other:	Some other error occurred, the request was not sent
  */
-static int xs_local_send_request(struct rpc_task *task)
+static int xs_local_send_request(struct rpc_rqst *req)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport =
 				container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
+	rpc_fraghdr rm = xs_stream_record_marker(xdr);
+	unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
 	int status;
 	int sent = 0;
 
-	xs_encode_stream_record_marker(&req->rq_snd_buf);
+	/* Close the stream if the previous transmission was incomplete */
+	if (xs_send_request_was_aborted(transport, req)) {
+		xs_close(xprt);
+		return -ENOTCONN;
+	}
 
 	xs_pktdump("packet data:",
 			req->rq_svec->iov_base, req->rq_svec->iov_len);
 
 	req->rq_xtime = ktime_get();
-	status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
-			      true, &sent);
+	status = xs_sendpages(transport->sock, NULL, 0, xdr,
+			      transport->xmit.offset, rm, &sent);
 	dprintk("RPC:       %s(%u) = %d\n",
-			__func__, xdr->len - req->rq_bytes_sent, status);
+			__func__, xdr->len - transport->xmit.offset, status);
 
 	if (status == -EAGAIN && sock_writeable(transport->inet))
 		status = -ENOBUFS;
 
 	if (likely(sent > 0) || status == 0) {
-		req->rq_bytes_sent += sent;
-		req->rq_xmit_bytes_sent += sent;
-		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
-			req->rq_bytes_sent = 0;
+		transport->xmit.offset += sent;
+		req->rq_bytes_sent = transport->xmit.offset;
+		if (likely(req->rq_bytes_sent >= msglen)) {
+			req->rq_xmit_bytes_sent += transport->xmit.offset;
+			transport->xmit.offset = 0;
 			return 0;
 		}
 		status = -EAGAIN;
@@ -550,7 +995,7 @@
 	case -ENOBUFS:
 		break;
 	case -EAGAIN:
-		status = xs_nospace(task);
+		status = xs_nospace(req);
 		break;
 	default:
 		dprintk("RPC:       sendmsg returned unrecognized error %d\n",
@@ -566,7 +1011,7 @@
 
 /**
  * xs_udp_send_request - write an RPC request to a UDP socket
- * @task: address of RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
  *
  * Return values:
  *        0:	The request has been sent
@@ -575,9 +1020,8 @@
  * ENOTCONN:	Caller needs to invoke connect logic then call again
  *    other:	Some other error occurred, the request was not sent
  */
-static int xs_udp_send_request(struct rpc_task *task)
+static int xs_udp_send_request(struct rpc_rqst *req)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
@@ -590,12 +1034,16 @@
 
 	if (!xprt_bound(xprt))
 		return -ENOTCONN;
+
+	if (!xprt_request_get_cong(xprt, req))
+		return -EBADSLT;
+
 	req->rq_xtime = ktime_get();
 	status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
-			      xdr, req->rq_bytes_sent, true, &sent);
+			      xdr, 0, 0, &sent);
 
 	dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
-			xdr->len - req->rq_bytes_sent, status);
+			xdr->len, status);
 
 	/* firewall is blocking us, don't return -EAGAIN or we end up looping */
 	if (status == -EPERM)
@@ -619,7 +1067,7 @@
 		/* Should we call xs_close() here? */
 		break;
 	case -EAGAIN:
-		status = xs_nospace(task);
+		status = xs_nospace(req);
 		break;
 	case -ENETUNREACH:
 	case -ENOBUFS:
@@ -639,7 +1087,7 @@
 
 /**
  * xs_tcp_send_request - write an RPC request to a TCP socket
- * @task: address of RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
  *
  * Return values:
  *        0:	The request has been sent
@@ -651,28 +1099,27 @@
  * XXX: In the case of soft timeouts, should we eventually give up
  *	if sendmsg is not able to make progress?
  */
-static int xs_tcp_send_request(struct rpc_task *task)
+static int xs_tcp_send_request(struct rpc_rqst *req)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
-	bool zerocopy = true;
+	rpc_fraghdr rm = xs_stream_record_marker(xdr);
+	unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
 	bool vm_wait = false;
 	int status;
 	int sent;
 
-	xs_encode_stream_record_marker(&req->rq_snd_buf);
+	/* Close the stream if the previous transmission was incomplete */
+	if (xs_send_request_was_aborted(transport, req)) {
+		if (transport->sock != NULL)
+			kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+		return -ENOTCONN;
+	}
 
 	xs_pktdump("packet data:",
 				req->rq_svec->iov_base,
 				req->rq_svec->iov_len);
-	/* Don't use zero copy if this is a resend. If the RPC call
-	 * completes while the socket holds a reference to the pages,
-	 * then we may end up resending corrupted data.
-	 */
-	if (task->tk_flags & RPC_TASK_SENT)
-		zerocopy = false;
 
 	if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
 		xs_tcp_set_socket_timeouts(xprt, transport->sock);
@@ -684,17 +1131,18 @@
 	while (1) {
 		sent = 0;
 		status = xs_sendpages(transport->sock, NULL, 0, xdr,
-				      req->rq_bytes_sent, zerocopy, &sent);
+				      transport->xmit.offset, rm, &sent);
 
 		dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
-				xdr->len - req->rq_bytes_sent, status);
+				xdr->len - transport->xmit.offset, status);
 
 		/* If we've sent the entire packet, immediately
 		 * reset the count of bytes sent. */
-		req->rq_bytes_sent += sent;
-		req->rq_xmit_bytes_sent += sent;
-		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
-			req->rq_bytes_sent = 0;
+		transport->xmit.offset += sent;
+		req->rq_bytes_sent = transport->xmit.offset;
+		if (likely(req->rq_bytes_sent >= msglen)) {
+			req->rq_xmit_bytes_sent += transport->xmit.offset;
+			transport->xmit.offset = 0;
 			return 0;
 		}
 
@@ -732,7 +1180,7 @@
 		/* Should we call xs_close() here? */
 		break;
 	case -EAGAIN:
-		status = xs_nospace(task);
+		status = xs_nospace(req);
 		break;
 	case -ECONNRESET:
 	case -ECONNREFUSED:
@@ -749,35 +1197,6 @@
 	return status;
 }
 
-/**
- * xs_tcp_release_xprt - clean up after a tcp transmission
- * @xprt: transport
- * @task: rpc task
- *
- * This cleans up if an error causes us to abort the transmission of a request.
- * In this case, the socket may need to be reset in order to avoid confusing
- * the server.
- */
-static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
-{
-	struct rpc_rqst *req;
-
-	if (task != xprt->snd_task)
-		return;
-	if (task == NULL)
-		goto out_release;
-	req = task->tk_rqstp;
-	if (req == NULL)
-		goto out_release;
-	if (req->rq_bytes_sent == 0)
-		goto out_release;
-	if (req->rq_bytes_sent == req->rq_snd_buf.len)
-		goto out_release;
-	set_bit(XPRT_CLOSE_WAIT, &xprt->state);
-out_release:
-	xprt_release_xprt(xprt, task);
-}
-
 static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
 {
 	transport->old_data_ready = sk->sk_data_ready;
@@ -799,6 +1218,15 @@
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 
 	clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+	clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
+	clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
+	clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
+}
+
+static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
+{
+	set_bit(nr, &transport->sock_state);
+	queue_work(xprtiod_workqueue, &transport->error_worker);
 }
 
 static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
@@ -819,20 +1247,24 @@
  */
 static void xs_error_report(struct sock *sk)
 {
+	struct sock_xprt *transport;
 	struct rpc_xprt *xprt;
-	int err;
 
 	read_lock_bh(&sk->sk_callback_lock);
 	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
 
-	err = -sk->sk_err;
-	if (err == 0)
+	transport = container_of(xprt, struct sock_xprt, xprt);
+	transport->xprt_err = -sk->sk_err;
+	if (transport->xprt_err == 0)
 		goto out;
 	dprintk("RPC:       xs_error_report client %p, error=%d...\n",
-			xprt, -err);
-	trace_rpc_socket_error(xprt, sk->sk_socket, err);
-	xprt_wake_pending_tasks(xprt, err);
+			xprt, -transport->xprt_err);
+	trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err);
+
+	/* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */
+	smp_mb__before_atomic();
+	xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR);
  out:
 	read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -842,6 +1274,7 @@
 	struct socket *sock = transport->sock;
 	struct sock *sk = transport->inet;
 	struct rpc_xprt *xprt = &transport->xprt;
+	struct file *filp = transport->file;
 
 	if (sk == NULL)
 		return;
@@ -855,6 +1288,7 @@
 	write_lock_bh(&sk->sk_callback_lock);
 	transport->inet = NULL;
 	transport->sock = NULL;
+	transport->file = NULL;
 
 	sk->sk_user_data = NULL;
 
@@ -862,10 +1296,14 @@
 	xprt_clear_connected(xprt);
 	write_unlock_bh(&sk->sk_callback_lock);
 	xs_sock_reset_connection_flags(xprt);
+	/* Reset stream record info */
+	xs_stream_reset_connect(transport);
 	mutex_unlock(&transport->recv_mutex);
 
 	trace_rpc_socket_close(xprt, sock);
-	sock_release(sock);
+	fput(filp);
+
+	xprt_disconnect_done(xprt);
 }
 
 /**
@@ -886,8 +1324,6 @@
 
 	xs_reset_transport(transport);
 	xprt->reestablish_timeout = 0;
-
-	xprt_disconnect_done(xprt);
 }
 
 static void xs_inject_disconnect(struct rpc_xprt *xprt)
@@ -917,118 +1353,11 @@
 	cancel_delayed_work_sync(&transport->connect_worker);
 	xs_close(xprt);
 	cancel_work_sync(&transport->recv_worker);
+	cancel_work_sync(&transport->error_worker);
 	xs_xprt_free(xprt);
 	module_put(THIS_MODULE);
 }
 
-static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
-{
-	struct xdr_skb_reader desc = {
-		.skb		= skb,
-		.offset		= sizeof(rpc_fraghdr),
-		.count		= skb->len - sizeof(rpc_fraghdr),
-	};
-
-	if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
-		return -1;
-	if (desc.count)
-		return -1;
-	return 0;
-}
-
-/**
- * xs_local_data_read_skb
- * @xprt: transport
- * @sk: socket
- * @skb: skbuff
- *
- * Currently this assumes we can read the whole reply in a single gulp.
- */
-static void xs_local_data_read_skb(struct rpc_xprt *xprt,
-		struct sock *sk,
-		struct sk_buff *skb)
-{
-	struct rpc_task *task;
-	struct rpc_rqst *rovr;
-	int repsize, copied;
-	u32 _xid;
-	__be32 *xp;
-
-	repsize = skb->len - sizeof(rpc_fraghdr);
-	if (repsize < 4) {
-		dprintk("RPC:       impossible RPC reply size %d\n", repsize);
-		return;
-	}
-
-	/* Copy the XID from the skb... */
-	xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
-	if (xp == NULL)
-		return;
-
-	/* Look up and lock the request corresponding to the given XID */
-	spin_lock(&xprt->recv_lock);
-	rovr = xprt_lookup_rqst(xprt, *xp);
-	if (!rovr)
-		goto out_unlock;
-	xprt_pin_rqst(rovr);
-	spin_unlock(&xprt->recv_lock);
-	task = rovr->rq_task;
-
-	copied = rovr->rq_private_buf.buflen;
-	if (copied > repsize)
-		copied = repsize;
-
-	if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
-		dprintk("RPC:       sk_buff copy failed\n");
-		spin_lock(&xprt->recv_lock);
-		goto out_unpin;
-	}
-
-	spin_lock(&xprt->recv_lock);
-	xprt_complete_rqst(task, copied);
-out_unpin:
-	xprt_unpin_rqst(rovr);
- out_unlock:
-	spin_unlock(&xprt->recv_lock);
-}
-
-static void xs_local_data_receive(struct sock_xprt *transport)
-{
-	struct sk_buff *skb;
-	struct sock *sk;
-	int err;
-
-restart:
-	mutex_lock(&transport->recv_mutex);
-	sk = transport->inet;
-	if (sk == NULL)
-		goto out;
-	for (;;) {
-		skb = skb_recv_datagram(sk, 0, 1, &err);
-		if (skb != NULL) {
-			xs_local_data_read_skb(&transport->xprt, sk, skb);
-			skb_free_datagram(sk, skb);
-			continue;
-		}
-		if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
-			break;
-		if (need_resched()) {
-			mutex_unlock(&transport->recv_mutex);
-			cond_resched();
-			goto restart;
-		}
-	}
-out:
-	mutex_unlock(&transport->recv_mutex);
-}
-
-static void xs_local_data_receive_workfn(struct work_struct *work)
-{
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, recv_worker);
-	xs_local_data_receive(transport);
-}
-
 /**
  * xs_udp_data_read_skb - receive callback for UDP sockets
  * @xprt: transport
@@ -1058,13 +1387,13 @@
 		return;
 
 	/* Look up and lock the request corresponding to the given XID */
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	rovr = xprt_lookup_rqst(xprt, *xp);
 	if (!rovr)
 		goto out_unlock;
 	xprt_pin_rqst(rovr);
 	xprt_update_rtt(rovr->rq_task);
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 	task = rovr->rq_task;
 
 	if ((copied = rovr->rq_private_buf.buflen) > repsize)
@@ -1072,22 +1401,22 @@
 
 	/* Suck it into the iovec, verify checksum if not done by hw. */
 	if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
-		spin_lock(&xprt->recv_lock);
+		spin_lock(&xprt->queue_lock);
 		__UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
 		goto out_unpin;
 	}
 
 
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->transport_lock);
 	xprt_adjust_cwnd(xprt, task, copied);
-	spin_unlock_bh(&xprt->transport_lock);
-	spin_lock(&xprt->recv_lock);
+	spin_unlock(&xprt->transport_lock);
+	spin_lock(&xprt->queue_lock);
 	xprt_complete_rqst(task, copied);
 	__UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
 out_unpin:
 	xprt_unpin_rqst(rovr);
  out_unlock:
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 }
 
 static void xs_udp_data_receive(struct sock_xprt *transport)
@@ -1096,26 +1425,19 @@
 	struct sock *sk;
 	int err;
 
-restart:
 	mutex_lock(&transport->recv_mutex);
 	sk = transport->inet;
 	if (sk == NULL)
 		goto out;
 	for (;;) {
 		skb = skb_recv_udp(sk, 0, 1, &err);
-		if (skb != NULL) {
-			xs_udp_data_read_skb(&transport->xprt, sk, skb);
-			consume_skb(skb);
-			continue;
-		}
-		if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+		if (skb == NULL)
 			break;
-		if (need_resched()) {
-			mutex_unlock(&transport->recv_mutex);
-			cond_resched();
-			goto restart;
-		}
+		xs_udp_data_read_skb(&transport->xprt, sk, skb);
+		consume_skb(skb);
+		cond_resched();
 	}
+	xs_poll_check_readable(transport);
 out:
 	mutex_unlock(&transport->recv_mutex);
 }
@@ -1124,7 +1446,10 @@
 {
 	struct sock_xprt *transport =
 		container_of(work, struct sock_xprt, recv_worker);
+	unsigned int pflags = memalloc_nofs_save();
+
 	xs_udp_data_receive(transport);
+	memalloc_nofs_restore(pflags);
 }
 
 /**
@@ -1163,417 +1488,13 @@
 	xprt_force_disconnect(xprt);
 }
 
-static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-	size_t len, used;
-	char *p;
-
-	p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset;
-	len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset;
-	used = xdr_skb_read_bits(desc, p, len);
-	transport->tcp_offset += used;
-	if (used != len)
-		return;
-
-	transport->tcp_reclen = ntohl(transport->tcp_fraghdr);
-	if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
-		transport->tcp_flags |= TCP_RCV_LAST_FRAG;
-	else
-		transport->tcp_flags &= ~TCP_RCV_LAST_FRAG;
-	transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
-
-	transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR;
-	transport->tcp_offset = 0;
-
-	/* Sanity check of the record length */
-	if (unlikely(transport->tcp_reclen < 8)) {
-		dprintk("RPC:       invalid TCP record fragment length\n");
-		xs_tcp_force_close(xprt);
-		return;
-	}
-	dprintk("RPC:       reading TCP record fragment of length %d\n",
-			transport->tcp_reclen);
-}
-
-static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
-{
-	if (transport->tcp_offset == transport->tcp_reclen) {
-		transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR;
-		transport->tcp_offset = 0;
-		if (transport->tcp_flags & TCP_RCV_LAST_FRAG) {
-			transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-			transport->tcp_flags |= TCP_RCV_COPY_XID;
-			transport->tcp_copied = 0;
-		}
-	}
-}
-
-static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
-	size_t len, used;
-	char *p;
-
-	len = sizeof(transport->tcp_xid) - transport->tcp_offset;
-	dprintk("RPC:       reading XID (%zu bytes)\n", len);
-	p = ((char *) &transport->tcp_xid) + transport->tcp_offset;
-	used = xdr_skb_read_bits(desc, p, len);
-	transport->tcp_offset += used;
-	if (used != len)
-		return;
-	transport->tcp_flags &= ~TCP_RCV_COPY_XID;
-	transport->tcp_flags |= TCP_RCV_READ_CALLDIR;
-	transport->tcp_copied = 4;
-	dprintk("RPC:       reading %s XID %08x\n",
-			(transport->tcp_flags & TCP_RPC_REPLY) ? "reply for"
-							      : "request with",
-			ntohl(transport->tcp_xid));
-	xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
-				       struct xdr_skb_reader *desc)
-{
-	size_t len, used;
-	u32 offset;
-	char *p;
-
-	/*
-	 * We want transport->tcp_offset to be 8 at the end of this routine
-	 * (4 bytes for the xid and 4 bytes for the call/reply flag).
-	 * When this function is called for the first time,
-	 * transport->tcp_offset is 4 (after having already read the xid).
-	 */
-	offset = transport->tcp_offset - sizeof(transport->tcp_xid);
-	len = sizeof(transport->tcp_calldir) - offset;
-	dprintk("RPC:       reading CALL/REPLY flag (%zu bytes)\n", len);
-	p = ((char *) &transport->tcp_calldir) + offset;
-	used = xdr_skb_read_bits(desc, p, len);
-	transport->tcp_offset += used;
-	if (used != len)
-		return;
-	transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR;
-	/*
-	 * We don't yet have the XDR buffer, so we will write the calldir
-	 * out after we get the buffer from the 'struct rpc_rqst'
-	 */
-	switch (ntohl(transport->tcp_calldir)) {
-	case RPC_REPLY:
-		transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
-		transport->tcp_flags |= TCP_RCV_COPY_DATA;
-		transport->tcp_flags |= TCP_RPC_REPLY;
-		break;
-	case RPC_CALL:
-		transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
-		transport->tcp_flags |= TCP_RCV_COPY_DATA;
-		transport->tcp_flags &= ~TCP_RPC_REPLY;
-		break;
-	default:
-		dprintk("RPC:       invalid request message type\n");
-		xs_tcp_force_close(&transport->xprt);
-	}
-	xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
-				     struct xdr_skb_reader *desc,
-				     struct rpc_rqst *req)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct xdr_buf *rcvbuf;
-	size_t len;
-	ssize_t r;
-
-	rcvbuf = &req->rq_private_buf;
-
-	if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) {
-		/*
-		 * Save the RPC direction in the XDR buffer
-		 */
-		memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied,
-			&transport->tcp_calldir,
-			sizeof(transport->tcp_calldir));
-		transport->tcp_copied += sizeof(transport->tcp_calldir);
-		transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR;
-	}
-
-	len = desc->count;
-	if (len > transport->tcp_reclen - transport->tcp_offset)
-		desc->count = transport->tcp_reclen - transport->tcp_offset;
-	r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
-					  desc, xdr_skb_read_bits);
-
-	if (desc->count) {
-		/* Error when copying to the receive buffer,
-		 * usually because we weren't able to allocate
-		 * additional buffer pages. All we can do now
-		 * is turn off TCP_RCV_COPY_DATA, so the request
-		 * will not receive any additional updates,
-		 * and time out.
-		 * Any remaining data from this record will
-		 * be discarded.
-		 */
-		transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-		dprintk("RPC:       XID %08x truncated request\n",
-				ntohl(transport->tcp_xid));
-		dprintk("RPC:       xprt = %p, tcp_copied = %lu, "
-				"tcp_offset = %u, tcp_reclen = %u\n",
-				xprt, transport->tcp_copied,
-				transport->tcp_offset, transport->tcp_reclen);
-		return;
-	}
-
-	transport->tcp_copied += r;
-	transport->tcp_offset += r;
-	desc->count = len - r;
-
-	dprintk("RPC:       XID %08x read %zd bytes\n",
-			ntohl(transport->tcp_xid), r);
-	dprintk("RPC:       xprt = %p, tcp_copied = %lu, tcp_offset = %u, "
-			"tcp_reclen = %u\n", xprt, transport->tcp_copied,
-			transport->tcp_offset, transport->tcp_reclen);
-
-	if (transport->tcp_copied == req->rq_private_buf.buflen)
-		transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-	else if (transport->tcp_offset == transport->tcp_reclen) {
-		if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
-			transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-	}
-}
-
-/*
- * Finds the request corresponding to the RPC xid and invokes the common
- * tcp read code to read the data.
- */
-static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
-				    struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct rpc_rqst *req;
-
-	dprintk("RPC:       read reply XID %08x\n", ntohl(transport->tcp_xid));
-
-	/* Find and lock the request corresponding to this xid */
-	spin_lock(&xprt->recv_lock);
-	req = xprt_lookup_rqst(xprt, transport->tcp_xid);
-	if (!req) {
-		dprintk("RPC:       XID %08x request not found!\n",
-				ntohl(transport->tcp_xid));
-		spin_unlock(&xprt->recv_lock);
-		return -1;
-	}
-	xprt_pin_rqst(req);
-	spin_unlock(&xprt->recv_lock);
-
-	xs_tcp_read_common(xprt, desc, req);
-
-	spin_lock(&xprt->recv_lock);
-	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
-		xprt_complete_rqst(req->rq_task, transport->tcp_copied);
-	xprt_unpin_rqst(req);
-	spin_unlock(&xprt->recv_lock);
-	return 0;
-}
-
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
-/*
- * Obtains an rpc_rqst previously allocated and invokes the common
- * tcp read code to read the data.  The result is placed in the callback
- * queue.
- * If we're unable to obtain the rpc_rqst we schedule the closing of the
- * connection and return -1.
- */
-static int xs_tcp_read_callback(struct rpc_xprt *xprt,
-				       struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct rpc_rqst *req;
-
-	/* Look up the request corresponding to the given XID */
-	req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
-	if (req == NULL) {
-		printk(KERN_WARNING "Callback slot table overflowed\n");
-		xprt_force_disconnect(xprt);
-		return -1;
-	}
-
-	dprintk("RPC:       read callback  XID %08x\n", ntohl(req->rq_xid));
-	xs_tcp_read_common(xprt, desc, req);
-
-	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
-		xprt_complete_bc_request(req, transport->tcp_copied);
-
-	return 0;
-}
-
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
-					struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-
-	return (transport->tcp_flags & TCP_RPC_REPLY) ?
-		xs_tcp_read_reply(xprt, desc) :
-		xs_tcp_read_callback(xprt, desc);
-}
-
-static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
-{
-	int ret;
-
-	ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
-			      SVC_SOCK_ANONYMOUS);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
 static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
 {
 	return PAGE_SIZE;
 }
-#else
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
-					struct xdr_skb_reader *desc)
-{
-	return xs_tcp_read_reply(xprt, desc);
-}
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-/*
- * Read data off the transport.  This can be either an RPC_CALL or an
- * RPC_REPLY.  Relay the processing to helper functions.
- */
-static void xs_tcp_read_data(struct rpc_xprt *xprt,
-				    struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-
-	if (_xs_tcp_read_data(xprt, desc) == 0)
-		xs_tcp_check_fraghdr(transport);
-	else {
-		/*
-		 * The transport_lock protects the request handling.
-		 * There's no need to hold it to update the tcp_flags.
-		 */
-		transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-	}
-}
-
-static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
-	size_t len;
-
-	len = transport->tcp_reclen - transport->tcp_offset;
-	if (len > desc->count)
-		len = desc->count;
-	desc->count -= len;
-	desc->offset += len;
-	transport->tcp_offset += len;
-	dprintk("RPC:       discarded %zu bytes\n", len);
-	xs_tcp_check_fraghdr(transport);
-}
-
-static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
-{
-	struct rpc_xprt *xprt = rd_desc->arg.data;
-	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-	struct xdr_skb_reader desc = {
-		.skb	= skb,
-		.offset	= offset,
-		.count	= len,
-	};
-	size_t ret;
-
-	dprintk("RPC:       xs_tcp_data_recv started\n");
-	do {
-		trace_xs_tcp_data_recv(transport);
-		/* Read in a new fragment marker if necessary */
-		/* Can we ever really expect to get completely empty fragments? */
-		if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) {
-			xs_tcp_read_fraghdr(xprt, &desc);
-			continue;
-		}
-		/* Read in the xid if necessary */
-		if (transport->tcp_flags & TCP_RCV_COPY_XID) {
-			xs_tcp_read_xid(transport, &desc);
-			continue;
-		}
-		/* Read in the call/reply flag */
-		if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) {
-			xs_tcp_read_calldir(transport, &desc);
-			continue;
-		}
-		/* Read in the request data */
-		if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
-			xs_tcp_read_data(xprt, &desc);
-			continue;
-		}
-		/* Skip over any trailing bytes on short reads */
-		xs_tcp_read_discard(transport, &desc);
-	} while (desc.count);
-	ret = len - desc.count;
-	if (ret < rd_desc->count)
-		rd_desc->count -= ret;
-	else
-		rd_desc->count = 0;
-	trace_xs_tcp_data_recv(transport);
-	dprintk("RPC:       xs_tcp_data_recv done\n");
-	return ret;
-}
-
-static void xs_tcp_data_receive(struct sock_xprt *transport)
-{
-	struct rpc_xprt *xprt = &transport->xprt;
-	struct sock *sk;
-	read_descriptor_t rd_desc = {
-		.arg.data = xprt,
-	};
-	unsigned long total = 0;
-	int read = 0;
-
-restart:
-	mutex_lock(&transport->recv_mutex);
-	sk = transport->inet;
-	if (sk == NULL)
-		goto out;
-
-	/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
-	for (;;) {
-		rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
-		lock_sock(sk);
-		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
-		if (rd_desc.count != 0 || read < 0) {
-			clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
-			release_sock(sk);
-			break;
-		}
-		release_sock(sk);
-		total += read;
-		if (need_resched()) {
-			mutex_unlock(&transport->recv_mutex);
-			cond_resched();
-			goto restart;
-		}
-	}
-	if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
-		queue_work(xprtiod_workqueue, &transport->recv_worker);
-out:
-	mutex_unlock(&transport->recv_mutex);
-	trace_xs_tcp_data_ready(xprt, read, total);
-}
-
-static void xs_tcp_data_receive_workfn(struct work_struct *work)
-{
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, recv_worker);
-	xs_tcp_data_receive(transport);
-}
-
 /**
  * xs_tcp_state_change - callback to handle TCP socket state changes
  * @sk: socket whose state has changed
@@ -1598,22 +1519,16 @@
 	trace_rpc_socket_state_change(xprt, sk->sk_socket);
 	switch (sk->sk_state) {
 	case TCP_ESTABLISHED:
-		spin_lock(&xprt->transport_lock);
 		if (!xprt_test_and_set_connected(xprt)) {
-
-			/* Reset TCP record info */
-			transport->tcp_offset = 0;
-			transport->tcp_reclen = 0;
-			transport->tcp_copied = 0;
-			transport->tcp_flags =
-				TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
 			xprt->connect_cookie++;
 			clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
 			xprt_clear_connecting(xprt);
 
-			xprt_wake_pending_tasks(xprt, -EAGAIN);
+			xprt->stat.connect_count++;
+			xprt->stat.connect_time += (long)jiffies -
+						   xprt->stat.connect_start;
+			xs_run_error_worker(transport, XPRT_SOCK_WAKE_PENDING);
 		}
-		spin_unlock(&xprt->transport_lock);
 		break;
 	case TCP_FIN_WAIT1:
 		/* The client initiated a shutdown of the socket */
@@ -1629,7 +1544,7 @@
 		/* The server initiated a shutdown of the socket */
 		xprt->connect_cookie++;
 		clear_bit(XPRT_CONNECTED, &xprt->state);
-		xs_tcp_force_close(xprt);
+		xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
 		/* fall through */
 	case TCP_CLOSING:
 		/*
@@ -1650,10 +1565,8 @@
 					&transport->sock_state))
 			xprt_clear_connecting(xprt);
 		clear_bit(XPRT_CLOSING, &xprt->state);
-		if (sk->sk_err)
-			xprt_wake_pending_tasks(xprt, -sk->sk_err);
 		/* Trigger the socket release */
-		xs_tcp_force_close(xprt);
+		xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
 	}
  out:
 	read_unlock_bh(&sk->sk_callback_lock);
@@ -1662,6 +1575,7 @@
 static void xs_write_space(struct sock *sk)
 {
 	struct socket_wq *wq;
+	struct sock_xprt *transport;
 	struct rpc_xprt *xprt;
 
 	if (!sk->sk_socket)
@@ -1670,12 +1584,14 @@
 
 	if (unlikely(!(xprt = xprt_from_sock(sk))))
 		return;
+	transport = container_of(xprt, struct sock_xprt, xprt);
 	rcu_read_lock();
 	wq = rcu_dereference(sk->sk_wq);
 	if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
 		goto out;
 
-	xprt_write_space(xprt);
+	xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE);
+	sk->sk_write_pending--;
 out:
 	rcu_read_unlock();
 }
@@ -1762,22 +1678,29 @@
 
 /**
  * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport
+ * @xprt: controlling transport
  * @task: task that timed out
  *
  * Adjust the congestion window after a retransmit timeout has occurred.
  */
 static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->transport_lock);
 	xprt_adjust_cwnd(xprt, task, -ETIMEDOUT);
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 }
 
-static unsigned short xs_get_random_port(void)
+static int xs_get_random_port(void)
 {
-	unsigned short range = xprt_max_resvport - xprt_min_resvport + 1;
-	unsigned short rand = (unsigned short) prandom_u32() % range;
-	return rand + xprt_min_resvport;
+	unsigned short min = xprt_min_resvport, max = xprt_max_resvport;
+	unsigned short range;
+	unsigned short rand;
+
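+	/*
+	 * The min/max sysctls are now validated only against the port
+	 * limits, so an inverted range (max < min) is representable;
+	 * refuse it instead of picking a port.
+	 */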
+	if (max < min)
+		return -EADDRINUSE;
+	range = max - min + 1;
+	rand = (unsigned short) prandom_u32() % range;
+	return rand + min;
 }
 
 /**
@@ -1833,9 +1756,9 @@
 		transport->srcport = xs_sock_getport(sock);
 }
 
-static unsigned short xs_get_srcport(struct sock_xprt *transport)
+static int xs_get_srcport(struct sock_xprt *transport)
 {
-	unsigned short port = transport->srcport;
+	int port = transport->srcport;
 
 	if (port == 0 && transport->xprt.resvport)
 		port = xs_get_random_port();
@@ -1856,7 +1779,7 @@
 {
 	struct sockaddr_storage myaddr;
 	int err, nloop = 0;
-	unsigned short port = xs_get_srcport(transport);
+	int port = xs_get_srcport(transport);
 	unsigned short last;
 
 	/*
@@ -1874,8 +1797,8 @@
 	 * transport->xprt.resvport == 1) xs_get_srcport above will
 	 * ensure that port is non-zero and we will bind as needed.
 	 */
-	if (port == 0)
-		return 0;
+	if (port <= 0)
+		return port;
 
 	memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
 	do {
@@ -1974,6 +1897,7 @@
 		struct sock_xprt *transport, int family, int type,
 		int protocol, bool reuseport)
 {
+	struct file *filp;
 	struct socket *sock;
 	int err;
 
@@ -1994,6 +1918,11 @@
 		goto out;
 	}
 
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	if (IS_ERR(filp))
+		return ERR_CAST(filp);
+	transport->file = filp;
+
 	return sock;
 out:
 	return ERR_PTR(err);
@@ -2017,7 +1946,6 @@
 		sk->sk_write_space = xs_udp_write_space;
 		sock_set_flag(sk, SOCK_FASYNC);
 		sk->sk_error_report = xs_error_report;
-		sk->sk_allocation = GFP_NOIO;
 
 		xprt_clear_connected(xprt);
 
@@ -2028,9 +1956,8 @@
 		write_unlock_bh(&sk->sk_callback_lock);
 	}
 
-	/* Tell the socket layer to start connecting... */
-	xprt->stat.connect_count++;
-	xprt->stat.connect_start = jiffies;
+	xs_stream_start_connect(transport);
+
 	return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
 }
 
@@ -2041,6 +1968,7 @@
 static int xs_local_setup_socket(struct sock_xprt *transport)
 {
 	struct rpc_xprt *xprt = &transport->xprt;
+	struct file *filp;
 	struct socket *sock;
 	int status = -EIO;
 
@@ -2053,6 +1981,13 @@
 	}
 	xs_reclassify_socket(AF_LOCAL, sock);
 
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	if (IS_ERR(filp)) {
+		status = PTR_ERR(filp);
+		goto out;
+	}
+	transport->file = filp;
+
 	dprintk("RPC:       worker connecting xprt %p via AF_LOCAL to %s\n",
 			xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
 
@@ -2062,6 +1997,9 @@
 	case 0:
 		dprintk("RPC:       xprt %p connected to %s\n",
 				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+		xprt->stat.connect_count++;
+		xprt->stat.connect_time += (long)jiffies -
+					   xprt->stat.connect_start;
 		xprt_set_connected(xprt);
 	case -ENOBUFS:
 		break;
@@ -2100,6 +2038,7 @@
 		 * we'll need to figure out how to pass a namespace to
 		 * connect.
 		 */
+		task->tk_rpc_status = -ENOTCONN;
 		rpc_exit(task, -ENOTCONN);
 		return;
 	}
@@ -2203,7 +2142,6 @@
 		sk->sk_data_ready = xs_data_ready;
 		sk->sk_write_space = xs_udp_write_space;
 		sock_set_flag(sk, SOCK_FASYNC);
-		sk->sk_allocation = GFP_NOIO;
 
 		xprt_set_connected(xprt);
 
@@ -2244,8 +2182,8 @@
 	trace_rpc_socket_connect(xprt, sock, 0);
 	status = 0;
 out:
-	xprt_unlock_connect(xprt, transport);
 	xprt_clear_connecting(xprt);
+	xprt_unlock_connect(xprt, transport);
 	xprt_wake_pending_tasks(xprt, status);
 }
 
@@ -2284,13 +2222,13 @@
 	unsigned int opt_on = 1;
 	unsigned int timeo;
 
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->transport_lock);
 	keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
 	keepcnt = xprt->timeout->to_retries + 1;
 	timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
 		(xprt->timeout->to_retries + 1);
 	clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 
 	/* TCP Keepalive options */
 	kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2315,7 +2253,7 @@
 	struct rpc_timeout to;
 	unsigned long initval;
 
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->transport_lock);
 	if (reconnect_timeout < xprt->max_reconnect_timeout)
 		xprt->max_reconnect_timeout = reconnect_timeout;
 	if (connect_timeout < xprt->connect_timeout) {
@@ -2332,7 +2270,7 @@
 		xprt->connect_timeout = connect_timeout;
 	}
 	set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->transport_lock);
 }
 
 static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -2366,7 +2304,6 @@
 		sk->sk_write_space = xs_tcp_write_space;
 		sock_set_flag(sk, SOCK_FASYNC);
 		sk->sk_error_report = xs_error_report;
-		sk->sk_allocation = GFP_NOIO;
 
 		/* socket options */
 		sock_reset_flag(sk, SOCK_LINGER);
@@ -2386,9 +2323,9 @@
 
 	xs_set_memalloc(xprt);
 
+	xs_stream_start_connect(transport);
+
 	/* Tell the socket layer to start connecting... */
-	xprt->stat.connect_count++;
-	xprt->stat.connect_start = jiffies;
 	set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
 	ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
 	switch (ret) {
@@ -2410,6 +2347,7 @@
 
 /**
  * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
+ * @work: queued work item
  *
  * Invoked by a work queue tasklet.
  */
@@ -2480,30 +2418,11 @@
 	}
 	status = -EAGAIN;
 out:
-	xprt_unlock_connect(xprt, transport);
 	xprt_clear_connecting(xprt);
+	xprt_unlock_connect(xprt, transport);
 	xprt_wake_pending_tasks(xprt, status);
 }
 
-static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
-{
-	unsigned long start, now = jiffies;
-
-	start = xprt->stat.connect_start + xprt->reestablish_timeout;
-	if (time_after(start, now))
-		return start - now;
-	return 0;
-}
-
-static void xs_reconnect_backoff(struct rpc_xprt *xprt)
-{
-	xprt->reestablish_timeout <<= 1;
-	if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
-		xprt->reestablish_timeout = xprt->max_reconnect_timeout;
-	if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
-		xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
-}
-
 /**
  * xs_connect - connect a socket to a remote endpoint
  * @xprt: pointer to transport structure
@@ -2533,8 +2452,8 @@
 		/* Start by resetting any existing state */
 		xs_reset_transport(transport);
 
-		delay = xs_reconnect_delay(xprt);
-		xs_reconnect_backoff(xprt);
+		delay = xprt_reconnect_delay(xprt);
+		xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO);
 
 	} else
 		dprintk("RPC:       xs_connect scheduled xprt %p\n", xprt);
@@ -2544,6 +2463,53 @@
 			delay);
 }
 
+static void xs_wake_disconnect(struct sock_xprt *transport)
+{
+	if (test_and_clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state))
+		xs_tcp_force_close(&transport->xprt);
+}
+
+static void xs_wake_write(struct sock_xprt *transport)
+{
+	if (test_and_clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state))
+		xprt_write_space(&transport->xprt);
+}
+
+static void xs_wake_error(struct sock_xprt *transport)
+{
+	int sockerr;
+
+	if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
+		return;
+	mutex_lock(&transport->recv_mutex);
+	if (transport->sock == NULL)
+		goto out;
+	if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
+		goto out;
+	sockerr = xchg(&transport->xprt_err, 0);
+	if (sockerr < 0)
+		xprt_wake_pending_tasks(&transport->xprt, sockerr);
+out:
+	mutex_unlock(&transport->recv_mutex);
+}
+
+static void xs_wake_pending(struct sock_xprt *transport)
+{
+	if (test_and_clear_bit(XPRT_SOCK_WAKE_PENDING, &transport->sock_state))
+		xprt_wake_pending_tasks(&transport->xprt, -EAGAIN);
+}
+
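+/*
+ * Process the XPRT_SOCK_WAKE_* bits raised by the socket callbacks,
+ * so the corresponding wakeups and disconnects run from workqueue
+ * context instead of under the socket callback lock.
+ */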
+static void xs_error_handle(struct work_struct *work)
+{
+	struct sock_xprt *transport = container_of(work,
+			struct sock_xprt, error_worker);
+
+	xs_wake_disconnect(transport);
+	xs_wake_write(transport);
+	xs_wake_error(transport);
+	xs_wake_pending(transport);
+}
+
 /**
 * xs_local_print_stats - display AF_LOCAL socket-specific stats
  * @xprt: rpc_xprt struct containing statistics
@@ -2561,7 +2527,7 @@
 			"%llu %llu %lu %llu %llu\n",
 			xprt->stat.bind_count,
 			xprt->stat.connect_count,
-			xprt->stat.connect_time,
+			xprt->stat.connect_time / HZ,
 			idle_time,
 			xprt->stat.sends,
 			xprt->stat.recvs,
@@ -2616,7 +2582,7 @@
 			transport->srcport,
 			xprt->stat.bind_count,
 			xprt->stat.connect_count,
-			xprt->stat.connect_time,
+			xprt->stat.connect_time / HZ,
 			idle_time,
 			xprt->stat.sends,
 			xprt->stat.recvs,
@@ -2678,35 +2644,43 @@
 {
 	int len;
 	struct xdr_buf *xbufp = &req->rq_snd_buf;
-	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct socket *sock = transport->sock;
+			container_of(req->rq_xprt, struct sock_xprt, xprt);
 	unsigned long headoff;
 	unsigned long tailoff;
+	struct page *tailpage;
+	struct msghdr msg = {
+		.msg_flags	= MSG_MORE
+	};
+	rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
+					 (u32)xbufp->len);
+	struct kvec iov = {
+		.iov_base	= &marker,
+		.iov_len	= sizeof(marker),
+	};
 
-	xs_encode_stream_record_marker(xbufp);
+	len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len);
+	if (len != iov.iov_len)
+		return -EAGAIN;
 
+	tailpage = NULL;
+	if (xbufp->tail[0].iov_len)
+		tailpage = virt_to_page(xbufp->tail[0].iov_base);
 	tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
 	headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
-	len = svc_send_common(sock, xbufp,
+	len = svc_send_common(transport->sock, xbufp,
 			      virt_to_page(xbufp->head[0].iov_base), headoff,
-			      xbufp->tail[0].iov_base, tailoff);
-
-	if (len != xbufp->len) {
-		printk(KERN_NOTICE "Error sending entire callback!\n");
-		len = -EAGAIN;
-	}
-
+			      tailpage, tailoff);
+	if (len != xbufp->len)
+		return -EAGAIN;
 	return len;
 }
 
 /*
  * The send routine. Borrows from svc_send
  */
-static int bc_send_request(struct rpc_task *task)
+static int bc_send_request(struct rpc_rqst *req)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
 	struct svc_xprt	*xprt;
 	int len;
 
@@ -2720,12 +2694,7 @@
 	 * Grab the mutex to serialize data as the connection is shared
 	 * with the fore channel
 	 */
-	if (!mutex_trylock(&xprt->xpt_mutex)) {
-		rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
-		if (!mutex_trylock(&xprt->xpt_mutex))
-			return -EAGAIN;
-		rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
-	}
+	mutex_lock(&xprt->xpt_mutex);
 	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
 		len = -ENOTCONN;
 	else
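The trylock-and-requeue dance is removed because bc_send_request() now runs from a context that may block, so a plain mutex_lock() suffices. A user-space contrast of the two styles, purely illustrative and not the SUNRPC API:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t xpt_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void send_old_style(void)
    {
        if (pthread_mutex_trylock(&xpt_mutex) != 0) {
            /* Old style: back off and requeue the task for later. */
            printf("busy, would requeue the task\n");
            return;
        }
        printf("sending under trylock\n");
        pthread_mutex_unlock(&xpt_mutex);
    }

    static void send_new_style(void)
    {
        /* New style: the caller may sleep, so just block until free. */
        pthread_mutex_lock(&xpt_mutex);
        printf("sending under blocking lock\n");
        pthread_mutex_unlock(&xpt_mutex);
    }

    int main(void)
    {
        send_old_style();
        send_new_style();
        return 0;
    }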
@@ -2761,7 +2730,7 @@
 
 static const struct rpc_xprt_ops xs_local_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
-	.release_xprt		= xs_tcp_release_xprt,
+	.release_xprt		= xprt_release_xprt,
 	.alloc_slot		= xprt_alloc_slot,
 	.free_slot		= xprt_free_slot,
 	.rpcbind		= xs_local_rpcbind,
@@ -2769,8 +2738,9 @@
 	.connect		= xs_local_connect,
 	.buf_alloc		= rpc_malloc,
 	.buf_free		= rpc_free,
+	.prepare_request	= xs_stream_prepare_request,
 	.send_request		= xs_local_send_request,
-	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
+	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
 	.close			= xs_close,
 	.destroy		= xs_destroy,
 	.print_stats		= xs_local_print_stats,
@@ -2790,7 +2760,7 @@
 	.buf_alloc		= rpc_malloc,
 	.buf_free		= rpc_free,
 	.send_request		= xs_udp_send_request,
-	.set_retrans_timeout	= xprt_set_retrans_timeout_rtt,
+	.wait_for_reply_request	= xprt_wait_for_reply_request_rtt,
 	.timer			= xs_udp_timer,
 	.release_request	= xprt_release_rqst_cong,
 	.close			= xs_close,
@@ -2803,16 +2773,17 @@
 
 static const struct rpc_xprt_ops xs_tcp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
-	.release_xprt		= xs_tcp_release_xprt,
-	.alloc_slot		= xprt_lock_and_alloc_slot,
+	.release_xprt		= xprt_release_xprt,
+	.alloc_slot		= xprt_alloc_slot,
 	.free_slot		= xprt_free_slot,
 	.rpcbind		= rpcb_getport_async,
 	.set_port		= xs_set_port,
 	.connect		= xs_connect,
 	.buf_alloc		= rpc_malloc,
 	.buf_free		= rpc_free,
+	.prepare_request	= xs_stream_prepare_request,
 	.send_request		= xs_tcp_send_request,
-	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
+	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
 	.close			= xs_tcp_shutdown,
 	.destroy		= xs_destroy,
 	.set_connect_timeout	= xs_tcp_set_connect_timeout,
@@ -2822,8 +2793,8 @@
 	.inject_disconnect	= xs_inject_disconnect,
 #ifdef CONFIG_SUNRPC_BACKCHANNEL
 	.bc_setup		= xprt_setup_bc,
-	.bc_up			= xs_tcp_bc_up,
 	.bc_maxpayload		= xs_tcp_bc_maxpayload,
+	.bc_num_slots		= xprt_bc_max_slots,
 	.bc_free_rqst		= xprt_free_bc_rqst,
 	.bc_destroy		= xprt_destroy_bc,
 #endif
@@ -2841,7 +2812,7 @@
 	.buf_alloc		= bc_malloc,
 	.buf_free		= bc_free,
 	.send_request		= bc_send_request,
-	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
+	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
 	.close			= bc_close,
 	.destroy		= bc_destroy,
 	.print_stats		= xs_tcp_print_stats,
@@ -2942,7 +2913,6 @@
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
 	xprt->prot = 0;
-	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
 	xprt->bind_timeout = XS_BIND_TO;
@@ -2952,9 +2922,9 @@
 	xprt->ops = &xs_local_ops;
 	xprt->timeout = &xs_local_default_timeout;
 
-	INIT_WORK(&transport->recv_worker, xs_local_data_receive_workfn);
-	INIT_DELAYED_WORK(&transport->connect_worker,
-			xs_dummy_setup_socket);
+	INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+	INIT_WORK(&transport->error_worker, xs_error_handle);
+	INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket);
 
 	switch (sun->sun_family) {
 	case AF_LOCAL:
@@ -3012,7 +2982,6 @@
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
 	xprt->prot = IPPROTO_UDP;
-	xprt->tsh_size = 0;
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
@@ -3025,6 +2994,7 @@
 	xprt->timeout = &xs_udp_default_timeout;
 
 	INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn);
+	INIT_WORK(&transport->error_worker, xs_error_handle);
 	INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
 
 	switch (addr->sa_family) {
@@ -3092,7 +3062,6 @@
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
 	xprt->prot = IPPROTO_TCP;
-	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
 	xprt->bind_timeout = XS_BIND_TO;
@@ -3106,7 +3075,8 @@
 	xprt->connect_timeout = xprt->timeout->to_initval *
 		(xprt->timeout->to_retries + 1);
 
-	INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
+	INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+	INIT_WORK(&transport->error_worker, xs_error_handle);
 	INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
 
 	switch (addr->sa_family) {
@@ -3165,7 +3135,6 @@
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
 	xprt->prot = IPPROTO_TCP;
-	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 	xprt->timeout = &xs_tcp_default_timeout;
 
@@ -3265,10 +3234,8 @@
  */
 int init_socket_xprt(void)
 {
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	if (!sunrpc_table_header)
 		sunrpc_table_header = register_sysctl_table(sunrpc_table);
-#endif
 
 	xprt_register_transport(&xs_local_transport);
 	xprt_register_transport(&xs_udp_transport);
@@ -3284,12 +3251,10 @@
  */
 void cleanup_socket_xprt(void)
 {
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	if (sunrpc_table_header) {
 		unregister_sysctl_table(sunrpc_table_header);
 		sunrpc_table_header = NULL;
 	}
-#endif
 
 	xprt_unregister_transport(&xs_local_transport);
 	xprt_unregister_transport(&xs_udp_transport);
@@ -3317,12 +3282,8 @@
 
 static int param_set_portnr(const char *val, const struct kernel_param *kp)
 {
-	if (kp->arg == &xprt_min_resvport)
-		return param_set_uint_minmax(val, kp,
-			RPC_MIN_RESVPORT,
-			xprt_max_resvport);
 	return param_set_uint_minmax(val, kp,
-			xprt_min_resvport,
+			RPC_MIN_RESVPORT,
 			RPC_MAX_RESVPORT);
 }
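param_set_portnr() now validates both module parameters against the same fixed window rather than against each other's current value, so a min/max pair can be updated in either order without transient rejections. A sketch of the simplified check; the constants below are placeholders, not the kernel's actual reserved-port bounds.

    #include <stdio.h>

    #define RPC_MIN_RESVPORT 1        /* placeholder bound */
    #define RPC_MAX_RESVPORT 65535    /* placeholder bound */

    static int param_set_portnr_sketch(unsigned int val)
    {
        /* Fixed window: no cross-dependency on the other parameter. */
        if (val < RPC_MIN_RESVPORT || val > RPC_MAX_RESVPORT)
            return -1;    /* would be -EINVAL in the kernel */
        return 0;
    }

    int main(void)
    {
        printf("%d %d\n", param_set_portnr_sketch(0),
               param_set_portnr_sketch(665));
        return 0;
    }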
 
diff --git a/net/switchdev/Kconfig b/net/switchdev/Kconfig
index 651fa20..50f21a6 100644
--- a/net/switchdev/Kconfig
+++ b/net/switchdev/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Configuration for Switch device support
 #
diff --git a/net/switchdev/Makefile b/net/switchdev/Makefile
index 5ed63ed..bd69a31 100644
--- a/net/switchdev/Makefile
+++ b/net/switchdev/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Switch device API
 #
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 74b9d91..3a1d428 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/switchdev/switchdev.c - Switch device API
  * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
  * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -23,78 +19,6 @@
 #include <linux/rtnetlink.h>
 #include <net/switchdev.h>
 
-/**
- *	switchdev_trans_item_enqueue - Enqueue data item to transaction queue
- *
- *	@trans: transaction
- *	@data: pointer to data being queued
- *	@destructor: data destructor
- *	@tritem: transaction item being queued
- *
- *	Enqeueue data item to transaction queue. tritem is typically placed in
- *	cointainter pointed at by data pointer. Destructor is called on
- *	transaction abort and after successful commit phase in case
- *	the caller did not dequeue the item before.
- */
-void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
-				  void *data, void (*destructor)(void const *),
-				  struct switchdev_trans_item *tritem)
-{
-	tritem->data = data;
-	tritem->destructor = destructor;
-	list_add_tail(&tritem->list, &trans->item_list);
-}
-EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
-
-static struct switchdev_trans_item *
-__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
-{
-	struct switchdev_trans_item *tritem;
-
-	if (list_empty(&trans->item_list))
-		return NULL;
-	tritem = list_first_entry(&trans->item_list,
-				  struct switchdev_trans_item, list);
-	list_del(&tritem->list);
-	return tritem;
-}
-
-/**
- *	switchdev_trans_item_dequeue - Dequeue data item from transaction queue
- *
- *	@trans: transaction
- */
-void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
-{
-	struct switchdev_trans_item *tritem;
-
-	tritem = __switchdev_trans_item_dequeue(trans);
-	BUG_ON(!tritem);
-	return tritem->data;
-}
-EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
-
-static void switchdev_trans_init(struct switchdev_trans *trans)
-{
-	INIT_LIST_HEAD(&trans->item_list);
-}
-
-static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
-{
-	struct switchdev_trans_item *tritem;
-
-	while ((tritem = __switchdev_trans_item_dequeue(trans)))
-		tritem->destructor(tritem->data);
-}
-
-static void switchdev_trans_items_warn_destroy(struct net_device *dev,
-					       struct switchdev_trans *trans)
-{
-	WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
-	     dev->name);
-	switchdev_trans_items_destroy(trans);
-}
-
 static LIST_HEAD(deferred);
 static DEFINE_SPINLOCK(deferred_lock);
 
@@ -174,81 +98,32 @@
 	return 0;
 }
 
-/**
- *	switchdev_port_attr_get - Get port attribute
- *
- *	@dev: port device
- *	@attr: attribute to get
- */
-int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
+				      struct net_device *dev,
+				      const struct switchdev_attr *attr,
+				      struct switchdev_trans *trans)
 {
-	const struct switchdev_ops *ops = dev->switchdev_ops;
-	struct net_device *lower_dev;
-	struct list_head *iter;
-	struct switchdev_attr first = {
-		.id = SWITCHDEV_ATTR_ID_UNDEFINED
+	int err;
+	int rc;
+
+	struct switchdev_notifier_port_attr_info attr_info = {
+		.attr = attr,
+		.trans = trans,
+		.handled = false,
 	};
-	int err = -EOPNOTSUPP;
 
-	if (ops && ops->switchdev_port_attr_get)
-		return ops->switchdev_port_attr_get(dev, attr);
-
-	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
+	rc = call_switchdev_blocking_notifiers(nt, dev,
+					       &attr_info.info, NULL);
+	err = notifier_to_errno(rc);
+	if (err) {
+		WARN_ON(!attr_info.handled);
 		return err;
-
-	/* Switch device port(s) may be stacked under
-	 * bond/team/vlan dev, so recurse down to get attr on
-	 * each port.  Return -ENODATA if attr values don't
-	 * compare across ports.
-	 */
-
-	netdev_for_each_lower_dev(dev, lower_dev, iter) {
-		err = switchdev_port_attr_get(lower_dev, attr);
-		if (err)
-			break;
-		if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
-			first = *attr;
-		else if (memcmp(&first, attr, sizeof(*attr)))
-			return -ENODATA;
 	}
 
-	return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
+	if (!attr_info.handled)
+		return -EOPNOTSUPP;
 
-static int __switchdev_port_attr_set(struct net_device *dev,
-				     const struct switchdev_attr *attr,
-				     struct switchdev_trans *trans)
-{
-	const struct switchdev_ops *ops = dev->switchdev_ops;
-	struct net_device *lower_dev;
-	struct list_head *iter;
-	int err = -EOPNOTSUPP;
-
-	if (ops && ops->switchdev_port_attr_set) {
-		err = ops->switchdev_port_attr_set(dev, attr, trans);
-		goto done;
-	}
-
-	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
-		goto done;
-
-	/* Switch device port(s) may be stacked under
-	 * bond/team/vlan dev, so recurse down to set attr on
-	 * each port.
-	 */
-
-	netdev_for_each_lower_dev(dev, lower_dev, iter) {
-		err = __switchdev_port_attr_set(lower_dev, attr, trans);
-		if (err)
-			break;
-	}
-
-done:
-	if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
-		err = 0;
-
-	return err;
+	return 0;
 }
 
 static int switchdev_port_attr_set_now(struct net_device *dev,
@@ -257,8 +132,6 @@
 	struct switchdev_trans trans;
 	int err;
 
-	switchdev_trans_init(&trans);
-
 	/* Phase I: prepare for attr set. Driver/device should fail
 	 * here if there are going to be issues in the commit phase,
 	 * such as lack of resources or support.  The driver/device
@@ -267,18 +140,10 @@
 	 */
 
 	trans.ph_prepare = true;
-	err = __switchdev_port_attr_set(dev, attr, &trans);
-	if (err) {
-		/* Prepare phase failed: abort the transaction.  Any
-		 * resources reserved in the prepare phase are
-		 * released.
-		 */
-
-		if (err != -EOPNOTSUPP)
-			switchdev_trans_items_destroy(&trans);
-
+	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
+					 &trans);
+	if (err)
 		return err;
-	}
 
 	/* Phase II: commit attr set.  This cannot fail as a fault
 	 * of driver/device.  If it does, it's a bug in the driver/device
@@ -286,10 +151,10 @@
 	 */
 
 	trans.ph_prepare = false;
-	err = __switchdev_port_attr_set(dev, attr, &trans);
+	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
+					 &trans);
 	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
 	     dev->name, attr->id);
-	switchdev_trans_items_warn_destroy(dev, &trans);
 
 	return err;
 }
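switchdev_port_attr_set_now() still drives a two-phase transaction, now routed through the notifier: the same handler runs once with trans->ph_prepare set (validate and reserve resources, may fail) and once for commit (must not fail). A driver-side sketch under assumed types; switchdev_trans_sketch and the resource check are illustrative, not a real driver.

    #include <stdbool.h>
    #include <stdio.h>

    struct switchdev_trans_sketch {
        bool ph_prepare;
    };

    static int port_attr_set_sketch(int attr_val,
                                    struct switchdev_trans_sketch *trans)
    {
        if (trans->ph_prepare)
            return attr_val > 100 ? -1 : 0;  /* validate/reserve only */

        /* Commit phase: resources were reserved, so this cannot fail. */
        printf("committing attr value %d to hardware\n", attr_val);
        return 0;
    }

    int main(void)
    {
        struct switchdev_trans_sketch trans = { .ph_prepare = true };

        if (port_attr_set_sketch(42, &trans))
            return 1;
        trans.ph_prepare = false;
        return port_attr_set_sketch(42, &trans);
    }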
@@ -353,42 +218,41 @@
 	return 0;
 }
 
-static int __switchdev_port_obj_add(struct net_device *dev,
-				    const struct switchdev_obj *obj,
-				    struct switchdev_trans *trans)
+static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
+				     struct net_device *dev,
+				     const struct switchdev_obj *obj,
+				     struct switchdev_trans *trans,
+				     struct netlink_ext_ack *extack)
 {
-	const struct switchdev_ops *ops = dev->switchdev_ops;
-	struct net_device *lower_dev;
-	struct list_head *iter;
-	int err = -EOPNOTSUPP;
+	int rc;
+	int err;
 
-	if (ops && ops->switchdev_port_obj_add)
-		return ops->switchdev_port_obj_add(dev, obj, trans);
+	struct switchdev_notifier_port_obj_info obj_info = {
+		.obj = obj,
+		.trans = trans,
+		.handled = false,
+	};
 
-	/* Switch device port(s) may be stacked under
-	 * bond/team/vlan dev, so recurse down to add object on
-	 * each port.
-	 */
-
-	netdev_for_each_lower_dev(dev, lower_dev, iter) {
-		err = __switchdev_port_obj_add(lower_dev, obj, trans);
-		if (err)
-			break;
+	rc = call_switchdev_blocking_notifiers(nt, dev, &obj_info.info, extack);
+	err = notifier_to_errno(rc);
+	if (err) {
+		WARN_ON(!obj_info.handled);
+		return err;
 	}
-
-	return err;
+	if (!obj_info.handled)
+		return -EOPNOTSUPP;
+	return 0;
 }
 
 static int switchdev_port_obj_add_now(struct net_device *dev,
-				      const struct switchdev_obj *obj)
+				      const struct switchdev_obj *obj,
+				      struct netlink_ext_ack *extack)
 {
 	struct switchdev_trans trans;
 	int err;
 
 	ASSERT_RTNL();
 
-	switchdev_trans_init(&trans);
-
 	/* Phase I: prepare for obj add. Driver/device should fail
 	 * here if there are going to be issues in the commit phase,
 	 * such as lack of resources or support.  The driver/device
@@ -397,18 +261,10 @@
 	 */
 
 	trans.ph_prepare = true;
-	err = __switchdev_port_obj_add(dev, obj, &trans);
-	if (err) {
-		/* Prepare phase failed: abort the transaction.  Any
-		 * resources reserved in the prepare phase are
-		 * released.
-		 */
-
-		if (err != -EOPNOTSUPP)
-			switchdev_trans_items_destroy(&trans);
-
+	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
+					dev, obj, &trans, extack);
+	if (err)
 		return err;
-	}
 
 	/* Phase II: commit obj add.  This cannot fail as a fault
 	 * of driver/device.  If it does, it's a bug in the driver/device
@@ -416,9 +272,9 @@
 	 */
 
 	trans.ph_prepare = false;
-	err = __switchdev_port_obj_add(dev, obj, &trans);
+	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
+					dev, obj, &trans, extack);
 	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
-	switchdev_trans_items_warn_destroy(dev, &trans);
 
 	return err;
 }
@@ -429,7 +285,7 @@
 	const struct switchdev_obj *obj = data;
 	int err;
 
-	err = switchdev_port_obj_add_now(dev, obj);
+	err = switchdev_port_obj_add_now(dev, obj, NULL);
 	if (err && err != -EOPNOTSUPP)
 		netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
 			   err, obj->id);
@@ -459,38 +315,21 @@
  *	in case SWITCHDEV_F_DEFER flag is not set.
  */
 int switchdev_port_obj_add(struct net_device *dev,
-			   const struct switchdev_obj *obj)
+			   const struct switchdev_obj *obj,
+			   struct netlink_ext_ack *extack)
 {
 	if (obj->flags & SWITCHDEV_F_DEFER)
 		return switchdev_port_obj_add_defer(dev, obj);
 	ASSERT_RTNL();
-	return switchdev_port_obj_add_now(dev, obj);
+	return switchdev_port_obj_add_now(dev, obj, extack);
 }
 EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
 
 static int switchdev_port_obj_del_now(struct net_device *dev,
 				      const struct switchdev_obj *obj)
 {
-	const struct switchdev_ops *ops = dev->switchdev_ops;
-	struct net_device *lower_dev;
-	struct list_head *iter;
-	int err = -EOPNOTSUPP;
-
-	if (ops && ops->switchdev_port_obj_del)
-		return ops->switchdev_port_obj_del(dev, obj);
-
-	/* Switch device port(s) may be stacked under
-	 * bond/team/vlan dev, so recurse down to delete object on
-	 * each port.
-	 */
-
-	netdev_for_each_lower_dev(dev, lower_dev, iter) {
-		err = switchdev_port_obj_del_now(lower_dev, obj);
-		if (err)
-			break;
-	}
-
-	return err;
+	return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL,
+					 dev, obj, NULL, NULL);
 }
 
 static void switchdev_port_obj_del_deferred(struct net_device *dev,
@@ -535,6 +374,7 @@
 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
 
 static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
+static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
 
 /**
  *	register_switchdev_notifier - Register notifier
@@ -569,29 +409,195 @@
  *	Call all network notifier blocks.
  */
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
-			     struct switchdev_notifier_info *info)
+			     struct switchdev_notifier_info *info,
+			     struct netlink_ext_ack *extack)
 {
 	info->dev = dev;
+	info->extack = extack;
 	return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
 
-bool switchdev_port_same_parent_id(struct net_device *a,
-				   struct net_device *b)
+int register_switchdev_blocking_notifier(struct notifier_block *nb)
 {
-	struct switchdev_attr a_attr = {
-		.orig_dev = a,
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-	};
-	struct switchdev_attr b_attr = {
-		.orig_dev = b,
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-	};
+	struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
 
-	if (switchdev_port_attr_get(a, &a_attr) ||
-	    switchdev_port_attr_get(b, &b_attr))
-		return false;
-
-	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
+	return blocking_notifier_chain_register(chain, nb);
 }
-EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
+EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);
+
+int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
+{
+	struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
+
+	return blocking_notifier_chain_unregister(chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);
+
+int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
+				      struct switchdev_notifier_info *info,
+				      struct netlink_ext_ack *extack)
+{
+	info->dev = dev;
+	info->extack = extack;
+	return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
+					    val, info);
+}
+EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
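The new blocking chain exists because object/attribute handlers may sleep, which the existing atomic chain forbids. A user-space analogue of a sleepable notifier chain follows; the names are illustrative and the locking is simplified (the kernel's blocking chain uses an rwsem, not a mutex).

    #include <pthread.h>
    #include <stdio.h>

    #define MAX_NOTIFIERS 4

    typedef int (*notifier_fn)(unsigned long event, void *info);

    static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;
    static notifier_fn chain[MAX_NOTIFIERS];
    static int chain_len;

    static int chain_register(notifier_fn fn)
    {
        pthread_mutex_lock(&chain_lock);
        if (chain_len == MAX_NOTIFIERS) {
            pthread_mutex_unlock(&chain_lock);
            return -1;
        }
        chain[chain_len++] = fn;
        pthread_mutex_unlock(&chain_lock);
        return 0;
    }

    static int chain_call(unsigned long event, void *info)
    {
        int i, ret = 0;

        /* A sleepable lock (not a spinlock) lets callees block, which
         * is exactly what switchdev object/attr handlers need. */
        pthread_mutex_lock(&chain_lock);
        for (i = 0; i < chain_len && !ret; i++)
            ret = chain[i](event, info);
        pthread_mutex_unlock(&chain_lock);
        return ret;
    }

    static int port_obj_handler(unsigned long event, void *info)
    {
        printf("event %lu handled\n", event);
        return 0;
    }

    int main(void)
    {
        chain_register(port_obj_handler);
        return chain_call(1, NULL);
    }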
+
+static int __switchdev_handle_port_obj_add(struct net_device *dev,
+			struct switchdev_notifier_port_obj_info *port_obj_info,
+			bool (*check_cb)(const struct net_device *dev),
+			int (*add_cb)(struct net_device *dev,
+				      const struct switchdev_obj *obj,
+				      struct switchdev_trans *trans,
+				      struct netlink_ext_ack *extack))
+{
+	struct netlink_ext_ack *extack;
+	struct net_device *lower_dev;
+	struct list_head *iter;
+	int err = -EOPNOTSUPP;
+
+	extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+
+	if (check_cb(dev)) {
+		/* This flag is only checked if the return value indicates success. */
+		port_obj_info->handled = true;
+		return add_cb(dev, port_obj_info->obj, port_obj_info->trans,
+			      extack);
+	}
+
+	/* Switch ports might be stacked under e.g. a LAG. Ignore the
+	 * unsupported devices; another driver might be able to handle them.
+	 * But propagate any hard errors to the callers.
+	 *
+	 * If the driver does its own bookkeeping of stacked ports, it's not
+	 * necessary to go through this helper.
+	 */
+	netdev_for_each_lower_dev(dev, lower_dev, iter) {
+		err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
+						      check_cb, add_cb);
+		if (err && err != -EOPNOTSUPP)
+			return err;
+	}
+
+	return err;
+}
+
+int switchdev_handle_port_obj_add(struct net_device *dev,
+			struct switchdev_notifier_port_obj_info *port_obj_info,
+			bool (*check_cb)(const struct net_device *dev),
+			int (*add_cb)(struct net_device *dev,
+				      const struct switchdev_obj *obj,
+				      struct switchdev_trans *trans,
+				      struct netlink_ext_ack *extack))
+{
+	int err;
+
+	err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
+					      add_cb);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
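switchdev_handle_port_obj_add() centralizes the recursion that each driver previously open-coded: try the device itself via check_cb(), otherwise descend into its lower devices (e.g. LAG members), treating -EOPNOTSUPP as "not mine" and converting a leftover -EOPNOTSUPP into success. A user-space sketch of that descent; the device tree and names are illustrative.

    #include <stdbool.h>
    #include <stdio.h>

    #define EOPNOTSUPP 95

    struct dev {
        const char *name;
        bool is_our_port;        /* plays the role of check_cb() */
        struct dev *lower[2];
        int n_lower;
    };

    static int handle_obj_add(struct dev *d)
    {
        int i, err = -EOPNOTSUPP;

        if (d->is_our_port) {
            printf("programming %s\n", d->name);  /* plays add_cb() */
            return 0;
        }
        for (i = 0; i < d->n_lower; i++) {
            err = handle_obj_add(d->lower[i]);
            if (err && err != -EOPNOTSUPP)
                return err;    /* propagate hard errors only */
        }
        return err;
    }

    int main(void)
    {
        struct dev sw0p1 = { "sw0p1", true };
        struct dev sw0p2 = { "sw0p2", true };
        struct dev bond0 = { "bond0", false, { &sw0p1, &sw0p2 }, 2 };
        int err = handle_obj_add(&bond0);

        /* The exported wrapper turns a leftover -EOPNOTSUPP into 0. */
        return err == -EOPNOTSUPP ? 0 : err;
    }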
+
+static int __switchdev_handle_port_obj_del(struct net_device *dev,
+			struct switchdev_notifier_port_obj_info *port_obj_info,
+			bool (*check_cb)(const struct net_device *dev),
+			int (*del_cb)(struct net_device *dev,
+				      const struct switchdev_obj *obj))
+{
+	struct net_device *lower_dev;
+	struct list_head *iter;
+	int err = -EOPNOTSUPP;
+
+	if (check_cb(dev)) {
+		/* This flag is only checked if the return value indicates success. */
+		port_obj_info->handled = true;
+		return del_cb(dev, port_obj_info->obj);
+	}
+
+	/* Switch ports might be stacked under e.g. a LAG. Ignore the
+	 * unsupported devices; another driver might be able to handle them.
+	 * But propagate any hard errors to the callers.
+	 *
+	 * If the driver does its own bookkeeping of stacked ports, it's not
+	 * necessary to go through this helper.
+	 */
+	netdev_for_each_lower_dev(dev, lower_dev, iter) {
+		err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
+						      check_cb, del_cb);
+		if (err && err != -EOPNOTSUPP)
+			return err;
+	}
+
+	return err;
+}
+
+int switchdev_handle_port_obj_del(struct net_device *dev,
+			struct switchdev_notifier_port_obj_info *port_obj_info,
+			bool (*check_cb)(const struct net_device *dev),
+			int (*del_cb)(struct net_device *dev,
+				      const struct switchdev_obj *obj))
+{
+	int err;
+
+	err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
+					      del_cb);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
+
+static int __switchdev_handle_port_attr_set(struct net_device *dev,
+			struct switchdev_notifier_port_attr_info *port_attr_info,
+			bool (*check_cb)(const struct net_device *dev),
+			int (*set_cb)(struct net_device *dev,
+				      const struct switchdev_attr *attr,
+				      struct switchdev_trans *trans))
+{
+	struct net_device *lower_dev;
+	struct list_head *iter;
+	int err = -EOPNOTSUPP;
+
+	if (check_cb(dev)) {
+		port_attr_info->handled = true;
+		return set_cb(dev, port_attr_info->attr,
+			      port_attr_info->trans);
+	}
+
+	/* Switch ports might be stacked under e.g. a LAG. Ignore the
+	 * unsupported devices; another driver might be able to handle them.
+	 * But propagate any hard errors to the callers.
+	 *
+	 * If the driver does its own bookkeeping of stacked ports, it's not
+	 * necessary to go through this helper.
+	 */
+	netdev_for_each_lower_dev(dev, lower_dev, iter) {
+		err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info,
+						       check_cb, set_cb);
+		if (err && err != -EOPNOTSUPP)
+			return err;
+	}
+
+	return err;
+}
+
+int switchdev_handle_port_attr_set(struct net_device *dev,
+			struct switchdev_notifier_port_attr_info *port_attr_info,
+			bool (*check_cb)(const struct net_device *dev),
+			int (*set_cb)(struct net_device *dev,
+				      const struct switchdev_attr *attr,
+				      struct switchdev_trans *trans))
+{
+	int err;
+
+	err = __switchdev_handle_port_attr_set(dev, port_attr_info, check_cb,
+					       set_cb);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9aed6fe..d14dab8 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* -*- linux-c -*-
  * sysctl_net.c: sysctl interface to net subsystem.
  *
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index e450212..b83e16a 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # TIPC configuration
 #
@@ -16,7 +17,7 @@
 	  This protocol support is also available as a module ( = code which
 	  can be inserted in and removed from the running kernel whenever you
 	  want). The module will be called tipc. If you want to compile it
-	  as a module, say M here and read <file:Documentation/kbuild/modules.txt>.
+	  as a module, say M here and read <file:Documentation/kbuild/modules.rst>.
 
 	  If in doubt, say N.
 
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index aca168f..c86aba0 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,7 +9,9 @@
 	   core.o link.o discover.o msg.o  \
 	   name_distr.o  subscr.o monitor.o name_table.o net.o  \
 	   netlink.o netlink_compat.o node.o socket.o eth_media.o \
-	   topsrv.o socket.o group.o
+	   topsrv.o socket.o group.o trace.o
+
+CFLAGS_trace.o += -I$(src)
 
 tipc-$(CONFIG_TIPC_MEDIA_UDP)	+= udp_media.o
 tipc-$(CONFIG_TIPC_MEDIA_IB)	+= ib_media.o
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index b88d48d..0f1eaed 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -75,6 +75,7 @@
 		tipc_set_node_id(net, node_id);
 	}
 	tn->trial_addr = addr;
+	tn->addr_trial_end = jiffies;
 	pr_info("32-bit node address hash set to %x\n", addr);
 }
 
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index d802654..6ef1abd 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -54,7 +54,9 @@
  * @dests: array keeping number of reachable destinations per bearer
  * @primary_bearer: a bearer having links to all broadcast destinations, if any
  * @bcast_support: indicates if primary bearer, if any, supports broadcast
+ * @force_bcast: forces broadcast for multicast traffic
  * @rcast_support: indicates if all peer nodes support replicast
+ * @force_rcast: forces replicast for multicast traffic
  * @rc_ratio: dest count as percentage of cluster size where send method changes
  * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast
  */
@@ -64,7 +66,9 @@
 	int dests[MAX_BEARERS];
 	int primary_bearer;
 	bool bcast_support;
+	bool force_bcast;
 	bool rcast_support;
+	bool force_rcast;
 	int rc_ratio;
 	int bc_threshold;
 };
@@ -181,7 +185,7 @@
 	}
 
 	/* We have to transmit across all bearers */
-	skb_queue_head_init(&_xmitq);
+	__skb_queue_head_init(&_xmitq);
 	for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
 		if (!bb->dests[bearer_id])
 			continue;
@@ -216,9 +220,24 @@
 	}
 	/* Can current method be changed ? */
 	method->expires = jiffies + TIPC_METHOD_EXPIRE;
-	if (method->mandatory || time_before(jiffies, exp))
+	if (method->mandatory)
 		return;
 
+	if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) &&
+	    time_before(jiffies, exp))
+		return;
+
+	/* Configuration as force 'broadcast' method */
+	if (bb->force_bcast) {
+		method->rcast = false;
+		return;
+	}
+	/* Configuration as force 'replicast' method */
+	if (bb->force_rcast) {
+		method->rcast = true;
+		return;
+	}
+	/* Configuration as 'autoselect' or default method */
 	/* Determine method to use now */
 	method->rcast = dests <= bb->bc_threshold;
 }
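The selection above now honors user-forced modes before falling back to the destination-count autoselect. Condensed into a sketch (the threshold and destination counts are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    static bool select_rcast(bool force_bcast, bool force_rcast,
                             int dests, int bc_threshold)
    {
        if (force_bcast)
            return false;    /* always use true broadcast */
        if (force_rcast)
            return true;     /* always use replicast */
        return dests <= bc_threshold;    /* autoselect */
    }

    int main(void)
    {
        printf("%d\n", select_rcast(false, false, 3, 10)); /* 1: replicast */
        printf("%d\n", select_rcast(true, false, 3, 10));  /* 0: broadcast */
        return 0;
    }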
@@ -237,7 +256,7 @@
 	struct sk_buff_head xmitq;
 	int rc = 0;
 
-	skb_queue_head_init(&xmitq);
+	__skb_queue_head_init(&xmitq);
 	tipc_bcast_lock(net);
 	if (tipc_link_bc_peers(l))
 		rc = tipc_link_xmit(l, pkts, &xmitq);
@@ -267,7 +286,7 @@
 	u32 dnode, selector;
 
 	selector = msg_link_selector(buf_msg(skb_peek(pkts)));
-	skb_queue_head_init(&_pkts);
+	__skb_queue_head_init(&_pkts);
 
 	list_for_each_entry_safe(dst, tmp, &dests->list, list) {
 		dnode = dst->node;
@@ -281,6 +300,63 @@
 	return 0;
 }
 
+/* tipc_mcast_send_sync - deliver a dummy message with SYN bit
+ * @net: the applicable net namespace
+ * @skb: socket buffer to copy
+ * @method: send method to be used
+ * @dests: destination nodes for message.
+ * @cong_link_cnt: returns number of encountered congested destination links
+ * Returns 0 on success, otherwise errno
+ */
+static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
+				struct tipc_mc_method *method,
+				struct tipc_nlist *dests,
+				u16 *cong_link_cnt)
+{
+	struct tipc_msg *hdr, *_hdr;
+	struct sk_buff_head tmpq;
+	struct sk_buff *_skb;
+
+	/* Does the cluster support the new capability? */
+	if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL))
+		return 0;
+
+	hdr = buf_msg(skb);
+	if (msg_user(hdr) == MSG_FRAGMENTER)
+		hdr = msg_inner_hdr(hdr);
+	if (msg_type(hdr) != TIPC_MCAST_MSG)
+		return 0;
+
+	/* Allocate dummy message */
+	_skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL);
+	if (!_skb)
+		return -ENOMEM;
+
+	/* Prepare the 'syncing' header */
+	msg_set_syn(hdr, 1);
+
+	/* Copy skb's header into a dummy header */
+	skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE);
+	skb_orphan(_skb);
+
+	/* Reverse method for dummy message */
+	_hdr = buf_msg(_skb);
+	msg_set_size(_hdr, MCAST_H_SIZE);
+	msg_set_is_rcast(_hdr, !msg_is_rcast(hdr));
+
+	__skb_queue_head_init(&tmpq);
+	__skb_queue_tail(&tmpq, _skb);
+	if (method->rcast)
+		tipc_bcast_xmit(net, &tmpq, cong_link_cnt);
+	else
+		tipc_rcast_xmit(net, &tmpq, dests, cong_link_cnt);
+
+	/* This queue should normally be empty by now */
+	__skb_queue_purge(&tmpq);
+
+	return 0;
+}
+
 /* tipc_mcast_xmit - deliver message to indicated destination nodes
  *                   and to identified node local sockets
  * @net: the applicable net namespace
@@ -296,10 +372,13 @@
 		    u16 *cong_link_cnt)
 {
 	struct sk_buff_head inputq, localq;
+	bool rcast = method->rcast;
+	struct tipc_msg *hdr;
+	struct sk_buff *skb;
 	int rc = 0;
 
 	skb_queue_head_init(&inputq);
-	skb_queue_head_init(&localq);
+	__skb_queue_head_init(&localq);
 
 	/* Clone packets before they are consumed by next call */
 	if (dests->local && !tipc_msg_reassemble(pkts, &localq)) {
@@ -309,14 +388,28 @@
 	/* Send according to determined transmit method */
 	if (dests->remote) {
 		tipc_bcast_select_xmit_method(net, dests->remote, method);
+
+		skb = skb_peek(pkts);
+		hdr = buf_msg(skb);
+		if (msg_user(hdr) == MSG_FRAGMENTER)
+			hdr = msg_inner_hdr(hdr);
+		msg_set_is_rcast(hdr, method->rcast);
+
+		/* Switching send method? */
+		if (rcast != method->rcast)
+			tipc_mcast_send_sync(net, skb, method,
+					     dests, cong_link_cnt);
+
 		if (method->rcast)
 			rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt);
 		else
 			rc = tipc_bcast_xmit(net, pkts, cong_link_cnt);
 	}
 
-	if (dests->local)
+	if (dests->local) {
+		tipc_loopback_trace(net, &localq);
 		tipc_sk_mcast_rcv(net, &localq, &inputq);
+	}
 exit:
 	/* This queue should normally be empty by now */
 	__skb_queue_purge(pkts);
@@ -485,10 +578,63 @@
 	return 0;
 }
 
+static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode)
+{
+	struct tipc_bc_base *bb = tipc_bc_base(net);
+
+	switch (bc_mode) {
+	case BCLINK_MODE_BCAST:
+		if (!bb->bcast_support)
+			return -ENOPROTOOPT;
+
+		bb->force_bcast = true;
+		bb->force_rcast = false;
+		break;
+	case BCLINK_MODE_RCAST:
+		if (!bb->rcast_support)
+			return -ENOPROTOOPT;
+
+		bb->force_bcast = false;
+		bb->force_rcast = true;
+		break;
+	case BCLINK_MODE_SEL:
+		if (!bb->bcast_support || !bb->rcast_support)
+			return -ENOPROTOOPT;
+
+		bb->force_bcast = false;
+		bb->force_rcast = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio)
+{
+	struct tipc_bc_base *bb = tipc_bc_base(net);
+
+	if (!bb->bcast_support || !bb->rcast_support)
+		return -ENOPROTOOPT;
+
+	if (bc_ratio > 100 || bc_ratio <= 0)
+		return -EINVAL;
+
+	bb->rc_ratio = bc_ratio;
+	tipc_bcast_lock(net);
+	tipc_bcbase_calc_bc_threshold(net);
+	tipc_bcast_unlock(net);
+
+	return 0;
+}
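Changing the ratio triggers a threshold recalculation under the broadcast lock. Assuming the threshold is derived roughly as cluster_size * rc_ratio / 100 (hedged: the exact kernel formula in tipc_bcbase_calc_bc_threshold() may round differently), a worked example:

    #include <stdio.h>

    /* Illustrative reconstruction, not the verbatim kernel formula. */
    static int calc_bc_threshold(int cluster_size, int rc_ratio)
    {
        return 1 + (cluster_size * rc_ratio) / 100;
    }

    int main(void)
    {
        /* A 50-node cluster at the new 10% default: threshold 6, so
         * messages with 6 or fewer destinations use replicast. */
        printf("threshold: %d\n", calc_bc_threshold(50, 10));
        return 0;
    }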
+
 int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[])
 {
 	int err;
 	u32 win;
+	u32 bc_mode;
+	u32 bc_ratio;
 	struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
 
 	if (!attrs[TIPC_NLA_LINK_PROP])
@@ -498,12 +644,28 @@
 	if (err)
 		return err;
 
-	if (!props[TIPC_NLA_PROP_WIN])
+	if (!props[TIPC_NLA_PROP_WIN] &&
+	    !props[TIPC_NLA_PROP_BROADCAST] &&
+	    !props[TIPC_NLA_PROP_BROADCAST_RATIO]) {
 		return -EOPNOTSUPP;
+	}
 
-	win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+	if (props[TIPC_NLA_PROP_BROADCAST]) {
+		bc_mode = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST]);
+		err = tipc_bc_link_set_broadcast_mode(net, bc_mode);
+	}
 
-	return tipc_bc_link_set_queue_limits(net, win);
+	if (!err && props[TIPC_NLA_PROP_BROADCAST_RATIO]) {
+		bc_ratio = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST_RATIO]);
+		err = tipc_bc_link_set_broadcast_ratio(net, bc_ratio);
+	}
+
+	if (!err && props[TIPC_NLA_PROP_WIN]) {
+		win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+		err = tipc_bc_link_set_queue_limits(net, win);
+	}
+
+	return err;
 }
 
 int tipc_bcast_init(struct net *net)
@@ -529,7 +691,7 @@
 		goto enomem;
 	bb->link = l;
 	tn->bcl = l;
-	bb->rc_ratio = 25;
+	bb->rc_ratio = 10;
 	bb->rcast_support = true;
 	return 0;
 enomem:
@@ -576,3 +738,108 @@
 	nl->remote = 0;
 	nl->local = false;
 }
+
+u32 tipc_bcast_get_broadcast_mode(struct net *net)
+{
+	struct tipc_bc_base *bb = tipc_bc_base(net);
+
+	if (bb->force_bcast)
+		return BCLINK_MODE_BCAST;
+
+	if (bb->force_rcast)
+		return BCLINK_MODE_RCAST;
+
+	if (bb->bcast_support && bb->rcast_support)
+		return BCLINK_MODE_SEL;
+
+	return 0;
+}
+
+u32 tipc_bcast_get_broadcast_ratio(struct net *net)
+{
+	struct tipc_bc_base *bb = tipc_bc_base(net);
+
+	return bb->rc_ratio;
+}
+
+void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
+			   struct sk_buff_head *inputq)
+{
+	struct sk_buff *skb, *_skb, *tmp;
+	struct tipc_msg *hdr, *_hdr;
+	bool match = false;
+	u32 node, port;
+
+	skb = skb_peek(inputq);
+	if (!skb)
+		return;
+
+	hdr = buf_msg(skb);
+
+	if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq)))
+		return;
+
+	node = msg_orignode(hdr);
+	if (node == tipc_own_addr(net))
+		return;
+
+	port = msg_origport(hdr);
+
+	/* Has the twin SYN message already arrived? */
+	skb_queue_walk(defq, _skb) {
+		_hdr = buf_msg(_skb);
+		if (msg_orignode(_hdr) != node)
+			continue;
+		if (msg_origport(_hdr) != port)
+			continue;
+		match = true;
+		break;
+	}
+
+	if (!match) {
+		if (!msg_is_syn(hdr))
+			return;
+		__skb_dequeue(inputq);
+		__skb_queue_tail(defq, skb);
+		return;
+	}
+
+	/* Deliver non-SYN message from other link, otherwise queue it */
+	if (!msg_is_syn(hdr)) {
+		if (msg_is_rcast(hdr) != msg_is_rcast(_hdr))
+			return;
+		__skb_dequeue(inputq);
+		__skb_queue_tail(defq, skb);
+		return;
+	}
+
+	/* Queue non-SYN/SYN message from same link */
+	if (msg_is_rcast(hdr) == msg_is_rcast(_hdr)) {
+		__skb_dequeue(inputq);
+		__skb_queue_tail(defq, skb);
+		return;
+	}
+
+	/* Matching SYN messages => return the one with data, if any */
+	__skb_unlink(_skb, defq);
+	if (msg_data_sz(hdr)) {
+		kfree_skb(_skb);
+	} else {
+		__skb_dequeue(inputq);
+		kfree_skb(skb);
+		__skb_queue_tail(inputq, _skb);
+	}
+
+	/* Deliver subsequent non-SYN messages from same peer */
+	skb_queue_walk_safe(defq, _skb, tmp) {
+		_hdr = buf_msg(_skb);
+		if (msg_orignode(_hdr) != node)
+			continue;
+		if (msg_origport(_hdr) != port)
+			continue;
+		if (msg_is_syn(_hdr))
+			break;
+		__skb_unlink(_skb, defq);
+		__skb_queue_tail(inputq, _skb);
+	}
+}
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 751530a..dadad95 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -48,6 +48,10 @@
 
 #define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000)
 
+#define BCLINK_MODE_BCAST  0x1
+#define BCLINK_MODE_RCAST  0x2
+#define BCLINK_MODE_SEL    0x4
+
 struct tipc_nlist {
 	struct list_head list;
 	u32 self;
@@ -63,11 +67,13 @@
 /* Cookie to be used between socket and broadcast layer
  * @rcast: replicast (instead of broadcast) was used at previous xmit
  * @mandatory: broadcast/replicast indication was set by user
+ * @deferredq: defer queue used to keep messages in order
  * @expires: re-evaluate non-mandatory transmit method if we are past this
  */
 struct tipc_mc_method {
 	bool rcast;
 	bool mandatory;
+	struct sk_buff_head deferredq;
 	unsigned long expires;
 };
 
@@ -92,6 +98,12 @@
 int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
 int tipc_bclink_reset_stats(struct net *net);
 
+u32 tipc_bcast_get_broadcast_mode(struct net *net);
+u32 tipc_bcast_get_broadcast_ratio(struct net *net);
+
+void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
+			   struct sk_buff_head *inputq);
+
 static inline void tipc_bcast_lock(struct net *net)
 {
 	spin_lock_bh(&tipc_net(net)->bclock);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 645c160..0214aa1 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -43,6 +43,7 @@
 #include "bcast.h"
 #include "netlink.h"
 #include "udp_media.h"
+#include "trace.h"
 
 #define MAX_ADDR_STR 60
 
@@ -61,7 +62,7 @@
 {
 	struct tipc_net *tn = tipc_net(net);
 
-	return rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+	return rcu_dereference(tn->bearer_list[bearer_id]);
 }
 
 static void bearer_disable(struct net *net, struct tipc_bearer *b);
@@ -99,7 +100,7 @@
 /**
  * tipc_media_addr_printf - record media address in print buffer
  */
-void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a)
+int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a)
 {
 	char addr_str[MAX_ADDR_STR];
 	struct tipc_media *m;
@@ -114,9 +115,10 @@
 
 		ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id);
 		for (i = 0; i < sizeof(a->value); i++)
-			ret += scnprintf(buf - ret, len + ret,
-					    "-%02x", a->value[i]);
+			ret += scnprintf(buf + ret, len - ret,
+					    "-%x", a->value[i]);
 	}
+	return ret;
 }
 
 /**
@@ -208,7 +210,7 @@
 	struct tipc_bearer *b;
 
 	rcu_read_lock();
-	b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+	b = rcu_dereference(tn->bearer_list[bearer_id]);
 	if (b)
 		tipc_disc_add_dest(b->disc);
 	rcu_read_unlock();
@@ -220,7 +222,7 @@
 	struct tipc_bearer *b;
 
 	rcu_read_lock();
-	b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+	b = rcu_dereference(tn->bearer_list[bearer_id]);
 	if (b)
 		tipc_disc_remove_dest(b->disc);
 	rcu_read_unlock();
@@ -317,7 +319,6 @@
 	res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
 	if (res) {
 		bearer_disable(net, b);
-		kfree(b);
 		errstr = "failed to create discoverer";
 		goto rejected;
 	}
@@ -388,6 +389,11 @@
 		dev_put(dev);
 		return -EINVAL;
 	}
+	if (dev == net->loopback_dev) {
+		dev_put(dev);
+		pr_info("Enabling <%s> not permitted\n", b->name);
+		return -EINVAL;
+	}
 
 	/* Autoconfigure own node identity if needed */
 	if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) {
@@ -443,7 +449,7 @@
 	struct net_device *dev;
 	int delta;
 
-	dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr);
+	dev = (struct net_device *)rcu_dereference(b->media_ptr);
 	if (!dev)
 		return 0;
 
@@ -480,7 +486,7 @@
 	struct tipc_bearer *b;
 
 	rcu_read_lock();
-	b = rcu_dereference_rtnl(tipc_net(net)->bearer_list[bearer_id]);
+	b = rcu_dereference(tipc_net(net)->bearer_list[bearer_id]);
 	if (b)
 		mtu = b->mtu;
 	rcu_read_unlock();
@@ -573,11 +579,11 @@
 	struct tipc_bearer *b;
 
 	rcu_read_lock();
-	b = rcu_dereference_rtnl(dev->tipc_ptr) ?:
-		rcu_dereference_rtnl(orig_dev->tipc_ptr);
+	b = rcu_dereference(dev->tipc_ptr) ?:
+		rcu_dereference(orig_dev->tipc_ptr);
 	if (likely(b && test_bit(0, &b->up) &&
 		   (skb->pkt_type <= PACKET_MULTICAST))) {
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		tipc_rcv(dev_net(b->pt.dev), skb, b);
 		rcu_read_unlock();
 		return NET_RX_SUCCESS;
@@ -607,6 +613,7 @@
 	if (!b)
 		return NOTIFY_DONE;
 
+	trace_tipc_l2_device_event(dev, b, evt);
 	switch (evt) {
 	case NETDEV_CHANGE:
 		if (netif_carrier_ok(dev) && netif_oper_up(dev)) {
@@ -672,6 +679,65 @@
 	}
 }
 
+void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts)
+{
+	struct net_device *dev = net->loopback_dev;
+	struct sk_buff *skb, *_skb;
+	int exp;
+
+	skb_queue_walk(pkts, _skb) {
+		skb = pskb_copy(_skb, GFP_ATOMIC);
+		if (!skb)
+			continue;
+
+		exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb));
+		if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) {
+			kfree_skb(skb);
+			continue;
+		}
+
+		skb_reset_network_header(skb);
+		dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr,
+				dev->dev_addr, skb->len);
+		skb->dev = dev;
+		skb->pkt_type = PACKET_HOST;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->protocol = eth_type_trans(skb, dev);
+		netif_rx_ni(skb);
+	}
+}
+
+static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev,
+				 struct packet_type *pt, struct net_device *od)
+{
+	consume_skb(skb);
+	return NET_RX_SUCCESS;
+}
+
+int tipc_attach_loopback(struct net *net)
+{
+	struct net_device *dev = net->loopback_dev;
+	struct tipc_net *tn = tipc_net(net);
+
+	if (!dev)
+		return -ENODEV;
+
+	dev_hold(dev);
+	tn->loopback_pt.dev = dev;
+	tn->loopback_pt.type = htons(ETH_P_TIPC);
+	tn->loopback_pt.func = tipc_loopback_rcv_pkt;
+	dev_add_pack(&tn->loopback_pt);
+	return 0;
+}
+
+void tipc_detach_loopback(struct net *net)
+{
+	struct tipc_net *tn = tipc_net(net);
+
+	dev_remove_pack(&tn->loopback_pt);
+	dev_put(net->loopback_dev);
+}
+
 /* Caller should hold rtnl_lock to protect the bearer */
 static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
 				struct tipc_bearer *bearer, int nlflags)
@@ -685,14 +751,14 @@
 	if (!hdr)
 		return -EMSGSIZE;
 
-	attrs = nla_nest_start(msg->skb, TIPC_NLA_BEARER);
+	attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER);
 	if (!attrs)
 		goto msg_full;
 
 	if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name))
 		goto attr_msg_full;
 
-	prop = nla_nest_start(msg->skb, TIPC_NLA_BEARER_PROP);
+	prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_PROP);
 	if (!prop)
 		goto prop_msg_full;
 	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority))
@@ -774,9 +840,9 @@
 	if (!info->attrs[TIPC_NLA_BEARER])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
-			       info->attrs[TIPC_NLA_BEARER],
-			       tipc_nl_bearer_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX,
+					  info->attrs[TIPC_NLA_BEARER],
+					  tipc_nl_bearer_policy, info->extack);
 	if (err)
 		return err;
 
@@ -823,9 +889,9 @@
 	if (!info->attrs[TIPC_NLA_BEARER])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
-			       info->attrs[TIPC_NLA_BEARER],
-			       tipc_nl_bearer_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX,
+					  info->attrs[TIPC_NLA_BEARER],
+					  tipc_nl_bearer_policy, info->extack);
 	if (err)
 		return err;
 
@@ -868,9 +934,9 @@
 	if (!info->attrs[TIPC_NLA_BEARER])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
-			       info->attrs[TIPC_NLA_BEARER],
-			       tipc_nl_bearer_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX,
+					  info->attrs[TIPC_NLA_BEARER],
+					  tipc_nl_bearer_policy, info->extack);
 	if (err)
 		return err;
 
@@ -919,9 +985,9 @@
 	if (!info->attrs[TIPC_NLA_BEARER])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
-			       info->attrs[TIPC_NLA_BEARER],
-			       tipc_nl_bearer_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX,
+					  info->attrs[TIPC_NLA_BEARER],
+					  tipc_nl_bearer_policy, info->extack);
 	if (err)
 		return err;
 
@@ -962,9 +1028,9 @@
 	if (!info->attrs[TIPC_NLA_BEARER])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
-			       info->attrs[TIPC_NLA_BEARER],
-			       tipc_nl_bearer_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX,
+					  info->attrs[TIPC_NLA_BEARER],
+					  tipc_nl_bearer_policy, info->extack);
 	if (err)
 		return err;
 
@@ -1031,14 +1097,14 @@
 	if (!hdr)
 		return -EMSGSIZE;
 
-	attrs = nla_nest_start(msg->skb, TIPC_NLA_MEDIA);
+	attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA);
 	if (!attrs)
 		goto msg_full;
 
 	if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name))
 		goto attr_msg_full;
 
-	prop = nla_nest_start(msg->skb, TIPC_NLA_MEDIA_PROP);
+	prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA_PROP);
 	if (!prop)
 		goto prop_msg_full;
 	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority))
@@ -1105,9 +1171,9 @@
 	if (!info->attrs[TIPC_NLA_MEDIA])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX,
-			       info->attrs[TIPC_NLA_MEDIA],
-			       tipc_nl_media_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX,
+					  info->attrs[TIPC_NLA_MEDIA],
+					  tipc_nl_media_policy, info->extack);
 	if (err)
 		return err;
 
@@ -1153,9 +1219,9 @@
 	if (!info->attrs[TIPC_NLA_MEDIA])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX,
-			       info->attrs[TIPC_NLA_MEDIA],
-			       tipc_nl_media_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX,
+					  info->attrs[TIPC_NLA_MEDIA],
+					  tipc_nl_media_policy, info->extack);
 
 	if (!attrs[TIPC_NLA_MEDIA_NAME])
 		return -EINVAL;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 394290c..ea0f3c4 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -207,7 +207,7 @@
 
 int tipc_media_set_priority(const char *name, u32 new_value);
 int tipc_media_set_window(const char *name, u32 new_value);
-void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
+int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
 int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
 			 struct nlattr *attrs[]);
 void tipc_disable_l2_media(struct tipc_bearer *b);
@@ -232,6 +232,16 @@
 		      struct tipc_media_addr *dst);
 void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
 			 struct sk_buff_head *xmitq);
+void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts);
+int tipc_attach_loopback(struct net *net);
+void tipc_detach_loopback(struct net *net);
+
+static inline void tipc_loopback_trace(struct net *net,
+				       struct sk_buff_head *pkts)
+{
+	if (unlikely(dev_nit_active(net->loopback_dev)))
+		tipc_clone_to_loopback(net, pkts);
+}
 
 /* check if device MTU is too low for tipc headers */
 static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve)
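tipc_loopback_trace() clones node-internal traffic onto the loopback device only while a packet tap is listening (dev_nit_active()), so tcpdump can observe intra-node TIPC messages without taxing the normal delivery path. A user-space sketch of the gate, where tap_active stands in for dev_nit_active():

    #include <stdbool.h>
    #include <stdio.h>

    static bool tap_active;    /* stands in for dev_nit_active() */

    static void send_internal(const char *msg)
    {
        if (tap_active)
            printf("cloned to tap: %s\n", msg);    /* tcpdump path */
        printf("delivered locally: %s\n", msg);    /* normal path */
    }

    int main(void)
    {
        send_internal("no tap");      /* clone skipped entirely */
        tap_active = true;
        send_internal("tap running"); /* clone taken for the tap */
        return 0;
    }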
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5b38f51..8f35060 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -34,8 +34,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
 #include "core.h"
 #include "name_table.h"
 #include "subscr.h"
@@ -43,6 +41,7 @@
 #include "net.h"
 #include "socket.h"
 #include "bcast.h"
+#include "node.h"
 
 #include <linux/module.h>
 
@@ -59,6 +58,7 @@
 	tn->node_addr = 0;
 	tn->trial_addr = 0;
 	tn->addr_trial_end = 0;
+	tn->capabilities = TIPC_NODE_CAPABILITIES;
 	memset(tn->node_id, 0, sizeof(tn->node_id));
 	memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
 	tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
@@ -75,19 +75,18 @@
 		goto out_nametbl;
 
 	INIT_LIST_HEAD(&tn->dist_queue);
-	err = tipc_topsrv_start(net);
-	if (err)
-		goto out_subscr;
 
 	err = tipc_bcast_init(net);
 	if (err)
 		goto out_bclink;
 
+	err = tipc_attach_loopback(net);
+	if (err)
+		goto out_bclink;
+
 	return 0;
 
 out_bclink:
-	tipc_bcast_stop(net);
-out_subscr:
 	tipc_nametbl_stop(net);
 out_nametbl:
 	tipc_sk_rht_destroy(net);
@@ -97,7 +96,7 @@
 
 static void __net_exit tipc_exit_net(struct net *net)
 {
-	tipc_topsrv_stop(net);
+	tipc_detach_loopback(net);
 	tipc_net_stop(net);
 	tipc_bcast_stop(net);
 	tipc_nametbl_stop(net);
@@ -111,6 +110,11 @@
 	.size = sizeof(struct tipc_net),
 };
 
+static struct pernet_operations tipc_topsrv_net_ops = {
+	.init = tipc_topsrv_init_net,
+	.exit = tipc_topsrv_exit_net,
+};
+
 static int __init tipc_init(void)
 {
 	int err;
@@ -129,18 +133,22 @@
 	if (err)
 		goto out_netlink_compat;
 
-	err = tipc_socket_init();
-	if (err)
-		goto out_socket;
-
 	err = tipc_register_sysctl();
 	if (err)
 		goto out_sysctl;
 
-	err = register_pernet_subsys(&tipc_net_ops);
+	err = register_pernet_device(&tipc_net_ops);
 	if (err)
 		goto out_pernet;
 
+	err = tipc_socket_init();
+	if (err)
+		goto out_socket;
+
+	err = register_pernet_device(&tipc_topsrv_net_ops);
+	if (err)
+		goto out_pernet_topsrv;
+
 	err = tipc_bearer_setup();
 	if (err)
 		goto out_bearer;
@@ -148,12 +156,14 @@
 	pr_info("Started in single node mode\n");
 	return 0;
 out_bearer:
-	unregister_pernet_subsys(&tipc_net_ops);
+	unregister_pernet_device(&tipc_topsrv_net_ops);
+out_pernet_topsrv:
+	tipc_socket_stop();
+out_socket:
+	unregister_pernet_device(&tipc_net_ops);
 out_pernet:
 	tipc_unregister_sysctl();
 out_sysctl:
-	tipc_socket_stop();
-out_socket:
 	tipc_netlink_compat_stop();
 out_netlink_compat:
 	tipc_netlink_stop();
@@ -165,10 +175,11 @@
 static void __exit tipc_exit(void)
 {
 	tipc_bearer_cleanup();
-	unregister_pernet_subsys(&tipc_net_ops);
+	unregister_pernet_device(&tipc_topsrv_net_ops);
+	tipc_socket_stop();
+	unregister_pernet_device(&tipc_net_ops);
 	tipc_netlink_stop();
 	tipc_netlink_compat_stop();
-	tipc_socket_stop();
 	tipc_unregister_sysctl();
 
 	pr_info("Deactivated\n");
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 8020a6c..3042f65 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -60,6 +60,12 @@
 #include <linux/rhashtable.h>
 #include <net/genetlink.h>
 
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 struct tipc_node;
 struct tipc_bearer;
 struct tipc_bc_base;
@@ -122,6 +128,12 @@
 	/* Topology subscription server */
 	struct tipc_topsrv *topsrv;
 	atomic_t subscription_count;
+
+	/* Cluster capabilities */
+	u16 capabilities;
+
+	/* Tracing of node internal messages */
+	struct packet_type loopback_pt;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 06fee14..89257e2 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -199,7 +199,7 @@
 	struct tipc_member *m, *tmp;
 	struct sk_buff_head xmitq;
 
-	skb_queue_head_init(&xmitq);
+	__skb_queue_head_init(&xmitq);
 	rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
 		tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq);
 		tipc_group_update_member(m, 0);
@@ -218,6 +218,7 @@
 
 	rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
 		tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
+		__skb_queue_purge(&m->deferredq);
 		list_del(&m->list);
 		kfree(m);
 	}
@@ -434,7 +435,7 @@
 		return true;
 	if (state == MBR_PENDING && adv == ADV_IDLE)
 		return true;
-	skb_queue_head_init(&xmitq);
+	__skb_queue_head_init(&xmitq);
 	tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
 	tipc_node_distr_xmit(grp->net, &xmitq);
 	return true;
@@ -917,7 +918,10 @@
 
 int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb)
 {
-	struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP);
+	struct nlattr *group = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_GROUP);
+
+	if (!group)
+		return -EMSGSIZE;
 
 	if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID,
 			grp->type) ||
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 836727e..999eab5 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -43,6 +43,7 @@
 #include "discover.h"
 #include "netlink.h"
 #include "monitor.h"
+#include "trace.h"
 
 #include <linux/pkt_sched.h>
 
@@ -105,9 +106,6 @@
  * @transmitq: queue for sent, non-acked messages
  * @backlogq: queue for messages waiting to be sent
  * @snt_nxt: next sequence number to use for outbound messages
- * @last_retransmitted: sequence number of most recently retransmitted message
- * @stale_cnt: counter for number of identical retransmit attempts
- * @stale_limit: time when repeated identical retransmits must force link reset
  * @ackers: # of peers that needs to ack each packet before it can be released
  * @acked: # last packet acked by a certain peer. Used for broadcast.
  * @rcv_nxt: next sequence number to expect for inbound messages
@@ -150,6 +148,7 @@
 	/* Failover/synch */
 	u16 drop_point;
 	struct sk_buff *failover_reasm_skb;
+	struct sk_buff_head failover_deferdq;
 
 	/* Max packet negotiation */
 	u16 mtu;
@@ -161,12 +160,10 @@
 	struct {
 		u16 len;
 		u16 limit;
+		struct sk_buff *target_bskb;
 	} backlog[5];
 	u16 snd_nxt;
-	u16 last_retransm;
 	u16 window;
-	u16 stale_cnt;
-	unsigned long stale_limit;
 
 	/* Reception */
 	u16 rcv_nxt;
@@ -180,15 +177,13 @@
 
 	/* Fragmentation/reassembly */
 	struct sk_buff *reasm_buf;
+	struct sk_buff *reasm_tnlmsg;
 
 	/* Broadcast */
 	u16 ackers;
 	u16 acked;
 	struct tipc_link *bc_rcvlink;
 	struct tipc_link *bc_sndlink;
-	unsigned long prev_retr;
-	u16 prev_from;
-	u16 prev_to;
 	u8 nack_state;
 	bool bc_peer_is_up;
 
@@ -210,7 +205,8 @@
 	BC_NACK_SND_SUPPRESS,
 };
 
-#define TIPC_BC_RETR_LIMIT 10   /* [ms] */
+#define TIPC_BC_RETR_LIM  (jiffies + msecs_to_jiffies(10))
+#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1))
 
 /*
  * Interval between NACKs when packets arrive out of order
@@ -248,6 +244,10 @@
 static void tipc_link_build_bc_init_msg(struct tipc_link *l,
 					struct sk_buff_head *xmitq);
 static bool tipc_link_release_pkts(struct tipc_link *l, u16 to);
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data);
+static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+				     struct tipc_gap_ack_blks *ga,
+				     struct sk_buff_head *xmitq);
 
 /*
  *  Simple non-static link routines (i.e. referenced outside this file)
@@ -359,9 +359,11 @@
 	rcv_l->bc_peer_is_up = true;
 	rcv_l->state = LINK_ESTABLISHED;
 	tipc_link_bc_ack_rcv(rcv_l, ack, xmitq);
+	trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
 	tipc_link_reset(rcv_l);
 	rcv_l->state = LINK_RESET;
 	if (!snd_l->ackers) {
+		trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!");
 		tipc_link_reset(snd_l);
 		snd_l->state = LINK_RESET;
 		__skb_queue_purge(xmitq);
@@ -493,6 +495,7 @@
 	__skb_queue_head_init(&l->transmq);
 	__skb_queue_head_init(&l->backlogq);
 	__skb_queue_head_init(&l->deferdq);
+	__skb_queue_head_init(&l->failover_deferdq);
 	skb_queue_head_init(&l->wakeupq);
 	skb_queue_head_init(l->inputq);
 	return true;
@@ -525,6 +528,7 @@
 
 	l = *link;
 	strcpy(l->name, tipc_bclink_name);
+	trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!");
 	tipc_link_reset(l);
 	l->state = LINK_RESET;
 	l->ackers = 0;
@@ -549,6 +553,7 @@
 int tipc_link_fsm_evt(struct tipc_link *l, int evt)
 {
 	int rc = 0;
+	int old_state = l->state;
 
 	switch (l->state) {
 	case LINK_RESETTING:
@@ -695,10 +700,12 @@
 	default:
 		pr_err("Unknown FSM state %x in %s\n", l->state, l->name);
 	}
+	trace_tipc_link_fsm(l->name, old_state, l->state, evt);
 	return rc;
 illegal_evt:
 	pr_err("Illegal FSM event %x in state %x on link %s\n",
 	       evt, l->state, l->name);
+	trace_tipc_link_fsm(l->name, old_state, l->state, evt);
 	return rc;
 }
 
@@ -723,7 +730,7 @@
 	if (msg_user(msg) == MSG_FRAGMENTER) {
 		if (msg_type(msg) != FIRST_FRAGMENT)
 			return;
-		length = msg_size(msg_get_wrapped(msg));
+		length = msg_size(msg_inner_hdr(msg));
 	}
 	l->stats.msg_lengths_total += length;
 	l->stats.msg_length_counts++;
@@ -743,6 +750,18 @@
 		l->stats.msg_length_profile[6]++;
 }
 
+/**
+ * tipc_link_too_silent - check if link is "too silent"
+ * @l: tipc link to be checked
+ *
+ * Returns true if the link 'silent_intv_cnt' is about to reach the
+ * 'abort_limit' value, otherwise false
+ */
+bool tipc_link_too_silent(struct tipc_link *l)
+{
+	return (l->silent_intv_cnt + 2 > l->abort_limit);
+}
+
 /* tipc_link_timeout - perform periodic task as instructed from node timeout
  */
 int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
@@ -756,6 +775,8 @@
 	u16 bc_acked = l->bc_rcvlink->acked;
 	struct tipc_mon_state *mstate = &l->mon_state;
 
+	trace_tipc_link_timeout(l, TIPC_DUMP_NONE, " ");
+	trace_tipc_link_too_silent(l, TIPC_DUMP_ALL, " ");
 	switch (l->state) {
 	case LINK_ESTABLISHED:
 	case LINK_SYNCHING:
@@ -818,6 +839,7 @@
 	TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
 	skb_queue_tail(&l->wakeupq, skb);
 	l->stats.link_congs++;
+	trace_tipc_link_conges(l, TIPC_DUMP_ALL, "wakeup scheduled!");
 	return -ELINKCONG;
 }
 
@@ -829,27 +851,43 @@
  */
 static void link_prepare_wakeup(struct tipc_link *l)
 {
+	struct sk_buff_head *wakeupq = &l->wakeupq;
+	struct sk_buff_head *inputq = l->inputq;
 	struct sk_buff *skb, *tmp;
-	int imp, i = 0;
+	struct sk_buff_head tmpq;
+	int avail[5] = {0,};
+	int imp = 0;
 
-	skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
+	__skb_queue_head_init(&tmpq);
+
+	for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++)
+		avail[imp] = l->backlog[imp].limit - l->backlog[imp].len;
+
+	skb_queue_walk_safe(wakeupq, skb, tmp) {
 		imp = TIPC_SKB_CB(skb)->chain_imp;
-		if (l->backlog[imp].len < l->backlog[imp].limit) {
-			skb_unlink(skb, &l->wakeupq);
-			skb_queue_tail(l->inputq, skb);
-		} else if (i++ > 10) {
-			break;
-		}
+		if (avail[imp] <= 0)
+			continue;
+		avail[imp]--;
+		__skb_unlink(skb, wakeupq);
+		__skb_queue_tail(&tmpq, skb);
 	}
+
+	spin_lock_bh(&inputq->lock);
+	skb_queue_splice_tail(&tmpq, inputq);
+	spin_unlock_bh(&inputq->lock);
 }
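
The rewritten wakeup logic above replaces the old per-skb limit check with a per-importance budget computed once up front. A user-space sketch with made-up queue limits and lengths (not the kernel code), showing how waiting users are admitted FIFO until their level's budget runs out:

#include <stdio.h>

#define LEVELS 5	/* TIPC_LOW_IMPORTANCE .. TIPC_SYSTEM_IMPORTANCE */

int main(void)
{
	int limit[LEVELS] = {50, 100, 150, 200, 300};	/* made-up limits */
	int len[LEVELS]   = {49, 100, 140, 200, 295};	/* made-up lengths */
	int avail[LEVELS];
	int wakeupq[] = {0, 1, 2, 2, 4, 4, 4, 4, 4, 4};	/* per-user levels */
	int n = sizeof(wakeupq) / sizeof(wakeupq[0]);

	for (int imp = 0; imp < LEVELS; imp++)
		avail[imp] = limit[imp] - len[imp];

	for (int i = 0; i < n; i++) {
		int imp = wakeupq[i];

		if (avail[imp] <= 0)
			continue;	/* level still full: keep waiting */
		avail[imp]--;
		printf("wake user %d (importance %d)\n", i, imp);
	}
	return 0;
}
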
 
 void tipc_link_reset(struct tipc_link *l)
 {
 	struct sk_buff_head list;
+	u32 imp;
 
 	__skb_queue_head_init(&list);
 
 	l->in_session = false;
+	/* Force re-synch of peer session number before establishing */
+	l->peer_session--;
 	l->session++;
 	l->mtu = l->advertised_mtu;
 
@@ -864,14 +902,16 @@
 	__skb_queue_purge(&l->transmq);
 	__skb_queue_purge(&l->deferdq);
 	__skb_queue_purge(&l->backlogq);
-	l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
-	l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0;
-	l->backlog[TIPC_HIGH_IMPORTANCE].len = 0;
-	l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
-	l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
+	__skb_queue_purge(&l->failover_deferdq);
+	for (imp = 0; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) {
+		l->backlog[imp].len = 0;
+		l->backlog[imp].target_bskb = NULL;
+	}
 	kfree_skb(l->reasm_buf);
+	kfree_skb(l->reasm_tnlmsg);
 	kfree_skb(l->failover_reasm_skb);
 	l->reasm_buf = NULL;
+	l->reasm_tnlmsg = NULL;
 	l->failover_reasm_skb = NULL;
 	l->rcv_unacked = 0;
 	l->snd_nxt = 1;
@@ -881,7 +921,6 @@
 	l->acked = 0;
 	l->silent_intv_cnt = 0;
 	l->rst_cnt = 0;
-	l->stale_cnt = 0;
 	l->bc_peer_is_up = false;
 	memset(&l->mon_state, 0, sizeof(l->mon_state));
 	tipc_link_reset_stats(l);
@@ -909,12 +948,15 @@
 	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
 	struct sk_buff_head *transmq = &l->transmq;
 	struct sk_buff_head *backlogq = &l->backlogq;
-	struct sk_buff *skb, *_skb, *bskb;
+	struct sk_buff *skb, *_skb, **tskb;
 	int pkt_cnt = skb_queue_len(list);
 	int rc = 0;
 
 	if (unlikely(msg_size(hdr) > mtu)) {
-		skb_queue_purge(list);
+		pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n",
+			skb_queue_len(list), msg_user(hdr),
+			msg_type(hdr), msg_size(hdr), mtu);
+		__skb_queue_purge(list);
 		return -EMSGSIZE;
 	}
 
@@ -943,11 +985,14 @@
 		if (likely(skb_queue_len(transmq) < maxwin)) {
 			_skb = skb_clone(skb, GFP_ATOMIC);
 			if (!_skb) {
-				skb_queue_purge(list);
+				__skb_queue_purge(list);
 				return -ENOBUFS;
 			}
 			__skb_dequeue(list);
 			__skb_queue_tail(transmq, skb);
+			/* next retransmit attempt */
+			if (link_is_bc_sndlink(l))
+				TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
 			__skb_queue_tail(xmitq, _skb);
 			TIPC_SKB_CB(skb)->ackers = l->ackers;
 			l->rcv_unacked = 0;
@@ -955,19 +1000,21 @@
 			seqno++;
 			continue;
 		}
-		if (tipc_msg_bundle(skb_peek_tail(backlogq), hdr, mtu)) {
+		tskb = &l->backlog[imp].target_bskb;
+		if (tipc_msg_bundle(*tskb, hdr, mtu)) {
 			kfree_skb(__skb_dequeue(list));
 			l->stats.sent_bundled++;
 			continue;
 		}
-		if (tipc_msg_make_bundle(&bskb, hdr, mtu, l->addr)) {
+		if (tipc_msg_make_bundle(tskb, hdr, mtu, l->addr)) {
 			kfree_skb(__skb_dequeue(list));
-			__skb_queue_tail(backlogq, bskb);
-			l->backlog[msg_importance(buf_msg(bskb))].len++;
+			__skb_queue_tail(backlogq, *tskb);
+			l->backlog[imp].len++;
 			l->stats.sent_bundled++;
 			l->stats.sent_bundles++;
 			continue;
 		}
+		l->backlog[imp].target_bskb = NULL;
 		l->backlog[imp].len += skb_queue_len(list);
 		skb_queue_splice_tail_init(list, backlogq);
 	}
@@ -983,6 +1030,7 @@
 	u16 seqno = l->snd_nxt;
 	u16 ack = l->rcv_nxt - 1;
 	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+	u32 imp;
 
 	while (skb_queue_len(&l->transmq) < l->window) {
 		skb = skb_peek(&l->backlogq);
@@ -993,8 +1041,15 @@
 			break;
 		__skb_dequeue(&l->backlogq);
 		hdr = buf_msg(skb);
-		l->backlog[msg_importance(hdr)].len--;
+		imp = msg_importance(hdr);
+		l->backlog[imp].len--;
+		if (unlikely(skb == l->backlog[imp].target_bskb))
+			l->backlog[imp].target_bskb = NULL;
 		__skb_queue_tail(&l->transmq, skb);
+		/* next retransmit attempt */
+		if (link_is_bc_sndlink(l))
+			TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+
 		__skb_queue_tail(xmitq, _skb);
 		TIPC_SKB_CB(skb)->ackers = l->ackers;
 		msg_set_seqno(hdr, seqno);
@@ -1007,47 +1062,84 @@
 	l->snd_nxt = seqno;
 }
 
-static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
+/**
+ * link_retransmit_failure() - Detect repeated retransmit failures
+ * @l: tipc link sender
+ * @r: tipc link receiver (= l in case of unicast)
+ * @rc: returned code
+ *
+ * Return: true if repeated retransmit failures have occurred, otherwise
+ * false
+ */
+static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
+				    int *rc)
 {
-	struct tipc_msg *hdr = buf_msg(skb);
+	struct sk_buff *skb = skb_peek(&l->transmq);
+	struct tipc_msg *hdr;
+
+	if (!skb)
+		return false;
+
+	if (!TIPC_SKB_CB(skb)->retr_cnt)
+		return false;
+
+	if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
+			msecs_to_jiffies(r->tolerance)))
+		return false;
+
+	hdr = buf_msg(skb);
+	if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr)))
+		return false;
 
 	pr_warn("Retransmission failure on link <%s>\n", l->name);
 	link_print(l, "State of link ");
 	pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
 		msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
-	pr_info("sqno %u, prev: %x, src: %x\n",
-		msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
+	pr_info("sqno %u, prev: %x, dest: %x\n",
+		msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr));
+	pr_info("retr_stamp %d, retr_cnt %d\n",
+		jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp),
+		TIPC_SKB_CB(skb)->retr_cnt);
+
+	trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
+	trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
+	trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+
+	if (link_is_bc_sndlink(l)) {
+		r->state = LINK_RESET;
+		*rc = TIPC_LINK_DOWN_EVT;
+	} else {
+		*rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+	}
+
+	return true;
 }
 
-/* tipc_link_retrans() - retransmit one or more packets
+/* tipc_link_bc_retrans() - retransmit zero or more packets
  * @l: the link to transmit on
  * @r: the receiving link ordering the retransmit. Same as l if unicast
  * @from: retransmit from (inclusive) this sequence number
  * @to: retransmit to (inclusive) this sequence number
  * xmitq: queue for accumulating the retransmitted packets
  */
-static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
-			     u16 from, u16 to, struct sk_buff_head *xmitq)
+static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
+				u16 from, u16 to, struct sk_buff_head *xmitq)
 {
 	struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
 	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
 	u16 ack = l->rcv_nxt - 1;
 	struct tipc_msg *hdr;
+	int rc = 0;
 
 	if (!skb)
 		return 0;
+	if (less(to, from))
+		return 0;
 
-	/* Detect repeated retransmit failures on same packet */
-	if (r->last_retransm != buf_seqno(skb)) {
-		r->last_retransm = buf_seqno(skb);
-		r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
-		r->stale_cnt = 0;
-	} else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
-		link_retransmit_failure(l, skb);
-		if (link_is_bc_sndlink(l))
-			return TIPC_LINK_DOWN_EVT;
-		return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
-	}
+	trace_tipc_link_retrans(r, from, to, &l->transmq);
+
+	if (link_retransmit_failure(l, r, &rc))
+		return rc;
 
 	skb_queue_walk(&l->transmq, skb) {
 		hdr = buf_msg(skb);
@@ -1055,7 +1147,11 @@
 			continue;
 		if (more(msg_seqno(hdr), to))
 			break;
-		_skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
+
+		if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
+			continue;
+		TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+		_skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC);
 		if (!_skb)
 			return 0;
 		hdr = buf_msg(_skb);
@@ -1064,6 +1160,10 @@
 		_skb->priority = TC_PRIO_CONTROL;
 		__skb_queue_tail(xmitq, _skb);
 		l->stats.retransmitted++;
+
+		/* Increase actual retrans counter & mark first time */
+		if (!TIPC_SKB_CB(skb)->retr_cnt++)
+			TIPC_SKB_CB(skb)->retr_stamp = jiffies;
 	}
 	return 0;
 }
@@ -1088,7 +1188,7 @@
 			skb_queue_tail(mc_inputq, skb);
 			return true;
 		}
-		/* else: fall through */
+		/* fall through */
 	case CONN_MANAGER:
 		skb_queue_tail(inputq, skb);
 		return true;
@@ -1107,7 +1207,7 @@
 	default:
 		pr_warn("Dropping received illegal msg type\n");
 		kfree_skb(skb);
-		return false;
+		return true;
 	};
 }
 
@@ -1116,34 +1216,14 @@
  * Consumes buffer
  */
 static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
-			   struct sk_buff_head *inputq)
+			   struct sk_buff_head *inputq,
+			   struct sk_buff **reasm_skb)
 {
 	struct tipc_msg *hdr = buf_msg(skb);
-	struct sk_buff **reasm_skb = &l->reasm_buf;
 	struct sk_buff *iskb;
 	struct sk_buff_head tmpq;
 	int usr = msg_user(hdr);
-	int rc = 0;
 	int pos = 0;
-	int ipos = 0;
-
-	if (unlikely(usr == TUNNEL_PROTOCOL)) {
-		if (msg_type(hdr) == SYNCH_MSG) {
-			__skb_queue_purge(&l->deferdq);
-			goto drop;
-		}
-		if (!tipc_msg_extract(skb, &iskb, &ipos))
-			return rc;
-		kfree_skb(skb);
-		skb = iskb;
-		hdr = buf_msg(skb);
-		if (less(msg_seqno(hdr), l->drop_point))
-			goto drop;
-		if (tipc_data_input(l, skb, inputq))
-			return rc;
-		usr = msg_user(hdr);
-		reasm_skb = &l->failover_reasm_skb;
-	}
 
 	if (usr == MSG_BUNDLER) {
 		skb_queue_head_init(&tmpq);
@@ -1168,11 +1248,83 @@
 		tipc_link_bc_init_rcv(l->bc_rcvlink, hdr);
 		tipc_bcast_unlock(l->net);
 	}
-drop:
+
 	kfree_skb(skb);
 	return 0;
 }
 
+/* tipc_link_tnl_rcv() - receive TUNNEL_PROTOCOL message, drop or process the
+ *			 inner message along with the ones in the old link's
+ *			 deferdq
+ * @l: tunnel link
+ * @skb: TUNNEL_PROTOCOL message
+ * @inputq: queue to put messages ready for delivery
+ */
+static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb,
+			     struct sk_buff_head *inputq)
+{
+	struct sk_buff **reasm_skb = &l->failover_reasm_skb;
+	struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg;
+	struct sk_buff_head *fdefq = &l->failover_deferdq;
+	struct tipc_msg *hdr = buf_msg(skb);
+	struct sk_buff *iskb;
+	int ipos = 0;
+	int rc = 0;
+	u16 seqno;
+
+	if (msg_type(hdr) == SYNCH_MSG) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	/* Not a fragment? */
+	if (likely(!msg_nof_fragms(hdr))) {
+		if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) {
+			pr_warn_ratelimited("Unable to extract msg, defq: %d\n",
+					    skb_queue_len(fdefq));
+			return 0;
+		}
+		kfree_skb(skb);
+	} else {
+		/* Set fragment type for buf_append */
+		if (msg_fragm_no(hdr) == 1)
+			msg_set_type(hdr, FIRST_FRAGMENT);
+		else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr))
+			msg_set_type(hdr, FRAGMENT);
+		else
+			msg_set_type(hdr, LAST_FRAGMENT);
+
+		if (!tipc_buf_append(reasm_tnlmsg, &skb)) {
+			/* Successful but non-complete reassembly? */
+			if (*reasm_tnlmsg || link_is_bc_rcvlink(l))
+				return 0;
+			pr_warn_ratelimited("Unable to reassemble tunnel msg\n");
+			return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+		}
+		iskb = skb;
+	}
+
+	do {
+		seqno = buf_seqno(iskb);
+		if (unlikely(less(seqno, l->drop_point))) {
+			kfree_skb(iskb);
+			continue;
+		}
+		if (unlikely(seqno != l->drop_point)) {
+			__tipc_skb_queue_sorted(fdefq, seqno, iskb);
+			continue;
+		}
+
+		l->drop_point++;
+		if (!tipc_data_input(l, iskb, inputq))
+			rc |= tipc_link_input(l, iskb, inputq, reasm_skb);
+		if (unlikely(rc))
+			break;
+	} while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point)));
+
+	return rc;
+}
+
 static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
 {
 	bool released = false;
@@ -1188,6 +1340,121 @@
 	return released;
 }
 
+/* tipc_build_gap_ack_blks - build Gap ACK blocks
+ * @l: tipc link on which data may have arrived with gaps in sequence
+ * @data: data buffer to store the Gap ACK blocks after built
+ *
+ * returns the actual length of the Gap ACK block record written to @data
+ */
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data)
+{
+	struct sk_buff *skb = skb_peek(&l->deferdq);
+	struct tipc_gap_ack_blks *ga = data;
+	u16 len, expect, seqno = 0;
+	u8 n = 0;
+
+	if (!skb)
+		goto exit;
+
+	expect = buf_seqno(skb);
+	skb_queue_walk(&l->deferdq, skb) {
+		seqno = buf_seqno(skb);
+		if (unlikely(more(seqno, expect))) {
+			ga->gacks[n].ack = htons(expect - 1);
+			ga->gacks[n].gap = htons(seqno - expect);
+			if (++n >= MAX_GAP_ACK_BLKS) {
+				pr_info_ratelimited("Too few Gap ACK blocks!\n");
+				goto exit;
+			}
+		} else if (unlikely(less(seqno, expect))) {
+			pr_warn("Unexpected skb in deferdq!\n");
+			continue;
+		}
+		expect = seqno + 1;
+	}
+
+	/* last block */
+	ga->gacks[n].ack = htons(seqno);
+	ga->gacks[n].gap = 0;
+	n++;
+
+exit:
+	len = tipc_gap_ack_blks_sz(n);
+	ga->len = htons(len);
+	ga->gack_cnt = n;
+	return len;
+}
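
A user-space sketch of the encoding loop above, using the deferred-queue example from the tipc_gap_ack comment in msg.h (seqnos 10 11 13 14 15 20) and ignoring network byte order and the 16-bit wraparound arithmetic of less()/more():

#include <stdio.h>

int main(void)
{
	unsigned int defq[] = {10, 11, 13, 14, 15, 20};
	int n = sizeof(defq) / sizeof(defq[0]);
	unsigned int expect = defq[0];

	for (int i = 0; i < n; i++) {
		if (defq[i] > expect)
			printf("<ack %u, gap %u> ",
			       expect - 1, defq[i] - expect);
		expect = defq[i] + 1;
	}
	/* final block closes the record with gap = 0 */
	printf("<ack %u, gap 0>\n", expect - 1);
	return 0;
}

This prints <ack 11, gap 1> <ack 15, gap 4> <ack 20, gap 0>, matching the worked example in the struct comment.
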
+
+/* tipc_link_advance_transmq - advance TIPC link transmq queue by releasing
+ *			       acked packets, also doing retransmissions if
+ *			       gaps found
+ * @l: tipc link with transmq queue to be advanced
+ * @acked: seqno of last packet acked by peer without any gaps before
+ * @gap: # of gap packets
+ * @ga: buffer pointer to Gap ACK blocks from peer
+ * @xmitq: queue for accumulating the retransmitted packets if any
+ *
+ * In case of repeated retransmit failures, the call returns shortly with
+ * an error code (e.g. TIPC_LINK_DOWN_EVT)
+ */
+static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+				     struct tipc_gap_ack_blks *ga,
+				     struct sk_buff_head *xmitq)
+{
+	struct sk_buff *skb, *_skb, *tmp;
+	struct tipc_msg *hdr;
+	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+	u16 ack = l->rcv_nxt - 1;
+	bool passed = false;
+	u16 seqno, n = 0;
+	int rc = 0;
+
+	skb_queue_walk_safe(&l->transmq, skb, tmp) {
+		seqno = buf_seqno(skb);
+
+next_gap_ack:
+		if (less_eq(seqno, acked)) {
+			/* release skb */
+			__skb_unlink(skb, &l->transmq);
+			kfree_skb(skb);
+		} else if (less_eq(seqno, acked + gap)) {
+			/* First, check whether repeated retrans failures occur */
+			if (!passed && link_retransmit_failure(l, l, &rc))
+				return rc;
+			passed = true;
+
+			/* retransmit skb if unrestricted */
+			if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
+				continue;
+			TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
+			_skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE,
+					   GFP_ATOMIC);
+			if (!_skb)
+				continue;
+			hdr = buf_msg(_skb);
+			msg_set_ack(hdr, ack);
+			msg_set_bcast_ack(hdr, bc_ack);
+			_skb->priority = TC_PRIO_CONTROL;
+			__skb_queue_tail(xmitq, _skb);
+			l->stats.retransmitted++;
+
+			/* Increase actual retrans counter & mark first time */
+			if (!TIPC_SKB_CB(skb)->retr_cnt++)
+				TIPC_SKB_CB(skb)->retr_stamp = jiffies;
+		} else {
+			/* retry with Gap ACK blocks if any */
+			if (!ga || n >= ga->gack_cnt)
+				break;
+			acked = ntohs(ga->gacks[n].ack);
+			gap = ntohs(ga->gacks[n].gap);
+			n++;
+			goto next_gap_ack;
+		}
+	}
+
+	return 0;
+}
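
A simplified sketch (assumed semantics, plain integers instead of skbs and mod-2^16 seqno arithmetic) of how the walk above consumes the <ack, gap> pairs: everything up to 'acked' is released, the following 'gap' packets become retransmit candidates, and the walk then jumps to the next block:

#include <stdio.h>

struct gap_ack { unsigned int ack, gap; };

int main(void)
{
	/* blocks as built by the receiver for deferdq 10 11 13 14 15 20 */
	struct gap_ack ga[] = { {11, 1}, {15, 4}, {20, 0} };
	int cnt = 3, n = 0;
	unsigned int acked = 4, gap = 5;	/* from the STATE message */

	for (unsigned int seqno = 1; seqno <= 21; seqno++) {
next_gap_ack:
		if (seqno <= acked) {
			printf("%u: release\n", seqno);
		} else if (seqno <= acked + gap) {
			printf("%u: retransmit candidate\n", seqno);
		} else {
			if (n >= cnt)
				break;
			acked = ga[n].ack;
			gap = ga[n].gap;
			n++;
			goto next_gap_ack;
		}
	}
	return 0;
}
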
+
 /* tipc_link_build_state_msg: prepare link state message for transmission
  *
  * Note that sending of broadcast ack is coordinated among nodes, to reduce
@@ -1242,6 +1509,7 @@
 				    struct sk_buff_head *xmitq)
 {
 	u32 def_cnt = ++l->stats.deferred_recv;
+	u32 defq_len = skb_queue_len(&l->deferdq);
 	int match1, match2;
 
 	if (link_is_bc_rcvlink(l)) {
@@ -1252,7 +1520,7 @@
 		return 0;
 	}
 
-	if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
+	if (defq_len >= 3 && !((defq_len - 3) % 16))
 		tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
 	return 0;
 }
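
With the change above, the NACK cadence no longer depends on the deferred-receive counter: a STATE message is requested as soon as the deferred queue holds 3 packets, and then for every 16th packet after that. A one-line check with made-up queue lengths:

#include <stdio.h>

int main(void)
{
	for (unsigned int defq_len = 0; defq_len <= 40; defq_len++)
		if (defq_len >= 3 && !((defq_len - 3) % 16))
			printf("send NACK at defq_len=%u\n", defq_len);
	return 0;	/* prints 3, 19 and 35 */
}
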
@@ -1266,29 +1534,29 @@
 		  struct sk_buff_head *xmitq)
 {
 	struct sk_buff_head *defq = &l->deferdq;
-	struct tipc_msg *hdr;
+	struct tipc_msg *hdr = buf_msg(skb);
 	u16 seqno, rcv_nxt, win_lim;
 	int rc = 0;
 
+	/* Verify and update link state */
+	if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
+		return tipc_link_proto_rcv(l, skb, xmitq);
+
+	/* Don't send probe at next timeout expiration */
+	l->silent_intv_cnt = 0;
+
 	do {
 		hdr = buf_msg(skb);
 		seqno = msg_seqno(hdr);
 		rcv_nxt = l->rcv_nxt;
 		win_lim = rcv_nxt + TIPC_MAX_LINK_WIN;
 
-		/* Verify and update link state */
-		if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
-			return tipc_link_proto_rcv(l, skb, xmitq);
-
 		if (unlikely(!link_is_up(l))) {
 			if (l->state == LINK_ESTABLISHING)
 				rc = TIPC_LINK_UP_EVT;
 			goto drop;
 		}
 
-		/* Don't send probe at next timeout expiration */
-		l->silent_intv_cnt = 0;
-
 		/* Drop if outside receive window */
 		if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) {
 			l->stats.duplicates++;
@@ -1297,7 +1565,6 @@
 
 		/* Forward queues and wake up waiting users */
 		if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
-			l->stale_cnt = 0;
 			tipc_link_advance_backlog(l, xmitq);
 			if (unlikely(!skb_queue_empty(&l->wakeupq)))
 				link_prepare_wakeup(l);
@@ -1313,13 +1580,16 @@
 		/* Deliver packet */
 		l->rcv_nxt++;
 		l->stats.recv_pkts++;
-		if (!tipc_data_input(l, skb, l->inputq))
-			rc |= tipc_link_input(l, skb, l->inputq);
+
+		if (unlikely(msg_user(hdr) == TUNNEL_PROTOCOL))
+			rc |= tipc_link_tnl_rcv(l, skb, l->inputq);
+		else if (!tipc_data_input(l, skb, l->inputq))
+			rc |= tipc_link_input(l, skb, l->inputq, &l->reasm_buf);
 		if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
 			rc |= tipc_link_build_state_msg(l, xmitq);
 		if (unlikely(rc & ~TIPC_LINK_SND_STATE))
 			break;
-	} while ((skb = __skb_dequeue(defq)));
+	} while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt)));
 
 	return rc;
 drop:
@@ -1340,6 +1610,7 @@
 	struct tipc_mon_state *mstate = &l->mon_state;
 	int dlen = 0;
 	void *data;
+	u16 glen = 0;
 
 	/* Don't send protocol message during reset or link failover */
 	if (tipc_link_is_blocked(l))
@@ -1352,8 +1623,8 @@
 		rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
 
 	skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
-			      tipc_max_domain_size, l->addr,
-			      tipc_own_addr(l->net), 0, 0, 0);
+			      tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ,
+			      l->addr, tipc_own_addr(l->net), 0, 0, 0);
 	if (!skb)
 		return;
 
@@ -1380,13 +1651,19 @@
 		msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
 		msg_set_probe(hdr, probe);
 		msg_set_is_keepalive(hdr, probe || probe_reply);
-		tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
-		msg_set_size(hdr, INT_H_SIZE + dlen);
-		skb_trim(skb, INT_H_SIZE + dlen);
+		if (l->peer_caps & TIPC_GAP_ACK_BLOCK)
+			glen = tipc_build_gap_ack_blks(l, data);
+		tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id);
+		msg_set_size(hdr, INT_H_SIZE + glen + dlen);
+		skb_trim(skb, INT_H_SIZE + glen + dlen);
 		l->stats.sent_states++;
 		l->rcv_unacked = 0;
 	} else {
 		/* RESET_MSG or ACTIVATE_MSG */
+		if (mtyp == ACTIVATE_MSG) {
+			msg_set_dest_session_valid(hdr, 1);
+			msg_set_dest_session(hdr, l->peer_session);
+		}
 		msg_set_max_pkt(hdr, l->advertised_mtu);
 		strcpy(data, l->if_name);
 		msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME);
@@ -1398,6 +1675,7 @@
 		l->stats.sent_nacks++;
 	skb->priority = TC_PRIO_CONTROL;
 	__skb_queue_tail(xmitq, skb);
+	trace_tipc_proto_build(skb, false, l->name);
 }
 
 void tipc_link_create_dummy_tnl_msg(struct tipc_link *l,
@@ -1409,7 +1687,7 @@
 	struct sk_buff *skb;
 	u32 dnode = l->addr;
 
-	skb_queue_head_init(&tnlq);
+	__skb_queue_head_init(&tnlq);
 	skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG,
 			      INT_H_SIZE, BASIC_H_SIZE,
 			      dnode, onode, 0, 0, 0);
@@ -1436,17 +1714,22 @@
 void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
 			   int mtyp, struct sk_buff_head *xmitq)
 {
+	struct sk_buff_head *fdefq = &tnl->failover_deferdq;
 	struct sk_buff *skb, *tnlskb;
 	struct tipc_msg *hdr, tnlhdr;
 	struct sk_buff_head *queue = &l->transmq;
-	struct sk_buff_head tmpxq, tnlq;
+	struct sk_buff_head tmpxq, tnlq, frags;
 	u16 pktlen, pktcnt, seqno = l->snd_nxt;
+	bool pktcnt_need_update = false;
+	u16 syncpt;
+	int rc;
 
 	if (!tnl)
 		return;
 
-	skb_queue_head_init(&tnlq);
-	skb_queue_head_init(&tmpxq);
+	__skb_queue_head_init(&tnlq);
+	__skb_queue_head_init(&tmpxq);
+	__skb_queue_head_init(&frags);
 
 	/* At least one packet required for safe algorithm => add dummy */
 	skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
@@ -1456,14 +1739,43 @@
 		pr_warn("%sunable to create tunnel packet\n", link_co_err);
 		return;
 	}
-	skb_queue_tail(&tnlq, skb);
+	__skb_queue_tail(&tnlq, skb);
 	tipc_link_xmit(l, &tnlq, &tmpxq);
 	__skb_queue_purge(&tmpxq);
 
+	/* Link Synching:
+	 * From now on, send only one single ("dummy") SYNCH message
+	 * to peer. The SYNCH message does not contain any data, just
+	 * a header conveying the synch point to the peer.
+	 */
+	if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
+		tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG,
+					 INT_H_SIZE, 0, l->addr,
+					 tipc_own_addr(l->net),
+					 0, 0, 0);
+		if (!tnlskb) {
+			pr_warn("%sunable to create dummy SYNCH_MSG\n",
+				link_co_err);
+			return;
+		}
+
+		hdr = buf_msg(tnlskb);
+		syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1;
+		msg_set_syncpt(hdr, syncpt);
+		msg_set_bearer_id(hdr, l->peer_bearer_id);
+		__skb_queue_tail(&tnlq, tnlskb);
+		tipc_link_xmit(tnl, &tnlq, xmitq);
+		return;
+	}
+
 	/* Initialize reusable tunnel packet header */
 	tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
 		      mtyp, INT_H_SIZE, l->addr);
-	pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq);
+	if (mtyp == SYNCH_MSG)
+		pktcnt = l->snd_nxt - buf_seqno(skb_peek(&l->transmq));
+	else
+		pktcnt = skb_queue_len(&l->transmq);
+	pktcnt += skb_queue_len(&l->backlogq);
 	msg_set_msgcnt(&tnlhdr, pktcnt);
 	msg_set_bearer_id(&tnlhdr, l->peer_bearer_id);
 tnl:
@@ -1473,6 +1785,39 @@
 		if (queue == &l->backlogq)
 			msg_set_seqno(hdr, seqno++);
 		pktlen = msg_size(hdr);
+
+		/* Tunnel link MTU is not large enough? This could be
+		 * due to:
+		 * 1) the link MTU has just changed or been set differently;
+		 * 2) a FAILOVER on top of a SYNCH message
+		 *
+		 * The 2nd case should not happen if peer supports
+		 * TIPC_TUNNEL_ENHANCED
+		 */
+		if (pktlen > tnl->mtu - INT_H_SIZE) {
+			if (mtyp == FAILOVER_MSG &&
+			    (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
+				rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu,
+						       &frags);
+				if (rc) {
+					pr_warn("%sunable to frag msg: rc %d\n",
+						link_co_err, rc);
+					return;
+				}
+				pktcnt += skb_queue_len(&frags) - 1;
+				pktcnt_need_update = true;
+				skb_queue_splice_tail_init(&frags, &tnlq);
+				continue;
+			}
+			/* Unfortunately, the peer doesn't support
+			 * TIPC_TUNNEL_ENHANCED => just warn and return!
+			 */
+			pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n",
+					    link_co_err, msg_user(hdr),
+					    msg_type(hdr), msg_size(hdr));
+			return;
+		}
+
 		msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
 		tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC);
 		if (!tnlskb) {
@@ -1488,12 +1833,60 @@
 		goto tnl;
 	}
 
+	if (pktcnt_need_update)
+		skb_queue_walk(&tnlq, skb) {
+			hdr = buf_msg(skb);
+			msg_set_msgcnt(hdr, pktcnt);
+		}
+
 	tipc_link_xmit(tnl, &tnlq, xmitq);
 
 	if (mtyp == FAILOVER_MSG) {
 		tnl->drop_point = l->rcv_nxt;
 		tnl->failover_reasm_skb = l->reasm_buf;
 		l->reasm_buf = NULL;
+
+		/* Failover the link's deferdq */
+		if (unlikely(!skb_queue_empty(fdefq))) {
+			pr_warn("Link failover deferdq not empty: %d!\n",
+				skb_queue_len(fdefq));
+			__skb_queue_purge(fdefq);
+		}
+		skb_queue_splice_init(&l->deferdq, fdefq);
+	}
+}
+
+/**
+ * tipc_link_failover_prepare() - prepare tnl for link failover
+ *
+ * This is a special version of the precursor - tipc_link_tnl_prepare(),
+ * see the tipc_node_link_failover() for details
+ *
+ * @l: failover link
+ * @tnl: tunnel link
+ * @xmitq: queue for messages to be xmited
+ */
+void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
+				struct sk_buff_head *xmitq)
+{
+	struct sk_buff_head *fdefq = &tnl->failover_deferdq;
+
+	tipc_link_create_dummy_tnl_msg(tnl, xmitq);
+
+	/* This failover link endpoint was never established before,
+	 * so it has not received anything from peer.
+	 * Otherwise, it must be a normal failover situation or the
+	 * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
+	 * would have to start over from scratch instead.
+	 */
+	tnl->drop_point = 1;
+	tnl->failover_reasm_skb = NULL;
+
+	/* Initiate the link's failover deferdq */
+	if (unlikely(!skb_queue_empty(fdefq))) {
+		pr_warn("Link failover deferdq not empty: %d!\n",
+			skb_queue_len(fdefq));
+		__skb_queue_purge(fdefq);
 	}
 }
 
@@ -1547,6 +1940,7 @@
 			       struct sk_buff_head *xmitq)
 {
 	struct tipc_msg *hdr = buf_msg(skb);
+	struct tipc_gap_ack_blks *ga = NULL;
 	u16 rcvgap = 0;
 	u16 ack = msg_ack(hdr);
 	u16 gap = msg_seq_gap(hdr);
@@ -1557,10 +1951,12 @@
 	u16 dlen = msg_data_sz(hdr);
 	int mtyp = msg_type(hdr);
 	bool reply = msg_probe(hdr);
+	u16 glen = 0;
 	void *data;
 	char *if_name;
 	int rc = 0;
 
+	trace_tipc_proto_rcv(skb, false, l->name);
 	if (tipc_link_is_blocked(l) || !xmitq)
 		goto exit;
 
@@ -1571,8 +1967,11 @@
 	hdr = buf_msg(skb);
 	data = msg_data(hdr);
 
-	if (!tipc_link_validate_msg(l, hdr))
+	if (!tipc_link_validate_msg(l, hdr)) {
+		trace_tipc_skb_dump(skb, false, "PROTO invalid (1)!");
+		trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (1)!");
 		goto exit;
+	}
 
 	switch (mtyp) {
 	case RESET_MSG:
@@ -1599,6 +1998,17 @@
 			rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
 			break;
 		}
+
+		/* If this endpoint was re-created while peer was ESTABLISHING
+		 * it doesn't know current session number. Force re-synch.
+		 */
+		if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) &&
+		    l->session != msg_dest_session(hdr)) {
+			if (less(l->session, msg_dest_session(hdr)))
+				l->session = msg_dest_session(hdr) + 1;
+			break;
+		}
+
 		/* ACTIVATE_MSG serves as PEER_RESET if link is already down */
 		if (mtyp == RESET_MSG || !link_is_up(l))
 			rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
@@ -1639,7 +2049,17 @@
 				rc = TIPC_LINK_UP_EVT;
 			break;
 		}
-		tipc_mon_rcv(l->net, data, dlen, l->addr,
+
+		/* Receive Gap ACK blocks from peer if any */
+		if (l->peer_caps & TIPC_GAP_ACK_BLOCK) {
+			ga = (struct tipc_gap_ack_blks *)data;
+			glen = ntohs(ga->len);
+			/* sanity check: if failed, ignore Gap ACK blocks */
+			if (glen != tipc_gap_ack_blks_sz(ga->gack_cnt))
+				ga = NULL;
+		}
+
+		tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
 			     &l->mon_state, l->bearer_id);
 
 		/* Send NACK if peer has sent pkts we haven't received yet */
@@ -1648,13 +2068,12 @@
 		if (rcvgap || reply)
 			tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
 						  rcvgap, 0, 0, xmitq);
-		tipc_link_release_pkts(l, ack);
+
+		rc |= tipc_link_advance_transmq(l, ack, gap, ga, xmitq);
 
 		/* If NACK, retransmit will now start at right position */
-		if (gap) {
-			rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq);
+		if (gap)
 			l->stats.recv_nacks++;
-		}
 
 		tipc_link_advance_backlog(l, xmitq);
 		if (unlikely(!skb_queue_empty(&l->wakeupq)))
@@ -1737,42 +2156,6 @@
 		l->rcv_nxt = peers_snd_nxt;
 }
 
-/* link_bc_retr eval()- check if the indicated range can be retransmitted now
- * - Adjust permitted range if there is overlap with previous retransmission
- */
-static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to)
-{
-	unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr);
-
-	if (less(*to, *from))
-		return false;
-
-	/* New retransmission request */
-	if ((elapsed > TIPC_BC_RETR_LIMIT) ||
-	    less(*to, l->prev_from) || more(*from, l->prev_to)) {
-		l->prev_from = *from;
-		l->prev_to = *to;
-		l->prev_retr = jiffies;
-		return true;
-	}
-
-	/* Inside range of previous retransmit */
-	if (!less(*from, l->prev_from) && !more(*to, l->prev_to))
-		return false;
-
-	/* Fully or partially outside previous range => exclude overlap */
-	if (less(*from, l->prev_from)) {
-		*to = l->prev_from - 1;
-		l->prev_from = *from;
-	}
-	if (more(*to, l->prev_to)) {
-		*from = l->prev_to + 1;
-		l->prev_to = *to;
-	}
-	l->prev_retr = jiffies;
-	return true;
-}
-
 /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
  */
 int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
@@ -1803,8 +2186,7 @@
 	if (more(peers_snd_nxt, l->rcv_nxt + l->window))
 		return rc;
 
-	if (link_bc_retr_eval(snd_l, &from, &to))
-		rc = tipc_link_retrans(snd_l, l, from, to, xmitq);
+	rc = tipc_link_bc_retrans(snd_l, l, from, to, xmitq);
 
 	l->snd_nxt = peers_snd_nxt;
 	if (link_bc_rcv_gap(l))
@@ -1852,6 +2234,7 @@
 	if (!more(acked, l->acked))
 		return;
 
+	trace_tipc_link_bc_ack(l, l->acked, acked, &snd_l->transmq);
 	/* Skip over packets peer has already acked */
 	skb_queue_walk(&snd_l->transmq, skb) {
 		if (more(buf_seqno(skb), l->acked))
@@ -1899,7 +2282,7 @@
 
 	if (dnode == tipc_own_addr(l->net)) {
 		tipc_link_bc_ack_rcv(l, acked, xmitq);
-		rc = tipc_link_retrans(l->bc_sndlink, l, from, to, xmitq);
+		rc = tipc_link_bc_retrans(l->bc_sndlink, l, from, to, xmitq);
 		l->stats.recv_nacks++;
 		return rc;
 	}
@@ -1950,8 +2333,8 @@
 {
 	int err;
 
-	err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop,
-			       tipc_nl_prop_policy, NULL);
+	err = nla_parse_nested_deprecated(props, TIPC_NLA_PROP_MAX, prop,
+					  tipc_nl_prop_policy, NULL);
 	if (err)
 		return err;
 
@@ -2030,7 +2413,7 @@
 			(s->accu_queue_sz / s->queue_sz_counts) : 0}
 	};
 
-	stats = nla_nest_start(skb, TIPC_NLA_LINK_STATS);
+	stats = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS);
 	if (!stats)
 		return -EMSGSIZE;
 
@@ -2062,7 +2445,7 @@
 	if (!hdr)
 		return -EMSGSIZE;
 
-	attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
+	attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK);
 	if (!attrs)
 		goto msg_full;
 
@@ -2084,7 +2467,7 @@
 		if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE))
 			goto attr_msg_full;
 
-	prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP);
+	prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP);
 	if (!prop)
 		goto attr_msg_full;
 	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority))
@@ -2151,7 +2534,7 @@
 			(stats->accu_queue_sz / stats->queue_sz_counts) : 0}
 	};
 
-	nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS);
+	nest = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS);
 	if (!nest)
 		return -EMSGSIZE;
 
@@ -2175,6 +2558,8 @@
 	struct nlattr *attrs;
 	struct nlattr *prop;
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	u32 bc_mode = tipc_bcast_get_broadcast_mode(net);
+	u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net);
 	struct tipc_link *bcl = tn->bcl;
 
 	if (!bcl)
@@ -2189,7 +2574,7 @@
 		return -EMSGSIZE;
 	}
 
-	attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
+	attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK);
 	if (!attrs)
 		goto msg_full;
 
@@ -2206,11 +2591,17 @@
 	if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0))
 		goto attr_msg_full;
 
-	prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP);
+	prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP);
 	if (!prop)
 		goto attr_msg_full;
 	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window))
 		goto prop_msg_full;
+	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode))
+		goto prop_msg_full;
+	if (bc_mode & BCLINK_MODE_SEL)
+		if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST_RATIO,
+				bc_ratio))
+			goto prop_msg_full;
 	nla_nest_end(msg->skb, prop);
 
 	err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats);
@@ -2255,3 +2646,122 @@
 {
 	l->abort_limit = limit;
 }
+
+char *tipc_link_name_ext(struct tipc_link *l, char *buf)
+{
+	if (!l)
+		scnprintf(buf, TIPC_MAX_LINK_NAME, "null");
+	else if (link_is_bc_sndlink(l))
+		scnprintf(buf, TIPC_MAX_LINK_NAME, "broadcast-sender");
+	else if (link_is_bc_rcvlink(l))
+		scnprintf(buf, TIPC_MAX_LINK_NAME,
+			  "broadcast-receiver, peer %x", l->addr);
+	else
+		memcpy(buf, l->name, TIPC_MAX_LINK_NAME);
+
+	return buf;
+}
+
+/**
+ * tipc_link_dump - dump TIPC link data
+ * @l: tipc link to be dumped
+ * @dqueues: bitmask selecting which link queues to dump:
+ *           - TIPC_DUMP_NONE: don't dump link queues
+ *           - TIPC_DUMP_TRANSMQ: dump link transmq queue
+ *           - TIPC_DUMP_BACKLOGQ: dump link backlog queue
+ *           - TIPC_DUMP_DEFERDQ: dump link deferred queue
+ *           - TIPC_DUMP_INPUTQ: dump link input queue
+ *           - TIPC_DUMP_WAKEUP: dump link wakeup queue
+ *           - TIPC_DUMP_ALL: dump all the link queues above
+ * @buf: buffer to which the formatted dump data is written
+ */
+int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf)
+{
+	int i = 0;
+	size_t sz = (dqueues) ? LINK_LMAX : LINK_LMIN;
+	struct sk_buff_head *list;
+	struct sk_buff *hskb, *tskb;
+	u32 len;
+
+	if (!l) {
+		i += scnprintf(buf, sz, "link data: (null)\n");
+		return i;
+	}
+
+	i += scnprintf(buf, sz, "link data: %x", l->addr);
+	i += scnprintf(buf + i, sz - i, " %x", l->state);
+	i += scnprintf(buf + i, sz - i, " %u", l->in_session);
+	i += scnprintf(buf + i, sz - i, " %u", l->session);
+	i += scnprintf(buf + i, sz - i, " %u", l->peer_session);
+	i += scnprintf(buf + i, sz - i, " %u", l->snd_nxt);
+	i += scnprintf(buf + i, sz - i, " %u", l->rcv_nxt);
+	i += scnprintf(buf + i, sz - i, " %u", l->snd_nxt_state);
+	i += scnprintf(buf + i, sz - i, " %u", l->rcv_nxt_state);
+	i += scnprintf(buf + i, sz - i, " %x", l->peer_caps);
+	i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt);
+	i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt);
+	i += scnprintf(buf + i, sz - i, " %u", 0);
+	i += scnprintf(buf + i, sz - i, " %u", 0);
+	i += scnprintf(buf + i, sz - i, " %u", l->acked);
+
+	list = &l->transmq;
+	len = skb_queue_len(list);
+	hskb = skb_peek(list);
+	tskb = skb_peek_tail(list);
+	i += scnprintf(buf + i, sz - i, " | %u %u %u", len,
+		       (hskb) ? msg_seqno(buf_msg(hskb)) : 0,
+		       (tskb) ? msg_seqno(buf_msg(tskb)) : 0);
+
+	list = &l->deferdq;
+	len = skb_queue_len(list);
+	hskb = skb_peek(list);
+	tskb = skb_peek_tail(list);
+	i += scnprintf(buf + i, sz - i, " | %u %u %u", len,
+		       (hskb) ? msg_seqno(buf_msg(hskb)) : 0,
+		       (tskb) ? msg_seqno(buf_msg(tskb)) : 0);
+
+	list = &l->backlogq;
+	len = skb_queue_len(list);
+	hskb = skb_peek(list);
+	tskb = skb_peek_tail(list);
+	i += scnprintf(buf + i, sz - i, " | %u %u %u", len,
+		       (hskb) ? msg_seqno(buf_msg(hskb)) : 0,
+		       (tskb) ? msg_seqno(buf_msg(tskb)) : 0);
+
+	list = l->inputq;
+	len = skb_queue_len(list);
+	hskb = skb_peek(list);
+	tskb = skb_peek_tail(list);
+	i += scnprintf(buf + i, sz - i, " | %u %u %u\n", len,
+		       (hskb) ? msg_seqno(buf_msg(hskb)) : 0,
+		       (tskb) ? msg_seqno(buf_msg(tskb)) : 0);
+
+	if (dqueues & TIPC_DUMP_TRANSMQ) {
+		i += scnprintf(buf + i, sz - i, "transmq: ");
+		i += tipc_list_dump(&l->transmq, false, buf + i);
+	}
+	if (dqueues & TIPC_DUMP_BACKLOGQ) {
+		i += scnprintf(buf + i, sz - i,
+			       "backlogq: <%u %u %u %u %u>, ",
+			       l->backlog[TIPC_LOW_IMPORTANCE].len,
+			       l->backlog[TIPC_MEDIUM_IMPORTANCE].len,
+			       l->backlog[TIPC_HIGH_IMPORTANCE].len,
+			       l->backlog[TIPC_CRITICAL_IMPORTANCE].len,
+			       l->backlog[TIPC_SYSTEM_IMPORTANCE].len);
+		i += tipc_list_dump(&l->backlogq, false, buf + i);
+	}
+	if (dqueues & TIPC_DUMP_DEFERDQ) {
+		i += scnprintf(buf + i, sz - i, "deferdq: ");
+		i += tipc_list_dump(&l->deferdq, false, buf + i);
+	}
+	if (dqueues & TIPC_DUMP_INPUTQ) {
+		i += scnprintf(buf + i, sz - i, "inputq: ");
+		i += tipc_list_dump(l->inputq, false, buf + i);
+	}
+	if (dqueues & TIPC_DUMP_WAKEUP) {
+		i += scnprintf(buf + i, sz - i, "wakeup: ");
+		i += tipc_list_dump(&l->wakeupq, false, buf + i);
+	}
+
+	return i;
+}
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 90488c5..adcad65 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -90,6 +90,8 @@
 			   int mtyp, struct sk_buff_head *xmitq);
 void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl,
 				    struct sk_buff_head *xmitq);
+void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
+				struct sk_buff_head *xmitq);
 void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
 int tipc_link_fsm_evt(struct tipc_link *l, int evt);
 bool tipc_link_is_up(struct tipc_link *l);
@@ -109,6 +111,7 @@
 u16 tipc_link_acked(struct tipc_link *l);
 u32 tipc_link_id(struct tipc_link *l);
 char *tipc_link_name(struct tipc_link *l);
+char *tipc_link_name_ext(struct tipc_link *l, char *buf);
 u32 tipc_link_state(struct tipc_link *l);
 char tipc_link_plane(struct tipc_link *l);
 int tipc_link_prio(struct tipc_link *l);
@@ -147,4 +150,5 @@
 			  struct sk_buff_head *xmitq);
 int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 			  struct sk_buff_head *xmitq);
+bool tipc_link_too_silent(struct tipc_link *l);
 #endif
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 67f6938..6a6eae8 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -696,7 +696,7 @@
 	if (!hdr)
 		return -EMSGSIZE;
 
-	attrs = nla_nest_start(msg->skb, TIPC_NLA_MON_PEER);
+	attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON_PEER);
 	if (!attrs)
 		goto msg_full;
 
@@ -785,7 +785,7 @@
 	if (!hdr)
 		return -EMSGSIZE;
 
-	attrs = nla_nest_start(msg->skb, TIPC_NLA_MON);
+	attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON);
 	if (!attrs)
 		goto msg_full;
 
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index b618910..922d262 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -244,6 +244,65 @@
 }
 
 /**
+ * tipc_msg_fragment - build a fragment skb list for TIPC message
+ *
+ * @skb: TIPC message skb
+ * @hdr: internal msg header to be put on the top of the fragments
+ * @pktmax: max size of a fragment incl. the header
+ * @frags: returned fragment skb list
+ *
+ * Returns 0 if the fragmentation is successful, otherwise: -EINVAL
+ * or -ENOMEM
+ */
+int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
+		      int pktmax, struct sk_buff_head *frags)
+{
+	int pktno, nof_fragms, dsz, dmax, eat;
+	struct tipc_msg *_hdr;
+	struct sk_buff *_skb;
+	u8 *data;
+
+	/* Non-linear buffer? */
+	if (skb_linearize(skb))
+		return -ENOMEM;
+
+	data = (u8 *)skb->data;
+	dsz = msg_size(buf_msg(skb));
+	dmax = pktmax - INT_H_SIZE;
+	if (dsz <= dmax || !dmax)
+		return -EINVAL;
+
+	nof_fragms = dsz / dmax + 1;
+	for (pktno = 1; pktno <= nof_fragms; pktno++) {
+		if (pktno < nof_fragms)
+			eat = dmax;
+		else
+			eat = dsz % dmax;
+		/* Allocate a new fragment */
+		_skb = tipc_buf_acquire(INT_H_SIZE + eat, GFP_ATOMIC);
+		if (!_skb)
+			goto error;
+		skb_orphan(_skb);
+		__skb_queue_tail(frags, _skb);
+		/* Copy header & data to the fragment */
+		skb_copy_to_linear_data(_skb, hdr, INT_H_SIZE);
+		skb_copy_to_linear_data_offset(_skb, INT_H_SIZE, data, eat);
+		data += eat;
+		/* Update the fragment's header */
+		_hdr = buf_msg(_skb);
+		msg_set_fragm_no(_hdr, pktno);
+		msg_set_nof_fragms(_hdr, nof_fragms);
+		msg_set_size(_hdr, INT_H_SIZE + eat);
+	}
+	return 0;
+
+error:
+	__skb_queue_purge(frags);
+	__skb_queue_head_init(frags);
+	return -ENOMEM;
+}
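
A worked example of the fragment sizing above, as a user-space sketch: the message is cut into nof_fragms pieces of at most dmax payload bytes, each carrying a fresh internal header (the 40-byte INT_H_SIZE here is an assumption for illustration):

#include <stdio.h>

int main(void)
{
	int dsz = 3000;			/* message size incl. its header */
	int pktmax = 1440, hdr = 40;	/* assumed INT_H_SIZE of 40 */
	int dmax = pktmax - hdr;	/* 1400 payload bytes per fragment */
	int nof_fragms = dsz / dmax + 1;	/* 3 */

	for (int pktno = 1; pktno <= nof_fragms; pktno++) {
		int eat = (pktno < nof_fragms) ? dmax : dsz % dmax;

		printf("fragment %d/%d carries %d bytes\n",
		       pktno, nof_fragms, eat);
	}
	return 0;
}
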
+
+/**
  * tipc_msg_build - create buffer chain containing specified header and data
  * @mhdr: Message header, to be prepended to data
  * @m: User message
@@ -484,10 +543,7 @@
 	bmsg = buf_msg(_skb);
 	tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
 		      INT_H_SIZE, dnode);
-	if (msg_isdata(msg))
-		msg_set_importance(bmsg, TIPC_CRITICAL_IMPORTANCE);
-	else
-		msg_set_importance(bmsg, TIPC_SYSTEM_IMPORTANCE);
+	msg_set_importance(bmsg, msg_importance(msg));
 	msg_set_seqno(bmsg, msg_seqno(msg));
 	msg_set_ack(bmsg, msg_ack(msg));
 	msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
@@ -499,54 +555,56 @@
 /**
  * tipc_msg_reverse(): swap source and destination addresses and add error code
  * @own_node: originating node id for reversed message
- * @skb:  buffer containing message to be reversed; may be replaced.
+ * @skb:  buffer containing message to be reversed; will be consumed
  * @err:  error code to be set in message, if any
- * Consumes buffer at failure
+ * Replaces consumed buffer with new one when successful
  * Returns true if success, otherwise false
  */
 bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
 {
 	struct sk_buff *_skb = *skb;
-	struct tipc_msg *hdr;
-	struct tipc_msg ohdr;
-	int dlen;
+	struct tipc_msg *_hdr, *hdr;
+	int hlen, dlen;
 
 	if (skb_linearize(_skb))
 		goto exit;
-	hdr = buf_msg(_skb);
-	dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE);
-	if (msg_dest_droppable(hdr))
+	_hdr = buf_msg(_skb);
+	dlen = min_t(uint, msg_data_sz(_hdr), MAX_FORWARD_SIZE);
+	hlen = msg_hdr_sz(_hdr);
+
+	if (msg_dest_droppable(_hdr))
 		goto exit;
-	if (msg_errcode(hdr))
+	if (msg_errcode(_hdr))
 		goto exit;
 
-	/* Take a copy of original header before altering message */
-	memcpy(&ohdr, hdr, msg_hdr_sz(hdr));
+	/* Never return SHORT header */
+	if (hlen == SHORT_H_SIZE)
+		hlen = BASIC_H_SIZE;
 
-	/* Never return SHORT header; expand by replacing buffer if necessary */
-	if (msg_short(hdr)) {
-		*skb = tipc_buf_acquire(BASIC_H_SIZE + dlen, GFP_ATOMIC);
-		if (!*skb)
-			goto exit;
-		memcpy((*skb)->data + BASIC_H_SIZE, msg_data(hdr), dlen);
-		kfree_skb(_skb);
-		_skb = *skb;
-		hdr = buf_msg(_skb);
-		memcpy(hdr, &ohdr, BASIC_H_SIZE);
-		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
-	}
+	/* Don't return data along with SYN+, - sender has a clone */
+	if (msg_is_syn(_hdr) && err == TIPC_ERR_OVERLOAD)
+		dlen = 0;
 
-	/* Now reverse the concerned fields */
+	/* Allocate new buffer to return */
+	*skb = tipc_buf_acquire(hlen + dlen, GFP_ATOMIC);
+	if (!*skb)
+		goto exit;
+	memcpy((*skb)->data, _skb->data, msg_hdr_sz(_hdr));
+	memcpy((*skb)->data + hlen, msg_data(_hdr), dlen);
+
+	/* Build reverse header in new buffer */
+	hdr = buf_msg(*skb);
+	msg_set_hdr_sz(hdr, hlen);
 	msg_set_errcode(hdr, err);
 	msg_set_non_seq(hdr, 0);
-	msg_set_origport(hdr, msg_destport(&ohdr));
-	msg_set_destport(hdr, msg_origport(&ohdr));
-	msg_set_destnode(hdr, msg_prevnode(&ohdr));
+	msg_set_origport(hdr, msg_destport(_hdr));
+	msg_set_destport(hdr, msg_origport(_hdr));
+	msg_set_destnode(hdr, msg_prevnode(_hdr));
 	msg_set_prevnode(hdr, own_node);
 	msg_set_orignode(hdr, own_node);
-	msg_set_size(hdr, msg_hdr_sz(hdr) + dlen);
-	skb_trim(_skb, msg_size(hdr));
+	msg_set_size(hdr, hlen + dlen);
 	skb_orphan(_skb);
+	kfree_skb(_skb);
 	return true;
 exit:
 	kfree_skb(_skb);
@@ -554,6 +612,22 @@
 	return false;
 }
 
+bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy)
+{
+	struct sk_buff *skb, *_skb;
+
+	skb_queue_walk(msg, skb) {
+		_skb = skb_clone(skb, GFP_ATOMIC);
+		if (!_skb) {
+			__skb_queue_purge(cpy);
+			pr_err_ratelimited("Failed to clone buffer chain\n");
+			return false;
+		}
+		__skb_queue_tail(cpy, _skb);
+	}
+	return true;
+}
+
 /**
  * tipc_msg_lookup_dest(): try to find new destination for named message
  * @skb: the buffer containing the message.
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index a4e944d..0daa6f0 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -102,12 +102,15 @@
 #define TIPC_MEDIA_INFO_OFFSET	5
 
 struct tipc_skb_cb {
+	struct sk_buff *tail;
+	unsigned long nxt_retr;
+	unsigned long retr_stamp;
 	u32 bytes_read;
 	u32 orig_member;
-	struct sk_buff *tail;
-	bool validated;
 	u16 chain_imp;
 	u16 ackers;
+	u16 retr_cnt;
+	bool validated;
 };
 
 #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
@@ -116,6 +119,37 @@
 	__be32 hdr[15];
 };
 
+/* struct tipc_gap_ack - TIPC Gap ACK block
+ * @ack: seqno of the last consecutive packet in link deferdq
+ * @gap: number of gap packets since the last ack
+ *
+ * E.g:
+ *       link deferdq: 1 2 3 4      10 11      13 14 15       20
+ * --> Gap ACK blocks:      <4, 5>,   <11, 1>,      <15, 4>, <20, 0>
+ */
+struct tipc_gap_ack {
+	__be16 ack;
+	__be16 gap;
+};
+
+/* struct tipc_gap_ack_blks
+ * @len: actual length of the record
+ * @gack_cnt: number of Gap ACK blocks in the record
+ * @gacks: array of Gap ACK blocks
+ */
+struct tipc_gap_ack_blks {
+	__be16 len;
+	u8 gack_cnt;
+	u8 reserved;
+	struct tipc_gap_ack gacks[];
+};
+
+#define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \
+				 sizeof(struct tipc_gap_ack) * (n))
+
+#define MAX_GAP_ACK_BLKS	32
+#define MAX_GAP_ACK_BLKS_SZ	tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS)
+
 static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
 {
 	return (struct tipc_msg *)skb->data;
@@ -216,6 +250,16 @@
 	msg_set_bits(m, 0, 20, 1, n);
 }
 
+static inline int msg_is_syn(struct tipc_msg *m)
+{
+	return msg_bits(m, 0, 17, 1);
+}
+
+static inline void msg_set_syn(struct tipc_msg *m, u32 d)
+{
+	msg_set_bits(m, 0, 17, 1, d);
+}
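
The accessor pair above follows TIPC's generic msg_bits()/msg_set_bits() pattern: a single flag at bit 17 of header word 0. A host-order sketch of that pattern (the real header words are big-endian, which is glossed over here):

#include <stdint.h>
#include <stdio.h>

static uint32_t get_bits(uint32_t w, int pos, uint32_t mask)
{
	return (w >> pos) & mask;
}

static uint32_t set_bits(uint32_t w, int pos, uint32_t mask, uint32_t val)
{
	return (w & ~(mask << pos)) | ((val & mask) << pos);
}

int main(void)
{
	uint32_t word0 = 0;

	word0 = set_bits(word0, 17, 0x1, 1);	/* mark message as SYN */
	printf("is_syn=%u word0=0x%08x\n",
	       (unsigned)get_bits(word0, 17, 0x1), (unsigned)word0);
	return 0;
}
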
+
 static inline int msg_dest_droppable(struct tipc_msg *m)
 {
 	return msg_bits(m, 0, 19, 1);
@@ -246,6 +290,16 @@
 	msg_set_bits(m, 0, 18, 1, d);
 }
 
+static inline bool msg_is_rcast(struct tipc_msg *m)
+{
+	return msg_bits(m, 0, 18, 0x1);
+}
+
+static inline void msg_set_is_rcast(struct tipc_msg *m, bool d)
+{
+	msg_set_bits(m, 0, 18, 0x1, d);
+}
+
 static inline void msg_set_size(struct tipc_msg *m, u32 sz)
 {
 	m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz);
@@ -256,7 +310,7 @@
 	return ((unchar *)m) + msg_hdr_sz(m);
 }
 
-static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
+static inline struct tipc_msg *msg_inner_hdr(struct tipc_msg *m)
 {
 	return (struct tipc_msg *)msg_data(m);
 }
@@ -349,6 +403,28 @@
 	msg_set_bits(m, 1, 0, 0xffff, n);
 }
 
+/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch
+ * link peer session number
+ */
+static inline bool msg_dest_session_valid(struct tipc_msg *m)
+{
+	return msg_bits(m, 1, 16, 0x1);
+}
+
+static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid)
+{
+	msg_set_bits(m, 1, 16, 0x1, valid);
+}
+
+static inline u16 msg_dest_session(struct tipc_msg *m)
+{
+	return msg_bits(m, 1, 0, 0xffff);
+}
+
+static inline void msg_set_dest_session(struct tipc_msg *m, u16 n)
+{
+	msg_set_bits(m, 1, 0, 0xffff, n);
+}
 
 /*
  * Word 2
@@ -412,7 +488,7 @@
 static inline u32 msg_origport(struct tipc_msg *m)
 {
 	if (msg_user(m) == MSG_FRAGMENTER)
-		m = msg_get_wrapped(m);
+		m = msg_inner_hdr(m);
 	return msg_word(m, 4);
 }
 
@@ -647,12 +723,26 @@
 	msg_set_bits(m, 4, 16, 0xffff, n);
 }
 
+static inline u32 msg_nof_fragms(struct tipc_msg *m)
+{
+	return msg_bits(m, 4, 0, 0xffff);
+}
+
+static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 4, 0, 0xffff, n);
+}
+
+static inline u32 msg_fragm_no(struct tipc_msg *m)
+{
+	return msg_bits(m, 4, 16, 0xffff);
+}
+
 static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n)
 {
 	msg_set_bits(m, 4, 16, 0xffff, n);
 }
 
-
 static inline u16 msg_next_sent(struct tipc_msg *m)
 {
 	return msg_bits(m, 4, 0, 0xffff);
@@ -803,6 +893,16 @@
 	msg_set_bits(m, 9, 16, 0xffff, n);
 }
 
+static inline u16 msg_syncpt(struct tipc_msg *m)
+{
+	return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_syncpt(struct tipc_msg *m, u16 n)
+{
+	msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
 static inline u32 msg_conn_ack(struct tipc_msg *m)
 {
 	return msg_bits(m, 9, 16, 0xffff);
@@ -961,6 +1061,8 @@
 bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
 			  u32 mtu, u32 dnode);
 bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
+int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
+		      int pktmax, struct sk_buff_head *frags);
 int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 		   int offset, int dsz, int mtu, struct sk_buff_head *list);
 bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
@@ -970,6 +1072,7 @@
 			struct sk_buff_head *cpy);
 void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
 			     struct sk_buff *skb);
+bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy);
 
 static inline u16 buf_seqno(struct sk_buff *skb)
 {
@@ -1076,4 +1179,25 @@
 	tipc_skb_queue_splice_tail(&tmp, head);
 }
 
+/* __tipc_skb_dequeue() - dequeue the head skb according to expected seqno
+ * @list: list to be dequeued from
+ * @seqno: seqno of the expected msg
+ *
+ * returns skb dequeued from the list if its seqno is less than or equal to
+ * the expected one, otherwise the skb is left on the list
+ *
+ * Note: must only be used with the appropriate locks held
+ */
+static inline struct sk_buff *__tipc_skb_dequeue(struct sk_buff_head *list,
+						 u16 seqno)
+{
+	struct sk_buff *skb = skb_peek(list);
+
+	if (skb && less_eq(buf_seqno(skb), seqno)) {
+		__skb_unlink(skb, list);
+		return skb;
+	}
+	return NULL;
+}
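
A sketch of how the receive loop in link.c uses this helper (assumed semantics, with a plain <= standing in for the wraparound-safe less_eq()): after delivering the in-sequence packet, keep pulling the deferred queue head for as long as it has become deliverable.

#include <stdio.h>

int main(void)
{
	unsigned int defq[] = {6, 7, 9};	/* deferred, in seqno order */
	int head = 0, n = 3;
	unsigned int rcv_nxt = 6;

	while (head < n && defq[head] <= rcv_nxt) {
		printf("deliver %u\n", defq[head]);
		head++;
		rcv_nxt++;
	}
	printf("stopped, now waiting for %u\n", rcv_nxt);	/* 8 */
	return 0;
}
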
+
 #endif
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 3cfeb9d..836e629 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -94,8 +94,9 @@
 		list_add_tail_rcu(&publ->binding_node, &nt->node_scope);
 		return NULL;
 	}
-	list_add_tail_rcu(&publ->binding_node, &nt->cluster_scope);
-
+	write_lock_bh(&nt->cluster_scope_lock);
+	list_add_tail(&publ->binding_node, &nt->cluster_scope);
+	write_unlock_bh(&nt->cluster_scope_lock);
 	skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
 	if (!skb) {
 		pr_warn("Publication distribution failure\n");
@@ -112,11 +113,13 @@
  */
 struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
 {
+	struct name_table *nt = tipc_name_table(net);
 	struct sk_buff *buf;
 	struct distr_item *item;
 
-	list_del_rcu(&publ->binding_node);
-
+	write_lock_bh(&nt->cluster_scope_lock);
+	list_del(&publ->binding_node);
+	write_unlock_bh(&nt->cluster_scope_lock);
 	if (publ->scope == TIPC_NODE_SCOPE)
 		return NULL;
 
@@ -147,7 +150,7 @@
 			ITEM_SIZE) * ITEM_SIZE;
 	u32 msg_rem = msg_dsz;
 
-	list_for_each_entry_rcu(publ, pls, binding_node) {
+	list_for_each_entry(publ, pls, binding_node) {
 		/* Prepare next buffer: */
 		if (!skb) {
 			skb = named_prepare_buf(net, PUBLICATION, msg_rem,
@@ -189,11 +192,10 @@
 
 	__skb_queue_head_init(&head);
 
-	rcu_read_lock();
+	read_lock_bh(&nt->cluster_scope_lock);
 	named_distribute(net, &head, dnode, &nt->cluster_scope);
-	rcu_read_unlock();
-
 	tipc_node_xmit(net, &head, dnode, 0);
+	read_unlock_bh(&nt->cluster_scope_lock);
 }
 
 /**
@@ -221,7 +223,8 @@
 		       publ->key);
 	}
 
-	kfree_rcu(p, rcu);
+	if (p)
+		kfree_rcu(p, rcu);
 }
 
 /**
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 66d5b2c..66a65c2 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -744,6 +744,7 @@
 
 	INIT_LIST_HEAD(&nt->node_scope);
 	INIT_LIST_HEAD(&nt->cluster_scope);
+	rwlock_init(&nt->cluster_scope_lock);
 	tn->nametbl = nt;
 	spin_lock_init(&tn->nametbl_lock);
 	return 0;
@@ -828,11 +829,11 @@
 		if (!hdr)
 			return -EMSGSIZE;
 
-		attrs = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE);
+		attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE);
 		if (!attrs)
 			goto msg_full;
 
-		b = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL);
+		b = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE_PUBL);
 		if (!b)
 			goto attr_msg_full;
 
@@ -908,7 +909,8 @@
 	for (; i < TIPC_NAMETBL_SIZE; i++) {
 		head = &tn->nametbl->services[i];
 
-		if (*last_type) {
+		if (*last_type ||
+		    (!i && *last_key && (*last_lower == *last_key))) {
 			service = tipc_service_find(net, *last_type);
 			if (!service)
 				return -EPIPE;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 892bd75..f790663 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -100,6 +100,7 @@
 	struct hlist_head services[TIPC_NAMETBL_SIZE];
 	struct list_head node_scope;
 	struct list_head cluster_scope;
+	rwlock_t cluster_scope_lock;
 	u32 local_publ_count;
 };
 
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f076edb..85707c1 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -163,12 +163,9 @@
 
 void tipc_net_stop(struct net *net)
 {
-	u32 self = tipc_own_addr(net);
-
-	if (!self)
+	if (!tipc_own_id(net))
 		return;
 
-	tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self);
 	rtnl_lock();
 	tipc_bearer_stop(net);
 	tipc_node_stop(net);
@@ -190,7 +187,7 @@
 	if (!hdr)
 		return -EMSGSIZE;
 
-	attrs = nla_nest_start(msg->skb, TIPC_NLA_NET);
+	attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NET);
 	if (!attrs)
 		goto msg_full;
 
@@ -248,9 +245,9 @@
 	if (!info->attrs[TIPC_NLA_NET])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
-			       info->attrs[TIPC_NLA_NET], tipc_nl_net_policy,
-			       info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX,
+					  info->attrs[TIPC_NLA_NET],
+					  tipc_nl_net_policy, info->extack);
 
 	if (err)
 		return err;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 99ee419..d6165ad 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -110,7 +110,9 @@
 	[TIPC_NLA_PROP_UNSPEC]		= { .type = NLA_UNSPEC },
 	[TIPC_NLA_PROP_PRIO]		= { .type = NLA_U32 },
 	[TIPC_NLA_PROP_TOL]		= { .type = NLA_U32 },
-	[TIPC_NLA_PROP_WIN]		= { .type = NLA_U32 }
+	[TIPC_NLA_PROP_WIN]		= { .type = NLA_U32 },
+	[TIPC_NLA_PROP_BROADCAST]	= { .type = NLA_U32 },
+	[TIPC_NLA_PROP_BROADCAST_RATIO]	= { .type = NLA_U32 }
 };
 
 const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1]	= {
@@ -141,115 +143,115 @@
 static const struct genl_ops tipc_genl_v2_ops[] = {
 	{
 		.cmd	= TIPC_NL_BEARER_DISABLE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_bearer_disable,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_BEARER_ENABLE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_bearer_enable,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_BEARER_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_bearer_get,
 		.dumpit	= tipc_nl_bearer_dump,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_BEARER_ADD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_bearer_add,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_BEARER_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_bearer_set,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_SOCK_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.start = tipc_dump_start,
 		.dumpit	= tipc_nl_sk_dump,
 		.done	= tipc_dump_done,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_PUBL_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit	= tipc_nl_publ_dump,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_LINK_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit   = tipc_nl_node_get_link,
 		.dumpit	= tipc_nl_node_dump_link,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_LINK_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_node_set_link,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_LINK_RESET_STATS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit   = tipc_nl_node_reset_link_stats,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_MEDIA_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_media_get,
 		.dumpit	= tipc_nl_media_dump,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_MEDIA_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_media_set,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_NODE_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit	= tipc_nl_node_dump,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_NET_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit	= tipc_nl_net_dump,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_NET_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_net_set,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_NAME_TABLE_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit	= tipc_nl_name_table_dump,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_MON_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_node_set_monitor,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_MON_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_node_get_monitor,
 		.dumpit	= tipc_nl_node_dump_monitor,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_MON_PEER_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit	= tipc_nl_node_dump_monitor_peer,
-		.policy = tipc_nl_policy,
 	},
 	{
 		.cmd	= TIPC_NL_PEER_REMOVE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit	= tipc_nl_peer_rm,
-		.policy = tipc_nl_policy,
 	},
 #ifdef CONFIG_TIPC_MEDIA_UDP
 	{
 		.cmd	= TIPC_NL_UDP_GET_REMOTEIP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit	= tipc_udp_nl_dump_remoteip,
-		.policy = tipc_nl_policy,
 	},
 #endif
 };
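
The ops conversion above follows the generic-netlink pattern in which per-op .policy pointers are dropped in favour of a single family-wide policy (added to tipc_genl_family in the next hunk), while each op not yet audited for strict checking opts out via .validate flags. A hypothetical minimal family in the same shape (all demo_* names are illustrative):

	static const struct genl_ops demo_ops[] = {
		{
			.cmd      = DEMO_CMD_GET,
			.validate = GENL_DONT_VALIDATE_STRICT |
				    GENL_DONT_VALIDATE_DUMP,
			.doit     = demo_get,
		},
	};

	static struct genl_family demo_family __ro_after_init = {
		.name    = "demo",
		.maxattr = DEMO_ATTR_MAX,
		.policy  = demo_policy,	/* one policy for all ops */
		.ops     = demo_ops,
		.n_ops   = ARRAY_SIZE(demo_ops),
	};
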
@@ -259,6 +261,7 @@
 	.version	= TIPC_GENL_V2_VERSION,
 	.hdrsize	= 0,
 	.maxattr	= TIPC_NLA_MAX,
+	.policy		= tipc_nl_policy,
 	.netnsok	= true,
 	.module		= THIS_MODULE,
 	.ops		= tipc_genl_v2_ops,
@@ -273,8 +276,8 @@
 	if (!*attr)
 		return -EOPNOTSUPP;
 
-	return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy,
-			   NULL);
+	return nlmsg_parse_deprecated(nlh, GENL_HDRLEN, *attr, maxattr,
+				      tipc_nl_policy, NULL);
 }
 
 int __init tipc_netlink_start(void)
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 6376467..d4d2928 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -55,6 +55,7 @@
 	int rep_type;
 	int rep_size;
 	int req_type;
+	int req_size;
 	struct net *net;
 	struct sk_buff *rep;
 	struct tlv_desc *req;
@@ -87,6 +88,11 @@
 	return limit;
 }
 
+static inline int TLV_GET_DATA_LEN(struct tlv_desc *tlv)
+{
+	return TLV_GET_LEN(tlv) - TLV_SPACE(0);
+}
+
 static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len)
 {
 	struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb);
@@ -166,6 +172,11 @@
 	return buf;
 }
 
+static inline bool string_is_valid(char *s, int len)
+{
+	return memchr(s, '\0', len) ? true : false;
+}
+
 static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
 				   struct tipc_nl_compat_msg *msg,
 				   struct sk_buff *arg)
@@ -247,7 +258,8 @@
 	int err;
 	struct sk_buff *arg;
 
-	if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type))
+	if (msg->req_type && (!msg->req_size ||
+			      !TLV_CHECK_TYPE(msg->req, msg->req_type)))
 		return -EINVAL;
 
 	msg->rep = tipc_tlv_alloc(msg->rep_size);
@@ -257,8 +269,14 @@
 	if (msg->rep_type)
 		tipc_tlv_init(msg->rep, msg->rep_type);
 
-	if (cmd->header)
-		(*cmd->header)(msg);
+	if (cmd->header) {
+		err = (*cmd->header)(msg);
+		if (err) {
+			kfree_skb(msg->rep);
+			msg->rep = NULL;
+			return err;
+		}
+	}
 
 	arg = nlmsg_new(0, GFP_KERNEL);
 	if (!arg) {
@@ -312,9 +330,9 @@
 	if (err)
 		goto doit_out;
 
-	err = nla_parse(attrbuf, tipc_genl_family.maxattr,
-			(const struct nlattr *)trans_buf->data,
-			trans_buf->len, NULL, NULL);
+	err = nla_parse_deprecated(attrbuf, tipc_genl_family.maxattr,
+				   (const struct nlattr *)trans_buf->data,
+				   trans_buf->len, NULL, NULL);
 	if (err)
 		goto doit_out;
 
@@ -338,7 +356,8 @@
 {
 	int err;
 
-	if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type))
+	if (msg->req_type && (!msg->req_size ||
+			      !TLV_CHECK_TYPE(msg->req, msg->req_type)))
 		return -EINVAL;
 
 	err = __tipc_nl_compat_doit(cmd, msg);
@@ -362,8 +381,8 @@
 	if (!attrs[TIPC_NLA_BEARER])
 		return -EINVAL;
 
-	err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX,
-			       attrs[TIPC_NLA_BEARER], NULL, NULL);
+	err = nla_parse_nested_deprecated(bearer, TIPC_NLA_BEARER_MAX,
+					  attrs[TIPC_NLA_BEARER], NULL, NULL);
 	if (err)
 		return err;
 
@@ -379,13 +398,23 @@
 	struct nlattr *prop;
 	struct nlattr *bearer;
 	struct tipc_bearer_config *b;
+	int len;
 
 	b = (struct tipc_bearer_config *)TLV_DATA(msg->req);
 
-	bearer = nla_nest_start(skb, TIPC_NLA_BEARER);
+	bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER);
 	if (!bearer)
 		return -EMSGSIZE;
 
+	len = TLV_GET_DATA_LEN(msg->req);
+	len -= offsetof(struct tipc_bearer_config, name);
+	if (len <= 0)
+		return -EINVAL;
+
+	len = min_t(int, len, TIPC_MAX_BEARER_NAME);
+	if (!string_is_valid(b->name, len))
+		return -EINVAL;
+
 	if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name))
 		return -EMSGSIZE;
 
@@ -393,7 +422,7 @@
 		return -EMSGSIZE;
 
 	if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) {
-		prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP);
+		prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP);
 		if (!prop)
 			return -EMSGSIZE;
 		if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority)))
@@ -411,13 +440,22 @@
 {
 	char *name;
 	struct nlattr *bearer;
+	int len;
 
 	name = (char *)TLV_DATA(msg->req);
 
-	bearer = nla_nest_start(skb, TIPC_NLA_BEARER);
+	bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER);
 	if (!bearer)
 		return -EMSGSIZE;
 
+	len = TLV_GET_DATA_LEN(msg->req);
+	if (len <= 0)
+		return -EINVAL;
+
+	len = min_t(int, len, TIPC_MAX_BEARER_NAME);
+	if (!string_is_valid(name, len))
+		return -EINVAL;
+
 	if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name))
 		return -EMSGSIZE;
 
@@ -478,32 +516,44 @@
 	struct nlattr *prop[TIPC_NLA_PROP_MAX + 1];
 	struct nlattr *stats[TIPC_NLA_STATS_MAX + 1];
 	int err;
+	int len;
 
 	if (!attrs[TIPC_NLA_LINK])
 		return -EINVAL;
 
-	err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK],
-			       NULL, NULL);
+	err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX,
+					  attrs[TIPC_NLA_LINK], NULL, NULL);
 	if (err)
 		return err;
 
 	if (!link[TIPC_NLA_LINK_PROP])
 		return -EINVAL;
 
-	err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX,
-			       link[TIPC_NLA_LINK_PROP], NULL, NULL);
+	err = nla_parse_nested_deprecated(prop, TIPC_NLA_PROP_MAX,
+					  link[TIPC_NLA_LINK_PROP], NULL,
+					  NULL);
 	if (err)
 		return err;
 
 	if (!link[TIPC_NLA_LINK_STATS])
 		return -EINVAL;
 
-	err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX,
-			       link[TIPC_NLA_LINK_STATS], NULL, NULL);
+	err = nla_parse_nested_deprecated(stats, TIPC_NLA_STATS_MAX,
+					  link[TIPC_NLA_LINK_STATS], NULL,
+					  NULL);
 	if (err)
 		return err;
 
 	name = (char *)TLV_DATA(msg->req);
+
+	len = TLV_GET_DATA_LEN(msg->req);
+	if (len <= 0)
+		return -EINVAL;
+
+	len = min_t(int, len, TIPC_MAX_LINK_NAME);
+	if (!string_is_valid(name, len))
+		return -EINVAL;
+
 	if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0)
 		return 0;
 
@@ -608,8 +658,8 @@
 	if (!attrs[TIPC_NLA_LINK])
 		return -EINVAL;
 
-	err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK],
-			       NULL, NULL);
+	err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX,
+					  attrs[TIPC_NLA_LINK], NULL, NULL);
 	if (err)
 		return err;
 
@@ -647,14 +697,14 @@
 
 	lc = (struct tipc_link_config *)TLV_DATA(msg->req);
 
-	media = nla_nest_start(skb, TIPC_NLA_MEDIA);
+	media = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA);
 	if (!media)
 		return -EMSGSIZE;
 
 	if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name))
 		return -EMSGSIZE;
 
-	prop = nla_nest_start(skb, TIPC_NLA_MEDIA_PROP);
+	prop = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA_PROP);
 	if (!prop)
 		return -EMSGSIZE;
 
@@ -674,14 +724,14 @@
 
 	lc = (struct tipc_link_config *)TLV_DATA(msg->req);
 
-	bearer = nla_nest_start(skb, TIPC_NLA_BEARER);
+	bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER);
 	if (!bearer)
 		return -EMSGSIZE;
 
 	if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name))
 		return -EMSGSIZE;
 
-	prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP);
+	prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP);
 	if (!prop)
 		return -EMSGSIZE;
 
@@ -701,14 +751,14 @@
 
 	lc = (struct tipc_link_config *)TLV_DATA(msg->req);
 
-	link = nla_nest_start(skb, TIPC_NLA_LINK);
+	link = nla_nest_start_noflag(skb, TIPC_NLA_LINK);
 	if (!link)
 		return -EMSGSIZE;
 
 	if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name))
 		return -EMSGSIZE;
 
-	prop = nla_nest_start(skb, TIPC_NLA_LINK_PROP);
+	prop = nla_nest_start_noflag(skb, TIPC_NLA_LINK_PROP);
 	if (!prop)
 		return -EMSGSIZE;
 
@@ -726,9 +776,19 @@
 	struct tipc_link_config *lc;
 	struct tipc_bearer *bearer;
 	struct tipc_media *media;
+	int len;
 
 	lc = (struct tipc_link_config *)TLV_DATA(msg->req);
 
+	len = TLV_GET_DATA_LEN(msg->req);
+	len -= offsetof(struct tipc_link_config, name);
+	if (len <= 0)
+		return -EINVAL;
+
+	len = min_t(int, len, TIPC_MAX_LINK_NAME);
+	if (!string_is_valid(lc->name, len))
+		return -EINVAL;
+
 	media = tipc_media_find(lc->name);
 	if (media) {
 		cmd->doit = &__tipc_nl_media_set;
@@ -750,13 +810,22 @@
 {
 	char *name;
 	struct nlattr *link;
+	int len;
 
 	name = (char *)TLV_DATA(msg->req);
 
-	link = nla_nest_start(skb, TIPC_NLA_LINK);
+	link = nla_nest_start_noflag(skb, TIPC_NLA_LINK);
 	if (!link)
 		return -EMSGSIZE;
 
+	len = TLV_GET_DATA_LEN(msg->req);
+	if (len <= 0)
+		return -EINVAL;
+
+	len = min_t(int, len, TIPC_MAX_LINK_NAME);
+	if (!string_is_valid(name, len))
+		return -EINVAL;
+
 	if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name))
 		return -EMSGSIZE;
 
@@ -778,6 +847,8 @@
 	};
 
 	ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req);
+	if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query))
+		return -EINVAL;
 
 	depth = ntohl(ntq->depth);
 
@@ -805,16 +876,18 @@
 	if (!attrs[TIPC_NLA_NAME_TABLE])
 		return -EINVAL;
 
-	err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX,
-			       attrs[TIPC_NLA_NAME_TABLE], NULL, NULL);
+	err = nla_parse_nested_deprecated(nt, TIPC_NLA_NAME_TABLE_MAX,
+					  attrs[TIPC_NLA_NAME_TABLE], NULL,
+					  NULL);
 	if (err)
 		return err;
 
 	if (!nt[TIPC_NLA_NAME_TABLE_PUBL])
 		return -EINVAL;
 
-	err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX,
-			       nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, NULL);
+	err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX,
+					  nt[TIPC_NLA_NAME_TABLE_PUBL], NULL,
+					  NULL);
 	if (err)
 		return err;
 
@@ -873,8 +946,8 @@
 	if (!attrs[TIPC_NLA_PUBL])
 		return -EINVAL;
 
-	err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL],
-			       NULL, NULL);
+	err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX,
+					  attrs[TIPC_NLA_PUBL], NULL, NULL);
 	if (err)
 		return err;
 
@@ -904,8 +977,12 @@
 
 	hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI,
 			  TIPC_NL_PUBL_GET);
+	if (!hdr) {
+		kfree_skb(args);
+		return -EMSGSIZE;
+	}
 
-	nest = nla_nest_start(args, TIPC_NLA_SOCK);
+	nest = nla_nest_start_noflag(args, TIPC_NLA_SOCK);
 	if (!nest) {
 		kfree_skb(args);
 		return -EMSGSIZE;
@@ -939,8 +1016,8 @@
 	if (!attrs[TIPC_NLA_SOCK])
 		return -EINVAL;
 
-	err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK],
-			       NULL, NULL);
+	err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX,
+					  attrs[TIPC_NLA_SOCK], NULL, NULL);
 	if (err)
 		return err;
 
@@ -951,8 +1028,12 @@
 		u32 node;
 		struct nlattr *con[TIPC_NLA_CON_MAX + 1];
 
-		nla_parse_nested(con, TIPC_NLA_CON_MAX,
-				 sock[TIPC_NLA_SOCK_CON], NULL, NULL);
+		err = nla_parse_nested_deprecated(con, TIPC_NLA_CON_MAX,
+						  sock[TIPC_NLA_SOCK_CON],
+						  NULL, NULL);
+
+		if (err)
+			return err;
 
 		node = nla_get_u32(con[TIPC_NLA_CON_NODE]);
 		tipc_tlv_sprintf(msg->rep, "  connected to <%u.%u.%u:%u>",
@@ -988,8 +1069,8 @@
 	if (!attrs[TIPC_NLA_MEDIA])
 		return -EINVAL;
 
-	err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX,
-			       attrs[TIPC_NLA_MEDIA], NULL, NULL);
+	err = nla_parse_nested_deprecated(media, TIPC_NLA_MEDIA_MAX,
+					  attrs[TIPC_NLA_MEDIA], NULL, NULL);
 	if (err)
 		return err;
 
@@ -1008,8 +1089,8 @@
 	if (!attrs[TIPC_NLA_NODE])
 		return -EINVAL;
 
-	err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE],
-			       NULL, NULL);
+	err = nla_parse_nested_deprecated(node, TIPC_NLA_NODE_MAX,
+					  attrs[TIPC_NLA_NODE], NULL, NULL);
 	if (err)
 		return err;
 
@@ -1029,7 +1110,7 @@
 
 	val = ntohl(*(__be32 *)TLV_DATA(msg->req));
 
-	net = nla_nest_start(skb, TIPC_NLA_NET);
+	net = nla_nest_start_noflag(skb, TIPC_NLA_NET);
 	if (!net)
 		return -EMSGSIZE;
 
@@ -1055,8 +1136,8 @@
 	if (!attrs[TIPC_NLA_NET])
 		return -EINVAL;
 
-	err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET],
-			       NULL, NULL);
+	err = nla_parse_nested_deprecated(net, TIPC_NLA_NET_MAX,
+					  attrs[TIPC_NLA_NET], NULL, NULL);
 	if (err)
 		return err;
 
@@ -1200,8 +1281,8 @@
 		goto send;
 	}
 
-	len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN);
-	if (len && !TLV_OK(msg.req, len)) {
+	msg.req_size = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN);
+	if (msg.req_size && !TLV_OK(msg.req, msg.req_size)) {
 		msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED);
 		err = -EOPNOTSUPP;
 		goto send;
@@ -1229,6 +1310,7 @@
 static const struct genl_ops tipc_genl_compat_ops[] = {
 	{
 		.cmd		= TIPC_GENL_CMD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit		= tipc_nl_compat_recv,
 	},
 };
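
The netlink_compat.c hunks above repeat one hardening recipe: remember the request TLV size (the new req_size field), derive the payload length with TLV_GET_DATA_LEN(), and reject strings that are not NUL-terminated via string_is_valid() before nla_put_string() would copy past the buffer. Collected into one hypothetical helper (demo_check_bearer_name is illustrative; the real call sites inline this with their own struct and name-length limit):

	static int demo_check_bearer_name(struct tipc_nl_compat_msg *msg)
	{
		struct tipc_bearer_config *b;
		int len;

		b = (struct tipc_bearer_config *)TLV_DATA(msg->req);

		/* payload must extend past the fixed part of the struct */
		len = TLV_GET_DATA_LEN(msg->req);
		len -= offsetof(struct tipc_bearer_config, name);
		if (len <= 0)
			return -EINVAL;

		/* name must be NUL-terminated within the bytes provided */
		len = min_t(int, len, TIPC_MAX_BEARER_NAME);
		if (!string_is_valid(b->name, len))
			return -EINVAL;

		return 0;
	}
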
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 4880197..c8f6177 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -43,6 +43,7 @@
 #include "monitor.h"
 #include "discover.h"
 #include "netlink.h"
+#include "trace.h"
 
 #define INVALID_NODE_SIG	0x10000
 #define NODE_CLEANUP_AFTER	300000
@@ -374,14 +375,20 @@
 		if (n->capabilities == capabilities)
 			goto exit;
 		/* Same node may come back with new capabilities */
-		write_lock_bh(&n->lock);
+		tipc_node_write_lock(n);
 		n->capabilities = capabilities;
 		for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
 			l = n->links[bearer_id].link;
 			if (l)
 				tipc_link_update_caps(l, capabilities);
 		}
-		write_unlock_bh(&n->lock);
+		tipc_node_write_unlock_fast(n);
+
+		/* Calculate cluster capabilities */
+		tn->capabilities = TIPC_NODE_CAPABILITIES;
+		list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+			tn->capabilities &= temp_node->capabilities;
+		}
 		goto exit;
 	}
 	n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -432,6 +439,12 @@
 			break;
 	}
 	list_add_tail_rcu(&n->list, &temp_node->list);
+	/* Calculate cluster capabilities */
+	tn->capabilities = TIPC_NODE_CAPABILITIES;
+	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+		tn->capabilities &= temp_node->capabilities;
+	}
+	trace_tipc_node_create(n, true, " ");
 exit:
 	spin_unlock_bh(&tn->node_list_lock);
 	return n;
@@ -459,6 +472,7 @@
 
 static void tipc_node_delete(struct tipc_node *node)
 {
+	trace_tipc_node_delete(node, true, " ");
 	tipc_node_delete_from_list(node);
 
 	del_timer_sync(&node->timer);
@@ -586,6 +600,7 @@
  */
 static bool tipc_node_cleanup(struct tipc_node *peer)
 {
+	struct tipc_node *temp_node;
 	struct tipc_net *tn = tipc_net(peer->net);
 	bool deleted = false;
 
@@ -601,6 +616,13 @@
 		deleted = true;
 	}
 	tipc_node_write_unlock(peer);
+
+	/* Calculate cluster capabilities */
+	tn->capabilities = TIPC_NODE_CAPABILITIES;
+	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+		tn->capabilities &= temp_node->capabilities;
+	}
+
 	spin_unlock_bh(&tn->node_list_lock);
 	return deleted;
 }
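
The cluster-capability recalculation now appears three times in this file (node create, capability change, node cleanup): reset to the full local mask, then AND in every known peer. As a sketch, the repeated block is equivalent to this hypothetical helper (caller holds tn->node_list_lock, as the surrounding code does):

	static void demo_recalc_cluster_caps(struct tipc_net *tn)
	{
		struct tipc_node *n;

		/* start from everything this node supports ... */
		tn->capabilities = TIPC_NODE_CAPABILITIES;
		/* ... keep only what every known peer also supports */
		list_for_each_entry_rcu(n, &tn->node_list, list)
			tn->capabilities &= n->capabilities;
	}
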
@@ -616,6 +638,7 @@
 	int bearer_id;
 	int rc = 0;
 
+	trace_tipc_node_timeout(n, false, " ");
 	if (!node_is_up(n) && tipc_node_cleanup(n)) {
 		/* Remove the timer reference */
 		tipc_node_put(n);
@@ -624,6 +647,12 @@
 
 	__skb_queue_head_init(&xmitq);
 
+	/* Set the node interval to a larger initial value (10 seconds);
+	 * it is then recalculated from the lowest link tolerance
+	 */
+	tipc_node_read_lock(n);
+	n->keepalive_intv = 10000;
+	tipc_node_read_unlock(n);
 	for (bearer_id = 0; remains && (bearer_id < MAX_BEARERS); bearer_id++) {
 		tipc_node_read_lock(n);
 		le = &n->links[bearer_id];
@@ -675,6 +704,7 @@
 
 	pr_debug("Established link <%s> on network plane %c\n",
 		 tipc_link_name(nl), tipc_link_plane(nl));
+	trace_tipc_node_link_up(n, true, " ");
 
 	/* Ensure that a STATE message goes first */
 	tipc_link_build_state_msg(nl, xmitq);
@@ -684,7 +714,6 @@
 		*slot0 = bearer_id;
 		*slot1 = bearer_id;
 		tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
-		n->failover_sent = false;
 		n->action_flags |= TIPC_NOTIFY_NODE_UP;
 		tipc_link_set_active(nl, true);
 		tipc_bcast_add_peer(n->net, nl, xmitq);
@@ -727,6 +756,49 @@
 }
 
 /**
+ * tipc_node_link_failover() - start failover in case of "half-failover"
+ *
+ * This function is only called in a very special situation where link
+ * failover may already have been started on the peer node, but not on
+ * this node. This can happen e.g. when:
+ *	1. Both links <1A-2A>, <1B-2B> down
+ *	2. Link endpoint 2A up, but 1A still down (e.g. due to network
+ *	   disturbance, wrong session, etc.)
+ *	3. Link <1B-2B> up
+ *	4. Link endpoint 2A down (e.g. due to link tolerance timeout)
+ *	5. Node 2 starts failover onto link <1B-2B>
+ *
+ *	==> Node 1 never starts link/node failover!
+ *
+ * @n: tipc node structure
+ * @l: peer link endpoint that is failing over (may be NULL)
+ * @tnl: tunnel link
+ * @xmitq: queue for messages to be transmitted on the tnl link later
+ */
+static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l,
+				    struct tipc_link *tnl,
+				    struct sk_buff_head *xmitq)
+{
+	/* Avoid a "self-failover" that can never end */
+	if (!tipc_link_is_up(tnl))
+		return;
+
+	/* Don't rush, the failing link may still be resetting */
+	if (l && !tipc_link_is_reset(l))
+		return;
+
+	tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
+	tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
+
+	n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1);
+	tipc_link_failover_prepare(l, tnl, xmitq);
+
+	if (l)
+		tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+	tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
+}
+
+/**
  * __tipc_node_link_down - handle loss of link
  */
 static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
@@ -777,6 +849,7 @@
 		if (tipc_link_peer_is_down(l))
 			tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
 		tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
+		trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down!");
 		tipc_link_fsm_evt(l, LINK_RESET_EVT);
 		tipc_link_reset(l);
 		tipc_link_build_reset_msg(l, xmitq);
@@ -794,6 +867,7 @@
 	tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
 	n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1);
 	tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
+	trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down -> failover!");
 	tipc_link_reset(l);
 	tipc_link_fsm_evt(l, LINK_RESET_EVT);
 	tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
@@ -804,10 +878,10 @@
 static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
 {
 	struct tipc_link_entry *le = &n->links[bearer_id];
+	struct tipc_media_addr *maddr = NULL;
 	struct tipc_link *l = le->link;
-	struct tipc_media_addr *maddr;
-	struct sk_buff_head xmitq;
 	int old_bearer_id = bearer_id;
+	struct sk_buff_head xmitq;
 
 	if (!l)
 		return;
@@ -817,19 +891,22 @@
 	tipc_node_write_lock(n);
 	if (!tipc_link_is_establishing(l)) {
 		__tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
-		if (delete) {
-			kfree(l);
-			le->link = NULL;
-			n->link_cnt--;
-		}
 	} else {
 		/* Defuse pending tipc_node_link_up() */
+		tipc_link_reset(l);
 		tipc_link_fsm_evt(l, LINK_RESET_EVT);
 	}
+	if (delete) {
+		kfree(l);
+		le->link = NULL;
+		n->link_cnt--;
+	}
+	trace_tipc_node_link_down(n, true, "node link down or deleted!");
 	tipc_node_write_unlock(n);
 	if (delete)
 		tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
-	tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
+	if (!skb_queue_empty(&xmitq))
+		tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
 	tipc_sk_rcv(n->net, &le->inputq);
 }
 
@@ -1015,6 +1092,7 @@
 			*respond = false;
 			goto exit;
 		}
+		trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link created!");
 		tipc_link_reset(l);
 		tipc_link_fsm_evt(l, LINK_RESET_EVT);
 		if (n->state == NODE_FAILINGOVER)
@@ -1054,6 +1132,7 @@
 
 	pr_warn("Resetting all links to %x\n", n->addr);
 
+	trace_tipc_node_reset_links(n, true, " ");
 	for (i = 0; i < MAX_BEARERS; i++) {
 		tipc_node_link_down(n, i, false);
 	}
@@ -1229,11 +1308,13 @@
 		pr_err("Unknown node fsm state %x\n", state);
 		break;
 	}
+	trace_tipc_node_fsm(n->peer_id, n->state, state, evt);
 	n->state = state;
 	return;
 
 illegal_evt:
 	pr_err("Illegal node fsm evt %x in state %x\n", evt, state);
+	trace_tipc_node_fsm(n->peer_id, n->state, state, evt);
 }
 
 static void node_lost_contact(struct tipc_node *n,
@@ -1247,6 +1328,7 @@
 
 	pr_debug("Lost contact with %x\n", n->addr);
 	n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
+	trace_tipc_node_lost_contact(n, true, " ");
 
 	/* Clean up broadcast state */
 	tipc_bcast_remove_peer(n->net, n->bc_entry.link);
@@ -1319,7 +1401,7 @@
 	if (!hdr)
 		return -EMSGSIZE;
 
-	attrs = nla_nest_start(msg->skb, TIPC_NLA_NODE);
+	attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NODE);
 	if (!attrs)
 		goto msg_full;
 
@@ -1361,13 +1443,15 @@
 	int rc;
 
 	if (in_own_node(net, dnode)) {
+		tipc_loopback_trace(net, list);
+		spin_lock_init(&list->lock);
 		tipc_sk_rcv(net, list);
 		return 0;
 	}
 
 	n = tipc_node_find(net, dnode);
 	if (unlikely(!n)) {
-		skb_queue_purge(list);
+		__skb_queue_purge(list);
 		return -EHOSTUNREACH;
 	}
 
@@ -1376,7 +1460,7 @@
 	if (unlikely(bearer_id == INVALID_BEARER_ID)) {
 		tipc_node_read_unlock(n);
 		tipc_node_put(n);
-		skb_queue_purge(list);
+		__skb_queue_purge(list);
 		return -EHOSTUNREACH;
 	}
 
@@ -1408,7 +1492,7 @@
 {
 	struct sk_buff_head head;
 
-	skb_queue_head_init(&head);
+	__skb_queue_head_init(&head);
 	__skb_queue_tail(&head, skb);
 	tipc_node_xmit(net, &head, dnode, selector);
 	return 0;
@@ -1543,6 +1627,10 @@
 	if (!skb_queue_empty(&be->inputq1))
 		tipc_node_mcast_rcv(n);
 
+	/* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */
+	if (!skb_queue_empty(&n->bc_entry.namedq))
+		tipc_named_rcv(net, &n->bc_entry.namedq);
+
 	/* If reassembly or retransmission failure => reset all links to peer */
 	if (rc & TIPC_LINK_DOWN_EVT)
 		tipc_node_reset_links(n);
@@ -1563,7 +1651,6 @@
 	int usr = msg_user(hdr);
 	int mtyp = msg_type(hdr);
 	u16 oseqno = msg_seqno(hdr);
-	u16 iseqno = msg_seqno(msg_get_wrapped(hdr));
 	u16 exp_pkts = msg_msgcnt(hdr);
 	u16 rcv_nxt, syncpt, dlv_nxt, inputq_len;
 	int state = n->state;
@@ -1571,6 +1658,10 @@
 	struct tipc_media_addr *maddr;
 	int pb_id;
 
+	if (trace_tipc_node_check_state_enabled()) {
+		trace_tipc_skb_dump(skb, false, "skb for node state check");
+		trace_tipc_node_check_state(n, true, " ");
+	}
 	l = n->links[bearer_id].link;
 	if (!l)
 		return false;
@@ -1588,8 +1679,11 @@
 		}
 	}
 
-	if (!tipc_link_validate_msg(l, hdr))
+	if (!tipc_link_validate_msg(l, hdr)) {
+		trace_tipc_skb_dump(skb, false, "PROTO invalid (2)!");
+		trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (2)!");
 		return false;
+	}
 
 	/* Check and update node accessibility if applicable */
 	if (state == SELF_UP_PEER_COMING) {
@@ -1617,19 +1711,23 @@
 	/* Initiate or update failover mode if applicable */
 	if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) {
 		syncpt = oseqno + exp_pkts - 1;
-		if (pl && tipc_link_is_up(pl)) {
+		if (pl && !tipc_link_is_reset(pl)) {
 			__tipc_node_link_down(n, &pb_id, xmitq, &maddr);
+			trace_tipc_node_link_down(n, true,
+						  "node link down <- failover!");
 			tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl),
 							tipc_link_inputq(l));
 		}
+
 		/* If parallel link was already down, and this happened before
-		 * the tunnel link came up, FAILOVER was never sent. Ensure that
-		 * FAILOVER is sent to get peer out of NODE_FAILINGOVER state.
+		 * the tunnel link came up, node failover was never started.
+		 * Ensure that a FAILOVER_MSG is sent to get the peer out of
+		 * NODE_FAILINGOVER state; this node must also accept
+		 * TUNNEL_MSGs from the peer.
 		 */
-		if (n->state != NODE_FAILINGOVER && !n->failover_sent) {
-			tipc_link_create_dummy_tnl_msg(l, xmitq);
-			n->failover_sent = true;
-		}
+		if (n->state != NODE_FAILINGOVER)
+			tipc_node_link_failover(n, pl, l, xmitq);
+
 		/* If pkts arrive out of order, use lowest calculated syncpt */
 		if (less(syncpt, n->sync_point))
 			n->sync_point = syncpt;
@@ -1651,7 +1749,10 @@
 
 	/* Initiate synch mode if applicable */
 	if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) {
-		syncpt = iseqno + exp_pkts - 1;
+		if (n->capabilities & TIPC_TUNNEL_ENHANCED)
+			syncpt = msg_syncpt(hdr);
+		else
+			syncpt = msg_seqno(msg_inner_hdr(hdr)) + exp_pkts - 1;
 		if (!tipc_link_is_up(l))
 			__tipc_node_link_up(n, bearer_id, xmitq);
 		if (n->state == SELF_UP_PEER_UP) {
@@ -1710,6 +1811,7 @@
 	__skb_queue_head_init(&xmitq);
 
 	/* Ensure message is well-formed before touching the header */
+	TIPC_SKB_CB(skb)->validated = false;
 	if (unlikely(!tipc_msg_validate(&skb)))
 		goto discard;
 	hdr = buf_msg(skb);
@@ -1832,9 +1934,9 @@
 	if (!info->attrs[TIPC_NLA_NET])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
-			       info->attrs[TIPC_NLA_NET], tipc_nl_net_policy,
-			       info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX,
+					  info->attrs[TIPC_NLA_NET],
+					  tipc_nl_net_policy, info->extack);
 	if (err)
 		return err;
 
@@ -1990,9 +2092,9 @@
 	if (!info->attrs[TIPC_NLA_LINK])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
-			       info->attrs[TIPC_NLA_LINK],
-			       tipc_nl_link_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX,
+					  info->attrs[TIPC_NLA_LINK],
+					  tipc_nl_link_policy, info->extack);
 	if (err)
 		return err;
 
@@ -2066,9 +2168,9 @@
 	if (!info->attrs[TIPC_NLA_LINK])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
-			       info->attrs[TIPC_NLA_LINK],
-			       tipc_nl_link_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX,
+					  info->attrs[TIPC_NLA_LINK],
+					  tipc_nl_link_policy, info->extack);
 	if (err)
 		return err;
 
@@ -2131,9 +2233,9 @@
 	if (!info->attrs[TIPC_NLA_LINK])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
-			       info->attrs[TIPC_NLA_LINK],
-			       tipc_nl_link_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX,
+					  info->attrs[TIPC_NLA_LINK],
+					  tipc_nl_link_policy, info->extack);
 	if (err)
 		return err;
 
@@ -2271,9 +2373,10 @@
 	if (!info->attrs[TIPC_NLA_MON])
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, TIPC_NLA_MON_MAX,
-			       info->attrs[TIPC_NLA_MON],
-			       tipc_nl_monitor_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MON_MAX,
+					  info->attrs[TIPC_NLA_MON],
+					  tipc_nl_monitor_policy,
+					  info->extack);
 	if (err)
 		return err;
 
@@ -2300,7 +2403,7 @@
 	if (!hdr)
 		return -EMSGSIZE;
 
-	attrs = nla_nest_start(msg->skb, TIPC_NLA_MON);
+	attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON);
 	if (!attrs)
 		goto msg_full;
 
@@ -2391,9 +2494,10 @@
 		if (!attrs[TIPC_NLA_MON])
 			return -EINVAL;
 
-		err = nla_parse_nested(mon, TIPC_NLA_MON_MAX,
-				       attrs[TIPC_NLA_MON],
-				       tipc_nl_monitor_policy, NULL);
+		err = nla_parse_nested_deprecated(mon, TIPC_NLA_MON_MAX,
+						  attrs[TIPC_NLA_MON],
+						  tipc_nl_monitor_policy,
+						  NULL);
 		if (err)
 			return err;
 
@@ -2425,3 +2529,65 @@
 
 	return skb->len;
 }
+
+u32 tipc_node_get_addr(struct tipc_node *node)
+{
+	return (node) ? node->addr : 0;
+}
+
+/**
+ * tipc_node_dump - dump TIPC node data
+ * @n: tipc node to be dumped
+ * @more: dump more?
+ *        - false: dump only tipc node data
+ *        - true: dump node link data as well
+ * @buf: buffer where the formatted dump data is returned
+ */
+int tipc_node_dump(struct tipc_node *n, bool more, char *buf)
+{
+	int i = 0;
+	size_t sz = (more) ? NODE_LMAX : NODE_LMIN;
+
+	if (!n) {
+		i += scnprintf(buf, sz, "node data: (null)\n");
+		return i;
+	}
+
+	i += scnprintf(buf, sz, "node data: %x", n->addr);
+	i += scnprintf(buf + i, sz - i, " %x", n->state);
+	i += scnprintf(buf + i, sz - i, " %d", n->active_links[0]);
+	i += scnprintf(buf + i, sz - i, " %d", n->active_links[1]);
+	i += scnprintf(buf + i, sz - i, " %x", n->action_flags);
+	i += scnprintf(buf + i, sz - i, " %u", n->failover_sent);
+	i += scnprintf(buf + i, sz - i, " %u", n->sync_point);
+	i += scnprintf(buf + i, sz - i, " %d", n->link_cnt);
+	i += scnprintf(buf + i, sz - i, " %u", n->working_links);
+	i += scnprintf(buf + i, sz - i, " %x", n->capabilities);
+	i += scnprintf(buf + i, sz - i, " %lu\n", n->keepalive_intv);
+
+	if (!more)
+		return i;
+
+	i += scnprintf(buf + i, sz - i, "link_entry[0]:\n");
+	i += scnprintf(buf + i, sz - i, " mtu: %u\n", n->links[0].mtu);
+	i += scnprintf(buf + i, sz - i, " media: ");
+	i += tipc_media_addr_printf(buf + i, sz - i, &n->links[0].maddr);
+	i += scnprintf(buf + i, sz - i, "\n");
+	i += tipc_link_dump(n->links[0].link, TIPC_DUMP_NONE, buf + i);
+	i += scnprintf(buf + i, sz - i, " inputq: ");
+	i += tipc_list_dump(&n->links[0].inputq, false, buf + i);
+
+	i += scnprintf(buf + i, sz - i, "link_entry[1]:\n");
+	i += scnprintf(buf + i, sz - i, " mtu: %u\n", n->links[1].mtu);
+	i += scnprintf(buf + i, sz - i, " media: ");
+	i += tipc_media_addr_printf(buf + i, sz - i, &n->links[1].maddr);
+	i += scnprintf(buf + i, sz - i, "\n");
+	i += tipc_link_dump(n->links[1].link, TIPC_DUMP_NONE, buf + i);
+	i += scnprintf(buf + i, sz - i, " inputq: ");
+	i += tipc_list_dump(&n->links[1].inputq, false, buf + i);
+
+	i += scnprintf(buf + i, sz - i, "bclink:\n ");
+	i += tipc_link_dump(n->bc_entry.link, TIPC_DUMP_NONE, buf + i);
+
+	return i;
+}
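
tipc_node_dump() above leans on the scnprintf() accumulation idiom: unlike snprintf(), scnprintf() returns the number of characters actually written (bounded by the remaining space), so the offset can be advanced blindly without ever overrunning the buffer. In isolation, a sketch:

	char buf[64];
	int i = 0;

	i += scnprintf(buf + i, sizeof(buf) - i, "node: %x", addr);
	i += scnprintf(buf + i, sizeof(buf) - i, " state: %x\n", state);
	/* i never exceeds sizeof(buf) - 1; a zero-size call writes nothing */
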
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 48b3298..291d0ec 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -45,24 +45,33 @@
 /* Optional capabilities supported by this code version
  */
 enum {
+	TIPC_SYN_BIT          = (1),
 	TIPC_BCAST_SYNCH      = (1 << 1),
 	TIPC_BCAST_STATE_NACK = (1 << 2),
 	TIPC_BLOCK_FLOWCTL    = (1 << 3),
 	TIPC_BCAST_RCAST      = (1 << 4),
 	TIPC_NODE_ID128       = (1 << 5),
-	TIPC_LINK_PROTO_SEQNO = (1 << 6)
+	TIPC_LINK_PROTO_SEQNO = (1 << 6),
+	TIPC_MCAST_RBCTL      = (1 << 7),
+	TIPC_GAP_ACK_BLOCK    = (1 << 8),
+	TIPC_TUNNEL_ENHANCED  = (1 << 9)
 };
 
-#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH       |  \
-				TIPC_BCAST_STATE_NACK  |  \
-				TIPC_BCAST_RCAST       |  \
-				TIPC_BLOCK_FLOWCTL     |  \
-				TIPC_NODE_ID128        |  \
-				TIPC_LINK_PROTO_SEQNO)
+#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT           |  \
+				TIPC_BCAST_SYNCH       |   \
+				TIPC_BCAST_STATE_NACK  |   \
+				TIPC_BCAST_RCAST       |   \
+				TIPC_BLOCK_FLOWCTL     |   \
+				TIPC_NODE_ID128        |   \
+				TIPC_LINK_PROTO_SEQNO  |   \
+				TIPC_MCAST_RBCTL       |   \
+				TIPC_GAP_ACK_BLOCK     |   \
+				TIPC_TUNNEL_ENHANCED)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
 bool tipc_node_get_id(struct net *net, u32 addr, u8 *id);
+u32 tipc_node_get_addr(struct tipc_node *node);
 u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
 void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
 			  struct tipc_bearer *bearer,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 366ce0b..4b92b19 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -46,8 +46,9 @@
 #include "bcast.h"
 #include "netlink.h"
 #include "group.h"
+#include "trace.h"
 
-#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
+#define CONN_TIMEOUT_DEFAULT    8000    /* default connect timeout = 8s */
 #define CONN_PROBING_INTV	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
 #define TIPC_FWD_MSG		1
 #define TIPC_MAX_PORT		0xffffffff
@@ -80,7 +81,6 @@
  * @publications: list of publications for port
  * @blocking_link: address of the congested link we are currently sleeping on
  * @pub_count: total # of publications port has made during its lifetime
- * @probing_state:
  * @conn_timeout: the time we can wait for an unresponded setup request
  * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
  * @cong_link_cnt: number of congested links
@@ -102,8 +102,8 @@
 	struct list_head cong_links;
 	struct list_head publications;
 	u32 pub_count;
-	uint conn_timeout;
 	atomic_t dupl_rcvcnt;
+	u16 conn_timeout;
 	bool probe_unacked;
 	u16 cong_link_cnt;
 	u16 snt_unacked;
@@ -234,6 +234,7 @@
  */
 static void tsk_advance_rx_queue(struct sock *sk)
 {
+	trace_tipc_sk_advance_rx(sk, NULL, TIPC_DUMP_SK_RCVQ, " ");
 	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
 }
 
@@ -248,6 +249,7 @@
 	if (!tipc_msg_reverse(onode, &skb, err))
 		return;
 
+	trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE, "@sk_respond!");
 	dnode = msg_destnode(buf_msg(skb));
 	selector = msg_origport(buf_msg(skb));
 	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
@@ -377,16 +379,18 @@
 
 #define tipc_wait_for_cond(sock_, timeo_, condition_)			       \
 ({                                                                             \
+	DEFINE_WAIT_FUNC(wait_, woken_wake_function);                          \
 	struct sock *sk_;						       \
 	int rc_;							       \
 									       \
 	while ((rc_ = !(condition_))) {					       \
-		DEFINE_WAIT_FUNC(wait_, woken_wake_function);	               \
+		/* coupled with smp_wmb() in tipc_sk_proto_rcv() */            \
+		smp_rmb();                                                     \
 		sk_ = (sock_)->sk;					       \
 		rc_ = tipc_sk_sock_err((sock_), timeo_);		       \
 		if (rc_)						       \
 			break;						       \
-		prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE);    \
+		add_wait_queue(sk_sleep(sk_), &wait_);                         \
 		release_sock(sk_);					       \
 		*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
 		sched_annotate_sleep();				               \
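
The smp_rmb() added to tipc_wait_for_cond() pairs with the smp_wmb() added to the SOCK_WAKEUP case of tipc_sk_proto_rcv() later in this file: a waiter that sees the decremented cong_link_cnt is then also guaranteed to see the destination already removed from cong_links. As a sketch (not a literal excerpt):

	writer (tipc_sk_proto_rcv)            waiter (tipc_wait_for_cond)
	--------------------------            ---------------------------
	tipc_dest_del(&tsk->cong_links, ...)  while ((rc_ = !(condition_))) {
	smp_wmb();                                smp_rmb();
	tsk->cong_link_cnt--;                     re-evaluate condition_ ...
	wake up the waiter
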
@@ -482,7 +486,8 @@
 		if (sock->type == SOCK_DGRAM)
 			tsk_set_unreliable(tsk, true);
 	}
-
+	__skb_queue_head_init(&tsk->mc_method.deferredq);
+	trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " ");
 	return 0;
 }
 
@@ -507,6 +512,9 @@
 	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
 					    !tsk_conn_cong(tsk)));
 
+	/* Remove any pending SYN message */
+	__skb_queue_purge(&sk->sk_write_queue);
+
 	/* Reject all unreceived messages, except on an active connection
 	 * (which disconnects locally & sends a 'FIN+' to peer).
 	 */
@@ -569,10 +577,12 @@
 	tsk = tipc_sk(sk);
 	lock_sock(sk);
 
+	trace_tipc_sk_release(sk, NULL, TIPC_DUMP_ALL, " ");
 	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
 	sk->sk_shutdown = SHUTDOWN_MASK;
 	tipc_sk_leave(tsk);
 	tipc_sk_withdraw(tsk, 0, NULL);
+	__skb_queue_purge(&tsk->mc_method.deferredq);
 	sk_stop_timer(sk, &sk->sk_timer);
 	tipc_sk_remove(tsk);
 
@@ -716,6 +726,7 @@
 	__poll_t revents = 0;
 
 	sock_poll_wait(file, sock, wait);
+	trace_tipc_sk_poll(sk, NULL, TIPC_DUMP_ALL, " ");
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
@@ -724,12 +735,12 @@
 
 	switch (sk->sk_state) {
 	case TIPC_ESTABLISHED:
-	case TIPC_CONNECTING:
 		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
 			revents |= EPOLLOUT;
-		/* fall thru' */
+		/* fall through */
 	case TIPC_LISTEN:
-		if (!skb_queue_empty(&sk->sk_receive_queue))
+	case TIPC_CONNECTING:
+		if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 			revents |= EPOLLIN | EPOLLRDNORM;
 		break;
 	case TIPC_OPEN:
@@ -737,7 +748,7 @@
 			revents |= EPOLLOUT;
 		if (!tipc_sk_type_connectionless(sk))
 			break;
-		if (skb_queue_empty(&sk->sk_receive_queue))
+		if (skb_queue_empty_lockless(&sk->sk_receive_queue))
 			break;
 		revents |= EPOLLIN | EPOLLRDNORM;
 		break;
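
tipc_poll() can run without the socket lock, so the receive-queue emptiness checks above switch to skb_queue_empty_lockless(). Its definition, as found in include/linux/skbuff.h of this kernel generation, is a single annotated read:

	static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
	{
		return READ_ONCE(list->next) == (const struct sk_buff *)list;
	}

READ_ONCE() makes the intentionally racy read explicit and prevents the compiler from tearing or repeating it.
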
@@ -798,13 +809,16 @@
 	msg_set_nameupper(hdr, seq->upper);
 
 	/* Build message as chain of buffers */
-	skb_queue_head_init(&pkts);
+	__skb_queue_head_init(&pkts);
 	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);
 
 	/* Send message if build was successful */
-	if (unlikely(rc == dlen))
+	if (unlikely(rc == dlen)) {
+		trace_tipc_sk_sendmcast(sk, skb_peek(&pkts),
+					TIPC_DUMP_SK_SNDQ, " ");
 		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
 				     &tsk->cong_link_cnt);
+	}
 
 	tipc_nlist_purge(&dsts);
 
@@ -839,7 +853,7 @@
 	msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
 
 	/* Build message as chain of buffers */
-	skb_queue_head_init(&pkts);
+	__skb_queue_head_init(&pkts);
 	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
 	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
 	if (unlikely(rc != dlen))
@@ -878,7 +892,6 @@
 	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
 	struct tipc_sock *tsk = tipc_sk(sk);
-	struct tipc_group *grp = tsk->group;
 	struct net *net = sock_net(sk);
 	struct tipc_member *mb = NULL;
 	u32 node, port;
@@ -892,7 +905,9 @@
 	/* Block or return if destination link or member is congested */
 	rc = tipc_wait_for_cond(sock, &timeout,
 				!tipc_dest_find(&tsk->cong_links, node, 0) &&
-				!tipc_group_cong(grp, node, port, blks, &mb));
+				tsk->group &&
+				!tipc_group_cong(tsk->group, node, port, blks,
+						 &mb));
 	if (unlikely(rc))
 		return rc;
 
@@ -922,7 +937,6 @@
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct list_head *cong_links = &tsk->cong_links;
 	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
-	struct tipc_group *grp = tsk->group;
 	struct tipc_msg *hdr = &tsk->phdr;
 	struct tipc_member *first = NULL;
 	struct tipc_member *mbr = NULL;
@@ -939,9 +953,10 @@
 	type = msg_nametype(hdr);
 	inst = dest->addr.name.name.instance;
 	scope = msg_lookup_scope(hdr);
-	exclude = tipc_group_exclude(grp);
 
 	while (++lookups < 4) {
+		exclude = tipc_group_exclude(tsk->group);
+
 		first = NULL;
 
 		/* Look for a non-congested destination member, if any */
@@ -950,7 +965,8 @@
 						 &dstcnt, exclude, false))
 				return -EHOSTUNREACH;
 			tipc_dest_pop(&dsts, &node, &port);
-			cong = tipc_group_cong(grp, node, port, blks, &mbr);
+			cong = tipc_group_cong(tsk->group, node, port, blks,
+					       &mbr);
 			if (!cong)
 				break;
 			if (mbr == first)
@@ -969,7 +985,8 @@
 		/* Block or return if destination link or member is congested */
 		rc = tipc_wait_for_cond(sock, &timeout,
 					!tipc_dest_find(cong_links, node, 0) &&
-					!tipc_group_cong(grp, node, port,
+					tsk->group &&
+					!tipc_group_cong(tsk->group, node, port,
 							 blks, &mbr));
 		if (unlikely(rc))
 			return rc;
@@ -1004,8 +1021,7 @@
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
-	struct tipc_group *grp = tsk->group;
-	struct tipc_nlist *dsts = tipc_group_dests(grp);
+	struct tipc_nlist *dsts;
 	struct tipc_mc_method *method = &tsk->mc_method;
 	bool ack = method->mandatory && method->rcast;
 	int blks = tsk_blocks(MCAST_H_SIZE + dlen);
@@ -1014,15 +1030,17 @@
 	struct sk_buff_head pkts;
 	int rc = -EHOSTUNREACH;
 
-	if (!dsts->local && !dsts->remote)
-		return -EHOSTUNREACH;
-
 	/* Block or return if any destination link or member is congested */
-	rc = tipc_wait_for_cond(sock, &timeout,	!tsk->cong_link_cnt &&
-				!tipc_group_bc_cong(grp, blks));
+	rc = tipc_wait_for_cond(sock, &timeout,
+				!tsk->cong_link_cnt && tsk->group &&
+				!tipc_group_bc_cong(tsk->group, blks));
 	if (unlikely(rc))
 		return rc;
 
+	dsts = tipc_group_dests(tsk->group);
+	if (!dsts->local && !dsts->remote)
+		return -EHOSTUNREACH;
+
 	/* Complete message header */
 	if (dest) {
 		msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
@@ -1034,13 +1052,13 @@
 	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
 	msg_set_destport(hdr, 0);
 	msg_set_destnode(hdr, 0);
-	msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
+	msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(tsk->group));
 
 	/* Avoid getting stuck with repeated forced replicasts */
 	msg_set_grp_bc_ack_req(hdr, ack);
 
 	/* Build message as chain of buffers */
-	skb_queue_head_init(&pkts);
+	__skb_queue_head_init(&pkts);
 	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
 	if (unlikely(rc != dlen))
 		return rc;
@@ -1206,8 +1224,10 @@
 	bool conn_cong;
 
 	/* Ignore if connection cannot be validated: */
-	if (!tsk_peer_msg(tsk, hdr))
+	if (!tsk_peer_msg(tsk, hdr)) {
+		trace_tipc_sk_drop_msg(sk, skb, TIPC_DUMP_NONE, "@proto_rcv!");
 		goto exit;
+	}
 
 	if (unlikely(msg_errcode(hdr))) {
 		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
@@ -1314,7 +1334,7 @@
 
 	if (unlikely(!dest)) {
 		dest = &tsk->peer;
-		if (!syn || dest->family != AF_TIPC)
+		if (!syn && dest->family != AF_TIPC)
 			return -EDESTADDRREQ;
 	}
 
@@ -1329,6 +1349,7 @@
 			tsk->conn_type = dest->addr.name.name.type;
 			tsk->conn_instance = dest->addr.name.name.instance;
 		}
+		msg_set_syn(hdr, 1);
 	}
 
 	seq = &dest->addr.nameseq;
@@ -1366,12 +1387,15 @@
 	if (unlikely(rc))
 		return rc;
 
-	skb_queue_head_init(&pkts);
+	__skb_queue_head_init(&pkts);
 	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
 	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
 	if (unlikely(rc != dlen))
 		return rc;
+	if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue)))
+		return -ENOMEM;
 
+	trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " ");
 	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
 	if (unlikely(rc == -ELINKCONG)) {
 		tipc_dest_push(clinks, dnode, 0);
@@ -1421,7 +1445,7 @@
 	int send, sent = 0;
 	int rc = 0;
 
-	skb_queue_head_init(&pkts);
+	__skb_queue_head_init(&pkts);
 
 	if (unlikely(dlen > INT_MAX))
 		return -EMSGSIZE;
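
The skb_queue_head_init() -> __skb_queue_head_init() conversions in this file all concern short-lived, on-stack queues such as pkts above. The double-underscore variant initializes only the head pointers and length, skipping spin_lock_init(), which is safe as long as the queue is accessed exclusively through the lockless __skb_* helpers. (Note that the node.c hunk above adds a spin_lock_init() in tipc_node_xmit() before handing such a list to tipc_sk_rcv() on the loopback path, where the lock is actually taken.) Sketch of the intended usage:

	struct sk_buff_head pkts;

	__skb_queue_head_init(&pkts);	/* head/len only, no lock init */
	__skb_queue_tail(&pkts, skb);	/* caller guarantees exclusivity */
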
@@ -1449,6 +1473,8 @@
 		if (unlikely(rc != send))
 			break;
 
+		trace_tipc_sk_sendstream(sk, skb_peek(&pkts),
+					 TIPC_DUMP_SK_SNDQ, " ");
 		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
 		if (unlikely(rc == -ELINKCONG)) {
 			tsk->cong_link_cnt = 1;
@@ -1490,6 +1516,7 @@
 	struct net *net = sock_net(sk);
 	struct tipc_msg *msg = &tsk->phdr;
 
+	msg_set_syn(msg, 0);
 	msg_set_destnode(msg, peer_node);
 	msg_set_destport(msg, peer_port);
 	msg_set_type(msg, TIPC_CONN_MSG);
@@ -1501,6 +1528,7 @@
 	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
 	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
 	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+	__skb_queue_purge(&sk->sk_write_queue);
 	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
 		return;
 
@@ -1652,7 +1680,7 @@
 static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
 {
 	struct sock *sk = sock->sk;
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 	long timeo = *timeop;
 	int err = sock_error(sk);
 
@@ -1660,15 +1688,17 @@
 		return err;
 
 	for (;;) {
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
 			if (sk->sk_shutdown & RCV_SHUTDOWN) {
 				err = -ENOTCONN;
 				break;
 			}
+			add_wait_queue(sk_sleep(sk), &wait);
 			release_sock(sk);
-			timeo = schedule_timeout(timeo);
+			timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+			sched_annotate_sleep();
 			lock_sock(sk);
+			remove_wait_queue(sk_sleep(sk), &wait);
 		}
 		err = 0;
 		if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -1684,7 +1714,6 @@
 		if (err)
 			break;
 	}
-	finish_wait(sk_sleep(sk), &wait);
 	*timeop = timeo;
 	return err;
 }
@@ -1776,7 +1805,7 @@
 
 	/* Send group flow control advertisement when applicable */
 	if (tsk->group && msg_in_group(hdr) && !grp_evt) {
-		skb_queue_head_init(&xmitq);
+		__skb_queue_head_init(&xmitq);
 		tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
 					  msg_orignode(hdr), msg_origport(hdr),
 					  &xmitq);
@@ -1957,6 +1986,8 @@
 		return;
 	case SOCK_WAKEUP:
 		tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
+		/* coupled with smp_rmb() in tipc_wait_for_cond() */
+		smp_wmb();
 		tsk->cong_link_cnt--;
 		wakeup = true;
 		break;
@@ -1978,91 +2009,90 @@
 }
 
 /**
- * tipc_filter_connect - Handle incoming message for a connection-based socket
+ * tipc_sk_filter_connect - check incoming message for a connection-based socket
  * @tsk: TIPC socket
- * @skb: pointer to message buffer. Set to NULL if buffer is consumed
- *
- * Returns true if everything ok, false otherwise
+ * @skb: pointer to message buffer.
+ * Returns true if the message should be added to the receive queue, false otherwise
  */
 static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
 {
 	struct sock *sk = &tsk->sk;
 	struct net *net = sock_net(sk);
 	struct tipc_msg *hdr = buf_msg(skb);
-	u32 pport = msg_origport(hdr);
-	u32 pnode = msg_orignode(hdr);
+	bool con_msg = msg_connected(hdr);
+	u32 pport = tsk_peer_port(tsk);
+	u32 pnode = tsk_peer_node(tsk);
+	u32 oport = msg_origport(hdr);
+	u32 onode = msg_orignode(hdr);
+	int err = msg_errcode(hdr);
+	unsigned long delay;
 
 	if (unlikely(msg_mcast(hdr)))
 		return false;
 
 	switch (sk->sk_state) {
 	case TIPC_CONNECTING:
-		/* Accept only ACK or NACK message */
-		if (unlikely(!msg_connected(hdr))) {
-			if (pport != tsk_peer_port(tsk) ||
-			    pnode != tsk_peer_node(tsk))
-				return false;
-
-			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
-			sk->sk_err = ECONNREFUSED;
+		/* Setup ACK */
+		if (likely(con_msg)) {
+			if (err)
+				break;
+			tipc_sk_finish_conn(tsk, oport, onode);
+			msg_set_importance(&tsk->phdr, msg_importance(hdr));
+			/* ACK+ message with data is added to receive queue */
+			if (msg_data_sz(hdr))
+				return true;
+			/* Empty ACK- message: wake up sleeping connect() and drop */
 			sk->sk_state_change(sk);
-			return true;
+			msg_set_dest_droppable(hdr, 1);
+			return false;
 		}
-
-		if (unlikely(msg_errcode(hdr))) {
-			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
-			sk->sk_err = ECONNREFUSED;
-			sk->sk_state_change(sk);
-			return true;
-		}
-
-		if (unlikely(!msg_isdata(hdr))) {
-			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
-			sk->sk_err = EINVAL;
-			sk->sk_state_change(sk);
-			return true;
-		}
-
-		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
-		msg_set_importance(&tsk->phdr, msg_importance(hdr));
-
-		/* If 'ACK+' message, add to socket receive queue */
-		if (msg_data_sz(hdr))
-			return true;
-
-		/* If empty 'ACK-' message, wake up sleeping connect() */
-		sk->sk_data_ready(sk);
-
-		/* 'ACK-' message is neither accepted nor rejected: */
-		msg_set_dest_droppable(hdr, 1);
-		return false;
-
-	case TIPC_OPEN:
-	case TIPC_DISCONNECTING:
-		break;
-	case TIPC_LISTEN:
-		/* Accept only SYN message */
-		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
-			return true;
-		break;
-	case TIPC_ESTABLISHED:
-		/* Accept only connection-based messages sent by peer */
-		if (unlikely(!tsk_peer_msg(tsk, hdr)))
+		/* Ignore connectionless message if not from listening socket */
+		if (oport != pport || onode != pnode)
 			return false;
 
-		if (unlikely(msg_errcode(hdr))) {
-			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
-			/* Let timer expire on it's own */
-			tipc_node_remove_conn(net, tsk_peer_node(tsk),
-					      tsk->portid);
-			sk->sk_state_change(sk);
-		}
+		/* Rejected SYN */
+		if (err != TIPC_ERR_OVERLOAD)
+			break;
+
+		/* Prepare for new setup attempt if we have a SYN clone */
+		if (skb_queue_empty(&sk->sk_write_queue))
+			break;
+		get_random_bytes(&delay, 2);
+		delay %= (tsk->conn_timeout / 4);
+		delay = msecs_to_jiffies(delay + 100);
+		sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
+		return false;
+	case TIPC_OPEN:
+	case TIPC_DISCONNECTING:
+		return false;
+	case TIPC_LISTEN:
+		/* Accept only SYN message */
+		if (!msg_is_syn(hdr) &&
+		    tipc_node_get_capabilities(net, onode) & TIPC_SYN_BIT)
+			return false;
+		if (!con_msg && !err)
+			return true;
+		return false;
+	case TIPC_ESTABLISHED:
+		/* Accept only connection-based messages sent by peer */
+		if (likely(con_msg && !err && pport == oport && pnode == onode))
+			return true;
+		if (!tsk_peer_msg(tsk, hdr))
+			return false;
+		if (!err)
+			return true;
+		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+		tipc_node_remove_conn(net, pnode, tsk->portid);
+		sk->sk_state_change(sk);
 		return true;
 	default:
 		pr_err("Unknown sk_state %u\n", sk->sk_state);
 	}
-
-	return false;
+	/* Abort connection setup attempt */
+	tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+	sk->sk_err = ECONNREFUSED;
+	sk->sk_state_change(sk);
+	return true;
 }
 
 /**
@@ -2089,13 +2119,13 @@
 	struct tipc_msg *hdr = buf_msg(skb);
 
 	if (unlikely(msg_in_group(hdr)))
-		return sk->sk_rcvbuf;
+		return READ_ONCE(sk->sk_rcvbuf);
 
 	if (unlikely(!msg_connected(hdr)))
-		return sk->sk_rcvbuf << msg_importance(hdr);
+		return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr);
 
 	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
-		return sk->sk_rcvbuf;
+		return READ_ONCE(sk->sk_rcvbuf);
 
 	return FLOWCTL_MSG_LIM;
 }
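
The READ_ONCE() annotations above mark sk_rcvbuf as a lockless read that can race with a concurrent setsockopt(SO_RCVBUF). The importance scaling itself is unchanged; as a worked example, with sk_rcvbuf = 1 MiB, an unconnected message of importance TIPC_HIGH_IMPORTANCE (2) gets a limit of 1 MiB << 2 = 4 MiB, so more important traffic is dropped later under load:

	unsigned int limit = READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr);
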
@@ -2120,8 +2150,10 @@
 	struct tipc_msg *hdr = buf_msg(skb);
 	struct net *net = sock_net(sk);
 	struct sk_buff_head inputq;
+	int mtyp = msg_type(hdr);
 	int limit, err = TIPC_OK;
 
+	trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " ");
 	TIPC_SKB_CB(skb)->bytes_read = 0;
 	__skb_queue_head_init(&inputq);
 	__skb_queue_tail(&inputq, skb);
@@ -2132,6 +2164,9 @@
 	if (unlikely(grp))
 		tipc_group_filter_msg(grp, &inputq, xmitq);
 
+	if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG)
+		tipc_mcast_filter_msg(net, &tsk->mc_method.deferredq, &inputq);
+
 	/* Validate and add to receive buffer if there is space */
 	while ((skb = __skb_dequeue(&inputq))) {
 		hdr = buf_msg(skb);
@@ -2141,17 +2176,25 @@
 		    (!grp && msg_in_group(hdr)))
 			err = TIPC_ERR_NO_PORT;
 		else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
+			trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL,
+					   "err_overload2!");
 			atomic_inc(&sk->sk_drops);
 			err = TIPC_ERR_OVERLOAD;
 		}
 
 		if (unlikely(err)) {
-			tipc_skb_reject(net, err, skb, xmitq);
+			if (tipc_msg_reverse(tipc_own_addr(net), &skb, err)) {
+				trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE,
+						      "@filter_rcv!");
+				__skb_queue_tail(xmitq, skb);
+			}
 			err = TIPC_OK;
 			continue;
 		}
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		skb_set_owner_r(skb, sk);
+		trace_tipc_sk_overlimit2(sk, skb, TIPC_DUMP_ALL,
+					 "rcvq >90% allocated!");
 		sk->sk_data_ready(sk);
 	}
 }
@@ -2217,14 +2260,21 @@
 		if (!sk->sk_backlog.len)
 			atomic_set(dcnt, 0);
 		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
-		if (likely(!sk_add_backlog(sk, skb, lim)))
+		if (likely(!sk_add_backlog(sk, skb, lim))) {
+			trace_tipc_sk_overlimit1(sk, skb, TIPC_DUMP_ALL,
+						 "bklg & rcvq >90% allocated!");
 			continue;
+		}
 
+		trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload!");
 		/* Overload => reject message back to sender */
 		onode = tipc_own_addr(sock_net(sk));
 		atomic_inc(&sk->sk_drops);
-		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
+		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) {
+			trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_ALL,
+					      "@sk_enqueue!");
 			__skb_queue_tail(xmitq, skb);
+		}
 		break;
 	}
 }
@@ -2273,6 +2323,8 @@
 		/* Prepare for message rejection */
 		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
 			continue;
+
+		trace_tipc_sk_rej_msg(NULL, skb, TIPC_DUMP_NONE, "@sk_rcv!");
 xmit:
 		dnode = msg_destnode(buf_msg(skb));
 		tipc_node_xmit_skb(net, skb, dnode, dport);
@@ -2302,6 +2354,16 @@
 	return 0;
 }
 
+static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr)
+{
+	if (addr->family != AF_TIPC)
+		return false;
+	if (addr->addrtype == TIPC_SERVICE_RANGE)
+		return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper);
+	return (addr->addrtype == TIPC_SERVICE_ADDR ||
+		addr->addrtype == TIPC_SOCKET_ADDR);
+}
+
 /**
  * tipc_connect - establish a connection to another TIPC port
  * @sock: socket structure
@@ -2337,18 +2399,18 @@
 		if (!tipc_sk_type_connectionless(sk))
 			res = -EINVAL;
 		goto exit;
-	} else if (dst->family != AF_TIPC) {
-		res = -EINVAL;
 	}
-	if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
+	if (!tipc_sockaddr_is_sane(dst)) {
 		res = -EINVAL;
-	if (res)
 		goto exit;
-
+	}
 	/* DGRAM/RDM connect(), just save the destaddr */
 	if (tipc_sk_type_connectionless(sk)) {
 		memcpy(&tsk->peer, dest, destlen);
 		goto exit;
+	} else if (dst->addrtype == TIPC_SERVICE_RANGE) {
+		res = -EINVAL;
+		goto exit;
 	}
 
 	previous = sk->sk_state;
@@ -2374,7 +2436,7 @@
 		 * case is EINPROGRESS, rather than EALREADY.
 		 */
 		res = -EINPROGRESS;
-		/* fall thru' */
+		/* fall through */
 	case TIPC_CONNECTING:
 		if (!timeout) {
 			if (previous == TIPC_CONNECTING)
@@ -2546,6 +2608,7 @@
 
 	lock_sock(sk);
 
+	trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " ");
 	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
 	sk->sk_shutdown = SEND_SHUTDOWN;
 
@@ -2564,43 +2627,78 @@
 	return res;
 }
 
+static void tipc_sk_check_probing_state(struct sock *sk,
+					struct sk_buff_head *list)
+{
+	struct tipc_sock *tsk = tipc_sk(sk);
+	u32 pnode = tsk_peer_node(tsk);
+	u32 pport = tsk_peer_port(tsk);
+	u32 self = tsk_own_node(tsk);
+	u32 oport = tsk->portid;
+	struct sk_buff *skb;
+
+	if (tsk->probe_unacked) {
+		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+		sk->sk_err = ECONNABORTED;
+		tipc_node_remove_conn(sock_net(sk), pnode, pport);
+		sk->sk_state_change(sk);
+		return;
+	}
+	/* Prepare new probe */
+	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
+			      pnode, self, pport, oport, TIPC_OK);
+	if (skb)
+		__skb_queue_tail(list, skb);
+	tsk->probe_unacked = true;
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
+}
+
+static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
+{
+	struct tipc_sock *tsk = tipc_sk(sk);
+
+	/* Try again later if dest link is congested */
+	if (tsk->cong_link_cnt) {
+		sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
+		return;
+	}
+	/* Prepare SYN for retransmit */
+	tipc_msg_skb_clone(&sk->sk_write_queue, list);
+}
+
 static void tipc_sk_timeout(struct timer_list *t)
 {
 	struct sock *sk = from_timer(sk, t, sk_timer);
 	struct tipc_sock *tsk = tipc_sk(sk);
-	u32 peer_port = tsk_peer_port(tsk);
-	u32 peer_node = tsk_peer_node(tsk);
-	u32 own_node = tsk_own_node(tsk);
-	u32 own_port = tsk->portid;
-	struct net *net = sock_net(sk);
-	struct sk_buff *skb = NULL;
+	u32 pnode = tsk_peer_node(tsk);
+	struct sk_buff_head list;
+	int rc = 0;
 
+	__skb_queue_head_init(&list);
 	bh_lock_sock(sk);
-	if (!tipc_sk_connected(sk))
-		goto exit;
 
 	/* Try again later if socket is busy */
 	if (sock_owned_by_user(sk)) {
 		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
-		goto exit;
+		bh_unlock_sock(sk);
+		return;
 	}
 
-	if (tsk->probe_unacked) {
-		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
-		tipc_node_remove_conn(net, peer_node, peer_port);
-		sk->sk_state_change(sk);
-		goto exit;
-	}
-	/* Send new probe */
-	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
-			      peer_node, own_node, peer_port, own_port,
-			      TIPC_OK);
-	tsk->probe_unacked = true;
-	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
-exit:
+	if (sk->sk_state == TIPC_ESTABLISHED)
+		tipc_sk_check_probing_state(sk, &list);
+	else if (sk->sk_state == TIPC_CONNECTING)
+		tipc_sk_retry_connect(sk, &list);
+
 	bh_unlock_sock(sk);
-	if (skb)
-		tipc_node_xmit_skb(net, skb, peer_node, own_port);
+
+	if (!skb_queue_empty(&list))
+		rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid);
+
+	/* SYN messages may cause link congestion */
+	if (rc == -ELINKCONG) {
+		tipc_dest_push(&tsk->cong_links, pnode, 0);
+		tsk->cong_link_cnt = 1;
+	}
 	sock_put(sk);
 }
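
Together with the sk_write_queue changes earlier in this file, the reworked timer gives TIPC connection setup a retransmit scheme. The flow, reconstructed from the hunks in this patch (a sketch, not an authoritative description):

	__tipc_sendmsg():        clone the SYN into sk->sk_write_queue
	peer returns OVERLOAD:   tipc_sk_filter_connect() arms sk_timer with
	                         a randomized delay (100 ms plus up to a
	                         quarter of conn_timeout)
	tipc_sk_timeout():       in TIPC_CONNECTING, tipc_sk_retry_connect()
	                         clones the saved SYN onto the xmit list,
	                         unless the link is still congested
	tipc_sk_finish_conn():   connection established -> purge the queue
	__tipc_shutdown():       also purges any still-pending SYN
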
 
@@ -2683,11 +2781,15 @@
 		rhashtable_walk_start(&iter);
 
 		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
-			spin_lock_bh(&tsk->sk.sk_lock.slock);
+			sock_hold(&tsk->sk);
+			rhashtable_walk_stop(&iter);
+			lock_sock(&tsk->sk);
 			msg = &tsk->phdr;
 			msg_set_prevnode(msg, tipc_own_addr(net));
 			msg_set_orignode(msg, tipc_own_addr(net));
-			spin_unlock_bh(&tsk->sk.sk_lock.slock);
+			release_sock(&tsk->sk);
+			rhashtable_walk_start(&iter);
+			sock_put(&tsk->sk);
 		}
 
 		rhashtable_walk_stop(&iter);
@@ -2967,6 +3069,9 @@
 	case TIPC_SOCK_RECVQ_DEPTH:
 		value = skb_queue_len(&sk->sk_receive_queue);
 		break;
+	case TIPC_SOCK_RECVQ_USED:
+		value = sk_rmem_alloc_get(sk);
+		break;
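
TIPC_SOCK_RECVQ_USED is new in this patch and reports the bytes currently allocated in the receive queue, complementing TIPC_SOCK_RECVQ_DEPTH, which counts buffers. A hypothetical userspace query, assuming the constant is exported through linux/tipc.h:

	int used;
	socklen_t optlen = sizeof(used);

	if (!getsockopt(fd, SOL_TIPC, TIPC_SOCK_RECVQ_USED, &used, &optlen))
		printf("rx queue bytes in use: %d\n", used);
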
 	case TIPC_GROUP_JOIN:
 		seq.type = 0;
 		if (tsk->group)
@@ -3167,7 +3272,9 @@
 	peer_node = tsk_peer_node(tsk);
 	peer_port = tsk_peer_port(tsk);
 
-	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);
+	nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON);
+	if (!nest)
+		return -EMSGSIZE;
 
 	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
 		goto msg_full;
@@ -3224,7 +3331,7 @@
 	if (!hdr)
 		goto msg_cancel;
 
-	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
+	attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK);
 	if (!attrs)
 		goto genlmsg_cancel;
 
@@ -3329,7 +3436,7 @@
 	if (!(sk_filter_state & (1 << sk->sk_state)))
 		return 0;
 
-	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
+	attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK);
 	if (!attrs)
 		goto msg_cancel;
 
@@ -3347,7 +3454,7 @@
 			      TIPC_NLA_SOCK_PAD))
 		goto attr_msg_cancel;
 
-	stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT);
+	stat = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_STAT);
 	if (!stat)
 		goto attr_msg_cancel;
 
@@ -3404,7 +3511,7 @@
 	if (!hdr)
 		goto msg_cancel;
 
-	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
+	attrs = nla_nest_start_noflag(skb, TIPC_NLA_PUBL);
 	if (!attrs)
 		goto genlmsg_cancel;
 
@@ -3491,9 +3598,9 @@
 		if (!attrs[TIPC_NLA_SOCK])
 			return -EINVAL;
 
-		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
-				       attrs[TIPC_NLA_SOCK],
-				       tipc_nl_sock_policy, NULL);
+		err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX,
+						  attrs[TIPC_NLA_SOCK],
+						  tipc_nl_sock_policy, NULL);
 		if (err)
 			return err;
 
@@ -3523,3 +3630,187 @@
 
 	return skb->len;
 }
+
+/**
+ * tipc_sk_filtering - check if a socket should be traced
+ * @sk: the socket to be examined
+ * @sysctl_tipc_sk_filter[]: the socket tuple for filtering,
+ *  (portid, sock type, name type, name lower, name upper)
+ *
+ * Returns true if the socket matches the filter tuple (a value of
+ * 0 means 'any') or when no tuple is set (all zero), otherwise false
+ */
+bool tipc_sk_filtering(struct sock *sk)
+{
+	struct tipc_sock *tsk;
+	struct publication *p;
+	u32 _port, _sktype, _type, _lower, _upper;
+	u32 type = 0, lower = 0, upper = 0;
+
+	if (!sk)
+		return true;
+
+	tsk = tipc_sk(sk);
+
+	_port = sysctl_tipc_sk_filter[0];
+	_sktype = sysctl_tipc_sk_filter[1];
+	_type = sysctl_tipc_sk_filter[2];
+	_lower = sysctl_tipc_sk_filter[3];
+	_upper = sysctl_tipc_sk_filter[4];
+
+	if (!_port && !_sktype && !_type && !_lower && !_upper)
+		return true;
+
+	if (_port)
+		return (_port == tsk->portid);
+
+	if (_sktype && _sktype != sk->sk_type)
+		return false;
+
+	if (tsk->published) {
+		p = list_first_entry_or_null(&tsk->publications,
+					     struct publication, binding_sock);
+		if (p) {
+			type = p->type;
+			lower = p->lower;
+			upper = p->upper;
+		}
+	}
+
+	if (!tipc_sk_type_connectionless(sk)) {
+		type = tsk->conn_type;
+		lower = tsk->conn_instance;
+		upper = tsk->conn_instance;
+	}
+
+	if ((_type && _type != type) || (_lower && _lower != lower) ||
+	    (_upper && _upper != upper))
+		return false;
+
+	return true;
+}
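
As a worked example, assuming the tuple is exposed through the sk_filter sysctl added later in this patch (net.tipc.sk_filter, five values): "4242 0 0 0 0" traces only the socket with port id 4242 (when a port id is given the remaining fields are ignored, since the function returns on the port comparison alone), while "0 2 0 0 0" (SOCK_DGRAM is 2) traces all datagram sockets, and the all-zero default traces every socket.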
+
+u32 tipc_sock_get_portid(struct sock *sk)
+{
+	return (sk) ? (tipc_sk(sk))->portid : 0;
+}
+
+/**
+ * tipc_sk_overlimit1 - check if socket rx queue is about to be overloaded,
+ *			both the rcv and backlog queues are considered
+ * @sk: tipc sk to be checked
+ * @skb: tipc msg to be checked
+ *
+ * Returns true if the socket rx queue allocation is > 90%, otherwise false
+ */
+
+bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb)
+{
+	atomic_t *dcnt = &tipc_sk(sk)->dupl_rcvcnt;
+	unsigned int lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
+	unsigned int qsize = sk->sk_backlog.len + sk_rmem_alloc_get(sk);
+
+	return (qsize > lim * 90 / 100);
+}
+
+/**
+ * tipc_sk_overlimit2 - check if socket rx queue is about to be overloaded,
+ *			only the rcv queue is considered
+ * @sk: tipc sk to be checked
+ * @skb: tipc msg to be checked
+ *
+ * Returns true if the socket rx queue allocation is > 90%, otherwise false
+ */
+
+bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb)
+{
+	unsigned int lim = rcvbuf_limit(sk, skb);
+	unsigned int qsize = sk_rmem_alloc_get(sk);
+
+	return (qsize > lim * 90 / 100);
+}
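
Concretely: if rcvbuf_limit() reports a 2 MiB limit for the message's importance level, the trace condition fires once the queued truesize exceeds 2 MiB * 90 / 100, about 1.8 MiB. The integer expression lim * 90 / 100 multiplies first, so no precision is lost, and overflow would require a limit above roughly 47 MB on a 32-bit unsigned int.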
+
+/**
+ * tipc_sk_dump - dump TIPC socket
+ * @sk: tipc sk to be dumped
+ * @dqueues: bitmask selecting which socket queues to dump:
+ *           - TIPC_DUMP_NONE: don't dump socket queues
+ *           - TIPC_DUMP_SK_SNDQ: dump socket send queue
+ *           - TIPC_DUMP_SK_RCVQ: dump socket rcv queue
+ *           - TIPC_DUMP_SK_BKLGQ: dump socket backlog queue
+ *           - TIPC_DUMP_ALL: dump all the socket queues above
+ * @buf: returned buffer of dump data in format
+ */
+int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
+{
+	int i = 0;
+	size_t sz = (dqueues) ? SK_LMAX : SK_LMIN;
+	struct tipc_sock *tsk;
+	struct publication *p;
+	bool tsk_connected;
+
+	if (!sk) {
+		i += scnprintf(buf, sz, "sk data: (null)\n");
+		return i;
+	}
+
+	tsk = tipc_sk(sk);
+	tsk_connected = !tipc_sk_type_connectionless(sk);
+
+	i += scnprintf(buf, sz, "sk data: %u", sk->sk_type);
+	i += scnprintf(buf + i, sz - i, " %d", sk->sk_state);
+	i += scnprintf(buf + i, sz - i, " %x", tsk_own_node(tsk));
+	i += scnprintf(buf + i, sz - i, " %u", tsk->portid);
+	i += scnprintf(buf + i, sz - i, " | %u", tsk_connected);
+	if (tsk_connected) {
+		i += scnprintf(buf + i, sz - i, " %x", tsk_peer_node(tsk));
+		i += scnprintf(buf + i, sz - i, " %u", tsk_peer_port(tsk));
+		i += scnprintf(buf + i, sz - i, " %u", tsk->conn_type);
+		i += scnprintf(buf + i, sz - i, " %u", tsk->conn_instance);
+	}
+	i += scnprintf(buf + i, sz - i, " | %u", tsk->published);
+	if (tsk->published) {
+		p = list_first_entry_or_null(&tsk->publications,
+					     struct publication, binding_sock);
+		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0);
+		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0);
+		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->upper : 0);
+	}
+	i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win);
+	i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win);
+	i += scnprintf(buf + i, sz - i, " %u", tsk->max_pkt);
+	i += scnprintf(buf + i, sz - i, " %x", tsk->peer_caps);
+	i += scnprintf(buf + i, sz - i, " %u", tsk->cong_link_cnt);
+	i += scnprintf(buf + i, sz - i, " %u", tsk->snt_unacked);
+	i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_unacked);
+	i += scnprintf(buf + i, sz - i, " %u", atomic_read(&tsk->dupl_rcvcnt));
+	i += scnprintf(buf + i, sz - i, " %u", sk->sk_shutdown);
+	i += scnprintf(buf + i, sz - i, " | %d", sk_wmem_alloc_get(sk));
+	i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf);
+	i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk));
+	i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf);
+	i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len));
+
+	if (dqueues & TIPC_DUMP_SK_SNDQ) {
+		i += scnprintf(buf + i, sz - i, "sk_write_queue: ");
+		i += tipc_list_dump(&sk->sk_write_queue, false, buf + i);
+	}
+
+	if (dqueues & TIPC_DUMP_SK_RCVQ) {
+		i += scnprintf(buf + i, sz - i, "sk_receive_queue: ");
+		i += tipc_list_dump(&sk->sk_receive_queue, false, buf + i);
+	}
+
+	if (dqueues & TIPC_DUMP_SK_BKLGQ) {
+		i += scnprintf(buf + i, sz - i, "sk_backlog:\n  head ");
+		i += tipc_skb_dump(sk->sk_backlog.head, false, buf + i);
+		if (sk->sk_backlog.tail != sk->sk_backlog.head) {
+			i += scnprintf(buf + i, sz - i, "  tail ");
+			i += tipc_skb_dump(sk->sk_backlog.tail, false,
+					   buf + i);
+		}
+	}
+
+	return i;
+}
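
A note on the accumulation idiom used throughout these dump helpers: unlike snprintf(), scnprintf() returns the number of characters actually written (excluding the trailing NUL) and returns 0 when the size argument is 0, so the running index can never step past the end of the buffer even when output is truncated. A standalone sketch with made-up values:

	char buf[200];
	int i = 0, state = 0;
	u32 type = 1;

	i += scnprintf(buf + i, sizeof(buf) - i, "sk data: %u", type);
	i += scnprintf(buf + i, sizeof(buf) - i, " %d\n", state);
	/* i is always <= sizeof(buf) - 1, so sizeof(buf) - i never underflows */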
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 5e575f2..235b967 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -71,4 +71,8 @@
 int tipc_dump_start(struct netlink_callback *cb);
 int __tipc_dump_start(struct netlink_callback *cb, struct net *net);
 int tipc_dump_done(struct netlink_callback *cb);
+u32 tipc_sock_get_portid(struct sock *sk);
+bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb);
+bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb);
+
 #endif
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index d793b43..aa015c2 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -77,8 +77,9 @@
 			     u32 found_lower, u32 found_upper,
 			     u32 event, u32 port, u32 node,
 			     u32 scope, int must);
-int tipc_topsrv_start(struct net *net);
-void tipc_topsrv_stop(struct net *net);
+
+int __net_init tipc_topsrv_init_net(struct net *net);
+void __net_exit tipc_topsrv_exit_net(struct net *net);
 
 void tipc_sub_put(struct tipc_subscription *subscription);
 void tipc_sub_get(struct tipc_subscription *subscription);
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 1a779b1..6159d32 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -34,6 +34,7 @@
  */
 
 #include "core.h"
+#include "trace.h"
 
 #include <linux/sysctl.h>
 
@@ -45,14 +46,23 @@
 		.data		= &sysctl_tipc_rmem,
 		.maxlen		= sizeof(sysctl_tipc_rmem),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1         = SYSCTL_ONE,
 	},
 	{
 		.procname	= "named_timeout",
 		.data		= &sysctl_tipc_named_timeout,
 		.maxlen		= sizeof(sysctl_tipc_named_timeout),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1         = SYSCTL_ZERO,
+	},
+	{
+		.procname       = "sk_filter",
+		.data           = &sysctl_tipc_sk_filter,
+		.maxlen         = sizeof(sysctl_tipc_sk_filter),
+		.mode           = 0644,
+		.proc_handler   = proc_doulongvec_minmax,
 	},
 	{}
 };
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index b84c005..3a12fc1 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -40,6 +40,7 @@
 #include "socket.h"
 #include "addr.h"
 #include "msg.h"
+#include "bearer.h"
 #include <net/sock.h>
 #include <linux/module.h>
 
@@ -57,16 +58,11 @@
  * @idr_lock: protect the connection identifier set
 * @idr_in_use: number of allocated identifier entries
 * @net: network namespace instance
- * @rcvbuf_cache: memory cache of server receive buffer
+ * @awork: accept work item
  * @rcv_wq: receive workqueue
  * @send_wq: send workqueue
- * @max_rcvbuf_size: maximum permitted receive message length
- * @tipc_conn_new: callback will be called when new connection is incoming
- * @tipc_conn_release: callback will be called before releasing the connection
- * @tipc_conn_recvmsg: callback will be called when message arrives
+ * @listener: topsrv listener socket
  * @name: server name
- * @imp: message importance
- * @type: socket type
  */
 struct tipc_topsrv {
 	struct idr conn_idr;
@@ -76,7 +72,6 @@
 	struct work_struct awork;
 	struct workqueue_struct *rcv_wq;
 	struct workqueue_struct *send_wq;
-	int max_rcvbuf_size;
 	struct socket *listener;
 	char name[TIPC_SERVER_NAME_LEN];
 };
@@ -90,9 +85,7 @@
  * @server: pointer to connected server
 * @sub_list: list of all pertaining subscriptions
  * @sub_lock: lock protecting the subscription list
- * @outqueue_lock: control access to the outqueue
  * @rwork: receive work item
- * @rx_action: what to do when connection socket is active
  * @outqueue: pointer to first outbound message in queue
  * @outqueue_lock: control access to the outqueue
  * @swork: send work item
@@ -371,6 +364,7 @@
 	struct tipc_subscription *sub;
 
 	if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
+		s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL);
 		tipc_conn_delete_sub(con, s);
 		return 0;
 	}
@@ -400,11 +394,11 @@
 	iov.iov_base = &s;
 	iov.iov_len = sizeof(s);
 	msg.msg_name = NULL;
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, iov.iov_len);
 	ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
 	if (ret == -EWOULDBLOCK)
 		return -EWOULDBLOCK;
-	if (ret > 0) {
+	if (ret == sizeof(s)) {
 		read_lock_bh(&sk->sk_callback_lock);
 		ret = tipc_conn_rcv_sub(srv, con, &s);
 		read_unlock_bh(&sk->sk_callback_lock);
@@ -483,7 +477,7 @@
 	}
 }
 
-/* tipc_toprsv_listener_data_ready - interrupt callback with connection request
+/* tipc_topsrv_listener_data_ready - interrupt callback with connection request
  * The queued job is launched into tipc_topsrv_accept()
  */
 static void tipc_topsrv_listener_data_ready(struct sock *sk)
@@ -615,6 +609,7 @@
 	memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
 	skb_queue_head_init(&evtq);
 	__skb_queue_tail(&evtq, skb);
+	tipc_loopback_trace(net, &evtq);
 	tipc_sk_rcv(net, &evtq);
 }
 
@@ -642,7 +637,7 @@
 	destroy_workqueue(s->send_wq);
 }
 
-int tipc_topsrv_start(struct net *net)
+static int tipc_topsrv_start(struct net *net)
 {
 	struct tipc_net *tn = tipc_net(net);
 	const char name[] = "topology_server";
@@ -654,7 +649,6 @@
 		return -ENOMEM;
 
 	srv->net = net;
-	srv->max_rcvbuf_size = sizeof(struct tipc_subscr);
 	INIT_WORK(&srv->awork, tipc_topsrv_accept);
 
 	strscpy(srv->name, name, sizeof(srv->name));
@@ -676,7 +670,7 @@
 	return ret;
 }
 
-void tipc_topsrv_stop(struct net *net)
+static void tipc_topsrv_stop(struct net *net)
 {
 	struct tipc_topsrv *srv = tipc_topsrv(net);
 	struct socket *lsock = srv->listener;
@@ -701,3 +695,13 @@
 	idr_destroy(&srv->conn_idr);
 	kfree(srv);
 }
+
+int __net_init tipc_topsrv_init_net(struct net *net)
+{
+	return tipc_topsrv_start(net);
+}
+
+void __net_exit tipc_topsrv_exit_net(struct net *net)
+{
+	tipc_topsrv_stop(net);
+}
diff --git a/net/tipc/trace.c b/net/tipc/trace.c
new file mode 100644
index 0000000..265f6a2
--- /dev/null
+++ b/net/tipc/trace.c
@@ -0,0 +1,206 @@
+/*
+ * net/tipc/trace.c: TIPC tracepoints code
+ *
+ * Copyright (c) 2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+/**
+ * socket tuples for filtering in socket traces:
+ * (portid, sock type, name type, name lower, name upper)
+ */
+unsigned long sysctl_tipc_sk_filter[5] __read_mostly = {0, };
+
+/**
+ * tipc_skb_dump - dump TIPC skb data
+ * @skb: skb to be dumped
+ * @more: dump more?
+ *        - false: dump only tipc msg data
+ *        - true: dump kernel-related skb data and tipc cb[] array as well
+ * @buf: returned buffer of dump data in format
+ */
+int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf)
+{
+	int i = 0;
+	size_t sz = (more) ? SKB_LMAX : SKB_LMIN;
+	struct tipc_msg *hdr;
+	struct tipc_skb_cb *skbcb;
+
+	if (!skb) {
+		i += scnprintf(buf, sz, "msg: (null)\n");
+		return i;
+	}
+
+	hdr = buf_msg(skb);
+	skbcb = TIPC_SKB_CB(skb);
+
+	/* tipc msg data section */
+	i += scnprintf(buf, sz, "msg: %u", msg_user(hdr));
+	i += scnprintf(buf + i, sz - i, " %u", msg_type(hdr));
+	i += scnprintf(buf + i, sz - i, " %u", msg_hdr_sz(hdr));
+	i += scnprintf(buf + i, sz - i, " %u", msg_data_sz(hdr));
+	i += scnprintf(buf + i, sz - i, " %x", msg_orignode(hdr));
+	i += scnprintf(buf + i, sz - i, " %x", msg_destnode(hdr));
+	i += scnprintf(buf + i, sz - i, " %u", msg_seqno(hdr));
+	i += scnprintf(buf + i, sz - i, " %u", msg_ack(hdr));
+	i += scnprintf(buf + i, sz - i, " %u", msg_bcast_ack(hdr));
+	switch (msg_user(hdr)) {
+	case LINK_PROTOCOL:
+		i += scnprintf(buf + i, sz - i, " %c", msg_net_plane(hdr));
+		i += scnprintf(buf + i, sz - i, " %u", msg_probe(hdr));
+		i += scnprintf(buf + i, sz - i, " %u", msg_peer_stopping(hdr));
+		i += scnprintf(buf + i, sz - i, " %u", msg_session(hdr));
+		i += scnprintf(buf + i, sz - i, " %u", msg_next_sent(hdr));
+		i += scnprintf(buf + i, sz - i, " %u", msg_seq_gap(hdr));
+		i += scnprintf(buf + i, sz - i, " %u", msg_bc_snd_nxt(hdr));
+		i += scnprintf(buf + i, sz - i, " %u", msg_bc_gap(hdr));
+		break;
+	case TIPC_LOW_IMPORTANCE:
+	case TIPC_MEDIUM_IMPORTANCE:
+	case TIPC_HIGH_IMPORTANCE:
+	case TIPC_CRITICAL_IMPORTANCE:
+	case CONN_MANAGER:
+	case SOCK_WAKEUP:
+		i += scnprintf(buf + i, sz - i, " | %u", msg_origport(hdr));
+		i += scnprintf(buf + i, sz - i, " %u", msg_destport(hdr));
+		switch (msg_type(hdr)) {
+		case TIPC_NAMED_MSG:
+			i += scnprintf(buf + i, sz - i, " %u",
+				       msg_nametype(hdr));
+			i += scnprintf(buf + i, sz - i, " %u",
+				       msg_nameinst(hdr));
+			break;
+		case TIPC_MCAST_MSG:
+			i += scnprintf(buf + i, sz - i, " %u",
+				       msg_nametype(hdr));
+			i += scnprintf(buf + i, sz - i, " %u",
+				       msg_namelower(hdr));
+			i += scnprintf(buf + i, sz - i, " %u",
+				       msg_nameupper(hdr));
+			break;
+		default:
+			break;
+		}
+		i += scnprintf(buf + i, sz - i, " | %u",
+			       msg_src_droppable(hdr));
+		i += scnprintf(buf + i, sz - i, " %u",
+			       msg_dest_droppable(hdr));
+		i += scnprintf(buf + i, sz - i, " %u", msg_errcode(hdr));
+		i += scnprintf(buf + i, sz - i, " %u", msg_reroute_cnt(hdr));
+		break;
+	default:
+		/* need more? */
+		break;
+	}
+
+	i += scnprintf(buf + i, sz - i, "\n");
+	if (!more)
+		return i;
+
+	/* kernel-related skb data section */
+	i += scnprintf(buf + i, sz - i, "skb: %s",
+		       (skb->dev) ? skb->dev->name : "n/a");
+	i += scnprintf(buf + i, sz - i, " %u", skb->len);
+	i += scnprintf(buf + i, sz - i, " %u", skb->data_len);
+	i += scnprintf(buf + i, sz - i, " %u", skb->hdr_len);
+	i += scnprintf(buf + i, sz - i, " %u", skb->truesize);
+	i += scnprintf(buf + i, sz - i, " %u", skb_cloned(skb));
+	i += scnprintf(buf + i, sz - i, " %p", skb->sk);
+	i += scnprintf(buf + i, sz - i, " %u", skb_shinfo(skb)->nr_frags);
+	i += scnprintf(buf + i, sz - i, " %llx",
+		       ktime_to_ms(skb_get_ktime(skb)));
+	i += scnprintf(buf + i, sz - i, " %llx\n",
+		       ktime_to_ms(skb_hwtstamps(skb)->hwtstamp));
+
+	/* tipc skb cb[] data section */
+	i += scnprintf(buf + i, sz - i, "cb[]: %u", skbcb->bytes_read);
+	i += scnprintf(buf + i, sz - i, " %u", skbcb->orig_member);
+	i += scnprintf(buf + i, sz - i, " %u",
+		       jiffies_to_msecs(skbcb->nxt_retr));
+	i += scnprintf(buf + i, sz - i, " %u", skbcb->validated);
+	i += scnprintf(buf + i, sz - i, " %u", skbcb->chain_imp);
+	i += scnprintf(buf + i, sz - i, " %u\n", skbcb->ackers);
+
+	return i;
+}
+
+/**
+ * tipc_list_dump - dump TIPC skb list/queue
+ * @list: list of skbs to be dumped
+ * @more: dump more?
+ *        - false: dump only the head & tail skbs
+ *        - true: dump the first & last 5 skbs
+ * @buf: returned buffer of dump data in format
+ */
+int tipc_list_dump(struct sk_buff_head *list, bool more, char *buf)
+{
+	int i = 0;
+	size_t sz = (more) ? LIST_LMAX : LIST_LMIN;
+	u32 count, len;
+	struct sk_buff *hskb, *tskb, *skb, *tmp;
+
+	if (!list) {
+		i += scnprintf(buf, sz, "(null)\n");
+		return i;
+	}
+
+	len = skb_queue_len(list);
+	i += scnprintf(buf, sz, "len = %d\n", len);
+
+	if (!len)
+		return i;
+
+	if (!more) {
+		hskb = skb_peek(list);
+		i += scnprintf(buf + i, sz - i, "  head ");
+		i += tipc_skb_dump(hskb, false, buf + i);
+		if (len > 1) {
+			tskb = skb_peek_tail(list);
+			i += scnprintf(buf + i, sz - i, "  tail ");
+			i += tipc_skb_dump(tskb, false, buf + i);
+		}
+	} else {
+		count = 0;
+		skb_queue_walk_safe(list, skb, tmp) {
+			count++;
+			if (count == 6)
+				i += scnprintf(buf + i, sz - i, "  .\n  .\n");
+			if (count > 5 && count <= len - 5)
+				continue;
+			i += scnprintf(buf + i, sz - i, "  #%d ", count);
+			i += tipc_skb_dump(skb, false, buf + i);
+		}
+	}
+	return i;
+}
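
For a 20-entry queue dumped with more = true, this prints entries #1-#5, a single elision marker (emitted exactly once, when count reaches 6), then entries #16-#20; with more = false only the head and, if present, the tail skb are printed.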
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
new file mode 100644
index 0000000..4d8e004
--- /dev/null
+++ b/net/tipc/trace.h
@@ -0,0 +1,431 @@
+/*
+ * net/tipc/trace.h: TIPC tracepoints
+ *
+ * Copyright (c) 2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tipc
+
+#if !defined(_TIPC_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TIPC_TRACE_H
+
+#include <linux/tracepoint.h>
+#include "core.h"
+#include "link.h"
+#include "socket.h"
+#include "node.h"
+
+#define SKB_LMIN	(100)
+#define SKB_LMAX	(SKB_LMIN * 2)
+#define LIST_LMIN	(SKB_LMIN * 3)
+#define LIST_LMAX	(SKB_LMIN * 11)
+#define SK_LMIN		(SKB_LMIN * 2)
+#define SK_LMAX		(SKB_LMIN * 11)
+#define LINK_LMIN	(SKB_LMIN)
+#define LINK_LMAX	(SKB_LMIN * 16)
+#define NODE_LMIN	(SKB_LMIN)
+#define NODE_LMAX	(SKB_LMIN * 11)
+
+#ifndef __TIPC_TRACE_ENUM
+#define __TIPC_TRACE_ENUM
+enum {
+	TIPC_DUMP_NONE		= 0,
+
+	TIPC_DUMP_TRANSMQ	= 1,
+	TIPC_DUMP_BACKLOGQ	= (1 << 1),
+	TIPC_DUMP_DEFERDQ	= (1 << 2),
+	TIPC_DUMP_INPUTQ	= (1 << 3),
+	TIPC_DUMP_WAKEUP        = (1 << 4),
+
+	TIPC_DUMP_SK_SNDQ	= (1 << 8),
+	TIPC_DUMP_SK_RCVQ	= (1 << 9),
+	TIPC_DUMP_SK_BKLGQ	= (1 << 10),
+	TIPC_DUMP_ALL		= 0xffffu
+};
+#endif
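
The masks combine with bitwise OR, e.g. a call like tipc_sk_dump(sk, TIPC_DUMP_SK_SNDQ | TIPC_DUMP_SK_RCVQ, buf) dumps the send and receive queues but skips the backlog. The link-level masks occupy the low byte and the socket-level masks the second byte, so they never collide and a single u16 value works for both tipc_link_dump() and tipc_sk_dump().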
+
+/* Link & Node FSM states: */
+#define state_sym(val)							  \
+	__print_symbolic(val,						  \
+			{(0xe),		"ESTABLISHED"			},\
+			{(0xe << 4),	"ESTABLISHING"			},\
+			{(0x1 << 8),	"RESET"				},\
+			{(0x2 << 12),	"RESETTING"			},\
+			{(0xd << 16),	"PEER_RESET"			},\
+			{(0xf << 20),	"FAILINGOVER"			},\
+			{(0xc << 24),	"SYNCHING"			},\
+			{(0xdd),	"SELF_DOWN_PEER_DOWN"		},\
+			{(0xaa),	"SELF_UP_PEER_UP"		},\
+			{(0xd1),	"SELF_DOWN_PEER_LEAVING"	},\
+			{(0xac),	"SELF_UP_PEER_COMING"		},\
+			{(0xca),	"SELF_COMING_PEER_UP"		},\
+			{(0x1d),	"SELF_LEAVING_PEER_DOWN"	},\
+			{(0xf0),	"FAILINGOVER"			},\
+			{(0xcc),	"SYNCHING"			})
+
+/* Link & Node FSM events: */
+#define evt_sym(val)							  \
+	__print_symbolic(val,						  \
+			{(0xec1ab1e),	"ESTABLISH_EVT"			},\
+			{(0x9eed0e),	"PEER_RESET_EVT"		},\
+			{(0xfa110e),	"FAILURE_EVT"			},\
+			{(0x10ca1d0e),	"RESET_EVT"			},\
+			{(0xfa110bee),	"FAILOVER_BEGIN_EVT"		},\
+			{(0xfa110ede),	"FAILOVER_END_EVT"		},\
+			{(0xc1ccbee),	"SYNCH_BEGIN_EVT"		},\
+			{(0xc1ccede),	"SYNCH_END_EVT"			},\
+			{(0xece),	"SELF_ESTABL_CONTACT_EVT"	},\
+			{(0x1ce),	"SELF_LOST_CONTACT_EVT"		},\
+			{(0x9ece),	"PEER_ESTABL_CONTACT_EVT"	},\
+			{(0x91ce),	"PEER_LOST_CONTACT_EVT"		},\
+			{(0xfbe),	"FAILOVER_BEGIN_EVT"		},\
+			{(0xfee),	"FAILOVER_END_EVT"		},\
+			{(0xcbe),	"SYNCH_BEGIN_EVT"		},\
+			{(0xcee),	"SYNCH_END_EVT"			})
+
+/* Bearer, net device events: */
+#define dev_evt_sym(val)						  \
+	__print_symbolic(val,						  \
+			{(NETDEV_CHANGE),	"NETDEV_CHANGE"		},\
+			{(NETDEV_GOING_DOWN),	"NETDEV_GOING_DOWN"	},\
+			{(NETDEV_UP),		"NETDEV_UP"		},\
+			{(NETDEV_CHANGEMTU),	"NETDEV_CHANGEMTU"	},\
+			{(NETDEV_CHANGEADDR),	"NETDEV_CHANGEADDR"	},\
+			{(NETDEV_UNREGISTER),	"NETDEV_UNREGISTER"	},\
+			{(NETDEV_CHANGENAME),	"NETDEV_CHANGENAME"	})
+
+extern unsigned long sysctl_tipc_sk_filter[5] __read_mostly;
+
+int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf);
+int tipc_list_dump(struct sk_buff_head *list, bool more, char *buf);
+int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf);
+int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf);
+int tipc_node_dump(struct tipc_node *n, bool more, char *buf);
+bool tipc_sk_filtering(struct sock *sk);
+
+DECLARE_EVENT_CLASS(tipc_skb_class,
+
+	TP_PROTO(struct sk_buff *skb, bool more, const char *header),
+
+	TP_ARGS(skb, more, header),
+
+	TP_STRUCT__entry(
+		__string(header, header)
+		__dynamic_array(char, buf, (more) ? SKB_LMAX : SKB_LMIN)
+	),
+
+	TP_fast_assign(
+		__assign_str(header, header);
+		tipc_skb_dump(skb, more, __get_str(buf));
+	),
+
+	TP_printk("%s\n%s", __get_str(header), __get_str(buf))
+)
+
+#define DEFINE_SKB_EVENT(name) \
+DEFINE_EVENT(tipc_skb_class, name, \
+	TP_PROTO(struct sk_buff *skb, bool more, const char *header), \
+	TP_ARGS(skb, more, header))
+DEFINE_SKB_EVENT(tipc_skb_dump);
+DEFINE_SKB_EVENT(tipc_proto_build);
+DEFINE_SKB_EVENT(tipc_proto_rcv);
+
+DECLARE_EVENT_CLASS(tipc_list_class,
+
+	TP_PROTO(struct sk_buff_head *list, bool more, const char *header),
+
+	TP_ARGS(list, more, header),
+
+	TP_STRUCT__entry(
+		__string(header, header)
+		__dynamic_array(char, buf, (more) ? LIST_LMAX : LIST_LMIN)
+	),
+
+	TP_fast_assign(
+		__assign_str(header, header);
+		tipc_list_dump(list, more, __get_str(buf));
+	),
+
+	TP_printk("%s\n%s", __get_str(header), __get_str(buf))
+);
+
+#define DEFINE_LIST_EVENT(name) \
+DEFINE_EVENT(tipc_list_class, name, \
+	TP_PROTO(struct sk_buff_head *list, bool more, const char *header), \
+	TP_ARGS(list, more, header))
+DEFINE_LIST_EVENT(tipc_list_dump);
+
+DECLARE_EVENT_CLASS(tipc_sk_class,
+
+	TP_PROTO(struct sock *sk, struct sk_buff *skb, u16 dqueues,
+		 const char *header),
+
+	TP_ARGS(sk, skb, dqueues, header),
+
+	TP_STRUCT__entry(
+		__string(header, header)
+		__field(u32, portid)
+		__dynamic_array(char, buf, (dqueues) ? SK_LMAX : SK_LMIN)
+		__dynamic_array(char, skb_buf, (skb) ? SKB_LMIN : 1)
+	),
+
+	TP_fast_assign(
+		__assign_str(header, header);
+		__entry->portid = tipc_sock_get_portid(sk);
+		tipc_sk_dump(sk, dqueues, __get_str(buf));
+		if (skb)
+			tipc_skb_dump(skb, false, __get_str(skb_buf));
+		else
+			*(__get_str(skb_buf)) = '\0';
+	),
+
+	TP_printk("<%u> %s\n%s%s", __entry->portid, __get_str(header),
+		  __get_str(skb_buf), __get_str(buf))
+);
+
+#define DEFINE_SK_EVENT_FILTER(name) \
+DEFINE_EVENT_CONDITION(tipc_sk_class, name, \
+	TP_PROTO(struct sock *sk, struct sk_buff *skb, u16 dqueues, \
+		 const char *header), \
+	TP_ARGS(sk, skb, dqueues, header), \
+	TP_CONDITION(tipc_sk_filtering(sk)))
+DEFINE_SK_EVENT_FILTER(tipc_sk_dump);
+DEFINE_SK_EVENT_FILTER(tipc_sk_create);
+DEFINE_SK_EVENT_FILTER(tipc_sk_sendmcast);
+DEFINE_SK_EVENT_FILTER(tipc_sk_sendmsg);
+DEFINE_SK_EVENT_FILTER(tipc_sk_sendstream);
+DEFINE_SK_EVENT_FILTER(tipc_sk_poll);
+DEFINE_SK_EVENT_FILTER(tipc_sk_filter_rcv);
+DEFINE_SK_EVENT_FILTER(tipc_sk_advance_rx);
+DEFINE_SK_EVENT_FILTER(tipc_sk_rej_msg);
+DEFINE_SK_EVENT_FILTER(tipc_sk_drop_msg);
+DEFINE_SK_EVENT_FILTER(tipc_sk_release);
+DEFINE_SK_EVENT_FILTER(tipc_sk_shutdown);
+
+#define DEFINE_SK_EVENT_FILTER_COND(name, cond) \
+DEFINE_EVENT_CONDITION(tipc_sk_class, name, \
+	TP_PROTO(struct sock *sk, struct sk_buff *skb, u16 dqueues, \
+		 const char *header), \
+	TP_ARGS(sk, skb, dqueues, header), \
+	TP_CONDITION(tipc_sk_filtering(sk) && (cond)))
+DEFINE_SK_EVENT_FILTER_COND(tipc_sk_overlimit1, tipc_sk_overlimit1(sk, skb));
+DEFINE_SK_EVENT_FILTER_COND(tipc_sk_overlimit2, tipc_sk_overlimit2(sk, skb));
+
+DECLARE_EVENT_CLASS(tipc_link_class,
+
+	TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header),
+
+	TP_ARGS(l, dqueues, header),
+
+	TP_STRUCT__entry(
+		__string(header, header)
+		__array(char, name, TIPC_MAX_LINK_NAME)
+		__dynamic_array(char, buf, (dqueues) ? LINK_LMAX : LINK_LMIN)
+	),
+
+	TP_fast_assign(
+		__assign_str(header, header);
+		tipc_link_name_ext(l, __entry->name);
+		tipc_link_dump(l, dqueues, __get_str(buf));
+	),
+
+	TP_printk("<%s> %s\n%s", __entry->name, __get_str(header),
+		  __get_str(buf))
+);
+
+#define DEFINE_LINK_EVENT(name) \
+DEFINE_EVENT(tipc_link_class, name, \
+	TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), \
+	TP_ARGS(l, dqueues, header))
+DEFINE_LINK_EVENT(tipc_link_dump);
+DEFINE_LINK_EVENT(tipc_link_conges);
+DEFINE_LINK_EVENT(tipc_link_timeout);
+DEFINE_LINK_EVENT(tipc_link_reset);
+
+#define DEFINE_LINK_EVENT_COND(name, cond) \
+DEFINE_EVENT_CONDITION(tipc_link_class, name, \
+	TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), \
+	TP_ARGS(l, dqueues, header), \
+	TP_CONDITION(cond))
+DEFINE_LINK_EVENT_COND(tipc_link_too_silent, tipc_link_too_silent(l));
+
+DECLARE_EVENT_CLASS(tipc_link_transmq_class,
+
+	TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
+
+	TP_ARGS(r, f, t, tq),
+
+	TP_STRUCT__entry(
+		__array(char, name, TIPC_MAX_LINK_NAME)
+		__field(u16, from)
+		__field(u16, to)
+		__field(u32, len)
+		__field(u16, fseqno)
+		__field(u16, lseqno)
+	),
+
+	TP_fast_assign(
+		tipc_link_name_ext(r, __entry->name);
+		__entry->from = f;
+		__entry->to = t;
+		__entry->len = skb_queue_len(tq);
+		__entry->fseqno = msg_seqno(buf_msg(skb_peek(tq)));
+		__entry->lseqno = msg_seqno(buf_msg(skb_peek_tail(tq)));
+	),
+
+	TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n",
+		  __entry->name, __entry->from, __entry->to,
+		  __entry->len, __entry->fseqno, __entry->lseqno)
+);
+
+DEFINE_EVENT(tipc_link_transmq_class, tipc_link_retrans,
+	TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
+	TP_ARGS(r, f, t, tq)
+);
+
+DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack,
+	TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
+	TP_ARGS(r, f, t, tq),
+	TP_printk("<%s> acked: [%u-%u] transmq: %u [%u-%u]\n",
+		  __entry->name, __entry->from, __entry->to,
+		  __entry->len, __entry->fseqno, __entry->lseqno)
+);
+
+DECLARE_EVENT_CLASS(tipc_node_class,
+
+	TP_PROTO(struct tipc_node *n, bool more, const char *header),
+
+	TP_ARGS(n, more, header),
+
+	TP_STRUCT__entry(
+		__string(header, header)
+		__field(u32, addr)
+		__dynamic_array(char, buf, (more) ? NODE_LMAX : NODE_LMIN)
+	),
+
+	TP_fast_assign(
+		__assign_str(header, header);
+		__entry->addr = tipc_node_get_addr(n);
+		tipc_node_dump(n, more, __get_str(buf));
+	),
+
+	TP_printk("<%x> %s\n%s", __entry->addr, __get_str(header),
+		  __get_str(buf))
+);
+
+#define DEFINE_NODE_EVENT(name) \
+DEFINE_EVENT(tipc_node_class, name, \
+	TP_PROTO(struct tipc_node *n, bool more, const char *header), \
+	TP_ARGS(n, more, header))
+DEFINE_NODE_EVENT(tipc_node_dump);
+DEFINE_NODE_EVENT(tipc_node_create);
+DEFINE_NODE_EVENT(tipc_node_delete);
+DEFINE_NODE_EVENT(tipc_node_lost_contact);
+DEFINE_NODE_EVENT(tipc_node_timeout);
+DEFINE_NODE_EVENT(tipc_node_link_up);
+DEFINE_NODE_EVENT(tipc_node_link_down);
+DEFINE_NODE_EVENT(tipc_node_reset_links);
+DEFINE_NODE_EVENT(tipc_node_check_state);
+
+DECLARE_EVENT_CLASS(tipc_fsm_class,
+
+	TP_PROTO(const char *name, u32 os, u32 ns, int evt),
+
+	TP_ARGS(name, os, ns, evt),
+
+	TP_STRUCT__entry(
+		__string(name, name)
+		__field(u32, os)
+		__field(u32, ns)
+		__field(u32, evt)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, name);
+		__entry->os = os;
+		__entry->ns = ns;
+		__entry->evt = evt;
+	),
+
+	TP_printk("<%s> %s--(%s)->%s\n", __get_str(name),
+		  state_sym(__entry->os), evt_sym(__entry->evt),
+		  state_sym(__entry->ns))
+);
+
+#define DEFINE_FSM_EVENT(fsm_name) \
+DEFINE_EVENT(tipc_fsm_class, fsm_name, \
+	TP_PROTO(const char *name, u32 os, u32 ns, int evt), \
+	TP_ARGS(name, os, ns, evt))
+DEFINE_FSM_EVENT(tipc_link_fsm);
+DEFINE_FSM_EVENT(tipc_node_fsm);
+
+TRACE_EVENT(tipc_l2_device_event,
+
+	TP_PROTO(struct net_device *dev, struct tipc_bearer *b,
+		 unsigned long evt),
+
+	TP_ARGS(dev, b, evt),
+
+	TP_STRUCT__entry(
+		__string(dev_name, dev->name)
+		__string(b_name, b->name)
+		__field(unsigned long, evt)
+		__field(u8, b_up)
+		__field(u8, carrier)
+		__field(u8, oper)
+	),
+
+	TP_fast_assign(
+		__assign_str(dev_name, dev->name);
+		__assign_str(b_name, b->name);
+		__entry->evt = evt;
+		__entry->b_up = test_bit(0, &b->up);
+		__entry->carrier = netif_carrier_ok(dev);
+		__entry->oper = netif_oper_up(dev);
+	),
+
+	TP_printk("%s on: <%s>/<%s> oper: %s carrier: %s bearer: %s\n",
+		  dev_evt_sym(__entry->evt), __get_str(dev_name),
+		  __get_str(b_name), (__entry->oper) ? "up" : "down",
+		  (__entry->carrier) ? "ok" : "notok",
+		  (__entry->b_up) ? "up" : "down")
+);
+
+#endif /* _TIPC_TRACE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
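
Once built in, these tracepoints appear under the tipc subsystem in tracefs; on a typical system they can be exercised with, for example, echo 1 > /sys/kernel/debug/tracing/events/tipc/tipc_sk_sendmsg/enable followed by cat /sys/kernel/debug/tracing/trace_pipe, with the sysctl filter tuple above narrowing which sockets actually emit events.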
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 9783101..287df68 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -44,7 +44,7 @@
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/udp_tunnel.h>
-#include <net/addrconf.h>
+#include <net/ipv6_stubs.h>
 #include <linux/tipc_netlink.h>
 #include "core.h"
 #include "addr.h"
@@ -76,6 +76,7 @@
 /* struct udp_replicast - container for UDP remote addresses */
 struct udp_replicast {
 	struct udp_media_addr addr;
+	struct dst_cache dst_cache;
 	struct rcu_head rcu;
 	struct list_head list;
 };
@@ -158,47 +159,52 @@
 /* tipc_send_msg - enqueue a send request */
 static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
 			 struct udp_bearer *ub, struct udp_media_addr *src,
-			 struct udp_media_addr *dst)
+			 struct udp_media_addr *dst, struct dst_cache *cache)
 {
+	struct dst_entry *ndst = dst_cache_get(cache);
 	int ttl, err = 0;
-	struct rtable *rt;
 
 	if (dst->proto == htons(ETH_P_IP)) {
-		struct flowi4 fl = {
-			.daddr = dst->ipv4.s_addr,
-			.saddr = src->ipv4.s_addr,
-			.flowi4_mark = skb->mark,
-			.flowi4_proto = IPPROTO_UDP
-		};
-		rt = ip_route_output_key(net, &fl);
-		if (IS_ERR(rt)) {
-			err = PTR_ERR(rt);
-			goto tx_error;
+		struct rtable *rt = (struct rtable *)ndst;
+
+		if (!rt) {
+			struct flowi4 fl = {
+				.daddr = dst->ipv4.s_addr,
+				.saddr = src->ipv4.s_addr,
+				.flowi4_mark = skb->mark,
+				.flowi4_proto = IPPROTO_UDP
+			};
+			rt = ip_route_output_key(net, &fl);
+			if (IS_ERR(rt)) {
+				err = PTR_ERR(rt);
+				goto tx_error;
+			}
+			dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
 		}
 
-		skb->dev = rt->dst.dev;
 		ttl = ip4_dst_hoplimit(&rt->dst);
 		udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
 				    dst->ipv4.s_addr, 0, ttl, 0, src->port,
 				    dst->port, false, true);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
-		struct dst_entry *ndst;
-		struct flowi6 fl6 = {
-			.flowi6_oif = ub->ifindex,
-			.daddr = dst->ipv6,
-			.saddr = src->ipv6,
-			.flowi6_proto = IPPROTO_UDP
-		};
-		err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk, &ndst,
-						 &fl6);
-		if (err)
-			goto tx_error;
+		if (!ndst) {
+			struct flowi6 fl6 = {
+				.flowi6_oif = ub->ifindex,
+				.daddr = dst->ipv6,
+				.saddr = src->ipv6,
+				.flowi6_proto = IPPROTO_UDP
+			};
+			err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk,
+							 &ndst, &fl6);
+			if (err)
+				goto tx_error;
+			dst_cache_set_ip6(cache, ndst, &fl6.saddr);
+		}
 		ttl = ip6_dst_hoplimit(ndst);
-		err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
-					   ndst->dev, &src->ipv6,
-					   &dst->ipv6, 0, ttl, 0, src->port,
-					   dst->port, false);
+		err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
+					   &src->ipv6, &dst->ipv6, 0, ttl, 0,
+					   src->port, dst->port, false);
 #endif
 	}
 	return err;
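
This is the standard dst_cache pattern from include/net/dst_cache.h: consult the cache first, fall back to a full route lookup only on a miss, and store the result for subsequent packets. A condensed sketch of the lifecycle (IPv4 side, error handling elided, net and fl assumed in scope):

	struct dst_cache cache;
	struct dst_entry *dst;

	dst_cache_init(&cache, GFP_ATOMIC);	/* once, at setup */

	dst = dst_cache_get(&cache);		/* per packet: fast path */
	if (!dst) {
		struct rtable *rt = ip_route_output_key(net, &fl);

		dst_cache_set_ip4(&cache, &rt->dst, fl.saddr);
	}

	dst_cache_destroy(&cache);		/* once, at teardown */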
@@ -225,14 +231,15 @@
 	}
 
 	skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
-	ub = rcu_dereference_rtnl(b->media_ptr);
+	ub = rcu_dereference(b->media_ptr);
 	if (!ub) {
 		err = -ENODEV;
 		goto out;
 	}
 
 	if (addr->broadcast != TIPC_REPLICAST_SUPPORT)
-		return tipc_udp_xmit(net, skb, ub, src, dst);
+		return tipc_udp_xmit(net, skb, ub, src, dst,
+				     &ub->rcast.dst_cache);
 
 	/* Replicast, send an skb to each configured IP address */
 	list_for_each_entry_rcu(rcast, &ub->rcast.list, list) {
@@ -244,11 +251,10 @@
 			goto out;
 		}
 
-		err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr);
-		if (err) {
-			kfree_skb(_skb);
+		err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr,
+				    &rcast->dst_cache);
+		if (err)
 			goto out;
-		}
 	}
 	err = 0;
 out:
@@ -290,6 +296,11 @@
 	if (!rcast)
 		return -ENOMEM;
 
+	if (dst_cache_init(&rcast->dst_cache, GFP_ATOMIC)) {
+		kfree(rcast);
+		return -ENOMEM;
+	}
+
 	memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr));
 
 	if (ntohs(addr->proto) == ETH_P_IP)
@@ -356,25 +367,21 @@
 	skb_pull(skb, sizeof(struct udphdr));
 	hdr = buf_msg(skb);
 
-	rcu_read_lock();
-	b = rcu_dereference_rtnl(ub->bearer);
+	b = rcu_dereference(ub->bearer);
 	if (!b)
-		goto rcu_out;
+		goto out;
 
 	if (b && test_bit(0, &b->up)) {
 		tipc_rcv(sock_net(sk), skb, b);
-		rcu_read_unlock();
 		return 0;
 	}
 
 	if (unlikely(msg_user(hdr) == LINK_CONFIG)) {
 		err = tipc_udp_rcast_disc(b, skb);
 		if (err)
-			goto rcu_out;
+			goto out;
 	}
 
-rcu_out:
-	rcu_read_unlock();
 out:
 	kfree_skb(skb);
 	return 0;
@@ -453,9 +460,9 @@
 		if (!attrs[TIPC_NLA_BEARER])
 			return -EINVAL;
 
-		err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX,
-				       attrs[TIPC_NLA_BEARER],
-				       tipc_nl_bearer_policy, NULL);
+		err = nla_parse_nested_deprecated(battrs, TIPC_NLA_BEARER_MAX,
+						  attrs[TIPC_NLA_BEARER],
+						  tipc_nl_bearer_policy, NULL);
 		if (err)
 			return err;
 
@@ -483,7 +490,7 @@
 		}
 	}
 
-	ub = rcu_dereference_rtnl(b->media_ptr);
+	ub = rtnl_dereference(b->media_ptr);
 	if (!ub) {
 		rtnl_unlock();
 		return -EINVAL;
@@ -525,11 +532,11 @@
 	struct udp_bearer *ub;
 	struct nlattr *nest;
 
-	ub = rcu_dereference_rtnl(b->media_ptr);
+	ub = rtnl_dereference(b->media_ptr);
 	if (!ub)
 		return -ENODEV;
 
-	nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS);
+	nest = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_UDP_OPTS);
 	if (!nest)
 		goto msg_full;
 
@@ -607,8 +614,7 @@
 	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
 	struct udp_media_addr *dst;
 
-	if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr,
-			     tipc_nl_udp_policy, NULL))
+	if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy, NULL))
 		return -EINVAL;
 
 	if (!opts[TIPC_NLA_UDP_REMOTE])
@@ -650,6 +656,7 @@
 	struct udp_tunnel_sock_cfg tuncfg = {NULL};
 	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
 	u8 node_id[NODE_ID_LEN] = {0,};
+	int rmcast = 0;
 
 	ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
 	if (!ub)
@@ -660,9 +667,7 @@
 	if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
 		goto err;
 
-	if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
-			     attrs[TIPC_NLA_BEARER_UDP_OPTS],
-			     tipc_nl_udp_policy, NULL))
+	if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attrs[TIPC_NLA_BEARER_UDP_OPTS], tipc_nl_udp_policy, NULL))
 		goto err;
 
 	if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) {
@@ -680,6 +685,14 @@
 	if (err)
 		goto err;
 
+	if (remote.proto != local.proto) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	/* Check whether the remote address is a multicast address */
+	rmcast = tipc_udp_is_mcast_addr(&remote);
+
 	/* Autoconfigure own node identity if needed */
 	if (!tipc_own_id(net)) {
 		memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16);
@@ -705,7 +718,12 @@
 			goto err;
 		}
 		udp_conf.family = AF_INET;
-		udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+
+		/* Bind to ANY so packets sent to the multicast group are received */
+		if (rmcast)
+			udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+		else
+			udp_conf.local_ip.s_addr = local.ipv4.s_addr;
 		udp_conf.use_udp_checksums = false;
 		ub->ifindex = dev->ifindex;
 		if (tipc_mtu_bad(dev, sizeof(struct iphdr) +
@@ -719,7 +737,10 @@
 		udp_conf.family = AF_INET6;
 		udp_conf.use_udp6_tx_checksums = true;
 		udp_conf.use_udp6_rx_checksums = true;
-		udp_conf.local_ip6 = in6addr_any;
+		if (rmcast)
+			udp_conf.local_ip6 = in6addr_any;
+		else
+			udp_conf.local_ip6 = local.ipv6;
 		b->mtu = 1280;
 #endif
 	} else {
@@ -736,22 +757,28 @@
 	tuncfg.encap_destroy = NULL;
 	setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
 
+	err = dst_cache_init(&ub->rcast.dst_cache, GFP_ATOMIC);
+	if (err)
+		goto free;
+
 	/**
 	 * The bcast media address port is used for all peers and the ip
 	 * is used if it's a multicast address.
 	 */
 	memcpy(&b->bcast_addr.value, &remote, sizeof(remote));
-	if (tipc_udp_is_mcast_addr(&remote))
+	if (rmcast)
 		err = enable_mcast(ub, &remote);
 	else
 		err = tipc_udp_rcast_add(b, &remote);
 	if (err)
-		goto err;
+		goto free;
 
 	return 0;
+
+free:
+	dst_cache_destroy(&ub->rcast.dst_cache);
+	udp_tunnel_sock_release(ub->ubsock);
 err:
-	if (ub->ubsock)
-		udp_tunnel_sock_release(ub->ubsock);
 	kfree(ub);
 	return err;
 }
@@ -763,12 +790,13 @@
 	struct udp_replicast *rcast, *tmp;
 
 	list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+		dst_cache_destroy(&rcast->dst_cache);
 		list_del_rcu(&rcast->list);
 		kfree_rcu(rcast, rcu);
 	}
 
-	if (ub->ubsock)
-		udp_tunnel_sock_release(ub->ubsock);
+	dst_cache_destroy(&ub->rcast.dst_cache);
+	udp_tunnel_sock_release(ub->ubsock);
 	synchronize_net();
 	kfree(ub);
 }
@@ -778,13 +806,12 @@
 {
 	struct udp_bearer *ub;
 
-	ub = rcu_dereference_rtnl(b->media_ptr);
+	ub = rtnl_dereference(b->media_ptr);
 	if (!ub) {
 		pr_err("UDP bearer instance not found\n");
 		return;
 	}
-	if (ub->ubsock)
-		sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
+	sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
 	RCU_INIT_POINTER(ub->bearer, NULL);
 
 	/* sock_release need to be done outside of rtnl lock */
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index 73f05ec..e4328b3 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # TLS configuration
 #
@@ -8,6 +9,7 @@
 	select CRYPTO_AES
 	select CRYPTO_GCM
 	select STREAM_PARSER
+	select NET_SOCK_MSG
 	default n
 	---help---
 	Enable kernel support for TLS protocol. This allows symmetric
diff --git a/net/tls/Makefile b/net/tls/Makefile
index 4d6b728..ef0dc74 100644
--- a/net/tls/Makefile
+++ b/net/tls/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the TLS subsystem.
 #
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 961b07d..683d008 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -52,13 +52,16 @@
 
 static void tls_device_free_ctx(struct tls_context *ctx)
 {
-	if (ctx->tx_conf == TLS_HW)
+	if (ctx->tx_conf == TLS_HW) {
 		kfree(tls_offload_ctx_tx(ctx));
+		kfree(ctx->tx.rec_seq);
+		kfree(ctx->tx.iv);
+	}
 
 	if (ctx->rx_conf == TLS_HW)
 		kfree(tls_offload_ctx_rx(ctx));
 
-	kfree(ctx);
+	tls_ctx_free(NULL, ctx);
 }
 
 static void tls_device_gc_task(struct work_struct *work)
@@ -86,22 +89,6 @@
 	}
 }
 
-static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
-			      struct net_device *netdev)
-{
-	if (sk->sk_destruct != tls_device_sk_destruct) {
-		refcount_set(&ctx->refcount, 1);
-		dev_hold(netdev);
-		ctx->netdev = netdev;
-		spin_lock_irq(&tls_device_lock);
-		list_add_tail(&ctx->list, &tls_device_list);
-		spin_unlock_irq(&tls_device_lock);
-
-		ctx->sk_destruct = sk->sk_destruct;
-		sk->sk_destruct = tls_device_sk_destruct;
-	}
-}
-
 static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
 {
 	unsigned long flags;
@@ -135,13 +122,10 @@
 
 static void destroy_record(struct tls_record_info *record)
 {
-	int nr_frags = record->num_frags;
-	skb_frag_t *frag;
+	int i;
 
-	while (nr_frags-- > 0) {
-		frag = &record->frags[nr_frags];
-		__skb_frag_unref(frag);
-	}
+	for (i = 0; i < record->num_frags; i++)
+		__skb_frag_unref(&record->frags[i]);
 	kfree(record);
 }
 
@@ -172,12 +156,8 @@
 
 	spin_lock_irqsave(&ctx->lock, flags);
 	info = ctx->retransmit_hint;
-	if (info && !before(acked_seq, info->end_seq)) {
+	if (info && !before(acked_seq, info->end_seq))
 		ctx->retransmit_hint = NULL;
-		list_del(&info->list);
-		destroy_record(info);
-		deleted_records++;
-	}
 
 	list_for_each_entry_safe(info, temp, &ctx->records_list, list) {
 		if (before(acked_seq, info->end_seq))
@@ -196,7 +176,7 @@
  * socket and no in-flight SKBs associated with this
  * socket, so it is safe to free all the resources.
  */
-void tls_device_sk_destruct(struct sock *sk)
+static void tls_device_sk_destruct(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
@@ -214,7 +194,40 @@
 	if (refcount_dec_and_test(&tls_ctx->refcount))
 		tls_device_queue_ctx_destruction(tls_ctx);
 }
-EXPORT_SYMBOL(tls_device_sk_destruct);
+
+void tls_device_free_resources_tx(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+
+	tls_free_partial_record(sk, tls_ctx);
+}
+
+static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx,
+				 u32 seq)
+{
+	struct net_device *netdev;
+	struct sk_buff *skb;
+	int err = 0;
+	u8 *rcd_sn;
+
+	skb = tcp_write_queue_tail(sk);
+	if (skb)
+		TCP_SKB_CB(skb)->eor = 1;
+
+	rcd_sn = tls_ctx->tx.rec_seq;
+
+	down_read(&device_offload_lock);
+	netdev = tls_ctx->netdev;
+	if (netdev)
+		err = netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq,
+							 rcd_sn,
+							 TLS_OFFLOAD_CTX_DIR_TX);
+	up_read(&device_offload_lock);
+	if (err)
+		return;
+
+	clear_bit_unlock(TLS_TX_SYNC_SCHED, &tls_ctx->flags);
+}
 
 static void tls_append_frag(struct tls_record_info *record,
 			    struct page_frag *pfrag,
@@ -223,14 +236,14 @@
 	skb_frag_t *frag;
 
 	frag = &record->frags[record->num_frags - 1];
-	if (frag->page.p == pfrag->page &&
-	    frag->page_offset + frag->size == pfrag->offset) {
-		frag->size += size;
+	if (skb_frag_page(frag) == pfrag->page &&
+	    skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) {
+		skb_frag_size_add(frag, size);
 	} else {
 		++frag;
-		frag->page.p = pfrag->page;
-		frag->page_offset = pfrag->offset;
-		frag->size = size;
+		__skb_frag_set_page(frag, pfrag->page);
+		skb_frag_off_set(frag, pfrag->offset);
+		skb_frag_size_set(frag, size);
 		++record->num_frags;
 		get_page(pfrag->page);
 	}
@@ -243,41 +256,28 @@
 			   struct tls_context *ctx,
 			   struct tls_offload_context_tx *offload_ctx,
 			   struct tls_record_info *record,
-			   struct page_frag *pfrag,
-			   int flags,
-			   unsigned char record_type)
+			   int flags)
 {
+	struct tls_prot_info *prot = &ctx->prot_info;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct page_frag dummy_tag_frag;
 	skb_frag_t *frag;
 	int i;
 
-	/* fill prepend */
-	frag = &record->frags[0];
-	tls_fill_prepend(ctx,
-			 skb_frag_address(frag),
-			 record->len - ctx->tx.prepend_size,
-			 record_type);
-
-	/* HW doesn't care about the data in the tag, because it fills it. */
-	dummy_tag_frag.page = skb_frag_page(frag);
-	dummy_tag_frag.offset = 0;
-
-	tls_append_frag(record, &dummy_tag_frag, ctx->tx.tag_size);
 	record->end_seq = tp->write_seq + record->len;
-	spin_lock_irq(&offload_ctx->lock);
-	list_add_tail(&record->list, &offload_ctx->records_list);
-	spin_unlock_irq(&offload_ctx->lock);
+	list_add_tail_rcu(&record->list, &offload_ctx->records_list);
 	offload_ctx->open_record = NULL;
-	set_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
-	tls_advance_record_sn(sk, &ctx->tx);
+
+	if (test_bit(TLS_TX_SYNC_SCHED, &ctx->flags))
+		tls_device_resync_tx(sk, ctx, tp->write_seq);
+
+	tls_advance_record_sn(sk, prot, &ctx->tx);
 
 	for (i = 0; i < record->num_frags; i++) {
 		frag = &record->frags[i];
 		sg_unmark_end(&offload_ctx->sg_tx_data[i]);
 		sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag),
-			    frag->size, frag->page_offset);
-		sk_mem_charge(sk, frag->size);
+			    skb_frag_size(frag), skb_frag_off(frag));
+		sk_mem_charge(sk, skb_frag_size(frag));
 		get_page(skb_frag_page(frag));
 	}
 	sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]);
@@ -286,6 +286,38 @@
 	return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
 }
 
+static int tls_device_record_close(struct sock *sk,
+				   struct tls_context *ctx,
+				   struct tls_record_info *record,
+				   struct page_frag *pfrag,
+				   unsigned char record_type)
+{
+	struct tls_prot_info *prot = &ctx->prot_info;
+	int ret;
+
+	/* append tag
+	 * device will fill in the tag, we just need to append a placeholder
+	 * use socket page-frag memory to improve coalescing (a separate
+	 * buffer per tag would increase the frag count)
+	 * if we can't allocate memory now, steal some back from data
+	 */
+	if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
+					sk->sk_allocation))) {
+		ret = 0;
+		tls_append_frag(record, pfrag, prot->tag_size);
+	} else {
+		ret = prot->tag_size;
+		if (record->len <= prot->overhead_size)
+			return -ENOMEM;
+	}
+
+	/* fill prepend */
+	tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
+			 record->len - prot->overhead_size,
+			 record_type, prot->version);
+	return ret;
+}
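
The return convention here is worth spelling out: 0 means the placeholder tag was appended; a positive return of prot->tag_size means allocation failed and that many bytes were "stolen" back from the data (the caller adds the value back to size so the stolen payload is re-sent in the next record); -ENOMEM means the record is too small to steal from at all.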
+
 static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
 				 struct page_frag *pfrag,
 				 size_t prepend_size)
@@ -299,7 +331,7 @@
 
 	frag = &record->frags[0];
 	__skb_frag_set_page(frag, pfrag->page);
-	frag->page_offset = pfrag->offset;
+	skb_frag_off_set(frag, pfrag->offset);
 	skb_frag_size_set(frag, prepend_size);
 
 	get_page(pfrag->page);
@@ -340,16 +372,42 @@
 	return 0;
 }
 
+static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
+{
+	size_t pre_copy, nocache;
+
+	pre_copy = ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1);
+	if (pre_copy) {
+		pre_copy = min(pre_copy, bytes);
+		if (copy_from_iter(addr, pre_copy, i) != pre_copy)
+			return -EFAULT;
+		bytes -= pre_copy;
+		addr += pre_copy;
+	}
+
+	nocache = round_down(bytes, SMP_CACHE_BYTES);
+	if (copy_from_iter_nocache(addr, nocache, i) != nocache)
+		return -EFAULT;
+	bytes -= nocache;
+	addr += nocache;
+
+	if (bytes && copy_from_iter(addr, bytes, i) != bytes)
+		return -EFAULT;
+
+	return 0;
+}
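
The mask expression ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1) is the distance to the next cache-line boundary, equivalent to (-addr) & (SMP_CACHE_BYTES - 1). For example, with 64-byte cache lines and addr = 0x1008, ~(0x1007) & 63 = 56, so the first 56 bytes go through the normal cached copy, the aligned bulk uses copy_from_iter_nocache(), presumably to keep payload the NIC will DMA anyway from displacing useful cache lines, and any unaligned tail falls back to the cached copy.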
+
 static int tls_push_data(struct sock *sk,
 			 struct iov_iter *msg_iter,
 			 size_t size, int flags,
 			 unsigned char record_type)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
-	int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
 	int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
 	struct tls_record_info *record = ctx->open_record;
+	int tls_push_record_flags;
 	struct page_frag *pfrag;
 	size_t orig_size = size;
 	u32 max_open_record_len;
@@ -364,10 +422,15 @@
 	if (sk->sk_err)
 		return -sk->sk_err;
 
+	flags |= MSG_SENDPAGE_DECRYPTED;
+	tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
+
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
-	rc = tls_complete_pending_work(sk, tls_ctx, flags, &timeo);
-	if (rc < 0)
-		return rc;
+	if (tls_is_partially_sent_record(tls_ctx)) {
+		rc = tls_push_partial_record(sk, tls_ctx, flags);
+		if (rc < 0)
+			return rc;
+	}
 
 	pfrag = sk_page_frag(sk);
 
@@ -375,10 +438,10 @@
 	 * we need to leave room for an authentication tag.
 	 */
 	max_open_record_len = TLS_MAX_PAYLOAD_SIZE +
-			      tls_ctx->tx.prepend_size;
+			      prot->prepend_size;
 	do {
 		rc = tls_do_allocation(sk, ctx, pfrag,
-				       tls_ctx->tx.prepend_size);
+				       prot->prepend_size);
 		if (rc) {
 			rc = sk_stream_wait_memory(sk, &timeo);
 			if (!rc)
@@ -396,7 +459,7 @@
 				size = orig_size;
 				destroy_record(record);
 				ctx->open_record = NULL;
-			} else if (record->len > tls_ctx->tx.prepend_size) {
+			} else if (record->len > prot->prepend_size) {
 				goto last_record;
 			}
 
@@ -407,12 +470,10 @@
 		copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
 		copy = min_t(size_t, copy, (max_open_record_len - record->len));
 
-		if (copy_from_iter_nocache(page_address(pfrag->page) +
-					       pfrag->offset,
-					   copy, msg_iter) != copy) {
-			rc = -EFAULT;
+		rc = tls_device_copy_data(page_address(pfrag->page) +
+					  pfrag->offset, copy, msg_iter);
+		if (rc)
 			goto handle_error;
-		}
 		tls_append_frag(record, pfrag, copy);
 
 		size -= copy;
@@ -421,7 +482,7 @@
 			tls_push_record_flags = flags;
 			if (more) {
 				tls_ctx->pending_open_record_frags =
-						record->num_frags;
+						!!record->num_frags;
 				break;
 			}
 
@@ -430,13 +491,24 @@
 
 		if (done || record->len >= max_open_record_len ||
 		    (record->num_frags >= MAX_SKB_FRAGS - 1)) {
+			rc = tls_device_record_close(sk, tls_ctx, record,
+						     pfrag, record_type);
+			if (rc) {
+				if (rc > 0) {
+					size += rc;
+				} else {
+					size = orig_size;
+					destroy_record(record);
+					ctx->open_record = NULL;
+					break;
+				}
+			}
+
 			rc = tls_push_record(sk,
 					     tls_ctx,
 					     ctx,
 					     record,
-					     pfrag,
-					     tls_push_record_flags,
-					     record_type);
+					     tls_push_record_flags);
 			if (rc < 0)
 				break;
 		}
@@ -451,8 +523,10 @@
 int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	unsigned char record_type = TLS_RECORD_TYPE_DATA;
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	int rc;
 
+	mutex_lock(&tls_ctx->tx_lock);
 	lock_sock(sk);
 
 	if (unlikely(msg->msg_controllen)) {
@@ -466,12 +540,14 @@
 
 out:
 	release_sock(sk);
+	mutex_unlock(&tls_ctx->tx_lock);
 	return rc;
 }
 
 int tls_device_sendpage(struct sock *sk, struct page *page,
 			int offset, size_t size, int flags)
 {
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct iov_iter	msg_iter;
 	char *kaddr = kmap(page);
 	struct kvec iov;
@@ -480,6 +556,7 @@
 	if (flags & MSG_SENDPAGE_NOTLAST)
 		flags |= MSG_MORE;
 
+	mutex_lock(&tls_ctx->tx_lock);
 	lock_sock(sk);
 
 	if (flags & MSG_OOB) {
@@ -489,13 +566,14 @@
 
 	iov.iov_base = kaddr + offset;
 	iov.iov_len = size;
-	iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+	iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
 	rc = tls_push_data(sk, &msg_iter, size,
 			   flags, TLS_RECORD_TYPE_DATA);
 	kunmap(page);
 
 out:
 	release_sock(sk);
+	mutex_unlock(&tls_ctx->tx_lock);
 	return rc;
 }
 
@@ -511,12 +589,16 @@
 		/* if retransmit_hint is irrelevant start
 		 * from the beginning of the list
 		 */
-		info = list_first_entry(&context->records_list,
-					struct tls_record_info, list);
+		info = list_first_entry_or_null(&context->records_list,
+						struct tls_record_info, list);
+		if (!info)
+			return NULL;
 		record_sn = context->unacked_record_sn;
 	}
 
-	list_for_each_entry_from(info, &context->records_list, list) {
+	/* We just need the _rcu for the READ_ONCE() */
+	rcu_read_lock();
+	list_for_each_entry_from_rcu(info, &context->records_list, list) {
 		if (before(seq, info->end_seq)) {
 			if (!context->retransmit_hint ||
 			    after(info->end_seq,
@@ -525,12 +607,15 @@
 				context->retransmit_hint = info;
 			}
 			*p_record_sn = record_sn;
-			return info;
+			goto exit_rcu_unlock;
 		}
 		record_sn++;
 	}
+	info = NULL;
 
-	return NULL;
+exit_rcu_unlock:
+	rcu_read_unlock();
+	return info;
 }
 EXPORT_SYMBOL(tls_get_record);
 
@@ -538,15 +623,45 @@
 {
 	struct iov_iter	msg_iter;
 
-	iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+	iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
 	return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
 }
 
-void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
+void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
+{
+	if (tls_is_partially_sent_record(ctx)) {
+		gfp_t sk_allocation = sk->sk_allocation;
+
+		WARN_ON_ONCE(sk->sk_write_pending);
+
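+		/* may be called from write_space (softirq), hence GFP_ATOMIC */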
+		sk->sk_allocation = GFP_ATOMIC;
+		tls_push_partial_record(sk, ctx,
+					MSG_DONTWAIT | MSG_NOSIGNAL |
+					MSG_SENDPAGE_DECRYPTED);
+		sk->sk_allocation = sk_allocation;
+	}
+}
+
+static void tls_device_resync_rx(struct tls_context *tls_ctx,
+				 struct sock *sk, u32 seq, u8 *rcd_sn)
+{
+	struct net_device *netdev;
+
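+	/* The bit also fences tls_device_down(), which waits for it to
+	 * clear before dropping tls_ctx->netdev.
+	 */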
+	if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags)))
+		return;
+	netdev = READ_ONCE(tls_ctx->netdev);
+	if (netdev)
+		netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn,
+						   TLS_OFFLOAD_CTX_DIR_RX);
+	clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags);
+}
+
+void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct net_device *netdev = tls_ctx->netdev;
 	struct tls_offload_context_rx *rx_ctx;
+	u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
+	struct tls_prot_info *prot;
 	u32 is_req_pending;
 	s64 resync_req;
 	u32 req_seq;
@@ -554,22 +669,90 @@
 	if (tls_ctx->rx_conf != TLS_HW)
 		return;
 
+	prot = &tls_ctx->prot_info;
 	rx_ctx = tls_offload_ctx_rx(tls_ctx);
-	resync_req = atomic64_read(&rx_ctx->resync_req);
-	req_seq = ntohl(resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1);
-	is_req_pending = resync_req;
+	memcpy(rcd_sn, tls_ctx->rx.rec_seq, prot->rec_seq_size);
 
-	if (unlikely(is_req_pending) && req_seq == seq &&
-	    atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
-		netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk,
-						      seq + TLS_HEADER_SIZE - 1,
-						      rcd_sn);
+	switch (rx_ctx->resync_type) {
+	case TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ:
+		resync_req = atomic64_read(&rx_ctx->resync_req);
+		req_seq = resync_req >> 32;
+		seq += TLS_HEADER_SIZE - 1;
+		is_req_pending = resync_req;
+
+		if (likely(!is_req_pending) || req_seq != seq ||
+		    !atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
+			return;
+		break;
+	case TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT:
+		if (likely(!rx_ctx->resync_nh_do_now))
+			return;
+
+		/* head of next rec is already in; note that tcp_inq() will
+		 * include the currently parsed message when called from the
+		 * parser
+		 */
+		if (tcp_inq(sk) > rcd_len)
+			return;
+
+		rx_ctx->resync_nh_do_now = 0;
+		seq += rcd_len;
+		tls_bigint_increment(rcd_sn, prot->rec_seq_size);
+		break;
+	}
+
+	tls_device_resync_rx(tls_ctx, sk, seq, rcd_sn);
+}
+
+static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx,
+					   struct tls_offload_context_rx *ctx,
+					   struct sock *sk, struct sk_buff *skb)
+{
+	struct strp_msg *rxm;
+
+	/* device will request resyncs by itself based on stream scan */
+	if (ctx->resync_type != TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT)
+		return;
+	/* already scheduled */
+	if (ctx->resync_nh_do_now)
+		return;
+	/* seen decrypted fragments since last fully-failed record */
+	if (ctx->resync_nh_reset) {
+		ctx->resync_nh_reset = 0;
+		ctx->resync_nh.decrypted_failed = 1;
+		ctx->resync_nh.decrypted_tgt = TLS_DEVICE_RESYNC_NH_START_IVAL;
+		return;
+	}
+
+	if (++ctx->resync_nh.decrypted_failed <= ctx->resync_nh.decrypted_tgt)
+		return;
+
+	/* doing resync, bump the next target in case it fails */
+	if (ctx->resync_nh.decrypted_tgt < TLS_DEVICE_RESYNC_NH_MAX_IVAL)
+		ctx->resync_nh.decrypted_tgt *= 2;
+	else
+		ctx->resync_nh.decrypted_tgt += TLS_DEVICE_RESYNC_NH_MAX_IVAL;
+
+	rxm = strp_msg(skb);
+
+	/* head of next rec is already in, parser will sync for us */
+	if (tcp_inq(sk) > rxm->full_len) {
+		ctx->resync_nh_do_now = 1;
+	} else {
+		struct tls_prot_info *prot = &tls_ctx->prot_info;
+		u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
+
+		memcpy(rcd_sn, tls_ctx->rx.rec_seq, prot->rec_seq_size);
+		tls_bigint_increment(rcd_sn, prot->rec_seq_size);
+
+		tls_device_resync_rx(tls_ctx, sk, tcp_sk(sk)->copied_seq,
+				     rcd_sn);
+	}
 }
 
 static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
 {
 	struct strp_msg *rxm = strp_msg(skb);
-	int err = 0, offset = rxm->offset, copy, nsg;
+	int err = 0, offset = rxm->offset, copy, nsg, data_len, pos;
 	struct sk_buff *skb_iter, *unused;
 	struct scatterlist sg[1];
 	char *orig_buf, *buf;
@@ -590,8 +773,10 @@
 	sg_set_buf(&sg[0], buf,
 		   rxm->full_len + TLS_HEADER_SIZE +
 		   TLS_CIPHER_AES_GCM_128_IV_SIZE);
-	skb_copy_bits(skb, offset, buf,
-		      TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE);
+	err = skb_copy_bits(skb, offset, buf,
+			    TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE);
+	if (err)
+		goto free_buf;
 
 	/* We are interested only in the decrypted data, not the auth */
 	err = decrypt_skb(sk, skb, sg);
@@ -600,27 +785,50 @@
 	else
 		err = 0;
 
-	copy = min_t(int, skb_pagelen(skb) - offset,
-		     rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+	data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE;
 
-	if (skb->decrypted)
-		skb_store_bits(skb, offset, buf, copy);
+	if (skb_pagelen(skb) > offset) {
+		copy = min_t(int, skb_pagelen(skb) - offset, data_len);
 
-	offset += copy;
-	buf += copy;
-
-	skb_walk_frags(skb, skb_iter) {
-		copy = min_t(int, skb_iter->len,
-			     rxm->full_len - offset + rxm->offset -
-			     TLS_CIPHER_AES_GCM_128_TAG_SIZE);
-
-		if (skb_iter->decrypted)
-			skb_store_bits(skb_iter, offset, buf, copy);
+		if (skb->decrypted) {
+			err = skb_store_bits(skb, offset, buf, copy);
+			if (err)
+				goto free_buf;
+		}
 
 		offset += copy;
 		buf += copy;
 	}
 
+	pos = skb_pagelen(skb);
+	skb_walk_frags(skb, skb_iter) {
+		int frag_pos;
+
+		/* Practically all frags must belong to msg if reencrypt
+		 * is needed with current strparser and coalescing logic,
+		 * but strparser may "get optimized", so let's be safe.
+		 */
+		if (pos + skb_iter->len <= offset)
+			goto done_with_frag;
+		if (pos >= data_len + rxm->offset)
+			break;
+
+		frag_pos = offset - pos;
+		copy = min_t(int, skb_iter->len - frag_pos,
+			     data_len + rxm->offset - offset);
+
+		if (skb_iter->decrypted) {
+			err = skb_store_bits(skb_iter, frag_pos, buf, copy);
+			if (err)
+				goto free_buf;
+		}
+
+		offset += copy;
+		buf += copy;
+done_with_frag:
+		pos += skb_iter->len;
+	}
+
 free_buf:
 	kfree(orig_buf);
 	return err;
@@ -634,10 +842,6 @@
 	int is_encrypted = !is_decrypted;
 	struct sk_buff *skb_iter;
 
-	/* Skip if it is already decrypted */
-	if (ctx->sw.decrypted)
-		return 0;
-
 	/* Check if all the data is decrypted already */
 	skb_walk_frags(skb, skb_iter) {
 		is_decrypted &= skb_iter->decrypted;
@@ -646,39 +850,62 @@
 
 	ctx->sw.decrypted |= is_decrypted;
 
-	/* Return immedeatly if the record is either entirely plaintext or
+	/* Return immediately if the record is either entirely plaintext or
 	 * entirely ciphertext. Otherwise handle reencrypt partially decrypted
 	 * record.
 	 */
-	return (is_encrypted || is_decrypted) ? 0 :
-		tls_device_reencrypt(sk, skb);
+	if (is_decrypted) {
+		ctx->resync_nh_reset = 1;
+		return 0;
+	}
+	if (is_encrypted) {
+		tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb);
+		return 0;
+	}
+
+	ctx->resync_nh_reset = 1;
+	return tls_device_reencrypt(sk, skb);
+}
+
+static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
+			      struct net_device *netdev)
+{
+	if (sk->sk_destruct != tls_device_sk_destruct) {
+		refcount_set(&ctx->refcount, 1);
+		dev_hold(netdev);
+		ctx->netdev = netdev;
+		spin_lock_irq(&tls_device_lock);
+		list_add_tail(&ctx->list, &tls_device_list);
+		spin_unlock_irq(&tls_device_lock);
+
+		ctx->sk_destruct = sk->sk_destruct;
+		sk->sk_destruct = tls_device_sk_destruct;
+	}
 }
 
 int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 {
 	u16 nonce_size, tag_size, iv_size, rec_seq_size;
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_record_info *start_marker_record;
 	struct tls_offload_context_tx *offload_ctx;
 	struct tls_crypto_info *crypto_info;
 	struct net_device *netdev;
 	char *iv, *rec_seq;
 	struct sk_buff *skb;
-	int rc = -EINVAL;
 	__be64 rcd_sn;
+	int rc;
 
 	if (!ctx)
-		goto out;
+		return -EINVAL;
 
-	if (ctx->priv_ctx_tx) {
-		rc = -EEXIST;
-		goto out;
-	}
+	if (ctx->priv_ctx_tx)
+		return -EEXIST;
 
 	start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL);
-	if (!start_marker_record) {
-		rc = -ENOMEM;
-		goto out;
-	}
+	if (!start_marker_record)
+		return -ENOMEM;
 
 	offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL);
 	if (!offload_ctx) {
@@ -687,6 +914,11 @@
 	}
 
 	crypto_info = &ctx->crypto_send.info;
+	if (crypto_info->version != TLS_1_2_VERSION) {
+		rc = -EOPNOTSUPP;
+		goto free_offload_ctx;
+	}
+
 	switch (crypto_info->cipher_type) {
 	case TLS_CIPHER_AES_GCM_128:
 		nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
@@ -702,10 +934,18 @@
 		goto free_offload_ctx;
 	}
 
-	ctx->tx.prepend_size = TLS_HEADER_SIZE + nonce_size;
-	ctx->tx.tag_size = tag_size;
-	ctx->tx.overhead_size = ctx->tx.prepend_size + ctx->tx.tag_size;
-	ctx->tx.iv_size = iv_size;
+	/* Sanity-check the rec_seq_size for stack allocations */
+	if (rec_seq_size > TLS_MAX_REC_SEQ_SIZE) {
+		rc = -EINVAL;
+		goto free_offload_ctx;
+	}
+
+	prot->version = crypto_info->version;
+	prot->cipher_type = crypto_info->cipher_type;
+	prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
+	prot->tag_size = tag_size;
+	prot->overhead_size = prot->prepend_size + prot->tag_size;
+	prot->iv_size = iv_size;
 	ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
 			     GFP_KERNEL);
 	if (!ctx->tx.iv) {
@@ -715,7 +955,7 @@
 
 	memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
 
-	ctx->tx.rec_seq_size = rec_seq_size;
+	prot->rec_seq_size = rec_seq_size;
 	ctx->tx.rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
 	if (!ctx->tx.rec_seq) {
 		rc = -ENOMEM;
@@ -751,17 +991,11 @@
 	if (skb)
 		TCP_SKB_CB(skb)->eor = 1;
 
-	/* We support starting offload on multiple sockets
-	 * concurrently, so we only need a read lock here.
-	 * This lock must precede get_netdev_for_sock to prevent races between
-	 * NETDEV_DOWN and setsockopt.
-	 */
-	down_read(&device_offload_lock);
 	netdev = get_netdev_for_sock(sk);
 	if (!netdev) {
 		pr_err_ratelimited("%s: netdev not found\n", __func__);
 		rc = -EINVAL;
-		goto release_lock;
+		goto disable_cad;
 	}
 
 	if (!(netdev->features & NETIF_F_HW_TLS_TX)) {
@@ -772,10 +1006,15 @@
 	/* Avoid offloading if the device is down
 	 * We don't want to offload new flows after
 	 * the NETDEV_DOWN event
+	 *
+	 * device_offload_lock is taken in tls_device's NETDEV_DOWN
+	 * handler, thus protecting against the device going down before
+	 * ctx was added to tls_device_list.
 	 */
+	down_read(&device_offload_lock);
 	if (!(netdev->flags & IFF_UP)) {
 		rc = -EINVAL;
-		goto release_netdev;
+		goto release_lock;
 	}
 
 	ctx->priv_ctx_tx = offload_ctx;
@@ -783,9 +1022,10 @@
 					     &ctx->crypto_send.info,
 					     tcp_sk(sk)->write_seq);
 	if (rc)
-		goto release_netdev;
+		goto release_lock;
 
 	tls_device_attach(ctx, sk, netdev);
+	up_read(&device_offload_lock);
 
 	/* following this assignment tls_is_sk_tx_device_offloaded
 	 * will return true and the context might be accessed
@@ -793,13 +1033,14 @@
 	 */
 	smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb);
 	dev_put(netdev);
-	up_read(&device_offload_lock);
-	goto out;
 
-release_netdev:
-	dev_put(netdev);
+	return 0;
+
 release_lock:
 	up_read(&device_offload_lock);
+release_netdev:
+	dev_put(netdev);
+disable_cad:
 	clean_acked_data_disable(inet_csk(sk));
 	crypto_free_aead(offload_ctx->aead_send);
 free_rec_seq:
@@ -811,7 +1052,6 @@
 	ctx->priv_ctx_tx = NULL;
 free_marker_record:
 	kfree(start_marker_record);
-out:
 	return rc;
 }
 
@@ -821,22 +1061,16 @@
 	struct net_device *netdev;
 	int rc = 0;
 
-	/* We support starting offload on multiple sockets
-	 * concurrently, so we only need a read lock here.
-	 * This lock must precede get_netdev_for_sock to prevent races between
-	 * NETDEV_DOWN and setsockopt.
-	 */
-	down_read(&device_offload_lock);
+	if (ctx->crypto_recv.info.version != TLS_1_2_VERSION)
+		return -EOPNOTSUPP;
+
 	netdev = get_netdev_for_sock(sk);
 	if (!netdev) {
 		pr_err_ratelimited("%s: netdev not found\n", __func__);
-		rc = -EINVAL;
-		goto release_lock;
+		return -EINVAL;
 	}
 
 	if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
-		pr_err_ratelimited("%s: netdev %s with no TLS offload\n",
-				   __func__, netdev->name);
 		rc = -ENOTSUPP;
 		goto release_netdev;
 	}
@@ -844,17 +1078,23 @@
 	/* Avoid offloading if the device is down
 	 * We don't want to offload new flows after
 	 * the NETDEV_DOWN event
+	 *
+	 * device_offload_lock is taken in tls_device's NETDEV_DOWN
+	 * handler, thus protecting against the device going down before
+	 * ctx was added to tls_device_list.
 	 */
+	down_read(&device_offload_lock);
 	if (!(netdev->flags & IFF_UP)) {
 		rc = -EINVAL;
-		goto release_netdev;
+		goto release_lock;
 	}
 
 	context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL);
 	if (!context) {
 		rc = -ENOMEM;
-		goto release_netdev;
+		goto release_lock;
 	}
+	context->resync_nh_reset = 1;
 
 	ctx->priv_ctx_rx = context;
 	rc = tls_set_sw_offload(sk, ctx, 0);
@@ -864,23 +1104,26 @@
 	rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
 					     &ctx->crypto_recv.info,
 					     tcp_sk(sk)->copied_seq);
-	if (rc) {
-		pr_err_ratelimited("%s: The netdev has refused to offload this socket\n",
-				   __func__);
+	if (rc)
 		goto free_sw_resources;
-	}
 
 	tls_device_attach(ctx, sk, netdev);
-	goto release_netdev;
+	up_read(&device_offload_lock);
+
+	dev_put(netdev);
+
+	return 0;
 
 free_sw_resources:
+	up_read(&device_offload_lock);
 	tls_sw_free_resources_rx(sk);
+	down_read(&device_offload_lock);
 release_ctx:
 	ctx->priv_ctx_rx = NULL;
-release_netdev:
-	dev_put(netdev);
 release_lock:
 	up_read(&device_offload_lock);
+release_netdev:
+	dev_put(netdev);
 	return rc;
 }
 
@@ -894,12 +1137,6 @@
 	if (!netdev)
 		goto out;
 
-	if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
-		pr_err_ratelimited("%s: device is missing NETIF_F_HW_TLS_RX cap\n",
-				   __func__);
-		goto out;
-	}
-
 	netdev->tlsdev_ops->tls_dev_del(netdev, tls_ctx,
 					TLS_OFFLOAD_CTX_DIR_RX);
 
@@ -909,8 +1146,6 @@
 	}
 out:
 	up_read(&device_offload_lock);
-	kfree(tls_ctx->rx.rec_seq);
-	kfree(tls_ctx->rx.iv);
 	tls_sw_release_resources_rx(sk);
 }
 
@@ -940,7 +1175,10 @@
 		if (ctx->rx_conf == TLS_HW)
 			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
 							TLS_OFFLOAD_CTX_DIR_RX);
-		ctx->netdev = NULL;
+		WRITE_ONCE(ctx->netdev, NULL);
+		smp_mb__before_atomic(); /* pairs with test_and_set_bit() */
+		while (test_bit(TLS_RX_SYNC_RUNNING, &ctx->flags))
+			usleep_range(10, 200);
 		dev_put(netdev);
 		list_del_init(&ctx->list);
 
@@ -960,14 +1198,15 @@
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
-	if (!(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX)))
+	if (!dev->tlsdev_ops &&
+	    !(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX)))
 		return NOTIFY_DONE;
 
 	switch (event) {
 	case NETDEV_REGISTER:
 	case NETDEV_FEAT_CHANGE:
 		if ((dev->features & NETIF_F_HW_TLS_RX) &&
-		    !dev->tlsdev_ops->tls_dev_resync_rx)
+		    !dev->tlsdev_ops->tls_dev_resync)
 			return NOTIFY_BAD;
 
 		if  (dev->tlsdev_ops &&
@@ -995,4 +1234,5 @@
 {
 	unregister_netdevice_notifier(&tls_dev_notifier);
 	flush_work(&tls_device_gc_work);
+	clean_acked_data_flush();
 }
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 450a6db..2889533 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -73,7 +73,8 @@
 	len -= TLS_CIPHER_AES_GCM_128_IV_SIZE;
 
 	tls_make_aad(aad, len - TLS_CIPHER_AES_GCM_128_TAG_SIZE,
-		     (char *)&rcd_sn, sizeof(rcd_sn), buf[0]);
+		(char *)&rcd_sn, sizeof(rcd_sn), buf[0],
+		TLS_1_2_VERSION);
 
 	memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE,
 	       TLS_CIPHER_AES_GCM_128_IV_SIZE);
@@ -193,18 +194,30 @@
 
 static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
 {
+	struct sock *sk = skb->sk;
+	int delta;
+
 	skb_copy_header(nskb, skb);
 
 	skb_put(nskb, skb->len);
 	memcpy(nskb->data, skb->data, headln);
-	update_chksum(nskb, headln);
 
 	nskb->destructor = skb->destructor;
-	nskb->sk = skb->sk;
+	nskb->sk = sk;
 	skb->destructor = NULL;
 	skb->sk = NULL;
-	refcount_add(nskb->truesize - skb->truesize,
-		     &nskb->sk->sk_wmem_alloc);
+
+	update_chksum(nskb, headln);
+
+	/* sock_efree means skb must have gone through skb_orphan_partial() */
+	if (nskb->destructor == sock_efree)
+		return;
+
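+	/* charge the truesize difference to the socket's write allocation */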
+	delta = nskb->truesize - skb->truesize;
+	if (likely(delta < 0))
+		WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc));
+	else if (delta)
+		refcount_add(delta, &sk->sk_wmem_alloc);
 }
 
 /* This function may be called after the user socket is already
@@ -231,7 +244,6 @@
 	record = tls_get_record(ctx, tcp_seq, rcd_sn);
 	if (!record) {
 		spin_unlock_irqrestore(&ctx->lock, flags);
-		WARN(1, "Record not found for seq %u\n", tcp_seq);
 		return -EINVAL;
 	}
 
@@ -261,7 +273,7 @@
 
 		__skb_frag_ref(frag);
 		sg_set_page(sg_in + i, skb_frag_page(frag),
-			    skb_frag_size(frag), frag->page_offset);
+			    skb_frag_size(frag), skb_frag_off(frag));
 
 		remaining -= skb_frag_size(frag);
 
@@ -400,7 +412,10 @@
 		put_page(sg_page(&sg_in[--resync_sgs]));
 	kfree(sg_in);
 free_orig:
-	kfree_skb(skb);
+	if (nskb)
+		consume_skb(skb);
+	else
+		kfree_skb(skb);
 	return nskb;
 }
 
@@ -415,6 +430,12 @@
 }
 EXPORT_SYMBOL_GPL(tls_validate_xmit_skb);
 
+struct sk_buff *tls_encrypt_skb(struct sk_buff *skb)
+{
+	return tls_sw_fallback(skb->sk, skb);
+}
+EXPORT_SYMBOL_GPL(tls_encrypt_skb);
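+
+/* Typical driver-side use (a sketch): if the device can't handle a
+ * given skb on an offloaded socket, fall back from the xmit path:
+ *
+ *	if (tls_is_sk_tx_device_offloaded(skb->sk))
+ *		skb = tls_encrypt_skb(skb);	// NULL on failure
+ */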
+
 int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 523622d..eff4442 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -39,6 +39,7 @@
 #include <linux/netdevice.h>
 #include <linux/sched/signal.h>
 #include <linux/inetdevice.h>
+#include <linux/inet_diag.h>
 
 #include <net/tls.h>
 
@@ -55,10 +56,14 @@
 
 static struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
+static struct proto *saved_tcpv4_prot;
+static DEFINE_MUTEX(tcpv4_prot_mutex);
 static LIST_HEAD(device_list);
-static DEFINE_MUTEX(device_mutex);
+static DEFINE_SPINLOCK(device_spinlock);
 static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
 static struct proto_ops tls_sw_proto_ops;
+static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
+			 struct proto *base);
 
 static void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 {
@@ -141,9 +146,7 @@
 		size = sg->length;
 	}
 
-	clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
 	ctx->in_tcp_sendpages = false;
-	ctx->sk_write_space(sk);
 
 	return 0;
 }
@@ -193,15 +196,12 @@
 	return rc;
 }
 
-int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
-				   int flags, long *timeo)
+int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
+			    int flags)
 {
 	struct scatterlist *sg;
 	u16 offset;
 
-	if (!tls_is_partially_sent_record(ctx))
-		return ctx->push_pending_record(sk, flags);
-
 	sg = ctx->partially_sent_record;
 	offset = ctx->partially_sent_offset;
 
@@ -209,6 +209,17 @@
 	return tls_push_sg(sk, ctx, sg, offset, flags);
 }
 
+void tls_free_partial_record(struct sock *sk, struct tls_context *ctx)
+{
+	struct scatterlist *sg;
+
+	for (sg = ctx->partially_sent_record; sg; sg = sg_next(sg)) {
+		put_page(sg_page(sg));
+		sk_mem_uncharge(sk, sg->length);
+	}
+	ctx->partially_sent_record = NULL;
+}
+
 static void tls_write_space(struct sock *sk)
 {
 	struct tls_context *ctx = tls_get_ctx(sk);
@@ -222,100 +233,95 @@
 		return;
 	}
 
-	if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) {
-		gfp_t sk_allocation = sk->sk_allocation;
-		int rc;
-		long timeo = 0;
-
-		sk->sk_allocation = GFP_ATOMIC;
-		rc = tls_push_pending_closed_record(sk, ctx,
-						    MSG_DONTWAIT |
-						    MSG_NOSIGNAL,
-						    &timeo);
-		sk->sk_allocation = sk_allocation;
-
-		if (rc < 0)
-			return;
-	}
+#ifdef CONFIG_TLS_DEVICE
+	if (ctx->tx_conf == TLS_HW)
+		tls_device_write_space(sk, ctx);
+	else
+#endif
+		tls_sw_write_space(sk, ctx);
 
 	ctx->sk_write_space(sk);
 }
 
-static void tls_ctx_free(struct tls_context *ctx)
+/**
+ * tls_ctx_free() - free TLS ULP context
+ * @sk:  socket to which @ctx is attached
+ * @ctx: TLS context structure
+ *
+ * Free TLS context. If @sk is %NULL, the caller guarantees that the socket
+ * to which @ctx was attached has no outstanding references.
+ */
+void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
 {
 	if (!ctx)
 		return;
 
 	memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
 	memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
-	kfree(ctx);
+	mutex_destroy(&ctx->tx_lock);
+
+	if (sk)
+		kfree_rcu(ctx, rcu);
+	else
+		kfree(ctx);
 }
 
-static void tls_sk_proto_close(struct sock *sk, long timeout)
+static void tls_sk_proto_cleanup(struct sock *sk,
+				 struct tls_context *ctx, long timeo)
 {
-	struct tls_context *ctx = tls_get_ctx(sk);
-	long timeo = sock_sndtimeo(sk, 0);
-	void (*sk_proto_close)(struct sock *sk, long timeout);
-	bool free_ctx = false;
-
-	lock_sock(sk);
-	sk_proto_close = ctx->sk_proto_close;
-
-	if ((ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD) ||
-	    (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE)) {
-		free_ctx = true;
-		goto skip_tx_cleanup;
-	}
-
-	if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
+	if (unlikely(sk->sk_write_pending) &&
+	    !wait_on_pending_writer(sk, &timeo))
 		tls_handle_open_record(sk, 0);
 
-	if (ctx->partially_sent_record) {
-		struct scatterlist *sg = ctx->partially_sent_record;
-
-		while (1) {
-			put_page(sg_page(sg));
-			sk_mem_uncharge(sk, sg->length);
-
-			if (sg_is_last(sg))
-				break;
-			sg++;
-		}
-	}
-
 	/* We need these for tls_sw_fallback handling of other packets */
 	if (ctx->tx_conf == TLS_SW) {
 		kfree(ctx->tx.rec_seq);
 		kfree(ctx->tx.iv);
-		tls_sw_free_resources_tx(sk);
+		tls_sw_release_resources_tx(sk);
+	} else if (ctx->tx_conf == TLS_HW) {
+		tls_device_free_resources_tx(sk);
 	}
 
-	if (ctx->rx_conf == TLS_SW) {
-		kfree(ctx->rx.rec_seq);
-		kfree(ctx->rx.iv);
-		tls_sw_free_resources_rx(sk);
-	}
-
-#ifdef CONFIG_TLS_DEVICE
-	if (ctx->rx_conf == TLS_HW)
+	if (ctx->rx_conf == TLS_SW)
+		tls_sw_release_resources_rx(sk);
+	else if (ctx->rx_conf == TLS_HW)
 		tls_device_offload_cleanup_rx(sk);
+}
 
-	if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW) {
-#else
-	{
-#endif
-		tls_ctx_free(ctx);
-		ctx = NULL;
-	}
+static void tls_sk_proto_close(struct sock *sk, long timeout)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tls_context *ctx = tls_get_ctx(sk);
+	long timeo = sock_sndtimeo(sk, 0);
+	bool free_ctx;
 
-skip_tx_cleanup:
-	release_sock(sk);
-	sk_proto_close(sk, timeout);
-	/* free ctx for TLS_HW_RECORD, used by tcp_set_state
-	 * for sk->sk_prot->unhash [tls_hw_unhash]
-	 */
+	if (ctx->tx_conf == TLS_SW)
+		tls_sw_cancel_work_tx(ctx);
+
+	lock_sock(sk);
+	free_ctx = ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW;
+
+	if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE)
+		tls_sk_proto_cleanup(sk, ctx, timeo);
+
+	write_lock_bh(&sk->sk_callback_lock);
 	if (free_ctx)
-		tls_ctx_free(ctx);
+		rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+	sk->sk_prot = ctx->sk_proto;
+	if (sk->sk_write_space == tls_write_space)
+		sk->sk_write_space = ctx->sk_write_space;
+	write_unlock_bh(&sk->sk_callback_lock);
+	release_sock(sk);
+	if (ctx->tx_conf == TLS_SW)
+		tls_sw_free_ctx_tx(ctx);
+	if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW)
+		tls_sw_strparser_done(ctx);
+	if (ctx->rx_conf == TLS_SW)
+		tls_sw_free_ctx_rx(ctx);
+	ctx->sk_proto->close(sk, timeout);
+
+	if (free_ctx)
+		tls_ctx_free(sk, ctx);
 }
 
 static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
@@ -378,6 +384,30 @@
 			rc = -EFAULT;
 		break;
 	}
+	case TLS_CIPHER_AES_GCM_256: {
+		struct tls12_crypto_info_aes_gcm_256 *
+		  crypto_info_aes_gcm_256 =
+		  container_of(crypto_info,
+			       struct tls12_crypto_info_aes_gcm_256,
+			       info);
+
+		if (len != sizeof(*crypto_info_aes_gcm_256)) {
+			rc = -EINVAL;
+			goto out;
+		}
+		lock_sock(sk);
+		memcpy(crypto_info_aes_gcm_256->iv,
+		       ctx->tx.iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE,
+		       TLS_CIPHER_AES_GCM_256_IV_SIZE);
+		memcpy(crypto_info_aes_gcm_256->rec_seq, ctx->tx.rec_seq,
+		       TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE);
+		release_sock(sk);
+		if (copy_to_user(optval,
+				 crypto_info_aes_gcm_256,
+				 sizeof(*crypto_info_aes_gcm_256)))
+			rc = -EFAULT;
+		break;
+	}
 	default:
 		rc = -EINVAL;
 	}
@@ -408,7 +438,8 @@
 	struct tls_context *ctx = tls_get_ctx(sk);
 
 	if (level != SOL_TLS)
-		return ctx->getsockopt(sk, level, optname, optval, optlen);
+		return ctx->sk_proto->getsockopt(sk, level,
+						 optname, optval, optlen);
 
 	return do_tls_getsockopt(sk, optname, optval, optlen);
 }
@@ -417,7 +448,9 @@
 				  unsigned int optlen, int tx)
 {
 	struct tls_crypto_info *crypto_info;
+	struct tls_crypto_info *alt_crypto_info;
 	struct tls_context *ctx = tls_get_ctx(sk);
+	size_t optsize;
 	int rc = 0;
 	int conf;
 
@@ -426,10 +459,13 @@
 		goto out;
 	}
 
-	if (tx)
+	if (tx) {
 		crypto_info = &ctx->crypto_send.info;
-	else
+		alt_crypto_info = &ctx->crypto_recv.info;
+	} else {
 		crypto_info = &ctx->crypto_recv.info;
+		alt_crypto_info = &ctx->crypto_send.info;
+	}
 
 	/* Currently we don't support set crypto info more than one time */
 	if (TLS_CRYPTO_INFO_READY(crypto_info)) {
@@ -444,57 +480,70 @@
 	}
 
 	/* check version */
-	if (crypto_info->version != TLS_1_2_VERSION) {
+	if (crypto_info->version != TLS_1_2_VERSION &&
+	    crypto_info->version != TLS_1_3_VERSION) {
 		rc = -ENOTSUPP;
 		goto err_crypto_info;
 	}
 
-	switch (crypto_info->cipher_type) {
-	case TLS_CIPHER_AES_GCM_128: {
-		if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
+	/* Ensure that TLS version and ciphers are same in both directions */
+	if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) {
+		if (alt_crypto_info->version != crypto_info->version ||
+		    alt_crypto_info->cipher_type != crypto_info->cipher_type) {
 			rc = -EINVAL;
 			goto err_crypto_info;
 		}
-		rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
-				    optlen - sizeof(*crypto_info));
-		if (rc) {
-			rc = -EFAULT;
-			goto err_crypto_info;
-		}
+	}
+
+	switch (crypto_info->cipher_type) {
+	case TLS_CIPHER_AES_GCM_128:
+		optsize = sizeof(struct tls12_crypto_info_aes_gcm_128);
+		break;
+	case TLS_CIPHER_AES_GCM_256:
+		optsize = sizeof(struct tls12_crypto_info_aes_gcm_256);
+		break;
+	case TLS_CIPHER_AES_CCM_128:
+		optsize = sizeof(struct tls12_crypto_info_aes_ccm_128);
+		break;
 	default:
 		rc = -EINVAL;
 		goto err_crypto_info;
 	}
 
+	if (optlen != optsize) {
+		rc = -EINVAL;
+		goto err_crypto_info;
+	}
+
+	rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
+			    optlen - sizeof(*crypto_info));
+	if (rc) {
+		rc = -EFAULT;
+		goto err_crypto_info;
+	}
+
 	if (tx) {
-#ifdef CONFIG_TLS_DEVICE
 		rc = tls_set_device_offload(sk, ctx);
 		conf = TLS_HW;
 		if (rc) {
-#else
-		{
-#endif
 			rc = tls_set_sw_offload(sk, ctx, 1);
+			if (rc)
+				goto err_crypto_info;
 			conf = TLS_SW;
 		}
 	} else {
-#ifdef CONFIG_TLS_DEVICE
 		rc = tls_set_device_offload_rx(sk, ctx);
 		conf = TLS_HW;
 		if (rc) {
-#else
-		{
-#endif
 			rc = tls_set_sw_offload(sk, ctx, 0);
+			if (rc)
+				goto err_crypto_info;
 			conf = TLS_SW;
 		}
+		tls_sw_strparser_arm(sk, ctx);
 	}
 
-	if (rc)
-		goto err_crypto_info;
-
 	if (tx)
 		ctx->tx_conf = conf;
 	else
@@ -540,7 +589,8 @@
 	struct tls_context *ctx = tls_get_ctx(sk);
 
 	if (level != SOL_TLS)
-		return ctx->setsockopt(sk, level, optname, optval, optlen);
+		return ctx->sk_proto->setsockopt(sk, level, optname, optval,
+						 optlen);
 
 	return do_tls_setsockopt(sk, optname, optval, optlen);
 }
@@ -550,39 +600,80 @@
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tls_context *ctx;
 
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
 	if (!ctx)
 		return NULL;
 
-	icsk->icsk_ulp_data = ctx;
+	mutex_init(&ctx->tx_lock);
+	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
+	ctx->sk_proto = sk->sk_prot;
 	return ctx;
 }
 
+static void tls_build_proto(struct sock *sk)
+{
+	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
+
+	/* Build IPv6 TLS whenever the address of tcpv6_prot changes */
+	if (ip_ver == TLSV6 &&
+	    unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
+		mutex_lock(&tcpv6_prot_mutex);
+		if (likely(sk->sk_prot != saved_tcpv6_prot)) {
+			build_protos(tls_prots[TLSV6], sk->sk_prot);
+			smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
+		}
+		mutex_unlock(&tcpv6_prot_mutex);
+	}
+
+	if (ip_ver == TLSV4 &&
+	    unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv4_prot))) {
+		mutex_lock(&tcpv4_prot_mutex);
+		if (likely(sk->sk_prot != saved_tcpv4_prot)) {
+			build_protos(tls_prots[TLSV4], sk->sk_prot);
+			smp_store_release(&saved_tcpv4_prot, sk->sk_prot);
+		}
+		mutex_unlock(&tcpv4_prot_mutex);
+	}
+}
+
+static void tls_hw_sk_destruct(struct sock *sk)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	ctx->sk_destruct(sk);
+	/* Free ctx */
+	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+	tls_ctx_free(sk, ctx);
+}
+
 static int tls_hw_prot(struct sock *sk)
 {
 	struct tls_context *ctx;
 	struct tls_device *dev;
 	int rc = 0;
 
-	mutex_lock(&device_mutex);
+	spin_lock_bh(&device_spinlock);
 	list_for_each_entry(dev, &device_list, dev_list) {
 		if (dev->feature && dev->feature(dev)) {
 			ctx = create_ctx(sk);
 			if (!ctx)
 				goto out;
 
-			ctx->hash = sk->sk_prot->hash;
-			ctx->unhash = sk->sk_prot->unhash;
-			ctx->sk_proto_close = sk->sk_prot->close;
+			spin_unlock_bh(&device_spinlock);
+			tls_build_proto(sk);
+			ctx->sk_destruct = sk->sk_destruct;
+			sk->sk_destruct = tls_hw_sk_destruct;
 			ctx->rx_conf = TLS_HW_RECORD;
 			ctx->tx_conf = TLS_HW_RECORD;
 			update_sk_prot(sk, ctx);
+			spin_lock_bh(&device_spinlock);
 			rc = 1;
 			break;
 		}
 	}
 out:
-	mutex_unlock(&device_mutex);
+	spin_unlock_bh(&device_spinlock);
 	return rc;
 }
 
@@ -591,13 +682,18 @@
 	struct tls_context *ctx = tls_get_ctx(sk);
 	struct tls_device *dev;
 
-	mutex_lock(&device_mutex);
+	spin_lock_bh(&device_spinlock);
 	list_for_each_entry(dev, &device_list, dev_list) {
-		if (dev->unhash)
+		if (dev->unhash) {
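+			/* hold dev so it can't be freed while the lock is
+			 * dropped to call the (possibly sleeping) callback
+			 */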
+			kref_get(&dev->kref);
+			spin_unlock_bh(&device_spinlock);
 			dev->unhash(dev, sk);
+			kref_put(&dev->kref, dev->release);
+			spin_lock_bh(&device_spinlock);
+		}
 	}
-	mutex_unlock(&device_mutex);
-	ctx->unhash(sk);
+	spin_unlock_bh(&device_spinlock);
+	ctx->sk_proto->unhash(sk);
 }
 
 static int tls_hw_hash(struct sock *sk)
@@ -606,13 +702,18 @@
 	struct tls_device *dev;
 	int err;
 
-	err = ctx->hash(sk);
-	mutex_lock(&device_mutex);
+	err = ctx->sk_proto->hash(sk);
+	spin_lock_bh(&device_spinlock);
 	list_for_each_entry(dev, &device_list, dev_list) {
-		if (dev->hash)
+		if (dev->hash) {
+			kref_get(&dev->kref);
+			spin_unlock_bh(&device_spinlock);
 			err |= dev->hash(dev, sk);
+			kref_put(&dev->kref, dev->release);
+			spin_lock_bh(&device_spinlock);
+		}
 	}
-	mutex_unlock(&device_mutex);
+	spin_unlock_bh(&device_spinlock);
 
 	if (err)
 		tls_hw_unhash(sk);
@@ -632,12 +733,14 @@
 	prot[TLS_SW][TLS_BASE].sendpage		= tls_sw_sendpage;
 
 	prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
-	prot[TLS_BASE][TLS_SW].recvmsg		= tls_sw_recvmsg;
-	prot[TLS_BASE][TLS_SW].close		= tls_sk_proto_close;
+	prot[TLS_BASE][TLS_SW].recvmsg		  = tls_sw_recvmsg;
+	prot[TLS_BASE][TLS_SW].stream_memory_read = tls_sw_stream_read;
+	prot[TLS_BASE][TLS_SW].close		  = tls_sk_proto_close;
 
 	prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE];
-	prot[TLS_SW][TLS_SW].recvmsg	= tls_sw_recvmsg;
-	prot[TLS_SW][TLS_SW].close	= tls_sk_proto_close;
+	prot[TLS_SW][TLS_SW].recvmsg		= tls_sw_recvmsg;
+	prot[TLS_SW][TLS_SW].stream_memory_read	= tls_sw_stream_read;
+	prot[TLS_SW][TLS_SW].close		= tls_sk_proto_close;
 
 #ifdef CONFIG_TLS_DEVICE
 	prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
@@ -658,17 +761,15 @@
 	prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
 	prot[TLS_HW_RECORD][TLS_HW_RECORD].hash		= tls_hw_hash;
 	prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash	= tls_hw_unhash;
-	prot[TLS_HW_RECORD][TLS_HW_RECORD].close	= tls_sk_proto_close;
 }
 
 static int tls_init(struct sock *sk)
 {
-	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 	struct tls_context *ctx;
 	int rc = 0;
 
 	if (tls_hw_prot(sk))
-		goto out;
+		return 0;
 
 	/* The TLS ulp is currently supported only for TCP sockets
 	 * in ESTABLISHED state.
@@ -679,69 +780,128 @@
 	if (sk->sk_state != TCP_ESTABLISHED)
 		return -ENOTSUPP;
 
+	tls_build_proto(sk);
+
 	/* allocate tls context */
+	write_lock_bh(&sk->sk_callback_lock);
 	ctx = create_ctx(sk);
 	if (!ctx) {
 		rc = -ENOMEM;
 		goto out;
 	}
-	ctx->setsockopt = sk->sk_prot->setsockopt;
-	ctx->getsockopt = sk->sk_prot->getsockopt;
-	ctx->sk_proto_close = sk->sk_prot->close;
-
-	/* Build IPv6 TLS whenever the address of tcpv6	_prot changes */
-	if (ip_ver == TLSV6 &&
-	    unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
-		mutex_lock(&tcpv6_prot_mutex);
-		if (likely(sk->sk_prot != saved_tcpv6_prot)) {
-			build_protos(tls_prots[TLSV6], sk->sk_prot);
-			smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
-		}
-		mutex_unlock(&tcpv6_prot_mutex);
-	}
 
 	ctx->tx_conf = TLS_BASE;
 	ctx->rx_conf = TLS_BASE;
 	update_sk_prot(sk, ctx);
 out:
+	write_unlock_bh(&sk->sk_callback_lock);
 	return rc;
 }
 
+static void tls_update(struct sock *sk, struct proto *p)
+{
+	struct tls_context *ctx;
+
+	ctx = tls_get_ctx(sk);
+	if (likely(ctx))
+		ctx->sk_proto = p;
+	else
+		sk->sk_prot = p;
+}
+
+static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
+{
+	u16 version, cipher_type;
+	struct tls_context *ctx;
+	struct nlattr *start;
+	int err;
+
+	start = nla_nest_start_noflag(skb, INET_ULP_INFO_TLS);
+	if (!start)
+		return -EMSGSIZE;
+
+	rcu_read_lock();
+	ctx = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
+	if (!ctx) {
+		err = 0;
+		goto nla_failure;
+	}
+	version = ctx->prot_info.version;
+	if (version) {
+		err = nla_put_u16(skb, TLS_INFO_VERSION, version);
+		if (err)
+			goto nla_failure;
+	}
+	cipher_type = ctx->prot_info.cipher_type;
+	if (cipher_type) {
+		err = nla_put_u16(skb, TLS_INFO_CIPHER, cipher_type);
+		if (err)
+			goto nla_failure;
+	}
+	err = nla_put_u16(skb, TLS_INFO_TXCONF, tls_user_config(ctx, true));
+	if (err)
+		goto nla_failure;
+
+	err = nla_put_u16(skb, TLS_INFO_RXCONF, tls_user_config(ctx, false));
+	if (err)
+		goto nla_failure;
+
+	rcu_read_unlock();
+	nla_nest_end(skb, start);
+	return 0;
+
+nla_failure:
+	rcu_read_unlock();
+	nla_nest_cancel(skb, start);
+	return err;
+}
+
+static size_t tls_get_info_size(const struct sock *sk)
+{
+	size_t size = 0;
+
+	size += nla_total_size(0) +		/* INET_ULP_INFO_TLS */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_VERSION */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_CIPHER */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_RXCONF */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_TXCONF */
+		0;
+
+	return size;
+}
+
 void tls_register_device(struct tls_device *device)
 {
-	mutex_lock(&device_mutex);
+	spin_lock_bh(&device_spinlock);
 	list_add_tail(&device->dev_list, &device_list);
-	mutex_unlock(&device_mutex);
+	spin_unlock_bh(&device_spinlock);
 }
 EXPORT_SYMBOL(tls_register_device);
 
 void tls_unregister_device(struct tls_device *device)
 {
-	mutex_lock(&device_mutex);
+	spin_lock_bh(&device_spinlock);
 	list_del(&device->dev_list);
-	mutex_unlock(&device_mutex);
+	spin_unlock_bh(&device_spinlock);
 }
 EXPORT_SYMBOL(tls_unregister_device);
 
 static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
 	.name			= "tls",
-	.uid			= TCP_ULP_TLS,
-	.user_visible		= true,
 	.owner			= THIS_MODULE,
 	.init			= tls_init,
+	.update			= tls_update,
+	.get_info		= tls_get_info,
+	.get_info_size		= tls_get_info_size,
 };
 
 static int __init tls_register(void)
 {
-	build_protos(tls_prots[TLSV4], &tcp_prot);
-
 	tls_sw_proto_ops = inet_stream_ops;
-	tls_sw_proto_ops.poll = tls_sw_poll;
 	tls_sw_proto_ops.splice_read = tls_sw_splice_read;
+	tls_sw_proto_ops.sendpage_locked   = tls_sw_sendpage_locked;
 
-#ifdef CONFIG_TLS_DEVICE
 	tls_device_init();
-#endif
 	tcp_register_ulp(&tcp_tls_ulp_ops);
 
 	return 0;
@@ -750,9 +910,7 @@
 static void __exit tls_unregister(void)
 {
 	tcp_unregister_ulp(&tcp_tls_ulp_ops);
-#ifdef CONFIG_TLS_DEVICE
 	tls_device_cleanup();
-#endif
 }
 
 module_init(tls_register);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index b9c6ecf..5dd0f01 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved.
  * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved.
  * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved.
+ * Copyright (c) 2018, Covalent IO, Inc. http://covalent.io
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -41,237 +42,1257 @@
 #include <net/strparser.h>
 #include <net/tls.h>
 
-#define MAX_IV_SIZE	TLS_CIPHER_AES_GCM_128_IV_SIZE
+static int __skb_nsg(struct sk_buff *skb, int offset, int len,
+                     unsigned int recursion_level)
+{
+        int start = skb_headlen(skb);
+        int i, chunk = start - offset;
+        struct sk_buff *frag_iter;
+        int elt = 0;
+
+        if (unlikely(recursion_level >= 24))
+                return -EMSGSIZE;
+
+        if (chunk > 0) {
+                if (chunk > len)
+                        chunk = len;
+                elt++;
+                len -= chunk;
+                if (len == 0)
+                        return elt;
+                offset += chunk;
+        }
+
+        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                int end;
+
+                WARN_ON(start > offset + len);
+
+                end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
+                chunk = end - offset;
+                if (chunk > 0) {
+                        if (chunk > len)
+                                chunk = len;
+                        elt++;
+                        len -= chunk;
+                        if (len == 0)
+                                return elt;
+                        offset += chunk;
+                }
+                start = end;
+        }
+
+        if (unlikely(skb_has_frag_list(skb))) {
+                skb_walk_frags(skb, frag_iter) {
+                        int end, ret;
+
+                        WARN_ON(start > offset + len);
+
+                        end = start + frag_iter->len;
+                        chunk = end - offset;
+                        if (chunk > 0) {
+                                if (chunk > len)
+                                        chunk = len;
+                                ret = __skb_nsg(frag_iter, offset - start, chunk,
+                                                recursion_level + 1);
+                                if (unlikely(ret < 0))
+                                        return ret;
+                                elt += ret;
+                                len -= chunk;
+                                if (len == 0)
+                                        return elt;
+                                offset += chunk;
+                        }
+                        start = end;
+                }
+        }
+        BUG_ON(len);
+        return elt;
+}
+
+/* Return the number of scatterlist elements required to completely map the
+ * skb, or -EMSGSIZE if the recursion depth is exceeded.
+ */
+static int skb_nsg(struct sk_buff *skb, int offset, int len)
+{
+        return __skb_nsg(skb, offset, len, 0);
+}
+
+static int padding_length(struct tls_sw_context_rx *ctx,
+			  struct tls_prot_info *prot, struct sk_buff *skb)
+{
+	struct strp_msg *rxm = strp_msg(skb);
+	int sub = 0;
+
+	/* Determine zero-padding length */
+	if (prot->version == TLS_1_3_VERSION) {
+		char content_type = 0;
+		int err;
+		int back = 17;
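+		/* 17 = 16-byte auth tag + 1 byte of content type */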
+
+		while (content_type == 0) {
+			if (back > rxm->full_len - prot->prepend_size)
+				return -EBADMSG;
+			err = skb_copy_bits(skb,
+					    rxm->offset + rxm->full_len - back,
+					    &content_type, 1);
+			if (err)
+				return err;
+			if (content_type)
+				break;
+			sub++;
+			back++;
+		}
+		ctx->control = content_type;
+	}
+	return sub;
+}
+
+static void tls_decrypt_done(struct crypto_async_request *req, int err)
+{
+	struct aead_request *aead_req = (struct aead_request *)req;
+	struct scatterlist *sgout = aead_req->dst;
+	struct scatterlist *sgin = aead_req->src;
+	struct tls_sw_context_rx *ctx;
+	struct tls_context *tls_ctx;
+	struct tls_prot_info *prot;
+	struct scatterlist *sg;
+	struct sk_buff *skb;
+	unsigned int pages;
+	int pending;
+
+	skb = (struct sk_buff *)req->data;
+	tls_ctx = tls_get_ctx(skb->sk);
+	ctx = tls_sw_ctx_rx(tls_ctx);
+	prot = &tls_ctx->prot_info;
+
+	/* Propagate if there was an err */
+	if (err) {
+		ctx->async_wait.err = err;
+		tls_err_abort(skb->sk, err);
+	} else {
+		struct strp_msg *rxm = strp_msg(skb);
+		int pad;
+
+		pad = padding_length(ctx, prot, skb);
+		if (pad < 0) {
+			ctx->async_wait.err = pad;
+			tls_err_abort(skb->sk, pad);
+		} else {
+			rxm->full_len -= pad;
+			rxm->offset += prot->prepend_size;
+			rxm->full_len -= prot->overhead_size;
+		}
+	}
+
+	/* After using skb->sk to propagate sk through crypto async callback
+	 * we need to NULL it again.
+	 */
+	skb->sk = NULL;
+
+	/* Free the destination pages if skb was not decrypted inplace */
+	if (sgout != sgin) {
+		/* Skip the first S/G entry as it points to AAD */
+		for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
+			if (!sg)
+				break;
+			put_page(sg_page(sg));
+		}
+	}
+
+	kfree(aead_req);
+
+	pending = atomic_dec_return(&ctx->decrypt_pending);
+
+	if (!pending && READ_ONCE(ctx->async_notify))
+		complete(&ctx->async_wait.completion);
+}
 
 static int tls_do_decryption(struct sock *sk,
+			     struct sk_buff *skb,
 			     struct scatterlist *sgin,
 			     struct scatterlist *sgout,
 			     char *iv_recv,
 			     size_t data_len,
-			     struct aead_request *aead_req)
+			     struct aead_request *aead_req,
+			     bool async)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 	int ret;
 
 	aead_request_set_tfm(aead_req, ctx->aead_recv);
-	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
+	aead_request_set_ad(aead_req, prot->aad_size);
 	aead_request_set_crypt(aead_req, sgin, sgout,
-			       data_len + tls_ctx->rx.tag_size,
+			       data_len + prot->tag_size,
 			       (u8 *)iv_recv);
-	aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				  crypto_req_done, &ctx->async_wait);
 
-	ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait);
+	if (async) {
+		/* Using skb->sk to push sk through to crypto async callback
+		 * handler. This allows propagating errors up to the socket
+		 * if needed. It _must_ be cleared in the async handler
+		 * before consume_skb is called. We _know_ skb->sk is NULL
+		 * because it is a clone from strparser.
+		 */
+		skb->sk = sk;
+		aead_request_set_callback(aead_req,
+					  CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  tls_decrypt_done, skb);
+		atomic_inc(&ctx->decrypt_pending);
+	} else {
+		aead_request_set_callback(aead_req,
+					  CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  crypto_req_done, &ctx->async_wait);
+	}
+
+	ret = crypto_aead_decrypt(aead_req);
+	if (ret == -EINPROGRESS) {
+		if (async)
+			return ret;
+
+		ret = crypto_wait_req(ret, &ctx->async_wait);
+	}
+
+	if (async)
+		atomic_dec(&ctx->decrypt_pending);
+
 	return ret;
 }
 
-static void trim_sg(struct sock *sk, struct scatterlist *sg,
-		    int *sg_num_elem, unsigned int *sg_size, int target_size)
-{
-	int i = *sg_num_elem - 1;
-	int trim = *sg_size - target_size;
-
-	if (trim <= 0) {
-		WARN_ON(trim < 0);
-		return;
-	}
-
-	*sg_size = target_size;
-	while (trim >= sg[i].length) {
-		trim -= sg[i].length;
-		sk_mem_uncharge(sk, sg[i].length);
-		put_page(sg_page(&sg[i]));
-		i--;
-
-		if (i < 0)
-			goto out;
-	}
-
-	sg[i].length -= trim;
-	sk_mem_uncharge(sk, trim);
-
-out:
-	*sg_num_elem = i + 1;
-}
-
-static void trim_both_sgl(struct sock *sk, int target_size)
+static void tls_trim_both_msgs(struct sock *sk, int target_size)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_rec *rec = ctx->open_rec;
 
-	trim_sg(sk, ctx->sg_plaintext_data,
-		&ctx->sg_plaintext_num_elem,
-		&ctx->sg_plaintext_size,
-		target_size);
-
+	sk_msg_trim(sk, &rec->msg_plaintext, target_size);
 	if (target_size > 0)
-		target_size += tls_ctx->tx.overhead_size;
-
-	trim_sg(sk, ctx->sg_encrypted_data,
-		&ctx->sg_encrypted_num_elem,
-		&ctx->sg_encrypted_size,
-		target_size);
+		target_size += prot->overhead_size;
+	sk_msg_trim(sk, &rec->msg_encrypted, target_size);
 }
 
-static int alloc_encrypted_sg(struct sock *sk, int len)
+static int tls_alloc_encrypted_msg(struct sock *sk, int len)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	int rc = 0;
+	struct tls_rec *rec = ctx->open_rec;
+	struct sk_msg *msg_en = &rec->msg_encrypted;
 
-	rc = sk_alloc_sg(sk, len,
-			 ctx->sg_encrypted_data, 0,
-			 &ctx->sg_encrypted_num_elem,
-			 &ctx->sg_encrypted_size, 0);
-
-	if (rc == -ENOSPC)
-		ctx->sg_encrypted_num_elem = ARRAY_SIZE(ctx->sg_encrypted_data);
-
-	return rc;
+	return sk_msg_alloc(sk, msg_en, len, 0);
 }
 
-static int alloc_plaintext_sg(struct sock *sk, int len)
+static int tls_clone_plaintext_msg(struct sock *sk, int required)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_rec *rec = ctx->open_rec;
+	struct sk_msg *msg_pl = &rec->msg_plaintext;
+	struct sk_msg *msg_en = &rec->msg_encrypted;
+	int skip, len;
+
+	/* We add page references worth len bytes from encrypted sg
+	 * at the end of plaintext sg. It is guaranteed that msg_en
+	 * has enough required room (ensured by caller).
+	 */
+	len = required - msg_pl->sg.size;
+
+	/* Skip initial bytes in msg_en's data to be able to use
+	 * same offset of both plain and encrypted data.
+	 */
+	skip = prot->prepend_size + msg_pl->sg.size;
+
+	return sk_msg_clone(sk, msg_pl, msg_en, skip, len);
+}
+
+static struct tls_rec *tls_get_rec(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct sk_msg *msg_pl, *msg_en;
+	struct tls_rec *rec;
+	int mem_size;
+
+	mem_size = sizeof(struct tls_rec) + crypto_aead_reqsize(ctx->aead_send);
+
+	rec = kzalloc(mem_size, sk->sk_allocation);
+	if (!rec)
+		return NULL;
+
+	msg_pl = &rec->msg_plaintext;
+	msg_en = &rec->msg_encrypted;
+
+	sk_msg_init(msg_pl);
+	sk_msg_init(msg_en);
+
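+	/* entry 0 of each AEAD sg table holds the AAD; entry 1 is left
+	 * unterminated so the data sg can be chained in later
+	 */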
+	sg_init_table(rec->sg_aead_in, 2);
+	sg_set_buf(&rec->sg_aead_in[0], rec->aad_space, prot->aad_size);
+	sg_unmark_end(&rec->sg_aead_in[1]);
+
+	sg_init_table(rec->sg_aead_out, 2);
+	sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, prot->aad_size);
+	sg_unmark_end(&rec->sg_aead_out[1]);
+
+	return rec;
+}
+
+static void tls_free_rec(struct sock *sk, struct tls_rec *rec)
+{
+	sk_msg_free(sk, &rec->msg_encrypted);
+	sk_msg_free(sk, &rec->msg_plaintext);
+	kfree(rec);
+}
+
+static void tls_free_open_rec(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	int rc = 0;
+	struct tls_rec *rec = ctx->open_rec;
 
-	rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0,
-			 &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
-			 tls_ctx->pending_open_record_frags);
-
-	if (rc == -ENOSPC)
-		ctx->sg_plaintext_num_elem = ARRAY_SIZE(ctx->sg_plaintext_data);
-
-	return rc;
-}
-
-static void free_sg(struct sock *sk, struct scatterlist *sg,
-		    int *sg_num_elem, unsigned int *sg_size)
-{
-	int i, n = *sg_num_elem;
-
-	for (i = 0; i < n; ++i) {
-		sk_mem_uncharge(sk, sg[i].length);
-		put_page(sg_page(&sg[i]));
+	if (rec) {
+		tls_free_rec(sk, rec);
+		ctx->open_rec = NULL;
 	}
-	*sg_num_elem = 0;
-	*sg_size = 0;
 }
 
-static void tls_free_both_sg(struct sock *sk)
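+/* Transmit, in order, every record on tx_list whose encryption has
+ * completed; flags == -1 means "use the flags saved in each record".
+ */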
+int tls_tx_records(struct sock *sk, int flags)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_rec *rec, *tmp;
+	struct sk_msg *msg_en;
+	int tx_flags, rc = 0;
 
-	free_sg(sk, ctx->sg_encrypted_data, &ctx->sg_encrypted_num_elem,
-		&ctx->sg_encrypted_size);
+	if (tls_is_partially_sent_record(tls_ctx)) {
+		rec = list_first_entry(&ctx->tx_list,
+				       struct tls_rec, list);
 
-	free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem,
-		&ctx->sg_plaintext_size);
+		if (flags == -1)
+			tx_flags = rec->tx_flags;
+		else
+			tx_flags = flags;
+
+		rc = tls_push_partial_record(sk, tls_ctx, tx_flags);
+		if (rc)
+			goto tx_err;
+
+		/* Full record has been transmitted.
+		 * Remove the head of tx_list
+		 */
+		list_del(&rec->list);
+		sk_msg_free(sk, &rec->msg_plaintext);
+		kfree(rec);
+	}
+
+	/* Tx all ready records */
+	list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) {
+		if (READ_ONCE(rec->tx_ready)) {
+			if (flags == -1)
+				tx_flags = rec->tx_flags;
+			else
+				tx_flags = flags;
+
+			msg_en = &rec->msg_encrypted;
+			rc = tls_push_sg(sk, tls_ctx,
+					 &msg_en->sg.data[msg_en->sg.curr],
+					 0, tx_flags);
+			if (rc)
+				goto tx_err;
+
+			list_del(&rec->list);
+			sk_msg_free(sk, &rec->msg_plaintext);
+			kfree(rec);
+		} else {
+			break;
+		}
+	}
+
+tx_err:
+	if (rc < 0 && rc != -EAGAIN)
+		tls_err_abort(sk, EBADMSG);
+
+	return rc;
 }
 
-static int tls_do_encryption(struct tls_context *tls_ctx,
+static void tls_encrypt_done(struct crypto_async_request *req, int err)
+{
+	struct aead_request *aead_req = (struct aead_request *)req;
+	struct sock *sk = req->data;
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct scatterlist *sge;
+	struct sk_msg *msg_en;
+	struct tls_rec *rec;
+	bool ready = false;
+	int pending;
+
+	rec = container_of(aead_req, struct tls_rec, aead_req);
+	msg_en = &rec->msg_encrypted;
+
+	sge = sk_msg_elem(msg_en, msg_en->sg.curr);
+	sge->offset -= prot->prepend_size;
+	sge->length += prot->prepend_size;
+
+	/* Check if error is previously set on socket */
+	if (err || sk->sk_err) {
+		rec = NULL;
+
+		/* If err is already set on socket, return the same code */
+		if (sk->sk_err) {
+			ctx->async_wait.err = sk->sk_err;
+		} else {
+			ctx->async_wait.err = err;
+			tls_err_abort(sk, err);
+		}
+	}
+
+	if (rec) {
+		struct tls_rec *first_rec;
+
+		/* Mark the record as ready for transmission */
+		smp_store_mb(rec->tx_ready, true);
+
+		/* If received record is at head of tx_list, schedule tx */
+		first_rec = list_first_entry(&ctx->tx_list,
+					     struct tls_rec, list);
+		if (rec == first_rec)
+			ready = true;
+	}
+
+	pending = atomic_dec_return(&ctx->encrypt_pending);
+
+	if (!pending && READ_ONCE(ctx->async_notify))
+		complete(&ctx->async_wait.completion);
+
+	if (!ready)
+		return;
+
+	/* Schedule the transmission */
+	if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+		schedule_delayed_work(&ctx->tx_work.work, 1);
+}
+
+static int tls_do_encryption(struct sock *sk,
+			     struct tls_context *tls_ctx,
 			     struct tls_sw_context_tx *ctx,
 			     struct aead_request *aead_req,
-			     size_t data_len)
+			     size_t data_len, u32 start)
 {
-	int rc;
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
+	struct tls_rec *rec = ctx->open_rec;
+	struct sk_msg *msg_en = &rec->msg_encrypted;
+	struct scatterlist *sge = sk_msg_elem(msg_en, start);
+	int rc, iv_offset = 0;
 
-	ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size;
-	ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size;
+	/* For CCM-based ciphers, the first byte of the IV is a constant */
+	if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) {
+		rec->iv_data[0] = TLS_AES_CCM_IV_B0_BYTE;
+		iv_offset = 1;
+	}
+
+	memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv,
+	       prot->iv_size + prot->salt_size);
+
+	xor_iv_with_seq(prot->version, rec->iv_data, tls_ctx->tx.rec_seq);
+
+	sge->offset += prot->prepend_size;
+	sge->length -= prot->prepend_size;
+
+	msg_en->sg.curr = start;
 
 	aead_request_set_tfm(aead_req, ctx->aead_send);
-	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
-	aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out,
-			       data_len, tls_ctx->tx.iv);
+	aead_request_set_ad(aead_req, prot->aad_size);
+	aead_request_set_crypt(aead_req, rec->sg_aead_in,
+			       rec->sg_aead_out,
+			       data_len, rec->iv_data);
 
 	aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				  crypto_req_done, &ctx->async_wait);
+				  tls_encrypt_done, sk);
 
-	rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait);
+	/* Add the record in tx_list */
+	list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
+	atomic_inc(&ctx->encrypt_pending);
 
-	ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size;
-	ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size;
+	rc = crypto_aead_encrypt(aead_req);
+	if (!rc || rc != -EINPROGRESS) {
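+		/* the request completed synchronously (success or error) */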
+		atomic_dec(&ctx->encrypt_pending);
+		sge->offset -= prot->prepend_size;
+		sge->length += prot->prepend_size;
+	}
 
+	if (!rc) {
+		WRITE_ONCE(rec->tx_ready, true);
+	} else if (rc != -EINPROGRESS) {
+		list_del(&rec->list);
+		return rc;
+	}
+
+	/* Unhook the record from context if encryption didn't fail */
+	ctx->open_rec = NULL;
+	tls_advance_record_sn(sk, prot, &tls_ctx->tx);
 	return rc;
 }
 
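+/* Split the open record in two at @split_point so that the first
+ * @split_point plaintext bytes (the sk_msg apply budget) stay in @from;
+ * the remainder moves to a new record returned via @to.
+ */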
+static int tls_split_open_record(struct sock *sk, struct tls_rec *from,
+				 struct tls_rec **to, struct sk_msg *msg_opl,
+				 struct sk_msg *msg_oen, u32 split_point,
+				 u32 tx_overhead_size, u32 *orig_end)
+{
+	u32 i, j, bytes = 0, apply = msg_opl->apply_bytes;
+	struct scatterlist *sge, *osge, *nsge;
+	u32 orig_size = msg_opl->sg.size;
+	struct scatterlist tmp = { };
+	struct sk_msg *msg_npl;
+	struct tls_rec *new;
+	int ret;
+
+	new = tls_get_rec(sk);
+	if (!new)
+		return -ENOMEM;
+	ret = sk_msg_alloc(sk, &new->msg_encrypted, msg_opl->sg.size +
+			   tx_overhead_size, 0);
+	if (ret < 0) {
+		tls_free_rec(sk, new);
+		return ret;
+	}
+
+	*orig_end = msg_opl->sg.end;
+	i = msg_opl->sg.start;
+	sge = sk_msg_elem(msg_opl, i);
+	while (apply && sge->length) {
+		if (sge->length > apply) {
+			u32 len = sge->length - apply;
+
+			get_page(sg_page(sge));
+			sg_set_page(&tmp, sg_page(sge), len,
+				    sge->offset + apply);
+			sge->length = apply;
+			bytes += apply;
+			apply = 0;
+		} else {
+			apply -= sge->length;
+			bytes += sge->length;
+		}
+
+		sk_msg_iter_var_next(i);
+		if (i == msg_opl->sg.end)
+			break;
+		sge = sk_msg_elem(msg_opl, i);
+	}
+
+	msg_opl->sg.end = i;
+	msg_opl->sg.curr = i;
+	msg_opl->sg.copybreak = 0;
+	msg_opl->apply_bytes = 0;
+	msg_opl->sg.size = bytes;
+
+	msg_npl = &new->msg_plaintext;
+	msg_npl->apply_bytes = apply;
+	msg_npl->sg.size = orig_size - bytes;
+
+	j = msg_npl->sg.start;
+	nsge = sk_msg_elem(msg_npl, j);
+	if (tmp.length) {
+		memcpy(nsge, &tmp, sizeof(*nsge));
+		sk_msg_iter_var_next(j);
+		nsge = sk_msg_elem(msg_npl, j);
+	}
+
+	osge = sk_msg_elem(msg_opl, i);
+	while (osge->length) {
+		memcpy(nsge, osge, sizeof(*nsge));
+		sg_unmark_end(nsge);
+		sk_msg_iter_var_next(i);
+		sk_msg_iter_var_next(j);
+		if (i == *orig_end)
+			break;
+		osge = sk_msg_elem(msg_opl, i);
+		nsge = sk_msg_elem(msg_npl, j);
+	}
+
+	msg_npl->sg.end = j;
+	msg_npl->sg.curr = j;
+	msg_npl->sg.copybreak = 0;
+
+	*to = new;
+	return 0;
+}
+
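+/* Undo a split: fold @from's plaintext back into @to, coalescing the
+ * boundary entries when they are contiguous within one page.
+ */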
+static void tls_merge_open_record(struct sock *sk, struct tls_rec *to,
+				  struct tls_rec *from, u32 orig_end)
+{
+	struct sk_msg *msg_npl = &from->msg_plaintext;
+	struct sk_msg *msg_opl = &to->msg_plaintext;
+	struct scatterlist *osge, *nsge;
+	u32 i, j;
+
+	i = msg_opl->sg.end;
+	sk_msg_iter_var_prev(i);
+	j = msg_npl->sg.start;
+
+	osge = sk_msg_elem(msg_opl, i);
+	nsge = sk_msg_elem(msg_npl, j);
+
+	if (sg_page(osge) == sg_page(nsge) &&
+	    osge->offset + osge->length == nsge->offset) {
+		osge->length += nsge->length;
+		put_page(sg_page(nsge));
+	}
+
+	msg_opl->sg.end = orig_end;
+	msg_opl->sg.curr = orig_end;
+	msg_opl->sg.copybreak = 0;
+	msg_opl->apply_bytes = msg_opl->sg.size + msg_npl->sg.size;
+	msg_opl->sg.size += msg_npl->sg.size;
+
+	sk_msg_free(sk, &to->msg_encrypted);
+	sk_msg_xfer_full(&to->msg_encrypted, &from->msg_encrypted);
+
+	kfree(from);
+}
+
 static int tls_push_record(struct sock *sk, int flags,
 			   unsigned char record_type)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_rec *rec = ctx->open_rec, *tmp = NULL;
+	u32 i, split_point, uninitialized_var(orig_end);
+	struct sk_msg *msg_pl, *msg_en;
 	struct aead_request *req;
+	bool split;
 	int rc;
 
-	req = aead_request_alloc(ctx->aead_send, sk->sk_allocation);
-	if (!req)
-		return -ENOMEM;
+	if (!rec)
+		return 0;
 
-	sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
-	sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
+	msg_pl = &rec->msg_plaintext;
+	msg_en = &rec->msg_encrypted;
 
-	tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
-		     tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
-		     record_type);
-
-	tls_fill_prepend(tls_ctx,
-			 page_address(sg_page(&ctx->sg_encrypted_data[0])) +
-			 ctx->sg_encrypted_data[0].offset,
-			 ctx->sg_plaintext_size, record_type);
-
-	tls_ctx->pending_open_record_frags = 0;
-	set_bit(TLS_PENDING_CLOSED_RECORD, &tls_ctx->flags);
-
-	rc = tls_do_encryption(tls_ctx, ctx, req, ctx->sg_plaintext_size);
-	if (rc < 0) {
-		/* If we are called from write_space and
-		 * we fail, we need to set this SOCK_NOSPACE
-		 * to trigger another write_space in the future.
-		 */
-		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		goto out_req;
+	split_point = msg_pl->apply_bytes;
+	split = split_point && split_point < msg_pl->sg.size;
+	if (split) {
+		rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en,
+					   split_point, prot->overhead_size,
+					   &orig_end);
+		if (rc < 0)
+			return rc;
+		sk_msg_trim(sk, msg_en, msg_pl->sg.size +
+			    prot->overhead_size);
 	}
 
-	free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem,
-		&ctx->sg_plaintext_size);
+	rec->tx_flags = flags;
+	req = &rec->aead_req;
 
-	ctx->sg_encrypted_num_elem = 0;
-	ctx->sg_encrypted_size = 0;
+	i = msg_pl->sg.end;
+	sk_msg_iter_var_prev(i);
 
-	/* Only pass through MSG_DONTWAIT and MSG_NOSIGNAL flags */
-	rc = tls_push_sg(sk, tls_ctx, ctx->sg_encrypted_data, 0, flags);
-	if (rc < 0 && rc != -EAGAIN)
-		tls_err_abort(sk, EBADMSG);
+	rec->content_type = record_type;
+	if (prot->version == TLS_1_3_VERSION) {
+		/* Add content type to end of message.  No padding added */
+		sg_set_buf(&rec->sg_content_type, &rec->content_type, 1);
+		sg_mark_end(&rec->sg_content_type);
+		sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1,
+			 &rec->sg_content_type);
+	} else {
+		sg_mark_end(sk_msg_elem(msg_pl, i));
+	}
 
-	tls_advance_record_sn(sk, &tls_ctx->tx);
-out_req:
-	aead_request_free(req);
-	return rc;
+	i = msg_pl->sg.start;
+	sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]);
+
+	i = msg_en->sg.end;
+	sk_msg_iter_var_prev(i);
+	sg_mark_end(sk_msg_elem(msg_en, i));
+
+	i = msg_en->sg.start;
+	sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]);
+
+	tls_make_aad(rec->aad_space, msg_pl->sg.size + prot->tail_size,
+		     tls_ctx->tx.rec_seq, prot->rec_seq_size,
+		     record_type, prot->version);
+
+	tls_fill_prepend(tls_ctx,
+			 page_address(sg_page(&msg_en->sg.data[i])) +
+			 msg_en->sg.data[i].offset,
+			 msg_pl->sg.size + prot->tail_size,
+			 record_type, prot->version);
+
+	tls_ctx->pending_open_record_frags = false;
+
+	rc = tls_do_encryption(sk, tls_ctx, ctx, req,
+			       msg_pl->sg.size + prot->tail_size, i);
+	if (rc < 0) {
+		if (rc != -EINPROGRESS) {
+			tls_err_abort(sk, EBADMSG);
+			if (split) {
+				tls_ctx->pending_open_record_frags = true;
+				tls_merge_open_record(sk, rec, tmp, orig_end);
+			}
+		}
+		ctx->async_capable = 1;
+		return rc;
+	} else if (split) {
+		msg_pl = &tmp->msg_plaintext;
+		msg_en = &tmp->msg_encrypted;
+		sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size);
+		tls_ctx->pending_open_record_frags = true;
+		ctx->open_rec = tmp;
+	}
+
+	return tls_tx_records(sk, flags);
+}
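
One detail worth calling out: for TLS 1.3 the real record type travels inside the ciphertext, so tls_push_record() chains a one-byte sg entry holding rec->content_type after the payload (that is the prot->tail_size byte), while TLS 1.2 keeps the type in the clear header. A sketch of the resulting TLSInnerPlaintext layout, as plain buffer arithmetic rather than the kernel sg API:

#include <stdio.h>
#include <string.h>

/* TLS 1.3 TLSInnerPlaintext: payload || content_type, no padding here,
 * matching the "No padding added" comment in tls_push_record(). */
static size_t build_inner_plaintext(unsigned char *out, size_t outlen,
				    const unsigned char *payload, size_t len,
				    unsigned char content_type)
{
	if (len + 1 > outlen)
		return 0;
	memcpy(out, payload, len);
	out[len] = content_type;	/* e.g. 23 == application_data */
	return len + 1;
}

int main(void)
{
	unsigned char buf[32];
	size_t n = build_inner_plaintext(buf, sizeof(buf),
					 (const unsigned char *)"hi", 2, 23);

	printf("inner plaintext: %zu bytes, trailing type %u\n",
	       n, (unsigned)buf[n - 1]);
	return 0;
}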
+
+static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
+			       bool full_record, u8 record_type,
+			       size_t *copied, int flags)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct sk_msg msg_redir = { };
+	struct sk_psock *psock;
+	struct sock *sk_redir;
+	struct tls_rec *rec;
+	bool enospc, policy;
+	int err = 0, send;
+	u32 delta = 0;
+
+	policy = !(flags & MSG_SENDPAGE_NOPOLICY);
+	psock = sk_psock_get(sk);
+	if (!psock || !policy) {
+		err = tls_push_record(sk, flags, record_type);
+		if (err) {
+			*copied -= sk_msg_free(sk, msg);
+			tls_free_open_rec(sk);
+		}
+		return err;
+	}
+more_data:
+	enospc = sk_msg_full(msg);
+	if (psock->eval == __SK_NONE) {
+		delta = msg->sg.size;
+		psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+		if (delta < msg->sg.size)
+			delta -= msg->sg.size;
+		else
+			delta = 0;
+	}
+	if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
+	    !enospc && !full_record) {
+		err = -ENOSPC;
+		goto out_err;
+	}
+	msg->cork_bytes = 0;
+	send = msg->sg.size;
+	if (msg->apply_bytes && msg->apply_bytes < send)
+		send = msg->apply_bytes;
+
+	switch (psock->eval) {
+	case __SK_PASS:
+		err = tls_push_record(sk, flags, record_type);
+		if (err < 0) {
+			*copied -= sk_msg_free(sk, msg);
+			tls_free_open_rec(sk);
+			goto out_err;
+		}
+		break;
+	case __SK_REDIRECT:
+		sk_redir = psock->sk_redir;
+		memcpy(&msg_redir, msg, sizeof(*msg));
+		if (msg->apply_bytes < send)
+			msg->apply_bytes = 0;
+		else
+			msg->apply_bytes -= send;
+		sk_msg_return_zero(sk, msg, send);
+		msg->sg.size -= send;
+		release_sock(sk);
+		err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags);
+		lock_sock(sk);
+		if (err < 0) {
+			*copied -= sk_msg_free_nocharge(sk, &msg_redir);
+			msg->sg.size = 0;
+		}
+		if (msg->sg.size == 0)
+			tls_free_open_rec(sk);
+		break;
+	case __SK_DROP:
+	default:
+		sk_msg_free_partial(sk, msg, send);
+		if (msg->apply_bytes < send)
+			msg->apply_bytes = 0;
+		else
+			msg->apply_bytes -= send;
+		if (msg->sg.size == 0)
+			tls_free_open_rec(sk);
+		*copied -= (send + delta);
+		err = -EACCES;
+	}
+
+	if (likely(!err)) {
+		bool reset_eval = !ctx->open_rec;
+
+		rec = ctx->open_rec;
+		if (rec) {
+			msg = &rec->msg_plaintext;
+			if (!msg->apply_bytes)
+				reset_eval = true;
+		}
+		if (reset_eval) {
+			psock->eval = __SK_NONE;
+			if (psock->sk_redir) {
+				sock_put(psock->sk_redir);
+				psock->sk_redir = NULL;
+			}
+		}
+		if (rec)
+			goto more_data;
+	}
+ out_err:
+	sk_psock_put(sk, psock);
+	return err;
 }
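
Each pass of the verdict loop sends at most min(msg->sg.size, msg->apply_bytes) and then shrinks apply_bytes without underflowing, which is what the branchy arithmetic on the redirect and drop paths implements. The same clamping in isolation (struct and helper names are invented):

#include <stdio.h>

struct msg { unsigned int size, apply_bytes; };

/* send = msg->sg.size, clamped to apply_bytes when one is armed */
static unsigned int verdict_chunk(const struct msg *m)
{
	if (m->apply_bytes && m->apply_bytes < m->size)
		return m->apply_bytes;
	return m->size;
}

/* The saturating decrement used on the redirect and drop paths */
static void consume_apply(struct msg *m, unsigned int send)
{
	if (m->apply_bytes < send)
		m->apply_bytes = 0;
	else
		m->apply_bytes -= send;
}

int main(void)
{
	struct msg m = { .size = 100, .apply_bytes = 40 };
	unsigned int send = verdict_chunk(&m);

	consume_apply(&m, send);
	printf("send=%u remaining apply_bytes=%u\n", send, m.apply_bytes);
	return 0;
}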
 
 static int tls_sw_push_pending_record(struct sock *sk, int flags)
 {
-	return tls_push_record(sk, flags, TLS_RECORD_TYPE_DATA);
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_rec *rec = ctx->open_rec;
+	struct sk_msg *msg_pl;
+	size_t copied;
+
+	if (!rec)
+		return 0;
+
+	msg_pl = &rec->msg_plaintext;
+	copied = msg_pl->sg.size;
+	if (!copied)
+		return 0;
+
+	return bpf_exec_tx_verdict(msg_pl, sk, true, TLS_RECORD_TYPE_DATA,
+				   &copied, flags);
 }
 
-static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
-			      int length, int *pages_used,
-			      unsigned int *size_used,
-			      struct scatterlist *to, int to_max_pages,
-			      bool charge)
+int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
-	struct page *pages[MAX_SKB_FRAGS];
+	long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	bool async_capable = ctx->async_capable;
+	unsigned char record_type = TLS_RECORD_TYPE_DATA;
+	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
+	bool eor = !(msg->msg_flags & MSG_MORE);
+	size_t try_to_copy, copied = 0;
+	struct sk_msg *msg_pl, *msg_en;
+	struct tls_rec *rec;
+	int required_size;
+	int num_async = 0;
+	bool full_record;
+	int record_room;
+	int num_zc = 0;
+	int orig_size;
+	int ret = 0;
 
-	size_t offset;
-	ssize_t copied, use;
-	int i = 0;
+	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
+		return -ENOTSUPP;
+
+	mutex_lock(&tls_ctx->tx_lock);
+	lock_sock(sk);
+
+	if (unlikely(msg->msg_controllen)) {
+		ret = tls_proccess_cmsg(sk, msg, &record_type);
+		if (ret) {
+			if (ret == -EINPROGRESS)
+				num_async++;
+			else if (ret != -EAGAIN)
+				goto send_end;
+		}
+	}
+
+	while (msg_data_left(msg)) {
+		if (sk->sk_err) {
+			ret = -sk->sk_err;
+			goto send_end;
+		}
+
+		if (ctx->open_rec)
+			rec = ctx->open_rec;
+		else
+			rec = ctx->open_rec = tls_get_rec(sk);
+		if (!rec) {
+			ret = -ENOMEM;
+			goto send_end;
+		}
+
+		msg_pl = &rec->msg_plaintext;
+		msg_en = &rec->msg_encrypted;
+
+		orig_size = msg_pl->sg.size;
+		full_record = false;
+		try_to_copy = msg_data_left(msg);
+		record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
+		if (try_to_copy >= record_room) {
+			try_to_copy = record_room;
+			full_record = true;
+		}
+
+		required_size = msg_pl->sg.size + try_to_copy +
+				prot->overhead_size;
+
+		if (!sk_stream_memory_free(sk))
+			goto wait_for_sndbuf;
+
+alloc_encrypted:
+		ret = tls_alloc_encrypted_msg(sk, required_size);
+		if (ret) {
+			if (ret != -ENOSPC)
+				goto wait_for_memory;
+
+			/* Adjust try_to_copy according to the amount that was
+			 * actually allocated. The difference is due
+			 * to max sg elements limit
+			 */
+			try_to_copy -= required_size - msg_en->sg.size;
+			full_record = true;
+		}
+
+		if (!is_kvec && (full_record || eor) && !async_capable) {
+			u32 first = msg_pl->sg.end;
+
+			ret = sk_msg_zerocopy_from_iter(sk, &msg->msg_iter,
+							msg_pl, try_to_copy);
+			if (ret)
+				goto fallback_to_reg_send;
+
+			num_zc++;
+			copied += try_to_copy;
+
+			sk_msg_sg_copy_set(msg_pl, first);
+			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
+						  record_type, &copied,
+						  msg->msg_flags);
+			if (ret) {
+				if (ret == -EINPROGRESS)
+					num_async++;
+				else if (ret == -ENOMEM)
+					goto wait_for_memory;
+				else if (ctx->open_rec && ret == -ENOSPC)
+					goto rollback_iter;
+				else if (ret != -EAGAIN)
+					goto send_end;
+			}
+			continue;
+rollback_iter:
+			copied -= try_to_copy;
+			sk_msg_sg_copy_clear(msg_pl, first);
+			iov_iter_revert(&msg->msg_iter,
+					msg_pl->sg.size - orig_size);
+fallback_to_reg_send:
+			sk_msg_trim(sk, msg_pl, orig_size);
+		}
+
+		required_size = msg_pl->sg.size + try_to_copy;
+
+		ret = tls_clone_plaintext_msg(sk, required_size);
+		if (ret) {
+			if (ret != -ENOSPC)
+				goto send_end;
+
+			/* Adjust try_to_copy according to the amount that was
+			 * actually allocated. The difference is due
+			 * to max sg elements limit
+			 */
+			try_to_copy -= required_size - msg_pl->sg.size;
+			full_record = true;
+			sk_msg_trim(sk, msg_en,
+				    msg_pl->sg.size + prot->overhead_size);
+		}
+
+		if (try_to_copy) {
+			ret = sk_msg_memcopy_from_iter(sk, &msg->msg_iter,
+						       msg_pl, try_to_copy);
+			if (ret < 0)
+				goto trim_sgl;
+		}
+
+		/* Mark open record frags pending only after a successful copy;
+		 * otherwise we would trim the sg but not reset them.
+		 */
+		tls_ctx->pending_open_record_frags = true;
+		copied += try_to_copy;
+		if (full_record || eor) {
+			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
+						  record_type, &copied,
+						  msg->msg_flags);
+			if (ret) {
+				if (ret == -EINPROGRESS)
+					num_async++;
+				else if (ret == -ENOMEM)
+					goto wait_for_memory;
+				else if (ret != -EAGAIN) {
+					if (ret == -ENOSPC)
+						ret = 0;
+					goto send_end;
+				}
+			}
+		}
+
+		continue;
+
+wait_for_sndbuf:
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+		ret = sk_stream_wait_memory(sk, &timeo);
+		if (ret) {
+trim_sgl:
+			if (ctx->open_rec)
+				tls_trim_both_msgs(sk, orig_size);
+			goto send_end;
+		}
+
+		if (ctx->open_rec && msg_en->sg.size < required_size)
+			goto alloc_encrypted;
+	}
+
+	if (!num_async) {
+		goto send_end;
+	} else if (num_zc) {
+		/* Wait for pending encryptions to get completed */
+		smp_store_mb(ctx->async_notify, true);
+
+		if (atomic_read(&ctx->encrypt_pending))
+			crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+		else
+			reinit_completion(&ctx->async_wait.completion);
+
+		WRITE_ONCE(ctx->async_notify, false);
+
+		if (ctx->async_wait.err) {
+			ret = ctx->async_wait.err;
+			copied = 0;
+		}
+	}
+
+	/* Transmit if any encryptions have completed */
+	if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+		cancel_delayed_work(&ctx->tx_work.work);
+		tls_tx_records(sk, msg->msg_flags);
+	}
+
+send_end:
+	ret = sk_stream_error(sk, msg->msg_flags, ret);
+
+	release_sock(sk);
+	mutex_unlock(&tls_ctx->tx_lock);
+	return copied ? copied : ret;
+}
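
The tail of tls_sw_sendmsg() is an async rendezvous: publish async_notify with a full barrier, then either wait for the last in-flight encryption to complete or re-arm the completion for the next call. A rough pthread rendering of that handshake, assuming seq-cst atomics in place of smp_store_mb()/crypto_wait_req() (a sketch of the shape, not the kernel primitives):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;
static atomic_int pending;	/* in-flight encryptions */
static _Atomic bool notify;	/* mirrors ctx->async_notify */

/* Completion side: the last finisher wakes the waiter if asked to. */
static void one_done(void)
{
	if (atomic_fetch_sub(&pending, 1) == 1 && atomic_load(&notify)) {
		pthread_mutex_lock(&lock);
		pthread_cond_signal(&done);
		pthread_mutex_unlock(&lock);
	}
}

/* Waiter side, mirroring smp_store_mb() + the wait above. */
static void wait_all_done(void)
{
	atomic_store(&notify, true);	/* publish before re-checking */
	pthread_mutex_lock(&lock);
	while (atomic_load(&pending))
		pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);
	atomic_store(&notify, false);	/* WRITE_ONCE(async_notify, false) */
}

static void *worker(void *arg)
{
	(void)arg;
	one_done();
	return NULL;
}

int main(void)
{
	pthread_t t;

	atomic_store(&pending, 1);
	pthread_create(&t, NULL, worker, NULL);
	wait_all_done();
	pthread_join(t, NULL);
	puts("all encryptions drained");
	return 0;
}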
+
+static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
+			      int offset, size_t size, int flags)
+{
+	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
+	unsigned char record_type = TLS_RECORD_TYPE_DATA;
+	struct sk_msg *msg_pl;
+	struct tls_rec *rec;
+	int num_async = 0;
+	size_t copied = 0;
+	bool full_record;
+	int record_room;
+	int ret = 0;
+	bool eor;
+
+	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+	/* Call the sk_stream functions to manage the sndbuf mem. */
+	while (size > 0) {
+		size_t copy, required_size;
+
+		if (sk->sk_err) {
+			ret = -sk->sk_err;
+			goto sendpage_end;
+		}
+
+		if (ctx->open_rec)
+			rec = ctx->open_rec;
+		else
+			rec = ctx->open_rec = tls_get_rec(sk);
+		if (!rec) {
+			ret = -ENOMEM;
+			goto sendpage_end;
+		}
+
+		msg_pl = &rec->msg_plaintext;
+
+		full_record = false;
+		record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
+		copy = size;
+		if (copy >= record_room) {
+			copy = record_room;
+			full_record = true;
+		}
+
+		required_size = msg_pl->sg.size + copy + prot->overhead_size;
+
+		if (!sk_stream_memory_free(sk))
+			goto wait_for_sndbuf;
+alloc_payload:
+		ret = tls_alloc_encrypted_msg(sk, required_size);
+		if (ret) {
+			if (ret != -ENOSPC)
+				goto wait_for_memory;
+
+			/* Adjust copy according to the amount that was
+			 * actually allocated. The difference is due
+			 * to max sg elements limit
+			 */
+			copy -= required_size - msg_pl->sg.size;
+			full_record = true;
+		}
+
+		sk_msg_page_add(msg_pl, page, copy, offset);
+		sk_mem_charge(sk, copy);
+
+		offset += copy;
+		size -= copy;
+		copied += copy;
+
+		tls_ctx->pending_open_record_frags = true;
+		if (full_record || eor || sk_msg_full(msg_pl)) {
+			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
+						  record_type, &copied, flags);
+			if (ret) {
+				if (ret == -EINPROGRESS)
+					num_async++;
+				else if (ret == -ENOMEM)
+					goto wait_for_memory;
+				else if (ret != -EAGAIN) {
+					if (ret == -ENOSPC)
+						ret = 0;
+					goto sendpage_end;
+				}
+			}
+		}
+		continue;
+wait_for_sndbuf:
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+		ret = sk_stream_wait_memory(sk, &timeo);
+		if (ret) {
+			if (ctx->open_rec)
+				tls_trim_both_msgs(sk, msg_pl->sg.size);
+			goto sendpage_end;
+		}
+
+		if (ctx->open_rec)
+			goto alloc_payload;
+	}
+
+	if (num_async) {
+		/* Transmit if any encryptions have completed */
+		if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+			cancel_delayed_work(&ctx->tx_work.work);
+			tls_tx_records(sk, flags);
+		}
+	}
+sendpage_end:
+	ret = sk_stream_error(sk, flags, ret);
+	return copied ? copied : ret;
+}
+
+int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
+			   int offset, size_t size, int flags)
+{
+	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+		      MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY |
+		      MSG_NO_SHARED_FRAGS))
+		return -ENOTSUPP;
+
+	return tls_sw_do_sendpage(sk, page, offset, size, flags);
+}
+
+int tls_sw_sendpage(struct sock *sk, struct page *page,
+		    int offset, size_t size, int flags)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	int ret;
+
+	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+		      MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+		return -ENOTSUPP;
+
+	mutex_lock(&tls_ctx->tx_lock);
+	lock_sock(sk);
+	ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
+	release_sock(sk);
+	mutex_unlock(&tls_ctx->tx_lock);
+	return ret;
+}
+
+static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
+				     int flags, long timeo, int *err)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct sk_buff *skb;
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+	while (!(skb = ctx->recv_pkt) && sk_psock_queue_empty(psock)) {
+		if (sk->sk_err) {
+			*err = sock_error(sk);
+			return NULL;
+		}
+
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			return NULL;
+
+		if (sock_flag(sk, SOCK_DONE))
+			return NULL;
+
+		if ((flags & MSG_DONTWAIT) || !timeo) {
+			*err = -EAGAIN;
+			return NULL;
+		}
+
+		add_wait_queue(sk_sleep(sk), &wait);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+		sk_wait_event(sk, &timeo,
+			      ctx->recv_pkt != skb ||
+			      !sk_psock_queue_empty(psock),
+			      &wait);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+		remove_wait_queue(sk_sleep(sk), &wait);
+
+		/* Handle signals */
+		if (signal_pending(current)) {
+			*err = sock_intr_errno(timeo);
+			return NULL;
+		}
+	}
+
+	return skb;
+}
+
+static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
+			       int length, int *pages_used,
+			       unsigned int *size_used,
+			       struct scatterlist *to,
+			       int to_max_pages)
+{
+	int rc = 0, i = 0, num_elem = *pages_used, maxpages;
+	struct page *pages[MAX_SKB_FRAGS];
 	unsigned int size = *size_used;
-	int num_elem = *pages_used;
-	int rc = 0;
-	int maxpages;
+	ssize_t copied, use;
+	size_t offset;
 
 	while (length > 0) {
 		i = 0;
@@ -298,17 +1319,15 @@
 			sg_set_page(&to[num_elem],
 				    pages[i], use, offset);
 			sg_unmark_end(&to[num_elem]);
-			if (charge)
-				sk_mem_charge(sk, use);
+			/* We do not uncharge memory from this API */
 
 			offset = 0;
 			copied -= use;
 
-			++i;
-			++num_elem;
+			i++;
+			num_elem++;
 		}
 	}
-
 	/* Mark the end in the last sg entry if newly added */
 	if (num_elem > *pages_used)
 		sg_mark_end(&to[num_elem - 1]);
@@ -321,340 +1340,6 @@
 	return rc;
 }
 
-static int memcopy_from_iter(struct sock *sk, struct iov_iter *from,
-			     int bytes)
-{
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	struct scatterlist *sg = ctx->sg_plaintext_data;
-	int copy, i, rc = 0;
-
-	for (i = tls_ctx->pending_open_record_frags;
-	     i < ctx->sg_plaintext_num_elem; ++i) {
-		copy = sg[i].length;
-		if (copy_from_iter(
-				page_address(sg_page(&sg[i])) + sg[i].offset,
-				copy, from) != copy) {
-			rc = -EFAULT;
-			goto out;
-		}
-		bytes -= copy;
-
-		++tls_ctx->pending_open_record_frags;
-
-		if (!bytes)
-			break;
-	}
-
-out:
-	return rc;
-}
-
-int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
-{
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	int ret = 0;
-	int required_size;
-	long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-	bool eor = !(msg->msg_flags & MSG_MORE);
-	size_t try_to_copy, copied = 0;
-	unsigned char record_type = TLS_RECORD_TYPE_DATA;
-	int record_room;
-	bool full_record;
-	int orig_size;
-	bool is_kvec = msg->msg_iter.type & ITER_KVEC;
-
-	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
-		return -ENOTSUPP;
-
-	lock_sock(sk);
-
-	if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo))
-		goto send_end;
-
-	if (unlikely(msg->msg_controllen)) {
-		ret = tls_proccess_cmsg(sk, msg, &record_type);
-		if (ret)
-			goto send_end;
-	}
-
-	while (msg_data_left(msg)) {
-		if (sk->sk_err) {
-			ret = -sk->sk_err;
-			goto send_end;
-		}
-
-		orig_size = ctx->sg_plaintext_size;
-		full_record = false;
-		try_to_copy = msg_data_left(msg);
-		record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size;
-		if (try_to_copy >= record_room) {
-			try_to_copy = record_room;
-			full_record = true;
-		}
-
-		required_size = ctx->sg_plaintext_size + try_to_copy +
-				tls_ctx->tx.overhead_size;
-
-		if (!sk_stream_memory_free(sk))
-			goto wait_for_sndbuf;
-alloc_encrypted:
-		ret = alloc_encrypted_sg(sk, required_size);
-		if (ret) {
-			if (ret != -ENOSPC)
-				goto wait_for_memory;
-
-			/* Adjust try_to_copy according to the amount that was
-			 * actually allocated. The difference is due
-			 * to max sg elements limit
-			 */
-			try_to_copy -= required_size - ctx->sg_encrypted_size;
-			full_record = true;
-		}
-		if (!is_kvec && (full_record || eor)) {
-			ret = zerocopy_from_iter(sk, &msg->msg_iter,
-				try_to_copy, &ctx->sg_plaintext_num_elem,
-				&ctx->sg_plaintext_size,
-				ctx->sg_plaintext_data,
-				ARRAY_SIZE(ctx->sg_plaintext_data),
-				true);
-			if (ret)
-				goto fallback_to_reg_send;
-
-			copied += try_to_copy;
-			ret = tls_push_record(sk, msg->msg_flags, record_type);
-			if (ret)
-				goto send_end;
-			continue;
-
-fallback_to_reg_send:
-			trim_sg(sk, ctx->sg_plaintext_data,
-				&ctx->sg_plaintext_num_elem,
-				&ctx->sg_plaintext_size,
-				orig_size);
-		}
-
-		required_size = ctx->sg_plaintext_size + try_to_copy;
-alloc_plaintext:
-		ret = alloc_plaintext_sg(sk, required_size);
-		if (ret) {
-			if (ret != -ENOSPC)
-				goto wait_for_memory;
-
-			/* Adjust try_to_copy according to the amount that was
-			 * actually allocated. The difference is due
-			 * to max sg elements limit
-			 */
-			try_to_copy -= required_size - ctx->sg_plaintext_size;
-			full_record = true;
-
-			trim_sg(sk, ctx->sg_encrypted_data,
-				&ctx->sg_encrypted_num_elem,
-				&ctx->sg_encrypted_size,
-				ctx->sg_plaintext_size +
-				tls_ctx->tx.overhead_size);
-		}
-
-		ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy);
-		if (ret)
-			goto trim_sgl;
-
-		copied += try_to_copy;
-		if (full_record || eor) {
-push_record:
-			ret = tls_push_record(sk, msg->msg_flags, record_type);
-			if (ret) {
-				if (ret == -ENOMEM)
-					goto wait_for_memory;
-
-				goto send_end;
-			}
-		}
-
-		continue;
-
-wait_for_sndbuf:
-		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-wait_for_memory:
-		ret = sk_stream_wait_memory(sk, &timeo);
-		if (ret) {
-trim_sgl:
-			trim_both_sgl(sk, orig_size);
-			goto send_end;
-		}
-
-		if (tls_is_pending_closed_record(tls_ctx))
-			goto push_record;
-
-		if (ctx->sg_encrypted_size < required_size)
-			goto alloc_encrypted;
-
-		goto alloc_plaintext;
-	}
-
-send_end:
-	ret = sk_stream_error(sk, msg->msg_flags, ret);
-
-	release_sock(sk);
-	return copied ? copied : ret;
-}
-
-int tls_sw_sendpage(struct sock *sk, struct page *page,
-		    int offset, size_t size, int flags)
-{
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	int ret = 0;
-	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
-	bool eor;
-	size_t orig_size = size;
-	unsigned char record_type = TLS_RECORD_TYPE_DATA;
-	struct scatterlist *sg;
-	bool full_record;
-	int record_room;
-
-	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
-		      MSG_SENDPAGE_NOTLAST))
-		return -ENOTSUPP;
-
-	/* No MSG_EOR from splice, only look at MSG_MORE */
-	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
-
-	lock_sock(sk);
-
-	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-	if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo))
-		goto sendpage_end;
-
-	/* Call the sk_stream functions to manage the sndbuf mem. */
-	while (size > 0) {
-		size_t copy, required_size;
-
-		if (sk->sk_err) {
-			ret = -sk->sk_err;
-			goto sendpage_end;
-		}
-
-		full_record = false;
-		record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size;
-		copy = size;
-		if (copy >= record_room) {
-			copy = record_room;
-			full_record = true;
-		}
-		required_size = ctx->sg_plaintext_size + copy +
-			      tls_ctx->tx.overhead_size;
-
-		if (!sk_stream_memory_free(sk))
-			goto wait_for_sndbuf;
-alloc_payload:
-		ret = alloc_encrypted_sg(sk, required_size);
-		if (ret) {
-			if (ret != -ENOSPC)
-				goto wait_for_memory;
-
-			/* Adjust copy according to the amount that was
-			 * actually allocated. The difference is due
-			 * to max sg elements limit
-			 */
-			copy -= required_size - ctx->sg_plaintext_size;
-			full_record = true;
-		}
-
-		get_page(page);
-		sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
-		sg_set_page(sg, page, copy, offset);
-		sg_unmark_end(sg);
-
-		ctx->sg_plaintext_num_elem++;
-
-		sk_mem_charge(sk, copy);
-		offset += copy;
-		size -= copy;
-		ctx->sg_plaintext_size += copy;
-		tls_ctx->pending_open_record_frags = ctx->sg_plaintext_num_elem;
-
-		if (full_record || eor ||
-		    ctx->sg_plaintext_num_elem ==
-		    ARRAY_SIZE(ctx->sg_plaintext_data)) {
-push_record:
-			ret = tls_push_record(sk, flags, record_type);
-			if (ret) {
-				if (ret == -ENOMEM)
-					goto wait_for_memory;
-
-				goto sendpage_end;
-			}
-		}
-		continue;
-wait_for_sndbuf:
-		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-wait_for_memory:
-		ret = sk_stream_wait_memory(sk, &timeo);
-		if (ret) {
-			trim_both_sgl(sk, ctx->sg_plaintext_size);
-			goto sendpage_end;
-		}
-
-		if (tls_is_pending_closed_record(tls_ctx))
-			goto push_record;
-
-		goto alloc_payload;
-	}
-
-sendpage_end:
-	if (orig_size > size)
-		ret = orig_size - size;
-	else
-		ret = sk_stream_error(sk, flags, ret);
-
-	release_sock(sk);
-	return ret;
-}
-
-static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
-				     long timeo, int *err)
-{
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-	struct sk_buff *skb;
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
-	while (!(skb = ctx->recv_pkt)) {
-		if (sk->sk_err) {
-			*err = sock_error(sk);
-			return NULL;
-		}
-
-		if (sk->sk_shutdown & RCV_SHUTDOWN)
-			return NULL;
-
-		if (sock_flag(sk, SOCK_DONE))
-			return NULL;
-
-		if ((flags & MSG_DONTWAIT) || !timeo) {
-			*err = -EAGAIN;
-			return NULL;
-		}
-
-		add_wait_queue(sk_sleep(sk), &wait);
-		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-		sk_wait_event(sk, &timeo, ctx->recv_pkt != skb, &wait);
-		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-		remove_wait_queue(sk_sleep(sk), &wait);
-
-		/* Handle signals */
-		if (signal_pending(current)) {
-			*err = sock_intr_errno(timeo);
-			return NULL;
-		}
-	}
-
-	return skb;
-}
-
 /* This function decrypts the input skb into either out_iov or in out_sg
  * or in skb buffers itself. The input parameter 'zc' indicates if
  * zero-copy mode needs to be tried or not. With zero-copy mode, either
@@ -666,10 +1351,11 @@
 static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 			    struct iov_iter *out_iov,
 			    struct scatterlist *out_sg,
-			    int *chunk, bool *zc)
+			    int *chunk, bool *zc, bool async)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct strp_msg *rxm = strp_msg(skb);
 	int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0;
 	struct aead_request *aead_req;
@@ -677,19 +1363,23 @@
 	u8 *aad, *iv, *mem = NULL;
 	struct scatterlist *sgin = NULL;
 	struct scatterlist *sgout = NULL;
-	const int data_len = rxm->full_len - tls_ctx->rx.overhead_size;
+	const int data_len = rxm->full_len - prot->overhead_size +
+			     prot->tail_size;
+	int iv_offset = 0;
 
 	if (*zc && (out_iov || out_sg)) {
 		if (out_iov)
 			n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
 		else
 			n_sgout = sg_nents(out_sg);
+		n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size,
+				 rxm->full_len - prot->prepend_size);
 	} else {
 		n_sgout = 0;
 		*zc = false;
+		n_sgin = skb_cow_data(skb, 0, &unused);
 	}
 
-	n_sgin = skb_cow_data(skb, 0, &unused);
 	if (n_sgin < 1)
 		return -EBADMSG;
 
@@ -700,7 +1390,7 @@
 
 	aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
 	mem_size = aead_size + (nsg * sizeof(struct scatterlist));
-	mem_size = mem_size + TLS_AAD_SPACE_SIZE;
+	mem_size = mem_size + prot->aad_size;
 	mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv);
 
 	/* Allocate a single block of memory which contains
@@ -716,29 +1406,42 @@
 	sgin = (struct scatterlist *)(mem + aead_size);
 	sgout = sgin + n_sgin;
 	aad = (u8 *)(sgout + n_sgout);
-	iv = aad + TLS_AAD_SPACE_SIZE;
+	iv = aad + prot->aad_size;
+
+	/* For CCM based ciphers, first byte of nonce+iv is always '2' */
+	if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) {
+		iv[0] = 2;
+		iv_offset = 1;
+	}
 
 	/* Prepare IV */
 	err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
-			    iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
-			    tls_ctx->rx.iv_size);
+			    iv + iv_offset + prot->salt_size,
+			    prot->iv_size);
 	if (err < 0) {
 		kfree(mem);
 		return err;
 	}
-	memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+	if (prot->version == TLS_1_3_VERSION)
+		memcpy(iv + iv_offset, tls_ctx->rx.iv,
+		       crypto_aead_ivsize(ctx->aead_recv));
+	else
+		memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size);
+
+	xor_iv_with_seq(prot->version, iv, tls_ctx->rx.rec_seq);
 
 	/* Prepare AAD */
-	tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size,
-		     tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size,
-		     ctx->control);
+	tls_make_aad(aad, rxm->full_len - prot->overhead_size +
+		     prot->tail_size,
+		     tls_ctx->rx.rec_seq, prot->rec_seq_size,
+		     ctx->control, prot->version);
 
 	/* Prepare sgin */
 	sg_init_table(sgin, n_sgin);
-	sg_set_buf(&sgin[0], aad, TLS_AAD_SPACE_SIZE);
+	sg_set_buf(&sgin[0], aad, prot->aad_size);
 	err = skb_to_sgvec(skb, &sgin[1],
-			   rxm->offset + tls_ctx->rx.prepend_size,
-			   rxm->full_len - tls_ctx->rx.prepend_size);
+			   rxm->offset + prot->prepend_size,
+			   rxm->full_len - prot->prepend_size);
 	if (err < 0) {
 		kfree(mem);
 		return err;
@@ -747,12 +1450,12 @@
 	if (n_sgout) {
 		if (out_iov) {
 			sg_init_table(sgout, n_sgout);
-			sg_set_buf(&sgout[0], aad, TLS_AAD_SPACE_SIZE);
+			sg_set_buf(&sgout[0], aad, prot->aad_size);
 
 			*chunk = 0;
-			err = zerocopy_from_iter(sk, out_iov, data_len, &pages,
-						 chunk, &sgout[1],
-						 (n_sgout - 1), false);
+			err = tls_setup_from_iter(sk, out_iov, data_len,
+						  &pages, chunk, &sgout[1],
+						  (n_sgout - 1));
 			if (err < 0)
 				goto fallback_to_reg_recv;
 		} else if (out_sg) {
@@ -764,12 +1467,15 @@
 fallback_to_reg_recv:
 		sgout = sgin;
 		pages = 0;
-		*chunk = 0;
+		*chunk = data_len;
 		*zc = false;
 	}
 
 	/* Prepare and submit AEAD request */
-	err = tls_do_decryption(sk, sgin, sgout, iv, data_len, aead_req);
+	err = tls_do_decryption(sk, skb, sgin, sgout, iv,
+				data_len, aead_req, async);
+	if (err == -EINPROGRESS)
+		return err;
 
 	/* Release the pages in case iov was mapped to pages */
 	for (; pages > 0; pages--)
@@ -780,32 +1486,51 @@
 }
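
Two easy-to-miss details in decrypt_internal(): for CCM the first nonce byte is a formatting octet (the value 2 presumably encodes L - 1 for the 3-byte length field the crypto layer's CCM mode expects), and for TLS 1.3 the record sequence number is folded into the IV rather than carried on the wire. The TLS 1.3 rule from RFC 8446 section 5.3, which xor_iv_with_seq() implements, in standalone form (tls13_nonce is my name):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define IV_LEN 12	/* salt (4) + per-record IV (8) for AES-GCM */

/* TLS 1.3 per-record nonce: the static IV XORed with the 64-bit record
 * sequence number, right-aligned (RFC 8446, section 5.3). */
static void tls13_nonce(uint8_t out[IV_LEN], const uint8_t iv[IV_LEN],
			uint64_t seq)
{
	int i;

	memcpy(out, iv, IV_LEN);
	for (i = 0; i < 8; i++)
		out[IV_LEN - 1 - i] ^= (uint8_t)(seq >> (8 * i));
}

int main(void)
{
	uint8_t iv[IV_LEN] = { 0 }, nonce[IV_LEN];

	tls13_nonce(nonce, iv, 0x0102);
	printf("last two nonce bytes: %02x %02x\n",
	       nonce[IV_LEN - 2], nonce[IV_LEN - 1]);	/* 01 02 */
	return 0;
}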
 
 static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
-			      struct iov_iter *dest, int *chunk, bool *zc)
+			      struct iov_iter *dest, int *chunk, bool *zc,
+			      bool async)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct strp_msg *rxm = strp_msg(skb);
-	int err = 0;
+	int pad, err = 0;
 
-#ifdef CONFIG_TLS_DEVICE
-	err = tls_device_decrypted(sk, skb);
-	if (err < 0)
-		return err;
-#endif
 	if (!ctx->decrypted) {
-		err = decrypt_internal(sk, skb, dest, NULL, chunk, zc);
-		if (err < 0)
-			return err;
+		if (tls_ctx->rx_conf == TLS_HW) {
+			err = tls_device_decrypted(sk, skb);
+			if (err < 0)
+				return err;
+		}
+
+		/* Still not decrypted after tls_device */
+		if (!ctx->decrypted) {
+			err = decrypt_internal(sk, skb, dest, NULL, chunk, zc,
+					       async);
+			if (err < 0) {
+				if (err == -EINPROGRESS)
+					tls_advance_record_sn(sk, prot,
+							      &tls_ctx->rx);
+
+				return err;
+			}
+		} else {
+			*zc = false;
+		}
+
+		pad = padding_length(ctx, prot, skb);
+		if (pad < 0)
+			return pad;
+
+		rxm->full_len -= pad;
+		rxm->offset += prot->prepend_size;
+		rxm->full_len -= prot->overhead_size;
+		tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+		ctx->decrypted = true;
+		ctx->saved_data_ready(sk);
 	} else {
 		*zc = false;
 	}
 
-	rxm->offset += tls_ctx->rx.prepend_size;
-	rxm->full_len -= tls_ctx->rx.overhead_size;
-	tls_advance_record_sn(sk, &tls_ctx->rx);
-	ctx->decrypted = true;
-	ctx->saved_data_ready(sk);
-
 	return err;
 }
 
@@ -815,7 +1540,7 @@
 	bool zc = true;
 	int chunk;
 
-	return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc);
+	return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false);
 }
 
 static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
@@ -823,23 +1548,134 @@
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-	struct strp_msg *rxm = strp_msg(skb);
 
-	if (len < rxm->full_len) {
-		rxm->offset += len;
-		rxm->full_len -= len;
+	if (skb) {
+		struct strp_msg *rxm = strp_msg(skb);
 
-		return false;
+		if (len < rxm->full_len) {
+			rxm->offset += len;
+			rxm->full_len -= len;
+			return false;
+		}
+		consume_skb(skb);
 	}
 
 	/* Finished with message */
 	ctx->recv_pkt = NULL;
-	kfree_skb(skb);
 	__strp_unpause(&ctx->strp);
 
 	return true;
 }
 
+/* This function traverses the rx_list in the tls receive context to copy the
+ * decrypted records into the buffer provided by the caller when zero copy is
+ * not true. Further, a record is removed from the rx_list if it is not a
+ * peek case and the record has been consumed completely.
+ */
+static int process_rx_list(struct tls_sw_context_rx *ctx,
+			   struct msghdr *msg,
+			   u8 *control,
+			   bool *cmsg,
+			   size_t skip,
+			   size_t len,
+			   bool zc,
+			   bool is_peek)
+{
+	struct sk_buff *skb = skb_peek(&ctx->rx_list);
+	u8 ctrl = *control;
+	u8 msgc = *cmsg;
+	struct tls_msg *tlm;
+	ssize_t copied = 0;
+
+	/* Set the record type in 'control' if caller didn't pass it */
+	if (!ctrl && skb) {
+		tlm = tls_msg(skb);
+		ctrl = tlm->control;
+	}
+
+	while (skip && skb) {
+		struct strp_msg *rxm = strp_msg(skb);
+		tlm = tls_msg(skb);
+
+		/* Cannot process a record of different type */
+		if (ctrl != tlm->control)
+			return 0;
+
+		if (skip < rxm->full_len)
+			break;
+
+		skip = skip - rxm->full_len;
+		skb = skb_peek_next(skb, &ctx->rx_list);
+	}
+
+	while (len && skb) {
+		struct sk_buff *next_skb;
+		struct strp_msg *rxm = strp_msg(skb);
+		int chunk = min_t(unsigned int, rxm->full_len - skip, len);
+
+		tlm = tls_msg(skb);
+
+		/* Cannot process a record of different type */
+		if (ctrl != tlm->control)
+			return 0;
+
+		/* Set record type if not already done. For a non-data record,
+		 * do not proceed if record type could not be copied.
+		 */
+		if (!msgc) {
+			int cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+					    sizeof(ctrl), &ctrl);
+			msgc = true;
+			if (ctrl != TLS_RECORD_TYPE_DATA) {
+				if (cerr || msg->msg_flags & MSG_CTRUNC)
+					return -EIO;
+
+				*cmsg = msgc;
+			}
+		}
+
+		if (!zc || (rxm->full_len - skip) > len) {
+			int err = skb_copy_datagram_msg(skb, rxm->offset + skip,
+						    msg, chunk);
+			if (err < 0)
+				return err;
+		}
+
+		len = len - chunk;
+		copied = copied + chunk;
+
+		/* Consume the data from the record in the non-peek case */
+		if (!is_peek) {
+			rxm->offset = rxm->offset + chunk;
+			rxm->full_len = rxm->full_len - chunk;
+
+			/* Return if there is unconsumed data in the record */
+			if (rxm->full_len - skip)
+				break;
+		}
+
+		/* The remaining skip-bytes must lie in 1st record in rx_list.
+		 * So from the 2nd record, 'skip' should be 0.
+		 */
+		skip = 0;
+
+		if (msg)
+			msg->msg_flags |= MSG_EOR;
+
+		next_skb = skb_peek_next(skb, &ctx->rx_list);
+
+		if (!is_peek) {
+			skb_unlink(skb, &ctx->rx_list);
+			consume_skb(skb);
+		}
+
+		skb = next_skb;
+	}
+
+	*control = ctrl;
+	return copied;
+}
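
The skip/len cursor arithmetic above is easiest to see on plain arrays: whole records already delivered are stepped over, the straddling record contributes its remainder, and offsets only advance when not peeking. A reduced sketch (struct record and drain are invented names):

#include <stdio.h>

struct record { unsigned int off, len; };

/* Copy up to len bytes from a queue of records, skipping bytes already
 * delivered; consume == 0 models MSG_PEEK. Returns bytes "copied". */
static unsigned int drain(struct record *r, int n, unsigned int skip,
			  unsigned int len, int consume)
{
	unsigned int copied = 0;
	int i = 0;

	while (i < n && skip >= r[i].len)	/* records already read */
		skip -= r[i++].len;

	for (; i < n && len; i++) {
		unsigned int chunk = r[i].len - skip;

		if (chunk > len)
			chunk = len;
		if (consume) {
			r[i].off += chunk;	/* advance like rxm->offset */
			r[i].len -= chunk;
		}
		copied += chunk;
		len -= chunk;
		skip = 0;	/* remaining skip only hits the 1st record */
	}
	return copied;
}

int main(void)
{
	struct record q[] = { { 0, 4 }, { 0, 6 } };

	/* skip 3 of q[0], then copy 1 from q[0] and 4 from q[1] */
	printf("copied %u\n", drain(q, 2, 3, 5, 1));
	return 0;
}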
+
 int tls_sw_recvmsg(struct sock *sk,
 		   struct msghdr *msg,
 		   size_t len,
@@ -849,104 +1685,217 @@
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-	unsigned char control;
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
+	struct sk_psock *psock;
+	unsigned char control = 0;
+	ssize_t decrypted = 0;
 	struct strp_msg *rxm;
+	struct tls_msg *tlm;
 	struct sk_buff *skb;
 	ssize_t copied = 0;
 	bool cmsg = false;
 	int target, err = 0;
 	long timeo;
-	bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
+	bool is_peek = flags & MSG_PEEK;
+	int num_async = 0;
 
 	flags |= nonblock;
 
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
 
+	psock = sk_psock_get(sk);
 	lock_sock(sk);
 
-	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
-	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-	do {
-		bool zc = false;
-		int chunk = 0;
+	/* Process pending decrypted records. It must be non-zero-copy */
+	err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
+			      is_peek);
+	if (err < 0) {
+		tls_err_abort(sk, err);
+		goto end;
+	} else {
+		copied = err;
+	}
 
-		skb = tls_wait_data(sk, flags, timeo, &err);
-		if (!skb)
+	if (len <= copied)
+		goto recv_end;
+
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+	len = len - copied;
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	while (len && (decrypted + copied < target || ctx->recv_pkt)) {
+		bool retain_skb = false;
+		bool zc = false;
+		int to_decrypt;
+		int chunk = 0;
+		bool async_capable;
+		bool async = false;
+
+		skb = tls_wait_data(sk, psock, flags, timeo, &err);
+		if (!skb) {
+			if (psock) {
+				int ret = __tcp_bpf_recvmsg(sk, psock,
+							    msg, len, flags);
+
+				if (ret > 0) {
+					decrypted += ret;
+					len -= ret;
+					continue;
+				}
+			}
 			goto recv_end;
+		} else {
+			tlm = tls_msg(skb);
+			if (prot->version == TLS_1_3_VERSION)
+				tlm->control = 0;
+			else
+				tlm->control = ctx->control;
+		}
 
 		rxm = strp_msg(skb);
+
+		to_decrypt = rxm->full_len - prot->overhead_size;
+
+		if (to_decrypt <= len && !is_kvec && !is_peek &&
+		    ctx->control == TLS_RECORD_TYPE_DATA &&
+		    prot->version != TLS_1_3_VERSION)
+			zc = true;
+
+		/* Do not use async mode if record is non-data */
+		if (ctx->control == TLS_RECORD_TYPE_DATA)
+			async_capable = ctx->async_capable;
+		else
+			async_capable = false;
+
+		err = decrypt_skb_update(sk, skb, &msg->msg_iter,
+					 &chunk, &zc, async_capable);
+		if (err < 0 && err != -EINPROGRESS) {
+			tls_err_abort(sk, EBADMSG);
+			goto recv_end;
+		}
+
+		if (err == -EINPROGRESS) {
+			async = true;
+			num_async++;
+		} else if (prot->version == TLS_1_3_VERSION) {
+			tlm->control = ctx->control;
+		}
+
+		/* If the type of records being processed is not known yet,
+		 * set it to the type just dequeued. If it is already known,
+		 * but does not match the record type just dequeued, go to end.
+		 * We always get the record type here, since for tls1.2 the
+		 * type is known as soon as a record is dequeued from the
+		 * stream parser, and for tls1.3 we disable async.
+		 */
+
+		if (!control)
+			control = tlm->control;
+		else if (control != tlm->control)
+			goto recv_end;
+
 		if (!cmsg) {
 			int cerr;
 
 			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
-					sizeof(ctx->control), &ctx->control);
+					sizeof(control), &control);
 			cmsg = true;
-			control = ctx->control;
-			if (ctx->control != TLS_RECORD_TYPE_DATA) {
+			if (control != TLS_RECORD_TYPE_DATA) {
 				if (cerr || msg->msg_flags & MSG_CTRUNC) {
 					err = -EIO;
 					goto recv_end;
 				}
 			}
-		} else if (control != ctx->control) {
-			goto recv_end;
 		}
 
-		if (!ctx->decrypted) {
-			int to_copy = rxm->full_len - tls_ctx->rx.overhead_size;
-
-			if (!is_kvec && to_copy <= len &&
-			    likely(!(flags & MSG_PEEK)))
-				zc = true;
-
-			err = decrypt_skb_update(sk, skb, &msg->msg_iter,
-						 &chunk, &zc);
-			if (err < 0) {
-				tls_err_abort(sk, EBADMSG);
-				goto recv_end;
-			}
-			ctx->decrypted = true;
-		}
+		if (async)
+			goto pick_next_record;
 
 		if (!zc) {
-			chunk = min_t(unsigned int, rxm->full_len, len);
-			err = skb_copy_datagram_msg(skb, rxm->offset, msg,
-						    chunk);
+			if (rxm->full_len > len) {
+				retain_skb = true;
+				chunk = len;
+			} else {
+				chunk = rxm->full_len;
+			}
+
+			err = skb_copy_datagram_msg(skb, rxm->offset,
+						    msg, chunk);
 			if (err < 0)
 				goto recv_end;
-		}
 
-		copied += chunk;
-		len -= chunk;
-		if (likely(!(flags & MSG_PEEK))) {
-			u8 control = ctx->control;
-
-			if (tls_sw_advance_skb(sk, skb, chunk)) {
-				/* Return full control message to
-				 * userspace before trying to parse
-				 * another message type
-				 */
-				msg->msg_flags |= MSG_EOR;
-				if (control != TLS_RECORD_TYPE_DATA)
-					goto recv_end;
+			if (!is_peek) {
+				rxm->offset = rxm->offset + chunk;
+				rxm->full_len = rxm->full_len - chunk;
 			}
-		} else {
-			/* MSG_PEEK right now cannot look beyond current skb
-			 * from strparser, meaning we cannot advance skb here
-			 * and thus unpause strparser since we'd loose original
-			 * one.
-			 */
-			break;
 		}
 
-		/* If we have a new message from strparser, continue now. */
-		if (copied >= target && !ctx->recv_pkt)
+pick_next_record:
+		if (chunk > len)
+			chunk = len;
+
+		decrypted += chunk;
+		len -= chunk;
+
+		/* For async or peek case, queue the current skb */
+		if (async || is_peek || retain_skb) {
+			skb_queue_tail(&ctx->rx_list, skb);
+			skb = NULL;
+		}
+
+		if (tls_sw_advance_skb(sk, skb, chunk)) {
+			/* Return full control message to
+			 * userspace before trying to parse
+			 * another message type
+			 */
+			msg->msg_flags |= MSG_EOR;
+			if (ctx->control != TLS_RECORD_TYPE_DATA)
+				goto recv_end;
+		} else {
 			break;
-	} while (len);
+		}
+	}
 
 recv_end:
+	if (num_async) {
+		/* Wait for all previously submitted records to be decrypted */
+		smp_store_mb(ctx->async_notify, true);
+		if (atomic_read(&ctx->decrypt_pending)) {
+			err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+			if (err) {
+				/* one of the async decrypts failed */
+				tls_err_abort(sk, err);
+				copied = 0;
+				decrypted = 0;
+				goto end;
+			}
+		} else {
+			reinit_completion(&ctx->async_wait.completion);
+		}
+		WRITE_ONCE(ctx->async_notify, false);
+
+		/* Drain records from the rx_list & copy if required */
+		if (is_peek || is_kvec)
+			err = process_rx_list(ctx, msg, &control, &cmsg, copied,
+					      decrypted, false, is_peek);
+		else
+			err = process_rx_list(ctx, msg, &control, &cmsg, 0,
+					      decrypted, true, is_peek);
+		if (err < 0) {
+			tls_err_abort(sk, err);
+			copied = 0;
+			goto end;
+		}
+	}
+
+	copied += decrypted;
+
+end:
 	release_sock(sk);
+	if (psock)
+		sk_psock_put(sk, psock);
 	return copied ? : err;
 }
 
@@ -969,18 +1918,18 @@
 
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
-	skb = tls_wait_data(sk, flags, timeo, &err);
+	skb = tls_wait_data(sk, NULL, flags, timeo, &err);
 	if (!skb)
 		goto splice_read_end;
 
-	/* splice does not support reading control messages */
-	if (ctx->control != TLS_RECORD_TYPE_DATA) {
-		err = -ENOTSUPP;
-		goto splice_read_end;
-	}
-
 	if (!ctx->decrypted) {
-		err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc);
+		err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
+
+		/* splice does not support reading control messages */
+		if (ctx->control != TLS_RECORD_TYPE_DATA) {
+			err = -ENOTSUPP;
+			goto splice_read_end;
+		}
 
 		if (err < 0) {
 			tls_err_abort(sk, EBADMSG);
@@ -1003,29 +1952,28 @@
 	return copied ? : err;
 }
 
-unsigned int tls_sw_poll(struct file *file, struct socket *sock,
-			 struct poll_table_struct *wait)
+bool tls_sw_stream_read(const struct sock *sk)
 {
-	unsigned int ret;
-	struct sock *sk = sock->sk;
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	bool ingress_empty = true;
+	struct sk_psock *psock;
 
-	/* Grab POLLOUT and POLLHUP from the underlying socket */
-	ret = ctx->sk_poll(file, sock, wait);
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (psock)
+		ingress_empty = list_empty(&psock->ingress_msg);
+	rcu_read_unlock();
 
-	/* Clear POLLIN bits, and set based on recv_pkt */
-	ret &= ~(POLLIN | POLLRDNORM);
-	if (ctx->recv_pkt)
-		ret |= POLLIN | POLLRDNORM;
-
-	return ret;
+	return !ingress_empty || ctx->recv_pkt ||
+		!skb_queue_empty(&ctx->rx_list);
 }
 
 static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
 	struct strp_msg *rxm = strp_msg(skb);
 	size_t cipher_overhead;
@@ -1033,17 +1981,17 @@
 	int ret;
 
 	/* Verify that we have a full TLS header, or wait for more data */
-	if (rxm->offset + tls_ctx->rx.prepend_size > skb->len)
+	if (rxm->offset + prot->prepend_size > skb->len)
 		return 0;
 
 	/* Sanity-check size of on-stack buffer. */
-	if (WARN_ON(tls_ctx->rx.prepend_size > sizeof(header))) {
+	if (WARN_ON(prot->prepend_size > sizeof(header))) {
 		ret = -EINVAL;
 		goto read_failure;
 	}
 
 	/* Linearize header to local buffer */
-	ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size);
+	ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size);
 
 	if (ret < 0)
 		goto read_failure;
@@ -1052,9 +2000,12 @@
 
 	data_len = ((header[4] & 0xFF) | (header[3] << 8));
 
-	cipher_overhead = tls_ctx->rx.tag_size + tls_ctx->rx.iv_size;
+	cipher_overhead = prot->tag_size;
+	if (prot->version != TLS_1_3_VERSION)
+		cipher_overhead += prot->iv_size;
 
-	if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead) {
+	if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead +
+	    prot->tail_size) {
 		ret = -EMSGSIZE;
 		goto read_failure;
 	}
@@ -1063,16 +2014,15 @@
 		goto read_failure;
 	}
 
-	if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.info.version) ||
-	    header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.info.version)) {
+	/* Note that both TLS1.3 and TLS1.2 use TLS_1_2 version here */
+	if (header[1] != TLS_1_2_VERSION_MINOR ||
+	    header[2] != TLS_1_2_VERSION_MAJOR) {
 		ret = -EINVAL;
 		goto read_failure;
 	}
 
-#ifdef CONFIG_TLS_DEVICE
-	handle_device_resync(strp->sk, TCP_SKB_CB(skb)->seq + rxm->offset,
-			     *(u64*)tls_ctx->rx.rec_seq);
-#endif
+	tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE,
+				     TCP_SKB_CB(skb)->seq + rxm->offset);
 	return data_len + TLS_HEADER_SIZE;
 
 read_failure:
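
tls_read_size() recovers the payload length from bytes 3-4 of the 5-byte record header and rejects oversized or wrongly-versioned records before strparser commits to a frame. The same parse as a standalone function, with the kernel's limits hard-coded (a sketch, not the kernel helper):

#include <stdio.h>

#define TLS_HEADER_SIZE		5
#define TLS_MAX_PAYLOAD_SIZE	16384

/* Parse the 5-byte record header: type, 2-byte legacy version, 2-byte
 * big-endian length. Returns the full frame size or -1 on a bad header. */
static int tls_record_size(const unsigned char h[TLS_HEADER_SIZE],
			   unsigned int cipher_overhead)
{
	unsigned int data_len = (h[4] & 0xFF) | (h[3] << 8);

	if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead)
		return -1;		/* -EMSGSIZE in the kernel */
	if (h[1] != 3 || h[2] != 3)	/* TLS 1.2 and 1.3 both send 0x0303 */
		return -1;		/* -EINVAL */
	return data_len + TLS_HEADER_SIZE;
}

int main(void)
{
	/* application_data record carrying a 16-byte ciphertext */
	unsigned char h[] = { 23, 3, 3, 0x00, 0x10 };

	printf("frame size: %d\n", tls_record_size(h, 16));	/* 21 */
	return 0;
}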
@@ -1098,17 +2048,65 @@
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct sk_psock *psock;
 
 	strp_data_ready(&ctx->strp);
+
+	psock = sk_psock_get(sk);
+	if (psock && !list_empty(&psock->ingress_msg)) {
+		ctx->saved_data_ready(sk);
+		sk_psock_put(sk, psock);
+	}
 }
 
-void tls_sw_free_resources_tx(struct sock *sk)
+void tls_sw_cancel_work_tx(struct tls_context *tls_ctx)
+{
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+
+	set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask);
+	set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask);
+	cancel_delayed_work_sync(&ctx->tx_work.work);
+}
+
+void tls_sw_release_resources_tx(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_rec *rec, *tmp;
+
+	/* Wait for any pending async encryptions to complete */
+	smp_store_mb(ctx->async_notify, true);
+	if (atomic_read(&ctx->encrypt_pending))
+		crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+
+	tls_tx_records(sk, -1);
+
+	/* Free up unsent records in tx_list. First, free
+	 * the partially sent record, if any, at the head of tx_list.
+	 */
+	if (tls_ctx->partially_sent_record) {
+		tls_free_partial_record(sk, tls_ctx);
+		rec = list_first_entry(&ctx->tx_list,
+				       struct tls_rec, list);
+		list_del(&rec->list);
+		sk_msg_free(sk, &rec->msg_plaintext);
+		kfree(rec);
+	}
+
+	list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) {
+		list_del(&rec->list);
+		sk_msg_free(sk, &rec->msg_encrypted);
+		sk_msg_free(sk, &rec->msg_plaintext);
+		kfree(rec);
+	}
 
 	crypto_free_aead(ctx->aead_send);
-	tls_free_both_sg(sk);
+	tls_free_open_rec(sk);
+}
+
+void tls_sw_free_ctx_tx(struct tls_context *tls_ctx)
+{
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 
 	kfree(ctx);
 }
@@ -1118,41 +2116,114 @@
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
+	kfree(tls_ctx->rx.rec_seq);
+	kfree(tls_ctx->rx.iv);
+
 	if (ctx->aead_recv) {
 		kfree_skb(ctx->recv_pkt);
 		ctx->recv_pkt = NULL;
+		skb_queue_purge(&ctx->rx_list);
 		crypto_free_aead(ctx->aead_recv);
 		strp_stop(&ctx->strp);
-		write_lock_bh(&sk->sk_callback_lock);
-		sk->sk_data_ready = ctx->saved_data_ready;
-		write_unlock_bh(&sk->sk_callback_lock);
-		release_sock(sk);
-		strp_done(&ctx->strp);
-		lock_sock(sk);
+		/* If tls_sw_strparser_arm() was not called (cleanup paths)
+		 * we still want to strp_stop(), but sk->sk_data_ready was
+		 * never swapped.
+		 */
+		if (ctx->saved_data_ready) {
+			write_lock_bh(&sk->sk_callback_lock);
+			sk->sk_data_ready = ctx->saved_data_ready;
+			write_unlock_bh(&sk->sk_callback_lock);
+		}
 	}
 }
 
+void tls_sw_strparser_done(struct tls_context *tls_ctx)
+{
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+
+	strp_done(&ctx->strp);
+}
+
+void tls_sw_free_ctx_rx(struct tls_context *tls_ctx)
+{
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+
+	kfree(ctx);
+}
+
 void tls_sw_free_resources_rx(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
 	tls_sw_release_resources_rx(sk);
+	tls_sw_free_ctx_rx(tls_ctx);
+}
 
-	kfree(ctx);
+/* The work handler to transmit the encrypted records in tx_list */
+static void tx_work_handler(struct work_struct *work)
+{
+	struct delayed_work *delayed_work = to_delayed_work(work);
+	struct tx_work *tx_work = container_of(delayed_work,
+					       struct tx_work, work);
+	struct sock *sk = tx_work->sk;
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_tx *ctx;
+
+	if (unlikely(!tls_ctx))
+		return;
+
+	ctx = tls_sw_ctx_tx(tls_ctx);
+	if (test_bit(BIT_TX_CLOSING, &ctx->tx_bitmask))
+		return;
+
+	if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+		return;
+	mutex_lock(&tls_ctx->tx_lock);
+	lock_sock(sk);
+	tls_tx_records(sk, -1);
+	release_sock(sk);
+	mutex_unlock(&tls_ctx->tx_lock);
+}
+
+void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
+{
+	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
+
+	/* Schedule the transmission if tx list is ready */
+	if (is_tx_ready(tx_ctx) &&
+	    !test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
+		schedule_delayed_work(&tx_ctx->tx_work.work, 0);
+}
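
tx_work_handler() and tls_sw_write_space() coordinate through BIT_TX_SCHEDULED: the scheduler queues work only when it flips the bit from 0 to 1, and the handler runs only if it can flip it back. The same idempotent-schedule guard with a C11 atomic (illustrative, not the kernel bitops or workqueue API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic bool tx_scheduled;

/* Writer side: queue at most one pending run, like test_and_set_bit(). */
static bool schedule_tx(void)
{
	return !atomic_exchange(&tx_scheduled, true);	/* true if newly queued */
}

/* Worker side: claim the pending run, like test_and_clear_bit(). */
static bool claim_tx(void)
{
	return atomic_exchange(&tx_scheduled, false);
}

int main(void)
{
	printf("queued=%d\n", schedule_tx());	/* 1: newly scheduled */
	printf("requeued=%d\n", schedule_tx());	/* 0: already pending */
	printf("ran=%d\n", claim_tx());		/* 1: claimed the run */
	printf("ran again=%d\n", claim_tx());	/* 0: nothing pending */
	return 0;
}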
+
+void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
+{
+	struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx);
+
+	write_lock_bh(&sk->sk_callback_lock);
+	rx_ctx->saved_data_ready = sk->sk_data_ready;
+	sk->sk_data_ready = tls_data_ready;
+	write_unlock_bh(&sk->sk_callback_lock);
+
+	strp_check_rcv(&rx_ctx->strp);
 }
 
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 {
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_crypto_info *crypto_info;
 	struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
+	struct tls12_crypto_info_aes_gcm_256 *gcm_256_info;
+	struct tls12_crypto_info_aes_ccm_128 *ccm_128_info;
 	struct tls_sw_context_tx *sw_ctx_tx = NULL;
 	struct tls_sw_context_rx *sw_ctx_rx = NULL;
 	struct cipher_context *cctx;
 	struct crypto_aead **aead;
 	struct strp_callbacks cb;
-	u16 nonce_size, tag_size, iv_size, rec_seq_size;
-	char *iv, *rec_seq;
+	u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size;
+	struct crypto_tfm *tfm;
+	char *iv, *rec_seq, *key, *salt, *cipher_name;
+	size_t keysize;
 	int rc = 0;
 
 	if (!ctx) {
@@ -1191,10 +2262,14 @@
 		crypto_info = &ctx->crypto_send.info;
 		cctx = &ctx->tx;
 		aead = &sw_ctx_tx->aead_send;
+		INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
+		INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
+		sw_ctx_tx->tx_work.sk = sk;
 	} else {
 		crypto_init_wait(&sw_ctx_rx->async_wait);
 		crypto_info = &ctx->crypto_recv.info;
 		cctx = &ctx->rx;
+		skb_queue_head_init(&sw_ctx_rx->rx_list);
 		aead = &sw_ctx_rx->aead_recv;
 	}
 
@@ -1209,6 +2284,45 @@
 		 ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq;
 		gcm_128_info =
 			(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+		keysize = TLS_CIPHER_AES_GCM_128_KEY_SIZE;
+		key = gcm_128_info->key;
+		salt = gcm_128_info->salt;
+		salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE;
+		cipher_name = "gcm(aes)";
+		break;
+	}
+	case TLS_CIPHER_AES_GCM_256: {
+		nonce_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
+		tag_size = TLS_CIPHER_AES_GCM_256_TAG_SIZE;
+		iv_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
+		iv = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->iv;
+		rec_seq_size = TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE;
+		rec_seq =
+		 ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->rec_seq;
+		gcm_256_info =
+			(struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+		keysize = TLS_CIPHER_AES_GCM_256_KEY_SIZE;
+		key = gcm_256_info->key;
+		salt = gcm_256_info->salt;
+		salt_size = TLS_CIPHER_AES_GCM_256_SALT_SIZE;
+		cipher_name = "gcm(aes)";
+		break;
+	}
+	case TLS_CIPHER_AES_CCM_128: {
+		nonce_size = TLS_CIPHER_AES_CCM_128_IV_SIZE;
+		tag_size = TLS_CIPHER_AES_CCM_128_TAG_SIZE;
+		iv_size = TLS_CIPHER_AES_CCM_128_IV_SIZE;
+		iv = ((struct tls12_crypto_info_aes_ccm_128 *)crypto_info)->iv;
+		rec_seq_size = TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE;
+		rec_seq =
+		((struct tls12_crypto_info_aes_ccm_128 *)crypto_info)->rec_seq;
+		ccm_128_info =
+		(struct tls12_crypto_info_aes_ccm_128 *)crypto_info;
+		keysize = TLS_CIPHER_AES_CCM_128_KEY_SIZE;
+		key = ccm_128_info->key;
+		salt = ccm_128_info->salt;
+		salt_size = TLS_CIPHER_AES_CCM_128_SALT_SIZE;
+		cipher_name = "ccm(aes)";
 		break;
 	}
 	default:
@@ -1216,53 +2330,47 @@
 		goto free_priv;
 	}
 
-	/* Sanity-check the IV size for stack allocations. */
-	if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE) {
+	/* Sanity-check the sizes for stack allocations. */
+	if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE ||
+	    rec_seq_size > TLS_MAX_REC_SEQ_SIZE) {
 		rc = -EINVAL;
 		goto free_priv;
 	}
 
-	cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
-	cctx->tag_size = tag_size;
-	cctx->overhead_size = cctx->prepend_size + cctx->tag_size;
-	cctx->iv_size = iv_size;
-	cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
-			   GFP_KERNEL);
+	if (crypto_info->version == TLS_1_3_VERSION) {
+		nonce_size = 0;
+		prot->aad_size = TLS_HEADER_SIZE;
+		prot->tail_size = 1;
+	} else {
+		prot->aad_size = TLS_AAD_SPACE_SIZE;
+		prot->tail_size = 0;
+	}
+
+	prot->version = crypto_info->version;
+	prot->cipher_type = crypto_info->cipher_type;
+	prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
+	prot->tag_size = tag_size;
+	prot->overhead_size = prot->prepend_size +
+			      prot->tag_size + prot->tail_size;
+	prot->iv_size = iv_size;
+	prot->salt_size = salt_size;
+	cctx->iv = kmalloc(iv_size + salt_size, GFP_KERNEL);
 	if (!cctx->iv) {
 		rc = -ENOMEM;
 		goto free_priv;
 	}
-	memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
-	memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
-	cctx->rec_seq_size = rec_seq_size;
+	/* Note: 128 & 256 bit salts are the same size */
+	prot->rec_seq_size = rec_seq_size;
+	memcpy(cctx->iv, salt, salt_size);
+	memcpy(cctx->iv + salt_size, iv, iv_size);
 	cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
 	if (!cctx->rec_seq) {
 		rc = -ENOMEM;
 		goto free_iv;
 	}
 
-	if (sw_ctx_tx) {
-		sg_init_table(sw_ctx_tx->sg_encrypted_data,
-			      ARRAY_SIZE(sw_ctx_tx->sg_encrypted_data));
-		sg_init_table(sw_ctx_tx->sg_plaintext_data,
-			      ARRAY_SIZE(sw_ctx_tx->sg_plaintext_data));
-
-		sg_init_table(sw_ctx_tx->sg_aead_in, 2);
-		sg_set_buf(&sw_ctx_tx->sg_aead_in[0], sw_ctx_tx->aad_space,
-			   sizeof(sw_ctx_tx->aad_space));
-		sg_unmark_end(&sw_ctx_tx->sg_aead_in[1]);
-		sg_chain(sw_ctx_tx->sg_aead_in, 2,
-			 sw_ctx_tx->sg_plaintext_data);
-		sg_init_table(sw_ctx_tx->sg_aead_out, 2);
-		sg_set_buf(&sw_ctx_tx->sg_aead_out[0], sw_ctx_tx->aad_space,
-			   sizeof(sw_ctx_tx->aad_space));
-		sg_unmark_end(&sw_ctx_tx->sg_aead_out[1]);
-		sg_chain(sw_ctx_tx->sg_aead_out, 2,
-			 sw_ctx_tx->sg_encrypted_data);
-	}
-
 	if (!*aead) {
-		*aead = crypto_alloc_aead("gcm(aes)", 0, 0);
+		*aead = crypto_alloc_aead(cipher_name, 0, 0);
 		if (IS_ERR(*aead)) {
 			rc = PTR_ERR(*aead);
 			*aead = NULL;
@@ -1272,31 +2380,30 @@
 
 	ctx->push_pending_record = tls_sw_push_pending_record;
 
-	rc = crypto_aead_setkey(*aead, gcm_128_info->key,
-				TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+	rc = crypto_aead_setkey(*aead, key, keysize);
+
 	if (rc)
 		goto free_aead;
 
-	rc = crypto_aead_setauthsize(*aead, cctx->tag_size);
+	rc = crypto_aead_setauthsize(*aead, prot->tag_size);
 	if (rc)
 		goto free_aead;
 
 	if (sw_ctx_rx) {
+		tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
+
+		if (crypto_info->version == TLS_1_3_VERSION)
+			sw_ctx_rx->async_capable = false;
+		else
+			sw_ctx_rx->async_capable =
+				tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+
 		/* Set up strparser */
 		memset(&cb, 0, sizeof(cb));
 		cb.rcv_msg = tls_queue;
 		cb.parse_msg = tls_read_size;
 
 		strp_init(&sw_ctx_rx->strp, sk, &cb);
-
-		write_lock_bh(&sk->sk_callback_lock);
-		sw_ctx_rx->saved_data_ready = sk->sk_data_ready;
-		sk->sk_data_ready = tls_data_ready;
-		write_unlock_bh(&sk->sk_callback_lock);
-
-		sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll;
-
-		strp_check_rcv(&sw_ctx_rx->strp);
 	}
 
 	goto out;
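
    The hunk above derives the per-record framing sizes (prepend, tag, tail,
    overhead) from the negotiated cipher and TLS version. A minimal userspace
    sketch of that arithmetic, assuming the v5.4 constants: a 5-byte TLS record
    header, an 8-byte explicit nonce for TLS 1.2 AES-GCM, a 16-byte AEAD tag,
    and a 1-byte content-type tail for TLS 1.3, which carries no explicit nonce.

    #include <stdio.h>

    #define TLS_HEADER_SIZE 5
    #define GCM_NONCE_SIZE  8   /* explicit per-record nonce, TLS 1.2 only */
    #define GCM_TAG_SIZE    16

    static unsigned int record_overhead(int is_tls13)
    {
        unsigned int nonce = is_tls13 ? 0 : GCM_NONCE_SIZE;
        unsigned int tail  = is_tls13 ? 1 : 0; /* inner content type byte */

        /* prepend_size + tag_size + tail_size, as computed above */
        return TLS_HEADER_SIZE + nonce + GCM_TAG_SIZE + tail;
    }

    int main(void)
    {
        printf("TLS 1.2 per-record overhead: %u\n", record_overhead(0)); /* 29 */
        printf("TLS 1.3 per-record overhead: %u\n", record_overhead(1)); /* 22 */
        return 0;
    }
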
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 8b31ab8..a23a5cc 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Unix Domain Sockets
 #
@@ -19,6 +20,11 @@
 
 	  Say Y unless you know what you are doing.
 
+config UNIX_SCM
+	bool
+	depends on UNIX
+	default y
+
 config UNIX_DIAG
 	tristate "UNIX: socket monitoring interface"
 	depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index ffd0a27..54e58cc 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -10,3 +10,5 @@
 
 obj-$(CONFIG_UNIX_DIAG)	+= unix_diag.o
 unix_diag-y		:= diag.o
+
+obj-$(CONFIG_UNIX_SCM)	+= scm.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c754f3a..0d8da80 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NET4:	Implementation of BSD Unix domain sockets.
  *
  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  * Fixes:
  *		Linus Torvalds	:	Assorted bug cures.
  *		Niibe Yutaka	:	async I/O support.
@@ -49,7 +45,6 @@
  *	     				the core infrastructure is doing that
  *	     				for all net proto families now (2.5.69+)
  *
- *
  * Known differences from reference BSD that was tested:
  *
  *	[TO FIX]
@@ -119,6 +114,8 @@
 #include <linux/freezer.h>
 #include <linux/file.h>
 
+#include "scm.h"
+
 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 EXPORT_SYMBOL_GPL(unix_socket_table);
 DEFINE_SPINLOCK(unix_table_lock);
@@ -225,6 +222,8 @@
 
 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 {
+	*hashp = 0;
+
 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
 		return -EINVAL;
 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
@@ -888,7 +887,7 @@
 	addr->hash ^= sk->sk_type;
 
 	__unix_remove_socket(sk);
-	u->addr = addr;
+	smp_store_release(&u->addr, addr);
 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
 	spin_unlock(&unix_table_lock);
 	err = 0;
@@ -1058,7 +1057,7 @@
 
 	err = 0;
 	__unix_remove_socket(sk);
-	u->addr = addr;
+	smp_store_release(&u->addr, addr);
 	__unix_insert_socket(list, sk);
 
 out_unlock:
@@ -1329,15 +1328,29 @@
 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
 	otheru = unix_sk(other);
 
-	/* copy address information from listening to new sock*/
-	if (otheru->addr) {
-		refcount_inc(&otheru->addr->refcnt);
-		newu->addr = otheru->addr;
-	}
+	/* copy address information from listening to new sock
+	 *
+	 * The contents of *(otheru->addr) and otheru->path
+	 * are seen fully set up here, since we have found
+	 * otheru in the hash under unix_table_lock.  Insertion
+	 * into the hash chain we'd found it in had been done
+	 * in an earlier critical section protected by unix_table_lock,
+	 * the same one where we'd set *(otheru->addr) contents,
+	 * as well as otheru->path and otheru->addr itself.
+	 *
+	 * Using smp_store_release() here to set newu->addr
+	 * is enough to make those stores, as well as stores
+	 * to newu->path, visible to anyone who gets newu->addr
+	 * by smp_load_acquire().  IOW, the same guarantees
+	 * as for unix_sock instances bound in unix_bind() or
+	 * in unix_autobind().
+	 */
 	if (otheru->path.dentry) {
 		path_get(&otheru->path);
 		newu->path = otheru->path;
 	}
+	refcount_inc(&otheru->addr->refcnt);
+	smp_store_release(&newu->addr, otheru->addr);
 
 	/* Set credentials */
 	copy_peercred(sk, other);
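
    The comment above leans on the smp_store_release()/smp_load_acquire()
    pairing: publish a fully built object with a release store, read it back
    with an acquire load. A minimal userspace analogue using C11 atomics; the
    struct and field names are illustrative, not the kernel's.

    #include <stdatomic.h>
    #include <stdio.h>

    struct addr { int len; char name[108]; };

    static _Atomic(struct addr *) published_addr;

    /* Writer: fully initialize the object first, then publish it with
     * release semantics so no reader can see a half-built struct.
     */
    static void publish(struct addr *a)
    {
        atomic_store_explicit(&published_addr, a, memory_order_release);
    }

    /* Reader: the acquire load pairs with the release store above and
     * guarantees all of the writer's earlier stores are visible.
     */
    static struct addr *lookup(void)
    {
        return atomic_load_explicit(&published_addr, memory_order_acquire);
    }

    int main(void)
    {
        static struct addr a = { .len = 6, .name = "/tmp/x" };

        publish(&a);
        printf("%d\n", lookup() ? lookup()->len : 0);
        return 0;
    }
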
@@ -1451,7 +1464,7 @@
 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 {
 	struct sock *sk = sock->sk;
-	struct unix_sock *u;
+	struct unix_address *addr;
 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
 	int err = 0;
 
@@ -1466,85 +1479,20 @@
 		sock_hold(sk);
 	}
 
-	u = unix_sk(sk);
-	unix_state_lock(sk);
-	if (!u->addr) {
+	addr = smp_load_acquire(&unix_sk(sk)->addr);
+	if (!addr) {
 		sunaddr->sun_family = AF_UNIX;
 		sunaddr->sun_path[0] = 0;
 		err = sizeof(short);
 	} else {
-		struct unix_address *addr = u->addr;
-
 		err = addr->len;
 		memcpy(sunaddr, addr->name, addr->len);
 	}
-	unix_state_unlock(sk);
 	sock_put(sk);
 out:
 	return err;
 }
 
-static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
-	int i;
-
-	scm->fp = UNIXCB(skb).fp;
-	UNIXCB(skb).fp = NULL;
-
-	for (i = scm->fp->count-1; i >= 0; i--)
-		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
-}
-
-static void unix_destruct_scm(struct sk_buff *skb)
-{
-	struct scm_cookie scm;
-	memset(&scm, 0, sizeof(scm));
-	scm.pid  = UNIXCB(skb).pid;
-	if (UNIXCB(skb).fp)
-		unix_detach_fds(&scm, skb);
-
-	/* Alas, it calls VFS */
-	/* So fscking what? fput() had been SMP-safe since the last Summer */
-	scm_destroy(&scm);
-	sock_wfree(skb);
-}
-
-/*
- * The "user->unix_inflight" variable is protected by the garbage
- * collection lock, and we just read it locklessly here. If you go
- * over the limit, there might be a tiny race in actually noticing
- * it across threads. Tough.
- */
-static inline bool too_many_unix_fds(struct task_struct *p)
-{
-	struct user_struct *user = current_user();
-
-	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
-		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
-	return false;
-}
-
-static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
-	int i;
-
-	if (too_many_unix_fds(current))
-		return -ETOOMANYREFS;
-
-	/*
-	 * Need to duplicate file references for the sake of garbage
-	 * collection.  Otherwise a socket in the fps might become a
-	 * candidate for GC while the skb is not yet queued.
-	 */
-	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
-	if (!UNIXCB(skb).fp)
-		return -ENOMEM;
-
-	for (i = scm->fp->count - 1; i >= 0; i--)
-		unix_inflight(scm->fp->user, scm->fp->fp[i]);
-	return 0;
-}
-
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
 {
 	int err = 0;
@@ -2071,11 +2019,11 @@
 
 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
 {
-	struct unix_sock *u = unix_sk(sk);
+	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
 
-	if (u->addr) {
-		msg->msg_namelen = u->addr->len;
-		memcpy(msg->msg_name, u->addr->name, u->addr->len);
+	if (addr) {
+		msg->msg_namelen = addr->len;
+		memcpy(msg->msg_name, addr->name, addr->len);
 	}
 }
 
@@ -2087,8 +2035,8 @@
 	struct unix_sock *u = unix_sk(sk);
 	struct sk_buff *skb, *last;
 	long timeo;
+	int skip;
 	int err;
-	int peeked, skip;
 
 	err = -EOPNOTSUPP;
 	if (flags&MSG_OOB)
@@ -2100,8 +2048,8 @@
 		mutex_lock(&u->iolock);
 
 		skip = sk_peek_offset(sk, flags);
-		skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
-					      &err, &last);
+		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
+					      &last);
 		if (skb)
 			break;
 
@@ -2579,15 +2527,14 @@
 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-	unix_state_lock(sk);
-	path = unix_sk(sk)->path;
-	if (!path.dentry) {
-		unix_state_unlock(sk);
+	if (!smp_load_acquire(&unix_sk(sk)->addr))
 		return -ENOENT;
-	}
+
+	path = unix_sk(sk)->path;
+	if (!path.dentry)
+		return -ENOENT;
 
 	path_get(&path);
-	unix_state_unlock(sk);
 
 	fd = get_unused_fd_flags(O_CLOEXEC);
 	if (fd < 0)
@@ -2652,7 +2599,7 @@
 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
 
 	/* readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
@@ -2681,7 +2628,7 @@
 	mask = 0;
 
 	/* exceptional events? */
-	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
 		mask |= EPOLLERR |
 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
@@ -2691,7 +2638,7 @@
 		mask |= EPOLLHUP;
 
 	/* readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
@@ -2828,7 +2775,7 @@
 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
 			sock_i_ino(s));
 
-		if (u->addr) {
+		if (u->addr) {	/* under unix_table_lock here */
 			int i, len;
 			seq_putc(seq, ' ');
 
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 384c84e..9ff64f9 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -1,16 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/sock_diag.h>
 #include <linux/unix_diag.h>
 #include <linux/skbuff.h>
 #include <linux/module.h>
+#include <linux/uidgid.h>
 #include <net/netlink.h>
 #include <net/af_unix.h>
 #include <net/tcp_states.h>
+#include <net/sock.h>
 
 static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
 {
-	struct unix_address *addr = unix_sk(sk)->addr;
+	/* might or might not have unix_table_lock */
+	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
 
 	if (!addr)
 		return 0;
@@ -109,6 +113,12 @@
 	return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql);
 }
 
+static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb)
+{
+	uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk));
+	return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid);
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 portid, u32 seq, u32 flags, int sk_ino)
 {
@@ -155,6 +165,10 @@
 	if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown))
 		goto out_nlmsg_trim;
 
+	if ((req->udiag_show & UDIAG_SHOW_UID) &&
+	    sk_diag_dump_uid(sk, skb))
+		goto out_nlmsg_trim;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c36757e..12e2dda 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NET3:	Garbage Collector For AF_UNIX sockets
  *
  * Garbage Collector:
  *	Copyright (C) Barak A. Pearlmutter.
- *	Released under the GPL version 2 or later.
  *
  * Chopped about by Alan Cox 22/3/96 to make it fit the AF_UNIX socket problem.
  * If it doesn't work blame me, it worked when Barak sent it.
@@ -24,11 +24,6 @@
  *
  *  - don't just push entire root set; process in place
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *  Fixes:
  *	Alan Cox	07 Sept	1997	Vmalloc internal stack as needed.
  *					Cope with changing max_files.
@@ -86,77 +81,13 @@
 #include <net/scm.h>
 #include <net/tcp_states.h>
 
+#include "scm.h"
+
 /* Internal data structures and random procedures: */
 
-static LIST_HEAD(gc_inflight_list);
 static LIST_HEAD(gc_candidates);
-static DEFINE_SPINLOCK(unix_gc_lock);
 static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
 
-unsigned int unix_tot_inflight;
-
-struct sock *unix_get_socket(struct file *filp)
-{
-	struct sock *u_sock = NULL;
-	struct inode *inode = file_inode(filp);
-
-	/* Socket ? */
-	if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
-		struct socket *sock = SOCKET_I(inode);
-		struct sock *s = sock->sk;
-
-		/* PF_UNIX ? */
-		if (s && sock->ops && sock->ops->family == PF_UNIX)
-			u_sock = s;
-	}
-	return u_sock;
-}
-
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-
-void unix_inflight(struct user_struct *user, struct file *fp)
-{
-	struct sock *s = unix_get_socket(fp);
-
-	spin_lock(&unix_gc_lock);
-
-	if (s) {
-		struct unix_sock *u = unix_sk(s);
-
-		if (atomic_long_inc_return(&u->inflight) == 1) {
-			BUG_ON(!list_empty(&u->link));
-			list_add_tail(&u->link, &gc_inflight_list);
-		} else {
-			BUG_ON(list_empty(&u->link));
-		}
-		unix_tot_inflight++;
-	}
-	user->unix_inflight++;
-	spin_unlock(&unix_gc_lock);
-}
-
-void unix_notinflight(struct user_struct *user, struct file *fp)
-{
-	struct sock *s = unix_get_socket(fp);
-
-	spin_lock(&unix_gc_lock);
-
-	if (s) {
-		struct unix_sock *u = unix_sk(s);
-
-		BUG_ON(!atomic_long_read(&u->inflight));
-		BUG_ON(list_empty(&u->link));
-
-		if (atomic_long_dec_and_test(&u->inflight))
-			list_del_init(&u->link);
-		unix_tot_inflight--;
-	}
-	user->unix_inflight--;
-	spin_unlock(&unix_gc_lock);
-}
-
 static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 			  struct sk_buff_head *hitlist)
 {
diff --git a/net/unix/scm.c b/net/unix/scm.c
new file mode 100644
index 0000000..8c40f2b
--- /dev/null
+++ b/net/unix/scm.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/fs.h>
+#include <net/af_unix.h>
+#include <net/scm.h>
+#include <linux/init.h>
+
+#include "scm.h"
+
+unsigned int unix_tot_inflight;
+EXPORT_SYMBOL(unix_tot_inflight);
+
+LIST_HEAD(gc_inflight_list);
+EXPORT_SYMBOL(gc_inflight_list);
+
+DEFINE_SPINLOCK(unix_gc_lock);
+EXPORT_SYMBOL(unix_gc_lock);
+
+struct sock *unix_get_socket(struct file *filp)
+{
+	struct sock *u_sock = NULL;
+	struct inode *inode = file_inode(filp);
+
+	/* Socket ? */
+	if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+		struct socket *sock = SOCKET_I(inode);
+		struct sock *s = sock->sk;
+
+		/* PF_UNIX ? */
+		if (s && sock->ops && sock->ops->family == PF_UNIX)
+			u_sock = s;
+	} else {
+		/* Could be an io_uring instance */
+		u_sock = io_uring_get_socket(filp);
+	}
+	return u_sock;
+}
+EXPORT_SYMBOL(unix_get_socket);
+
+/* Keep the number of times in flight count for the file
+ * descriptor if it is for an AF_UNIX socket.
+ */
+void unix_inflight(struct user_struct *user, struct file *fp)
+{
+	struct sock *s = unix_get_socket(fp);
+
+	spin_lock(&unix_gc_lock);
+
+	if (s) {
+		struct unix_sock *u = unix_sk(s);
+
+		if (atomic_long_inc_return(&u->inflight) == 1) {
+			BUG_ON(!list_empty(&u->link));
+			list_add_tail(&u->link, &gc_inflight_list);
+		} else {
+			BUG_ON(list_empty(&u->link));
+		}
+		unix_tot_inflight++;
+	}
+	user->unix_inflight++;
+	spin_unlock(&unix_gc_lock);
+}
+
+void unix_notinflight(struct user_struct *user, struct file *fp)
+{
+	struct sock *s = unix_get_socket(fp);
+
+	spin_lock(&unix_gc_lock);
+
+	if (s) {
+		struct unix_sock *u = unix_sk(s);
+
+		BUG_ON(!atomic_long_read(&u->inflight));
+		BUG_ON(list_empty(&u->link));
+
+		if (atomic_long_dec_and_test(&u->inflight))
+			list_del_init(&u->link);
+		unix_tot_inflight--;
+	}
+	user->unix_inflight--;
+	spin_unlock(&unix_gc_lock);
+}
+
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+	struct user_struct *user = current_user();
+
+	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+	return false;
+}
+
+int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	int i;
+
+	if (too_many_unix_fds(current))
+		return -ETOOMANYREFS;
+
+	/*
+	 * Need to duplicate file references for the sake of garbage
+	 * collection.  Otherwise a socket in the fps might become a
+	 * candidate for GC while the skb is not yet queued.
+	 */
+	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+	if (!UNIXCB(skb).fp)
+		return -ENOMEM;
+
+	for (i = scm->fp->count - 1; i >= 0; i--)
+		unix_inflight(scm->fp->user, scm->fp->fp[i]);
+	return 0;
+}
+EXPORT_SYMBOL(unix_attach_fds);
+
+void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	int i;
+
+	scm->fp = UNIXCB(skb).fp;
+	UNIXCB(skb).fp = NULL;
+
+	for (i = scm->fp->count-1; i >= 0; i--)
+		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+}
+EXPORT_SYMBOL(unix_detach_fds);
+
+void unix_destruct_scm(struct sk_buff *skb)
+{
+	struct scm_cookie scm;
+
+	memset(&scm, 0, sizeof(scm));
+	scm.pid  = UNIXCB(skb).pid;
+	if (UNIXCB(skb).fp)
+		unix_detach_fds(&scm, skb);
+
+	/* Alas, it calls VFS */
+	/* So fscking what? fput() had been SMP-safe since the last Summer */
+	scm_destroy(&scm);
+	sock_wfree(skb);
+}
+EXPORT_SYMBOL(unix_destruct_scm);
diff --git a/net/unix/scm.h b/net/unix/scm.h
new file mode 100644
index 0000000..5a255a4
--- /dev/null
+++ b/net/unix/scm.h
@@ -0,0 +1,10 @@
+#ifndef NET_UNIX_SCM_H
+#define NET_UNIX_SCM_H
+
+extern struct list_head gc_inflight_list;
+extern spinlock_t unix_gc_lock;
+
+int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
+void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
+
+#endif
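
    The new scm.c/scm.h centralize the in-flight FD accounting so that
    af_unix and the garbage collector share one implementation. The invariant
    is that every unix_inflight() taken when SCM_RIGHTS FDs are attached on
    send is balanced by a unix_notinflight() when the skb is delivered or
    destroyed. A toy userspace model of that balance; the names, the counter,
    and the rlimit value are illustrative stand-ins.

    #include <assert.h>
    #include <stdbool.h>

    #define RLIMIT_NOFILE_TOY 1024

    static unsigned long unix_inflight_count; /* per-user in the kernel */

    static bool too_many_unix_fds_toy(void)
    {
        return unix_inflight_count > RLIMIT_NOFILE_TOY;
    }

    static int attach_fds(int nfds)
    {
        if (too_many_unix_fds_toy())
            return -1;               /* -ETOOMANYREFS in the kernel */
        unix_inflight_count += nfds;
        return 0;
    }

    static void detach_fds(int nfds)
    {
        assert(unix_inflight_count >= (unsigned long)nfds);
        unix_inflight_count -= nfds;
    }

    int main(void)
    {
        if (attach_fds(3) == 0)  /* SCM_RIGHTS message queued... */
            detach_fds(3);       /* ...and later delivered or dropped */
        assert(unix_inflight_count == 0);
        return 0;
    }
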
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index b3d5150..c09bea8 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NET4:	Sysctl interface to net af_unix subsystem.
  *
  * Authors:	Mike Shaver.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #include <linux/mm.h>
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index 970f964..8abcb81 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Vsock protocol
 #
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index ab27a28..582a3e4 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * VMware vSockets Driver
  *
  * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 /* Implementation notes:
@@ -107,6 +99,7 @@
 #include <linux/mutex.h>
 #include <linux/net.h>
 #include <linux/poll.h>
+#include <linux/random.h>
 #include <linux/skbuff.h>
 #include <linux/smp.h>
 #include <linux/socket.h>
@@ -281,7 +274,8 @@
 void vsock_remove_bound(struct vsock_sock *vsk)
 {
 	spin_lock_bh(&vsock_table_lock);
-	__vsock_remove_bound(vsk);
+	if (__vsock_in_bound_table(vsk))
+		__vsock_remove_bound(vsk);
 	spin_unlock_bh(&vsock_table_lock);
 }
 EXPORT_SYMBOL_GPL(vsock_remove_bound);
@@ -289,7 +283,8 @@
 void vsock_remove_connected(struct vsock_sock *vsk)
 {
 	spin_lock_bh(&vsock_table_lock);
-	__vsock_remove_connected(vsk);
+	if (__vsock_in_connected_table(vsk))
+		__vsock_remove_connected(vsk);
 	spin_unlock_bh(&vsock_table_lock);
 }
 EXPORT_SYMBOL_GPL(vsock_remove_connected);
@@ -325,35 +320,10 @@
 }
 EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
 
-static bool vsock_in_bound_table(struct vsock_sock *vsk)
-{
-	bool ret;
-
-	spin_lock_bh(&vsock_table_lock);
-	ret = __vsock_in_bound_table(vsk);
-	spin_unlock_bh(&vsock_table_lock);
-
-	return ret;
-}
-
-static bool vsock_in_connected_table(struct vsock_sock *vsk)
-{
-	bool ret;
-
-	spin_lock_bh(&vsock_table_lock);
-	ret = __vsock_in_connected_table(vsk);
-	spin_unlock_bh(&vsock_table_lock);
-
-	return ret;
-}
-
 void vsock_remove_sock(struct vsock_sock *vsk)
 {
-	if (vsock_in_bound_table(vsk))
-		vsock_remove_bound(vsk);
-
-	if (vsock_in_connected_table(vsk))
-		vsock_remove_connected(vsk);
+	vsock_remove_bound(vsk);
+	vsock_remove_connected(vsk);
 }
 EXPORT_SYMBOL_GPL(vsock_remove_sock);
 
@@ -484,8 +454,7 @@
 	 * incoming packets can't find this socket, and to reduce the reference
 	 * count.
 	 */
-	if (vsock_in_connected_table(vsk))
-		vsock_remove_connected(vsk);
+	vsock_remove_connected(vsk);
 
 	sk->sk_state = TCP_CLOSE;
 
@@ -504,9 +473,13 @@
 static int __vsock_bind_stream(struct vsock_sock *vsk,
 			       struct sockaddr_vm *addr)
 {
-	static u32 port = LAST_RESERVED_PORT + 1;
+	static u32 port;
 	struct sockaddr_vm new_addr;
 
+	if (!port)
+		port = LAST_RESERVED_PORT + 1 +
+			prandom_u32_max(U32_MAX - LAST_RESERVED_PORT);
+
 	vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port);
 
 	if (addr->svm_port == VMADDR_PORT_ANY) {
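
    The hunk above makes __vsock_bind_stream() start its ephemeral-port
    search from a random offset above LAST_RESERVED_PORT instead of a fixed
    value, which makes vsock port numbers harder to predict. A userspace
    sketch of the pick, with rand() standing in for the kernel's
    prandom_u32_max():

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define LAST_RESERVED_PORT 1023u

    static uint32_t pick_first_port(void)
    {
        uint32_t span = UINT32_MAX - LAST_RESERVED_PORT;

        /* rand() stands in for prandom_u32_max(span) */
        return LAST_RESERVED_PORT + 1 + ((uint32_t)rand() % span);
    }

    int main(void)
    {
        printf("first candidate port: %u\n", pick_first_port());
        return 0;
    }
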
@@ -665,7 +638,7 @@
 }
 EXPORT_SYMBOL_GPL(__vsock_create);
 
-static void __vsock_release(struct sock *sk)
+static void __vsock_release(struct sock *sk, int level)
 {
 	if (sk) {
 		struct sk_buff *skb;
@@ -675,9 +648,17 @@
 		vsk = vsock_sk(sk);
 		pending = NULL;	/* Compiler warning. */
 
+		/* The release call is supposed to use lock_sock_nested()
+		 * rather than lock_sock(), if a sock lock should be acquired.
+		 */
 		transport->release(vsk);
 
-		lock_sock(sk);
+		/* When "level" is SINGLE_DEPTH_NESTING, use the nested
+		 * version to avoid the warning "possible recursive locking
+		 * detected". When "level" is 0, lock_sock_nested(sk, level)
+		 * is the same as lock_sock(sk).
+		 */
+		lock_sock_nested(sk, level);
 		sock_orphan(sk);
 		sk->sk_shutdown = SHUTDOWN_MASK;
 
@@ -686,7 +667,7 @@
 
 		/* Clean up any sockets that never were accepted. */
 		while ((pending = vsock_dequeue_accept(sk)) != NULL) {
-			__vsock_release(pending);
+			__vsock_release(pending, SINGLE_DEPTH_NESTING);
 			sock_put(pending);
 		}
 
@@ -735,7 +716,7 @@
 
 static int vsock_release(struct socket *sock)
 {
-	__vsock_release(sock->sk);
+	__vsock_release(sock->sk, 0);
 	sock->sk = NULL;
 	sock->state = SS_FREE;
 
@@ -889,7 +870,7 @@
 		 * the queue and write as long as the socket isn't shutdown for
 		 * sending.
 		 */
-		if (!skb_queue_empty(&sk->sk_receive_queue) ||
+		if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
 		    (sk->sk_shutdown & RCV_SHUTDOWN)) {
 			mask |= EPOLLIN | EPOLLRDNORM;
 		}
@@ -1434,7 +1415,7 @@
 		break;
 
 	case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
-		struct timeval tv;
+		struct __kernel_old_timeval tv;
 		COPY_IN(tv);
 		if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC &&
 		    tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) {
@@ -1512,7 +1493,7 @@
 		break;
 
 	case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
-		struct timeval tv;
+		struct __kernel_old_timeval tv;
 		tv.tv_sec = vsk->connect_timeout / HZ;
 		tv.tv_usec =
 		    (vsk->connect_timeout -
diff --git a/net/vmw_vsock/af_vsock_tap.c b/net/vmw_vsock/af_vsock_tap.c
index 98f09b5..30ee7e4 100644
--- a/net/vmw_vsock/af_vsock_tap.c
+++ b/net/vmw_vsock/af_vsock_tap.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Tap functions for AF_VSOCK sockets.
  *
  * Code based on net/netlink/af_netlink.c tap functions.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
index 31b5676..a2823b1 100644
--- a/net/vmw_vsock/diag.c
+++ b/net/vmw_vsock/diag.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * vsock sock_diag(7) module
  *
  * Copyright (C) 2017 Red Hat, Inc.
  * Author: Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/module.h>
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index a827547..c443db7 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Hyper-V transport for vsock
  *
@@ -6,16 +7,6 @@
  * support in the VM by introducing the new vsock transport.
  *
  * Copyright (c) 2017, Microsoft Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  */
 #include <linux/module.h>
 #include <linux/vmalloc.h>
@@ -23,18 +14,21 @@
 #include <net/sock.h>
 #include <net/af_vsock.h>
 
-/* The host side's design of the feature requires 6 exact 4KB pages for
- * recv/send rings respectively -- this is suboptimal considering memory
- * consumption, however unluckily we have to live with it, before the
- * host comes up with a better design in the future.
+/* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have stricter
+ * requirements on the hv_sock ring buffer size: exactly six 4K pages. Newer
+ * hosts don't have this limitation, but keep the defaults the same for compat.
  */
 #define PAGE_SIZE_4K		4096
 #define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
 #define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
+#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
 
 /* The MTU is 16KB per the host side's design */
 #define HVS_MTU_SIZE		(1024 * 16)
 
+/* How long to wait for graceful shutdown of a connection */
+#define HVS_CLOSE_TIMEOUT (8 * HZ)
+
 struct vmpipe_proto_header {
 	u32 pkt_type;
 	u32 data_size;
@@ -52,8 +46,9 @@
 };
 
 /* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
- * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
- * buffer, because tests show there is no significant performance difference.
+ * a smaller size, i.e. HVS_SEND_BUF_SIZE, to maximize concurrency between the
+ * guest and the host processing, since one VMBUS packet is the smallest
+ * processing unit.
  *
  * Note: the buffer can be eliminated in the future when we add new VMBus
  * ringbuffer APIs that allow us to directly copy data from userspace buffer
@@ -82,11 +77,11 @@
 					 VMBUS_PKT_TRAILER_SIZE)
 
 union hvs_service_id {
-	uuid_le	srv_id;
+	guid_t	srv_id;
 
 	struct {
 		unsigned int svm_port;
-		unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)];
+		unsigned char b[sizeof(guid_t) - sizeof(unsigned int)];
 	};
 };
 
@@ -94,8 +89,8 @@
 struct hvsock {
 	struct vsock_sock *vsk;
 
-	uuid_le vm_srv_id;
-	uuid_le host_srv_id;
+	guid_t vm_srv_id;
+	guid_t host_srv_id;
 
 	struct vmbus_channel *chan;
 	struct vmpacket_descriptor *recv_desc;
@@ -164,21 +159,21 @@
 #define MIN_HOST_EPHEMERAL_PORT		(MAX_HOST_LISTEN_PORT + 1)
 
 /* 00000000-facb-11e6-bd58-64006a7986d3 */
-static const uuid_le srv_id_template =
-	UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
-		0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
+static const guid_t srv_id_template =
+	GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
+		  0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
 
-static bool is_valid_srv_id(const uuid_le *id)
+static bool is_valid_srv_id(const guid_t *id)
 {
-	return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4);
+	return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(guid_t) - 4);
 }
 
-static unsigned int get_port_by_srv_id(const uuid_le *svr_id)
+static unsigned int get_port_by_srv_id(const guid_t *svr_id)
 {
 	return *((unsigned int *)svr_id);
 }
 
-static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id)
+static void hvs_addr_init(struct sockaddr_vm *addr, const guid_t *svr_id)
 {
 	unsigned int port = get_port_by_srv_id(svr_id);
 
@@ -217,18 +212,6 @@
 	set_channel_pending_send_size(chan,
 				      HVS_PKT_LEN(HVS_SEND_BUF_SIZE));
 
-	/* See hvs_stream_has_space(): we must make sure the host has seen
-	 * the new pending send size, before we can re-check the writable
-	 * bytes.
-	 */
-	virt_mb();
-}
-
-static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan)
-{
-	set_channel_pending_send_size(chan, 0);
-
-	/* Ditto */
 	virt_mb();
 }
 
@@ -298,39 +281,57 @@
 	if (hvs_channel_readable(chan))
 		sk->sk_data_ready(sk);
 
-	/* See hvs_stream_has_space(): when we reach here, the writable bytes
-	 * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE).
-	 */
 	if (hv_get_bytes_to_write(&chan->outbound) > 0)
 		sk->sk_write_space(sk);
 }
 
+static void hvs_do_close_lock_held(struct vsock_sock *vsk,
+				   bool cancel_timeout)
+{
+	struct sock *sk = sk_vsock(vsk);
+
+	sock_set_flag(sk, SOCK_DONE);
+	vsk->peer_shutdown = SHUTDOWN_MASK;
+	if (vsock_stream_has_data(vsk) <= 0)
+		sk->sk_state = TCP_CLOSING;
+	sk->sk_state_change(sk);
+	if (vsk->close_work_scheduled &&
+	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
+		vsk->close_work_scheduled = false;
+		vsock_remove_sock(vsk);
+
+		/* Release the reference taken while scheduling the timeout */
+		sock_put(sk);
+	}
+}
+
 static void hvs_close_connection(struct vmbus_channel *chan)
 {
 	struct sock *sk = get_per_channel_state(chan);
-	struct vsock_sock *vsk = vsock_sk(sk);
 
 	lock_sock(sk);
-
-	sk->sk_state = TCP_CLOSE;
-	sock_set_flag(sk, SOCK_DONE);
-	vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
-
-	sk->sk_state_change(sk);
-
+	hvs_do_close_lock_held(vsock_sk(sk), true);
 	release_sock(sk);
+
+	/* Release the refcnt for the channel that's opened in
+	 * hvs_open_connection().
+	 */
+	sock_put(sk);
 }
 
 static void hvs_open_connection(struct vmbus_channel *chan)
 {
-	uuid_le *if_instance, *if_type;
+	guid_t *if_instance, *if_type;
 	unsigned char conn_from_host;
 
 	struct sockaddr_vm addr;
 	struct sock *sk, *new = NULL;
-	struct vsock_sock *vnew;
-	struct hvsock *hvs, *hvs_new;
+	struct vsock_sock *vnew = NULL;
+	struct hvsock *hvs = NULL;
+	struct hvsock *hvs_new = NULL;
+	int rcvbuf;
 	int ret;
+	int sndbuf;
 
 	if_type = &chan->offermsg.offer.if_type;
 	if_instance = &chan->offermsg.offer.if_instance;
@@ -372,9 +373,34 @@
 	}
 
 	set_channel_read_mode(chan, HV_CALL_DIRECT);
-	ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
-			 RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
-			 hvs_channel_cb, conn_from_host ? new : sk);
+
+	/* Use the socket buffer sizes as hints for the VMBUS ring size. For
+	 * server side sockets, 'sk' is the parent socket, and thus the child
+	 * sockets will inherit the size from the parent. Keep the minimums at
+	 * the default value and align to the page size as per VMBUS
+	 * requirements.
+	 * For the max, the socket core library will limit the socket buffer
+	 * size that can be set by the user, but since the hv_sock VMBUS ring
+	 * buffer is currently a physically contiguous allocation, restrict it
+	 * further.
+	 * Older versions of the hv_sock host side code cannot handle bigger
+	 * VMBUS ring buffer sizes. Use the version number to limit the change
+	 * to newer versions.
+	 */
+	if (vmbus_proto_version < VERSION_WIN10_V5) {
+		sndbuf = RINGBUFFER_HVS_SND_SIZE;
+		rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
+	} else {
+		sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
+		sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
+		sndbuf = ALIGN(sndbuf, PAGE_SIZE);
+		rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
+		rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
+		rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
+	}
+
+	ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
+			 conn_from_host ? new : sk);
 	if (ret != 0) {
 		if (conn_from_host) {
 			hvs_new->chan = NULL;
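
    The sizing logic above clamps the socket-buffer hint between the six-page
    default and RINGBUFFER_HVS_MAX_SIZE, then rounds up to the page size as
    VMBUS requires. A self-contained sketch of the same arithmetic, reusing
    the constants defined earlier in this file:

    #include <stdio.h>

    #define PAGE_SIZE_4K            4096
    #define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
    #define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)

    static int ring_size(int sockbuf_hint)
    {
        int sz = sockbuf_hint;

        if (sz < RINGBUFFER_HVS_SND_SIZE)
            sz = RINGBUFFER_HVS_SND_SIZE;      /* max_t(...) above */
        if (sz > RINGBUFFER_HVS_MAX_SIZE)
            sz = RINGBUFFER_HVS_MAX_SIZE;      /* min_t(...) above */
        /* ALIGN(sz, PAGE_SIZE): round up to a 4K boundary */
        return (sz + PAGE_SIZE_4K - 1) & ~(PAGE_SIZE_4K - 1);
    }

    int main(void)
    {
        printf("%d\n", ring_size(4096));   /* 24576: raised to default */
        printf("%d\n", ring_size(300000)); /* 262144: capped at max */
        return 0;
    }
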
@@ -386,8 +412,18 @@
 	}
 
 	set_per_channel_state(chan, conn_from_host ? new : sk);
+
+	/* This reference will be dropped by hvs_close_connection(). */
+	sock_hold(conn_from_host ? new : sk);
 	vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
 
+	/* Set the pending send size to max packet size to always get
+	 * notifications from the host when there is enough writable space.
+	 * The host is optimized to send notifications only when the pending
+	 * size boundary is crossed, and not always.
+	 */
+	hvs_set_channel_pending_send_size(chan);
+
 	if (conn_from_host) {
 		new->sk_state = TCP_ESTABLISHED;
 		sk->sk_ack_backlog++;
@@ -425,6 +461,7 @@
 static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
 {
 	struct hvsock *hvs;
+	struct sock *sk = sk_vsock(vsk);
 
 	hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
 	if (!hvs)
@@ -432,7 +469,8 @@
 
 	vsk->trans = hvs;
 	hvs->vsk = vsk;
-
+	sk->sk_sndbuf = RINGBUFFER_HVS_SND_SIZE;
+	sk->sk_rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
 	return 0;
 }
 
@@ -452,50 +490,80 @@
 	return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
 }
 
+static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
+{
+	struct vmpipe_proto_header hdr;
+
+	if (hvs->fin_sent || !hvs->chan)
+		return;
+
+	/* It can't fail: see hvs_channel_writable_bytes(). */
+	(void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0);
+	hvs->fin_sent = true;
+}
+
 static int hvs_shutdown(struct vsock_sock *vsk, int mode)
 {
 	struct sock *sk = sk_vsock(vsk);
-	struct vmpipe_proto_header hdr;
-	struct hvs_send_buf *send_buf;
-	struct hvsock *hvs;
 
 	if (!(mode & SEND_SHUTDOWN))
 		return 0;
 
 	lock_sock(sk);
-
-	hvs = vsk->trans;
-	if (hvs->fin_sent)
-		goto out;
-
-	send_buf = (struct hvs_send_buf *)&hdr;
-
-	/* It can't fail: see hvs_channel_writable_bytes(). */
-	(void)hvs_send_data(hvs->chan, send_buf, 0);
-
-	hvs->fin_sent = true;
-out:
+	hvs_shutdown_lock_held(vsk->trans, mode);
 	release_sock(sk);
 	return 0;
 }
 
+static void hvs_close_timeout(struct work_struct *work)
+{
+	struct vsock_sock *vsk =
+		container_of(work, struct vsock_sock, close_work.work);
+	struct sock *sk = sk_vsock(vsk);
+
+	sock_hold(sk);
+	lock_sock(sk);
+	if (!sock_flag(sk, SOCK_DONE))
+		hvs_do_close_lock_held(vsk, false);
+
+	vsk->close_work_scheduled = false;
+	release_sock(sk);
+	sock_put(sk);
+}
+
+/* Returns true if it is safe to remove the socket; false otherwise */
+static bool hvs_close_lock_held(struct vsock_sock *vsk)
+{
+	struct sock *sk = sk_vsock(vsk);
+
+	if (!(sk->sk_state == TCP_ESTABLISHED ||
+	      sk->sk_state == TCP_CLOSING))
+		return true;
+
+	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
+		hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK);
+
+	if (sock_flag(sk, SOCK_DONE))
+		return true;
+
+	/* This reference will be dropped by the delayed close routine */
+	sock_hold(sk);
+	INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout);
+	vsk->close_work_scheduled = true;
+	schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT);
+	return false;
+}
+
 static void hvs_release(struct vsock_sock *vsk)
 {
 	struct sock *sk = sk_vsock(vsk);
-	struct hvsock *hvs = vsk->trans;
-	struct vmbus_channel *chan;
+	bool remove_sock;
 
-	lock_sock(sk);
-
-	sk->sk_state = TCP_CLOSING;
-	vsock_remove_sock(vsk);
-
+	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+	remove_sock = hvs_close_lock_held(vsk);
 	release_sock(sk);
-
-	chan = hvs->chan;
-	if (chan)
-		hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
-
+	if (remove_sock)
+		vsock_remove_sock(vsk);
 }
 
 static void hvs_destruct(struct vsock_sock *vsk)
@@ -598,7 +666,9 @@
 	struct hvsock *hvs = vsk->trans;
 	struct vmbus_channel *chan = hvs->chan;
 	struct hvs_send_buf *send_buf;
-	ssize_t to_write, max_writable, ret;
+	ssize_t to_write, max_writable;
+	ssize_t ret = 0;
+	ssize_t bytes_written = 0;
 
 	BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
 
@@ -606,20 +676,34 @@
 	if (!send_buf)
 		return -ENOMEM;
 
-	max_writable = hvs_channel_writable_bytes(chan);
-	to_write = min_t(ssize_t, len, max_writable);
-	to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
+	/* Reader(s) could be draining data from the channel as we write.
+	 * Maximize bandwidth by iterating until the channel is found to be
+	 * full.
+	 */
+	while (len) {
+		max_writable = hvs_channel_writable_bytes(chan);
+		if (!max_writable)
+			break;
+		to_write = min_t(ssize_t, len, max_writable);
+		to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
+		/* memcpy_from_msg is safe to call in a loop, as it advances
+		 * the offsets within the message iterator.
+		 */
+		ret = memcpy_from_msg(send_buf->data, msg, to_write);
+		if (ret < 0)
+			goto out;
 
-	ret = memcpy_from_msg(send_buf->data, msg, to_write);
-	if (ret < 0)
-		goto out;
+		ret = hvs_send_data(hvs->chan, send_buf, to_write);
+		if (ret < 0)
+			goto out;
 
-	ret = hvs_send_data(hvs->chan, send_buf, to_write);
-	if (ret < 0)
-		goto out;
-
-	ret = to_write;
+		bytes_written += to_write;
+		len -= to_write;
+	}
 out:
+	/* If any data has been sent, return that */
+	if (bytes_written)
+		ret = bytes_written;
 	kfree(send_buf);
 	return ret;
 }
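
    hvs_stream_send() above now loops over bounded chunks until the channel
    fills, and reports a partial write instead of an error if anything went
    out. A toy model of those semantics, with the channel reduced to a bare
    capacity counter and the copy elided:

    #include <stdio.h>
    #include <sys/types.h>

    #define CHUNK 4096                 /* stands in for HVS_SEND_BUF_SIZE */

    static size_t chan_room = 10000;   /* toy channel capacity */

    static ssize_t chan_send(size_t n)
    {
        chan_room -= n;                /* pretend the host consumed it */
        return 0;
    }

    static ssize_t stream_send(size_t len)
    {
        ssize_t written = 0, ret = 0;

        while (len) {
            size_t to_write = len;

            if (!chan_room)
                break;                 /* channel full: stop, don't fail */
            if (to_write > chan_room)
                to_write = chan_room;
            if (to_write > CHUNK)
                to_write = CHUNK;
            ret = chan_send(to_write);
            if (ret < 0)
                break;
            written += to_write;
            len -= to_write;
        }
        return written ? written : ret; /* partial success wins */
    }

    int main(void)
    {
        printf("%zd\n", stream_send(16384)); /* 10000: channel filled up */
        return 0;
    }
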
@@ -651,23 +735,8 @@
 static s64 hvs_stream_has_space(struct vsock_sock *vsk)
 {
 	struct hvsock *hvs = vsk->trans;
-	struct vmbus_channel *chan = hvs->chan;
-	s64 ret;
 
-	ret = hvs_channel_writable_bytes(chan);
-	if (ret > 0)  {
-		hvs_clear_channel_pending_send_size(chan);
-	} else {
-		/* See hvs_channel_cb() */
-		hvs_set_channel_pending_send_size(chan);
-
-		/* Re-check the writable bytes to avoid race */
-		ret = hvs_channel_writable_bytes(chan);
-		if (ret > 0)
-			hvs_clear_channel_pending_send_size(chan);
-	}
-
-	return ret;
+	return hvs_channel_writable_bytes(hvs->chan);
 }
 
 static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 5d3cce9..082a309 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * virtio transport for vsock
  *
@@ -7,8 +8,6 @@
  *
  * Some of the code is taken from Gerd Hoffmann <kraxel@redhat.com>'s
  * early virtio-vsock proof-of-concept bits.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
  */
 #include <linux/spinlock.h>
 #include <linux/module.h>
@@ -39,6 +38,7 @@
 	 * must be accessed with tx_lock held.
 	 */
 	struct mutex tx_lock;
+	bool tx_run;
 
 	struct work_struct send_pkt_work;
 	spinlock_t send_pkt_list_lock;
@@ -54,6 +54,7 @@
 	 * must be accessed with rx_lock held.
 	 */
 	struct mutex rx_lock;
+	bool rx_run;
 	int rx_buf_nr;
 	int rx_buf_max_nr;
 
@@ -61,21 +62,28 @@
 	 * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
 	 */
 	struct mutex event_lock;
+	bool event_run;
 	struct virtio_vsock_event event_list[8];
 
 	u32 guest_cid;
 };
 
-static struct virtio_vsock *virtio_vsock_get(void)
-{
-	return the_virtio_vsock;
-}
-
 static u32 virtio_transport_get_local_cid(void)
 {
-	struct virtio_vsock *vsock = virtio_vsock_get();
+	struct virtio_vsock *vsock;
+	u32 ret;
 
-	return vsock->guest_cid;
+	rcu_read_lock();
+	vsock = rcu_dereference(the_virtio_vsock);
+	if (!vsock) {
+		ret = VMADDR_CID_ANY;
+		goto out_rcu;
+	}
+
+	ret = vsock->guest_cid;
+out_rcu:
+	rcu_read_unlock();
+	return ret;
 }
 
 static void virtio_transport_loopback_work(struct work_struct *work)
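
    The hunk above converts the_virtio_vsock from a bare global into an
    RCU-protected pointer: readers dereference it under rcu_read_lock() and
    must tolerate NULL, while removal clears the pointer and waits for a
    grace period before teardown. A minimal userspace sketch of the same
    pattern, assuming liburcu (whose API names mirror the kernel's);
    'struct device' here is illustrative.

    #include <stddef.h>
    #include <stdio.h>
    #include <urcu.h>                  /* liburcu */

    struct device { unsigned int cid; };

    static struct device *the_device;  /* RCU-protected global */

    static unsigned int get_local_cid(void)
    {
        struct device *d;
        unsigned int ret;

        rcu_read_lock();
        d = rcu_dereference(the_device);
        ret = d ? d->cid : 0;          /* 0 stands in for VMADDR_CID_ANY */
        rcu_read_unlock();
        return ret;
    }

    static void remove_device(struct device *d)
    {
        rcu_assign_pointer(the_device, NULL);
        synchronize_rcu();             /* no reader can still see 'd' */
        /* safe to tear down and free 'd' here */
        (void)d;
    }

    int main(void)
    {
        static struct device dev = { .cid = 3 };

        rcu_register_thread();         /* liburcu requirement */
        rcu_assign_pointer(the_device, &dev);
        printf("cid %u\n", get_local_cid());
        remove_device(&dev);
        rcu_unregister_thread();
        return 0;
    }
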
@@ -89,6 +97,10 @@
 	spin_unlock_bh(&vsock->loopback_list_lock);
 
 	mutex_lock(&vsock->rx_lock);
+
+	if (!vsock->rx_run)
+		goto out;
+
 	while (!list_empty(&pkts)) {
 		struct virtio_vsock_pkt *pkt;
 
@@ -97,6 +109,7 @@
 
 		virtio_transport_recv_pkt(pkt);
 	}
+out:
 	mutex_unlock(&vsock->rx_lock);
 }
 
@@ -125,6 +138,9 @@
 
 	mutex_lock(&vsock->tx_lock);
 
+	if (!vsock->tx_run)
+		goto out;
+
 	vq = vsock->vqs[VSOCK_VQ_TX];
 
 	for (;;) {
@@ -183,6 +199,7 @@
 	if (added)
 		virtqueue_kick(vq);
 
+out:
 	mutex_unlock(&vsock->tx_lock);
 
 	if (restart_rx)
@@ -195,14 +212,18 @@
 	struct virtio_vsock *vsock;
 	int len = pkt->len;
 
-	vsock = virtio_vsock_get();
+	rcu_read_lock();
+	vsock = rcu_dereference(the_virtio_vsock);
 	if (!vsock) {
 		virtio_transport_free_pkt(pkt);
-		return -ENODEV;
+		len = -ENODEV;
+		goto out_rcu;
 	}
 
-	if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid)
-		return virtio_transport_send_pkt_loopback(vsock, pkt);
+	if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) {
+		len = virtio_transport_send_pkt_loopback(vsock, pkt);
+		goto out_rcu;
+	}
 
 	if (pkt->reply)
 		atomic_inc(&vsock->queued_replies);
@@ -212,6 +233,9 @@
 	spin_unlock_bh(&vsock->send_pkt_list_lock);
 
 	queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
+
+out_rcu:
+	rcu_read_unlock();
 	return len;
 }
 
@@ -220,12 +244,14 @@
 {
 	struct virtio_vsock *vsock;
 	struct virtio_vsock_pkt *pkt, *n;
-	int cnt = 0;
+	int cnt = 0, ret;
 	LIST_HEAD(freeme);
 
-	vsock = virtio_vsock_get();
+	rcu_read_lock();
+	vsock = rcu_dereference(the_virtio_vsock);
 	if (!vsock) {
-		return -ENODEV;
+		ret = -ENODEV;
+		goto out_rcu;
 	}
 
 	spin_lock_bh(&vsock->send_pkt_list_lock);
@@ -253,7 +279,11 @@
 			queue_work(virtio_vsock_workqueue, &vsock->rx_work);
 	}
 
-	return 0;
+	ret = 0;
+
+out_rcu:
+	rcu_read_unlock();
+	return ret;
 }
 
 static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
@@ -277,6 +307,7 @@
 			break;
 		}
 
+		pkt->buf_len = buf_len;
 		pkt->len = buf_len;
 
 		sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
@@ -305,6 +336,10 @@
 
 	vq = vsock->vqs[VSOCK_VQ_TX];
 	mutex_lock(&vsock->tx_lock);
+
+	if (!vsock->tx_run)
+		goto out;
+
 	do {
 		struct virtio_vsock_pkt *pkt;
 		unsigned int len;
@@ -315,6 +350,8 @@
 			added = true;
 		}
 	} while (!virtqueue_enable_cb(vq));
+
+out:
 	mutex_unlock(&vsock->tx_lock);
 
 	if (added)
@@ -343,6 +380,9 @@
 
 	mutex_lock(&vsock->rx_lock);
 
+	if (!vsock->rx_run)
+		goto out;
+
 	do {
 		virtqueue_disable_cb(vq);
 		for (;;) {
@@ -452,6 +492,9 @@
 
 	mutex_lock(&vsock->event_lock);
 
+	if (!vsock->event_run)
+		goto out;
+
 	do {
 		struct virtio_vsock_event *event;
 		unsigned int len;
@@ -466,7 +509,7 @@
 	} while (!virtqueue_enable_cb(vq));
 
 	virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
-
+out:
 	mutex_unlock(&vsock->event_lock);
 }
 
@@ -563,7 +606,8 @@
 		return ret;
 
 	/* Only one virtio-vsock device per guest is supported */
-	if (the_virtio_vsock) {
+	if (rcu_dereference_protected(the_virtio_vsock,
+				lockdep_is_held(&the_virtio_vsock_mutex))) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -584,16 +628,10 @@
 
 	virtio_vsock_update_guest_cid(vsock);
 
-	ret = vsock_core_init(&virtio_transport.transport);
-	if (ret < 0)
-		goto out_vqs;
-
 	vsock->rx_buf_nr = 0;
 	vsock->rx_buf_max_nr = 0;
 	atomic_set(&vsock->queued_replies, 0);
 
-	vdev->priv = vsock;
-	the_virtio_vsock = vsock;
 	mutex_init(&vsock->tx_lock);
 	mutex_init(&vsock->rx_lock);
 	mutex_init(&vsock->event_lock);
@@ -607,19 +645,26 @@
 	INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
 	INIT_WORK(&vsock->loopback_work, virtio_transport_loopback_work);
 
+	mutex_lock(&vsock->tx_lock);
+	vsock->tx_run = true;
+	mutex_unlock(&vsock->tx_lock);
+
 	mutex_lock(&vsock->rx_lock);
 	virtio_vsock_rx_fill(vsock);
+	vsock->rx_run = true;
 	mutex_unlock(&vsock->rx_lock);
 
 	mutex_lock(&vsock->event_lock);
 	virtio_vsock_event_fill(vsock);
+	vsock->event_run = true;
 	mutex_unlock(&vsock->event_lock);
 
+	vdev->priv = vsock;
+	rcu_assign_pointer(the_virtio_vsock, vsock);
+
 	mutex_unlock(&the_virtio_vsock_mutex);
 	return 0;
 
-out_vqs:
-	vsock->vdev->config->del_vqs(vsock->vdev);
 out:
 	kfree(vsock);
 	mutex_unlock(&the_virtio_vsock_mutex);
@@ -631,12 +676,33 @@
 	struct virtio_vsock *vsock = vdev->priv;
 	struct virtio_vsock_pkt *pkt;
 
-	flush_work(&vsock->loopback_work);
-	flush_work(&vsock->rx_work);
-	flush_work(&vsock->tx_work);
-	flush_work(&vsock->event_work);
-	flush_work(&vsock->send_pkt_work);
+	mutex_lock(&the_virtio_vsock_mutex);
 
+	vdev->priv = NULL;
+	rcu_assign_pointer(the_virtio_vsock, NULL);
+	synchronize_rcu();
+
+	/* Reset all connected sockets when the device disappears */
+	vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+
+	/* Stop all work handlers to make sure no one is accessing the device,
+	 * so we can safely call vdev->config->reset().
+	 */
+	mutex_lock(&vsock->rx_lock);
+	vsock->rx_run = false;
+	mutex_unlock(&vsock->rx_lock);
+
+	mutex_lock(&vsock->tx_lock);
+	vsock->tx_run = false;
+	mutex_unlock(&vsock->tx_lock);
+
+	mutex_lock(&vsock->event_lock);
+	vsock->event_run = false;
+	mutex_unlock(&vsock->event_lock);
+
+	/* Flush all device writes and interrupts, device will not use any
+	 * more buffers.
+	 */
 	vdev->config->reset(vdev);
 
 	mutex_lock(&vsock->rx_lock);
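
    The removal path above quiesces the rx/tx/event work handlers by clearing
    a 'run' flag under the same mutex each handler takes, so any handler that
    observes the flag false bails out before touching the half-torn-down
    device. A pthread sketch of that pattern; names are stand-ins for the
    kernel mutex and work item.

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t rx_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool rx_run = true;

    static void rx_work(void)
    {
        pthread_mutex_lock(&rx_lock);
        if (!rx_run)
            goto out;                  /* device going away: do nothing */
        /* ... drain the rx virtqueue here ... */
    out:
        pthread_mutex_unlock(&rx_lock);
    }

    static void device_remove(void)
    {
        pthread_mutex_lock(&rx_lock);
        rx_run = false;                /* later handlers bail out early */
        pthread_mutex_unlock(&rx_lock);
        /* now safe to reset the device and delete the queues */
    }

    int main(void)
    {
        rx_work();                     /* runs normally */
        device_remove();
        rx_work();                     /* bails out: rx_run is false */
        return 0;
    }
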
@@ -667,13 +733,20 @@
 	}
 	spin_unlock_bh(&vsock->loopback_list_lock);
 
-	mutex_lock(&the_virtio_vsock_mutex);
-	the_virtio_vsock = NULL;
-	vsock_core_exit();
-	mutex_unlock(&the_virtio_vsock_mutex);
-
+	/* Delete virtqueues and flush outstanding callbacks if any */
 	vdev->config->del_vqs(vdev);
 
+	/* Other work items can be queued before 'config->del_vqs()', so flush
+	 * them all before freeing the vsock object to avoid a use-after-free.
+	 */
+	flush_work(&vsock->loopback_work);
+	flush_work(&vsock->rx_work);
+	flush_work(&vsock->tx_work);
+	flush_work(&vsock->event_work);
+	flush_work(&vsock->send_pkt_work);
+
+	mutex_unlock(&the_virtio_vsock_mutex);
+
 	kfree(vsock);
 }
 
@@ -702,15 +775,28 @@
 	virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
 	if (!virtio_vsock_workqueue)
 		return -ENOMEM;
+
+	ret = vsock_core_init(&virtio_transport.transport);
+	if (ret)
+		goto out_wq;
+
 	ret = register_virtio_driver(&virtio_vsock_driver);
 	if (ret)
-		destroy_workqueue(virtio_vsock_workqueue);
+		goto out_vci;
+
+	return 0;
+
+out_vci:
+	vsock_core_exit();
+out_wq:
+	destroy_workqueue(virtio_vsock_workqueue);
 	return ret;
 }
 
 static void __exit virtio_vsock_exit(void)
 {
 	unregister_virtio_driver(&virtio_vsock_driver);
+	vsock_core_exit();
 	destroy_workqueue(virtio_vsock_workqueue);
 }
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 3ae3a33..fb2060d 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1,11 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * common code for virtio vsock
  *
  * Copyright (C) 2013-2015 Red Hat, Inc.
  * Author: Asias He <asias@redhat.com>
  *         Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
  */
 #include <linux/spinlock.h>
 #include <linux/module.h>
@@ -27,6 +26,9 @@
 /* How long to wait for graceful shutdown of a connection */
 #define VSOCK_CLOSE_TIMEOUT (8 * HZ)
 
+/* Threshold for detecting small packets to copy */
+#define GOOD_COPY_LEN  128
+
 static const struct virtio_transport *virtio_transport_get_ops(void)
 {
 	const struct vsock_transport *t = vsock_core_get_transport();
@@ -65,6 +67,9 @@
 		pkt->buf = kmalloc(len, GFP_KERNEL);
 		if (!pkt->buf)
 			goto out_pkt;
+
+		pkt->buf_len = len;
+
 		err = memcpy_from_msg(pkt->buf, info->msg, len);
 		if (err)
 			goto out;
@@ -92,8 +97,17 @@
 	struct virtio_vsock_pkt *pkt = opaque;
 	struct af_vsockmon_hdr *hdr;
 	struct sk_buff *skb;
+	size_t payload_len;
+	void *payload_buf;
 
-	skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len,
+	/* A packet could be split to fit the RX buffer, so retrieve the
+	 * payload length from the header, and compute the buffer pointer
+	 * taking the offset into the original packet into account.
+	 */
+	payload_len = le32_to_cpu(pkt->hdr.len);
+	payload_buf = pkt->buf + pkt->off;
+
+	skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len,
 			GFP_ATOMIC);
 	if (!skb)
 		return NULL;
@@ -133,8 +147,8 @@
 
 	skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));
 
-	if (pkt->len) {
-		skb_put_data(skb, pkt->buf, pkt->len);
+	if (payload_len) {
+		skb_put_data(skb, payload_buf, payload_len);
 	}
 
 	return skb;
@@ -167,8 +181,8 @@
 	vvs = vsk->trans;
 
 	/* we can send less than pkt_len bytes */
-	if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
-		pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
+		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
 
 	/* virtio_transport_get_credit might return less than pkt_len credit */
 	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
@@ -190,10 +204,14 @@
 	return virtio_transport_get_ops()->send_pkt(pkt);
 }
 
-static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
+static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
 					struct virtio_vsock_pkt *pkt)
 {
+	if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
+		return false;
+
 	vvs->rx_bytes += pkt->len;
+	return true;
 }
 
 static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
@@ -205,10 +223,11 @@
 
 void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
 {
-	spin_lock_bh(&vvs->tx_lock);
+	spin_lock_bh(&vvs->rx_lock);
+	vvs->last_fwd_cnt = vvs->fwd_cnt;
 	pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
 	pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
-	spin_unlock_bh(&vvs->tx_lock);
+	spin_unlock_bh(&vvs->rx_lock);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
 
@@ -256,6 +275,7 @@
 	struct virtio_vsock_sock *vvs = vsk->trans;
 	struct virtio_vsock_pkt *pkt;
 	size_t bytes, total = 0;
+	u32 free_space;
 	int err = -EFAULT;
 
 	spin_lock_bh(&vvs->rx_lock);
@@ -286,11 +306,24 @@
 			virtio_transport_free_pkt(pkt);
 		}
 	}
+
+	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
+
 	spin_unlock_bh(&vvs->rx_lock);
 
-	/* Send a credit pkt to peer */
-	virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
-					    NULL);
+	/* To reduce the number of credit update messages,
+	 * don't update credits as long as lots of space is available.
+	 * Note: the limit chosen here is arbitrary. Setting the limit
+	 * too high causes extra messages. Too low causes transmitter
+	 * stalls. As stalls are in theory more expensive than extra
+	 * messages, we set the limit to a high value. TODO: experiment
+	 * with different values.
+	 */
+	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+		virtio_transport_send_credit_update(vsk,
+						    VIRTIO_VSOCK_TYPE_STREAM,
+						    NULL);
+	}
 
 	return total;
 
@@ -429,6 +462,9 @@
 		vvs->buf_size_max = val;
 	vvs->buf_size = val;
 	vvs->buf_alloc = val;
+
+	virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
+					    NULL);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size);
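
    The two hunks above damp credit-update traffic: the receiver advertises
    freed space only once the sender's (worst-case) view of free space could
    drop below one maximum packet. A sketch of the bookkeeping; the counter
    names follow the kernel's and the threshold mirrors
    VIRTIO_VSOCK_MAX_PKT_BUF_SIZE.

    #include <stdbool.h>
    #include <stdint.h>

    #define MAX_PKT_BUF_SIZE (1024u * 64u)

    struct credit {
        uint32_t buf_alloc;    /* receive buffer the peer may fill */
        uint32_t fwd_cnt;      /* bytes consumed by the application */
        uint32_t last_fwd_cnt; /* fwd_cnt last advertised to the peer */
    };

    static bool need_credit_update(struct credit *c)
    {
        /* lower bound on the space the peer still thinks it has */
        uint32_t free_space = c->buf_alloc - (c->fwd_cnt - c->last_fwd_cnt);

        return free_space < MAX_PKT_BUF_SIZE;
    }

    int main(void)
    {
        struct credit c = { .buf_alloc = 256 * 1024,
                            .fwd_cnt = 200 * 1024,
                            .last_fwd_cnt = 0 };

        return need_credit_update(&c) ? 0 : 1; /* 56K left: update needed */
    }
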
 
@@ -662,6 +698,8 @@
  */
 static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
 {
+	const struct virtio_transport *t;
+	struct virtio_vsock_pkt *reply;
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_RST,
 		.type = le16_to_cpu(pkt->hdr.type),
@@ -672,15 +710,21 @@
 	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
 		return 0;
 
-	pkt = virtio_transport_alloc_pkt(&info, 0,
-					 le64_to_cpu(pkt->hdr.dst_cid),
-					 le32_to_cpu(pkt->hdr.dst_port),
-					 le64_to_cpu(pkt->hdr.src_cid),
-					 le32_to_cpu(pkt->hdr.src_port));
-	if (!pkt)
+	reply = virtio_transport_alloc_pkt(&info, 0,
+					   le64_to_cpu(pkt->hdr.dst_cid),
+					   le32_to_cpu(pkt->hdr.dst_port),
+					   le64_to_cpu(pkt->hdr.src_cid),
+					   le32_to_cpu(pkt->hdr.src_port));
+	if (!reply)
 		return -ENOMEM;
 
-	return virtio_transport_get_ops()->send_pkt(pkt);
+	t = virtio_transport_get_ops();
+	if (!t) {
+		virtio_transport_free_pkt(reply);
+		return -ENOTCONN;
+	}
+
+	return t->send_pkt(reply);
 }
 
 static void virtio_transport_wait_close(struct sock *sk, long timeout)
@@ -778,12 +822,19 @@
 
 void virtio_transport_release(struct vsock_sock *vsk)
 {
+	struct virtio_vsock_sock *vvs = vsk->trans;
+	struct virtio_vsock_pkt *pkt, *tmp;
 	struct sock *sk = &vsk->sk;
 	bool remove_sock = true;
 
-	lock_sock(sk);
+	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 	if (sk->sk_type == SOCK_STREAM)
 		remove_sock = virtio_transport_close(vsk);
+
+	list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
+		list_del(&pkt->list);
+		virtio_transport_free_pkt(pkt);
+	}
 	release_sock(sk);
 
 	if (remove_sock)
@@ -827,24 +878,64 @@
 	return err;
 }
 
+static void
+virtio_transport_recv_enqueue(struct vsock_sock *vsk,
+			      struct virtio_vsock_pkt *pkt)
+{
+	struct virtio_vsock_sock *vvs = vsk->trans;
+	bool can_enqueue, free_pkt = false;
+
+	pkt->len = le32_to_cpu(pkt->hdr.len);
+	pkt->off = 0;
+
+	spin_lock_bh(&vvs->rx_lock);
+
+	can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
+	if (!can_enqueue) {
+		free_pkt = true;
+		goto out;
+	}
+
+	/* Try to copy small packets into the buffer of the last packet queued,
+	 * to avoid wasting memory queueing an entire buffer for a small
+	 * payload.
+	 */
+	if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
+		struct virtio_vsock_pkt *last_pkt;
+
+		last_pkt = list_last_entry(&vvs->rx_queue,
+					   struct virtio_vsock_pkt, list);
+
+		/* If there is space in the last packet queued, we copy the
+		 * new packet in its buffer.
+		 */
+		if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
+			memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
+			       pkt->len);
+			last_pkt->len += pkt->len;
+			free_pkt = true;
+			goto out;
+		}
+	}
+
+	list_add_tail(&pkt->list, &vvs->rx_queue);
+
+out:
+	spin_unlock_bh(&vvs->rx_lock);
+	if (free_pkt)
+		virtio_transport_free_pkt(pkt);
+}
+
 static int
 virtio_transport_recv_connected(struct sock *sk,
 				struct virtio_vsock_pkt *pkt)
 {
 	struct vsock_sock *vsk = vsock_sk(sk);
-	struct virtio_vsock_sock *vvs = vsk->trans;
 	int err = 0;
 
 	switch (le16_to_cpu(pkt->hdr.op)) {
 	case VIRTIO_VSOCK_OP_RW:
-		pkt->len = le32_to_cpu(pkt->hdr.len);
-		pkt->off = 0;
-
-		spin_lock_bh(&vvs->rx_lock);
-		virtio_transport_inc_rx_pkt(vvs, pkt);
-		list_add_tail(&pkt->list, &vvs->rx_queue);
-		spin_unlock_bh(&vvs->rx_lock);
-
+		virtio_transport_recv_enqueue(vsk, pkt);
 		sk->sk_data_ready(sk);
 		return err;
 	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
@@ -856,8 +947,12 @@
 		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
 			vsk->peer_shutdown |= SEND_SHUTDOWN;
 		if (vsk->peer_shutdown == SHUTDOWN_MASK &&
-		    vsock_stream_has_data(vsk) <= 0)
-			sk->sk_state = TCP_CLOSING;
+		    vsock_stream_has_data(vsk) <= 0 &&
+		    !sock_flag(sk, SOCK_DONE)) {
+			(void)virtio_transport_reset(vsk, NULL);
+
+			virtio_transport_do_close(vsk, true);
+		}
 		if (le32_to_cpu(pkt->hdr.flags))
 			sk->sk_state_change(sk);
 		break;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index cb332ad..8c9c4ed 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * VMware vSockets Driver
  *
  * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/types.h>
@@ -264,6 +256,31 @@
 }
 
 static int
+vmci_transport_alloc_send_control_pkt(struct sockaddr_vm *src,
+				      struct sockaddr_vm *dst,
+				      enum vmci_transport_packet_type type,
+				      u64 size,
+				      u64 mode,
+				      struct vmci_transport_waiting_info *wait,
+				      u16 proto,
+				      struct vmci_handle handle)
+{
+	struct vmci_transport_packet *pkt;
+	int err;
+
+	pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	err = __vmci_transport_send_control_pkt(pkt, src, dst, type, size,
+						mode, wait, proto, handle,
+						true);
+	kfree(pkt);
+
+	return err;
+}
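Editor's note: hoisting the kmalloc()/kfree() pair into this helper gives callers that have no usable socket (such as the reworked send_reset below) one leak-free entry point. A reduced sketch of the shape, with hypothetical names:

#include <errno.h>
#include <stdlib.h>

struct ctrl_pkt { unsigned char raw[64]; };

static int do_send(struct ctrl_pkt *pkt);	/* hypothetical transport hook */

/* The scratch packet never outlives the call, whether the send
 * succeeds or fails — callers cannot leak it. */
static int send_with_scratch(void)
{
	struct ctrl_pkt *pkt = malloc(sizeof(*pkt));
	int err;

	if (!pkt)
		return -ENOMEM;
	err = do_send(pkt);
	free(pkt);		/* freed on success and on error alike */
	return err;
}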
+
+static int
 vmci_transport_send_control_pkt(struct sock *sk,
 				enum vmci_transport_packet_type type,
 				u64 size,
@@ -272,9 +289,7 @@
 				u16 proto,
 				struct vmci_handle handle)
 {
-	struct vmci_transport_packet *pkt;
 	struct vsock_sock *vsk;
-	int err;
 
 	vsk = vsock_sk(sk);
 
@@ -284,17 +299,10 @@
 	if (!vsock_addr_bound(&vsk->remote_addr))
 		return -EINVAL;
 
-	pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
-	if (!pkt)
-		return -ENOMEM;
-
-	err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
-						&vsk->remote_addr, type, size,
-						mode, wait, proto, handle,
-						true);
-	kfree(pkt);
-
-	return err;
+	return vmci_transport_alloc_send_control_pkt(&vsk->local_addr,
+						     &vsk->remote_addr,
+						     type, size, mode,
+						     wait, proto, handle);
 }
 
 static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
@@ -312,12 +320,29 @@
 static int vmci_transport_send_reset(struct sock *sk,
 				     struct vmci_transport_packet *pkt)
 {
+	struct sockaddr_vm *dst_ptr;
+	struct sockaddr_vm dst;
+	struct vsock_sock *vsk;
+
 	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
 		return 0;
-	return vmci_transport_send_control_pkt(sk,
-					VMCI_TRANSPORT_PACKET_TYPE_RST,
-					0, 0, NULL, VSOCK_PROTO_INVALID,
-					VMCI_INVALID_HANDLE);
+
+	vsk = vsock_sk(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr))
+		return -EINVAL;
+
+	if (vsock_addr_bound(&vsk->remote_addr)) {
+		dst_ptr = &vsk->remote_addr;
+	} else {
+		vsock_addr_init(&dst, pkt->dg.src.context,
+				pkt->src_port);
+		dst_ptr = &dst;
+	}
+	return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, dst_ptr,
+					     VMCI_TRANSPORT_PACKET_TYPE_RST,
+					     0, 0, NULL, VSOCK_PROTO_INVALID,
+					     VMCI_INVALID_HANDLE);
 }
 
 static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
@@ -1618,6 +1643,10 @@
 
 static void vmci_transport_destruct(struct vsock_sock *vsk)
 {
+	/* transport can be NULL if we hit a failure at init() time */
+	if (!vmci_trans(vsk))
+		return;
+
 	/* Ensure that the detach callback doesn't use the sk/vsk
 	 * we are about to destruct.
 	 */
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index 1820e74..1ca1e86 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -1,16 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * VMware vSockets Driver
  *
  * Copyright (C) 2013 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #ifndef _VMCI_TRANSPORT_H_
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 41fb427..d69fc4b 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * VMware vSockets Driver
  *
  * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/types.h>
diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h
index 3c464d3..7843f08 100644
--- a/net/vmw_vsock/vmci_transport_notify.h
+++ b/net/vmw_vsock/vmci_transport_notify.h
@@ -1,16 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * VMware vSockets Driver
  *
  * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #ifndef __VMCI_TRANSPORT_NOTIFY_H__
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index 0cc84f2..0f36d7c 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * VMware vSockets Driver
  *
  * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/types.h>
diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c
index 82486ee..909de26 100644
--- a/net/vmw_vsock/vsock_addr.c
+++ b/net/vmw_vsock/vsock_addr.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * VMware vSockets Driver
  *
  * Copyright (C) 2007-2012 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/types.h>
diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig
index e4d97ab..d13762b 100644
--- a/net/wimax/Kconfig
+++ b/net/wimax/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # WiMAX LAN device configuration
 #
diff --git a/net/wimax/debug-levels.h b/net/wimax/debug-levels.h
index 0975adb..ebc287c 100644
--- a/net/wimax/debug-levels.h
+++ b/net/wimax/debug-levels.h
@@ -1,24 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux WiMAX Stack
  * Debug levels control file for the wimax module
  *
- *
  * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
  * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
  */
 #ifndef __debug_levels__h__
 #define __debug_levels__h__
diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c
index 2451484..3c54bb6 100644
--- a/net/wimax/debugfs.c
+++ b/net/wimax/debugfs.c
@@ -1,24 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux WiMAX
  * Debugfs support
  *
- *
  * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com>
  * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
  */
 #include <linux/debugfs.h>
 #include <linux/wimax.h>
@@ -27,49 +13,23 @@
 #define D_SUBMODULE debugfs
 #include "debug-levels.h"
 
-
-#define __debugfs_register(prefix, name, parent)			\
-do {									\
-	result = d_level_register_debugfs(prefix, name, parent);	\
-	if (result < 0)							\
-		goto error;						\
-} while (0)
-
-
-int wimax_debugfs_add(struct wimax_dev *wimax_dev)
+void wimax_debugfs_add(struct wimax_dev *wimax_dev)
 {
-	int result;
 	struct net_device *net_dev = wimax_dev->net_dev;
-	struct device *dev = net_dev->dev.parent;
 	struct dentry *dentry;
 	char buf[128];
 
 	snprintf(buf, sizeof(buf), "wimax:%s", net_dev->name);
 	dentry = debugfs_create_dir(buf, NULL);
-	result = PTR_ERR(dentry);
-	if (IS_ERR(dentry)) {
-		if (result == -ENODEV)
-			result = 0;	/* No debugfs support */
-		else
-			dev_err(dev, "Can't create debugfs dentry: %d\n",
-				result);
-		goto out;
-	}
 	wimax_dev->debugfs_dentry = dentry;
-	__debugfs_register("wimax_dl_", debugfs, dentry);
-	__debugfs_register("wimax_dl_", id_table, dentry);
-	__debugfs_register("wimax_dl_", op_msg, dentry);
-	__debugfs_register("wimax_dl_", op_reset, dentry);
-	__debugfs_register("wimax_dl_", op_rfkill, dentry);
-	__debugfs_register("wimax_dl_", op_state_get, dentry);
-	__debugfs_register("wimax_dl_", stack, dentry);
-	result = 0;
-out:
-	return result;
 
-error:
-	debugfs_remove_recursive(wimax_dev->debugfs_dentry);
-	return result;
+	d_level_register_debugfs("wimax_dl_", debugfs, dentry);
+	d_level_register_debugfs("wimax_dl_", id_table, dentry);
+	d_level_register_debugfs("wimax_dl_", op_msg, dentry);
+	d_level_register_debugfs("wimax_dl_", op_reset, dentry);
+	d_level_register_debugfs("wimax_dl_", op_rfkill, dentry);
+	d_level_register_debugfs("wimax_dl_", op_state_get, dentry);
+	d_level_register_debugfs("wimax_dl_", stack, dentry);
 }
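Editor's note: wimax_debugfs_add() can become void because debugfs creation is now treated as best-effort — callers are expected to ignore failures rather than unwind. A minimal kernel-style sketch of that convention (the foo_* names are hypothetical; it assumes the modern debugfs behavior where an error-pointer parent is tolerated by later calls):

/* Hypothetical device, for illustration only. */
struct foo_dev {
	struct dentry *dir;
	u32 state;
};

/* Best-effort diagnostics: nothing on the debugfs path propagates errors. */
static void foo_debugfs_add(struct foo_dev *fd)
{
	fd->dir = debugfs_create_dir("foo", NULL);
	/* Even if 'dir' holds an error pointer, passing it as the parent
	 * below is harmless; the entries simply are not created. */
	debugfs_create_u32("state", 0444, fd->dir, &fd->state);
}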
 
 void wimax_debugfs_rm(struct wimax_dev *wimax_dev)
diff --git a/net/wimax/id-table.c b/net/wimax/id-table.c
index a21508d..02eee37 100644
--- a/net/wimax/id-table.c
+++ b/net/wimax/id-table.c
@@ -1,26 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux WiMAX
 * Mapping of generic netlink family IDs to net devices
  *
- *
  * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com>
  * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
  * We assign a single generic netlink family ID to each device (to
  * simplify lookup).
  *
diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c
index 101b2fa..6460b57 100644
--- a/net/wimax/op-msg.c
+++ b/net/wimax/op-msg.c
@@ -1,26 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux WiMAX
  * Generic messaging interface between userspace and driver/device
  *
- *
  * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
  * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
  * This implements a direct communication channel between user space and
  * the driver/device, by which free form messages can be sent back and
  * forth.
diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c
index a420791..9899b2e 100644
--- a/net/wimax/op-reset.c
+++ b/net/wimax/op-reset.c
@@ -1,26 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux WiMAX
  * Implement and export a method for resetting a WiMAX device
  *
- *
  * Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com>
  * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
  * This implements a simple synchronous call to reset a WiMAX device.
  *
  * Resets aim at being warm, keeping the device handles active;
diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c
index 477364a..248d10b 100644
--- a/net/wimax/op-rfkill.c
+++ b/net/wimax/op-rfkill.c
@@ -1,26 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux WiMAX
  * RF-kill framework integration
  *
- *
  * Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com>
  * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
 * This integrates into the Linux Kernel rfkill subsystem so that the
  * drivers just have to do the bare minimal work, which is providing a
  * method to set the software RF-Kill switch and to report changes in
diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c
index e6788d2..5bc712d 100644
--- a/net/wimax/op-state-get.c
+++ b/net/wimax/op-state-get.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux WiMAX
  * Implement and export a method for getting a WiMAX device current state
@@ -7,20 +8,6 @@
  * Based on previous WiMAX core work by:
  *  Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com>
  *  Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
  */
 
 #include <net/wimax.h>
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index a630781..4b9b1c5 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -1,26 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux WiMAX
  * Initialization, addition and removal of wimax devices
  *
- *
  * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com>
  * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
  * This implements:
  *
  *   - basic life cycle of 'struct wimax_dev' [wimax_dev_*()]; on
@@ -419,26 +404,26 @@
 static const struct genl_ops wimax_gnl_ops[] = {
 	{
 		.cmd = WIMAX_GNL_OP_MSG_FROM_USER,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = wimax_gnl_policy,
 		.doit = wimax_gnl_doit_msg_from_user,
 	},
 	{
 		.cmd = WIMAX_GNL_OP_RESET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = wimax_gnl_policy,
 		.doit = wimax_gnl_doit_reset,
 	},
 	{
 		.cmd = WIMAX_GNL_OP_RFKILL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = wimax_gnl_policy,
 		.doit = wimax_gnl_doit_rfkill,
 	},
 	{
 		.cmd = WIMAX_GNL_OP_STATE_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_ADMIN_PERM,
-		.policy = wimax_gnl_policy,
 		.doit = wimax_gnl_doit_state_get,
 	},
 };
@@ -496,12 +481,7 @@
 	/* Set up user-space interaction */
 	mutex_lock(&wimax_dev->mutex);
 	wimax_id_table_add(wimax_dev);
-	result = wimax_debugfs_add(wimax_dev);
-	if (result < 0) {
-		dev_err(dev, "cannot initialize debugfs: %d\n",
-			result);
-		goto error_debugfs_add;
-	}
+	wimax_debugfs_add(wimax_dev);
 
 	__wimax_state_set(wimax_dev, WIMAX_ST_DOWN);
 	mutex_unlock(&wimax_dev->mutex);
@@ -513,10 +493,6 @@
 	d_fnend(3, dev, "(wimax_dev %p net_dev %p) = 0\n", wimax_dev, net_dev);
 	return 0;
 
-error_debugfs_add:
-	wimax_id_table_rm(wimax_dev);
-	mutex_unlock(&wimax_dev->mutex);
-	wimax_rfkill_rm(wimax_dev);
 error_rfkill_add:
 	d_fnend(3, dev, "(wimax_dev %p net_dev %p) = %d\n",
 		wimax_dev, net_dev, result);
@@ -582,6 +558,7 @@
 	.version = WIMAX_GNL_VERSION,
 	.hdrsize = 0,
 	.maxattr = WIMAX_GNL_ATTR_MAX,
+	.policy = wimax_gnl_policy,
 	.module = THIS_MODULE,
 	.ops = wimax_gnl_ops,
 	.n_ops = ARRAY_SIZE(wimax_gnl_ops),
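Editor's note: the per-op .policy pointers move to a single family-wide policy — the generic netlink core now validates attributes itself, and GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP opts an individual op back into the legacy, permissive checking. A reduced sketch of the resulting layout (FOO_* names hypothetical, fields abridged):

static const struct genl_ops foo_gnl_ops[] = {
	{
		.cmd = FOO_CMD_GET,
		/* keep legacy (non-strict) validation for this op only */
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = foo_doit_get,
	},
};

static struct genl_family foo_gnl_family = {
	.name = "foo",
	.maxattr = FOO_ATTR_MAX,
	.policy = foo_gnl_policy,	/* one policy covers every op */
	.module = THIS_MODULE,
	.ops = foo_gnl_ops,
	.n_ops = ARRAY_SIZE(foo_gnl_ops),
};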
diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h
index 733c4bf..4075120 100644
--- a/net/wimax/wimax-internal.h
+++ b/net/wimax/wimax-internal.h
@@ -1,26 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux WiMAX
  * Internal API for kernel space WiMAX stack
  *
- *
  * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com>
  * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
  * This header file is for declarations and definitions internal to
  * the WiMAX stack. For public APIs and documentation, see
  * include/net/wimax.h and include/linux/wimax.h.
@@ -72,13 +57,10 @@
 void __wimax_state_change(struct wimax_dev *, enum wimax_st);
 
 #ifdef CONFIG_DEBUG_FS
-int wimax_debugfs_add(struct wimax_dev *);
+void wimax_debugfs_add(struct wimax_dev *);
 void wimax_debugfs_rm(struct wimax_dev *);
 #else
-static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev)
-{
-	return 0;
-}
+static inline void wimax_debugfs_add(struct wimax_dev *wimax_dev) {}
 static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {}
 #endif
 
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 4172204..63cf713 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config WIRELESS_EXT
 	bool
 
@@ -165,7 +166,7 @@
 
 	  If this causes your applications to misbehave you should fix your
 	  applications instead -- they need to register their network
-	  latency requirement, see Documentation/power/pm_qos_interface.txt.
+	  latency requirement, see Documentation/power/pm_qos_interface.rst.
 
 config CFG80211_DEBUGFS
 	bool "cfg80211 DebugFS entries"
@@ -212,12 +213,16 @@
 
 config LIB80211_CRYPT_WEP
 	tristate
+	select CRYPTO_LIB_ARC4
 
 config LIB80211_CRYPT_CCMP
 	tristate
+	select CRYPTO_AES
+	select CRYPTO_CCM
 
 config LIB80211_CRYPT_TKIP
 	tristate
+	select CRYPTO_LIB_ARC4
 
 config LIB80211_DEBUG
 	bool "lib80211 debugging messages"
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 1d84f91..2eee939 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -12,6 +12,7 @@
 
 cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o
 cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o
+cfg80211-y += pmsr.o
 cfg80211-$(CONFIG_OF) += of.o
 cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o
 cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o
@@ -38,6 +39,7 @@
 	@(set -e; \
 	  allf=""; \
 	  for f in $^ ; do \
+	      test -f $$f || continue;\
 	      # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
 	      thisf=$$(od -An -v -tx1 < $$f | \
 	                   sed -e 's/ /\n/g' | \
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 882d97b..550ac9d 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -41,6 +41,8 @@
 		cfg80211_sched_dfs_chan_update(rdev);
 	}
 
+	schedule_work(&cfg80211_disconnect_work);
+
 	return err;
 }
 
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 2db713d..fcac5c6 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,6 +6,7 @@
  *
  * Copyright 2009	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright 2018       Intel Corporation
  */
 
 #include <linux/export.h>
@@ -13,6 +14,11 @@
 #include "core.h"
 #include "rdev-ops.h"
 
+static bool cfg80211_valid_60g_freq(u32 freq)
+{
+	return freq >= 58320 && freq <= 70200;
+}
+
 void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
 			     struct ieee80211_channel *chan,
 			     enum nl80211_channel_type chan_type)
@@ -22,6 +28,8 @@
 
 	chandef->chan = chan;
 	chandef->center_freq2 = 0;
+	chandef->edmg.bw_config = 0;
+	chandef->edmg.channels = 0;
 
 	switch (chan_type) {
 	case NL80211_CHAN_NO_HT:
@@ -46,6 +54,91 @@
 }
 EXPORT_SYMBOL(cfg80211_chandef_create);
 
+static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef)
+{
+	int max_contiguous = 0;
+	int num_of_enabled = 0;
+	int contiguous = 0;
+	int i;
+
+	if (!chandef->edmg.channels || !chandef->edmg.bw_config)
+		return false;
+
+	if (!cfg80211_valid_60g_freq(chandef->chan->center_freq))
+		return false;
+
+	for (i = 0; i < 6; i++) {
+		if (chandef->edmg.channels & BIT(i)) {
+			contiguous++;
+			num_of_enabled++;
+		} else {
+			contiguous = 0;
+		}
+
+		max_contiguous = max(contiguous, max_contiguous);
+	}
+	/* basic verification of edmg configuration according to
+	 * IEEE P802.11ay/D4.0 section 9.4.2.251
+	 */
+	/* check bw_config against contiguous edmg channels */
+	switch (chandef->edmg.bw_config) {
+	case IEEE80211_EDMG_BW_CONFIG_4:
+	case IEEE80211_EDMG_BW_CONFIG_8:
+	case IEEE80211_EDMG_BW_CONFIG_12:
+		if (max_contiguous < 1)
+			return false;
+		break;
+	case IEEE80211_EDMG_BW_CONFIG_5:
+	case IEEE80211_EDMG_BW_CONFIG_9:
+	case IEEE80211_EDMG_BW_CONFIG_13:
+		if (max_contiguous < 2)
+			return false;
+		break;
+	case IEEE80211_EDMG_BW_CONFIG_6:
+	case IEEE80211_EDMG_BW_CONFIG_10:
+	case IEEE80211_EDMG_BW_CONFIG_14:
+		if (max_contiguous < 3)
+			return false;
+		break;
+	case IEEE80211_EDMG_BW_CONFIG_7:
+	case IEEE80211_EDMG_BW_CONFIG_11:
+	case IEEE80211_EDMG_BW_CONFIG_15:
+		if (max_contiguous < 4)
+			return false;
+		break;
+
+	default:
+		return false;
+	}
+
+	/* check bw_config against aggregated (non-contiguous) edmg channels */
+	switch (chandef->edmg.bw_config) {
+	case IEEE80211_EDMG_BW_CONFIG_4:
+	case IEEE80211_EDMG_BW_CONFIG_5:
+	case IEEE80211_EDMG_BW_CONFIG_6:
+	case IEEE80211_EDMG_BW_CONFIG_7:
+		break;
+	case IEEE80211_EDMG_BW_CONFIG_8:
+	case IEEE80211_EDMG_BW_CONFIG_9:
+	case IEEE80211_EDMG_BW_CONFIG_10:
+	case IEEE80211_EDMG_BW_CONFIG_11:
+		if (num_of_enabled < 2)
+			return false;
+		break;
+	case IEEE80211_EDMG_BW_CONFIG_12:
+	case IEEE80211_EDMG_BW_CONFIG_13:
+	case IEEE80211_EDMG_BW_CONFIG_14:
+	case IEEE80211_EDMG_BW_CONFIG_15:
+		if (num_of_enabled < 4 || max_contiguous < 2)
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
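Editor's note: the validity check above boils down to two counts over the 6-bit EDMG channel mask — how many channels are enabled, and how long the longest contiguous run is. A self-contained C sketch of that scan:

/* Longest run of consecutive set bits among the low 'nbits' bits,
 * matching the loop in cfg80211_edmg_chandef_valid(). */
static int max_contiguous_bits(unsigned int mask, int nbits)
{
	int best = 0, run = 0;

	for (int i = 0; i < nbits; i++) {
		if (mask & (1u << i))
			run++;
		else
			run = 0;
		if (run > best)
			best = run;
	}
	return best;
}
/* Example: max_contiguous_bits(0x2d, 6) == 2 — channels {1,3,4,6}
 * give runs of 1, 2, and 1. */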
+
 bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
 {
 	u32 control_freq;
@@ -111,6 +204,15 @@
 		return false;
 	}
 
+	/* channel 14 is only for IEEE 802.11b */
+	if (chandef->center_freq1 == 2484 &&
+	    chandef->width != NL80211_CHAN_WIDTH_20_NOHT)
+		return false;
+
+	if (cfg80211_chandef_is_edmg(chandef) &&
+	    !cfg80211_edmg_chandef_valid(chandef))
+		return false;
+
 	return true;
 }
 EXPORT_SYMBOL(cfg80211_chandef_valid);
@@ -720,12 +822,66 @@
 	return true;
 }
 
+/* check if the operating channels are valid and supported */
+static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels,
+				 enum ieee80211_edmg_bw_config edmg_bw_config,
+				 int primary_channel,
+				 struct ieee80211_edmg *edmg_cap)
+{
+	struct ieee80211_channel *chan;
+	int i, freq;
+	int channels_counter = 0;
+
+	if (!edmg_channels && !edmg_bw_config)
+		return true;
+
+	if ((!edmg_channels && edmg_bw_config) ||
+	    (edmg_channels && !edmg_bw_config))
+		return false;
+
+	if (!(edmg_channels & BIT(primary_channel - 1)))
+		return false;
+
+	/* 60GHz channels 1..6 */
+	for (i = 0; i < 6; i++) {
+		if (!(edmg_channels & BIT(i)))
+			continue;
+
+		if (!(edmg_cap->channels & BIT(i)))
+			return false;
+
+		channels_counter++;
+
+		freq = ieee80211_channel_to_frequency(i + 1,
+						      NL80211_BAND_60GHZ);
+		chan = ieee80211_get_channel(wiphy, freq);
+		if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
+			return false;
+	}
+
+	/* IEEE802.11 allows max 4 channels */
+	if (channels_counter > 4)
+		return false;
+
+	/* check bw_config is a subset of what the driver supports
+	 * (see IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13)
+	 */
+	if ((edmg_bw_config % 4) > (edmg_cap->bw_config % 4))
+		return false;
+
+	if (edmg_bw_config > edmg_cap->bw_config)
+		return false;
+
+	return true;
+}
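Editor's note: the final two comparisons encode a subset test. On one reading of IEEE P802.11ay/D4.0 Table 13 (cited in the code), bw_config values repeat in groups of four, so the residue mod 4 ranks the contiguous width while the raw value also ranks the aggregation tier. A sketch of the predicate:

#include <stdbool.h>

/* A request is usable only if it needs neither a wider contiguous
 * channel nor a higher aggregation tier than the hardware reports. */
static bool bw_config_is_subset(int requested, int supported)
{
	if ((requested % 4) > (supported % 4))
		return false;		/* wider contiguous channel */
	return requested <= supported;	/* higher aggregation tier */
}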
+
 bool cfg80211_chandef_usable(struct wiphy *wiphy,
 			     const struct cfg80211_chan_def *chandef,
 			     u32 prohibited_flags)
 {
 	struct ieee80211_sta_ht_cap *ht_cap;
 	struct ieee80211_sta_vht_cap *vht_cap;
+	struct ieee80211_edmg *edmg_cap;
 	u32 width, control_freq, cap;
 
 	if (WARN_ON(!cfg80211_chandef_valid(chandef)))
@@ -733,6 +889,15 @@
 
 	ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap;
 	vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap;
+	edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap;
+
+	if (edmg_cap->channels &&
+	    !cfg80211_edmg_usable(wiphy,
+				  chandef->edmg.channels,
+				  chandef->edmg.bw_config,
+				  chandef->chan->hw_value,
+				  edmg_cap))
+		return false;
 
 	control_freq = chandef->chan->center_freq;
 
@@ -747,6 +912,7 @@
 	case NL80211_CHAN_WIDTH_20:
 		if (!ht_cap->ht_supported)
 			return false;
+		/* fall through */
 	case NL80211_CHAN_WIDTH_20_NOHT:
 		prohibited_flags |= IEEE80211_CHAN_NO_20MHZ;
 		width = 20;
@@ -769,6 +935,7 @@
 		cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
 		if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
 			return false;
+		/* fall through */
 	case NL80211_CHAN_WIDTH_80:
 		if (!vht_cap->vht_supported)
 			return false;
@@ -891,7 +1058,8 @@
 		if (chan == other_chan)
 			return true;
 
-		if (chan->band != NL80211_BAND_5GHZ)
+		if (chan->band != NL80211_BAND_5GHZ &&
+		    chan->band != NL80211_BAND_6GHZ)
 			continue;
 
 		r1 = cfg80211_get_unii(chan->center_freq);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a88551f..3505137 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1,9 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is the linux wireless configuration interface.
  *
  * Copyright 2006-2010		Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2015-2017	Intel Deutschland GmbH
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -140,12 +142,10 @@
 	if (result)
 		return result;
 
-	if (rdev->wiphy.debugfsdir &&
-	    !debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
-			    rdev->wiphy.debugfsdir,
-			    rdev->wiphy.debugfsdir->d_parent,
-			    newname))
-		pr_err("failed to rename debugfs dir to %s!\n", newname);
+	if (rdev->wiphy.debugfsdir)
+		debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
+			       rdev->wiphy.debugfsdir,
+			       rdev->wiphy.debugfsdir->d_parent, newname);
 
 	nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
 
@@ -190,11 +190,25 @@
 		return err;
 	}
 
+	list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+		if (!wdev->netdev)
+			continue;
+		nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
+	}
+	nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY);
+
 	wiphy_net_set(&rdev->wiphy, net);
 
 	err = device_rename(&rdev->wiphy.dev, dev_name(&rdev->wiphy.dev));
 	WARN_ON(err);
 
+	nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+	list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+		if (!wdev->netdev)
+			continue;
+		nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
+	}
+
 	return 0;
 }
 
@@ -286,12 +300,13 @@
 	return 0;
 }
 
-static void cfg80211_rfkill_sync_work(struct work_struct *work)
+static void cfg80211_rfkill_block_work(struct work_struct *work)
 {
 	struct cfg80211_registered_device *rdev;
 
-	rdev = container_of(work, struct cfg80211_registered_device, rfkill_sync);
-	cfg80211_rfkill_set_block(rdev, rfkill_blocked(rdev->rfkill));
+	rdev = container_of(work, struct cfg80211_registered_device,
+			    rfkill_block);
+	cfg80211_rfkill_set_block(rdev, true);
 }
 
 static void cfg80211_event_work(struct work_struct *work)
@@ -498,11 +513,11 @@
 				   &rdev->rfkill_ops, rdev);
 
 	if (!rdev->rfkill) {
-		kfree(rdev);
+		wiphy_free(&rdev->wiphy);
 		return NULL;
 	}
 
-	INIT_WORK(&rdev->rfkill_sync, cfg80211_rfkill_sync_work);
+	INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work);
 	INIT_WORK(&rdev->conn_work, cfg80211_conn_work);
 	INIT_WORK(&rdev->event_work, cfg80211_event_work);
 
@@ -664,6 +679,34 @@
 		return -EINVAL;
 #endif
 
+	if (WARN_ON(wiphy->pmsr_capa && !wiphy->pmsr_capa->ftm.supported))
+		return -EINVAL;
+
+	if (wiphy->pmsr_capa && wiphy->pmsr_capa->ftm.supported) {
+		if (WARN_ON(!wiphy->pmsr_capa->ftm.asap &&
+			    !wiphy->pmsr_capa->ftm.non_asap))
+			return -EINVAL;
+		if (WARN_ON(!wiphy->pmsr_capa->ftm.preambles ||
+			    !wiphy->pmsr_capa->ftm.bandwidths))
+			return -EINVAL;
+		if (WARN_ON(wiphy->pmsr_capa->ftm.preambles &
+				~(BIT(NL80211_PREAMBLE_LEGACY) |
+				  BIT(NL80211_PREAMBLE_HT) |
+				  BIT(NL80211_PREAMBLE_VHT) |
+				  BIT(NL80211_PREAMBLE_DMG))))
+			return -EINVAL;
+		if (WARN_ON(wiphy->pmsr_capa->ftm.bandwidths &
+				~(BIT(NL80211_CHAN_WIDTH_20_NOHT) |
+				  BIT(NL80211_CHAN_WIDTH_20) |
+				  BIT(NL80211_CHAN_WIDTH_40) |
+				  BIT(NL80211_CHAN_WIDTH_80) |
+				  BIT(NL80211_CHAN_WIDTH_80P80) |
+				  BIT(NL80211_CHAN_WIDTH_160) |
+				  BIT(NL80211_CHAN_WIDTH_5) |
+				  BIT(NL80211_CHAN_WIDTH_10))))
+			return -EINVAL;
+	}
+
 	/*
 	 * if a wiphy has unsupported modes for regulatory channel enforcement,
 	 * opt-out of enforcement checking
@@ -815,6 +858,19 @@
 		return -EINVAL;
 	}
 
+	for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) {
+		/*
+		 * Validate we have a policy (can be explicitly set to
+		 * VENDOR_CMD_RAW_DATA which is non-NULL) and also that
+		 * we have at least one of doit/dumpit.
+		 */
+		if (WARN_ON(!rdev->wiphy.vendor_commands[i].policy))
+			return -EINVAL;
+		if (WARN_ON(!rdev->wiphy.vendor_commands[i].doit &&
+			    !rdev->wiphy.vendor_commands[i].dumpit))
+			return -EINVAL;
+	}
+
 #ifdef CONFIG_PM
 	if (WARN_ON(rdev->wiphy.wowlan && rdev->wiphy.wowlan->n_patterns &&
 		    (!rdev->wiphy.wowlan->pattern_min_len ||
@@ -842,11 +898,8 @@
 	cfg80211_rdev_list_generation++;
 
 	/* add to debugfs */
-	rdev->wiphy.debugfsdir =
-		debugfs_create_dir(wiphy_name(&rdev->wiphy),
-				   ieee80211_debugfs_dir);
-	if (IS_ERR(rdev->wiphy.debugfsdir))
-		rdev->wiphy.debugfsdir = NULL;
+	rdev->wiphy.debugfsdir = debugfs_create_dir(wiphy_name(&rdev->wiphy),
+						    ieee80211_debugfs_dir);
 
 	cfg80211_debugfs_rdev_add(rdev);
 	nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
@@ -1009,7 +1062,7 @@
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	if (rfkill_set_hw_state(rdev->rfkill, blocked))
-		schedule_work(&rdev->rfkill_sync);
+		schedule_work(&rdev->rfkill_block);
 }
 EXPORT_SYMBOL(wiphy_rfkill_set_hw_state);
 
@@ -1019,36 +1072,51 @@
 	wdev->cqm_config = NULL;
 }
 
-void cfg80211_unregister_wdev(struct wireless_dev *wdev)
+static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 
 	ASSERT_RTNL();
 
-	if (WARN_ON(wdev->netdev))
-		return;
+	flush_work(&wdev->pmsr_free_wk);
 
 	nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
 
 	list_del_rcu(&wdev->list);
-	synchronize_rcu();
+	if (sync)
+		synchronize_rcu();
 	rdev->devlist_generation++;
 
+	cfg80211_mlme_purge_registrations(wdev);
+
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_P2P_DEVICE:
-		cfg80211_mlme_purge_registrations(wdev);
 		cfg80211_stop_p2p_device(rdev, wdev);
 		break;
 	case NL80211_IFTYPE_NAN:
 		cfg80211_stop_nan(rdev, wdev);
 		break;
 	default:
-		WARN_ON_ONCE(1);
 		break;
 	}
 
+#ifdef CONFIG_CFG80211_WEXT
+	kzfree(wdev->wext.keys);
+#endif
+	/* only initialized if we have a netdev */
+	if (wdev->netdev)
+		flush_work(&wdev->disconnect_wk);
+
 	cfg80211_cqm_config_free(wdev);
 }
+
+void cfg80211_unregister_wdev(struct wireless_dev *wdev)
+{
+	if (WARN_ON(wdev->netdev))
+		return;
+
+	__cfg80211_unregister_wdev(wdev, true);
+}
 EXPORT_SYMBOL(cfg80211_unregister_wdev);
 
 static const struct device_type wiphy_type = {
@@ -1074,6 +1142,8 @@
 	ASSERT_RTNL();
 	ASSERT_WDEV_LOCK(wdev);
 
+	cfg80211_pmsr_wdev_down(wdev);
+
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_ADHOC:
 		__cfg80211_leave_ibss(rdev, dev, true);
@@ -1153,6 +1223,33 @@
 }
 EXPORT_SYMBOL(cfg80211_stop_iface);
 
+void cfg80211_init_wdev(struct cfg80211_registered_device *rdev,
+			struct wireless_dev *wdev)
+{
+	mutex_init(&wdev->mtx);
+	INIT_LIST_HEAD(&wdev->event_list);
+	spin_lock_init(&wdev->event_lock);
+	INIT_LIST_HEAD(&wdev->mgmt_registrations);
+	spin_lock_init(&wdev->mgmt_registrations_lock);
+	INIT_LIST_HEAD(&wdev->pmsr_list);
+	spin_lock_init(&wdev->pmsr_lock);
+	INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk);
+
+	/*
+	 * We get here also when the interface changes network namespaces,
+	 * as it's registered into the new one, but we don't want it to
+	 * change ID in that case. Checking if the ID is already assigned
+	 * works, because 0 isn't considered a valid ID and the memory is
+	 * 0-initialized.
+	 */
+	if (!wdev->identifier)
+		wdev->identifier = ++rdev->wdev_id;
+	list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list);
+	rdev->devlist_generation++;
+
+	nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
+}
+
 static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 					 unsigned long state, void *ptr)
 {
@@ -1178,23 +1275,6 @@
 		 * called within code protected by it when interfaces
 		 * are added with nl80211.
 		 */
-		mutex_init(&wdev->mtx);
-		INIT_LIST_HEAD(&wdev->event_list);
-		spin_lock_init(&wdev->event_lock);
-		INIT_LIST_HEAD(&wdev->mgmt_registrations);
-		spin_lock_init(&wdev->mgmt_registrations_lock);
-
-		/*
-		 * We get here also when the interface changes network namespaces,
-		 * as it's registered into the new one, but we don't want it to
-		 * change ID in that case. Checking if the ID is already assigned
-		 * works, because 0 isn't considered a valid ID and the memory is
-		 * 0-initialized.
-		 */
-		if (!wdev->identifier)
-			wdev->identifier = ++rdev->wdev_id;
-		list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list);
-		rdev->devlist_generation++;
 		/* can only change netns with wiphy */
 		dev->features |= NETIF_F_NETNS_LOCAL;
 
@@ -1223,7 +1303,7 @@
 
 		INIT_WORK(&wdev->disconnect_wk, cfg80211_autodisconnect_wk);
 
-		nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
+		cfg80211_init_wdev(rdev, wdev);
 		break;
 	case NETDEV_GOING_DOWN:
 		cfg80211_leave(rdev, wdev);
@@ -1238,7 +1318,7 @@
 
 		list_for_each_entry_safe(pos, tmp,
 					 &rdev->sched_scan_req_list, list) {
-			if (WARN_ON(pos && pos->dev == wdev->netdev))
+			if (WARN_ON(pos->dev == wdev->netdev))
 				cfg80211_stop_sched_scan_req(rdev, pos, false);
 		}
 
@@ -1302,17 +1382,8 @@
 		 * remove and clean it up.
 		 */
 		if (!list_empty(&wdev->list)) {
-			nl80211_notify_iface(rdev, wdev,
-					     NL80211_CMD_DEL_INTERFACE);
+			__cfg80211_unregister_wdev(wdev, false);
 			sysfs_remove_link(&dev->dev.kobj, "phy80211");
-			list_del_rcu(&wdev->list);
-			rdev->devlist_generation++;
-			cfg80211_mlme_purge_registrations(wdev);
-#ifdef CONFIG_CFG80211_WEXT
-			kzfree(wdev->wext.keys);
-#endif
-			flush_work(&wdev->disconnect_wk);
-			cfg80211_cqm_config_free(wdev);
 		}
 		/*
 		 * synchronise (so that we won't find this netdev
@@ -1335,8 +1406,10 @@
 		}
 		break;
 	case NETDEV_PRE_UP:
-		if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
+		if (!cfg80211_iftype_allowed(wdev->wiphy, wdev->iftype,
+					     wdev->use_4addr, 0))
 			return notifier_from_errno(-EOPNOTSUPP);
+
 		if (rfkill_blocked(rdev->rfkill))
 			return notifier_from_errno(-ERFKILL);
 		break;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 7f52ef5..ed487e3 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,6 +3,7 @@
  * Wireless configuration interface internals.
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 #ifndef __NET_WIRELESS_CORE_H
 #define __NET_WIRELESS_CORE_H
@@ -27,7 +28,7 @@
 	/* rfkill support */
 	struct rfkill_ops rfkill_ops;
 	struct rfkill *rfkill;
-	struct work_struct rfkill_sync;
+	struct work_struct rfkill_block;
 
 	/* ISO / IEC 3166 alpha2 for which this device is receiving
 	 * country IEs on, this can help disregard country IEs from APs
@@ -66,6 +67,7 @@
 	/* protected by RTNL only */
 	int num_running_ifaces;
 	int num_running_monitor_ifaces;
+	u64 cookie_counter;
 
 	/* BSSes/scanning */
 	spinlock_t bss_lock;
@@ -133,6 +135,16 @@
 #endif
 }
 
+static inline u64 cfg80211_assign_cookie(struct cfg80211_registered_device *rdev)
+{
+	u64 r = ++rdev->cookie_counter;
+
+	if (WARN_ON(r == 0))
+		r = ++rdev->cookie_counter;
+
+	return r;
+}
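Editor's note: cfg80211_assign_cookie() reserves 0 as "no cookie", so the counter must skip it if it ever wraps; the WARN_ON flags the wrap as unexpected rather than fatal. A standalone C equivalent:

#include <stdint.h>

/* 0 is reserved as "no cookie"; skip it on 64-bit wraparound. */
static uint64_t assign_cookie(uint64_t *counter)
{
	uint64_t r = ++*counter;

	if (r == 0)		/* only after 2^64 allocations */
		r = ++*counter;
	return r;
}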
+
 extern struct workqueue_struct *cfg80211_wq;
 extern struct list_head cfg80211_rdev_list;
 extern int cfg80211_rdev_list_generation;
@@ -170,12 +182,23 @@
 static inline void cfg80211_hold_bss(struct cfg80211_internal_bss *bss)
 {
 	atomic_inc(&bss->hold);
+	if (bss->pub.transmitted_bss) {
+		bss = container_of(bss->pub.transmitted_bss,
+				   struct cfg80211_internal_bss, pub);
+		atomic_inc(&bss->hold);
+	}
 }
 
 static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss)
 {
 	int r = atomic_dec_return(&bss->hold);
 	WARN_ON(r < 0);
+	if (bss->pub.transmitted_bss) {
+		bss = container_of(bss->pub.transmitted_bss,
+				   struct cfg80211_internal_bss, pub);
+		r = atomic_dec_return(&bss->hold);
+		WARN_ON(r < 0);
+	}
 }
 
 
@@ -187,6 +210,9 @@
 int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
 			  struct net *net);
 
+void cfg80211_init_wdev(struct cfg80211_registered_device *rdev,
+			struct wireless_dev *wdev);
+
 static inline void wdev_lock(struct wireless_dev *wdev)
 	__acquires(wdev)
 {
@@ -280,6 +306,8 @@
 void cfg80211_bss_expire(struct cfg80211_registered_device *rdev);
 void cfg80211_bss_age(struct cfg80211_registered_device *rdev,
                       unsigned long age_secs);
+void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
+				     struct ieee80211_channel *channel);
 
 /* IBSS */
 int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
@@ -430,6 +458,8 @@
 bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
 				u32 center_freq_khz, u32 bw_khz);
 
+extern struct work_struct cfg80211_disconnect_work;
+
 /**
  * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable
  * @wiphy: the wiphy to validate against
@@ -503,6 +533,10 @@
 void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
 		       struct wireless_dev *wdev);
 
+struct cfg80211_internal_bss *
+cfg80211_bss_update(struct cfg80211_registered_device *rdev,
+		    struct cfg80211_internal_bss *tmp,
+		    bool signal_valid, unsigned long ts);
 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
 #define CFG80211_DEV_WARN_ON(cond)	WARN_ON(cond)
 #else
@@ -516,4 +550,8 @@
 
 void cfg80211_cqm_config_free(struct wireless_dev *wdev);
 
+void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid);
+void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev);
+void cfg80211_pmsr_free_wk(struct work_struct *work);
+
 #endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index 30fc6eb..76b845f 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * cfg80211 debugfs
  *
  * Copyright 2009	Luis R. Rodriguez <lrodriguez@atheros.com>
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/slab.h>
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index d1743e6..ae8fe66 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -104,13 +104,19 @@
 		* use the mandatory rate set for 11b or
 		* 11a for maximum compatibility.
 		*/
-		struct ieee80211_supported_band *sband =
-			rdev->wiphy.bands[params->chandef.chan->band];
+		struct ieee80211_supported_band *sband;
+		enum nl80211_band band;
+		u32 flag;
 		int j;
-		u32 flag = params->chandef.chan->band == NL80211_BAND_5GHZ ?
-			IEEE80211_RATE_MANDATORY_A :
-			IEEE80211_RATE_MANDATORY_B;
 
+		band = params->chandef.chan->band;
+		if (band == NL80211_BAND_5GHZ ||
+		    band == NL80211_BAND_6GHZ)
+			flag = IEEE80211_RATE_MANDATORY_A;
+		else
+			flag = IEEE80211_RATE_MANDATORY_B;
+
+		sband = rdev->wiphy.bands[band];
 		for (j = 0; j < sband->n_bitrates; j++) {
 			if (sband->bitrates[j].flags & flag)
 				params->basic_rates |= BIT(j);
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index 801d478..cc7b9fd 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * lib80211 -- common bits for IEEE802.11 drivers
  *
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index 6beab0c..6a5f08f 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * lib80211 crypt: host-based CCMP encryption implementation for lib80211
  *
  * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
  * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation. See README and COPYING for
- * more details.
  */
 
 #include <linux/kernel.h>
@@ -26,6 +22,7 @@
 #include <linux/ieee80211.h>
 
 #include <linux/crypto.h>
+#include <crypto/aead.h>
 
 #include <net/lib80211.h>
 
@@ -52,20 +49,13 @@
 
 	int key_idx;
 
-	struct crypto_cipher *tfm;
+	struct crypto_aead *tfm;
 
 	/* scratch buffers for virt_to_page() (crypto API) */
-	u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN],
-	    tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN];
-	u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN];
+	u8 tx_aad[2 * AES_BLOCK_LEN];
+	u8 rx_aad[2 * AES_BLOCK_LEN];
 };
 
-static inline void lib80211_ccmp_aes_encrypt(struct crypto_cipher *tfm,
-					      const u8 pt[16], u8 ct[16])
-{
-	crypto_cipher_encrypt_one(tfm, ct, pt);
-}
-
 static void *lib80211_ccmp_init(int key_idx)
 {
 	struct lib80211_ccmp_data *priv;
@@ -75,7 +65,7 @@
 		goto fail;
 	priv->key_idx = key_idx;
 
-	priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
+	priv->tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(priv->tfm)) {
 		priv->tfm = NULL;
 		goto fail;
@@ -86,7 +76,7 @@
       fail:
 	if (priv) {
 		if (priv->tfm)
-			crypto_free_cipher(priv->tfm);
+			crypto_free_aead(priv->tfm);
 		kfree(priv);
 	}
 
@@ -97,25 +87,16 @@
 {
 	struct lib80211_ccmp_data *_priv = priv;
 	if (_priv && _priv->tfm)
-		crypto_free_cipher(_priv->tfm);
+		crypto_free_aead(_priv->tfm);
 	kfree(priv);
 }
 
-static inline void xor_block(u8 * b, u8 * a, size_t len)
-{
-	int i;
-	for (i = 0; i < len; i++)
-		b[i] ^= a[i];
-}
-
-static void ccmp_init_blocks(struct crypto_cipher *tfm,
-			     struct ieee80211_hdr *hdr,
-			     u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0)
+static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr,
+				const u8 *pn, u8 *iv, u8 *aad)
 {
 	u8 *pos, qc = 0;
 	size_t aad_len;
 	int a4_included, qc_included;
-	u8 aad[2 * AES_BLOCK_LEN];
 
 	a4_included = ieee80211_has_a4(hdr->frame_control);
 	qc_included = ieee80211_is_data_qos(hdr->frame_control);
@@ -131,17 +112,19 @@
 		aad_len += 2;
 	}
 
-	/* CCM Initial Block:
-	 * Flag (Include authentication header, M=3 (8-octet MIC),
-	 *       L=1 (2-octet Dlen))
-	 * Nonce: 0x00 | A2 | PN
-	 * Dlen */
-	b0[0] = 0x59;
-	b0[1] = qc;
-	memcpy(b0 + 2, hdr->addr2, ETH_ALEN);
-	memcpy(b0 + 8, pn, CCMP_PN_LEN);
-	b0[14] = (dlen >> 8) & 0xff;
-	b0[15] = dlen & 0xff;
+	/* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
+	 * mode authentication are not allowed to collide, yet both are derived
+	 * from the same vector. We only set L := 1 here to indicate that the
+	 * data size can be represented in (L+1) bytes. The CCM layer will take
+	 * care of storing the data length in the top (L+1) bytes and setting
+	 * and clearing the other bits as is required to derive the two IVs.
+	 */
+	iv[0] = 0x1;
+
+	/* Nonce: QC | A2 | PN */
+	iv[1] = qc;
+	memcpy(iv + 2, hdr->addr2, ETH_ALEN);
+	memcpy(iv + 8, pn, CCMP_PN_LEN);
 
 	/* AAD:
 	 * FC with bits 4..6 and 11..13 masked to zero; 14 is always one
@@ -151,31 +134,20 @@
 	 * QC (if present)
 	 */
 	pos = (u8 *) hdr;
-	aad[0] = 0;		/* aad_len >> 8 */
-	aad[1] = aad_len & 0xff;
-	aad[2] = pos[0] & 0x8f;
-	aad[3] = pos[1] & 0xc7;
-	memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN);
+	aad[0] = pos[0] & 0x8f;
+	aad[1] = pos[1] & 0xc7;
+	memcpy(aad + 2, hdr->addr1, 3 * ETH_ALEN);
 	pos = (u8 *) & hdr->seq_ctrl;
-	aad[22] = pos[0] & 0x0f;
-	aad[23] = 0;		/* all bits masked */
-	memset(aad + 24, 0, 8);
+	aad[20] = pos[0] & 0x0f;
+	aad[21] = 0;		/* all bits masked */
+	memset(aad + 22, 0, 8);
 	if (a4_included)
-		memcpy(aad + 24, hdr->addr4, ETH_ALEN);
+		memcpy(aad + 22, hdr->addr4, ETH_ALEN);
 	if (qc_included) {
-		aad[a4_included ? 30 : 24] = qc;
+		aad[a4_included ? 28 : 22] = qc;
 		/* rest of QC masked */
 	}
-
-	/* Start with the first block and AAD */
-	lib80211_ccmp_aes_encrypt(tfm, b0, auth);
-	xor_block(auth, aad, AES_BLOCK_LEN);
-	lib80211_ccmp_aes_encrypt(tfm, auth, auth);
-	xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN);
-	lib80211_ccmp_aes_encrypt(tfm, auth, auth);
-	b0[0] &= 0x07;
-	b0[14] = b0[15] = 0;
-	lib80211_ccmp_aes_encrypt(tfm, b0, s0);
+	return aad_len;
 }
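Editor's note: the comment above delegates to the CCM layer the job of deriving both the CBC-MAC B0 block and the CTR counter blocks from the single 16-byte vector the caller fills in; the caller only encodes L (here iv[0] = 0x1, i.e. two length octets) and the 13-byte nonce. A sketch of that derivation as RFC 3610 defines it, for the CCMP parameters (8-byte MIC, AAD present) — note the flags byte works out to 0x59, exactly the b0[0] value the old hand-rolled code used:

#include <stdint.h>
#include <string.h>

/* Build the CCM B0 block from the caller's IV, per RFC 3610:
 * flags = 64*Adata + 8*((M-2)/2) + (L-1); with M=8 and L=2 this
 * is 0x40 | 0x18 | 0x01 = 0x59. */
static void ccm_b0_from_iv(const uint8_t iv[16], size_t dlen,
			   int have_aad, uint8_t b0[16])
{
	memcpy(b0, iv, 16);	/* keep the 13-byte nonce in bytes 1..13 */
	b0[0] = (have_aad ? 0x40 : 0) | (((8 - 2) / 2) << 3) | 0x01;
	b0[14] = (dlen >> 8) & 0xff;	/* two length octets, big-endian */
	b0[15] = dlen & 0xff;
}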
 
 static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len,
@@ -218,13 +190,13 @@
 static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct lib80211_ccmp_data *key = priv;
-	int data_len, i, blocks, last, len;
-	u8 *pos, *mic;
 	struct ieee80211_hdr *hdr;
-	u8 *b0 = key->tx_b0;
-	u8 *b = key->tx_b;
-	u8 *e = key->tx_e;
-	u8 *s0 = key->tx_s0;
+	struct aead_request *req;
+	struct scatterlist sg[2];
+	u8 *aad = key->tx_aad;
+	u8 iv[AES_BLOCK_LEN];
+	int len, data_len, aad_len;
+	int ret;
 
 	if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len)
 		return -1;
@@ -234,31 +206,28 @@
 	if (len < 0)
 		return -1;
 
-	pos = skb->data + hdr_len + CCMP_HDR_LEN;
+	req = aead_request_alloc(key->tfm, GFP_ATOMIC);
+	if (!req)
+		return -ENOMEM;
+
 	hdr = (struct ieee80211_hdr *)skb->data;
-	ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0);
+	aad_len = ccmp_init_iv_and_aad(hdr, key->tx_pn, iv, aad);
 
-	blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
-	last = data_len % AES_BLOCK_LEN;
+	skb_put(skb, CCMP_MIC_LEN);
 
-	for (i = 1; i <= blocks; i++) {
-		len = (i == blocks && last) ? last : AES_BLOCK_LEN;
-		/* Authentication */
-		xor_block(b, pos, len);
-		lib80211_ccmp_aes_encrypt(key->tfm, b, b);
-		/* Encryption, with counter */
-		b0[14] = (i >> 8) & 0xff;
-		b0[15] = i & 0xff;
-		lib80211_ccmp_aes_encrypt(key->tfm, b0, e);
-		xor_block(pos, e, len);
-		pos += len;
-	}
+	sg_init_table(sg, 2);
+	sg_set_buf(&sg[0], aad, aad_len);
+	sg_set_buf(&sg[1], skb->data + hdr_len + CCMP_HDR_LEN,
+		   data_len + CCMP_MIC_LEN);
 
-	mic = skb_put(skb, CCMP_MIC_LEN);
-	for (i = 0; i < CCMP_MIC_LEN; i++)
-		mic[i] = b[i] ^ s0[i];
+	aead_request_set_callback(req, 0, NULL, NULL);
+	aead_request_set_ad(req, aad_len);
+	aead_request_set_crypt(req, sg, sg, data_len, iv);
 
-	return 0;
+	ret = crypto_aead_encrypt(req);
+	aead_request_free(req);
+
+	return ret;
 }
 
 /*
@@ -287,13 +256,13 @@
 	struct lib80211_ccmp_data *key = priv;
 	u8 keyidx, *pos;
 	struct ieee80211_hdr *hdr;
-	u8 *b0 = key->rx_b0;
-	u8 *b = key->rx_b;
-	u8 *a = key->rx_a;
+	struct aead_request *req;
+	struct scatterlist sg[2];
+	u8 *aad = key->rx_aad;
+	u8 iv[AES_BLOCK_LEN];
 	u8 pn[6];
-	int i, blocks, last, len;
-	size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN;
-	u8 *mic = skb->data + skb->len - CCMP_MIC_LEN;
+	int aad_len, ret;
+	size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN;
 
 	if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) {
 		key->dot11RSNAStatsCCMPFormatErrors++;
@@ -341,28 +310,26 @@
 		return -4;
 	}
 
-	ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b);
-	xor_block(mic, b, CCMP_MIC_LEN);
+	req = aead_request_alloc(key->tfm, GFP_ATOMIC);
+	if (!req)
+		return -ENOMEM;
 
-	blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
-	last = data_len % AES_BLOCK_LEN;
+	aad_len = ccmp_init_iv_and_aad(hdr, pn, iv, aad);
 
-	for (i = 1; i <= blocks; i++) {
-		len = (i == blocks && last) ? last : AES_BLOCK_LEN;
-		/* Decrypt, with counter */
-		b0[14] = (i >> 8) & 0xff;
-		b0[15] = i & 0xff;
-		lib80211_ccmp_aes_encrypt(key->tfm, b0, b);
-		xor_block(pos, b, len);
-		/* Authentication */
-		xor_block(a, pos, len);
-		lib80211_ccmp_aes_encrypt(key->tfm, a, a);
-		pos += len;
-	}
+	sg_init_table(sg, 2);
+	sg_set_buf(&sg[0], aad, aad_len);
+	sg_set_buf(&sg[1], pos, data_len);
 
-	if (memcmp(mic, a, CCMP_MIC_LEN) != 0) {
-		net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM\n",
-				    hdr->addr2);
+	aead_request_set_callback(req, 0, NULL, NULL);
+	aead_request_set_ad(req, aad_len);
+	aead_request_set_crypt(req, sg, sg, data_len, iv);
+
+	ret = crypto_aead_decrypt(req);
+	aead_request_free(req);
+
+	if (ret) {
+		net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM (%d)\n",
+				    hdr->addr2, ret);
 		key->dot11RSNAStatsCCMPDecryptErrors++;
 		return -5;
 	}
@@ -381,7 +348,7 @@
 {
 	struct lib80211_ccmp_data *data = priv;
 	int keyidx;
-	struct crypto_cipher *tfm = data->tfm;
+	struct crypto_aead *tfm = data->tfm;
 
 	keyidx = data->key_idx;
 	memset(data, 0, sizeof(*data));
@@ -398,7 +365,9 @@
 			data->rx_pn[4] = seq[1];
 			data->rx_pn[5] = seq[0];
 		}
-		crypto_cipher_setkey(data->tfm, data->key, CCMP_TK_LEN);
+		if (crypto_aead_setauthsize(data->tfm, CCMP_MIC_LEN) ||
+		    crypto_aead_setkey(data->tfm, data->key, CCMP_TK_LEN))
+			return -1;
 	} else if (len == 0)
 		data->key_set = 0;
 	else
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index e6bce1f..f5e842b 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -1,18 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * lib80211 crypt: host-based TKIP encryption implementation for lib80211
  *
  * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
  * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation. See README and COPYING for
- * more details.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/err.h>
+#include <linux/fips.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -29,8 +26,9 @@
 #include <linux/ieee80211.h>
 #include <net/iw_handler.h>
 
+#include <crypto/arc4.h>
 #include <crypto/hash.h>
-#include <crypto/skcipher.h>
+#include <linux/crypto.h>
 #include <linux/crc32.h>
 
 #include <net/lib80211.h>
@@ -64,9 +62,9 @@
 
 	int key_idx;
 
-	struct crypto_skcipher *rx_tfm_arc4;
+	struct arc4_ctx rx_ctx_arc4;
+	struct arc4_ctx tx_ctx_arc4;
 	struct crypto_shash *rx_tfm_michael;
-	struct crypto_skcipher *tx_tfm_arc4;
 	struct crypto_shash *tx_tfm_michael;
 
 	/* scratch buffers for virt_to_page() (crypto API) */
@@ -93,32 +91,21 @@
 {
 	struct lib80211_tkip_data *priv;
 
+	if (fips_enabled)
+		return NULL;
+
 	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
 	if (priv == NULL)
 		goto fail;
 
 	priv->key_idx = key_idx;
 
-	priv->tx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
-						  CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->tx_tfm_arc4)) {
-		priv->tx_tfm_arc4 = NULL;
-		goto fail;
-	}
-
 	priv->tx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0);
 	if (IS_ERR(priv->tx_tfm_michael)) {
 		priv->tx_tfm_michael = NULL;
 		goto fail;
 	}
 
-	priv->rx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
-						  CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->rx_tfm_arc4)) {
-		priv->rx_tfm_arc4 = NULL;
-		goto fail;
-	}
-
 	priv->rx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0);
 	if (IS_ERR(priv->rx_tfm_michael)) {
 		priv->rx_tfm_michael = NULL;
@@ -130,9 +117,7 @@
       fail:
 	if (priv) {
 		crypto_free_shash(priv->tx_tfm_michael);
-		crypto_free_skcipher(priv->tx_tfm_arc4);
 		crypto_free_shash(priv->rx_tfm_michael);
-		crypto_free_skcipher(priv->rx_tfm_arc4);
 		kfree(priv);
 	}
 
@@ -144,11 +129,9 @@
 	struct lib80211_tkip_data *_priv = priv;
 	if (_priv) {
 		crypto_free_shash(_priv->tx_tfm_michael);
-		crypto_free_skcipher(_priv->tx_tfm_arc4);
 		crypto_free_shash(_priv->rx_tfm_michael);
-		crypto_free_skcipher(_priv->rx_tfm_arc4);
 	}
-	kfree(priv);
+	kzfree(priv);
 }
 
 static inline u16 RotR1(u16 val)
@@ -344,12 +327,9 @@
 static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct lib80211_tkip_data *tkey = priv;
-	SKCIPHER_REQUEST_ON_STACK(req, tkey->tx_tfm_arc4);
 	int len;
 	u8 rc4key[16], *pos, *icv;
 	u32 crc;
-	struct scatterlist sg;
-	int err;
 
 	if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
 		struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
@@ -374,14 +354,10 @@
 	icv[2] = crc >> 16;
 	icv[3] = crc >> 24;
 
-	crypto_skcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
-	sg_init_one(&sg, pos, len + 4);
-	skcipher_request_set_tfm(req, tkey->tx_tfm_arc4);
-	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
-	err = crypto_skcipher_encrypt(req);
-	skcipher_request_zero(req);
-	return err;
+	arc4_setkey(&tkey->tx_ctx_arc4, rc4key, 16);
+	arc4_crypt(&tkey->tx_ctx_arc4, pos, pos, len + 4);
+
+	return 0;
 }
 
 /*
@@ -400,7 +376,6 @@
 static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct lib80211_tkip_data *tkey = priv;
-	SKCIPHER_REQUEST_ON_STACK(req, tkey->rx_tfm_arc4);
 	u8 rc4key[16];
 	u8 keyidx, *pos;
 	u32 iv32;
@@ -408,9 +383,7 @@
 	struct ieee80211_hdr *hdr;
 	u8 icv[4];
 	u32 crc;
-	struct scatterlist sg;
 	int plen;
-	int err;
 
 	hdr = (struct ieee80211_hdr *)skb->data;
 
@@ -463,18 +436,8 @@
 
 	plen = skb->len - hdr_len - 12;
 
-	crypto_skcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
-	sg_init_one(&sg, pos, plen + 4);
-	skcipher_request_set_tfm(req, tkey->rx_tfm_arc4);
-	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
-	err = crypto_skcipher_decrypt(req);
-	skcipher_request_zero(req);
-	if (err) {
-		net_dbg_ratelimited("TKIP: failed to decrypt received packet from %pM\n",
-				    hdr->addr2);
-		return -7;
-	}
+	arc4_setkey(&tkey->rx_ctx_arc4, rc4key, 16);
+	arc4_crypt(&tkey->rx_ctx_arc4, pos, pos, plen + 4);
 
 	crc = ~crc32_le(~0, pos, plen);
 	icv[0] = crc;
@@ -520,7 +483,6 @@
 	}
 
 	desc->tfm = tfm_michael;
-	desc->flags = 0;
 
 	if (crypto_shash_setkey(tfm_michael, key, 8))
 		return -1;
@@ -660,17 +622,17 @@
 	struct lib80211_tkip_data *tkey = priv;
 	int keyidx;
 	struct crypto_shash *tfm = tkey->tx_tfm_michael;
-	struct crypto_skcipher *tfm2 = tkey->tx_tfm_arc4;
+	struct arc4_ctx *tfm2 = &tkey->tx_ctx_arc4;
 	struct crypto_shash *tfm3 = tkey->rx_tfm_michael;
-	struct crypto_skcipher *tfm4 = tkey->rx_tfm_arc4;
+	struct arc4_ctx *tfm4 = &tkey->rx_ctx_arc4;
 
 	keyidx = tkey->key_idx;
 	memset(tkey, 0, sizeof(*tkey));
 	tkey->key_idx = keyidx;
 	tkey->tx_tfm_michael = tfm;
-	tkey->tx_tfm_arc4 = tfm2;
+	tkey->tx_ctx_arc4 = *tfm2;
 	tkey->rx_tfm_michael = tfm3;
-	tkey->rx_tfm_arc4 = tfm4;
+	tkey->rx_ctx_arc4 = *tfm4;
 	if (len == TKIP_KEY_LEN) {
 		memcpy(tkey->key, key, TKIP_KEY_LEN);
 		tkey->key_set = 1;
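
Editor's note: TKIP derives a fresh RC4 key for every packet, so the old skcipher round-trip (allocate a request, build a scatterlist, setkey, encrypt) was pure overhead; the patch embeds struct arc4_ctx in the key data and calls the library cipher from <crypto/arc4.h> directly. The entire per-packet pattern, assuming the 16-byte mixed key is already in rc4key and the frame is processed in place:

#include <crypto/arc4.h>

static void example_tkip_xcrypt(struct arc4_ctx *ctx, const u8 rc4key[16],
				u8 *data, unsigned int len)
{
	/* RC4 encryption and decryption are the same keystream XOR */
	arc4_setkey(ctx, rc4key, 16);
	arc4_crypt(ctx, data, data, len);	/* in-place operation is fine */
}

Because the contexts now live inside struct lib80211_tkip_data rather than being separately allocated tfms, teardown shrinks to kzfree() of the whole structure, which also wipes the key material before freeing it.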
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index d05f58b..dafc6f3 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -1,16 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * lib80211 crypt: host-based WEP encryption implementation for lib80211
  *
  * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi>
  * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation. See README and COPYING for
- * more details.
  */
 
 #include <linux/err.h>
+#include <linux/fips.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -22,7 +19,7 @@
 
 #include <net/lib80211.h>
 
-#include <crypto/skcipher.h>
+#include <crypto/arc4.h>
 #include <linux/crc32.h>
 
 MODULE_AUTHOR("Jouni Malinen");
@@ -35,52 +32,31 @@
 	u8 key[WEP_KEY_LEN + 1];
 	u8 key_len;
 	u8 key_idx;
-	struct crypto_skcipher *tx_tfm;
-	struct crypto_skcipher *rx_tfm;
+	struct arc4_ctx tx_ctx;
+	struct arc4_ctx rx_ctx;
 };
 
 static void *lib80211_wep_init(int keyidx)
 {
 	struct lib80211_wep_data *priv;
 
+	if (fips_enabled)
+		return NULL;
+
 	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
 	if (priv == NULL)
-		goto fail;
+		return NULL;
 	priv->key_idx = keyidx;
 
-	priv->tx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->tx_tfm)) {
-		priv->tx_tfm = NULL;
-		goto fail;
-	}
-
-	priv->rx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->rx_tfm)) {
-		priv->rx_tfm = NULL;
-		goto fail;
-	}
 	/* start WEP IV from a random value */
 	get_random_bytes(&priv->iv, 4);
 
 	return priv;
-
-      fail:
-	if (priv) {
-		crypto_free_skcipher(priv->tx_tfm);
-		crypto_free_skcipher(priv->rx_tfm);
-		kfree(priv);
-	}
-	return NULL;
 }
 
 static void lib80211_wep_deinit(void *priv)
 {
-	struct lib80211_wep_data *_priv = priv;
-	if (_priv) {
-		crypto_free_skcipher(_priv->tx_tfm);
-		crypto_free_skcipher(_priv->rx_tfm);
-	}
-	kfree(priv);
+	kzfree(priv);
 }
 
 /* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */
@@ -129,12 +105,9 @@
 static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct lib80211_wep_data *wep = priv;
-	SKCIPHER_REQUEST_ON_STACK(req, wep->tx_tfm);
 	u32 crc, klen, len;
 	u8 *pos, *icv;
-	struct scatterlist sg;
 	u8 key[WEP_KEY_LEN + 3];
-	int err;
 
 	/* other checks are in lib80211_wep_build_iv */
 	if (skb_tailroom(skb) < 4)
@@ -162,14 +135,10 @@
 	icv[2] = crc >> 16;
 	icv[3] = crc >> 24;
 
-	crypto_skcipher_setkey(wep->tx_tfm, key, klen);
-	sg_init_one(&sg, pos, len + 4);
-	skcipher_request_set_tfm(req, wep->tx_tfm);
-	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
-	err = crypto_skcipher_encrypt(req);
-	skcipher_request_zero(req);
-	return err;
+	arc4_setkey(&wep->tx_ctx, key, klen);
+	arc4_crypt(&wep->tx_ctx, pos, pos, len + 4);
+
+	return 0;
 }
 
 /* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
@@ -182,12 +151,9 @@
 static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct lib80211_wep_data *wep = priv;
-	SKCIPHER_REQUEST_ON_STACK(req, wep->rx_tfm);
 	u32 crc, klen, plen;
 	u8 key[WEP_KEY_LEN + 3];
 	u8 keyidx, *pos, icv[4];
-	struct scatterlist sg;
-	int err;
 
 	if (skb->len < hdr_len + 8)
 		return -1;
@@ -208,15 +174,8 @@
 	/* Apply RC4 to data and compute CRC32 over decrypted data */
 	plen = skb->len - hdr_len - 8;
 
-	crypto_skcipher_setkey(wep->rx_tfm, key, klen);
-	sg_init_one(&sg, pos, plen + 4);
-	skcipher_request_set_tfm(req, wep->rx_tfm);
-	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
-	err = crypto_skcipher_decrypt(req);
-	skcipher_request_zero(req);
-	if (err)
-		return -7;
+	arc4_setkey(&wep->rx_ctx, key, klen);
+	arc4_crypt(&wep->rx_ctx, pos, pos, plen + 4);
 
 	crc = ~crc32_le(~0, pos, plen);
 	icv[0] = crc;
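
Editor's note: with the "ecb(arc4)" transform gone, nothing in the crypto API would reject RC4 under FIPS anymore, so both the TKIP and WEP init paths add an explicit fips_enabled bail-out. A sketch of the guard (the helper name and priv_size parameter are illustrative):

#include <linux/fips.h>
#include <linux/slab.h>

static void *example_crypt_init(size_t priv_size)
{
	if (fips_enabled)	/* RC4-based WEP/TKIP are not FIPS-approved */
		return NULL;

	return kzalloc(priv_size, GFP_ATOMIC);
}

The matching deinit switch from kfree() to kzfree() zeroes the buffer first, so RC4 state and keys do not linger in freed memory.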
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 12b3edf..f946201 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -21,7 +21,8 @@
 
 
 void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
-			    const u8 *buf, size_t len, int uapsd_queues)
+			    const u8 *buf, size_t len, int uapsd_queues,
+			    const u8 *req_ies, size_t req_ies_len)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
@@ -33,6 +34,8 @@
 	cr.status = (int)le16_to_cpu(mgmt->u.assoc_resp.status_code);
 	cr.bssid = mgmt->bssid;
 	cr.bss = bss;
+	cr.req_ie = req_ies;
+	cr.req_ie_len = req_ies_len;
 	cr.resp_ie = mgmt->u.assoc_resp.variable;
 	cr.resp_ie_len =
 		len - offsetof(struct ieee80211_mgmt, u.assoc_resp.variable);
@@ -52,7 +55,8 @@
 		return;
 	}
 
-	nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL, uapsd_queues);
+	nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL, uapsd_queues,
+			      req_ies, req_ies_len);
 	/* update current_bss etc., consumes the bss reference */
 	__cfg80211_connect_result(dev, &cr, cr.status == WLAN_STATUS_SUCCESS);
 }
@@ -272,11 +276,11 @@
 
 	p1 = (u8*)(ht_capa);
 	p2 = (u8*)(ht_capa_mask);
-	for (i = 0; i<sizeof(*ht_capa); i++)
+	for (i = 0; i < sizeof(*ht_capa); i++)
 		p1[i] &= p2[i];
 }
 
-/*  Do a logical ht_capa &= ht_capa_mask.  */
+/*  Do a logical vht_capa &= vht_capa_mask.  */
 void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
 				const struct ieee80211_vht_cap *vht_capa_mask)
 {
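
Editor's note: cfg80211_rx_assoc_resp() grows two parameters so the association *request* IEs reach userspace alongside the response IEs (cr.req_ie / cr.req_ie_len above). A hypothetical driver call site, where assoc_req_ies / assoc_req_ies_len are whatever the driver kept from the request it transmitted (NULL / 0 if it kept nothing):

	cfg80211_rx_assoc_resp(netdev, bss, frame, frame_len, uapsd_queues,
			       assoc_req_ies, assoc_req_ies_len);

C has no default arguments, so every in-tree caller and the nl80211_send_rx_assoc() plumbing have to change in the same patch.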
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 176edfe..7b72286 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1,10 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is the new netlink-based wireless configuration interface.
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2015-2017	Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 
 #include <linux/if.h>
@@ -200,8 +201,128 @@
 	return __cfg80211_rdev_from_attrs(netns, info->attrs);
 }
 
+static int validate_beacon_head(const struct nlattr *attr,
+				struct netlink_ext_ack *extack)
+{
+	const u8 *data = nla_data(attr);
+	unsigned int len = nla_len(attr);
+	const struct element *elem;
+	const struct ieee80211_mgmt *mgmt = (void *)data;
+	unsigned int fixedlen = offsetof(struct ieee80211_mgmt,
+					 u.beacon.variable);
+
+	if (len < fixedlen)
+		goto err;
+
+	if (ieee80211_hdrlen(mgmt->frame_control) !=
+	    offsetof(struct ieee80211_mgmt, u.beacon))
+		goto err;
+
+	data += fixedlen;
+	len -= fixedlen;
+
+	for_each_element(elem, data, len) {
+		/* nothing */
+	}
+
+	if (for_each_element_completed(elem, data, len))
+		return 0;
+
+err:
+	NL_SET_ERR_MSG_ATTR(extack, attr, "malformed beacon head");
+	return -EINVAL;
+}
+
+static int validate_ie_attr(const struct nlattr *attr,
+			    struct netlink_ext_ack *extack)
+{
+	const u8 *data = nla_data(attr);
+	unsigned int len = nla_len(attr);
+	const struct element *elem;
+
+	for_each_element(elem, data, len) {
+		/* nothing */
+	}
+
+	if (for_each_element_completed(elem, data, len))
+		return 0;
+
+	NL_SET_ERR_MSG_ATTR(extack, attr, "malformed information elements");
+	return -EINVAL;
+}
+
 /* policy for the attributes */
-static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
+static const struct nla_policy
+nl80211_ftm_responder_policy[NL80211_FTM_RESP_ATTR_MAX + 1] = {
+	[NL80211_FTM_RESP_ATTR_ENABLED] = { .type = NLA_FLAG, },
+	[NL80211_FTM_RESP_ATTR_LCI] = { .type = NLA_BINARY,
+					.len = U8_MAX },
+	[NL80211_FTM_RESP_ATTR_CIVICLOC] = { .type = NLA_BINARY,
+					     .len = U8_MAX },
+};
+
+static const struct nla_policy
+nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
+	[NL80211_PMSR_FTM_REQ_ATTR_ASAP] = { .type = NLA_FLAG },
+	[NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE] = { .type = NLA_U32 },
+	[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP] =
+		NLA_POLICY_MAX(NLA_U8, 15),
+	[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD] = { .type = NLA_U16 },
+	[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] =
+		NLA_POLICY_MAX(NLA_U8, 15),
+	[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] =
+		NLA_POLICY_MAX(NLA_U8, 31),
+	[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 },
+	[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG },
+	[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG },
+};
+
+static const struct nla_policy
+nl80211_pmsr_req_data_policy[NL80211_PMSR_TYPE_MAX + 1] = {
+	[NL80211_PMSR_TYPE_FTM] =
+		NLA_POLICY_NESTED(nl80211_pmsr_ftm_req_attr_policy),
+};
+
+static const struct nla_policy
+nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = {
+	[NL80211_PMSR_REQ_ATTR_DATA] =
+		NLA_POLICY_NESTED(nl80211_pmsr_req_data_policy),
+	[NL80211_PMSR_REQ_ATTR_GET_AP_TSF] = { .type = NLA_FLAG },
+};
+
+static const struct nla_policy
+nl80211_psmr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = {
+	[NL80211_PMSR_PEER_ATTR_ADDR] = NLA_POLICY_ETH_ADDR,
+	/*
+	 * we could specify this again to be the top-level policy,
+	 * but that would open us up to recursion problems ...
+	 */
+	[NL80211_PMSR_PEER_ATTR_CHAN] = { .type = NLA_NESTED },
+	[NL80211_PMSR_PEER_ATTR_REQ] =
+		NLA_POLICY_NESTED(nl80211_pmsr_req_attr_policy),
+	[NL80211_PMSR_PEER_ATTR_RESP] = { .type = NLA_REJECT },
+};
+
+static const struct nla_policy
+nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = {
+	[NL80211_PMSR_ATTR_MAX_PEERS] = { .type = NLA_REJECT },
+	[NL80211_PMSR_ATTR_REPORT_AP_TSF] = { .type = NLA_REJECT },
+	[NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR] = { .type = NLA_REJECT },
+	[NL80211_PMSR_ATTR_TYPE_CAPA] = { .type = NLA_REJECT },
+	[NL80211_PMSR_ATTR_PEERS] =
+		NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy),
+};
+
+static const struct nla_policy
+he_obss_pd_policy[NL80211_HE_OBSS_PD_ATTR_MAX + 1] = {
+	[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] =
+		NLA_POLICY_RANGE(NLA_U8, 1, 20),
+	[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET] =
+		NLA_POLICY_RANGE(NLA_U8, 1, 20),
+};
+
+const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
+	[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
 	[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING,
 				      .len = 20-1 },
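
Editor's note: validate_beacon_head() and validate_ie_attr() above replace the later-deleted is_valid_ie_attr() with the bounds-checked element iterator from <linux/ieee80211.h>, and they run at parse time via NLA_POLICY_VALIDATE_FN so command handlers never see malformed IEs. The iterator idiom, as a stand-alone sketch (function name invented):

#include <linux/ieee80211.h>

static bool example_ies_well_formed(const u8 *data, size_t len)
{
	const struct element *elem;

	/* the iterator never reads past len; a bad length field stops it */
	for_each_element(elem, data, len)
		;	/* nothing to do per element here */

	/* true only if iteration consumed the buffer exactly */
	return for_each_element_completed(elem, data, len);
}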
@@ -209,50 +330,70 @@
 
 	[NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 },
+	[NL80211_ATTR_WIPHY_EDMG_CHANNELS] = NLA_POLICY_RANGE(NLA_U8,
+						NL80211_EDMG_CHANNELS_MIN,
+						NL80211_EDMG_CHANNELS_MAX),
+	[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG] = NLA_POLICY_RANGE(NLA_U8,
+						NL80211_EDMG_BW_CONFIG_MIN,
+						NL80211_EDMG_BW_CONFIG_MAX),
+
 	[NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 },
 	[NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 },
 	[NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 },
 
-	[NL80211_ATTR_WIPHY_RETRY_SHORT] = { .type = NLA_U8 },
-	[NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 },
+	[NL80211_ATTR_WIPHY_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1),
+	[NL80211_ATTR_WIPHY_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1),
 	[NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 },
 	[NL80211_ATTR_WIPHY_DYN_ACK] = { .type = NLA_FLAG },
 
-	[NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
+	[NL80211_ATTR_IFTYPE] = NLA_POLICY_MAX(NLA_U32, NL80211_IFTYPE_MAX),
 	[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 },
 
-	[NL80211_ATTR_MAC] = { .len = ETH_ALEN },
-	[NL80211_ATTR_PREV_BSSID] = { .len = ETH_ALEN },
+	[NL80211_ATTR_MAC] = { .type = NLA_EXACT_LEN_WARN, .len = ETH_ALEN },
+	[NL80211_ATTR_PREV_BSSID] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = ETH_ALEN
+	},
 
 	[NL80211_ATTR_KEY] = { .type = NLA_NESTED, },
 	[NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY,
 				    .len = WLAN_MAX_KEY_LEN },
-	[NL80211_ATTR_KEY_IDX] = { .type = NLA_U8 },
+	[NL80211_ATTR_KEY_IDX] = NLA_POLICY_MAX(NLA_U8, 5),
 	[NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 },
 	[NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG },
 	[NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 },
-	[NL80211_ATTR_KEY_TYPE] = { .type = NLA_U32 },
+	[NL80211_ATTR_KEY_TYPE] =
+		NLA_POLICY_MAX(NLA_U32, NUM_NL80211_KEYTYPES),
 
 	[NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 },
 	[NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 },
-	[NL80211_ATTR_BEACON_HEAD] = { .type = NLA_BINARY,
-				       .len = IEEE80211_MAX_DATA_LEN },
-	[NL80211_ATTR_BEACON_TAIL] = { .type = NLA_BINARY,
-				       .len = IEEE80211_MAX_DATA_LEN },
-	[NL80211_ATTR_STA_AID] = { .type = NLA_U16 },
+	[NL80211_ATTR_BEACON_HEAD] =
+		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_beacon_head,
+				       IEEE80211_MAX_DATA_LEN),
+	[NL80211_ATTR_BEACON_TAIL] =
+		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
+				       IEEE80211_MAX_DATA_LEN),
+	[NL80211_ATTR_STA_AID] =
+		NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
 	[NL80211_ATTR_STA_FLAGS] = { .type = NLA_NESTED },
 	[NL80211_ATTR_STA_LISTEN_INTERVAL] = { .type = NLA_U16 },
 	[NL80211_ATTR_STA_SUPPORTED_RATES] = { .type = NLA_BINARY,
 					       .len = NL80211_MAX_SUPP_RATES },
-	[NL80211_ATTR_STA_PLINK_ACTION] = { .type = NLA_U8 },
+	[NL80211_ATTR_STA_PLINK_ACTION] =
+		NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_ACTIONS - 1),
+	[NL80211_ATTR_STA_TX_POWER_SETTING] =
+		NLA_POLICY_RANGE(NLA_U8,
+				 NL80211_TX_POWER_AUTOMATIC,
+				 NL80211_TX_POWER_FIXED),
+	[NL80211_ATTR_STA_TX_POWER] = { .type = NLA_S16 },
 	[NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 },
 	[NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ },
 	[NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY,
 				   .len = IEEE80211_MAX_MESH_ID_LEN },
-	[NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 },
+	[NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT,
 
 	[NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 },
 	[NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED },
@@ -267,11 +408,15 @@
 	[NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED },
 	[NL80211_ATTR_SUPPORT_MESH_AUTH] = { .type = NLA_FLAG },
 
-	[NL80211_ATTR_HT_CAPABILITY] = { .len = NL80211_HT_CAPABILITY_LEN },
+	[NL80211_ATTR_HT_CAPABILITY] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = NL80211_HT_CAPABILITY_LEN
+	},
 
 	[NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 },
-	[NL80211_ATTR_IE] = { .type = NLA_BINARY,
-			      .len = IEEE80211_MAX_DATA_LEN },
+	[NL80211_ATTR_IE] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+						   validate_ie_attr,
+						   IEEE80211_MAX_DATA_LEN),
 	[NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED },
 	[NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED },
 
@@ -281,7 +426,9 @@
 	[NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 },
 	[NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG },
 	[NL80211_ATTR_TIMED_OUT] = { .type = NLA_FLAG },
-	[NL80211_ATTR_USE_MFP] = { .type = NLA_U32 },
+	[NL80211_ATTR_USE_MFP] = NLA_POLICY_RANGE(NLA_U32,
+						  NL80211_MFP_NO,
+						  NL80211_MFP_OPTIONAL),
 	[NL80211_ATTR_STA_FLAGS2] = {
 		.len = sizeof(struct nl80211_sta_flag_update),
 	},
@@ -294,14 +441,19 @@
 	[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
 	[NL80211_ATTR_PID] = { .type = NLA_U32 },
 	[NL80211_ATTR_4ADDR] = { .type = NLA_U8 },
-	[NL80211_ATTR_PMKID] = { .len = WLAN_PMKID_LEN },
+	[NL80211_ATTR_PMKID] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = WLAN_PMKID_LEN
+	},
 	[NL80211_ATTR_DURATION] = { .type = NLA_U32 },
 	[NL80211_ATTR_COOKIE] = { .type = NLA_U64 },
 	[NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED },
 	[NL80211_ATTR_FRAME] = { .type = NLA_BINARY,
 				 .len = IEEE80211_MAX_DATA_LEN },
 	[NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, },
-	[NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
+	[NL80211_ATTR_PS_STATE] = NLA_POLICY_RANGE(NLA_U32,
+						   NL80211_PS_DISABLED,
+						   NL80211_PS_ENABLED),
 	[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
 	[NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG },
 	[NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 },
@@ -314,15 +466,23 @@
 	[NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG },
 	[NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
 	[NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED },
-	[NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 },
+	[NL80211_ATTR_STA_PLINK_STATE] =
+		NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_STATES - 1),
+	[NL80211_ATTR_MESH_PEER_AID] =
+		NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
 	[NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 },
 	[NL80211_ATTR_REKEY_DATA] = { .type = NLA_NESTED },
 	[NL80211_ATTR_SCAN_SUPP_RATES] = { .type = NLA_NESTED },
-	[NL80211_ATTR_HIDDEN_SSID] = { .type = NLA_U32 },
-	[NL80211_ATTR_IE_PROBE_RESP] = { .type = NLA_BINARY,
-					 .len = IEEE80211_MAX_DATA_LEN },
-	[NL80211_ATTR_IE_ASSOC_RESP] = { .type = NLA_BINARY,
-					 .len = IEEE80211_MAX_DATA_LEN },
+	[NL80211_ATTR_HIDDEN_SSID] =
+		NLA_POLICY_RANGE(NLA_U32,
+				 NL80211_HIDDEN_SSID_NOT_IN_USE,
+				 NL80211_HIDDEN_SSID_ZERO_CONTENTS),
+	[NL80211_ATTR_IE_PROBE_RESP] =
+		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
+				       IEEE80211_MAX_DATA_LEN),
+	[NL80211_ATTR_IE_ASSOC_RESP] =
+		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
+				       IEEE80211_MAX_DATA_LEN),
 	[NL80211_ATTR_ROAM_SUPPORT] = { .type = NLA_FLAG },
 	[NL80211_ATTR_SCHED_SCAN_MATCH] = { .type = NLA_NESTED },
 	[NL80211_ATTR_TX_NO_CCK_RATE] = { .type = NLA_FLAG },
@@ -346,11 +506,17 @@
 	[NL80211_ATTR_WDEV] = { .type = NLA_U64 },
 	[NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, },
-	[NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN },
+	[NL80211_ATTR_VHT_CAPABILITY] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = NL80211_VHT_CAPABILITY_LEN
+	},
 	[NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
-	[NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 },
-	[NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 },
-	[NL80211_ATTR_LOCAL_MESH_POWER_MODE] = {. type = NLA_U32 },
+	[NL80211_ATTR_P2P_CTWINDOW] = NLA_POLICY_MAX(NLA_U8, 127),
+	[NL80211_ATTR_P2P_OPPPS] = NLA_POLICY_MAX(NLA_U8, 1),
+	[NL80211_ATTR_LOCAL_MESH_POWER_MODE] =
+		NLA_POLICY_RANGE(NLA_U32,
+				 NL80211_MESH_POWER_UNKNOWN + 1,
+				 NL80211_MESH_POWER_MAX),
 	[NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 },
 	[NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED },
 	[NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },
@@ -363,7 +529,8 @@
 	[NL80211_ATTR_MDID] = { .type = NLA_U16 },
 	[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
 				  .len = IEEE80211_MAX_DATA_LEN },
-	[NL80211_ATTR_PEER_AID] = { .type = NLA_U16 },
+	[NL80211_ATTR_PEER_AID] =
+		NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
 	[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
 	[NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
 	[NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
@@ -378,36 +545,50 @@
 	[NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
 	[NL80211_ATTR_QOS_MAP] = { .type = NLA_BINARY,
 				   .len = IEEE80211_QOS_MAP_LEN_MAX },
-	[NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN },
+	[NL80211_ATTR_MAC_HINT] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = ETH_ALEN
+	},
 	[NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
 	[NL80211_ATTR_SOCKET_OWNER] = { .type = NLA_FLAG },
 	[NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY },
 	[NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG },
-	[NL80211_ATTR_TSID] = { .type = NLA_U8 },
-	[NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 },
+	[NL80211_ATTR_TSID] = NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_TIDS - 1),
+	[NL80211_ATTR_USER_PRIO] =
+		NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_UPS - 1),
 	[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
 	[NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
-	[NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN },
+	[NL80211_ATTR_MAC_MASK] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = ETH_ALEN
+	},
 	[NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG },
 	[NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 },
 	[NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 },
 	[NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG },
 	[NL80211_ATTR_PBSS] = { .type = NLA_FLAG },
 	[NL80211_ATTR_BSS_SELECT] = { .type = NLA_NESTED },
-	[NL80211_ATTR_STA_SUPPORT_P2P_PS] = { .type = NLA_U8 },
+	[NL80211_ATTR_STA_SUPPORT_P2P_PS] =
+		NLA_POLICY_MAX(NLA_U8, NUM_NL80211_P2P_PS_STATUS - 1),
 	[NL80211_ATTR_MU_MIMO_GROUP_DATA] = {
 		.len = VHT_MUMIMO_GROUPS_DATA_LEN
 	},
-	[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN },
-	[NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 },
+	[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = ETH_ALEN
+	},
+	[NL80211_ATTR_NAN_MASTER_PREF] = NLA_POLICY_MIN(NLA_U8, 1),
 	[NL80211_ATTR_BANDS] = { .type = NLA_U32 },
 	[NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
 	[NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY,
 				    .len = FILS_MAX_KEK_LEN },
-	[NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN },
+	[NL80211_ATTR_FILS_NONCES] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = 2 * FILS_NONCE_LEN
+	},
 	[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, },
-	[NL80211_ATTR_BSSID] = { .len = ETH_ALEN },
+	[NL80211_ATTR_BSSID] = { .type = NLA_EXACT_LEN_WARN, .len = ETH_ALEN },
 	[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI] = { .type = NLA_S8 },
 	[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST] = {
 		.len = sizeof(struct nl80211_bss_select_rssi_adjust)
@@ -420,7 +601,7 @@
 	[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] = { .type = NLA_U16 },
 	[NL80211_ATTR_FILS_ERP_RRK] = { .type = NLA_BINARY,
 					.len = FILS_ERP_MAX_RRK_LEN },
-	[NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 },
+	[NL80211_ATTR_FILS_CACHE_ID] = { .type = NLA_EXACT_LEN_WARN, .len = 2 },
 	[NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
 	[NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
 	[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG },
@@ -430,6 +611,19 @@
 	[NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
 	[NL80211_ATTR_HE_CAPABILITY] = { .type = NLA_BINARY,
 					 .len = NL80211_HE_MAX_CAPABILITY_LEN },
+
+	[NL80211_ATTR_FTM_RESPONDER] = {
+		.type = NLA_NESTED,
+		.validation_data = nl80211_ftm_responder_policy,
+	},
+	[NL80211_ATTR_TIMEOUT] = NLA_POLICY_MIN(NLA_U32, 1),
+	[NL80211_ATTR_PEER_MEASUREMENTS] =
+		NLA_POLICY_NESTED(nl80211_pmsr_attr_policy),
+	[NL80211_ATTR_AIRTIME_WEIGHT] = NLA_POLICY_MIN(NLA_U16, 1),
+	[NL80211_ATTR_SAE_PASSWORD] = { .type = NLA_BINARY,
+					.len = SAE_PASSWORD_MAX_LEN },
+	[NL80211_ATTR_TWT_RESPONDER] = { .type = NLA_FLAG },
+	[NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy),
 };
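
Editor's note: the bulk of the policy-table churn above trades bare { .type = ... } entries for the NLA_POLICY_* helpers from <net/netlink.h>, moving range and length checks out of the command handlers (several open-coded checks are deleted further down for exactly this reason). How the macros compose, with invented DEMO_* names:

#include <net/netlink.h>

enum {
	DEMO_ATTR_UNSPEC,
	DEMO_ATTR_RETRY,
	DEMO_ATTR_AID,
	DEMO_ATTR_BSSID,
	__DEMO_ATTR_LAST
};
#define DEMO_ATTR_MAX (__DEMO_ATTR_LAST - 1)

static const struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
	[DEMO_ATTR_RETRY] = NLA_POLICY_MIN(NLA_U8, 1),		/* reject 0 */
	[DEMO_ATTR_AID]   = NLA_POLICY_RANGE(NLA_U16, 1, 2007),
	[DEMO_ATTR_BSSID] = NLA_POLICY_ETH_ADDR,	/* exactly ETH_ALEN bytes */
};

The [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD } entry marks where strict validation begins: attributes from that type onward are rejected on policy violations, while older attributes keep the lenient legacy behavior for backward compatibility.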
 
 /* policy for the key attributes */
@@ -440,8 +634,9 @@
 	[NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 },
 	[NL80211_KEY_DEFAULT] = { .type = NLA_FLAG },
 	[NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG },
-	[NL80211_KEY_TYPE] = { .type = NLA_U32 },
+	[NL80211_KEY_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_KEYTYPES - 1),
 	[NL80211_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
+	[NL80211_KEY_MODE] = NLA_POLICY_RANGE(NLA_U8, 0, NL80211_KEY_SET_TX),
 };
 
 /* policy for the key default flags */
@@ -471,10 +666,13 @@
 nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
 	[NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 },
 	[NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 },
-	[NL80211_WOWLAN_TCP_DST_MAC] = { .len = ETH_ALEN },
+	[NL80211_WOWLAN_TCP_DST_MAC] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = ETH_ALEN
+	},
 	[NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 },
 	[NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 },
-	[NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .len = 1 },
+	[NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .type = NLA_MIN_LEN, .len = 1 },
 	[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ] = {
 		.len = sizeof(struct nl80211_wowlan_tcp_data_seq)
 	},
@@ -482,8 +680,8 @@
 		.len = sizeof(struct nl80211_wowlan_tcp_data_token)
 	},
 	[NL80211_WOWLAN_TCP_DATA_INTERVAL] = { .type = NLA_U32 },
-	[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 },
-	[NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 },
+	[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .type = NLA_MIN_LEN, .len = 1 },
+	[NL80211_WOWLAN_TCP_WAKE_MASK] = { .type = NLA_MIN_LEN, .len = 1 },
 };
 #endif /* CONFIG_PM */
 
@@ -491,24 +689,49 @@
 static const struct nla_policy
 nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
 	[NL80211_ATTR_COALESCE_RULE_DELAY] = { .type = NLA_U32 },
-	[NL80211_ATTR_COALESCE_RULE_CONDITION] = { .type = NLA_U32 },
+	[NL80211_ATTR_COALESCE_RULE_CONDITION] =
+		NLA_POLICY_RANGE(NLA_U32,
+				 NL80211_COALESCE_CONDITION_MATCH,
+				 NL80211_COALESCE_CONDITION_NO_MATCH),
 	[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN] = { .type = NLA_NESTED },
 };
 
 /* policy for GTK rekey offload attributes */
 static const struct nla_policy
 nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
-	[NL80211_REKEY_DATA_KEK] = { .len = NL80211_KEK_LEN },
-	[NL80211_REKEY_DATA_KCK] = { .len = NL80211_KCK_LEN },
-	[NL80211_REKEY_DATA_REPLAY_CTR] = { .len = NL80211_REPLAY_CTR_LEN },
+	[NL80211_REKEY_DATA_KEK] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = NL80211_KEK_LEN,
+	},
+	[NL80211_REKEY_DATA_KCK] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = NL80211_KCK_LEN,
+	},
+	[NL80211_REKEY_DATA_REPLAY_CTR] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = NL80211_REPLAY_CTR_LEN
+	},
+};
+
+static const struct nla_policy
+nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = {
+	[NL80211_BAND_2GHZ] = { .type = NLA_S32 },
+	[NL80211_BAND_5GHZ] = { .type = NLA_S32 },
+	[NL80211_BAND_6GHZ] = { .type = NLA_S32 },
+	[NL80211_BAND_60GHZ] = { .type = NLA_S32 },
 };
 
 static const struct nla_policy
 nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
 	[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY,
 						 .len = IEEE80211_MAX_SSID_LEN },
-	[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = { .len = ETH_ALEN },
+	[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = ETH_ALEN
+	},
 	[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
+	[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI] =
+		NLA_POLICY_NESTED(nl80211_match_band_rssi_policy),
 };
 
 static const struct nla_policy
@@ -537,7 +760,10 @@
 	[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG },
 	[NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 },
 	[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 },
-	[NL80211_NAN_FUNC_FOLLOW_UP_DEST] = { .len = ETH_ALEN },
+	[NL80211_NAN_FUNC_FOLLOW_UP_DEST] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = ETH_ALEN
+	},
 	[NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG },
 	[NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 },
 	[NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY,
@@ -567,23 +793,32 @@
 	[NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
 };
 
-static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
-				     struct netlink_callback *cb,
-				     struct cfg80211_registered_device **rdev,
-				     struct wireless_dev **wdev)
+int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
+			      struct cfg80211_registered_device **rdev,
+			      struct wireless_dev **wdev)
 {
 	int err;
 
 	if (!cb->args[0]) {
-		err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
-				  genl_family_attrbuf(&nl80211_fam),
-				  nl80211_fam.maxattr, nl80211_policy, NULL);
-		if (err)
-			return err;
+		struct nlattr **attrbuf;
 
-		*wdev = __cfg80211_wdev_from_attrs(
-					sock_net(skb->sk),
-					genl_family_attrbuf(&nl80211_fam));
+		attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
+				  GFP_KERNEL);
+		if (!attrbuf)
+			return -ENOMEM;
+
+		err = nlmsg_parse_deprecated(cb->nlh,
+					     GENL_HDRLEN + nl80211_fam.hdrsize,
+					     attrbuf, nl80211_fam.maxattr,
+					     nl80211_policy, NULL);
+		if (err) {
+			kfree(attrbuf);
+			return err;
+		}
+
+		*wdev = __cfg80211_wdev_from_attrs(sock_net(cb->skb->sk),
+						   attrbuf);
+		kfree(attrbuf);
 		if (IS_ERR(*wdev))
 			return PTR_ERR(*wdev);
 		*rdev = wiphy_to_rdev((*wdev)->wiphy);
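
Editor's note: dump setup used to parse into genl_family_attrbuf(), a single buffer shared by all dumpers of the family, which is unsafe once dumps can run concurrently; the code now kcalloc()s a private attribute array per invocation and must free it on every exit path. The distilled pattern, as the hunk above applies it:

	struct nlattr **attrbuf;
	int err;

	attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
	if (!attrbuf)
		return -ENOMEM;

	err = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
				     attrbuf, nl80211_fam.maxattr,
				     nl80211_policy, NULL);
	/* ... look up *wdev from attrbuf ... */
	kfree(attrbuf);		/* every return path must free it */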
@@ -614,39 +849,9 @@
 	return 0;
 }
 
-/* IE validation */
-static bool is_valid_ie_attr(const struct nlattr *attr)
-{
-	const u8 *pos;
-	int len;
-
-	if (!attr)
-		return true;
-
-	pos = nla_data(attr);
-	len = nla_len(attr);
-
-	while (len) {
-		u8 elemlen;
-
-		if (len < 2)
-			return false;
-		len -= 2;
-
-		elemlen = pos[1];
-		if (elemlen > len)
-			return false;
-
-		len -= elemlen;
-		pos += 2 + elemlen;
-	}
-
-	return true;
-}
-
 /* message building helper */
-static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
-				   int flags, u8 cmd)
+void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
+		     int flags, u8 cmd)
 {
 	/* since there is no private header just add the generic one */
 	return genlmsg_put(skb, portid, seq, &nl80211_fam, flags, cmd);
@@ -657,13 +862,13 @@
 {
 	int j;
 	struct nlattr *nl_wmm_rules =
-		nla_nest_start(msg, NL80211_FREQUENCY_ATTR_WMM);
+		nla_nest_start_noflag(msg, NL80211_FREQUENCY_ATTR_WMM);
 
 	if (!nl_wmm_rules)
 		goto nla_put_failure;
 
 	for (j = 0; j < IEEE80211_NUM_ACS; j++) {
-		struct nlattr *nl_wmm_rule = nla_nest_start(msg, j);
+		struct nlattr *nl_wmm_rule = nla_nest_start_noflag(msg, j);
 
 		if (!nl_wmm_rule)
 			goto nla_put_failure;
@@ -792,7 +997,7 @@
 		return false;						  \
 	} while (0)
 
-	txqattr = nla_nest_start(msg, attrtype);
+	txqattr = nla_nest_start_noflag(msg, attrtype);
 	if (!txqattr)
 		return false;
 
@@ -827,8 +1032,9 @@
 				 struct key_parse *k)
 {
 	struct nlattr *tb[NL80211_KEY_MAX + 1];
-	int err = nla_parse_nested(tb, NL80211_KEY_MAX, key,
-				   nl80211_key_policy, info->extack);
+	int err = nla_parse_nested_deprecated(tb, NL80211_KEY_MAX, key,
+					      nl80211_key_policy,
+					      info->extack);
 	if (err)
 		return err;
 
@@ -858,20 +1064,17 @@
 	if (tb[NL80211_KEY_CIPHER])
 		k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]);
 
-	if (tb[NL80211_KEY_TYPE]) {
+	if (tb[NL80211_KEY_TYPE])
 		k->type = nla_get_u32(tb[NL80211_KEY_TYPE]);
-		if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
-			return genl_err_attr(info, -EINVAL,
-					     tb[NL80211_KEY_TYPE]);
-	}
 
 	if (tb[NL80211_KEY_DEFAULT_TYPES]) {
 		struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
 
-		err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1,
-				       tb[NL80211_KEY_DEFAULT_TYPES],
-				       nl80211_key_default_policy,
-				       info->extack);
+		err = nla_parse_nested_deprecated(kdt,
+						  NUM_NL80211_KEY_DEFAULT_TYPES - 1,
+						  tb[NL80211_KEY_DEFAULT_TYPES],
+						  nl80211_key_default_policy,
+						  info->extack);
 		if (err)
 			return err;
 
@@ -879,6 +1082,9 @@
 		k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST];
 	}
 
+	if (tb[NL80211_KEY_MODE])
+		k->p.mode = nla_get_u8(tb[NL80211_KEY_MODE]);
+
 	return 0;
 }
 
@@ -910,21 +1116,16 @@
 	if (k->defmgmt)
 		k->def_multi = true;
 
-	if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
+	if (info->attrs[NL80211_ATTR_KEY_TYPE])
 		k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
-		if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) {
-			GENL_SET_ERR_MSG(info, "key type out of range");
-			return -EINVAL;
-		}
-	}
 
 	if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) {
 		struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
-		int err = nla_parse_nested(kdt,
-					   NUM_NL80211_KEY_DEFAULT_TYPES - 1,
-					   info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES],
-					   nl80211_key_default_policy,
-					   info->extack);
+		int err = nla_parse_nested_deprecated(kdt,
+						      NUM_NL80211_KEY_DEFAULT_TYPES - 1,
+						      info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES],
+						      nl80211_key_default_policy,
+						      info->extack);
 		if (err)
 			return err;
 
@@ -1113,7 +1314,7 @@
 
 static int nl80211_put_iftypes(struct sk_buff *msg, u32 attr, u16 ifmodes)
 {
-	struct nlattr *nl_modes = nla_nest_start(msg, attr);
+	struct nlattr *nl_modes = nla_nest_start_noflag(msg, attr);
 	int i;
 
 	if (!nl_modes)
@@ -1141,8 +1342,8 @@
 	struct nlattr *nl_combis;
 	int i, j;
 
-	nl_combis = nla_nest_start(msg,
-				NL80211_ATTR_INTERFACE_COMBINATIONS);
+	nl_combis = nla_nest_start_noflag(msg,
+					  NL80211_ATTR_INTERFACE_COMBINATIONS);
 	if (!nl_combis)
 		goto nla_put_failure;
 
@@ -1152,18 +1353,19 @@
 
 		c = &wiphy->iface_combinations[i];
 
-		nl_combi = nla_nest_start(msg, i + 1);
+		nl_combi = nla_nest_start_noflag(msg, i + 1);
 		if (!nl_combi)
 			goto nla_put_failure;
 
-		nl_limits = nla_nest_start(msg, NL80211_IFACE_COMB_LIMITS);
+		nl_limits = nla_nest_start_noflag(msg,
+						  NL80211_IFACE_COMB_LIMITS);
 		if (!nl_limits)
 			goto nla_put_failure;
 
 		for (j = 0; j < c->n_limits; j++) {
 			struct nlattr *nl_limit;
 
-			nl_limit = nla_nest_start(msg, j + 1);
+			nl_limit = nla_nest_start_noflag(msg, j + 1);
 			if (!nl_limit)
 				goto nla_put_failure;
 			if (nla_put_u32(msg, NL80211_IFACE_LIMIT_MAX,
@@ -1216,7 +1418,8 @@
 	if (!tcp)
 		return 0;
 
-	nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION);
+	nl_tcp = nla_nest_start_noflag(msg,
+				       NL80211_WOWLAN_TRIG_TCP_CONNECTION);
 	if (!nl_tcp)
 		return -ENOBUFS;
 
@@ -1256,7 +1459,8 @@
 	if (!rdev->wiphy.wowlan)
 		return 0;
 
-	nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
+	nl_wowlan = nla_nest_start_noflag(msg,
+					  NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
 	if (!nl_wowlan)
 		return -ENOBUFS;
 
@@ -1385,7 +1589,8 @@
 
 	if (sband->n_iftype_data) {
 		struct nlattr *nl_iftype_data =
-			nla_nest_start(msg, NL80211_BAND_ATTR_IFTYPE_DATA);
+			nla_nest_start_noflag(msg,
+					      NL80211_BAND_ATTR_IFTYPE_DATA);
 		int err;
 
 		if (!nl_iftype_data)
@@ -1394,7 +1599,7 @@
 		for (i = 0; i < sband->n_iftype_data; i++) {
 			struct nlattr *iftdata;
 
-			iftdata = nla_nest_start(msg, i + 1);
+			iftdata = nla_nest_start_noflag(msg, i + 1);
 			if (!iftdata)
 				return -ENOBUFS;
 
@@ -1409,13 +1614,22 @@
 		nla_nest_end(msg, nl_iftype_data);
 	}
 
+	/* add EDMG info */
+	if (sband->edmg_cap.channels &&
+	    (nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_CHANNELS,
+		       sband->edmg_cap.channels) ||
+	    nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_BW_CONFIG,
+		       sband->edmg_cap.bw_config)))
+
+		return -ENOBUFS;
+
 	/* add bitrates */
-	nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
+	nl_rates = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_RATES);
 	if (!nl_rates)
 		return -ENOBUFS;
 
 	for (i = 0; i < sband->n_bitrates; i++) {
-		nl_rate = nla_nest_start(msg, i);
+		nl_rate = nla_nest_start_noflag(msg, i);
 		if (!nl_rate)
 			return -ENOBUFS;
 
@@ -1448,12 +1662,12 @@
 	if (!mgmt_stypes)
 		return 0;
 
-	nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
+	nl_ifs = nla_nest_start_noflag(msg, NL80211_ATTR_TX_FRAME_TYPES);
 	if (!nl_ifs)
 		return -ENOBUFS;
 
 	for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
-		nl_ftypes = nla_nest_start(msg, ift);
+		nl_ftypes = nla_nest_start_noflag(msg, ift);
 		if (!nl_ftypes)
 			return -ENOBUFS;
 		i = 0;
@@ -1471,12 +1685,12 @@
 
 	nla_nest_end(msg, nl_ifs);
 
-	nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
+	nl_ifs = nla_nest_start_noflag(msg, NL80211_ATTR_RX_FRAME_TYPES);
 	if (!nl_ifs)
 		return -ENOBUFS;
 
 	for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
-		nl_ftypes = nla_nest_start(msg, ift);
+		nl_ftypes = nla_nest_start_noflag(msg, ift);
 		if (!nl_ftypes)
 			return -ENOBUFS;
 		i = 0;
@@ -1585,6 +1799,91 @@
 	return -ENOBUFS;
 }
 
+static int
+nl80211_send_pmsr_ftm_capa(const struct cfg80211_pmsr_capabilities *cap,
+			   struct sk_buff *msg)
+{
+	struct nlattr *ftm;
+
+	if (!cap->ftm.supported)
+		return 0;
+
+	ftm = nla_nest_start_noflag(msg, NL80211_PMSR_TYPE_FTM);
+	if (!ftm)
+		return -ENOBUFS;
+
+	if (cap->ftm.asap && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_ASAP))
+		return -ENOBUFS;
+	if (cap->ftm.non_asap &&
+	    nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP))
+		return -ENOBUFS;
+	if (cap->ftm.request_lci &&
+	    nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI))
+		return -ENOBUFS;
+	if (cap->ftm.request_civicloc &&
+	    nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC))
+		return -ENOBUFS;
+	if (nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES,
+			cap->ftm.preambles))
+		return -ENOBUFS;
+	if (nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS,
+			cap->ftm.bandwidths))
+		return -ENOBUFS;
+	if (cap->ftm.max_bursts_exponent >= 0 &&
+	    nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT,
+			cap->ftm.max_bursts_exponent))
+		return -ENOBUFS;
+	if (cap->ftm.max_ftms_per_burst &&
+	    nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST,
+			cap->ftm.max_ftms_per_burst))
+		return -ENOBUFS;
+
+	nla_nest_end(msg, ftm);
+	return 0;
+}
+
+static int nl80211_send_pmsr_capa(struct cfg80211_registered_device *rdev,
+				  struct sk_buff *msg)
+{
+	const struct cfg80211_pmsr_capabilities *cap = rdev->wiphy.pmsr_capa;
+	struct nlattr *pmsr, *caps;
+
+	if (!cap)
+		return 0;
+
+	/*
+	 * we don't need to clean up anything here since the caller
+	 * will genlmsg_cancel() if we fail
+	 */
+
+	pmsr = nla_nest_start_noflag(msg, NL80211_ATTR_PEER_MEASUREMENTS);
+	if (!pmsr)
+		return -ENOBUFS;
+
+	if (nla_put_u32(msg, NL80211_PMSR_ATTR_MAX_PEERS, cap->max_peers))
+		return -ENOBUFS;
+
+	if (cap->report_ap_tsf &&
+	    nla_put_flag(msg, NL80211_PMSR_ATTR_REPORT_AP_TSF))
+		return -ENOBUFS;
+
+	if (cap->randomize_mac_addr &&
+	    nla_put_flag(msg, NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR))
+		return -ENOBUFS;
+
+	caps = nla_nest_start_noflag(msg, NL80211_PMSR_ATTR_TYPE_CAPA);
+	if (!caps)
+		return -ENOBUFS;
+
+	if (nl80211_send_pmsr_ftm_capa(cap, msg))
+		return -ENOBUFS;
+
+	nla_nest_end(msg, caps);
+	nla_nest_end(msg, pmsr);
+
+	return 0;
+}
+
 struct nl80211_dump_wiphy_state {
 	s64 filter_wiphy;
 	long start;
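
Editor's note: the new peer-measurement capability dump follows a convention that is easy to misread: inside a nest, every failed nla_put_*() simply returns -ENOBUFS without nla_nest_cancel(), because, as the comment in nl80211_send_pmsr_capa() says, the caller genlmsg_cancel()s the entire message on error. A minimal sketch of the style (attribute ids invented):

enum { DEMO_ATTR_CAPS = 1, DEMO_ATTR_MAX_PEERS };

static int example_put_caps(struct sk_buff *msg, u32 max_peers)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(msg, DEMO_ATTR_CAPS);
	if (!nest)
		return -ENOBUFS;

	if (nla_put_u32(msg, DEMO_ATTR_MAX_PEERS, max_peers))
		return -ENOBUFS;	/* no cancel: caller discards the message */

	nla_nest_end(msg, nest);
	return 0;
}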
@@ -1676,6 +1975,7 @@
 		state->split_start++;
 		if (state->split)
 			break;
+		/* fall through */
 	case 1:
 		if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
 			    sizeof(u32) * rdev->wiphy.n_cipher_suites,
@@ -1722,6 +2022,7 @@
 		state->split_start++;
 		if (state->split)
 			break;
+		/* fall through */
 	case 2:
 		if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
 					rdev->wiphy.interface_modes))
@@ -1729,8 +2030,10 @@
 		state->split_start++;
 		if (state->split)
 			break;
+		/* fall through */
 	case 3:
-		nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
+		nl_bands = nla_nest_start_noflag(msg,
+						 NL80211_ATTR_WIPHY_BANDS);
 		if (!nl_bands)
 			goto nla_put_failure;
 
@@ -1743,7 +2046,7 @@
 			if (!sband)
 				continue;
 
-			nl_band = nla_nest_start(msg, band);
+			nl_band = nla_nest_start_noflag(msg, band);
 			if (!nl_band)
 				goto nla_put_failure;
 
@@ -1754,17 +2057,19 @@
 				state->chan_start++;
 				if (state->split)
 					break;
+				/* fall through */
 			default:
 				/* add frequencies */
-				nl_freqs = nla_nest_start(
-					msg, NL80211_BAND_ATTR_FREQS);
+				nl_freqs = nla_nest_start_noflag(msg,
+								 NL80211_BAND_ATTR_FREQS);
 				if (!nl_freqs)
 					goto nla_put_failure;
 
 				for (i = state->chan_start - 1;
 				     i < sband->n_channels;
 				     i++) {
-					nl_freq = nla_nest_start(msg, i);
+					nl_freq = nla_nest_start_noflag(msg,
+									i);
 					if (!nl_freq)
 						goto nla_put_failure;
 
@@ -1807,8 +2112,10 @@
 			state->split_start++;
 		if (state->split)
 			break;
+		/* fall through */
 	case 4:
-		nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
+		nl_cmds = nla_nest_start_noflag(msg,
+						NL80211_ATTR_SUPPORTED_COMMANDS);
 		if (!nl_cmds)
 			goto nla_put_failure;
 
@@ -1826,6 +2133,7 @@
 				CMD(add_tx_ts, ADD_TX_TS);
 			CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST);
 			CMD(update_connect_params, UPDATE_CONNECT_PARAMS);
+			CMD(update_ft_ies, UPDATE_FT_IES);
 		}
 #undef CMD
 
@@ -1833,6 +2141,7 @@
 		state->split_start++;
 		if (state->split)
 			break;
+		/* fall through */
 	case 5:
 		if (rdev->ops->remain_on_channel &&
 		    (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
@@ -1850,6 +2159,7 @@
 		state->split_start++;
 		if (state->split)
 			break;
+		/* fall through */
 	case 6:
 #ifdef CONFIG_PM
 		if (nl80211_send_wowlan(msg, rdev, state->split))
@@ -1860,6 +2170,7 @@
 #else
 		state->split_start++;
 #endif
+		/* fall through */
 	case 7:
 		if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
 					rdev->wiphy.software_iftypes))
@@ -1872,6 +2183,7 @@
 		state->split_start++;
 		if (state->split)
 			break;
+		/* fall through */
 	case 8:
 		if ((rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
 		    nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME,
@@ -1929,6 +2241,30 @@
 			    rdev->wiphy.vht_capa_mod_mask))
 			goto nla_put_failure;
 
+		if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN,
+			    rdev->wiphy.perm_addr))
+			goto nla_put_failure;
+
+		if (!is_zero_ether_addr(rdev->wiphy.addr_mask) &&
+		    nla_put(msg, NL80211_ATTR_MAC_MASK, ETH_ALEN,
+			    rdev->wiphy.addr_mask))
+			goto nla_put_failure;
+
+		if (rdev->wiphy.n_addresses > 1) {
+			void *attr;
+
+			attr = nla_nest_start(msg, NL80211_ATTR_MAC_ADDRS);
+			if (!attr)
+				goto nla_put_failure;
+
+			for (i = 0; i < rdev->wiphy.n_addresses; i++)
+				if (nla_put(msg, i + 1, ETH_ALEN,
+					    rdev->wiphy.addresses[i].addr))
+					goto nla_put_failure;
+
+			nla_nest_end(msg, attr);
+		}
+
 		state->split_start++;
 		break;
 	case 10:
@@ -1952,7 +2288,8 @@
 			const struct nl80211_vendor_cmd_info *info;
 			struct nlattr *nested;
 
-			nested = nla_nest_start(msg, NL80211_ATTR_VENDOR_DATA);
+			nested = nla_nest_start_noflag(msg,
+						       NL80211_ATTR_VENDOR_DATA);
 			if (!nested)
 				goto nla_put_failure;
 
@@ -1968,8 +2305,8 @@
 			const struct nl80211_vendor_cmd_info *info;
 			struct nlattr *nested;
 
-			nested = nla_nest_start(msg,
-						NL80211_ATTR_VENDOR_EVENTS);
+			nested = nla_nest_start_noflag(msg,
+						       NL80211_ATTR_VENDOR_EVENTS);
 			if (!nested)
 				goto nla_put_failure;
 
@@ -2006,7 +2343,8 @@
 			struct nlattr *nested;
 			u32 bss_select_support = rdev->wiphy.bss_select_support;
 
-			nested = nla_nest_start(msg, NL80211_ATTR_BSS_SELECT);
+			nested = nla_nest_start_noflag(msg,
+						       NL80211_ATTR_BSS_SELECT);
 			if (!nested)
 				goto nla_put_failure;
 
@@ -2028,8 +2366,8 @@
 		    rdev->wiphy.iftype_ext_capab) {
 			struct nlattr *nested_ext_capab, *nested;
 
-			nested = nla_nest_start(msg,
-						NL80211_ATTR_IFTYPE_EXT_CAPA);
+			nested = nla_nest_start_noflag(msg,
+						       NL80211_ATTR_IFTYPE_EXT_CAPA);
 			if (!nested)
 				goto nla_put_failure;
 
@@ -2039,7 +2377,8 @@
 
 				capab = &rdev->wiphy.iftype_ext_capab[i];
 
-				nested_ext_capab = nla_nest_start(msg, i);
+				nested_ext_capab = nla_nest_start_noflag(msg,
+									 i);
 				if (!nested_ext_capab ||
 				    nla_put_u32(msg, NL80211_ATTR_IFTYPE,
 						capab->iftype) ||
@@ -2088,6 +2427,21 @@
 				goto nla_put_failure;
 		}
 
+		state->split_start++;
+		break;
+	case 14:
+		if (nl80211_send_pmsr_capa(rdev, msg))
+			goto nla_put_failure;
+
+		state->split_start++;
+		break;
+	case 15:
+		if (rdev->wiphy.akm_suites &&
+		    nla_put(msg, NL80211_ATTR_AKM_SUITES,
+			    sizeof(u32) * rdev->wiphy.n_akm_suites,
+			    rdev->wiphy.akm_suites))
+			goto nla_put_failure;
+
 		/* done */
 		state->split_start = 0;
 		break;
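
Editor's note: the /* fall through */ comments sprinkled through the wiphy dump above are not cosmetic. The function is a resumable state machine: when a split dump fills the current message it breaks out and continues from the saved case on the next message, and when not splitting it deliberately falls through each case, so with -Wimplicit-fallthrough the intentional drops must be annotated. The skeleton, with chunk contents elided:

	switch (state->split_start) {
	case 0:
		/* ... emit chunk 0 ... */
		state->split_start++;
		if (state->split)
			break;	/* resume at case 1 in the next message */
		/* fall through */
	case 1:
		/* ... emit chunk 1 ... */
		state->split_start = 0;	/* done */
		break;
	}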
@@ -2105,12 +2459,21 @@
 				    struct netlink_callback *cb,
 				    struct nl80211_dump_wiphy_state *state)
 {
-	struct nlattr **tb = genl_family_attrbuf(&nl80211_fam);
-	int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb,
-			      nl80211_fam.maxattr, nl80211_policy, NULL);
+	struct nlattr **tb = kcalloc(NUM_NL80211_ATTR, sizeof(*tb), GFP_KERNEL);
+	int ret;
+
+	if (!tb)
+		return -ENOMEM;
+
+	ret = nlmsg_parse_deprecated(cb->nlh,
+				     GENL_HDRLEN + nl80211_fam.hdrsize,
+				     tb, nl80211_fam.maxattr,
+				     nl80211_policy, NULL);
 	/* ignore parse errors for backward compatibility */
-	if (ret)
-		return 0;
+	if (ret) {
+		ret = 0;
+		goto out;
+	}
 
 	state->split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP];
 	if (tb[NL80211_ATTR_WIPHY])
@@ -2123,8 +2486,10 @@
 		int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]);
 
 		netdev = __dev_get_by_index(sock_net(skb->sk), ifidx);
-		if (!netdev)
-			return -ENODEV;
+		if (!netdev) {
+			ret = -ENODEV;
+			goto out;
+		}
 		if (netdev->ieee80211_ptr) {
 			rdev = wiphy_to_rdev(
 				netdev->ieee80211_ptr->wiphy);
@@ -2132,7 +2497,10 @@
 		}
 	}
 
-	return 0;
+	ret = 0;
+out:
+	kfree(tb);
+	return ret;
 }
 
 static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
@@ -2288,16 +2656,20 @@
 		wdev->iftype == NL80211_IFTYPE_P2P_GO;
 }
 
-static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
-				 struct genl_info *info,
-				 struct cfg80211_chan_def *chandef)
+int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
+			  struct genl_info *info,
+			  struct cfg80211_chan_def *chandef)
 {
+	struct netlink_ext_ack *extack = info->extack;
+	struct nlattr **attrs = info->attrs;
 	u32 control_freq;
 
-	if (!info->attrs[NL80211_ATTR_WIPHY_FREQ])
+	if (!attrs[NL80211_ATTR_WIPHY_FREQ])
 		return -EINVAL;
 
-	control_freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+	control_freq = nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ]);
+
+	memset(chandef, 0, sizeof(*chandef));
 
 	chandef->chan = ieee80211_get_channel(&rdev->wiphy, control_freq);
 	chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
@@ -2305,14 +2677,16 @@
 	chandef->center_freq2 = 0;
 
 	/* Primary channel not allowed */
-	if (!chandef->chan || chandef->chan->flags & IEEE80211_CHAN_DISABLED)
+	if (!chandef->chan || chandef->chan->flags & IEEE80211_CHAN_DISABLED) {
+		NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ],
+				    "Channel is disabled");
 		return -EINVAL;
+	}
 
-	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
+	if (attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
 		enum nl80211_channel_type chantype;
 
-		chantype = nla_get_u32(
-				info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
+		chantype = nla_get_u32(attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
 
 		switch (chantype) {
 		case NL80211_CHAN_NO_HT:
@@ -2322,42 +2696,68 @@
 			cfg80211_chandef_create(chandef, chandef->chan,
 						chantype);
 			/* user input for center_freq is incorrect */
-			if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
-			    chandef->center_freq1 != nla_get_u32(
-					info->attrs[NL80211_ATTR_CENTER_FREQ1]))
+			if (attrs[NL80211_ATTR_CENTER_FREQ1] &&
+			    chandef->center_freq1 != nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1])) {
+				NL_SET_ERR_MSG_ATTR(extack,
+						    attrs[NL80211_ATTR_CENTER_FREQ1],
+						    "bad center frequency 1");
 				return -EINVAL;
+			}
 			/* center_freq2 must be zero */
-			if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
-			    nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
+			if (attrs[NL80211_ATTR_CENTER_FREQ2] &&
+			    nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2])) {
+				NL_SET_ERR_MSG_ATTR(extack,
+						    attrs[NL80211_ATTR_CENTER_FREQ2],
+						    "center frequency 2 can't be used");
 				return -EINVAL;
+			}
 			break;
 		default:
+			NL_SET_ERR_MSG_ATTR(extack,
+					    attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE],
+					    "invalid channel type");
 			return -EINVAL;
 		}
-	} else if (info->attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
+	} else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
 		chandef->width =
-			nla_get_u32(info->attrs[NL80211_ATTR_CHANNEL_WIDTH]);
-		if (info->attrs[NL80211_ATTR_CENTER_FREQ1])
+			nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
+		if (attrs[NL80211_ATTR_CENTER_FREQ1])
 			chandef->center_freq1 =
-				nla_get_u32(
-					info->attrs[NL80211_ATTR_CENTER_FREQ1]);
-		if (info->attrs[NL80211_ATTR_CENTER_FREQ2])
+				nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
+		if (attrs[NL80211_ATTR_CENTER_FREQ2])
 			chandef->center_freq2 =
-				nla_get_u32(
-					info->attrs[NL80211_ATTR_CENTER_FREQ2]);
+				nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]);
 	}
 
-	if (!cfg80211_chandef_valid(chandef))
+	if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) {
+		chandef->edmg.channels =
+		      nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]);
+
+		if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG])
+			chandef->edmg.bw_config =
+		     nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]);
+	} else {
+		chandef->edmg.bw_config = 0;
+		chandef->edmg.channels = 0;
+	}
+
+	if (!cfg80211_chandef_valid(chandef)) {
+		NL_SET_ERR_MSG(extack, "invalid channel definition");
 		return -EINVAL;
+	}
 
 	if (!cfg80211_chandef_usable(&rdev->wiphy, chandef,
-				     IEEE80211_CHAN_DISABLED))
+				     IEEE80211_CHAN_DISABLED)) {
+		NL_SET_ERR_MSG(extack, "(extension) channel is disabled");
 		return -EINVAL;
+	}
 
 	if ((chandef->width == NL80211_CHAN_WIDTH_5 ||
 	     chandef->width == NL80211_CHAN_WIDTH_10) &&
-	    !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ))
+	    !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ)) {
+		NL_SET_ERR_MSG(extack, "5/10 MHz not supported");
 		return -EINVAL;
+	}
 
 	return 0;
 }
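
Editor's note: nl80211_parse_chandef() loses its static (it is now called from other cfg80211 code, hence the attrs/extack locals) and starts reporting *why* a channel definition was rejected through the netlink extended ACK, which userspace can print verbatim instead of guessing at a bare -EINVAL. The two reporting forms, in a hypothetical check (the helper, freq_attr, and usable flag are illustrative):

#include <net/netlink.h>

static int example_check(struct netlink_ext_ack *extack,
			 struct nlattr *freq_attr, bool usable)
{
	if (!freq_attr) {
		/* message only, no attribute to point at */
		NL_SET_ERR_MSG(extack, "frequency is required");
		return -EINVAL;
	}
	if (!usable) {
		/* ties the message to the offending attribute */
		NL_SET_ERR_MSG_ATTR(extack, freq_attr, "Channel is disabled");
		return -EINVAL;
	}
	return 0;
}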
@@ -2532,10 +2932,11 @@
 		nla_for_each_nested(nl_txq_params,
 				    info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS],
 				    rem_txq_params) {
-			result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX,
-						  nl_txq_params,
-						  txq_params_policy,
-						  info->extack);
+			result = nla_parse_nested_deprecated(tb,
+							     NL80211_TXQ_ATTR_MAX,
+							     nl_txq_params,
+							     txq_params_policy,
+							     info->extack);
 			if (result)
 				return result;
 			result = parse_txq_params(tb, &txq_params);
@@ -2617,8 +3018,6 @@
 	if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) {
 		retry_short = nla_get_u8(
 			info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]);
-		if (retry_short == 0)
-			return -EINVAL;
 
 		changed |= WIPHY_PARAM_RETRY_SHORT;
 	}
@@ -2626,8 +3025,6 @@
 	if (info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]) {
 		retry_long = nla_get_u8(
 			info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]);
-		if (retry_long == 0)
-			return -EINVAL;
 
 		changed |= WIPHY_PARAM_RETRY_LONG;
 	}
@@ -2750,12 +3147,6 @@
 	return 0;
 }
 
-static inline u64 wdev_id(struct wireless_dev *wdev)
-{
-	return (u64)wdev->identifier |
-	       ((u64)wiphy_to_rdev(wdev->wiphy)->wiphy_idx << 32);
-}
-
 static int nl80211_send_chandef(struct sk_buff *msg,
 				const struct cfg80211_chan_def *chandef)
 {
@@ -2788,14 +3179,15 @@
 
 static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags,
 			      struct cfg80211_registered_device *rdev,
-			      struct wireless_dev *wdev, bool removal)
+			      struct wireless_dev *wdev,
+			      enum nl80211_commands cmd)
 {
 	struct net_device *dev = wdev->netdev;
-	u8 cmd = NL80211_CMD_NEW_INTERFACE;
 	void *hdr;
 
-	if (removal)
-		cmd = NL80211_CMD_DEL_INTERFACE;
+	WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
+		cmd != NL80211_CMD_DEL_INTERFACE &&
+		cmd != NL80211_CMD_SET_INTERFACE);
 
 	hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
 	if (!hdr)
@@ -2819,7 +3211,7 @@
 
 	if (rdev->ops->get_channel) {
 		int ret;
-		struct cfg80211_chan_def chandef;
+		struct cfg80211_chan_def chandef = {};
 
 		ret = rdev_get_channel(rdev, wdev, &chandef);
 		if (ret == 0) {
@@ -2943,7 +3335,8 @@
 			}
 			if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid,
 					       cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					       rdev, wdev, false) < 0) {
+					       rdev, wdev,
+					       NL80211_CMD_NEW_INTERFACE) < 0) {
 				goto out;
 			}
 			if_idx++;
@@ -2973,7 +3366,7 @@
 		return -ENOMEM;
 
 	if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0,
-			       rdev, wdev, false) < 0) {
+			       rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
 	}
@@ -3000,8 +3393,7 @@
 	if (!nla)
 		return -EINVAL;
 
-	if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX, nla,
-			     mntr_flags_policy, NULL))
+	if (nla_parse_nested_deprecated(flags, NL80211_MNTR_FLAG_MAX, nla, mntr_flags_policy, NULL))
 		return -EINVAL;
 
 	for (flag = 1; flag <= NL80211_MNTR_FLAG_MAX; flag++)
@@ -3119,8 +3511,6 @@
 		ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]);
 		if (otype != ntype)
 			change = true;
-		if (ntype > NL80211_IFTYPE_MAX)
-			return -EINVAL;
 	}
 
 	if (info->attrs[NL80211_ATTR_MESH_ID]) {
@@ -3165,6 +3555,12 @@
 	if (!err && params.use_4addr != -1)
 		dev->ieee80211_ptr->use_4addr = params.use_4addr;
 
+	if (change && !err) {
+		struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+		nl80211_notify_iface(rdev, wdev, NL80211_CMD_SET_INTERFACE);
+	}
+
 	return err;
 }
 
@@ -3185,14 +3581,10 @@
 	if (!info->attrs[NL80211_ATTR_IFNAME])
 		return -EINVAL;
 
-	if (info->attrs[NL80211_ATTR_IFTYPE]) {
+	if (info->attrs[NL80211_ATTR_IFTYPE])
 		type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]);
-		if (type > NL80211_IFTYPE_MAX)
-			return -EINVAL;
-	}
 
-	if (!rdev->ops->add_virtual_intf ||
-	    !(rdev->wiphy.interface_modes & (1 << type)))
+	if (!rdev->ops->add_virtual_intf)
 		return -EOPNOTSUPP;
 
 	if ((type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN ||
@@ -3211,6 +3603,9 @@
 			return err;
 	}
 
+	if (!cfg80211_iftype_allowed(&rdev->wiphy, type, params.use_4addr, 0))
+		return -EOPNOTSUPP;
+
 	err = nl80211_parse_mon_options(rdev, type, info, &params);
 	if (err < 0)
 		return err;
@@ -3252,35 +3647,18 @@
 		 * P2P Device and NAN do not have a netdev, so don't go
 		 * through the netdev notifier and must be added here
 		 */
-		mutex_init(&wdev->mtx);
-		INIT_LIST_HEAD(&wdev->event_list);
-		spin_lock_init(&wdev->event_lock);
-		INIT_LIST_HEAD(&wdev->mgmt_registrations);
-		spin_lock_init(&wdev->mgmt_registrations_lock);
-
-		wdev->identifier = ++rdev->wdev_id;
-		list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list);
-		rdev->devlist_generation++;
+		cfg80211_init_wdev(rdev, wdev);
 		break;
 	default:
 		break;
 	}
 
 	if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0,
-			       rdev, wdev, false) < 0) {
+			       rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
 	}
 
-	/*
-	 * For wdevs which have no associated netdev object (e.g. of type
-	 * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here.
-	 * For all other types, the event will be generated from the
-	 * netdev notifier
-	 */
-	if (!wdev->netdev)
-		nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
-
 	return genlmsg_reply(msg, info);
 }
 
@@ -3344,7 +3722,7 @@
 			 params->cipher)))
 		goto nla_put_failure;
 
-	key = nla_nest_start(cookie->msg, NL80211_ATTR_KEY);
+	key = nla_nest_start_noflag(cookie->msg, NL80211_ATTR_KEY);
 	if (!key)
 		goto nla_put_failure;
 
@@ -3359,7 +3737,7 @@
 			 params->cipher)))
 		goto nla_put_failure;
 
-	if (nla_put_u8(cookie->msg, NL80211_ATTR_KEY_IDX, cookie->idx))
+	if (nla_put_u8(cookie->msg, NL80211_KEY_IDX, cookie->idx))
 		goto nla_put_failure;
 
 	nla_nest_end(cookie->msg, key);
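/*
 * Illustrative sketch (not part of this patch): paired with the strict
 * parsing change, nla_nest_start() now ORs NLA_F_NESTED into the emitted
 * attribute type, changing the wire format. nl80211 must keep the old
 * format for existing userspace, hence the wholesale conversion to:
 *
 *	nest = nla_nest_start_noflag(msg, attrtype);   (bare attrtype)
 *
 * versus the new default:
 *
 *	nest = nla_nest_start(msg, attrtype);   (attrtype | NLA_F_NESTED)
 */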
@@ -3386,9 +3764,6 @@
 	if (info->attrs[NL80211_ATTR_KEY_IDX])
 		key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
 
-	if (key_idx > 5)
-		return -EINVAL;
-
 	if (info->attrs[NL80211_ATTR_MAC])
 		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
@@ -3396,8 +3771,6 @@
 	if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
 		u32 kt = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
 
-		if (kt >= NUM_NL80211_KEYTYPES)
-			return -EINVAL;
 		if (kt != NL80211_KEYTYPE_GROUP &&
 		    kt != NL80211_KEYTYPE_PAIRWISE)
 			return -EINVAL;
@@ -3462,8 +3835,11 @@
 	if (key.idx < 0)
 		return -EINVAL;
 
-	/* only support setting default key */
-	if (!key.def && !key.defmgmt)
+	/* Only support setting default key and
+	 * Extended Key ID action NL80211_KEY_SET_TX.
+	 */
+	if (!key.def && !key.defmgmt &&
+	    key.p.mode != NL80211_KEY_SET_TX)
 		return -EINVAL;
 
 	wdev_lock(dev->ieee80211_ptr);
@@ -3487,7 +3863,7 @@
 #ifdef CONFIG_CFG80211_WEXT
 		dev->ieee80211_ptr->wext.default_key = key.idx;
 #endif
-	} else {
+	} else if (key.defmgmt) {
 		if (key.def_uni || !key.def_multi) {
 			err = -EINVAL;
 			goto out;
@@ -3509,8 +3885,25 @@
 #ifdef CONFIG_CFG80211_WEXT
 		dev->ieee80211_ptr->wext.default_mgmt_key = key.idx;
 #endif
-	}
+	} else if (key.p.mode == NL80211_KEY_SET_TX &&
+		   wiphy_ext_feature_isset(&rdev->wiphy,
+					   NL80211_EXT_FEATURE_EXT_KEY_ID)) {
+		u8 *mac_addr = NULL;
 
+		if (info->attrs[NL80211_ATTR_MAC])
+			mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+		if (!mac_addr || key.idx < 0 || key.idx > 1) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		err = rdev_add_key(rdev, dev, key.idx,
+				   NL80211_KEYTYPE_PAIRWISE,
+				   mac_addr, &key.p);
+	} else {
+		err = -EINVAL;
+	}
  out:
 	wdev_unlock(dev->ieee80211_ptr);
 
@@ -3671,8 +4064,7 @@
 	if (n_entries > wiphy->max_acl_mac_addrs)
 		return ERR_PTR(-ENOTSUPP);
 
-	acl = kzalloc(sizeof(*acl) + (sizeof(struct mac_address) * n_entries),
-		      GFP_KERNEL);
+	acl = kzalloc(struct_size(acl, mac_addrs, n_entries), GFP_KERNEL);
 	if (!acl)
 		return ERR_PTR(-ENOMEM);
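/*
 * Illustrative sketch (not part of this patch): struct_size(), from
 * <linux/overflow.h>, replaces the open-coded
 * "sizeof(*acl) + n_entries * sizeof(struct mac_address)". For a struct
 * ending in a flexible array member it computes the same total size but
 * saturates at SIZE_MAX on arithmetic overflow, so a huge n_entries makes
 * kzalloc() fail cleanly instead of returning an undersized buffer:
 *
 *	struct cfg80211_acl_data {
 *		enum nl80211_acl_policy acl_policy;
 *		int n_acl_entries;
 *		struct mac_address mac_addrs[];
 *	};
 *
 *	acl = kzalloc(struct_size(acl, mac_addrs, n_entries), GFP_KERNEL);
 */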
 
@@ -3831,7 +4223,10 @@
 				    .len = NL80211_MAX_SUPP_RATES },
 	[NL80211_TXRATE_HT] = { .type = NLA_BINARY,
 				.len = NL80211_MAX_SUPP_HT_RATES },
-	[NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)},
+	[NL80211_TXRATE_VHT] = {
+		.type = NLA_EXACT_LEN_WARN,
+		.len = sizeof(struct nl80211_txrate_vht),
+	},
 	[NL80211_TXRATE_GI] = { .type = NLA_U8 },
 };
 
@@ -3882,8 +4277,10 @@
 		sband = rdev->wiphy.bands[band];
 		if (sband == NULL)
 			return -EINVAL;
-		err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates,
-				       nl80211_txattr_policy, info->extack);
+		err = nla_parse_nested_deprecated(tb, NL80211_TXRATE_MAX,
+						  tx_rates,
+						  nl80211_txattr_policy,
+						  info->extack);
 		if (err)
 			return err;
 		if (tb[NL80211_TXRATE_LEGACY]) {
@@ -3998,16 +4395,12 @@
 	return 0;
 }
 
-static int nl80211_parse_beacon(struct nlattr *attrs[],
+static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
+				struct nlattr *attrs[],
 				struct cfg80211_beacon_data *bcn)
 {
 	bool haveinfo = false;
-
-	if (!is_valid_ie_attr(attrs[NL80211_ATTR_BEACON_TAIL]) ||
-	    !is_valid_ie_attr(attrs[NL80211_ATTR_IE]) ||
-	    !is_valid_ie_attr(attrs[NL80211_ATTR_IE_PROBE_RESP]) ||
-	    !is_valid_ie_attr(attrs[NL80211_ATTR_IE_ASSOC_RESP]))
-		return -EINVAL;
+	int err;
 
 	memset(bcn, 0, sizeof(*bcn));
 
@@ -4052,6 +4445,64 @@
 		bcn->probe_resp_len = nla_len(attrs[NL80211_ATTR_PROBE_RESP]);
 	}
 
+	if (attrs[NL80211_ATTR_FTM_RESPONDER]) {
+		struct nlattr *tb[NL80211_FTM_RESP_ATTR_MAX + 1];
+
+		err = nla_parse_nested_deprecated(tb,
+						  NL80211_FTM_RESP_ATTR_MAX,
+						  attrs[NL80211_ATTR_FTM_RESPONDER],
+						  NULL, NULL);
+		if (err)
+			return err;
+
+		if (tb[NL80211_FTM_RESP_ATTR_ENABLED] &&
+		    wiphy_ext_feature_isset(&rdev->wiphy,
+					    NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER))
+			bcn->ftm_responder = 1;
+		else
+			return -EOPNOTSUPP;
+
+		if (tb[NL80211_FTM_RESP_ATTR_LCI]) {
+			bcn->lci = nla_data(tb[NL80211_FTM_RESP_ATTR_LCI]);
+			bcn->lci_len = nla_len(tb[NL80211_FTM_RESP_ATTR_LCI]);
+		}
+
+		if (tb[NL80211_FTM_RESP_ATTR_CIVICLOC]) {
+			bcn->civicloc = nla_data(tb[NL80211_FTM_RESP_ATTR_CIVICLOC]);
+			bcn->civicloc_len = nla_len(tb[NL80211_FTM_RESP_ATTR_CIVICLOC]);
+		}
+	} else {
+		bcn->ftm_responder = -1;
+	}
+
+	return 0;
+}
+
+static int nl80211_parse_he_obss_pd(struct nlattr *attrs,
+				    struct ieee80211_he_obss_pd *he_obss_pd)
+{
+	struct nlattr *tb[NL80211_HE_OBSS_PD_ATTR_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NL80211_HE_OBSS_PD_ATTR_MAX, attrs,
+			       he_obss_pd_policy, NULL);
+	if (err)
+		return err;
+
+	if (!tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] ||
+	    !tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET])
+		return -EINVAL;
+
+	he_obss_pd->min_offset =
+		nla_get_u32(tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET]);
+	he_obss_pd->max_offset =
+		nla_get_u32(tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]);
+
+	if (he_obss_pd->min_offset >= he_obss_pd->max_offset)
+		return -EINVAL;
+
+	he_obss_pd->enable = true;
+
 	return 0;
 }
 
@@ -4096,6 +4547,9 @@
 	cap = cfg80211_find_ie(WLAN_EID_VHT_CAPABILITY, ies, ies_len);
 	if (cap && cap[1] >= sizeof(*params->vht_cap))
 		params->vht_cap = (void *)(cap + 2);
+	cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len);
+	if (cap && cap[1] >= sizeof(*params->he_cap) + 1)
+		params->he_cap = (void *)(cap + 3);
 }
 
 static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev,
@@ -4141,6 +4595,8 @@
 		return true;
 	case NL80211_CMD_CONNECT:
 		if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
+		    !wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_SAE_OFFLOAD) &&
 		    auth_type == NL80211_AUTHTYPE_SAE)
 			return false;
 
@@ -4195,7 +4651,7 @@
 	    !info->attrs[NL80211_ATTR_BEACON_HEAD])
 		return -EINVAL;
 
-	err = nl80211_parse_beacon(info->attrs, &params.beacon);
+	err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon);
 	if (err)
 		return err;
 
@@ -4225,14 +4681,9 @@
 			return -EINVAL;
 	}
 
-	if (info->attrs[NL80211_ATTR_HIDDEN_SSID]) {
+	if (info->attrs[NL80211_ATTR_HIDDEN_SSID])
 		params.hidden_ssid = nla_get_u32(
 			info->attrs[NL80211_ATTR_HIDDEN_SSID]);
-		if (params.hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE &&
-		    params.hidden_ssid != NL80211_HIDDEN_SSID_ZERO_LEN &&
-		    params.hidden_ssid != NL80211_HIDDEN_SSID_ZERO_CONTENTS)
-			return -EINVAL;
-	}
 
 	params.privacy = !!info->attrs[NL80211_ATTR_PRIVACY];
 
@@ -4262,8 +4713,6 @@
 			return -EINVAL;
 		params.p2p_ctwindow =
 			nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]);
-		if (params.p2p_ctwindow > 127)
-			return -EINVAL;
 		if (params.p2p_ctwindow != 0 &&
 		    !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN))
 			return -EINVAL;
@@ -4275,8 +4724,6 @@
 		if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
 			return -EINVAL;
 		tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]);
-		if (tmp > 1)
-			return -EINVAL;
 		params.p2p_opp_ps = tmp;
 		if (params.p2p_opp_ps != 0 &&
 		    !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS))
@@ -4340,8 +4787,22 @@
 			return PTR_ERR(params.acl);
 	}
 
+	params.twt_responder =
+		    nla_get_flag(info->attrs[NL80211_ATTR_TWT_RESPONDER]);
+
+	if (info->attrs[NL80211_ATTR_HE_OBSS_PD]) {
+		err = nl80211_parse_he_obss_pd(
+					info->attrs[NL80211_ATTR_HE_OBSS_PD],
+					&params.he_obss_pd);
+		if (err)
+			return err;
+	}
+
 	nl80211_calculate_ap_params(&params);
 
+	if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])
+		params.flags |= AP_SETTINGS_EXTERNAL_AUTH_SUPPORT;
+
 	wdev_lock(wdev);
 	err = rdev_start_ap(rdev, dev, &params);
 	if (!err) {
@@ -4379,7 +4840,7 @@
 	if (!wdev->beacon_interval)
 		return -EINVAL;
 
-	err = nl80211_parse_beacon(info->attrs, &params);
+	err = nl80211_parse_beacon(rdev, info->attrs, &params);
 	if (err)
 		return err;
 
@@ -4439,8 +4900,7 @@
 	if (!nla)
 		return 0;
 
-	if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX, nla,
-			     sta_flags_policy, info->extack))
+	if (nla_parse_nested_deprecated(flags, NL80211_STA_FLAG_MAX, nla, sta_flags_policy, info->extack))
 		return -EINVAL;
 
 	/*
@@ -4485,15 +4945,14 @@
 	return 0;
 }
 
-static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
-				 int attr)
+bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr)
 {
 	struct nlattr *rate;
 	u32 bitrate;
 	u16 bitrate_compat;
 	enum nl80211_rate_info rate_flg;
 
-	rate = nla_nest_start(msg, attr);
+	rate = nla_nest_start_noflag(msg, attr);
 	if (!rate)
 		return false;
 
@@ -4580,7 +5039,7 @@
 	if (!mask)
 		return true;
 
-	attr = nla_nest_start(msg, id);
+	attr = nla_nest_start_noflag(msg, id);
 	if (!attr)
 		return false;
 
@@ -4607,15 +5066,17 @@
 	struct nlattr *sinfoattr, *bss_param;
 
 	hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
-	if (!hdr)
+	if (!hdr) {
+		cfg80211_sinfo_release_content(sinfo);
 		return -1;
+	}
 
 	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
 	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) ||
 	    nla_put_u32(msg, NL80211_ATTR_GENERATION, sinfo->generation))
 		goto nla_put_failure;
 
-	sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO);
+	sinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_STA_INFO);
 	if (!sinfoattr)
 		goto nla_put_failure;
 
@@ -4635,6 +5096,7 @@
 
 	PUT_SINFO(CONNECTED_TIME, connected_time, u32);
 	PUT_SINFO(INACTIVE_TIME, inactive_time, u32);
+	PUT_SINFO_U64(ASSOC_AT_BOOTTIME, assoc_at);
 
 	if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) |
 			     BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) &&
@@ -4654,6 +5116,11 @@
 	PUT_SINFO(PLID, plid, u16);
 	PUT_SINFO(PLINK_STATE, plink_state, u8);
 	PUT_SINFO_U64(RX_DURATION, rx_duration);
+	PUT_SINFO_U64(TX_DURATION, tx_duration);
+
+	if (wiphy_ext_feature_isset(&rdev->wiphy,
+				    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+		PUT_SINFO(AIRTIME_WEIGHT, airtime_weight, u16);
 
 	switch (rdev->wiphy.signal_type) {
 	case CFG80211_SIGNAL_TYPE_MBM:
@@ -4691,13 +5158,16 @@
 	PUT_SINFO(TX_RETRIES, tx_retries, u32);
 	PUT_SINFO(TX_FAILED, tx_failed, u32);
 	PUT_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32);
+	PUT_SINFO(AIRTIME_LINK_METRIC, airtime_link_metric, u32);
 	PUT_SINFO(BEACON_LOSS, beacon_loss_count, u32);
 	PUT_SINFO(LOCAL_PM, local_pm, u32);
 	PUT_SINFO(PEER_PM, peer_pm, u32);
 	PUT_SINFO(NONPEER_PM, nonpeer_pm, u32);
+	PUT_SINFO(CONNECTED_TO_GATE, connected_to_gate, u8);
 
 	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) {
-		bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM);
+		bss_param = nla_nest_start_noflag(msg,
+						  NL80211_STA_INFO_BSS_PARAM);
 		if (!bss_param)
 			goto nla_put_failure;
 
@@ -4725,10 +5195,13 @@
 	PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc);
 	PUT_SINFO_U64(BEACON_RX, rx_beacon);
 	PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
-	PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
+	PUT_SINFO(RX_MPDUS, rx_mpdu_count, u32);
+	PUT_SINFO(FCS_ERROR_COUNT, fcs_err_count, u32);
 	if (wiphy_ext_feature_isset(&rdev->wiphy,
-				    NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT))
-		PUT_SINFO(DATA_ACK_SIGNAL_AVG, avg_ack_signal, s8);
+				    NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT)) {
+		PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
+		PUT_SINFO(ACK_SIGNAL_AVG, avg_ack_signal, s8);
+	}
 
 #undef PUT_SINFO
 #undef PUT_SINFO_U64
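/*
 * Illustrative sketch (not part of this patch): PUT_SINFO()/PUT_SINFO_U64()
 * are defined earlier in nl80211_send_station(), outside this hunk.
 * Roughly, PUT_SINFO() expands to a filled-bit test plus an nla_put:
 *
 *	#define PUT_SINFO(attr, memb, type) do {
 *		if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) &&
 *		    nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr,
 *				     sinfo->memb))
 *			goto nla_put_failure;
 *	} while (0)
 *
 * so each line added above (TX_DURATION, AIRTIME_WEIGHT, ...) only emits
 * its attribute when the driver set the corresponding NL80211_STA_INFO_*
 * bit in sinfo->filled.
 */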
@@ -4737,7 +5210,8 @@
 		struct nlattr *tidsattr;
 		int tid;
 
-		tidsattr = nla_nest_start(msg, NL80211_STA_INFO_TID_STATS);
+		tidsattr = nla_nest_start_noflag(msg,
+						 NL80211_STA_INFO_TID_STATS);
 		if (!tidsattr)
 			goto nla_put_failure;
 
@@ -4750,7 +5224,7 @@
 			if (!tidstats->filled)
 				continue;
 
-			tidattr = nla_nest_start(msg, tid + 1);
+			tidattr = nla_nest_start_noflag(msg, tid + 1);
 			if (!tidattr)
 				goto nla_put_failure;
 
@@ -4807,7 +5281,7 @@
 	int err;
 
 	rtnl_lock();
-	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
+	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
 	if (err)
 		goto out_err;
 
@@ -5098,8 +5572,9 @@
 		return 0;
 
 	nla = info->attrs[NL80211_ATTR_STA_WME];
-	err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla,
-			       nl80211_sta_wme_policy, info->extack);
+	err = nla_parse_nested_deprecated(tb, NL80211_STA_WME_MAX, nla,
+					  nl80211_sta_wme_policy,
+					  info->extack);
 	if (err)
 		return err;
 
@@ -5185,6 +5660,36 @@
 	return nl80211_parse_sta_wme(info, params);
 }
 
+static int nl80211_parse_sta_txpower_setting(struct genl_info *info,
+					     struct station_parameters *params)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	int idx;
+
+	if (info->attrs[NL80211_ATTR_STA_TX_POWER_SETTING]) {
+		if (!rdev->ops->set_tx_power ||
+		    !wiphy_ext_feature_isset(&rdev->wiphy,
+					 NL80211_EXT_FEATURE_STA_TX_PWR))
+			return -EOPNOTSUPP;
+
+		idx = NL80211_ATTR_STA_TX_POWER_SETTING;
+		params->txpwr.type = nla_get_u8(info->attrs[idx]);
+
+		if (params->txpwr.type == NL80211_TX_POWER_LIMITED) {
+			idx = NL80211_ATTR_STA_TX_POWER;
+
+			if (info->attrs[idx])
+				params->txpwr.power =
+					nla_get_s16(info->attrs[idx]);
+			else
+				return -EINVAL;
+		}
+		params->sta_modify_mask |= STATION_PARAM_APPLY_STA_TXPOWER;
+	}
+
+	return 0;
+}
+
 static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -5212,17 +5717,11 @@
 	else
 		params.listen_interval = -1;
 
-	if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) {
-		u8 tmp;
-
-		tmp = nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
-		if (tmp >= NUM_NL80211_P2P_PS_STATUS)
-			return -EINVAL;
-
-		params.support_p2p_ps = tmp;
-	} else {
+	if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS])
+		params.support_p2p_ps =
+			nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
+	else
 		params.support_p2p_ps = -1;
-	}
 
 	if (!info->attrs[NL80211_ATTR_MAC])
 		return -EINVAL;
@@ -5252,44 +5751,42 @@
 	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 		return -EINVAL;
 
-	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
+	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
 		params.plink_action =
 			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
-		if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
-			return -EINVAL;
-	}
 
 	if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) {
 		params.plink_state =
 			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
-		if (params.plink_state >= NUM_NL80211_PLINK_STATES)
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_MESH_PEER_AID]) {
+		if (info->attrs[NL80211_ATTR_MESH_PEER_AID])
 			params.peer_aid = nla_get_u16(
 				info->attrs[NL80211_ATTR_MESH_PEER_AID]);
-			if (params.peer_aid > IEEE80211_MAX_AID)
-				return -EINVAL;
-		}
 		params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE;
 	}
 
-	if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) {
-		enum nl80211_mesh_power_mode pm = nla_get_u32(
+	if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE])
+		params.local_pm = nla_get_u32(
 			info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]);
 
-		if (pm <= NL80211_MESH_POWER_UNKNOWN ||
-		    pm > NL80211_MESH_POWER_MAX)
-			return -EINVAL;
-
-		params.local_pm = pm;
-	}
-
 	if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
 		params.opmode_notif_used = true;
 		params.opmode_notif =
 			nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
 	}
 
+	if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT])
+		params.airtime_weight =
+			nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]);
+
+	if (params.airtime_weight &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+		return -EOPNOTSUPP;
+
+	err = nl80211_parse_sta_txpower_setting(info, &params);
+	if (err)
+		return err;
+
 	/* Include parameters for TDLS peer (will check later) */
 	err = nl80211_set_station_tdls(info, &params);
 	if (err)
@@ -5360,13 +5857,8 @@
 		nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
 
 	if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) {
-		u8 tmp;
-
-		tmp = nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
-		if (tmp >= NUM_NL80211_P2P_PS_STATUS)
-			return -EINVAL;
-
-		params.support_p2p_ps = tmp;
+		params.support_p2p_ps =
+			nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
 	} else {
 		/*
 		 * if not specified, assume it's supported for P2P GO interface,
@@ -5380,8 +5872,6 @@
 		params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
 	else
 		params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);
-	if (!params.aid || params.aid > IEEE80211_MAX_AID)
-		return -EINVAL;
 
 	if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) {
 		params.capability =
@@ -5421,12 +5911,22 @@
 			nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
 	}
 
-	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
+	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
 		params.plink_action =
 			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
-		if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
-			return -EINVAL;
-	}
+
+	if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT])
+		params.airtime_weight =
+			nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]);
+
+	if (params.airtime_weight &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+		return -EOPNOTSUPP;
+
+	err = nl80211_parse_sta_txpower_setting(info, &params);
+	if (err)
+		return err;
 
 	err = nl80211_parse_sta_channel_info(info, &params);
 	if (err)
@@ -5610,7 +6110,7 @@
 	    nla_put_u32(msg, NL80211_ATTR_GENERATION, pinfo->generation))
 		goto nla_put_failure;
 
-	pinfoattr = nla_nest_start(msg, NL80211_ATTR_MPATH_INFO);
+	pinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_MPATH_INFO);
 	if (!pinfoattr)
 		goto nla_put_failure;
 	if ((pinfo->filled & MPATH_INFO_FRAME_QLEN) &&
@@ -5633,7 +6133,13 @@
 			 pinfo->discovery_timeout)) ||
 	    ((pinfo->filled & MPATH_INFO_DISCOVERY_RETRIES) &&
 	     nla_put_u8(msg, NL80211_MPATH_INFO_DISCOVERY_RETRIES,
-			pinfo->discovery_retries)))
+			pinfo->discovery_retries)) ||
+	    ((pinfo->filled & MPATH_INFO_HOP_COUNT) &&
+	     nla_put_u8(msg, NL80211_MPATH_INFO_HOP_COUNT,
+			pinfo->hop_count)) ||
+	    ((pinfo->filled & MPATH_INFO_PATH_CHANGE) &&
+	     nla_put_u32(msg, NL80211_MPATH_INFO_PATH_CHANGE,
+			 pinfo->path_change_count)))
 		goto nla_put_failure;
 
 	nla_nest_end(msg, pinfoattr);
@@ -5658,7 +6164,7 @@
 	int err;
 
 	rtnl_lock();
-	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
+	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
 	if (err)
 		goto out_err;
 
@@ -5799,6 +6305,9 @@
 	if (!rdev->ops->del_mpath)
 		return -EOPNOTSUPP;
 
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
 	return rdev_del_mpath(rdev, dev, dst);
 }
 
@@ -5854,7 +6363,7 @@
 	int err;
 
 	rtnl_lock();
-	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
+	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
 	if (err)
 		goto out_err;
 
@@ -5936,9 +6445,7 @@
 		if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
 			return -EINVAL;
 		params.p2p_ctwindow =
-			nla_get_s8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]);
-		if (params.p2p_ctwindow < 0)
-			return -EINVAL;
+			nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]);
 		if (params.p2p_ctwindow != 0 &&
 		    !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN))
 			return -EINVAL;
@@ -5950,8 +6457,6 @@
 		if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
 			return -EINVAL;
 		tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]);
-		if (tmp > 1)
-			return -EINVAL;
 		params.p2p_opp_ps = tmp;
 		if (params.p2p_opp_ps &&
 		    !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS))
@@ -6059,7 +6564,7 @@
 			     NL80211_CMD_GET_MESH_CONFIG);
 	if (!hdr)
 		goto out;
-	pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG);
+	pinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_MESH_CONFIG);
 	if (!pinfoattr)
 		goto nla_put_failure;
 	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
@@ -6118,7 +6623,9 @@
 	    nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW,
 			cur_params.dot11MeshAwakeWindowDuration) ||
 	    nla_put_u32(msg, NL80211_MESHCONF_PLINK_TIMEOUT,
-			cur_params.plink_timeout))
+			cur_params.plink_timeout) ||
+	    nla_put_u8(msg, NL80211_MESHCONF_CONNECTED_TO_GATE,
+		       cur_params.dot11MeshConnectedToMeshGate))
 		goto nla_put_failure;
 	nla_nest_end(msg, pinfoattr);
 	genlmsg_end(msg, hdr);
@@ -6130,35 +6637,52 @@
 	return -ENOBUFS;
 }
 
-static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = {
-	[NL80211_MESHCONF_RETRY_TIMEOUT] = { .type = NLA_U16 },
-	[NL80211_MESHCONF_CONFIRM_TIMEOUT] = { .type = NLA_U16 },
-	[NL80211_MESHCONF_HOLDING_TIMEOUT] = { .type = NLA_U16 },
-	[NL80211_MESHCONF_MAX_PEER_LINKS] = { .type = NLA_U16 },
-	[NL80211_MESHCONF_MAX_RETRIES] = { .type = NLA_U8 },
-	[NL80211_MESHCONF_TTL] = { .type = NLA_U8 },
-	[NL80211_MESHCONF_ELEMENT_TTL] = { .type = NLA_U8 },
-	[NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 },
-	[NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR] = { .type = NLA_U32 },
+static const struct nla_policy
+nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = {
+	[NL80211_MESHCONF_RETRY_TIMEOUT] =
+		NLA_POLICY_RANGE(NLA_U16, 1, 255),
+	[NL80211_MESHCONF_CONFIRM_TIMEOUT] =
+		NLA_POLICY_RANGE(NLA_U16, 1, 255),
+	[NL80211_MESHCONF_HOLDING_TIMEOUT] =
+		NLA_POLICY_RANGE(NLA_U16, 1, 255),
+	[NL80211_MESHCONF_MAX_PEER_LINKS] =
+		NLA_POLICY_RANGE(NLA_U16, 0, 255),
+	[NL80211_MESHCONF_MAX_RETRIES] = NLA_POLICY_MAX(NLA_U8, 16),
+	[NL80211_MESHCONF_TTL] = NLA_POLICY_MIN(NLA_U8, 1),
+	[NL80211_MESHCONF_ELEMENT_TTL] = NLA_POLICY_MIN(NLA_U8, 1),
+	[NL80211_MESHCONF_AUTO_OPEN_PLINKS] = NLA_POLICY_MAX(NLA_U8, 1),
+	[NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR] =
+		NLA_POLICY_RANGE(NLA_U32, 1, 255),
 	[NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 },
 	[NL80211_MESHCONF_PATH_REFRESH_TIME] = { .type = NLA_U32 },
-	[NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = { .type = NLA_U16 },
+	[NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = NLA_POLICY_MIN(NLA_U16, 1),
 	[NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT] = { .type = NLA_U32 },
-	[NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] = { .type = NLA_U16 },
-	[NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL] = { .type = NLA_U16 },
-	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 },
-	[NL80211_MESHCONF_HWMP_ROOTMODE] = { .type = NLA_U8 },
-	[NL80211_MESHCONF_HWMP_RANN_INTERVAL] = { .type = NLA_U16 },
-	[NL80211_MESHCONF_GATE_ANNOUNCEMENTS] = { .type = NLA_U8 },
-	[NL80211_MESHCONF_FORWARDING] = { .type = NLA_U8 },
-	[NL80211_MESHCONF_RSSI_THRESHOLD] = { .type = NLA_U32 },
+	[NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] =
+		NLA_POLICY_MIN(NLA_U16, 1),
+	[NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL] =
+		NLA_POLICY_MIN(NLA_U16, 1),
+	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] =
+		NLA_POLICY_MIN(NLA_U16, 1),
+	[NL80211_MESHCONF_HWMP_ROOTMODE] = NLA_POLICY_MAX(NLA_U8, 4),
+	[NL80211_MESHCONF_HWMP_RANN_INTERVAL] =
+		NLA_POLICY_MIN(NLA_U16, 1),
+	[NL80211_MESHCONF_GATE_ANNOUNCEMENTS] = NLA_POLICY_MAX(NLA_U8, 1),
+	[NL80211_MESHCONF_FORWARDING] = NLA_POLICY_MAX(NLA_U8, 1),
+	[NL80211_MESHCONF_RSSI_THRESHOLD] =
+		NLA_POLICY_RANGE(NLA_S32, -255, 0),
 	[NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 },
-	[NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 },
-	[NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 },
-	[NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 },
+	[NL80211_MESHCONF_HWMP_ROOT_INTERVAL] =
+		NLA_POLICY_MIN(NLA_U16, 1),
+	[NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] =
+		NLA_POLICY_MIN(NLA_U16, 1),
+	[NL80211_MESHCONF_POWER_MODE] =
+		NLA_POLICY_RANGE(NLA_U32,
+				 NL80211_MESH_POWER_ACTIVE,
+				 NL80211_MESH_POWER_MAX),
 	[NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 },
+	[NL80211_MESHCONF_CONNECTED_TO_GATE] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
 };
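/*
 * Illustrative sketch (not part of this patch): NLA_POLICY_MIN/MAX/RANGE
 * move the range checks into the policy table, so the netlink core
 * rejects out-of-range values before the handler runs; this is what lets
 * the nl80211_check_u8/u16/u32/s32 helpers below be deleted. A made-up
 * policy (the ATTR_* names are placeholders):
 *
 *	static const struct nla_policy example_policy[ATTR_MAX + 1] = {
 *		[ATTR_TTL]  = NLA_POLICY_MIN(NLA_U8, 1),        value >= 1
 *		[ATTR_MODE] = NLA_POLICY_MAX(NLA_U8, 4),        value <= 4
 *		[ATTR_RSSI] = NLA_POLICY_RANGE(NLA_S32, -255, 0),
 *	};
 */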
 
 static const struct nla_policy
@@ -6169,68 +6693,12 @@
 	[NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG },
 	[NL80211_MESH_SETUP_AUTH_PROTOCOL] = { .type = NLA_U8 },
 	[NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG },
-	[NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY,
-				    .len = IEEE80211_MAX_DATA_LEN },
+	[NL80211_MESH_SETUP_IE] =
+		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
+				       IEEE80211_MAX_DATA_LEN),
 	[NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG },
 };
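/*
 * Illustrative sketch (not part of this patch): NLA_POLICY_VALIDATE_FN
 * attaches a custom validator (here the existing validate_ie_attr(),
 * defined elsewhere in nl80211.c) that the netlink core invokes during
 * parsing, replacing the open-coded is_valid_ie_attr() checks removed
 * throughout this patch. The expected callback shape:
 *
 *	static int validate_ie_attr(const struct nlattr *attr,
 *				    struct netlink_ext_ack *extack)
 *	{
 *		walk nla_data(attr) as (id, len, data...) elements;
 *		return 0, or -EINVAL after NL_SET_ERR_MSG_ATTR(...);
 *	}
 *
 * (body paraphrased; the real implementation is not shown in this hunk)
 */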
 
-static int nl80211_check_bool(const struct nlattr *nla, u8 min, u8 max, bool *out)
-{
-	u8 val = nla_get_u8(nla);
-	if (val < min || val > max)
-		return -EINVAL;
-	*out = val;
-	return 0;
-}
-
-static int nl80211_check_u8(const struct nlattr *nla, u8 min, u8 max, u8 *out)
-{
-	u8 val = nla_get_u8(nla);
-	if (val < min || val > max)
-		return -EINVAL;
-	*out = val;
-	return 0;
-}
-
-static int nl80211_check_u16(const struct nlattr *nla, u16 min, u16 max, u16 *out)
-{
-	u16 val = nla_get_u16(nla);
-	if (val < min || val > max)
-		return -EINVAL;
-	*out = val;
-	return 0;
-}
-
-static int nl80211_check_u32(const struct nlattr *nla, u32 min, u32 max, u32 *out)
-{
-	u32 val = nla_get_u32(nla);
-	if (val < min || val > max)
-		return -EINVAL;
-	*out = val;
-	return 0;
-}
-
-static int nl80211_check_s32(const struct nlattr *nla, s32 min, s32 max, s32 *out)
-{
-	s32 val = nla_get_s32(nla);
-	if (val < min || val > max)
-		return -EINVAL;
-	*out = val;
-	return 0;
-}
-
-static int nl80211_check_power_mode(const struct nlattr *nla,
-				    enum nl80211_mesh_power_mode min,
-				    enum nl80211_mesh_power_mode max,
-				    enum nl80211_mesh_power_mode *out)
-{
-	u32 val = nla_get_u32(nla);
-	if (val < min || val > max)
-		return -EINVAL;
-	*out = val;
-	return 0;
-}
-
 static int nl80211_parse_mesh_config(struct genl_info *info,
 				     struct mesh_config *cfg,
 				     u32 *mask_out)
@@ -6239,20 +6707,17 @@
 	u32 mask = 0;
 	u16 ht_opmode;
 
-#define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \
-do {									    \
-	if (tb[attr]) {							    \
-		if (fn(tb[attr], min, max, &cfg->param))		    \
-			return -EINVAL;					    \
-		mask |= (1 << (attr - 1));				    \
-	}								    \
+#define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, mask, attr, fn)	\
+do {									\
+	if (tb[attr]) {							\
+		cfg->param = fn(tb[attr]);				\
+		mask |= BIT((attr) - 1);				\
+	}								\
 } while (0)
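/*
 * Illustrative sketch (not part of this patch): with range validation now
 * done by the policy table above, the macro shrinks to fetch-and-mark.
 * For example,
 *
 *	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, mask,
 *				  NL80211_MESHCONF_RETRY_TIMEOUT,
 *				  nla_get_u16);
 *
 * expands to roughly:
 *
 *	if (tb[NL80211_MESHCONF_RETRY_TIMEOUT]) {
 *		cfg->dot11MeshRetryTimeout =
 *			nla_get_u16(tb[NL80211_MESHCONF_RETRY_TIMEOUT]);
 *		mask |= BIT(NL80211_MESHCONF_RETRY_TIMEOUT - 1);
 *	}
 */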
 
 	if (!info->attrs[NL80211_ATTR_MESH_CONFIG])
 		return -EINVAL;
-	if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX,
-			     info->attrs[NL80211_ATTR_MESH_CONFIG],
-			     nl80211_meshconf_params_policy, info->extack))
+	if (nla_parse_nested_deprecated(tb, NL80211_MESHCONF_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_CONFIG], nl80211_meshconf_params_policy, info->extack))
 		return -EINVAL;
 
 	/* This makes sure that there aren't more than 32 mesh config
@@ -6260,75 +6725,76 @@
 	BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32);
 
 	/* Fill in the params struct */
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, 1, 255,
-				  mask, NL80211_MESHCONF_RETRY_TIMEOUT,
-				  nl80211_check_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, 1, 255,
-				  mask, NL80211_MESHCONF_CONFIRM_TIMEOUT,
-				  nl80211_check_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, 1, 255,
-				  mask, NL80211_MESHCONF_HOLDING_TIMEOUT,
-				  nl80211_check_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, 0, 255,
-				  mask, NL80211_MESHCONF_MAX_PEER_LINKS,
-				  nl80211_check_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, 0, 16,
-				  mask, NL80211_MESHCONF_MAX_RETRIES,
-				  nl80211_check_u8);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, 1, 255,
-				  mask, NL80211_MESHCONF_TTL, nl80211_check_u8);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, 1, 255,
-				  mask, NL80211_MESHCONF_ELEMENT_TTL,
-				  nl80211_check_u8);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, 0, 1,
-				  mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS,
-				  nl80211_check_bool);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, mask,
+				  NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, mask,
+				  NL80211_MESHCONF_CONFIRM_TIMEOUT,
+				  nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, mask,
+				  NL80211_MESHCONF_HOLDING_TIMEOUT,
+				  nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, mask,
+				  NL80211_MESHCONF_MAX_PEER_LINKS,
+				  nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, mask,
+				  NL80211_MESHCONF_MAX_RETRIES, nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, mask,
+				  NL80211_MESHCONF_TTL, nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, mask,
+				  NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, mask,
+				  NL80211_MESHCONF_AUTO_OPEN_PLINKS,
+				  nla_get_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor,
-				  1, 255, mask,
+				  mask,
 				  NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR,
-				  nl80211_check_u32);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, 0, 255,
-				  mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
-				  nl80211_check_u8);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, 1, 65535,
-				  mask, NL80211_MESHCONF_PATH_REFRESH_TIME,
-				  nl80211_check_u32);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, 1, 65535,
-				  mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
-				  nl80211_check_u16);
+				  nla_get_u32);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, mask,
+				  NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
+				  nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, mask,
+				  NL80211_MESHCONF_PATH_REFRESH_TIME,
+				  nla_get_u32);
+	if (mask & BIT(NL80211_MESHCONF_PATH_REFRESH_TIME) &&
+	    (cfg->path_refresh_time < 1 || cfg->path_refresh_time > 65535))
+		return -EINVAL;
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, mask,
+				  NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
+				  nla_get_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout,
-				  1, 65535, mask,
+				  mask,
 				  NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
-				  nl80211_check_u32);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval,
-				  1, 65535, mask,
+				  nla_get_u32);
+	if (mask & BIT(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT) &&
+	    (cfg->dot11MeshHWMPactivePathTimeout < 1 ||
+	     cfg->dot11MeshHWMPactivePathTimeout > 65535))
+		return -EINVAL;
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, mask,
 				  NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
-				  nl80211_check_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval,
-				  1, 65535, mask,
+				  nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, mask,
 				  NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
-				  nl80211_check_u16);
+				  nla_get_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
-				  dot11MeshHWMPnetDiameterTraversalTime,
-				  1, 65535, mask,
+				  dot11MeshHWMPnetDiameterTraversalTime, mask,
 				  NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
-				  nl80211_check_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, 0, 4,
-				  mask, NL80211_MESHCONF_HWMP_ROOTMODE,
-				  nl80211_check_u8);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, 1, 65535,
-				  mask, NL80211_MESHCONF_HWMP_RANN_INTERVAL,
-				  nl80211_check_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
-				  dot11MeshGateAnnouncementProtocol, 0, 1,
+				  nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, mask,
+				  NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, mask,
+				  NL80211_MESHCONF_HWMP_RANN_INTERVAL,
+				  nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshGateAnnouncementProtocol,
 				  mask, NL80211_MESHCONF_GATE_ANNOUNCEMENTS,
-				  nl80211_check_bool);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1,
-				  mask, NL80211_MESHCONF_FORWARDING,
-				  nl80211_check_bool);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
-				  mask, NL80211_MESHCONF_RSSI_THRESHOLD,
-				  nl80211_check_s32);
+				  nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, mask,
+				  NL80211_MESHCONF_FORWARDING, nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, mask,
+				  NL80211_MESHCONF_RSSI_THRESHOLD,
+				  nla_get_s32);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConnectedToMeshGate, mask,
+				  NL80211_MESHCONF_CONNECTED_TO_GATE,
+				  nla_get_u8);
 	/*
 	 * Check HT operation mode based on
 	 * IEEE 802.11-2016 9.4.2.57 HT Operation element.
@@ -6347,29 +6813,27 @@
 		cfg->ht_opmode = ht_opmode;
 		mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1));
 	}
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout,
-				  1, 65535, mask,
-				  NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
-				  nl80211_check_u32);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, 1, 65535,
-				  mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL,
-				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
-				  dot11MeshHWMPconfirmationInterval,
-				  1, 65535, mask,
+				  dot11MeshHWMPactivePathToRootTimeout, mask,
+				  NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
+				  nla_get_u32);
+	if (mask & BIT(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT) &&
+	    (cfg->dot11MeshHWMPactivePathToRootTimeout < 1 ||
+	     cfg->dot11MeshHWMPactivePathToRootTimeout > 65535))
+		return -EINVAL;
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, mask,
+				  NL80211_MESHCONF_HWMP_ROOT_INTERVAL,
+				  nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPconfirmationInterval,
+				  mask,
 				  NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL,
-				  nl80211_check_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode,
-				  NL80211_MESH_POWER_ACTIVE,
-				  NL80211_MESH_POWER_MAX,
-				  mask, NL80211_MESHCONF_POWER_MODE,
-				  nl80211_check_power_mode);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
-				  0, 65535, mask,
-				  NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 0, 0xffffffff,
-				  mask, NL80211_MESHCONF_PLINK_TIMEOUT,
-				  nl80211_check_u32);
+				  nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, mask,
+				  NL80211_MESHCONF_POWER_MODE, nla_get_u32);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, mask,
+				  NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, mask,
+				  NL80211_MESHCONF_PLINK_TIMEOUT, nla_get_u32);
 	if (mask_out)
 		*mask_out = mask;
 
@@ -6386,9 +6850,7 @@
 
 	if (!info->attrs[NL80211_ATTR_MESH_SETUP])
 		return -EINVAL;
-	if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX,
-			     info->attrs[NL80211_ATTR_MESH_SETUP],
-			     nl80211_mesh_setup_params_policy, info->extack))
+	if (nla_parse_nested_deprecated(tb, NL80211_MESH_SETUP_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_SETUP], nl80211_mesh_setup_params_policy, info->extack))
 		return -EINVAL;
 
 	if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC])
@@ -6412,8 +6874,6 @@
 	if (tb[NL80211_MESH_SETUP_IE]) {
 		struct nlattr *ieattr =
 			tb[NL80211_MESH_SETUP_IE];
-		if (!is_valid_ie_attr(ieattr))
-			return -EINVAL;
 		setup->ie = nla_data(ieattr);
 		setup->ie_len = nla_len(ieattr);
 	}
@@ -6479,7 +6939,7 @@
 	     nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region)))
 		goto nla_put_failure;
 
-	nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES);
+	nl_reg_rules = nla_nest_start_noflag(msg, NL80211_ATTR_REG_RULES);
 	if (!nl_reg_rules)
 		goto nla_put_failure;
 
@@ -6494,7 +6954,7 @@
 		freq_range = &reg_rule->freq_range;
 		power_rule = &reg_rule->power_rule;
 
-		nl_reg_rule = nla_nest_start(msg, i);
+		nl_reg_rule = nla_nest_start_noflag(msg, i);
 		if (!nl_reg_rule)
 			goto nla_put_failure;
 
@@ -6732,7 +7192,7 @@
 	struct nlattr *nl_reg_rule;
 	char *alpha2;
 	int rem_reg_rules, r;
-	u32 num_rules = 0, rule_idx = 0, size_of_regd;
+	u32 num_rules = 0, rule_idx = 0;
 	enum nl80211_dfs_regions dfs_region = NL80211_DFS_UNSET;
 	struct ieee80211_regdomain *rd;
 
@@ -6757,10 +7217,7 @@
 	if (!reg_is_valid_request(alpha2))
 		return -EINVAL;
 
-	size_of_regd = sizeof(struct ieee80211_regdomain) +
-		       num_rules * sizeof(struct ieee80211_reg_rule);
-
-	rd = kzalloc(size_of_regd, GFP_KERNEL);
+	rd = kzalloc(struct_size(rd, reg_rules, num_rules), GFP_KERNEL);
 	if (!rd)
 		return -ENOMEM;
 
@@ -6777,9 +7234,9 @@
 
 	nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
 			    rem_reg_rules) {
-		r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX,
-				     nl_reg_rule, reg_rule_policy,
-				     info->extack);
+		r = nla_parse_nested_deprecated(tb, NL80211_REG_RULE_ATTR_MAX,
+						nl_reg_rule, reg_rule_policy,
+						info->extack);
 		if (r)
 			goto bad_reg;
 		r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]);
@@ -6850,8 +7307,9 @@
 	if (!nla_ok(nest, nla_len(nest)))
 		return -EINVAL;
 
-	err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest,
-			       nl80211_bss_select_policy, NULL);
+	err = nla_parse_nested_deprecated(attr, NL80211_BSS_SELECT_ATTR_MAX,
+					  nest, nl80211_bss_select_policy,
+					  NULL);
 	if (err)
 		return err;
 
@@ -6898,8 +7356,8 @@
 	return 0;
 }
 
-static int nl80211_parse_random_mac(struct nlattr **attrs,
-				    u8 *mac_addr, u8 *mac_addr_mask)
+int nl80211_parse_random_mac(struct nlattr **attrs,
+			     u8 *mac_addr, u8 *mac_addr_mask)
 {
 	int i;
 
@@ -7046,9 +7504,6 @@
 	int err, tmp, n_ssids = 0, n_channels, i;
 	size_t ie_len;
 
-	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
-		return -EINVAL;
-
 	wiphy = &rdev->wiphy;
 
 	if (wdev->iftype == NL80211_IFTYPE_NAN)
@@ -7347,8 +7802,10 @@
 		if (WARN_ON(i >= n_plans))
 			return -EINVAL;
 
-		err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX,
-				       attr, nl80211_plan_policy, NULL);
+		err = nla_parse_nested_deprecated(plan,
+						  NL80211_SCHED_SCAN_PLAN_MAX,
+						  attr, nl80211_plan_policy,
+						  NULL);
 		if (err)
 			return err;
 
@@ -7390,6 +7847,41 @@
 	return 0;
 }
 
+static int
+nl80211_parse_sched_scan_per_band_rssi(struct wiphy *wiphy,
+				       struct cfg80211_match_set *match_sets,
+				       struct nlattr *tb_band_rssi,
+				       s32 rssi_thold)
+{
+	struct nlattr *attr;
+	int i, tmp, ret = 0;
+
+	if (!wiphy_ext_feature_isset(wiphy,
+		    NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD)) {
+		if (tb_band_rssi)
+			ret = -EOPNOTSUPP;
+		else
+			for (i = 0; i < NUM_NL80211_BANDS; i++)
+				match_sets->per_band_rssi_thold[i] =
+					NL80211_SCAN_RSSI_THOLD_OFF;
+		return ret;
+	}
+
+	for (i = 0; i < NUM_NL80211_BANDS; i++)
+		match_sets->per_band_rssi_thold[i] = rssi_thold;
+
+	nla_for_each_nested(attr, tb_band_rssi, tmp) {
+		enum nl80211_band band = nla_type(attr);
+
+		if (band < 0 || band >= NUM_NL80211_BANDS)
+			return -EINVAL;
+
+		match_sets->per_band_rssi_thold[band] = nla_get_s32(attr);
+	}
+
+	return 0;
+}
+
 static struct cfg80211_sched_scan_request *
 nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
 			 struct nlattr **attrs, int max_match_sets)
@@ -7402,9 +7894,6 @@
 	struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1];
 	s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF;
 
-	if (!is_valid_ie_attr(attrs[NL80211_ATTR_IE]))
-		return ERR_PTR(-EINVAL);
-
 	if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
 		n_channels = validate_scan_freqs(
 				attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
@@ -7437,10 +7926,11 @@
 				    tmp) {
 			struct nlattr *rssi;
 
-			err = nla_parse_nested(tb,
-					       NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
-					       attr, nl80211_match_policy,
-					       NULL);
+			err = nla_parse_nested_deprecated(tb,
+							  NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
+							  attr,
+							  nl80211_match_policy,
+							  NULL);
 			if (err)
 				return ERR_PTR(err);
 
@@ -7624,51 +8114,64 @@
 				    tmp) {
 			struct nlattr *ssid, *bssid, *rssi;
 
-			err = nla_parse_nested(tb,
-					       NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
-					       attr, nl80211_match_policy,
-					       NULL);
+			err = nla_parse_nested_deprecated(tb,
+							  NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
+							  attr,
+							  nl80211_match_policy,
+							  NULL);
 			if (err)
 				goto out_free;
 			ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID];
 			bssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID];
-			if (ssid || bssid) {
-				if (WARN_ON(i >= n_match_sets)) {
-					/* this indicates a programming error,
-					 * the loop above should have verified
-					 * things properly
-					 */
+
+			if (!ssid && !bssid) {
+				i++;
+				continue;
+			}
+
+			if (WARN_ON(i >= n_match_sets)) {
+				/* this indicates a programming error;
+				 * the loop above should have verified
+				 * things properly
+				 */
+				err = -EINVAL;
+				goto out_free;
+			}
+
+			if (ssid) {
+				if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) {
 					err = -EINVAL;
 					goto out_free;
 				}
-
-				if (ssid) {
-					if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) {
-						err = -EINVAL;
-						goto out_free;
-					}
-					memcpy(request->match_sets[i].ssid.ssid,
-					       nla_data(ssid), nla_len(ssid));
-					request->match_sets[i].ssid.ssid_len =
-						nla_len(ssid);
-				}
-				if (bssid) {
-					if (nla_len(bssid) != ETH_ALEN) {
-						err = -EINVAL;
-						goto out_free;
-					}
-					memcpy(request->match_sets[i].bssid,
-					       nla_data(bssid), ETH_ALEN);
-				}
-
-				/* special attribute - old implementation w/a */
-				request->match_sets[i].rssi_thold =
-					default_match_rssi;
-				rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
-				if (rssi)
-					request->match_sets[i].rssi_thold =
-						nla_get_s32(rssi);
+				memcpy(request->match_sets[i].ssid.ssid,
+				       nla_data(ssid), nla_len(ssid));
+				request->match_sets[i].ssid.ssid_len =
+					nla_len(ssid);
 			}
+			if (bssid) {
+				if (nla_len(bssid) != ETH_ALEN) {
+					err = -EINVAL;
+					goto out_free;
+				}
+				memcpy(request->match_sets[i].bssid,
+				       nla_data(bssid), ETH_ALEN);
+			}
+
+			/* special attribute - old implementation w/a */
+			request->match_sets[i].rssi_thold = default_match_rssi;
+			rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
+			if (rssi)
+				request->match_sets[i].rssi_thold =
+					nla_get_s32(rssi);
+
+			/* Parse per band RSSI attribute */
+			err = nl80211_parse_sched_scan_per_band_rssi(wiphy,
+				&request->match_sets[i],
+				tb[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI],
+				request->match_sets[i].rssi_thold);
+			if (err)
+				goto out_free;
+
 			i++;
 		}
 
@@ -7764,7 +8267,7 @@
 	 */
 	if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) {
 		while (!sched_scan_req->reqid)
-			sched_scan_req->reqid = rdev->wiphy.cookie_counter++;
+			sched_scan_req->reqid = cfg80211_assign_cookie(rdev);
 	}
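/*
 * Illustrative sketch (not part of this patch): cfg80211_assign_cookie()
 * centralizes the previously open-coded "++cookie_counter" pattern. Its
 * behavior, as the loop above relies on it, is roughly:
 *
 *	static inline u64
 *	cfg80211_assign_cookie(struct cfg80211_registered_device *rdev)
 *	{
 *		u64 r = ++rdev->cookie_counter;
 *
 *		if (WARN_ON(r == 0))
 *			r = ++rdev->cookie_counter;
 *		return r;
 *	}
 *
 * i.e. a monotonically increasing, never-zero cookie (zero is reserved to
 * mean "no request").
 */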
 
 	err = rdev_sched_scan_start(rdev, dev, sched_scan_req);
@@ -7871,6 +8374,60 @@
 	return err;
 }
 
+static int nl80211_notify_radar_detection(struct sk_buff *skb,
+					  struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_chan_def chandef;
+	enum nl80211_dfs_regions dfs_region;
+	int err;
+
+	dfs_region = reg_get_dfs_region(wiphy);
+	if (dfs_region == NL80211_DFS_UNSET) {
+		GENL_SET_ERR_MSG(info,
+				 "DFS Region is not set. Unexpected Radar indication");
+		return -EINVAL;
+	}
+
+	err = nl80211_parse_chandef(rdev, info, &chandef);
+	if (err) {
+		GENL_SET_ERR_MSG(info, "Unable to extract chandef info");
+		return err;
+	}
+
+	err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype);
+	if (err < 0) {
+		GENL_SET_ERR_MSG(info, "chandef is invalid");
+		return err;
+	}
+
+	if (err == 0) {
+		GENL_SET_ERR_MSG(info,
+				 "Unexpected Radar indication for chandef/iftype");
+		return -EINVAL;
+	}
+
+	/* If radar has already been detected by the kernel on this
+	 * channel, do not process this notification; just return success.
+	 */
+	if (chandef.chan->dfs_state == NL80211_DFS_UNAVAILABLE)
+		return 0;
+
+	cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_UNAVAILABLE);
+
+	cfg80211_sched_dfs_chan_update(rdev);
+
+	rdev->radar_chandef = chandef;
+
+	/* Propagate this notification to other radios as well */
+	queue_work(cfg80211_wq, &rdev->propagate_radar_detect_wk);
+
+	return 0;
+}
+
 static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -7919,6 +8476,7 @@
 	}
 
 	memset(&params, 0, sizeof(params));
+	params.beacon_csa.ftm_responder = -1;
 
 	if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
 	    !info->attrs[NL80211_ATTR_CH_SWITCH_COUNT])
@@ -7940,17 +8498,17 @@
 	if (!need_new_beacon)
 		goto skip_beacons;
 
-	err = nl80211_parse_beacon(info->attrs, &params.beacon_after);
+	err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_after);
 	if (err)
 		return err;
 
-	err = nla_parse_nested(csa_attrs, NL80211_ATTR_MAX,
-			       info->attrs[NL80211_ATTR_CSA_IES],
-			       nl80211_policy, info->extack);
+	err = nla_parse_nested_deprecated(csa_attrs, NL80211_ATTR_MAX,
+					  info->attrs[NL80211_ATTR_CSA_IES],
+					  nl80211_policy, info->extack);
 	if (err)
 		return err;
 
-	err = nl80211_parse_beacon(csa_attrs, &params.beacon_csa);
+	err = nl80211_parse_beacon(rdev, csa_attrs, &params.beacon_csa);
 	if (err)
 		return err;
 
@@ -8070,7 +8628,7 @@
 			      NL80211_ATTR_PAD))
 		goto nla_put_failure;
 
-	bss = nla_nest_start(msg, NL80211_ATTR_BSS);
+	bss = nla_nest_start_noflag(msg, NL80211_ATTR_BSS);
 	if (!bss)
 		goto nla_put_failure;
 	if ((!is_zero_ether_addr(res->bssid) &&
@@ -8187,7 +8745,7 @@
 	int err;
 
 	rtnl_lock();
-	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
+	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
 	if (err) {
 		rtnl_unlock();
 		return err;
@@ -8247,7 +8805,7 @@
 	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
 		goto nla_put_failure;
 
-	infoattr = nla_nest_start(msg, NL80211_ATTR_SURVEY_INFO);
+	infoattr = nla_nest_start_noflag(msg, NL80211_ATTR_SURVEY_INFO);
 	if (!infoattr)
 		goto nla_put_failure;
 
@@ -8286,6 +8844,10 @@
 	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_SCAN,
 			      survey->time_scan, NL80211_SURVEY_INFO_PAD))
 		goto nla_put_failure;
+	if ((survey->filled & SURVEY_INFO_TIME_BSS_RX) &&
+	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BSS_RX,
+			      survey->time_bss_rx, NL80211_SURVEY_INFO_PAD))
+		goto nla_put_failure;
 
 	nla_nest_end(msg, infoattr);
 
@@ -8299,7 +8861,7 @@
 
 static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
+	struct nlattr **attrbuf;
 	struct survey_info survey;
 	struct cfg80211_registered_device *rdev;
 	struct wireless_dev *wdev;
@@ -8307,8 +8869,12 @@
 	int res;
 	bool radio_stats;
 
+	attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
+	if (!attrbuf)
+		return -ENOMEM;
+
 	rtnl_lock();
-	res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
+	res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
 	if (res)
 		goto out_err;
 
@@ -8351,6 +8917,7 @@
 	cb->args[2] = survey_idx;
 	res = skb->len;
  out_err:
+	kfree(attrbuf);
 	rtnl_unlock();
 	return res;
 }
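/*
 * Illustrative sketch (not part of this patch): genl_family_attrbuf()
 * handed back a single buffer shared across the whole family, which is
 * only safe while at most one dump parses attributes at a time. Each dump
 * callback now owns its parse state:
 *
 *	attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
 *	if (!attrbuf)
 *		return -ENOMEM;
 *	...parse into attrbuf and run the dump...
 *	kfree(attrbuf);
 *
 * at the cost of one allocation per dump invocation.
 */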
@@ -8358,7 +8925,8 @@
 static bool nl80211_valid_wpa_versions(u32 wpa_versions)
 {
 	return !(wpa_versions & ~(NL80211_WPA_VERSION_1 |
-				  NL80211_WPA_VERSION_2));
+				  NL80211_WPA_VERSION_2 |
+				  NL80211_WPA_VERSION_3));
 }
 
 static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
@@ -8372,9 +8940,6 @@
 	struct key_parse key;
 	bool local_state_change;
 
-	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
-		return -EINVAL;
-
 	if (!info->attrs[NL80211_ATTR_MAC])
 		return -EINVAL;
 
@@ -8597,6 +9162,16 @@
 		settings->psk = nla_data(info->attrs[NL80211_ATTR_PMK]);
 	}
 
+	if (info->attrs[NL80211_ATTR_SAE_PASSWORD]) {
+		if (!wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_SAE_OFFLOAD))
+			return -EINVAL;
+		settings->sae_pwd =
+			nla_data(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
+		settings->sae_pwd_len =
+			nla_len(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
+	}
+
 	return 0;
 }
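/*
 * Illustrative sketch (not part of this patch): SAE offload is gated on an
 * extended-feature bit, so only drivers that can consume the plaintext
 * password accept NL80211_ATTR_SAE_PASSWORD. A driver advertises it at
 * wiphy registration time:
 *
 *	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_SAE_OFFLOAD);
 *
 * after which connect requests may carry the password and the device
 * performs the SAE handshake itself instead of punting each
 * authentication frame to userspace.
 */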
 
@@ -8613,9 +9188,6 @@
 	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
 		return -EPERM;
 
-	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
-		return -EINVAL;
-
 	if (!info->attrs[NL80211_ATTR_MAC] ||
 	    !info->attrs[NL80211_ATTR_SSID] ||
 	    !info->attrs[NL80211_ATTR_WIPHY_FREQ])
@@ -8739,9 +9311,6 @@
 	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
 		return -EPERM;
 
-	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
-		return -EINVAL;
-
 	if (!info->attrs[NL80211_ATTR_MAC])
 		return -EINVAL;
 
@@ -8790,9 +9359,6 @@
 	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
 		return -EPERM;
 
-	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
-		return -EINVAL;
-
 	if (!info->attrs[NL80211_ATTR_MAC])
 		return -EINVAL;
 
@@ -8867,9 +9433,6 @@
 
 	memset(&ibss, 0, sizeof(ibss));
 
-	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
-		return -EINVAL;
-
 	if (!info->attrs[NL80211_ATTR_SSID] ||
 	    !nla_len(info->attrs[NL80211_ATTR_SSID]))
 		return -EINVAL;
@@ -8993,8 +9556,10 @@
 	if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
 		int r = validate_pae_over_nl80211(rdev, info);
 
-		if (r < 0)
+		if (r < 0) {
+			kzfree(connkeys);
 			return r;
+		}
 
 		ibss.control_port_over_nl80211 = true;
 	}
@@ -9101,7 +9666,7 @@
 			goto nla_put_failure;
 	}
 
-	data = nla_nest_start(skb, attr);
+	data = nla_nest_start_noflag(skb, attr);
 	if (!data)
 		goto nla_put_failure;
 
@@ -9120,6 +9685,7 @@
 					   struct wireless_dev *wdev,
 					   enum nl80211_commands cmd,
 					   enum nl80211_attrs attr,
+					   unsigned int portid,
 					   int vendor_event_idx,
 					   int approxlen, gfp_t gfp)
 {
@@ -9143,7 +9709,7 @@
 		return NULL;
 	}
 
-	return __cfg80211_alloc_vendor_skb(rdev, wdev, approxlen, 0, 0,
+	return __cfg80211_alloc_vendor_skb(rdev, wdev, approxlen, portid, 0,
 					   cmd, attr, info, gfp);
 }
 EXPORT_SYMBOL(__cfg80211_alloc_event_skb);
@@ -9152,6 +9718,7 @@
 {
 	struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
 	void *hdr = ((void **)skb->cb)[1];
+	struct nlmsghdr *nlhdr = nlmsg_hdr(skb);
 	struct nlattr *data = ((void **)skb->cb)[2];
 	enum nl80211_multicast_groups mcgrp = NL80211_MCGRP_TESTMODE;
 
@@ -9161,11 +9728,16 @@
 	nla_nest_end(skb, data);
 	genlmsg_end(skb, hdr);
 
-	if (data->nla_type == NL80211_ATTR_VENDOR_DATA)
-		mcgrp = NL80211_MCGRP_VENDOR;
+	if (nlhdr->nlmsg_pid) {
+		genlmsg_unicast(wiphy_net(&rdev->wiphy), skb,
+				nlhdr->nlmsg_pid);
+	} else {
+		if (data->nla_type == NL80211_ATTR_VENDOR_DATA)
+			mcgrp = NL80211_MCGRP_VENDOR;
 
-	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), skb, 0,
-				mcgrp, gfp);
+		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+					skb, 0, mcgrp, gfp);
+	}
 }
 EXPORT_SYMBOL(__cfg80211_send_event_skb);
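/*
 * Illustrative sketch (not part of this patch): with the portid plumbed
 * through, an event skb carrying a non-zero nlmsg_pid is unicast to that
 * socket (genlmsg_unicast() above) instead of being multicast to the
 * whole NL80211_MCGRP_VENDOR group. From a driver, assuming the
 * cfg80211_vendor_event_alloc_ucast() wrapper that accompanies this
 * plumbing:
 *
 *	skb = cfg80211_vendor_event_alloc_ucast(wiphy, wdev, approxlen,
 *						portid, event_idx, gfp);
 *	if (skb) {
 *		...fill in the vendor data...
 *		cfg80211_vendor_event(skb, gfp);
 *	}
 */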
 
@@ -9205,6 +9777,7 @@
 				 struct netlink_callback *cb)
 {
 	struct cfg80211_registered_device *rdev;
+	struct nlattr **attrbuf = NULL;
 	int err;
 	long phy_idx;
 	void *data = NULL;
@@ -9225,11 +9798,17 @@
 			goto out_err;
 		}
 	} else {
-		struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
+		attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
+				  GFP_KERNEL);
+		if (!attrbuf) {
+			err = -ENOMEM;
+			goto out_err;
+		}
 
-		err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
-				  attrbuf, nl80211_fam.maxattr,
-				  nl80211_policy, NULL);
+		err = nlmsg_parse_deprecated(cb->nlh,
+					     GENL_HDRLEN + nl80211_fam.hdrsize,
+					     attrbuf, nl80211_fam.maxattr,
+					     nl80211_policy, NULL);
 		if (err)
 			goto out_err;
 
@@ -9268,7 +9847,7 @@
 			break;
 		}
 
-		tmdata = nla_nest_start(skb, NL80211_ATTR_TESTDATA);
+		tmdata = nla_nest_start_noflag(skb, NL80211_ATTR_TESTDATA);
 		if (!tmdata) {
 			genlmsg_cancel(skb, hdr);
 			break;
@@ -9291,6 +9870,7 @@
 	/* see above */
 	cb->args[0] = phy_idx + 1;
  out_err:
+	kfree(attrbuf);
 	rtnl_unlock();
 	return err;
 }
@@ -9307,9 +9887,6 @@
 
 	memset(&connect, 0, sizeof(connect));
 
-	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
-		return -EINVAL;
-
 	if (!info->attrs[NL80211_ATTR_SSID] ||
 	    !nla_len(info->attrs[NL80211_ATTR_SSID]))
 		return -EINVAL;
@@ -9368,11 +9945,6 @@
 		    !wiphy_ext_feature_isset(&rdev->wiphy,
 					     NL80211_EXT_FEATURE_MFP_OPTIONAL))
 			return -EOPNOTSUPP;
-
-		if (connect.mfp != NL80211_MFP_REQUIRED &&
-		    connect.mfp != NL80211_MFP_NO &&
-		    connect.mfp != NL80211_MFP_OPTIONAL)
-			return -EINVAL;
 	} else {
 		connect.mfp = NL80211_MFP_NO;
 	}
@@ -9393,6 +9965,15 @@
 			return -EINVAL;
 	}
 
+	if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) {
+		connect.edmg.channels =
+		      nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]);
+
+		if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG])
+			connect.edmg.bw_config =
+				nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]);
+	}
+
 	if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) {
 		connkeys = nl80211_parse_connkeys(rdev, info, NULL);
 		if (IS_ERR(connkeys))
@@ -9545,8 +10126,6 @@
 		return -EOPNOTSUPP;
 
 	if (info->attrs[NL80211_ATTR_IE]) {
-		if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
-			return -EINVAL;
 		connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
 		connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
 		changed |= UPDATE_ASSOC_IES;
@@ -9706,7 +10285,10 @@
 	}
 
 	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
+	    !(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP &&
+	      wiphy_ext_feature_isset(&rdev->wiphy,
+				      NL80211_EXT_FEATURE_AP_PMKSA_CACHING)))
 		return -EOPNOTSUPP;
 
 	switch (info->genlhdr->cmd) {
@@ -10131,9 +10713,6 @@
 
 	ps_state = nla_get_u32(info->attrs[NL80211_ATTR_PS_STATE]);
 
-	if (ps_state != NL80211_PS_DISABLED && ps_state != NL80211_PS_ENABLED)
-		return -EINVAL;
-
 	wdev = dev->ieee80211_ptr;
 
 	if (!rdev->ops->set_power_mgmt)
@@ -10264,9 +10843,11 @@
 	hyst = wdev->cqm_config->rssi_hyst;
 	n = wdev->cqm_config->n_rssi_thresholds;
 
-	for (i = 0; i < n; i++)
+	for (i = 0; i < n; i++) {
+		i = array_index_nospec(i, n);
 		if (last < wdev->cqm_config->rssi_thresholds[i])
 			break;
+	}
 
 	low_index = i - 1;
 	if (low_index >= 0) {
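
The rewritten loop is the standard Spectre v1 mitigation: the loop bound n
and the thresholds array are shaped by userspace, and the
rssi_thresholds[i] load could otherwise be executed speculatively past the
bounds check. The generic shape of the pattern, as a self-contained
sketch:

	#include <linux/nospec.h>

	static int example_lookup(unsigned int idx)
	{
		static const int table[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

		if (idx >= ARRAY_SIZE(table))
			return -EINVAL;

		/* clamp idx so a mispredicted branch cannot feed an
		 * out-of-bounds value into the dependent load */
		idx = array_index_nospec(idx, ARRAY_SIZE(table));
		return table[idx];
	}
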
@@ -10363,8 +10944,9 @@
 	if (!cqm)
 		return -EINVAL;
 
-	err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm,
-			       nl80211_attr_cqm_policy, info->extack);
+	err = nla_parse_nested_deprecated(attrs, NL80211_ATTR_CQM_MAX, cqm,
+					  nl80211_attr_cqm_policy,
+					  info->extack);
 	if (err)
 		return err;
 
@@ -10556,12 +11138,12 @@
 	if (!wowlan->n_patterns)
 		return 0;
 
-	nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN);
+	nl_pats = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN);
 	if (!nl_pats)
 		return -ENOBUFS;
 
 	for (i = 0; i < wowlan->n_patterns; i++) {
-		nl_pat = nla_nest_start(msg, i + 1);
+		nl_pat = nla_nest_start_noflag(msg, i + 1);
 		if (!nl_pat)
 			return -ENOBUFS;
 		pat_len = wowlan->patterns[i].pattern_len;
@@ -10587,7 +11169,8 @@
 	if (!tcp)
 		return 0;
 
-	nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION);
+	nl_tcp = nla_nest_start_noflag(msg,
+				       NL80211_WOWLAN_TRIG_TCP_CONNECTION);
 	if (!nl_tcp)
 		return -ENOBUFS;
 
@@ -10631,7 +11214,7 @@
 	if (!req)
 		return 0;
 
-	nd = nla_nest_start(msg, NL80211_WOWLAN_TRIG_NET_DETECT);
+	nd = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_NET_DETECT);
 	if (!nd)
 		return -ENOBUFS;
 
@@ -10657,7 +11240,7 @@
 			return -ENOBUFS;
 	}
 
-	freqs = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES);
+	freqs = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES);
 	if (!freqs)
 		return -ENOBUFS;
 
@@ -10669,12 +11252,13 @@
 	nla_nest_end(msg, freqs);
 
 	if (req->n_match_sets) {
-		matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH);
+		matches = nla_nest_start_noflag(msg,
+						NL80211_ATTR_SCHED_SCAN_MATCH);
 		if (!matches)
 			return -ENOBUFS;
 
 		for (i = 0; i < req->n_match_sets; i++) {
-			match = nla_nest_start(msg, i);
+			match = nla_nest_start_noflag(msg, i);
 			if (!match)
 				return -ENOBUFS;
 
@@ -10687,17 +11271,16 @@
 		nla_nest_end(msg, matches);
 	}
 
-	scan_plans = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_PLANS);
+	scan_plans = nla_nest_start_noflag(msg, NL80211_ATTR_SCHED_SCAN_PLANS);
 	if (!scan_plans)
 		return -ENOBUFS;
 
 	for (i = 0; i < req->n_scan_plans; i++) {
-		scan_plan = nla_nest_start(msg, i + 1);
+		scan_plan = nla_nest_start_noflag(msg, i + 1);
 		if (!scan_plan)
 			return -ENOBUFS;
 
-		if (!scan_plan ||
-		    nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
+		if (nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
 				req->scan_plans[i].interval) ||
 		    (req->scan_plans[i].iterations &&
 		     nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_ITERATIONS,
@@ -10742,7 +11325,8 @@
 	if (rdev->wiphy.wowlan_config) {
 		struct nlattr *nl_wowlan;
 
-		nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS);
+		nl_wowlan = nla_nest_start_noflag(msg,
+						  NL80211_ATTR_WOWLAN_TRIGGERS);
 		if (!nl_wowlan)
 			goto nla_put_failure;
 
@@ -10800,8 +11384,8 @@
 	if (!rdev->wiphy.wowlan->tcp)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr,
-			       nl80211_wowlan_tcp_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, MAX_NL80211_WOWLAN_TCP, attr,
+					  nl80211_wowlan_tcp_policy, NULL);
 	if (err)
 		return err;
 
@@ -10946,8 +11530,8 @@
 		goto out;
 	}
 
-	err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(tb, NL80211_ATTR_MAX, attr,
+					  nl80211_policy, NULL);
 	if (err)
 		goto out;
 
@@ -10982,9 +11566,9 @@
 		goto set_wakeup;
 	}
 
-	err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG,
-			       info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS],
-			       nl80211_wowlan_policy, info->extack);
+	err = nla_parse_nested_deprecated(tb, MAX_NL80211_WOWLAN_TRIG,
+					  info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS],
+					  nl80211_wowlan_policy, info->extack);
 	if (err)
 		return err;
 
@@ -11066,9 +11650,11 @@
 				    rem) {
 			u8 *mask_pat;
 
-			err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
-					       nl80211_packet_pattern_policy,
-					       info->extack);
+			err = nla_parse_nested_deprecated(pat_tb,
+							  MAX_NL80211_PKTPAT,
+							  pat,
+							  nl80211_packet_pattern_policy,
+							  info->extack);
 			if (err)
 				goto error;
 
@@ -11176,12 +11762,12 @@
 	if (!rdev->coalesce->n_rules)
 		return 0;
 
-	nl_rules = nla_nest_start(msg, NL80211_ATTR_COALESCE_RULE);
+	nl_rules = nla_nest_start_noflag(msg, NL80211_ATTR_COALESCE_RULE);
 	if (!nl_rules)
 		return -ENOBUFS;
 
 	for (i = 0; i < rdev->coalesce->n_rules; i++) {
-		nl_rule = nla_nest_start(msg, i + 1);
+		nl_rule = nla_nest_start_noflag(msg, i + 1);
 		if (!nl_rule)
 			return -ENOBUFS;
 
@@ -11194,13 +11780,13 @@
 				rule->condition))
 			return -ENOBUFS;
 
-		nl_pats = nla_nest_start(msg,
-				NL80211_ATTR_COALESCE_RULE_PKT_PATTERN);
+		nl_pats = nla_nest_start_noflag(msg,
+						NL80211_ATTR_COALESCE_RULE_PKT_PATTERN);
 		if (!nl_pats)
 			return -ENOBUFS;
 
 		for (j = 0; j < rule->n_patterns; j++) {
-			nl_pat = nla_nest_start(msg, j + 1);
+			nl_pat = nla_nest_start_noflag(msg, j + 1);
 			if (!nl_pat)
 				return -ENOBUFS;
 			pat_len = rule->patterns[j].pattern_len;
@@ -11281,8 +11867,8 @@
 	int rem, pat_len, mask_len, pkt_offset, n_patterns = 0;
 	struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
 
-	err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule,
-			       nl80211_coalesce_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, NL80211_ATTR_COALESCE_RULE_MAX,
+					  rule, nl80211_coalesce_policy, NULL);
 	if (err)
 		return err;
 
@@ -11295,9 +11881,6 @@
 	if (tb[NL80211_ATTR_COALESCE_RULE_CONDITION])
 		new_rule->condition =
 			nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_CONDITION]);
-	if (new_rule->condition != NL80211_COALESCE_CONDITION_MATCH &&
-	    new_rule->condition != NL80211_COALESCE_CONDITION_NO_MATCH)
-		return -EINVAL;
 
 	if (!tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN])
 		return -EINVAL;
@@ -11320,8 +11903,10 @@
 			    rem) {
 		u8 *mask_pat;
 
-		err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
-				       nl80211_packet_pattern_policy, NULL);
+		err = nla_parse_nested_deprecated(pat_tb, MAX_NL80211_PKTPAT,
+						  pat,
+						  nl80211_packet_pattern_policy,
+						  NULL);
 		if (err)
 			return err;
 
@@ -11443,9 +12028,9 @@
 	if (!info->attrs[NL80211_ATTR_REKEY_DATA])
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA,
-			       info->attrs[NL80211_ATTR_REKEY_DATA],
-			       nl80211_rekey_policy, info->extack);
+	err = nla_parse_nested_deprecated(tb, MAX_NL80211_REKEY_DATA,
+					  info->attrs[NL80211_ATTR_REKEY_DATA],
+					  nl80211_rekey_policy, info->extack);
 	if (err)
 		return err;
 
@@ -11650,8 +12235,6 @@
 
 	conf.master_pref =
 		nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]);
-	if (!conf.master_pref)
-		return -EINVAL;
 
 	if (info->attrs[NL80211_ATTR_BANDS]) {
 		u32 bands = nla_get_u32(info->attrs[NL80211_ATTR_BANDS]);
@@ -11759,9 +12342,10 @@
 	if (!info->attrs[NL80211_ATTR_NAN_FUNC])
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX,
-			       info->attrs[NL80211_ATTR_NAN_FUNC],
-			       nl80211_nan_func_policy, info->extack);
+	err = nla_parse_nested_deprecated(tb, NL80211_NAN_FUNC_ATTR_MAX,
+					  info->attrs[NL80211_ATTR_NAN_FUNC],
+					  nl80211_nan_func_policy,
+					  info->extack);
 	if (err)
 		return err;
 
@@ -11769,7 +12353,7 @@
 	if (!func)
 		return -ENOMEM;
 
-	func->cookie = wdev->wiphy->cookie_counter++;
+	func->cookie = cfg80211_assign_cookie(rdev);
 
 	if (!tb[NL80211_NAN_FUNC_TYPE] ||
 	    nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]) > NL80211_NAN_FUNC_MAX_TYPE) {
@@ -11857,9 +12441,11 @@
 	if (tb[NL80211_NAN_FUNC_SRF]) {
 		struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR];
 
-		err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
-				       tb[NL80211_NAN_FUNC_SRF],
-				       nl80211_nan_srf_policy, info->extack);
+		err = nla_parse_nested_deprecated(srf_tb,
+						  NL80211_NAN_SRF_ATTR_MAX,
+						  tb[NL80211_NAN_FUNC_SRF],
+						  nl80211_nan_srf_policy,
+						  info->extack);
 		if (err)
 			goto out;
 
@@ -11957,7 +12543,7 @@
 			      NL80211_ATTR_PAD))
 		goto nla_put_failure;
 
-	func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC);
+	func_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_FUNC);
 	if (!func_attr)
 		goto nla_put_failure;
 
@@ -12074,11 +12660,12 @@
 	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr))
 		goto nla_put_failure;
 
-	match_attr = nla_nest_start(msg, NL80211_ATTR_NAN_MATCH);
+	match_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_MATCH);
 	if (!match_attr)
 		goto nla_put_failure;
 
-	local_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_LOCAL);
+	local_func_attr = nla_nest_start_noflag(msg,
+						NL80211_NAN_MATCH_FUNC_LOCAL);
 	if (!local_func_attr)
 		goto nla_put_failure;
 
@@ -12087,7 +12674,8 @@
 
 	nla_nest_end(msg, local_func_attr);
 
-	peer_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_PEER);
+	peer_func_attr = nla_nest_start_noflag(msg,
+					       NL80211_NAN_MATCH_FUNC_PEER);
 	if (!peer_func_attr)
 		goto nla_put_failure;
 
@@ -12153,7 +12741,7 @@
 			      NL80211_ATTR_PAD))
 		goto nla_put_failure;
 
-	func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC);
+	func_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_FUNC);
 	if (!func_attr)
 		goto nla_put_failure;
 
@@ -12215,8 +12803,7 @@
 		return -EOPNOTSUPP;
 
 	if (!info->attrs[NL80211_ATTR_MDID] ||
-	    !info->attrs[NL80211_ATTR_IE] ||
-	    !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
+	    !info->attrs[NL80211_ATTR_IE])
 		return -EINVAL;
 
 	memset(&ft_params, 0, sizeof(ft_params));
@@ -12285,6 +12872,29 @@
 	return 0;
 }
 
+static int nl80211_vendor_check_policy(const struct wiphy_vendor_command *vcmd,
+				       struct nlattr *attr,
+				       struct netlink_ext_ack *extack)
+{
+	if (vcmd->policy == VENDOR_CMD_RAW_DATA) {
+		if (attr->nla_type & NLA_F_NESTED) {
+			NL_SET_ERR_MSG_ATTR(extack, attr,
+					    "unexpected nested data");
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	if (!(attr->nla_type & NLA_F_NESTED)) {
+		NL_SET_ERR_MSG_ATTR(extack, attr, "expected nested data");
+		return -EINVAL;
+	}
+
+	return nl80211_validate_nested(attr, vcmd->maxattr, vcmd->policy,
+				       extack);
+}
+
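
This helper backs a new rule: every wiphy vendor command must declare how
its NL80211_ATTR_VENDOR_DATA payload is vetted, either VENDOR_CMD_RAW_DATA
for a flat blob (no NLA_F_NESTED allowed) or a real nla_policy plus
maxattr for nested attributes. A hypothetical driver-side declaration (all
names below are invented for illustration):

	enum {
		MY_VENDOR_ATTR_UNSPEC,
		MY_VENDOR_ATTR_LEVEL,
		__MY_VENDOR_ATTR_AFTER_LAST,
		MY_VENDOR_ATTR_MAX = __MY_VENDOR_ATTR_AFTER_LAST - 1,
	};

	static const struct nla_policy my_vendor_policy[MY_VENDOR_ATTR_MAX + 1] = {
		[MY_VENDOR_ATTR_LEVEL] = { .type = NLA_U32 },
	};

	static const struct wiphy_vendor_command my_vendor_cmds[] = {
		{
			.info = { .vendor_id = 0x001234, .subcmd = 1 },
			.policy = my_vendor_policy, /* or VENDOR_CMD_RAW_DATA */
			.maxattr = MY_VENDOR_ATTR_MAX,
			.doit = my_vendor_doit,	/* hypothetical handler */
		},
	};
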
 static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -12343,11 +12953,16 @@
 		if (info->attrs[NL80211_ATTR_VENDOR_DATA]) {
 			data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]);
 			len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]);
+
+			err = nl80211_vendor_check_policy(vcmd,
+					info->attrs[NL80211_ATTR_VENDOR_DATA],
+					info->extack);
+			if (err)
+				return err;
 		}
 
 		rdev->cur_cmd_info = info;
-		err = rdev->wiphy.vendor_commands[i].doit(&rdev->wiphy, wdev,
-							  data, len);
+		err = vcmd->doit(&rdev->wiphy, wdev, data, len);
 		rdev->cur_cmd_info = NULL;
 		return err;
 	}
@@ -12360,7 +12975,7 @@
 				       struct cfg80211_registered_device **rdev,
 				       struct wireless_dev **wdev)
 {
-	struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
+	struct nlattr **attrbuf;
 	u32 vid, subcmd;
 	unsigned int i;
 	int vcmd_idx = -1;
@@ -12391,22 +13006,32 @@
 		return 0;
 	}
 
-	err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf,
-			  nl80211_fam.maxattr, nl80211_policy, NULL);
+	attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
+	if (!attrbuf)
+		return -ENOMEM;
+
+	err = nlmsg_parse_deprecated(cb->nlh,
+				     GENL_HDRLEN + nl80211_fam.hdrsize,
+				     attrbuf, nl80211_fam.maxattr,
+				     nl80211_policy, NULL);
 	if (err)
-		return err;
+		goto out;
 
 	if (!attrbuf[NL80211_ATTR_VENDOR_ID] ||
-	    !attrbuf[NL80211_ATTR_VENDOR_SUBCMD])
-		return -EINVAL;
+	    !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	*wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf);
 	if (IS_ERR(*wdev))
 		*wdev = NULL;
 
 	*rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
-	if (IS_ERR(*rdev))
-		return PTR_ERR(*rdev);
+	if (IS_ERR(*rdev)) {
+		err = PTR_ERR(*rdev);
+		goto out;
+	}
 
 	vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]);
 	subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
@@ -12419,19 +13044,30 @@
 		if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd)
 			continue;
 
-		if (!vcmd->dumpit)
-			return -EOPNOTSUPP;
+		if (!vcmd->dumpit) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
 
 		vcmd_idx = i;
 		break;
 	}
 
-	if (vcmd_idx < 0)
-		return -EOPNOTSUPP;
+	if (vcmd_idx < 0) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
 
 	if (attrbuf[NL80211_ATTR_VENDOR_DATA]) {
 		data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]);
 		data_len = nla_len(attrbuf[NL80211_ATTR_VENDOR_DATA]);
+
+		err = nl80211_vendor_check_policy(
+				&(*rdev)->wiphy.vendor_commands[vcmd_idx],
+				attrbuf[NL80211_ATTR_VENDOR_DATA],
+				cb->extack);
+		if (err)
+			goto out;
 	}
 
 	/* 0 is the first index - add 1 to parse only once */
@@ -12443,7 +13079,10 @@
 	cb->args[4] = data_len;
 
 	/* keep rtnl locked in successful case */
-	return 0;
+	err = 0;
+out:
+	kfree(attrbuf);
+	return err;
 }
 
 static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
@@ -12503,7 +13142,8 @@
 			break;
 		}
 
-		vendor_data = nla_nest_start(skb, NL80211_ATTR_VENDOR_DATA);
+		vendor_data = nla_nest_start_noflag(skb,
+						    NL80211_ATTR_VENDOR_DATA);
 		if (!vendor_data) {
 			genlmsg_cancel(skb, hdr);
 			break;
@@ -12567,6 +13207,17 @@
 }
 EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_reply);
 
+unsigned int cfg80211_vendor_cmd_get_sender(struct wiphy *wiphy)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+
+	if (WARN_ON(!rdev->cur_cmd_info))
+		return 0;
+
+	return rdev->cur_cmd_info->snd_portid;
+}
+EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_get_sender);
+
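
Together with the portid plumbing added to __cfg80211_alloc_event_skb()
and __cfg80211_send_event_skb() above, this accessor lets a driver aim a
vendor event at the socket that issued the current vendor command rather
than at the multicast group. A hypothetical use from inside a vendor
->doit() handler, assuming the cfg80211_vendor_event_alloc_ucast() wrapper
that accompanies this plumbing:

	static void my_notify_sender(struct wiphy *wiphy,
				     struct wireless_dev *wdev)
	{
		u32 portid = cfg80211_vendor_cmd_get_sender(wiphy);
		struct sk_buff *skb;

		/* portid == 0 falls back to the vendor multicast group
		 * in __cfg80211_send_event_skb() */
		skb = cfg80211_vendor_event_alloc_ucast(wiphy, wdev, portid,
							128, 0, GFP_KERNEL);
		if (!skb)
			return;

		if (nla_put_u32(skb, 1 /* hypothetical attribute */, 42)) {
			kfree_skb(skb);
			return;
		}

		cfg80211_vendor_event(skb, GFP_KERNEL);
	}
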
 static int nl80211_set_qos_map(struct sk_buff *skb,
 			       struct genl_info *info)
 {
@@ -12636,12 +13287,7 @@
 		return -EINVAL;
 
 	tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
-	if (tsid >= IEEE80211_NUM_TIDS)
-		return -EINVAL;
-
 	up = nla_get_u8(info->attrs[NL80211_ATTR_USER_PRIO]);
-	if (up >= IEEE80211_NUM_UPS)
-		return -EINVAL;
 
 	/* WMM uses TIDs 0-7 even for TSPEC */
 	if (tsid >= IEEE80211_FIRST_TSPEC_TSID) {
@@ -12911,7 +13557,9 @@
 	if (!rdev->ops->external_auth)
 		return -EOPNOTSUPP;
 
-	if (!info->attrs[NL80211_ATTR_SSID])
+	if (!info->attrs[NL80211_ATTR_SSID] &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
 		return -EINVAL;
 
 	if (!info->attrs[NL80211_ATTR_BSSID])
@@ -12922,18 +13570,24 @@
 
 	memset(&params, 0, sizeof(params));
 
-	params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
-	if (params.ssid.ssid_len == 0 ||
-	    params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
-		return -EINVAL;
-	memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]),
-	       params.ssid.ssid_len);
+	if (info->attrs[NL80211_ATTR_SSID]) {
+		params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+		if (params.ssid.ssid_len == 0 ||
+		    params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
+			return -EINVAL;
+		memcpy(params.ssid.ssid,
+		       nla_data(info->attrs[NL80211_ATTR_SSID]),
+		       params.ssid.ssid_len);
+	}
 
 	memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]),
 	       ETH_ALEN);
 
 	params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
 
+	if (info->attrs[NL80211_ATTR_PMKID])
+		params.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
+
 	return rdev_external_auth(rdev, dev, &params);
 }
 
@@ -12999,6 +13653,143 @@
 	return err;
 }
 
+static int nl80211_get_ftm_responder_stats(struct sk_buff *skb,
+					   struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_ftm_responder_stats ftm_stats = {};
+	struct sk_buff *msg;
+	void *hdr;
+	struct nlattr *ftm_stats_attr;
+	int err;
+
+	if (wdev->iftype != NL80211_IFTYPE_AP || !wdev->beacon_interval)
+		return -EOPNOTSUPP;
+
+	err = rdev_get_ftm_responder_stats(rdev, dev, &ftm_stats);
+	if (err)
+		return err;
+
+	if (!ftm_stats.filled)
+		return -ENODATA;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
+			     NL80211_CMD_GET_FTM_RESPONDER_STATS);
+	if (!hdr)
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+		goto nla_put_failure;
+
+	ftm_stats_attr = nla_nest_start_noflag(msg,
+					       NL80211_ATTR_FTM_RESPONDER_STATS);
+	if (!ftm_stats_attr)
+		goto nla_put_failure;
+
+#define SET_FTM(field, name, type)					 \
+	do { if ((ftm_stats.filled & BIT(NL80211_FTM_STATS_ ## name)) && \
+	    nla_put_ ## type(msg, NL80211_FTM_STATS_ ## name,		 \
+			     ftm_stats.field))				 \
+		goto nla_put_failure; } while (0)
+#define SET_FTM_U64(field, name)					 \
+	do { if ((ftm_stats.filled & BIT(NL80211_FTM_STATS_ ## name)) && \
+	    nla_put_u64_64bit(msg, NL80211_FTM_STATS_ ## name,		 \
+			      ftm_stats.field, NL80211_FTM_STATS_PAD))	 \
+		goto nla_put_failure; } while (0)
+
+	SET_FTM(success_num, SUCCESS_NUM, u32);
+	SET_FTM(partial_num, PARTIAL_NUM, u32);
+	SET_FTM(failed_num, FAILED_NUM, u32);
+	SET_FTM(asap_num, ASAP_NUM, u32);
+	SET_FTM(non_asap_num, NON_ASAP_NUM, u32);
+	SET_FTM_U64(total_duration_ms, TOTAL_DURATION_MSEC);
+	SET_FTM(unknown_triggers_num, UNKNOWN_TRIGGERS_NUM, u32);
+	SET_FTM(reschedule_requests_num, RESCHEDULE_REQUESTS_NUM, u32);
+	SET_FTM(out_of_window_triggers_num, OUT_OF_WINDOW_TRIGGERS_NUM, u32);
+#undef SET_FTM
+
+	nla_nest_end(msg, ftm_stats_attr);
+
+	genlmsg_end(msg, hdr);
+	return genlmsg_reply(msg, info);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+
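
The SET_FTM()/SET_FTM_U64() macros above emit only the counters a driver
flags in ->filled, so a partial implementation reports what it has instead
of misleading zeros (and an entirely empty ->filled becomes -ENODATA). A
hypothetical driver callback feeding this path, with made-up values:

	static int my_get_ftm_responder_stats(struct wiphy *wiphy,
					      struct net_device *dev,
					      struct cfg80211_ftm_responder_stats *s)
	{
		s->success_num = 10;	/* read from firmware in reality */
		s->failed_num = 2;
		s->total_duration_ms = 12345;

		s->filled = BIT(NL80211_FTM_STATS_SUCCESS_NUM) |
			    BIT(NL80211_FTM_STATS_FAILED_NUM) |
			    BIT(NL80211_FTM_STATS_TOTAL_DURATION_MSEC);
		return 0;
	}
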
+static int nl80211_update_owe_info(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct cfg80211_update_owe_info owe_info;
+	struct net_device *dev = info->user_ptr[1];
+
+	if (!rdev->ops->update_owe_info)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_STATUS_CODE] ||
+	    !info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	memset(&owe_info, 0, sizeof(owe_info));
+	owe_info.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
+	nla_memcpy(owe_info.peer, info->attrs[NL80211_ATTR_MAC], ETH_ALEN);
+
+	if (info->attrs[NL80211_ATTR_IE]) {
+		owe_info.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+		owe_info.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+	}
+
+	return rdev_update_owe_info(rdev, dev, &owe_info);
+}
+
+static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct station_info sinfo = {};
+	const u8 *buf;
+	size_t len;
+	u8 *dest;
+	int err;
+
+	if (!rdev->ops->probe_mesh_link || !rdev->ops->get_station)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_MAC] ||
+	    !info->attrs[NL80211_ATTR_FRAME]) {
+		GENL_SET_ERR_MSG(info, "Frame or MAC missing");
+		return -EINVAL;
+	}
+
+	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
+	dest = nla_data(info->attrs[NL80211_ATTR_MAC]);
+	buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
+	len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
+
+	if (len < sizeof(struct ethhdr))
+		return -EINVAL;
+
+	if (!ether_addr_equal(buf, dest) || is_multicast_ether_addr(buf) ||
+	    !ether_addr_equal(buf + ETH_ALEN, dev->dev_addr))
+		return -EINVAL;
+
+	err = rdev_get_station(rdev, dev, dest, &sinfo);
+	if (err)
+		return err;
+
+	return rdev_probe_mesh_link(rdev, dev, dest, buf, len);
+}
+
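
The handler above only forwards frames that already look like valid mesh
data: an Ethernet header whose destination equals NL80211_ATTR_MAC (and is
unicast) and whose source is the mesh interface itself, with
rdev_get_station() confirming the target is a known peer. A userspace
probe frame would therefore start roughly like this (illustrative helper,
not part of the patch):

	static void build_probe_hdr(u8 *frame, const u8 *peer,
				    const u8 *own_addr)
	{
		struct ethhdr *eth = (struct ethhdr *)frame;

		memcpy(eth->h_dest, peer, ETH_ALEN);	   /* == NL80211_ATTR_MAC */
		memcpy(eth->h_source, own_addr, ETH_ALEN); /* == dev->dev_addr */
	}
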
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -13105,66 +13896,66 @@
 static const struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_wiphy,
 		.dumpit = nl80211_dump_wiphy,
 		.done = nl80211_dump_wiphy_done,
-		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_WIPHY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_wiphy,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_INTERFACE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_interface,
 		.dumpit = nl80211_dump_interface,
-		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
 		.internal_flags = NL80211_FLAG_NEED_WDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_INTERFACE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_interface,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_NEW_INTERFACE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_new_interface,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_INTERFACE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_del_interface,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_KEY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_key,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_KEY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_key,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL |
@@ -13172,8 +13963,8 @@
 	},
 	{
 		.cmd = NL80211_CMD_NEW_KEY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_new_key,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL |
@@ -13181,15 +13972,15 @@
 	},
 	{
 		.cmd = NL80211_CMD_DEL_KEY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_del_key,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_BEACON,
-		.policy = nl80211_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.doit = nl80211_set_beacon,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
@@ -13197,7 +13988,7 @@
 	},
 	{
 		.cmd = NL80211_CMD_START_AP,
-		.policy = nl80211_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.doit = nl80211_start_ap,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
@@ -13205,7 +13996,7 @@
 	},
 	{
 		.cmd = NL80211_CMD_STOP_AP,
-		.policy = nl80211_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.doit = nl80211_stop_ap,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
@@ -13213,172 +14004,172 @@
 	},
 	{
 		.cmd = NL80211_CMD_GET_STATION,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_station,
 		.dumpit = nl80211_dump_station,
-		.policy = nl80211_policy,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_STATION,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_station,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_NEW_STATION,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_new_station,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_STATION,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_del_station,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_MPATH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_mpath,
 		.dumpit = nl80211_dump_mpath,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_MPP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_mpp,
 		.dumpit = nl80211_dump_mpp,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_MPATH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_mpath,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_NEW_MPATH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_new_mpath,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_MPATH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_del_mpath,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_BSS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_bss,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_REG,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_reg_do,
 		.dumpit = nl80211_get_reg_dump,
-		.policy = nl80211_policy,
 		.internal_flags = NL80211_FLAG_NEED_RTNL,
 		/* can be retrieved by unprivileged users */
 	},
 #ifdef CONFIG_CFG80211_CRDA_SUPPORT
 	{
 		.cmd = NL80211_CMD_SET_REG,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_reg,
-		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_RTNL,
 	},
 #endif
 	{
 		.cmd = NL80211_CMD_REQ_SET_REG,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_req_set_reg,
-		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = NL80211_CMD_RELOAD_REGDB,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_reload_regdb,
-		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = NL80211_CMD_GET_MESH_CONFIG,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_mesh_config,
-		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_MESH_CONFIG,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_update_mesh_config,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_TRIGGER_SCAN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_trigger_scan,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_ABORT_SCAN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_abort_scan,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_SCAN,
-		.policy = nl80211_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit = nl80211_dump_scan,
 	},
 	{
 		.cmd = NL80211_CMD_START_SCHED_SCAN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_start_sched_scan,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_STOP_SCHED_SCAN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_stop_sched_scan,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_AUTHENTICATE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_authenticate,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL |
@@ -13386,40 +14177,41 @@
 	},
 	{
 		.cmd = NL80211_CMD_ASSOCIATE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_associate,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
-				  NL80211_FLAG_NEED_RTNL,
+				  NL80211_FLAG_NEED_RTNL |
+				  NL80211_FLAG_CLEAR_SKB,
 	},
 	{
 		.cmd = NL80211_CMD_DEAUTHENTICATE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_deauthenticate,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DISASSOCIATE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_disassociate,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_JOIN_IBSS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_join_ibss,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_LEAVE_IBSS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_leave_ibss,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
@@ -13427,9 +14219,9 @@
 #ifdef CONFIG_NL80211_TESTMODE
 	{
 		.cmd = NL80211_CMD_TESTMODE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_testmode_do,
 		.dumpit = nl80211_testmode_dump,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
@@ -13437,181 +14229,184 @@
 #endif
 	{
 		.cmd = NL80211_CMD_CONNECT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_connect,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
-				  NL80211_FLAG_NEED_RTNL,
+				  NL80211_FLAG_NEED_RTNL |
+				  NL80211_FLAG_CLEAR_SKB,
 	},
 	{
 		.cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_update_connect_params,
-		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
-				  NL80211_FLAG_NEED_RTNL,
+				  NL80211_FLAG_NEED_RTNL |
+				  NL80211_FLAG_CLEAR_SKB,
 	},
 	{
 		.cmd = NL80211_CMD_DISCONNECT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_disconnect,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_WIPHY_NETNS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_wiphy_netns,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_SURVEY,
-		.policy = nl80211_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.dumpit = nl80211_dump_survey,
 	},
 	{
 		.cmd = NL80211_CMD_SET_PMKSA,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_setdel_pmksa,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
-				  NL80211_FLAG_NEED_RTNL,
+				  NL80211_FLAG_NEED_RTNL |
+				  NL80211_FLAG_CLEAR_SKB,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_PMKSA,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_setdel_pmksa,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_FLUSH_PMKSA,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_flush_pmksa,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_REMAIN_ON_CHANNEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_remain_on_channel,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_cancel_remain_on_channel,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_TX_BITRATE_MASK,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_tx_bitrate_mask,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_REGISTER_FRAME,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_register_mgmt,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_FRAME,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_tx_mgmt,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_FRAME_WAIT_CANCEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_tx_mgmt_cancel_wait,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_POWER_SAVE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_power_save,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_POWER_SAVE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_power_save,
-		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_CQM,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_cqm,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_CHANNEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_channel,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_WDS_PEER,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_wds_peer,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_JOIN_MESH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_join_mesh,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_LEAVE_MESH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_leave_mesh,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_JOIN_OCB,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_join_ocb,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_LEAVE_OCB,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_leave_ocb,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
@@ -13619,16 +14414,16 @@
 #ifdef CONFIG_PM
 	{
 		.cmd = NL80211_CMD_GET_WOWLAN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_wowlan,
-		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_WOWLAN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_wowlan,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
@@ -13636,8 +14431,8 @@
 #endif
 	{
 		.cmd = NL80211_CMD_SET_REKEY_OFFLOAD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_rekey_data,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL |
@@ -13645,267 +14440,306 @@
 	},
 	{
 		.cmd = NL80211_CMD_TDLS_MGMT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_tdls_mgmt,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_TDLS_OPER,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_tdls_oper,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_UNEXPECTED_FRAME,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_register_unexpected_frame,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_PROBE_CLIENT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_probe_client,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_REGISTER_BEACONS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_register_beacons,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_NOACK_MAP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_noack_map,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_START_P2P_DEVICE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_start_p2p_device,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_STOP_P2P_DEVICE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_stop_p2p_device,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_START_NAN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_start_nan,
-		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_STOP_NAN,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_stop_nan,
-		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_ADD_NAN_FUNCTION,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_nan_add_func,
-		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_NAN_FUNCTION,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_nan_del_func,
-		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_CHANGE_NAN_CONFIG,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_nan_change_config,
-		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_MCAST_RATE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_mcast_rate,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_MAC_ACL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_mac_acl,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_RADAR_DETECT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_start_radar_detection,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_protocol_features,
-		.policy = nl80211_policy,
 	},
 	{
 		.cmd = NL80211_CMD_UPDATE_FT_IES,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_update_ft_ies,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_CRIT_PROTOCOL_START,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_crit_protocol_start,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_CRIT_PROTOCOL_STOP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_crit_protocol_stop,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_COALESCE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_get_coalesce,
-		.policy = nl80211_policy,
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_COALESCE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_coalesce,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_CHANNEL_SWITCH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_channel_switch,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_VENDOR,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_vendor_cmd,
 		.dumpit = nl80211_vendor_cmd_dump,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
-				  NL80211_FLAG_NEED_RTNL,
+				  NL80211_FLAG_NEED_RTNL |
+				  NL80211_FLAG_CLEAR_SKB,
 	},
 	{
 		.cmd = NL80211_CMD_SET_QOS_MAP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_qos_map,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_ADD_TX_TS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_add_tx_ts,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_TX_TS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_del_tx_ts,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_tdls_channel_switch,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_tdls_cancel_channel_switch,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_multicast_to_unicast,
-		.policy = nl80211_policy,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_PMK,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_set_pmk,
-		.policy = nl80211_policy,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
-				  NL80211_FLAG_NEED_RTNL,
+				  NL80211_FLAG_NEED_RTNL |
+				  NL80211_FLAG_CLEAR_SKB,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_PMK,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_del_pmk,
-		.policy = nl80211_policy,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_EXTERNAL_AUTH,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_external_auth,
-		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_CONTROL_PORT_FRAME,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = nl80211_tx_control_port,
-		.policy = nl80211_policy,
+		.flags = GENL_UNS_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = nl80211_get_ftm_responder_stats,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_PEER_MEASUREMENT_START,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = nl80211_pmsr_start,
+		.flags = GENL_UNS_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_NOTIFY_RADAR,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = nl80211_notify_radar_detection,
+		.flags = GENL_UNS_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_UPDATE_OWE_INFO,
+		.doit = nl80211_update_owe_info,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_PROBE_MESH_LINK,
+		.doit = nl80211_probe_mesh_link,
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
@@ -13917,6 +14751,7 @@
 	.hdrsize = 0,			/* no private header */
 	.version = 1,			/* no particular meaning now */
 	.maxattr = NL80211_ATTR_MAX,
+	.policy = nl80211_policy,
 	.netnsok = true,
 	.pre_doit = nl80211_pre_doit,
 	.post_doit = nl80211_post_doit,
@@ -13925,6 +14760,7 @@
 	.n_ops = ARRAY_SIZE(nl80211_ops),
 	.mcgrps = nl80211_mcgrps,
 	.n_mcgrps = ARRAY_SIZE(nl80211_mcgrps),
+	.parallel_ops = true,
 };
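
The long run of per-op changes above all have the same shape: the .policy
member moves from each genl_ops entry to the family (the one line added
here replaces roughly a hundred removals), and pre-existing commands opt
out of the new strict validation so old userspace keeps working. Enabling
parallel_ops is also what forced the genl_family_attrbuf() removals
earlier in this patch, since that API hands out a single shared buffer. A
minimal sketch of the converted shape, with invented names:

	static const struct genl_ops example_ops[] = {
		{
			.cmd = EXAMPLE_CMD_GET,
			/* pre-strict command: keep lenient parsing */
			.validate = GENL_DONT_VALIDATE_STRICT |
				    GENL_DONT_VALIDATE_DUMP,
			.doit = example_get,	/* hypothetical handler */
		},
	};

	static struct genl_family example_fam __ro_after_init = {
		.name = "example",
		.maxattr = EXAMPLE_ATTR_MAX,
		.policy = example_policy, /* family-wide; ops carry none */
		.ops = example_ops,
		.n_ops = ARRAY_SIZE(example_ops),
		.parallel_ops = true,	/* safe: no shared parse buffer */
	};
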
 
 /* notification functions */
@@ -13957,15 +14793,11 @@
 {
 	struct sk_buff *msg;
 
-	WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
-		cmd != NL80211_CMD_DEL_INTERFACE);
-
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return;
 
-	if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev,
-			       cmd == NL80211_CMD_DEL_INTERFACE) < 0) {
+	if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, cmd) < 0) {
 		nlmsg_free(msg);
 		return;
 	}
@@ -13984,7 +14816,7 @@
 	if (WARN_ON(!req))
 		return 0;
 
-	nest = nla_nest_start(msg, NL80211_ATTR_SCAN_SSIDS);
+	nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_SSIDS);
 	if (!nest)
 		goto nla_put_failure;
 	for (i = 0; i < req->n_ssids; i++) {
@@ -13993,7 +14825,7 @@
 	}
 	nla_nest_end(msg, nest);
 
-	nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES);
+	nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES);
 	if (!nest)
 		goto nla_put_failure;
 	for (i = 0; i < req->n_channels; i++) {
@@ -14205,12 +15037,10 @@
 		return;
 
 	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id);
-	if (!hdr) {
-		nlmsg_free(msg);
-		return;
-	}
+	if (!hdr)
+		goto nla_put_failure;
 
-	if (nl80211_reg_change_event_fill(msg, request) == false)
+	if (!nl80211_reg_change_event_fill(msg, request))
 		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
@@ -14230,12 +15060,13 @@
 				    struct net_device *netdev,
 				    const u8 *buf, size_t len,
 				    enum nl80211_commands cmd, gfp_t gfp,
-				    int uapsd_queues)
+				    int uapsd_queues, const u8 *req_ies,
+				    size_t req_ies_len)
 {
 	struct sk_buff *msg;
 	void *hdr;
 
-	msg = nlmsg_new(100 + len, gfp);
+	msg = nlmsg_new(100 + len + req_ies_len, gfp);
 	if (!msg)
 		return;
 
@@ -14247,12 +15078,14 @@
 
 	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
 	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
-	    nla_put(msg, NL80211_ATTR_FRAME, len, buf))
+	    nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
+	    (req_ies &&
+	     nla_put(msg, NL80211_ATTR_REQ_IE, req_ies_len, req_ies)))
 		goto nla_put_failure;
 
 	if (uapsd_queues >= 0) {
 		struct nlattr *nla_wmm =
-			nla_nest_start(msg, NL80211_ATTR_STA_WME);
+			nla_nest_start_noflag(msg, NL80211_ATTR_STA_WME);
 		if (!nla_wmm)
 			goto nla_put_failure;
 
@@ -14278,15 +15111,17 @@
 			  size_t len, gfp_t gfp)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_AUTHENTICATE, gfp, -1);
+				NL80211_CMD_AUTHENTICATE, gfp, -1, NULL, 0);
 }
 
 void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *netdev, const u8 *buf,
-			   size_t len, gfp_t gfp, int uapsd_queues)
+			   size_t len, gfp_t gfp, int uapsd_queues,
+			   const u8 *req_ies, size_t req_ies_len)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_ASSOCIATE, gfp, uapsd_queues);
+				NL80211_CMD_ASSOCIATE, gfp, uapsd_queues,
+				req_ies, req_ies_len);
 }
 
 void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
@@ -14294,7 +15129,7 @@
 			 size_t len, gfp_t gfp)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_DEAUTHENTICATE, gfp, -1);
+				NL80211_CMD_DEAUTHENTICATE, gfp, -1, NULL, 0);
 }
 
 void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
@@ -14302,7 +15137,7 @@
 			   size_t len, gfp_t gfp)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_DISASSOCIATE, gfp, -1);
+				NL80211_CMD_DISASSOCIATE, gfp, -1, NULL, 0);
 }
 
 void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
@@ -14323,7 +15158,8 @@
 		cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
 
 	trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len);
-	nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1);
+	nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1,
+				NULL, 0);
 }
 EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt);
 
@@ -14502,7 +15338,9 @@
 		return;
 	}
 
-	if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
+	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
 		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
@@ -14584,7 +15422,8 @@
 }
 
 void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
-					const u8* ie, u8 ie_len, gfp_t gfp)
+					const u8 *ie, u8 ie_len,
+					int sig_dbm, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -14610,7 +15449,9 @@
 	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
 	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
 	    (ie_len && ie &&
-	     nla_put(msg, NL80211_ATTR_IE, ie_len , ie)))
+	     nla_put(msg, NL80211_ATTR_IE, ie_len, ie)) ||
+	    (sig_dbm &&
+	     nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)))
 		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
@@ -14687,7 +15528,7 @@
 		goto nla_put_failure;
 
 	/* Before */
-	nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE);
+	nl_freq = nla_nest_start_noflag(msg, NL80211_ATTR_FREQ_BEFORE);
 	if (!nl_freq)
 		goto nla_put_failure;
 
@@ -14696,7 +15537,7 @@
 	nla_nest_end(msg, nl_freq);
 
 	/* After */
-	nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER);
+	nl_freq = nla_nest_start_noflag(msg, NL80211_ATTR_FREQ_AFTER);
 	if (!nl_freq)
 		goto nla_put_failure;
 
@@ -14789,6 +15630,19 @@
 }
 EXPORT_SYMBOL(cfg80211_remain_on_channel_expired);
 
+void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie,
+					struct ieee80211_channel *chan,
+					gfp_t gfp)
+{
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+
+	trace_cfg80211_tx_mgmt_expired(wdev, cookie, chan);
+	nl80211_send_remain_on_chan_event(NL80211_CMD_FRAME_WAIT_CANCEL,
+					  rdev, wdev, cookie, chan, 0, gfp);
+}
+EXPORT_SYMBOL(cfg80211_tx_mgmt_expired);
+
 void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
 		      struct station_info *sinfo, gfp_t gfp)
 {
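
The new cfg80211_tx_mgmt_expired() export above lets a driver tell userspace that the wait time of an off-channel management-frame TX has elapsed, reusing the NL80211_CMD_FRAME_WAIT_CANCEL event with a zero duration. A minimal driver-side sketch, assuming a hypothetical mydrv driver (none of the mydrv_* names are part of this patch):

	/* Hypothetical completion path: hand the cookie and channel
	 * back to cfg80211 once the hardware's TX wait ends.
	 */
	static void mydrv_tx_wait_done(struct mydrv_tx_wait *wait)
	{
		cfg80211_tx_mgmt_expired(wait->wdev, wait->cookie,
					 wait->chan, GFP_KERNEL);
	}
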
@@ -15130,7 +15984,7 @@
 	if (mac && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
 		goto nla_put_failure;
 
-	cb[1] = nla_nest_start(msg, NL80211_ATTR_CQM);
+	cb[1] = nla_nest_start_noflag(msg, NL80211_ATTR_CQM);
 	if (!cb[1])
 		goto nla_put_failure;
 
@@ -15291,7 +16145,7 @@
 	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
 		goto nla_put_failure;
 
-	rekey_attr = nla_nest_start(msg, NL80211_ATTR_REKEY_DATA);
+	rekey_attr = nla_nest_start_noflag(msg, NL80211_ATTR_REKEY_DATA);
 	if (!rekey_attr)
 		goto nla_put_failure;
 
@@ -15346,7 +16200,7 @@
 	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex))
 		goto nla_put_failure;
 
-	attr = nla_nest_start(msg, NL80211_ATTR_PMKSA_CANDIDATE);
+	attr = nla_nest_start_noflag(msg, NL80211_ATTR_PMKSA_CANDIDATE);
 	if (!attr)
 		goto nla_put_failure;
 
@@ -15433,6 +16287,13 @@
 
 	wdev->chandef = *chandef;
 	wdev->preset_chandef = *chandef;
+
+	if (wdev->iftype == NL80211_IFTYPE_STATION &&
+	    !WARN_ON(!wdev->current_bss))
+		cfg80211_update_assoc_bss_entry(wdev, chandef->chan);
+
+	cfg80211_sched_dfs_chan_update(rdev);
+
 	nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL,
 				 NL80211_CMD_CH_SWITCH_NOTIFY, 0);
 }
@@ -15651,15 +16512,15 @@
 	struct nlattr *nl_results, *nl_match, *nl_freqs;
 	int i, j;
 
-	nl_results = nla_nest_start(
-		msg, NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS);
+	nl_results = nla_nest_start_noflag(msg,
+					   NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS);
 	if (!nl_results)
 		return -EMSGSIZE;
 
 	for (i = 0; i < nd->n_matches; i++) {
 		struct cfg80211_wowlan_nd_match *match = nd->matches[i];
 
-		nl_match = nla_nest_start(msg, i);
+		nl_match = nla_nest_start_noflag(msg, i);
 		if (!nl_match)
 			break;
 
@@ -15677,8 +16538,8 @@
 		}
 
 		if (match->n_channels) {
-			nl_freqs = nla_nest_start(
-				msg, NL80211_ATTR_SCAN_FREQUENCIES);
+			nl_freqs = nla_nest_start_noflag(msg,
+							 NL80211_ATTR_SCAN_FREQUENCIES);
 			if (!nl_freqs) {
 				nla_nest_cancel(msg, nl_match);
 				goto out;
@@ -15737,7 +16598,8 @@
 	if (wakeup) {
 		struct nlattr *reasons;
 
-		reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS);
+		reasons = nla_nest_start_noflag(msg,
+						NL80211_ATTR_WOWLAN_TRIGGERS);
 		if (!reasons)
 			goto free_msg;
 
@@ -15893,6 +16755,8 @@
 			} else if (wdev->conn_owner_nlportid == notify->portid) {
 				schedule_work(&wdev->disconnect_wk);
 			}
+
+			cfg80211_release_pmsr(wdev, notify->portid);
 		}
 
 		spin_lock_bh(&rdev->beacon_registrations_lock);
@@ -16074,6 +16938,46 @@
 }
 EXPORT_SYMBOL(cfg80211_external_auth_request);
 
+void cfg80211_update_owe_info_event(struct net_device *netdev,
+				    struct cfg80211_update_owe_info *owe_info,
+				    gfp_t gfp)
+{
+	struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+
+	trace_cfg80211_update_owe_info_event(wiphy, netdev, owe_info);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_UPDATE_OWE_INFO);
+	if (!hdr)
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
+	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, owe_info->peer))
+		goto nla_put_failure;
+
+	if (!owe_info->ie_len ||
+	    nla_put(msg, NL80211_ATTR_IE, owe_info->ie_len, owe_info->ie))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+				NL80211_MCGRP_MLME, gfp);
+	return;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_update_owe_info_event);
+
 /* initialisation/exit functions */
 
 int __init nl80211_init(void)
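
Note that cfg80211_update_owe_info_event() drops the event when owe_info->ie_len is zero (the !owe_info->ie_len test above), so callers must supply the OWE Diffie-Hellman IEs. A hedged sketch of an AP-side caller, again with hypothetical mydrv_* naming:

	static void mydrv_rx_owe_assoc_req(struct net_device *netdev,
					   const u8 *sta_addr,
					   const u8 *owe_ies, size_t owe_len)
	{
		struct cfg80211_update_owe_info owe = {
			.ie = owe_ies,		/* must be non-empty */
			.ie_len = owe_len,
		};

		memcpy(owe.peer, sta_addr, ETH_ALEN);
		cfg80211_update_owe_info_event(netdev, &owe, GFP_KERNEL);
	}
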
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 79e47fe..a41e94a 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -1,4 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions of this file
+ * Copyright (C) 2018 Intel Corporation
+ */
 #ifndef __NET_WIRELESS_NL80211_H
 #define __NET_WIRELESS_NL80211_H
 
@@ -6,6 +10,30 @@
 
 int nl80211_init(void);
 void nl80211_exit(void);
+
+extern const struct nla_policy nl80211_policy[NUM_NL80211_ATTR];
+
+void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
+		     int flags, u8 cmd);
+bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
+			  int attr);
+
+static inline u64 wdev_id(struct wireless_dev *wdev)
+{
+	return (u64)wdev->identifier |
+	       ((u64)wiphy_to_rdev(wdev->wiphy)->wiphy_idx << 32);
+}
+
+int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
+			      struct cfg80211_registered_device **rdev,
+			      struct wireless_dev **wdev);
+
+int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
+			  struct genl_info *info,
+			  struct cfg80211_chan_def *chandef);
+int nl80211_parse_random_mac(struct nlattr **attrs,
+			     u8 *mac_addr, u8 *mac_addr_mask);
+
 void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
 			  enum nl80211_commands cmd);
 void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
@@ -39,7 +67,8 @@
 void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *netdev,
 			   const u8 *buf, size_t len, gfp_t gfp,
-			   int uapsd_queues);
+			   int uapsd_queues,
+			   const u8 *req_ies, size_t req_ies_len);
 void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
 			 struct net_device *netdev,
 			 const u8 *buf, size_t len, gfp_t gfp);
@@ -95,4 +124,8 @@
 
 void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev);
 
+/* peer measurement */
+int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info);
+int nl80211_pmsr_dump_results(struct sk_buff *skb, struct netlink_callback *cb);
+
 #endif /* __NET_WIRELESS_NL80211_H */
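
The new wdev_id() helper packs the per-wiphy wdev identifier into the low 32 bits and the wiphy index into the high 32 bits of NL80211_ATTR_WDEV. For illustration, the inverse mapping that userspace applies (not part of this patch):

	static inline u32 wdev_id_to_wiphy_idx(u64 id)
	{
		return (u32)(id >> 32);		/* wiphy index */
	}

	static inline u32 wdev_id_to_identifier(u64 id)
	{
		return (u32)id;			/* per-wiphy wdev identifier */
	}
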
diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c
index e64dbf1..2d26a6d 100644
--- a/net/wireless/ocb.c
+++ b/net/wireless/ocb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * OCB mode implementation
  *
@@ -5,10 +6,6 @@
  *            (c) 2014 Volkswagen Group Research
  * Author:    Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz>
  * Funded by: Volkswagen Group Research
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/ieee80211.h>
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
new file mode 100644
index 0000000..c09fbf0
--- /dev/null
+++ b/net/wireless/pmsr.c
@@ -0,0 +1,600 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 - 2019 Intel Corporation
+ */
+#ifndef __PMSR_H
+#define __PMSR_H
+#include <net/cfg80211.h>
+#include "core.h"
+#include "nl80211.h"
+#include "rdev-ops.h"
+
+static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
+			  struct nlattr *ftmreq,
+			  struct cfg80211_pmsr_request_peer *out,
+			  struct genl_info *info)
+{
+	const struct cfg80211_pmsr_capabilities *capa = rdev->wiphy.pmsr_capa;
+	struct nlattr *tb[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1];
+	u32 preamble = NL80211_PREAMBLE_DMG; /* only optional in DMG */
+
+	/* validate existing data */
+	if (!(rdev->wiphy.pmsr_capa->ftm.bandwidths & BIT(out->chandef.width))) {
+		NL_SET_ERR_MSG(info->extack, "FTM: unsupported bandwidth");
+		return -EINVAL;
+	}
+
+	/* no validation needed - was already done via nested policy */
+	nla_parse_nested_deprecated(tb, NL80211_PMSR_FTM_REQ_ATTR_MAX, ftmreq,
+				    NULL, NULL);
+
+	if (tb[NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE])
+		preamble = nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE]);
+
+	/* set up values - struct is 0-initialized */
+	out->ftm.requested = true;
+
+	switch (out->chandef.chan->band) {
+	case NL80211_BAND_60GHZ:
+		/* optional */
+		break;
+	default:
+		if (!tb[NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE]) {
+			NL_SET_ERR_MSG(info->extack,
+				       "FTM: must specify preamble");
+			return -EINVAL;
+		}
+	}
+
+	if (!(capa->ftm.preambles & BIT(preamble))) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE],
+				    "FTM: invalid preamble");
+		return -EINVAL;
+	}
+
+	out->ftm.preamble = preamble;
+
+	out->ftm.burst_period = 0;
+	if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD])
+		out->ftm.burst_period =
+			nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
+
+	out->ftm.asap = !!tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP];
+	if (out->ftm.asap && !capa->ftm.asap) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP],
+				    "FTM: ASAP mode not supported");
+		return -EINVAL;
+	}
+
+	if (!out->ftm.asap && !capa->ftm.non_asap) {
+		NL_SET_ERR_MSG(info->extack,
+			       "FTM: non-ASAP mode not supported");
+		return -EINVAL;
+	}
+
+	out->ftm.num_bursts_exp = 0;
+	if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP])
+		out->ftm.num_bursts_exp =
+			nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
+
+	if (capa->ftm.max_bursts_exponent >= 0 &&
+	    out->ftm.num_bursts_exp > capa->ftm.max_bursts_exponent) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP],
+				    "FTM: max NUM_BURSTS_EXP must be set lower than the device limit");
+		return -EINVAL;
+	}
+
+	out->ftm.burst_duration = 15;
+	if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION])
+		out->ftm.burst_duration =
+			nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
+
+	out->ftm.ftms_per_burst = 0;
+	if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST])
+		out->ftm.ftms_per_burst =
+			nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]);
+
+	if (capa->ftm.max_ftms_per_burst &&
+	    (out->ftm.ftms_per_burst > capa->ftm.max_ftms_per_burst ||
+	     out->ftm.ftms_per_burst == 0)) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST],
+				    "FTM: FTMs per burst must be set lower than the device limit but non-zero");
+		return -EINVAL;
+	}
+
+	out->ftm.ftmr_retries = 3;
+	if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES])
+		out->ftm.ftmr_retries =
+			nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
+
+	out->ftm.request_lci = !!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI];
+	if (out->ftm.request_lci && !capa->ftm.request_lci) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI],
+				    "FTM: LCI request not supported");
+	}
+
+	out->ftm.request_civicloc =
+		!!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC];
+	if (out->ftm.request_civicloc && !capa->ftm.request_civicloc) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC],
+				    "FTM: civic location request not supported");
+	}
+
+	return 0;
+}
+
+static int pmsr_parse_peer(struct cfg80211_registered_device *rdev,
+			   struct nlattr *peer,
+			   struct cfg80211_pmsr_request_peer *out,
+			   struct genl_info *info)
+{
+	struct nlattr *tb[NL80211_PMSR_PEER_ATTR_MAX + 1];
+	struct nlattr *req[NL80211_PMSR_REQ_ATTR_MAX + 1];
+	struct nlattr *treq;
+	int err, rem;
+
+	/* no validation needed - was already done via nested policy */
+	nla_parse_nested_deprecated(tb, NL80211_PMSR_PEER_ATTR_MAX, peer,
+				    NULL, NULL);
+
+	if (!tb[NL80211_PMSR_PEER_ATTR_ADDR] ||
+	    !tb[NL80211_PMSR_PEER_ATTR_CHAN] ||
+	    !tb[NL80211_PMSR_PEER_ATTR_REQ]) {
+		NL_SET_ERR_MSG_ATTR(info->extack, peer,
+				    "insufficient peer data");
+		return -EINVAL;
+	}
+
+	memcpy(out->addr, nla_data(tb[NL80211_PMSR_PEER_ATTR_ADDR]), ETH_ALEN);
+
+	/* reuse info->attrs */
+	memset(info->attrs, 0, sizeof(*info->attrs) * (NL80211_ATTR_MAX + 1));
+	/* need to validate here, we don't want to have validation recursion */
+	err = nla_parse_nested_deprecated(info->attrs, NL80211_ATTR_MAX,
+					  tb[NL80211_PMSR_PEER_ATTR_CHAN],
+					  nl80211_policy, info->extack);
+	if (err)
+		return err;
+
+	err = nl80211_parse_chandef(rdev, info, &out->chandef);
+	if (err)
+		return err;
+
+	/* no validation needed - was already done via nested policy */
+	nla_parse_nested_deprecated(req, NL80211_PMSR_REQ_ATTR_MAX,
+				    tb[NL80211_PMSR_PEER_ATTR_REQ], NULL,
+				    NULL);
+
+	if (!req[NL80211_PMSR_REQ_ATTR_DATA]) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[NL80211_PMSR_PEER_ATTR_REQ],
+				    "missing request type/data");
+		return -EINVAL;
+	}
+
+	if (req[NL80211_PMSR_REQ_ATTR_GET_AP_TSF])
+		out->report_ap_tsf = true;
+
+	if (out->report_ap_tsf && !rdev->wiphy.pmsr_capa->report_ap_tsf) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    req[NL80211_PMSR_REQ_ATTR_GET_AP_TSF],
+				    "reporting AP TSF is not supported");
+		return -EINVAL;
+	}
+
+	nla_for_each_nested(treq, req[NL80211_PMSR_REQ_ATTR_DATA], rem) {
+		switch (nla_type(treq)) {
+		case NL80211_PMSR_TYPE_FTM:
+			err = pmsr_parse_ftm(rdev, treq, out, info);
+			break;
+		default:
+			NL_SET_ERR_MSG_ATTR(info->extack, treq,
+					    "unsupported measurement type");
+			err = -EINVAL;
+		}
+	}
+
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *reqattr = info->attrs[NL80211_ATTR_PEER_MEASUREMENTS];
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	struct cfg80211_pmsr_request *req;
+	struct nlattr *peers, *peer;
+	int count, rem, err, idx;
+
+	if (!rdev->wiphy.pmsr_capa)
+		return -EOPNOTSUPP;
+
+	if (!reqattr)
+		return -EINVAL;
+
+	peers = nla_find(nla_data(reqattr), nla_len(reqattr),
+			 NL80211_PMSR_ATTR_PEERS);
+	if (!peers)
+		return -EINVAL;
+
+	count = 0;
+	nla_for_each_nested(peer, peers, rem) {
+		count++;
+
+		if (count > rdev->wiphy.pmsr_capa->max_peers) {
+			NL_SET_ERR_MSG_ATTR(info->extack, peer,
+					    "Too many peers used");
+			return -EINVAL;
+		}
+	}
+
+	req = kzalloc(struct_size(req, peers, count), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	if (info->attrs[NL80211_ATTR_TIMEOUT])
+		req->timeout = nla_get_u32(info->attrs[NL80211_ATTR_TIMEOUT]);
+
+	if (info->attrs[NL80211_ATTR_MAC]) {
+		if (!rdev->wiphy.pmsr_capa->randomize_mac_addr) {
+			NL_SET_ERR_MSG_ATTR(info->extack,
+					    info->attrs[NL80211_ATTR_MAC],
+					    "device cannot randomize MAC address");
+			err = -EINVAL;
+			goto out_err;
+		}
+
+		err = nl80211_parse_random_mac(info->attrs, req->mac_addr,
+					       req->mac_addr_mask);
+		if (err)
+			goto out_err;
+	} else {
+		memcpy(req->mac_addr, wdev_address(wdev), ETH_ALEN);
+		eth_broadcast_addr(req->mac_addr_mask);
+	}
+
+	idx = 0;
+	nla_for_each_nested(peer, peers, rem) {
+		/* NB: this reuses info->attrs, but we no longer need it */
+		err = pmsr_parse_peer(rdev, peer, &req->peers[idx], info);
+		if (err)
+			goto out_err;
+		idx++;
+	}
+
+	req->n_peers = count;
+	req->cookie = cfg80211_assign_cookie(rdev);
+	req->nl_portid = info->snd_portid;
+
+	err = rdev_start_pmsr(rdev, wdev, req);
+	if (err)
+		goto out_err;
+
+	list_add_tail(&req->list, &wdev->pmsr_list);
+
+	nl_set_extack_cookie_u64(info->extack, req->cookie);
+	return 0;
+out_err:
+	kfree(req);
+	return err;
+}
+
+void cfg80211_pmsr_complete(struct wireless_dev *wdev,
+			    struct cfg80211_pmsr_request *req,
+			    gfp_t gfp)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+
+	trace_cfg80211_pmsr_complete(wdev->wiphy, wdev, req->cookie);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		goto free_request;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0,
+			     NL80211_CMD_PEER_MEASUREMENT_COMPLETE);
+	if (!hdr)
+		goto free_msg;
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+			      NL80211_ATTR_PAD))
+		goto free_msg;
+
+	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, req->cookie,
+			      NL80211_ATTR_PAD))
+		goto free_msg;
+
+	genlmsg_end(msg, hdr);
+	genlmsg_unicast(wiphy_net(wdev->wiphy), msg, req->nl_portid);
+	goto free_request;
+free_msg:
+	nlmsg_free(msg);
+free_request:
+	spin_lock_bh(&wdev->pmsr_lock);
+	list_del(&req->list);
+	spin_unlock_bh(&wdev->pmsr_lock);
+	kfree(req);
+}
+EXPORT_SYMBOL_GPL(cfg80211_pmsr_complete);
+
+static int nl80211_pmsr_send_ftm_res(struct sk_buff *msg,
+				     struct cfg80211_pmsr_result *res)
+{
+	if (res->status == NL80211_PMSR_STATUS_FAILURE) {
+		if (nla_put_u32(msg, NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON,
+				res->ftm.failure_reason))
+			goto error;
+
+		if (res->ftm.failure_reason ==
+			NL80211_PMSR_FTM_FAILURE_PEER_BUSY &&
+		    res->ftm.busy_retry_time &&
+		    nla_put_u32(msg, NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME,
+				res->ftm.busy_retry_time))
+			goto error;
+
+		return 0;
+	}
+
+#define PUT(tp, attr, val)						\
+	do {								\
+		if (nla_put_##tp(msg,					\
+				 NL80211_PMSR_FTM_RESP_ATTR_##attr,	\
+				 res->ftm.val))				\
+			goto error;					\
+	} while (0)
+
+#define PUTOPT(tp, attr, val)						\
+	do {								\
+		if (res->ftm.val##_valid)				\
+			PUT(tp, attr, val);				\
+	} while (0)
+
+#define PUT_U64(attr, val)						\
+	do {								\
+		if (nla_put_u64_64bit(msg,				\
+				      NL80211_PMSR_FTM_RESP_ATTR_##attr,\
+				      res->ftm.val,			\
+				      NL80211_PMSR_FTM_RESP_ATTR_PAD))	\
+			goto error;					\
+	} while (0)
+
+#define PUTOPT_U64(attr, val)						\
+	do {								\
+		if (res->ftm.val##_valid)				\
+			PUT_U64(attr, val);				\
+	} while (0)
+
+	if (res->ftm.burst_index >= 0)
+		PUT(u32, BURST_INDEX, burst_index);
+	PUTOPT(u32, NUM_FTMR_ATTEMPTS, num_ftmr_attempts);
+	PUTOPT(u32, NUM_FTMR_SUCCESSES, num_ftmr_successes);
+	PUT(u8, NUM_BURSTS_EXP, num_bursts_exp);
+	PUT(u8, BURST_DURATION, burst_duration);
+	PUT(u8, FTMS_PER_BURST, ftms_per_burst);
+	PUTOPT(s32, RSSI_AVG, rssi_avg);
+	PUTOPT(s32, RSSI_SPREAD, rssi_spread);
+	if (res->ftm.tx_rate_valid &&
+	    !nl80211_put_sta_rate(msg, &res->ftm.tx_rate,
+				  NL80211_PMSR_FTM_RESP_ATTR_TX_RATE))
+		goto error;
+	if (res->ftm.rx_rate_valid &&
+	    !nl80211_put_sta_rate(msg, &res->ftm.rx_rate,
+				  NL80211_PMSR_FTM_RESP_ATTR_RX_RATE))
+		goto error;
+	PUTOPT_U64(RTT_AVG, rtt_avg);
+	PUTOPT_U64(RTT_VARIANCE, rtt_variance);
+	PUTOPT_U64(RTT_SPREAD, rtt_spread);
+	PUTOPT_U64(DIST_AVG, dist_avg);
+	PUTOPT_U64(DIST_VARIANCE, dist_variance);
+	PUTOPT_U64(DIST_SPREAD, dist_spread);
+	if (res->ftm.lci && res->ftm.lci_len &&
+	    nla_put(msg, NL80211_PMSR_FTM_RESP_ATTR_LCI,
+		    res->ftm.lci_len, res->ftm.lci))
+		goto error;
+	if (res->ftm.civicloc && res->ftm.civicloc_len &&
+	    nla_put(msg, NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC,
+		    res->ftm.civicloc_len, res->ftm.civicloc))
+		goto error;
+#undef PUT
+#undef PUTOPT
+#undef PUT_U64
+#undef PUTOPT_U64
+
+	return 0;
+error:
+	return -ENOSPC;
+}
+
+static int nl80211_pmsr_send_result(struct sk_buff *msg,
+				    struct cfg80211_pmsr_result *res)
+{
+	struct nlattr *pmsr, *peers, *peer, *resp, *data, *typedata;
+
+	pmsr = nla_nest_start_noflag(msg, NL80211_ATTR_PEER_MEASUREMENTS);
+	if (!pmsr)
+		goto error;
+
+	peers = nla_nest_start_noflag(msg, NL80211_PMSR_ATTR_PEERS);
+	if (!peers)
+		goto error;
+
+	peer = nla_nest_start_noflag(msg, 1);
+	if (!peer)
+		goto error;
+
+	if (nla_put(msg, NL80211_PMSR_PEER_ATTR_ADDR, ETH_ALEN, res->addr))
+		goto error;
+
+	resp = nla_nest_start_noflag(msg, NL80211_PMSR_PEER_ATTR_RESP);
+	if (!resp)
+		goto error;
+
+	if (nla_put_u32(msg, NL80211_PMSR_RESP_ATTR_STATUS, res->status) ||
+	    nla_put_u64_64bit(msg, NL80211_PMSR_RESP_ATTR_HOST_TIME,
+			      res->host_time, NL80211_PMSR_RESP_ATTR_PAD))
+		goto error;
+
+	if (res->ap_tsf_valid &&
+	    nla_put_u64_64bit(msg, NL80211_PMSR_RESP_ATTR_AP_TSF,
+			      res->ap_tsf, NL80211_PMSR_RESP_ATTR_PAD))
+		goto error;
+
+	if (res->final && nla_put_flag(msg, NL80211_PMSR_RESP_ATTR_FINAL))
+		goto error;
+
+	data = nla_nest_start_noflag(msg, NL80211_PMSR_RESP_ATTR_DATA);
+	if (!data)
+		goto error;
+
+	typedata = nla_nest_start_noflag(msg, res->type);
+	if (!typedata)
+		goto error;
+
+	switch (res->type) {
+	case NL80211_PMSR_TYPE_FTM:
+		if (nl80211_pmsr_send_ftm_res(msg, res))
+			goto error;
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	nla_nest_end(msg, typedata);
+	nla_nest_end(msg, data);
+	nla_nest_end(msg, resp);
+	nla_nest_end(msg, peer);
+	nla_nest_end(msg, peers);
+	nla_nest_end(msg, pmsr);
+
+	return 0;
+error:
+	return -ENOSPC;
+}
+
+void cfg80211_pmsr_report(struct wireless_dev *wdev,
+			  struct cfg80211_pmsr_request *req,
+			  struct cfg80211_pmsr_result *result,
+			  gfp_t gfp)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	trace_cfg80211_pmsr_report(wdev->wiphy, wdev, req->cookie,
+				   result->addr);
+
+	/*
+	 * Currently, the only variable-length items are LCI and civic
+	 * location, both of which are reasonably short, so we don't
+	 * need to account for them in the allocation here.
+	 */
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PEER_MEASUREMENT_RESULT);
+	if (!hdr)
+		goto free;
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+			      NL80211_ATTR_PAD))
+		goto free;
+
+	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, req->cookie,
+			      NL80211_ATTR_PAD))
+		goto free;
+
+	err = nl80211_pmsr_send_result(msg, result);
+	if (err) {
+		pr_err_ratelimited("peer measurement result: message didn't fit!\n");
+		goto free;
+	}
+
+	genlmsg_end(msg, hdr);
+	genlmsg_unicast(wiphy_net(wdev->wiphy), msg, req->nl_portid);
+	return;
+free:
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL_GPL(cfg80211_pmsr_report);
+
+static void cfg80211_pmsr_process_abort(struct wireless_dev *wdev)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct cfg80211_pmsr_request *req, *tmp;
+	LIST_HEAD(free_list);
+
+	lockdep_assert_held(&wdev->mtx);
+
+	spin_lock_bh(&wdev->pmsr_lock);
+	list_for_each_entry_safe(req, tmp, &wdev->pmsr_list, list) {
+		if (req->nl_portid)
+			continue;
+		list_move_tail(&req->list, &free_list);
+	}
+	spin_unlock_bh(&wdev->pmsr_lock);
+
+	list_for_each_entry_safe(req, tmp, &free_list, list) {
+		rdev_abort_pmsr(rdev, wdev, req);
+
+		kfree(req);
+	}
+}
+
+void cfg80211_pmsr_free_wk(struct work_struct *work)
+{
+	struct wireless_dev *wdev = container_of(work, struct wireless_dev,
+						 pmsr_free_wk);
+
+	wdev_lock(wdev);
+	cfg80211_pmsr_process_abort(wdev);
+	wdev_unlock(wdev);
+}
+
+void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev)
+{
+	struct cfg80211_pmsr_request *req;
+	bool found = false;
+
+	spin_lock_bh(&wdev->pmsr_lock);
+	list_for_each_entry(req, &wdev->pmsr_list, list) {
+		found = true;
+		req->nl_portid = 0;
+	}
+	spin_unlock_bh(&wdev->pmsr_lock);
+
+	if (found)
+		cfg80211_pmsr_process_abort(wdev);
+
+	WARN_ON(!list_empty(&wdev->pmsr_list));
+}
+
+void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid)
+{
+	struct cfg80211_pmsr_request *req;
+
+	spin_lock_bh(&wdev->pmsr_lock);
+	list_for_each_entry(req, &wdev->pmsr_list, list) {
+		if (req->nl_portid == portid) {
+			req->nl_portid = 0;
+			schedule_work(&wdev->pmsr_free_wk);
+		}
+	}
+	spin_unlock_bh(&wdev->pmsr_lock);
+}
+
+#endif /* __PMSR_H */
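
A driver opts into peer measurements by implementing the start_pmsr (and optionally abort_pmsr) ops; results flow back through the two GPL exports above. A minimal sketch of the reporting side, assuming a hypothetical mydrv driver:

	/* Report the last result for a peer, then complete the request
	 * so cfg80211 unlinks it from wdev->pmsr_list and frees it.
	 */
	static void mydrv_pmsr_done(struct wireless_dev *wdev,
				    struct cfg80211_pmsr_request *req,
				    struct cfg80211_pmsr_result *res)
	{
		res->final = 1;		/* no more results for this peer */
		cfg80211_pmsr_report(wdev, req, res, GFP_KERNEL);
		cfg80211_pmsr_complete(wdev, req, GFP_KERNEL);
	}
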
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 364f5d6..e853a4f 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -77,7 +77,8 @@
 			       struct key_params *params)
 {
 	int ret;
-	trace_rdev_add_key(&rdev->wiphy, netdev, key_index, pairwise, mac_addr);
+	trace_rdev_add_key(&rdev->wiphy, netdev, key_index, pairwise,
+			   mac_addr, params->mode);
 	ret = rdev->ops->add_key(&rdev->wiphy, netdev, key_index, pairwise,
 				  mac_addr, params);
 	trace_rdev_return_int(&rdev->wiphy, ret);
@@ -1232,4 +1233,70 @@
 	return ret;
 }
 
+static inline int
+rdev_get_ftm_responder_stats(struct cfg80211_registered_device *rdev,
+			     struct net_device *dev,
+			     struct cfg80211_ftm_responder_stats *ftm_stats)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_get_ftm_responder_stats(&rdev->wiphy, dev, ftm_stats);
+	if (rdev->ops->get_ftm_responder_stats)
+		ret = rdev->ops->get_ftm_responder_stats(&rdev->wiphy, dev,
+							ftm_stats);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline int
+rdev_start_pmsr(struct cfg80211_registered_device *rdev,
+		struct wireless_dev *wdev,
+		struct cfg80211_pmsr_request *request)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_start_pmsr(&rdev->wiphy, wdev, request->cookie);
+	if (rdev->ops->start_pmsr)
+		ret = rdev->ops->start_pmsr(&rdev->wiphy, wdev, request);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline void
+rdev_abort_pmsr(struct cfg80211_registered_device *rdev,
+		struct wireless_dev *wdev,
+		struct cfg80211_pmsr_request *request)
+{
+	trace_rdev_abort_pmsr(&rdev->wiphy, wdev, request->cookie);
+	if (rdev->ops->abort_pmsr)
+		rdev->ops->abort_pmsr(&rdev->wiphy, wdev, request);
+	trace_rdev_return_void(&rdev->wiphy);
+}
+
+static inline int rdev_update_owe_info(struct cfg80211_registered_device *rdev,
+				       struct net_device *dev,
+				       struct cfg80211_update_owe_info *oweinfo)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_update_owe_info(&rdev->wiphy, dev, oweinfo);
+	if (rdev->ops->update_owe_info)
+		ret = rdev->ops->update_owe_info(&rdev->wiphy, dev, oweinfo);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline int
+rdev_probe_mesh_link(struct cfg80211_registered_device *rdev,
+		     struct net_device *dev, const u8 *dest,
+		     const void *buf, size_t len)
+{
+	int ret;
+
+	trace_rdev_probe_mesh_link(&rdev->wiphy, dev, dest, buf, len);
+	ret = rdev->ops->probe_mesh_link(&rdev->wiphy, dev, buf, len);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
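
All of the wrappers added here follow the file's one pattern: trace the call, invoke the driver op if present (optional ops default to -EOPNOTSUPP), then trace the return value. As a template, with "frob" as a placeholder op name rather than a real cfg80211 op:

	static inline int rdev_frob(struct cfg80211_registered_device *rdev,
				    struct net_device *dev)
	{
		int ret = -EOPNOTSUPP;	/* optional op */

		trace_rdev_frob(&rdev->wiphy, dev);	/* placeholder tracepoint */
		if (rdev->ops->frob)
			ret = rdev->ops->frob(&rdev->wiphy, dev);
		trace_rdev_return_int(&rdev->wiphy, ret);
		return ret;
	}
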
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 24cfa27..446c76d 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -5,7 +5,7 @@
  * Copyright 2008-2011	Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright      2017  Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -131,7 +131,8 @@
 /* Used to track the userspace process controlling the indoor setting */
 static u32 reg_is_indoor_portid;
 
-static void restore_regulatory_settings(bool reset_user);
+static void restore_regulatory_settings(bool reset_user, bool cached);
+static void print_regdomain(const struct ieee80211_regdomain *rd);
 
 static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
 {
@@ -263,6 +264,7 @@
 
 static char *ieee80211_regdom = "00";
 static char user_alpha2[2];
+static const struct ieee80211_regdomain *cfg80211_user_regdom;
 
 module_param(ieee80211_regdom, charp, 0444);
 MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
@@ -425,14 +427,10 @@
 reg_copy_regd(const struct ieee80211_regdomain *src_regd)
 {
 	struct ieee80211_regdomain *regd;
-	int size_of_regd;
 	unsigned int i;
 
-	size_of_regd =
-		sizeof(struct ieee80211_regdomain) +
-		src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule);
-
-	regd = kzalloc(size_of_regd, GFP_KERNEL);
+	regd = kzalloc(struct_size(regd, reg_rules, src_regd->n_reg_rules),
+		       GFP_KERNEL);
 	if (!regd)
 		return ERR_PTR(-ENOMEM);
 
@@ -445,6 +443,15 @@
 	return regd;
 }
 
+static void cfg80211_save_user_regdom(const struct ieee80211_regdomain *rd)
+{
+	ASSERT_RTNL();
+
+	if (!IS_ERR(cfg80211_user_regdom))
+		kfree(cfg80211_user_regdom);
+	cfg80211_user_regdom = reg_copy_regd(rd);
+}
+
 struct reg_regdb_apply_request {
 	struct list_head list;
 	const struct ieee80211_regdomain *regdom;
@@ -510,7 +517,7 @@
 	pr_debug("Timeout while waiting for CRDA to reply, restoring regulatory settings\n");
 	rtnl_lock();
 	reg_crda_timeouts++;
-	restore_regulatory_settings(true);
+	restore_regulatory_settings(true, false);
 	rtnl_unlock();
 }
 
@@ -847,22 +854,36 @@
 	return true;
 }
 
-static void set_wmm_rule(struct ieee80211_reg_rule *rrule,
-			 struct fwdb_wmm_rule *wmm)
+static void set_wmm_rule(const struct fwdb_header *db,
+			 const struct fwdb_country *country,
+			 const struct fwdb_rule *rule,
+			 struct ieee80211_reg_rule *rrule)
 {
-	struct ieee80211_wmm_rule *rule = &rrule->wmm_rule;
-	unsigned int i;
+	struct ieee80211_wmm_rule *wmm_rule = &rrule->wmm_rule;
+	struct fwdb_wmm_rule *wmm;
+	unsigned int i, wmm_ptr;
+
+	wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2;
+	wmm = (void *)((u8 *)db + wmm_ptr);
+
+	if (!valid_wmm(wmm)) {
+		pr_err("Invalid regulatory WMM rule %u-%u in domain %c%c\n",
+		       be32_to_cpu(rule->start), be32_to_cpu(rule->end),
+		       country->alpha2[0], country->alpha2[1]);
+		return;
+	}
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
-		rule->client[i].cw_min =
+		wmm_rule->client[i].cw_min =
 			ecw2cw((wmm->client[i].ecw & 0xf0) >> 4);
-		rule->client[i].cw_max = ecw2cw(wmm->client[i].ecw & 0x0f);
-		rule->client[i].aifsn =  wmm->client[i].aifsn;
-		rule->client[i].cot = 1000 * be16_to_cpu(wmm->client[i].cot);
-		rule->ap[i].cw_min = ecw2cw((wmm->ap[i].ecw & 0xf0) >> 4);
-		rule->ap[i].cw_max = ecw2cw(wmm->ap[i].ecw & 0x0f);
-		rule->ap[i].aifsn = wmm->ap[i].aifsn;
-		rule->ap[i].cot = 1000 * be16_to_cpu(wmm->ap[i].cot);
+		wmm_rule->client[i].cw_max = ecw2cw(wmm->client[i].ecw & 0x0f);
+		wmm_rule->client[i].aifsn =  wmm->client[i].aifsn;
+		wmm_rule->client[i].cot =
+			1000 * be16_to_cpu(wmm->client[i].cot);
+		wmm_rule->ap[i].cw_min = ecw2cw((wmm->ap[i].ecw & 0xf0) >> 4);
+		wmm_rule->ap[i].cw_max = ecw2cw(wmm->ap[i].ecw & 0x0f);
+		wmm_rule->ap[i].aifsn = wmm->ap[i].aifsn;
+		wmm_rule->ap[i].cot = 1000 * be16_to_cpu(wmm->ap[i].cot);
 	}
 
 	rrule->has_wmm = true;
@@ -870,7 +891,7 @@
 
 static int __regdb_query_wmm(const struct fwdb_header *db,
 			     const struct fwdb_country *country, int freq,
-			     struct ieee80211_reg_rule *rule)
+			     struct ieee80211_reg_rule *rrule)
 {
 	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
 	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
@@ -879,18 +900,14 @@
 	for (i = 0; i < coll->n_rules; i++) {
 		__be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
 		unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2;
-		struct fwdb_rule *rrule = (void *)((u8 *)db + rule_ptr);
-		struct fwdb_wmm_rule *wmm;
-		unsigned int wmm_ptr;
+		struct fwdb_rule *rule = (void *)((u8 *)db + rule_ptr);
 
-		if (rrule->len < offsetofend(struct fwdb_rule, wmm_ptr))
+		if (rule->len < offsetofend(struct fwdb_rule, wmm_ptr))
 			continue;
 
-		if (freq >= KHZ_TO_MHZ(be32_to_cpu(rrule->start)) &&
-		    freq <= KHZ_TO_MHZ(be32_to_cpu(rrule->end))) {
-			wmm_ptr = be16_to_cpu(rrule->wmm_ptr) << 2;
-			wmm = (void *)((u8 *)db + wmm_ptr);
-			set_wmm_rule(rule, wmm);
+		if (freq >= KHZ_TO_MHZ(be32_to_cpu(rule->start)) &&
+		    freq <= KHZ_TO_MHZ(be32_to_cpu(rule->end))) {
+			set_wmm_rule(db, country, rule, rrule);
 			return 0;
 		}
 	}
@@ -927,12 +944,10 @@
 	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
 	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
 	struct ieee80211_regdomain *regdom;
-	unsigned int size_of_regd, i;
+	unsigned int i;
 
-	size_of_regd = sizeof(struct ieee80211_regdomain) +
-		coll->n_rules * sizeof(struct ieee80211_reg_rule);
-
-	regdom = kzalloc(size_of_regd, GFP_KERNEL);
+	regdom = kzalloc(struct_size(regdom, reg_rules, coll->n_rules),
+			 GFP_KERNEL);
 	if (!regdom)
 		return -ENOMEM;
 
@@ -972,12 +987,8 @@
 		if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout))
 			rrule->dfs_cac_ms =
 				1000 * be16_to_cpu(rule->cac_timeout);
-		if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) {
-			u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2;
-			struct fwdb_wmm_rule *wmm = (void *)((u8 *)db + wmm_ptr);
-
-			set_wmm_rule(rrule, wmm);
-		}
+		if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr))
+			set_wmm_rule(db, country, rule, rrule);
 	}
 
 	return reg_schedule_apply(regdom);
@@ -1018,8 +1029,13 @@
 	}
 
 	rtnl_lock();
-	if (WARN_ON(regdb && !IS_ERR(regdb))) {
-		/* just restore and free new db */
+	if (regdb && !IS_ERR(regdb)) {
+		/* negative case - a bug
+		 * positive case - can happen due to a race when multiple
+		 * callbacks are queued, since the callback is asynchronous
+		 *
+		 * In either case, just restore and free the new db.
+		 */
 	} else if (set_error) {
 		regdb = ERR_PTR(set_error);
 	} else if (fw) {
@@ -1033,7 +1049,7 @@
 	}
 
 	if (restore)
-		restore_regulatory_settings(true);
+		restore_regulatory_settings(true, false);
 
 	rtnl_unlock();
 
@@ -1249,7 +1265,7 @@
  * definitions (the "2.4 GHz band", the "5 GHz band" and the "60GHz band"),
  * however it is safe for now to assume that a frequency rule should not be
  * part of a frequency's band if the start freq or end freq are off by more
- * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 10 GHz for the
+ * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 20 GHz for the
  * 60 GHz band.
  * This resolution can be lowered and should be considered as we add
  * regulatory rule support for other "bands".
@@ -1264,7 +1280,7 @@
 	 * with the Channel starting frequency above 45 GHz.
 	 */
 	u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ?
-			10 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ;
+			20 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ;
 	if (abs(freq_khz - freq_range->start_freq_khz) <= limit)
 		return true;
 	if (abs(freq_khz - freq_range->end_freq_khz) <= limit)
@@ -1287,6 +1303,16 @@
 	return dfs_region1;
 }
 
+static void reg_wmm_rules_intersect(const struct ieee80211_wmm_ac *wmm_ac1,
+				    const struct ieee80211_wmm_ac *wmm_ac2,
+				    struct ieee80211_wmm_ac *intersect)
+{
+	intersect->cw_min = max_t(u16, wmm_ac1->cw_min, wmm_ac2->cw_min);
+	intersect->cw_max = max_t(u16, wmm_ac1->cw_max, wmm_ac2->cw_max);
+	intersect->cot = min_t(u16, wmm_ac1->cot, wmm_ac2->cot);
+	intersect->aifsn = max_t(u8, wmm_ac1->aifsn, wmm_ac2->aifsn);
+}
+
 /*
  * Helper for regdom_intersect(), this does the real
  * mathematical intersection fun
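
reg_wmm_rules_intersect() above keeps the stricter of each WMM access parameter: the larger contention-window bounds and AIFSN, the smaller channel occupancy time. A worked example with assumed inputs:

	struct ieee80211_wmm_ac a = { .cw_min = 15, .cw_max = 511,
				      .cot = 4000, .aifsn = 2 };
	struct ieee80211_wmm_ac b = { .cw_min = 31, .cw_max = 255,
				      .cot = 2000, .aifsn = 3 };
	struct ieee80211_wmm_ac out;

	reg_wmm_rules_intersect(&a, &b, &out);
	/* out = { .cw_min = 31, .cw_max = 511, .cot = 2000, .aifsn = 3 } */
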
@@ -1301,6 +1327,8 @@
 	struct ieee80211_freq_range *freq_range;
 	const struct ieee80211_power_rule *power_rule1, *power_rule2;
 	struct ieee80211_power_rule *power_rule;
+	const struct ieee80211_wmm_rule *wmm_rule1, *wmm_rule2;
+	struct ieee80211_wmm_rule *wmm_rule;
 	u32 freq_diff, max_bandwidth1, max_bandwidth2;
 
 	freq_range1 = &rule1->freq_range;
@@ -1311,6 +1339,10 @@
 	power_rule2 = &rule2->power_rule;
 	power_rule = &intersected_rule->power_rule;
 
+	wmm_rule1 = &rule1->wmm_rule;
+	wmm_rule2 = &rule2->wmm_rule;
+	wmm_rule = &intersected_rule->wmm_rule;
+
 	freq_range->start_freq_khz = max(freq_range1->start_freq_khz,
 					 freq_range2->start_freq_khz);
 	freq_range->end_freq_khz = min(freq_range1->end_freq_khz,
@@ -1354,6 +1386,29 @@
 	intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms,
 					   rule2->dfs_cac_ms);
 
+	if (rule1->has_wmm && rule2->has_wmm) {
+		u8 ac;
+
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+			reg_wmm_rules_intersect(&wmm_rule1->client[ac],
+						&wmm_rule2->client[ac],
+						&wmm_rule->client[ac]);
+			reg_wmm_rules_intersect(&wmm_rule1->ap[ac],
+						&wmm_rule2->ap[ac],
+						&wmm_rule->ap[ac]);
+		}
+
+		intersected_rule->has_wmm = true;
+	} else if (rule1->has_wmm) {
+		*wmm_rule = *wmm_rule1;
+		intersected_rule->has_wmm = true;
+	} else if (rule2->has_wmm) {
+		*wmm_rule = *wmm_rule2;
+		intersected_rule->has_wmm = true;
+	} else {
+		intersected_rule->has_wmm = false;
+	}
+
 	if (!is_valid_reg_rule(intersected_rule))
 		return -EINVAL;
 
@@ -1428,7 +1483,7 @@
 regdom_intersect(const struct ieee80211_regdomain *rd1,
 		 const struct ieee80211_regdomain *rd2)
 {
-	int r, size_of_regd;
+	int r;
 	unsigned int x, y;
 	unsigned int num_rules = 0;
 	const struct ieee80211_reg_rule *rule1, *rule2;
@@ -1459,10 +1514,7 @@
 	if (!num_rules)
 		return NULL;
 
-	size_of_regd = sizeof(struct ieee80211_regdomain) +
-		       num_rules * sizeof(struct ieee80211_reg_rule);
-
-	rd = kzalloc(size_of_regd, GFP_KERNEL);
+	rd = kzalloc(struct_size(rd, reg_rules, num_rules), GFP_KERNEL);
 	if (!rd)
 		return NULL;
 
@@ -2056,7 +2108,7 @@
 
 static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
 {
-	struct cfg80211_chan_def chandef;
+	struct cfg80211_chan_def chandef = {};
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	enum nl80211_iftype iftype;
 
@@ -2718,9 +2770,7 @@
 	list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
 		wiphy = &rdev->wiphy;
 		if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED &&
-		    request->initiator == NL80211_REGDOM_SET_BY_USER &&
-		    request->user_reg_hint_type ==
-				NL80211_USER_REG_HINT_CELL_BASE)
+		    request->initiator == NL80211_REGDOM_SET_BY_USER)
 			reg_call_notifier(wiphy, request);
 	}
 }
@@ -2738,7 +2788,7 @@
 
 	/* When last_request->processed becomes true this will be rescheduled */
 	if (lr && !lr->processed) {
-		reg_process_hint(lr);
+		pr_debug("Pending regulatory request, waiting for it to be processed...\n");
 		return;
 	}
 
@@ -3108,7 +3158,7 @@
  * keep their own regulatory domain on wiphy->regd so that does
  * not need to be remembered.
  */
-static void restore_regulatory_settings(bool reset_user)
+static void restore_regulatory_settings(bool reset_user, bool cached)
 {
 	char alpha2[2];
 	char world_alpha2[2];
@@ -3167,15 +3217,41 @@
 			restore_custom_reg_settings(&rdev->wiphy);
 	}
 
-	regulatory_hint_core(world_alpha2);
+	if (cached && (!is_an_alpha2(alpha2) ||
+		       !IS_ERR_OR_NULL(cfg80211_user_regdom))) {
+		reset_regdomains(false, cfg80211_world_regdom);
+		update_all_wiphy_regulatory(NL80211_REGDOM_SET_BY_CORE);
+		print_regdomain(get_cfg80211_regdom());
+		nl80211_send_reg_change_event(&core_request_world);
+		reg_set_request_processed();
 
-	/*
-	 * This restores the ieee80211_regdom module parameter
-	 * preference or the last user requested regulatory
-	 * settings, user regulatory settings takes precedence.
-	 */
-	if (is_an_alpha2(alpha2))
-		regulatory_hint_user(alpha2, NL80211_USER_REG_HINT_USER);
+		if (is_an_alpha2(alpha2) &&
+		    !regulatory_hint_user(alpha2, NL80211_USER_REG_HINT_USER)) {
+			struct regulatory_request *ureq;
+
+			spin_lock(&reg_requests_lock);
+			ureq = list_last_entry(&reg_requests_list,
+					       struct regulatory_request,
+					       list);
+			list_del(&ureq->list);
+			spin_unlock(&reg_requests_lock);
+
+			notify_self_managed_wiphys(ureq);
+			reg_update_last_request(ureq);
+			set_regdom(reg_copy_regd(cfg80211_user_regdom),
+				   REGD_SOURCE_CACHED);
+		}
+	} else {
+		regulatory_hint_core(world_alpha2);
+
+		/*
+		 * This restores the ieee80211_regdom module parameter
+		 * preference or the last user requested regulatory
+		 * settings, user regulatory settings takes precedence.
+		 */
+		if (is_an_alpha2(alpha2))
+			regulatory_hint_user(alpha2, NL80211_USER_REG_HINT_USER);
+	}
 
 	spin_lock(&reg_requests_lock);
 	list_splice_tail_init(&tmp_reg_req_list, &reg_requests_list);
@@ -3186,13 +3262,59 @@
 	schedule_work(&reg_work);
 }
 
-void regulatory_hint_disconnect(void)
+static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag)
 {
-	pr_debug("All devices are disconnected, going to restore regulatory settings\n");
-	restore_regulatory_settings(false);
+	struct cfg80211_registered_device *rdev;
+	struct wireless_dev *wdev;
+
+	list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+		list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+			wdev_lock(wdev);
+			if (!(wdev->wiphy->regulatory_flags & flag)) {
+				wdev_unlock(wdev);
+				return false;
+			}
+			wdev_unlock(wdev);
+		}
+	}
+
+	return true;
 }
 
-static bool freq_is_chan_12_13_14(u16 freq)
+void regulatory_hint_disconnect(void)
+{
+	/* Restoring regulatory settings is not required when the wiphys
+	 * ignore the country IE from the connected access point, but the
+	 * beacon hints must still be cleared when the wiphys support
+	 * beacon hints.
+	 */
+	if (is_wiphy_all_set_reg_flag(REGULATORY_COUNTRY_IE_IGNORE)) {
+		struct reg_beacon *reg_beacon, *btmp;
+
+		if (is_wiphy_all_set_reg_flag(REGULATORY_DISABLE_BEACON_HINTS))
+			return;
+
+		spin_lock_bh(&reg_pending_beacons_lock);
+		list_for_each_entry_safe(reg_beacon, btmp,
+					 &reg_pending_beacons, list) {
+			list_del(&reg_beacon->list);
+			kfree(reg_beacon);
+		}
+		spin_unlock_bh(&reg_pending_beacons_lock);
+
+		list_for_each_entry_safe(reg_beacon, btmp,
+					 &reg_beacon_list, list) {
+			list_del(&reg_beacon->list);
+			kfree(reg_beacon);
+		}
+
+		return;
+	}
+
+	pr_debug("All devices are disconnected, going to restore regulatory settings\n");
+	restore_regulatory_settings(false, true);
+}
+
+static bool freq_is_chan_12_13_14(u32 freq)
 {
 	if (freq == ieee80211_channel_to_frequency(12, NL80211_BAND_2GHZ) ||
 	    freq == ieee80211_channel_to_frequency(13, NL80211_BAND_2GHZ) ||
@@ -3506,6 +3628,9 @@
 	bool user_reset = false;
 	int r;
 
+	if (IS_ERR_OR_NULL(rd))
+		return -ENODATA;
+
 	if (!reg_is_valid_request(rd->alpha2)) {
 		kfree(rd);
 		return -EINVAL;
@@ -3522,6 +3647,7 @@
 		r = reg_set_rd_core(rd);
 		break;
 	case NL80211_REGDOM_SET_BY_USER:
+		cfg80211_save_user_regdom(rd);
 		r = reg_set_rd_user(rd, lr);
 		user_reset = true;
 		break;
@@ -3544,7 +3670,7 @@
 			break;
 		default:
 			/* Back to world regulatory in case of errors */
-			restore_regulatory_settings(user_reset);
+			restore_regulatory_settings(user_reset, false);
 		}
 
 		kfree(rd);
@@ -3643,10 +3769,9 @@
 		/*
 		 * The last request may have been received before this
 		 * registration call. Call the driver notifier if
-		 * initiator is USER and user type is CELL_BASE.
+		 * initiator is USER.
 		 */
-		if (lr->initiator == NL80211_REGDOM_SET_BY_USER &&
-		    lr->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE)
+		if (lr->initiator == NL80211_REGDOM_SET_BY_USER)
 			reg_call_notifier(wiphy, lr);
 	}
 
@@ -3681,8 +3806,9 @@
 }
 
 /*
- * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for
- * UNII band definitions
+ * See FCC notices for UNII band definitions
+ *  5GHz: https://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii
+ *  6GHz: https://www.fcc.gov/document/fcc-proposes-more-spectrum-unlicensed-use-0
  */
 int cfg80211_get_unii(int freq)
 {
@@ -3706,6 +3832,22 @@
 	if (freq > 5725 && freq <= 5825)
 		return 4;
 
+	/* UNII-5 */
+	if (freq > 5925 && freq <= 6425)
+		return 5;
+
+	/* UNII-6 */
+	if (freq > 6425 && freq <= 6525)
+		return 6;
+
+	/* UNII-7 */
+	if (freq > 6525 && freq <= 6875)
+		return 7;
+
+	/* UNII-8 */
+	if (freq > 6875 && freq <= 7125)
+		return 8;
+
 	return -EINVAL;
 }
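
The added ranges extend cfg80211_get_unii() to the 6 GHz band (UNII-5 through UNII-8). For example, with the frequency given in MHz as the function expects:

	/* 5955 MHz is 6 GHz channel 1; 5925 < 5955 <= 6425 */
	int unii = cfg80211_get_unii(5955);	/* returns 5 (UNII-5) */
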
 
@@ -3741,6 +3883,7 @@
 
 	return pre_cac_allowed;
 }
+EXPORT_SYMBOL(regulatory_pre_cac_allowed);
 
 void regulatory_propagate_dfs_state(struct wiphy *wiphy,
 				    struct cfg80211_chan_def *chandef,
@@ -3779,6 +3922,15 @@
 {
 	int err;
 
+	/*
+	 * It's possible that - due to other bugs/issues - cfg80211
+	 * never called regulatory_init() below, or that it failed;
+	 * in that case, don't try to do any further work here as
+	 * it's doomed to lead to crashes.
+	 */
+	if (IS_ERR_OR_NULL(reg_pdev))
+		return -EINVAL;
+
 	err = load_builtin_regdb_keys();
 	if (err)
 		return err;
@@ -3871,6 +4023,8 @@
 
 	if (!IS_ERR_OR_NULL(regdb))
 		kfree(regdb);
+	if (!IS_ERR_OR_NULL(cfg80211_user_regdom))
+		kfree(cfg80211_user_regdom);
 
 	free_regdb_keyring();
 }
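
The net effect of caching cfg80211_user_regdom is that a disconnect-triggered restore can replay the last user-requested regdomain without another CRDA or firmware round trip. A simplified sketch of the call flow (not a literal trace):

	/*
	 * regulatory_hint_disconnect()
	 *   restore_regulatory_settings(false, true)
	 *     reset_regdomains(false, cfg80211_world_regdom)
	 *     regulatory_hint_user(alpha2, ...)         // re-queue user hint
	 *     set_regdom(reg_copy_regd(cfg80211_user_regdom),
	 *                REGD_SOURCE_CACHED)             // replay cached regd
	 */
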
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 9ceeb5f..dc8f689 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -5,6 +5,7 @@
 
 /*
  * Copyright 2008-2011	Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
+ * Copyright (C) 2019 Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -22,6 +23,7 @@
 enum ieee80211_regd_source {
 	REGD_SOURCE_INTERNAL_DB,
 	REGD_SOURCE_CRDA,
+	REGD_SOURCE_CACHED,
 };
 
 extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain;
@@ -154,14 +156,6 @@
 #define REG_PRE_CAC_EXPIRY_GRACE_MS 2000
 
 /**
- * regulatory_pre_cac_allowed - if pre-CAC allowed in the current dfs domain
- * @wiphy: wiphy for which pre-CAC capability is checked.
-
- * Pre-CAC is allowed only in ETSI domain.
- */
-bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
-
-/**
  * regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys
  * @wiphy - wiphy on which radar is detected and the event will be propagated
  *	to other available wiphys having the same DFS domain
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index d0e7472..aef240f 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -5,6 +5,7 @@
  * Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2016	Intel Deutschland GmbH
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -109,6 +110,12 @@
 				   pub);
 		bss->refcount++;
 	}
+	if (bss->pub.transmitted_bss) {
+		bss = container_of(bss->pub.transmitted_bss,
+				   struct cfg80211_internal_bss,
+				   pub);
+		bss->refcount++;
+	}
 }
 
 static inline void bss_ref_put(struct cfg80211_registered_device *rdev,
@@ -125,6 +132,18 @@
 		if (hbss->refcount == 0)
 			bss_free(hbss);
 	}
+
+	if (bss->pub.transmitted_bss) {
+		struct cfg80211_internal_bss *tbss;
+
+		tbss = container_of(bss->pub.transmitted_bss,
+				    struct cfg80211_internal_bss,
+				    pub);
+		tbss->refcount--;
+		if (tbss->refcount == 0)
+			bss_free(tbss);
+	}
+
 	bss->refcount--;
 	if (bss->refcount == 0)
 		bss_free(bss);
@@ -150,6 +169,7 @@
 	}
 
 	list_del_init(&bss->list);
+	list_del_init(&bss->pub.nontrans_list);
 	rb_erase(&bss->rbn, &rdev->bss_tree);
 	rdev->bss_entries--;
 	WARN_ONCE((rdev->bss_entries == 0) ^ list_empty(&rdev->bss_list),
@@ -159,6 +179,219 @@
 	return true;
 }
 
+bool cfg80211_is_element_inherited(const struct element *elem,
+				   const struct element *non_inherit_elem)
+{
+	u8 id_len, ext_id_len, i, loop_len, id;
+	const u8 *list;
+
+	if (elem->id == WLAN_EID_MULTIPLE_BSSID)
+		return false;
+
+	if (!non_inherit_elem || non_inherit_elem->datalen < 2)
+		return true;
+
+	/*
+	 * The non-inheritance element format is:
+	 * ext ID (56) | IDs list len | list | extension IDs list len | list
+	 * Both lists are optional. Both lengths are mandatory.
+	 * This means valid length is:
+	 * elem_len = 1 (extension ID) + 2 (list len fields) + list lengths
+	 */
+	id_len = non_inherit_elem->data[1];
+	if (non_inherit_elem->datalen < 3 + id_len)
+		return true;
+
+	ext_id_len = non_inherit_elem->data[2 + id_len];
+	if (non_inherit_elem->datalen < 3 + id_len + ext_id_len)
+		return true;
+
+	if (elem->id == WLAN_EID_EXTENSION) {
+		if (!ext_id_len)
+			return true;
+		loop_len = ext_id_len;
+		list = &non_inherit_elem->data[3 + id_len];
+		id = elem->data[0];
+	} else {
+		if (!id_len)
+			return true;
+		loop_len = id_len;
+		list = &non_inherit_elem->data[2];
+		id = elem->id;
+	}
+
+	for (i = 0; i < loop_len; i++) {
+		if (list[i] == id)
+			return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(cfg80211_is_element_inherited);
+
+static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
+				  const u8 *subelement, size_t subie_len,
+				  u8 *new_ie, gfp_t gfp)
+{
+	u8 *pos, *tmp;
+	const u8 *tmp_old, *tmp_new;
+	const struct element *non_inherit_elem;
+	u8 *sub_copy;
+
+	/* copy the subelement, since we need to modify its contents
+	 * to mark each IE in it as processed.
+	 */
+	sub_copy = kmemdup(subelement, subie_len, gfp);
+	if (!sub_copy)
+		return 0;
+
+	pos = &new_ie[0];
+
+	/* set new ssid */
+	tmp_new = cfg80211_find_ie(WLAN_EID_SSID, sub_copy, subie_len);
+	if (tmp_new) {
+		memcpy(pos, tmp_new, tmp_new[1] + 2);
+		pos += (tmp_new[1] + 2);
+	}
+
+	/* get non inheritance list if exists */
+	non_inherit_elem =
+		cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+				       sub_copy, subie_len);
+
+	/* go through IEs in ie (skip SSID) and subelement,
+	 * merge them into new_ie
+	 */
+	tmp_old = cfg80211_find_ie(WLAN_EID_SSID, ie, ielen);
+	tmp_old = (tmp_old) ? tmp_old + tmp_old[1] + 2 : ie;
+
+	while (tmp_old + tmp_old[1] + 2 - ie <= ielen) {
+		if (tmp_old[0] == 0) {
+			tmp_old++;
+			continue;
+		}
+
+		if (tmp_old[0] == WLAN_EID_EXTENSION)
+			tmp = (u8 *)cfg80211_find_ext_ie(tmp_old[2], sub_copy,
+							 subie_len);
+		else
+			tmp = (u8 *)cfg80211_find_ie(tmp_old[0], sub_copy,
+						     subie_len);
+
+		if (!tmp) {
+			const struct element *old_elem = (void *)tmp_old;
+
+			/* ie in old ie but not in subelement */
+			if (cfg80211_is_element_inherited(old_elem,
+							  non_inherit_elem)) {
+				memcpy(pos, tmp_old, tmp_old[1] + 2);
+				pos += tmp_old[1] + 2;
+			}
+		} else {
+			/* ie in transmitting ie also in subelement,
+			 * copy from subelement and flag the ie in subelement
+			 * as copied (by setting eid field to WLAN_EID_SSID,
+			 * which is skipped anyway).
+			 * For vendor ie, compare OUI + type + subType to
+			 * determine if they are the same ie.
+			 */
+			if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) {
+				if (!memcmp(tmp_old + 2, tmp + 2, 5)) {
+					/* same vendor ie, copy from
+					 * subelement
+					 */
+					memcpy(pos, tmp, tmp[1] + 2);
+					pos += tmp[1] + 2;
+					tmp[0] = WLAN_EID_SSID;
+				} else {
+					memcpy(pos, tmp_old, tmp_old[1] + 2);
+					pos += tmp_old[1] + 2;
+				}
+			} else {
+				/* copy ie from subelement into new ie */
+				memcpy(pos, tmp, tmp[1] + 2);
+				pos += tmp[1] + 2;
+				tmp[0] = WLAN_EID_SSID;
+			}
+		}
+
+		if (tmp_old + tmp_old[1] + 2 - ie == ielen)
+			break;
+
+		tmp_old += tmp_old[1] + 2;
+	}
+
+	/* go through the subelement again to catch any IE not yet copied
+	 * into new_ie, skipping the SSID and nontransmitted-BSSID
+	 * capability elements
+	 */
+	tmp_new = sub_copy;
+	while (tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) {
+		if (!(tmp_new[0] == WLAN_EID_NON_TX_BSSID_CAP ||
+		      tmp_new[0] == WLAN_EID_SSID)) {
+			memcpy(pos, tmp_new, tmp_new[1] + 2);
+			pos += tmp_new[1] + 2;
+		}
+		if (tmp_new + tmp_new[1] + 2 - sub_copy == subie_len)
+			break;
+		tmp_new += tmp_new[1] + 2;
+	}
+
+	kfree(sub_copy);
+	return pos - new_ie;
+}
+
+static bool is_bss(struct cfg80211_bss *a, const u8 *bssid,
+		   const u8 *ssid, size_t ssid_len)
+{
+	const struct cfg80211_bss_ies *ies;
+	const u8 *ssidie;
+
+	if (bssid && !ether_addr_equal(a->bssid, bssid))
+		return false;
+
+	if (!ssid)
+		return true;
+
+	ies = rcu_access_pointer(a->ies);
+	if (!ies)
+		return false;
+	ssidie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len);
+	if (!ssidie)
+		return false;
+	if (ssidie[1] != ssid_len)
+		return false;
+	return memcmp(ssidie + 2, ssid, ssid_len) == 0;
+}
+
+static int
+cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss,
+			   struct cfg80211_bss *nontrans_bss)
+{
+	const u8 *ssid;
+	size_t ssid_len;
+	struct cfg80211_bss *bss = NULL;
+
+	rcu_read_lock();
+	ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID);
+	if (!ssid) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	ssid_len = ssid[1];
+	ssid = ssid + 2;
+	rcu_read_unlock();
+
+	/* check if nontrans_bss is in the list */
+	list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) {
+		if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len))
+			return 0;
+	}
+
+	/* add to the list */
+	list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list);
+	return 0;
+}
+
 static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev,
 				  unsigned long expire_time)
 {
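
To make the non-inheritance parsing above concrete, here is a sample element as it might appear in a nontransmitted BSSID profile; all values are assumed for illustration:

	static const u8 non_inherit[] = {
		WLAN_EID_EXTENSION, 6,	/* element ID, datalen */
		56,			/* WLAN_EID_EXT_NON_INHERITANCE */
		2, 11, 45,		/* IDs list: QBSS load, HT capabilities */
		1, 35,			/* ext IDs list: HE capabilities */
	};

cfg80211_is_element_inherited() returns false for any element whose (extension) ID appears in those lists, so cfg80211_gen_new_ie() drops it when merging the transmitting BSS's IEs with the subelement profile.
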
@@ -480,73 +713,43 @@
 	__cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE);
 }
 
-const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len,
-				 const u8 *match, int match_len,
-				 int match_offset)
+const struct element *
+cfg80211_find_elem_match(u8 eid, const u8 *ies, unsigned int len,
+			 const u8 *match, unsigned int match_len,
+			 unsigned int match_offset)
 {
-	/* match_offset can't be smaller than 2, unless match_len is
-	 * zero, in which case match_offset must be zero as well.
-	 */
-	if (WARN_ON((match_len && match_offset < 2) ||
-		    (!match_len && match_offset)))
-		return NULL;
+	const struct element *elem;
 
-	while (len >= 2 && len >= ies[1] + 2) {
-		if ((ies[0] == eid) &&
-		    (ies[1] + 2 >= match_offset + match_len) &&
-		    !memcmp(ies + match_offset, match, match_len))
-			return ies;
-
-		len -= ies[1] + 2;
-		ies += ies[1] + 2;
+	for_each_element_id(elem, eid, ies, len) {
+		if (elem->datalen >= match_offset + match_len &&
+		    !memcmp(elem->data + match_offset, match, match_len))
+			return elem;
 	}
 
 	return NULL;
 }
-EXPORT_SYMBOL(cfg80211_find_ie_match);
+EXPORT_SYMBOL(cfg80211_find_elem_match);
 
-const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
-				  const u8 *ies, int len)
+const struct element *cfg80211_find_vendor_elem(unsigned int oui, int oui_type,
+						const u8 *ies,
+						unsigned int len)
 {
-	const u8 *ie;
+	const struct element *elem;
 	u8 match[] = { oui >> 16, oui >> 8, oui, oui_type };
 	int match_len = (oui_type < 0) ? 3 : sizeof(match);
 
 	if (WARN_ON(oui_type > 0xff))
 		return NULL;
 
-	ie = cfg80211_find_ie_match(WLAN_EID_VENDOR_SPECIFIC, ies, len,
-				    match, match_len, 2);
+	elem = cfg80211_find_elem_match(WLAN_EID_VENDOR_SPECIFIC, ies, len,
+					match, match_len, 0);
 
-	if (ie && (ie[1] < 4))
+	if (!elem || elem->datalen < 4)
 		return NULL;
 
-	return ie;
+	return elem;
 }
-EXPORT_SYMBOL(cfg80211_find_vendor_ie);
-
-static bool is_bss(struct cfg80211_bss *a, const u8 *bssid,
-		   const u8 *ssid, size_t ssid_len)
-{
-	const struct cfg80211_bss_ies *ies;
-	const u8 *ssidie;
-
-	if (bssid && !ether_addr_equal(a->bssid, bssid))
-		return false;
-
-	if (!ssid)
-		return true;
-
-	ies = rcu_access_pointer(a->ies);
-	if (!ies)
-		return false;
-	ssidie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len);
-	if (!ssidie)
-		return false;
-	if (ssidie[1] != ssid_len)
-		return false;
-	return memcmp(ssidie + 2, ssid, ssid_len) == 0;
-}
+EXPORT_SYMBOL(cfg80211_find_vendor_elem);
 
 /**
  * enum bss_compare_mode - BSS compare mode
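
The element-based lookups return a bounds-checked struct element instead of a raw u8 pointer, so callers no longer index ie[1] by hand. Assumed usage, with ies/ies_len being any buffer of information elements:

	/* Find the Microsoft WMM vendor element (OUI 00:50:f2, type 2). */
	const struct element *elem =
		cfg80211_find_vendor_elem(0x0050f2, 2, ies, ies_len);

	if (elem)
		pr_debug("WMM IE with %u data bytes\n", elem->datalen);
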
@@ -882,18 +1085,111 @@
 	return true;
 }
 
+struct cfg80211_non_tx_bss {
+	struct cfg80211_bss *tx_bss;
+	u8 max_bssid_indicator;
+	u8 bssid_index;
+};
+
+static bool
+cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
+			  struct cfg80211_internal_bss *known,
+			  struct cfg80211_internal_bss *new,
+			  bool signal_valid)
+{
+	lockdep_assert_held(&rdev->bss_lock);
+
+	/* Update IEs */
+	if (rcu_access_pointer(new->pub.proberesp_ies)) {
+		const struct cfg80211_bss_ies *old;
+
+		old = rcu_access_pointer(known->pub.proberesp_ies);
+
+		rcu_assign_pointer(known->pub.proberesp_ies,
+				   new->pub.proberesp_ies);
+		/* Override possible earlier Beacon frame IEs */
+		rcu_assign_pointer(known->pub.ies,
+				   new->pub.proberesp_ies);
+		if (old)
+			kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+	} else if (rcu_access_pointer(new->pub.beacon_ies)) {
+		const struct cfg80211_bss_ies *old;
+		struct cfg80211_internal_bss *bss;
+
+		if (known->pub.hidden_beacon_bss &&
+		    !list_empty(&known->hidden_list)) {
+			const struct cfg80211_bss_ies *f;
+
+			/* The known BSS struct is one of the probe
+			 * response members of a group, but we're
+			 * receiving a beacon (beacon_ies in the new
+			 * bss is used). This can only mean that the
+			 * AP changed its beacon from not having an
+			 * SSID to showing it, which is confusing so
+			 * drop this information.
+			 */
+
+			f = rcu_access_pointer(new->pub.beacon_ies);
+			kfree_rcu((struct cfg80211_bss_ies *)f, rcu_head);
+			return false;
+		}
+
+		old = rcu_access_pointer(known->pub.beacon_ies);
+
+		rcu_assign_pointer(known->pub.beacon_ies, new->pub.beacon_ies);
+
+		/* Override IEs if they were from a beacon before */
+		if (old == rcu_access_pointer(known->pub.ies))
+			rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies);
+
+		/* Assign beacon IEs to all sub entries */
+		list_for_each_entry(bss, &known->hidden_list, hidden_list) {
+			const struct cfg80211_bss_ies *ies;
+
+			ies = rcu_access_pointer(bss->pub.beacon_ies);
+			WARN_ON(ies != old);
+
+			rcu_assign_pointer(bss->pub.beacon_ies,
+					   new->pub.beacon_ies);
+		}
+
+		if (old)
+			kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+	}
+
+	known->pub.beacon_interval = new->pub.beacon_interval;
+
+	/* don't update the signal if beacon was heard on
+	 * adjacent channel.
+	 */
+	if (signal_valid)
+		known->pub.signal = new->pub.signal;
+	known->pub.capability = new->pub.capability;
+	known->ts = new->ts;
+	known->ts_boottime = new->ts_boottime;
+	known->parent_tsf = new->parent_tsf;
+	known->pub.chains = new->pub.chains;
+	memcpy(known->pub.chain_signal, new->pub.chain_signal,
+	       IEEE80211_MAX_CHAINS);
+	ether_addr_copy(known->parent_bssid, new->parent_bssid);
+	known->pub.max_bssid_indicator = new->pub.max_bssid_indicator;
+	known->pub.bssid_index = new->pub.bssid_index;
+
+	return true;
+}
+
 /* Returned bss is reference counted and must be cleaned up appropriately. */
-static struct cfg80211_internal_bss *
+struct cfg80211_internal_bss *
 cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 		    struct cfg80211_internal_bss *tmp,
-		    bool signal_valid)
+		    bool signal_valid, unsigned long ts)
 {
 	struct cfg80211_internal_bss *found = NULL;
 
 	if (WARN_ON(!tmp->pub.channel))
 		return NULL;
 
-	tmp->ts = jiffies;
+	tmp->ts = ts;
 
 	spin_lock_bh(&rdev->bss_lock);
 
@@ -905,86 +1201,8 @@
 	found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR);
 
 	if (found) {
-		/* Update IEs */
-		if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
-			const struct cfg80211_bss_ies *old;
-
-			old = rcu_access_pointer(found->pub.proberesp_ies);
-
-			rcu_assign_pointer(found->pub.proberesp_ies,
-					   tmp->pub.proberesp_ies);
-			/* Override possible earlier Beacon frame IEs */
-			rcu_assign_pointer(found->pub.ies,
-					   tmp->pub.proberesp_ies);
-			if (old)
-				kfree_rcu((struct cfg80211_bss_ies *)old,
-					  rcu_head);
-		} else if (rcu_access_pointer(tmp->pub.beacon_ies)) {
-			const struct cfg80211_bss_ies *old;
-			struct cfg80211_internal_bss *bss;
-
-			if (found->pub.hidden_beacon_bss &&
-			    !list_empty(&found->hidden_list)) {
-				const struct cfg80211_bss_ies *f;
-
-				/*
-				 * The found BSS struct is one of the probe
-				 * response members of a group, but we're
-				 * receiving a beacon (beacon_ies in the tmp
-				 * bss is used). This can only mean that the
-				 * AP changed its beacon from not having an
-				 * SSID to showing it, which is confusing so
-				 * drop this information.
-				 */
-
-				f = rcu_access_pointer(tmp->pub.beacon_ies);
-				kfree_rcu((struct cfg80211_bss_ies *)f,
-					  rcu_head);
-				goto drop;
-			}
-
-			old = rcu_access_pointer(found->pub.beacon_ies);
-
-			rcu_assign_pointer(found->pub.beacon_ies,
-					   tmp->pub.beacon_ies);
-
-			/* Override IEs if they were from a beacon before */
-			if (old == rcu_access_pointer(found->pub.ies))
-				rcu_assign_pointer(found->pub.ies,
-						   tmp->pub.beacon_ies);
-
-			/* Assign beacon IEs to all sub entries */
-			list_for_each_entry(bss, &found->hidden_list,
-					    hidden_list) {
-				const struct cfg80211_bss_ies *ies;
-
-				ies = rcu_access_pointer(bss->pub.beacon_ies);
-				WARN_ON(ies != old);
-
-				rcu_assign_pointer(bss->pub.beacon_ies,
-						   tmp->pub.beacon_ies);
-			}
-
-			if (old)
-				kfree_rcu((struct cfg80211_bss_ies *)old,
-					  rcu_head);
-		}
-
-		found->pub.beacon_interval = tmp->pub.beacon_interval;
-		/*
-		 * don't update the signal if beacon was heard on
-		 * adjacent channel.
-		 */
-		if (signal_valid)
-			found->pub.signal = tmp->pub.signal;
-		found->pub.capability = tmp->pub.capability;
-		found->ts = tmp->ts;
-		found->ts_boottime = tmp->ts_boottime;
-		found->parent_tsf = tmp->parent_tsf;
-		found->pub.chains = tmp->pub.chains;
-		memcpy(found->pub.chain_signal, tmp->pub.chain_signal,
-		       IEEE80211_MAX_CHAINS);
-		ether_addr_copy(found->parent_bssid, tmp->parent_bssid);
+		if (!cfg80211_update_known_bss(rdev, found, tmp, signal_valid))
+			goto drop;
 	} else {
 		struct cfg80211_internal_bss *new;
 		struct cfg80211_internal_bss *hidden;
@@ -1009,6 +1227,7 @@
 		memcpy(new, tmp, sizeof(*new));
 		new->refcount = 1;
 		INIT_LIST_HEAD(&new->hidden_list);
+		INIT_LIST_HEAD(&new->pub.nontrans_list);
 
 		if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
 			hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN);
@@ -1042,6 +1261,17 @@
 			goto drop;
 		}
 
+		/* This must be before the call to bss_ref_get */
+		if (tmp->pub.transmitted_bss) {
+			struct cfg80211_internal_bss *pbss =
+				container_of(tmp->pub.transmitted_bss,
+					     struct cfg80211_internal_bss,
+					     pub);
+
+			new->pub.transmitted_bss = tmp->pub.transmitted_bss;
+			bss_ref_get(rdev, pbss);
+		}
+
 		list_add_tail(&new->list, &rdev->bss_list);
 		rdev->bss_entries++;
 		rb_insert_bss(rdev, new);
@@ -1130,19 +1360,22 @@
 }
 
 /* Returned bss is reference counted and must be cleaned up appropriately. */
-struct cfg80211_bss *
-cfg80211_inform_bss_data(struct wiphy *wiphy,
-			 struct cfg80211_inform_bss *data,
-			 enum cfg80211_bss_frame_type ftype,
-			 const u8 *bssid, u64 tsf, u16 capability,
-			 u16 beacon_interval, const u8 *ie, size_t ielen,
-			 gfp_t gfp)
+static struct cfg80211_bss *
+cfg80211_inform_single_bss_data(struct wiphy *wiphy,
+				struct cfg80211_inform_bss *data,
+				enum cfg80211_bss_frame_type ftype,
+				const u8 *bssid, u64 tsf, u16 capability,
+				u16 beacon_interval, const u8 *ie, size_t ielen,
+				struct cfg80211_non_tx_bss *non_tx_data,
+				gfp_t gfp)
 {
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_bss_ies *ies;
 	struct ieee80211_channel *channel;
 	struct cfg80211_internal_bss tmp = {}, *res;
 	int bss_type;
 	bool signal_valid;
+	unsigned long ts;
 
 	if (WARN_ON(!wiphy))
 		return NULL;
@@ -1163,6 +1396,14 @@
 	tmp.pub.beacon_interval = beacon_interval;
 	tmp.pub.capability = capability;
 	tmp.ts_boottime = data->boottime_ns;
+	if (non_tx_data) {
+		tmp.pub.transmitted_bss = non_tx_data->tx_bss;
+		ts = bss_from_pub(non_tx_data->tx_bss)->ts;
+		tmp.pub.bssid_index = non_tx_data->bssid_index;
+		tmp.pub.max_bssid_indicator = non_tx_data->max_bssid_indicator;
+	} else {
+		ts = jiffies;
+	}
 
 	/*
 	 * If we do not know here whether the IEs are from a Beacon or Probe
@@ -1183,7 +1424,7 @@
 	switch (ftype) {
 	case CFG80211_BSS_FTYPE_BEACON:
 		ies->from_beacon = true;
-		/* fall through to assign */
+		/* fall through */
 	case CFG80211_BSS_FTYPE_UNKNOWN:
 		rcu_assign_pointer(tmp.pub.beacon_ies, ies);
 		break;
@@ -1195,7 +1436,7 @@
 
 	signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
 		wiphy->max_adj_channel_rssi_comp;
-	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
+	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid, ts);
 	if (!res)
 		return NULL;
 
@@ -1209,19 +1450,350 @@
 			regulatory_hint_found_beacon(wiphy, channel, gfp);
 	}
 
+	if (non_tx_data) {
+		/* this is a nontransmitted BSS, so add it to the
+		 * transmitting BSS' list if it is not there yet
+		 */
+		if (cfg80211_add_nontrans_list(non_tx_data->tx_bss,
+					       &res->pub)) {
+			if (__cfg80211_unlink_bss(rdev, res))
+				rdev->bss_generation++;
+		}
+	}
+
 	trace_cfg80211_return_bss(&res->pub);
 	/* cfg80211_bss_update gives us a referenced result */
 	return &res->pub;
 }
+
+static const struct element *
+cfg80211_get_profile_continuation(const u8 *ie, size_t ielen,
+				  const struct element *mbssid_elem,
+				  const struct element *sub_elem)
+{
+	const u8 *mbssid_end = mbssid_elem->data + mbssid_elem->datalen;
+	const struct element *next_mbssid;
+	const struct element *next_sub;
+
+	next_mbssid = cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID,
+					 mbssid_end,
+					 ielen - (mbssid_end - ie));
+
+	/*
+	 * If it is not the last subelement in the current MBSSID IE, or
+	 * there isn't a next MBSSID IE, the profile is complete.
+	 */
+	if ((sub_elem->data + sub_elem->datalen < mbssid_end - 1) ||
+	    !next_mbssid)
+		return NULL;
+
+	/* For any length error, just return NULL */
+
+	if (next_mbssid->datalen < 4)
+		return NULL;
+
+	next_sub = (void *)&next_mbssid->data[1];
+
+	if (next_mbssid->data + next_mbssid->datalen <
+	    next_sub->data + next_sub->datalen)
+		return NULL;
+
+	if (next_sub->id != 0 || next_sub->datalen < 2)
+		return NULL;
+
+	/*
+	 * Check if the first element in the next sub element is a start
+	 * of a new profile
+	 */
+	return next_sub->data[0] == WLAN_EID_NON_TX_BSSID_CAP ?
+	       NULL : next_mbssid;
+}
+
+size_t cfg80211_merge_profile(const u8 *ie, size_t ielen,
+			      const struct element *mbssid_elem,
+			      const struct element *sub_elem,
+			      u8 *merged_ie, size_t max_copy_len)
+{
+	size_t copied_len = sub_elem->datalen;
+	const struct element *next_mbssid;
+
+	if (sub_elem->datalen > max_copy_len)
+		return 0;
+
+	memcpy(merged_ie, sub_elem->data, sub_elem->datalen);
+
+	while ((next_mbssid = cfg80211_get_profile_continuation(ie, ielen,
+								mbssid_elem,
+								sub_elem))) {
+		const struct element *next_sub = (void *)&next_mbssid->data[1];
+
+		if (copied_len + next_sub->datalen > max_copy_len)
+			break;
+		memcpy(merged_ie + copied_len, next_sub->data,
+		       next_sub->datalen);
+		copied_len += next_sub->datalen;
+	}
+
+	return copied_len;
+}
+EXPORT_SYMBOL(cfg80211_merge_profile);
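cfg80211_merge_profile() exists because a Nontransmitted BSSID Profile larger than one element can hold is split across consecutive Multiple BSSID elements. A layout sketch (element IDs per IEEE 802.11: 71 Multiple BSSID, 83 Nontransmitted BSSID Capability, 85 Multiple BSSID-Index; a continuation is recognized by the absence of a leading capability element, which is exactly what cfg80211_get_profile_continuation() checks):

	/*
	 * EID 71 | len | MaxBSSID Indicator
	 *   subelement 0 (Nontransmitted BSSID Profile) | len
	 *     EID 83 | 2 | capability      <- starts a new profile
	 *     EID 0  | n | SSID
	 *     EID 85 | m | BSSID index ...
	 * EID 71 | len | ...
	 *   subelement 0 | len
	 *     <continuation data>          <- no leading EID 83, so this
	 *                                     continues the previous profile
	 */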
+
+static void cfg80211_parse_mbssid_data(struct wiphy *wiphy,
+				       struct cfg80211_inform_bss *data,
+				       enum cfg80211_bss_frame_type ftype,
+				       const u8 *bssid, u64 tsf,
+				       u16 beacon_interval, const u8 *ie,
+				       size_t ielen,
+				       struct cfg80211_non_tx_bss *non_tx_data,
+				       gfp_t gfp)
+{
+	const u8 *mbssid_index_ie;
+	const struct element *elem, *sub;
+	size_t new_ie_len;
+	u8 new_bssid[ETH_ALEN];
+	u8 *new_ie, *profile;
+	u64 seen_indices = 0;
+	u16 capability;
+	struct cfg80211_bss *bss;
+
+	if (!non_tx_data)
+		return;
+	if (!cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
+		return;
+	if (!wiphy->support_mbssid)
+		return;
+	if (wiphy->support_only_he_mbssid &&
+	    !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen))
+		return;
+
+	new_ie = kmalloc(IEEE80211_MAX_DATA_LEN, gfp);
+	if (!new_ie)
+		return;
+
+	profile = kmalloc(ielen, gfp);
+	if (!profile)
+		goto out;
+
+	for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, ie, ielen) {
+		if (elem->datalen < 4)
+			continue;
+		for_each_element(sub, elem->data + 1, elem->datalen - 1) {
+			u8 profile_len;
+
+			if (sub->id != 0 || sub->datalen < 4) {
+				/* not a valid BSS profile */
+				continue;
+			}
+
+			if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP ||
+			    sub->data[1] != 2) {
+				/* The first element within the Nontransmitted
+				 * BSSID Profile is not the Nontransmitted
+				 * BSSID Capability element.
+				 */
+				continue;
+			}
+
+			memset(profile, 0, ielen);
+			profile_len = cfg80211_merge_profile(ie, ielen,
+							     elem,
+							     sub,
+							     profile,
+							     ielen);
+
+			/* found a Nontransmitted BSSID Profile */
+			mbssid_index_ie = cfg80211_find_ie
+				(WLAN_EID_MULTI_BSSID_IDX,
+				 profile, profile_len);
+			if (!mbssid_index_ie || mbssid_index_ie[1] < 1 ||
+			    mbssid_index_ie[2] == 0 ||
+			    mbssid_index_ie[2] > 46) {
+				/* No valid Multiple BSSID-Index element */
+				continue;
+			}
+
+			if (seen_indices & BIT_ULL(mbssid_index_ie[2]))
+				/* We don't support legacy split of a profile */
+				net_dbg_ratelimited("Partial info for BSSID index %d\n",
+						    mbssid_index_ie[2]);
+
+			seen_indices |= BIT_ULL(mbssid_index_ie[2]);
+
+			non_tx_data->bssid_index = mbssid_index_ie[2];
+			non_tx_data->max_bssid_indicator = elem->data[0];
+
+			cfg80211_gen_new_bssid(bssid,
+					       non_tx_data->max_bssid_indicator,
+					       non_tx_data->bssid_index,
+					       new_bssid);
+			memset(new_ie, 0, IEEE80211_MAX_DATA_LEN);
+			new_ie_len = cfg80211_gen_new_ie(ie, ielen,
+							 profile,
+							 profile_len, new_ie,
+							 gfp);
+			if (!new_ie_len)
+				continue;
+
+			capability = get_unaligned_le16(profile + 2);
+			bss = cfg80211_inform_single_bss_data(wiphy, data,
+							      ftype,
+							      new_bssid, tsf,
+							      capability,
+							      beacon_interval,
+							      new_ie,
+							      new_ie_len,
+							      non_tx_data,
+							      gfp);
+			if (!bss)
+				break;
+			cfg80211_put_bss(wiphy, bss);
+		}
+	}
+
+out:
+	kfree(new_ie);
+	kfree(profile);
+}
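The new_bssid handed to cfg80211_gen_new_bssid() above follows the IEEE 802.11 Multiple BSSID addressing rule: the low max_bssid_indicator bits of the transmitted BSSID are replaced by (reference + index) mod 2^indicator. A self-contained user-space sketch of that derivation (the in-kernel helper itself is outside this hunk; it presumably does the same math via ether_addr_to_u64()/GENMASK_ULL()):

	#include <stdint.h>

	/* sketch: derive a nontransmitted BSSID from the transmitted one */
	static void gen_new_bssid(const uint8_t bssid[6], uint8_t max_bssid_indicator,
				  uint8_t bssid_index, uint8_t new_bssid[6])
	{
		uint64_t b = 0, mask = (1ULL << max_bssid_indicator) - 1;
		int i;

		for (i = 0; i < 6; i++)			/* MAC -> u64, big endian */
			b = (b << 8) | bssid[i];

		b = (b & ~mask) | (((b & mask) + bssid_index) & mask);

		for (i = 5; i >= 0; i--, b >>= 8)	/* u64 -> MAC */
			new_bssid[i] = (uint8_t)(b & 0xff);
	}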
+
+struct cfg80211_bss *
+cfg80211_inform_bss_data(struct wiphy *wiphy,
+			 struct cfg80211_inform_bss *data,
+			 enum cfg80211_bss_frame_type ftype,
+			 const u8 *bssid, u64 tsf, u16 capability,
+			 u16 beacon_interval, const u8 *ie, size_t ielen,
+			 gfp_t gfp)
+{
+	struct cfg80211_bss *res;
+	struct cfg80211_non_tx_bss non_tx_data;
+
+	res = cfg80211_inform_single_bss_data(wiphy, data, ftype, bssid, tsf,
+					      capability, beacon_interval, ie,
+					      ielen, NULL, gfp);
+	if (!res)
+		return NULL;
+	non_tx_data.tx_bss = res;
+	cfg80211_parse_mbssid_data(wiphy, data, ftype, bssid, tsf,
+				   beacon_interval, ie, ielen, &non_tx_data,
+				   gfp);
+	return res;
+}
 EXPORT_SYMBOL(cfg80211_inform_bss_data);
 
-/* cfg80211_inform_bss_width_frame helper */
-struct cfg80211_bss *
-cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
-			       struct cfg80211_inform_bss *data,
-			       struct ieee80211_mgmt *mgmt, size_t len,
-			       gfp_t gfp)
+static void
+cfg80211_parse_mbssid_frame_data(struct wiphy *wiphy,
+				 struct cfg80211_inform_bss *data,
+				 struct ieee80211_mgmt *mgmt, size_t len,
+				 struct cfg80211_non_tx_bss *non_tx_data,
+				 gfp_t gfp)
+{
+	enum cfg80211_bss_frame_type ftype;
+	const u8 *ie = mgmt->u.probe_resp.variable;
+	size_t ielen = len - offsetof(struct ieee80211_mgmt,
+				      u.probe_resp.variable);
 
+	ftype = ieee80211_is_beacon(mgmt->frame_control) ?
+		CFG80211_BSS_FTYPE_BEACON : CFG80211_BSS_FTYPE_PRESP;
+
+	cfg80211_parse_mbssid_data(wiphy, data, ftype, mgmt->bssid,
+				   le64_to_cpu(mgmt->u.probe_resp.timestamp),
+				   le16_to_cpu(mgmt->u.probe_resp.beacon_int),
+				   ie, ielen, non_tx_data, gfp);
+}
+
+static void
+cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
+				   struct cfg80211_bss *nontrans_bss,
+				   struct ieee80211_mgmt *mgmt, size_t len)
+{
+	u8 *ie, *new_ie, *pos;
+	const u8 *nontrans_ssid, *trans_ssid, *mbssid;
+	size_t ielen = len - offsetof(struct ieee80211_mgmt,
+				      u.probe_resp.variable);
+	size_t new_ie_len;
+	struct cfg80211_bss_ies *new_ies;
+	const struct cfg80211_bss_ies *old;
+	u8 cpy_len;
+
+	lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock);
+
+	ie = mgmt->u.probe_resp.variable;
+
+	new_ie_len = ielen;
+	trans_ssid = cfg80211_find_ie(WLAN_EID_SSID, ie, ielen);
+	if (!trans_ssid)
+		return;
+	new_ie_len -= trans_ssid[1];
+	mbssid = cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen);
+	/*
+	 * It's not valid to have the MBSSID element before the SSID;
+	 * ignore it if that happens - the code below assumes the MBSSID
+	 * element comes after the SSID (while copying things in between).
+	 */
+	if (!mbssid || mbssid < trans_ssid)
+		return;
+	new_ie_len -= mbssid[1];
+
+	nontrans_ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID);
+	if (!nontrans_ssid)
+		return;
+
+	new_ie_len += nontrans_ssid[1];
+
+	/* generate new ie for nontrans BSS
+	 * 1. replace SSID with nontrans BSS' SSID
+	 * 2. skip MBSSID IE
+	 */
+	new_ie = kzalloc(new_ie_len, GFP_ATOMIC);
+	if (!new_ie)
+		return;
+
+	new_ies = kzalloc(sizeof(*new_ies) + new_ie_len, GFP_ATOMIC);
+	if (!new_ies)
+		goto out_free;
+
+	pos = new_ie;
+
+	/* copy the nontransmitted SSID */
+	cpy_len = nontrans_ssid[1] + 2;
+	memcpy(pos, nontrans_ssid, cpy_len);
+	pos += cpy_len;
+	/* copy the IEs between SSID and MBSSID */
+	cpy_len = trans_ssid[1] + 2;
+	memcpy(pos, (trans_ssid + cpy_len), (mbssid - (trans_ssid + cpy_len)));
+	pos += (mbssid - (trans_ssid + cpy_len));
+	/* copy the IEs after MBSSID */
+	cpy_len = mbssid[1] + 2;
+	memcpy(pos, mbssid + cpy_len, ((ie + ielen) - (mbssid + cpy_len)));
+
+	/* update ie */
+	new_ies->len = new_ie_len;
+	new_ies->tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
+	new_ies->from_beacon = ieee80211_is_beacon(mgmt->frame_control);
+	memcpy(new_ies->data, new_ie, new_ie_len);
+	if (ieee80211_is_probe_resp(mgmt->frame_control)) {
+		old = rcu_access_pointer(nontrans_bss->proberesp_ies);
+		rcu_assign_pointer(nontrans_bss->proberesp_ies, new_ies);
+		rcu_assign_pointer(nontrans_bss->ies, new_ies);
+		if (old)
+			kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+	} else {
+		old = rcu_access_pointer(nontrans_bss->beacon_ies);
+		rcu_assign_pointer(nontrans_bss->beacon_ies, new_ies);
+		rcu_assign_pointer(nontrans_bss->ies, new_ies);
+		if (old)
+			kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+	}
+
+out_free:
+	kfree(new_ie);
+}
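Schematically, the three memcpy() calls above rebuild the nontransmitted BSS's IE buffer as:

	/*
	 * new_ie = [nontrans SSID]
	 *        + [IEs between the transmitted SSID and the MBSSID element]
	 *        + [IEs after the MBSSID element]
	 *
	 * i.e. the transmitted SSID is swapped for the nontransmitted one
	 * and the MBSSID element itself is dropped.
	 */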
+
+/* cfg80211_inform_bss_width_frame helper */
+static struct cfg80211_bss *
+cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
+				      struct cfg80211_inform_bss *data,
+				      struct ieee80211_mgmt *mgmt, size_t len,
+				      gfp_t gfp)
 {
 	struct cfg80211_internal_bss tmp = {}, *res;
 	struct cfg80211_bss_ies *ies;
@@ -1282,7 +1854,8 @@
 
 	signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
 		wiphy->max_adj_channel_rssi_comp;
-	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
+	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid,
+				  jiffies);
 	if (!res)
 		return NULL;
 
@@ -1300,6 +1873,56 @@
 	/* cfg80211_bss_update gives us a referenced result */
 	return &res->pub;
 }
+
+struct cfg80211_bss *
+cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
+			       struct cfg80211_inform_bss *data,
+			       struct ieee80211_mgmt *mgmt, size_t len,
+			       gfp_t gfp)
+{
+	struct cfg80211_bss *res, *tmp_bss;
+	const u8 *ie = mgmt->u.probe_resp.variable;
+	const struct cfg80211_bss_ies *ies1, *ies2;
+	size_t ielen = len - offsetof(struct ieee80211_mgmt,
+				      u.probe_resp.variable);
+	struct cfg80211_non_tx_bss non_tx_data;
+
+	res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt,
+						    len, gfp);
+	if (!res || !wiphy->support_mbssid ||
+	    !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
+		return res;
+	if (wiphy->support_only_he_mbssid &&
+	    !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen))
+		return res;
+
+	non_tx_data.tx_bss = res;
+	/* process each non-transmitting bss */
+	cfg80211_parse_mbssid_frame_data(wiphy, data, mgmt, len,
+					 &non_tx_data, gfp);
+
+	spin_lock_bh(&wiphy_to_rdev(wiphy)->bss_lock);
+
+	/* check if res has other nontransmitted BSSes that are not
+	 * in the MBSSID IE
+	 */
+	ies1 = rcu_access_pointer(res->ies);
+
+	/* go through nontrans_list; if the timestamp of a BSS is
+	 * earlier than the timestamp of the transmitting BSS, then
+	 * update it
+	 */
+	list_for_each_entry(tmp_bss, &res->nontrans_list,
+			    nontrans_list) {
+		ies2 = rcu_access_pointer(tmp_bss->ies);
+		if (ies2->tsf < ies1->tsf)
+			cfg80211_update_notlisted_nontrans(wiphy, tmp_bss,
+							   mgmt, len);
+	}
+	spin_unlock_bh(&wiphy_to_rdev(wiphy)->bss_lock);
+
+	return res;
+}
 EXPORT_SYMBOL(cfg80211_inform_bss_frame_data);
 
 void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
@@ -1337,7 +1960,8 @@
 void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
-	struct cfg80211_internal_bss *bss;
+	struct cfg80211_internal_bss *bss, *tmp1;
+	struct cfg80211_bss *nontrans_bss, *tmp;
 
 	if (WARN_ON(!pub))
 		return;
@@ -1345,14 +1969,125 @@
 	bss = container_of(pub, struct cfg80211_internal_bss, pub);
 
 	spin_lock_bh(&rdev->bss_lock);
-	if (!list_empty(&bss->list)) {
-		if (__cfg80211_unlink_bss(rdev, bss))
+	if (list_empty(&bss->list))
+		goto out;
+
+	list_for_each_entry_safe(nontrans_bss, tmp,
+				 &pub->nontrans_list,
+				 nontrans_list) {
+		tmp1 = container_of(nontrans_bss,
+				    struct cfg80211_internal_bss, pub);
+		if (__cfg80211_unlink_bss(rdev, tmp1))
 			rdev->bss_generation++;
 	}
+
+	if (__cfg80211_unlink_bss(rdev, bss))
+		rdev->bss_generation++;
+out:
 	spin_unlock_bh(&rdev->bss_lock);
 }
 EXPORT_SYMBOL(cfg80211_unlink_bss);
 
+void cfg80211_bss_iter(struct wiphy *wiphy,
+		       struct cfg80211_chan_def *chandef,
+		       void (*iter)(struct wiphy *wiphy,
+				    struct cfg80211_bss *bss,
+				    void *data),
+		       void *iter_data)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct cfg80211_internal_bss *bss;
+
+	spin_lock_bh(&rdev->bss_lock);
+
+	list_for_each_entry(bss, &rdev->bss_list, list) {
+		if (!chandef || cfg80211_is_sub_chan(chandef, bss->pub.channel))
+			iter(wiphy, &bss->pub, iter_data);
+	}
+
+	spin_unlock_bh(&rdev->bss_lock);
+}
+EXPORT_SYMBOL(cfg80211_bss_iter);
+
+void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
+				     struct ieee80211_channel *chan)
+{
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct cfg80211_internal_bss *cbss = wdev->current_bss;
+	struct cfg80211_internal_bss *new = NULL;
+	struct cfg80211_internal_bss *bss;
+	struct cfg80211_bss *nontrans_bss;
+	struct cfg80211_bss *tmp;
+
+	spin_lock_bh(&rdev->bss_lock);
+
+	if (WARN_ON(cbss->pub.channel == chan))
+		goto done;
+
+	/* use transmitting bss */
+	if (cbss->pub.transmitted_bss)
+		cbss = container_of(cbss->pub.transmitted_bss,
+				    struct cfg80211_internal_bss,
+				    pub);
+
+	cbss->pub.channel = chan;
+
+	list_for_each_entry(bss, &rdev->bss_list, list) {
+		if (!cfg80211_bss_type_match(bss->pub.capability,
+					     bss->pub.channel->band,
+					     wdev->conn_bss_type))
+			continue;
+
+		if (bss == cbss)
+			continue;
+
+		if (!cmp_bss(&bss->pub, &cbss->pub, BSS_CMP_REGULAR)) {
+			new = bss;
+			break;
+		}
+	}
+
+	if (new) {
+		/* to save time, update IEs for transmitting bss only */
+		if (cfg80211_update_known_bss(rdev, cbss, new, false)) {
+			new->pub.proberesp_ies = NULL;
+			new->pub.beacon_ies = NULL;
+		}
+
+		list_for_each_entry_safe(nontrans_bss, tmp,
+					 &new->pub.nontrans_list,
+					 nontrans_list) {
+			bss = container_of(nontrans_bss,
+					   struct cfg80211_internal_bss, pub);
+			if (__cfg80211_unlink_bss(rdev, bss))
+				rdev->bss_generation++;
+		}
+
+		WARN_ON(atomic_read(&new->hold));
+		if (!WARN_ON(!__cfg80211_unlink_bss(rdev, new)))
+			rdev->bss_generation++;
+	}
+
+	rb_erase(&cbss->rbn, &rdev->bss_tree);
+	rb_insert_bss(rdev, cbss);
+	rdev->bss_generation++;
+
+	list_for_each_entry_safe(nontrans_bss, tmp,
+				 &cbss->pub.nontrans_list,
+				 nontrans_list) {
+		bss = container_of(nontrans_bss,
+				   struct cfg80211_internal_bss, pub);
+		bss->pub.channel = chan;
+		rb_erase(&bss->rbn, &rdev->bss_tree);
+		rb_insert_bss(rdev, bss);
+		rdev->bss_generation++;
+	}
+
+done:
+	spin_unlock_bh(&rdev->bss_lock);
+}
+
 #ifdef CONFIG_CFG80211_WEXT
 static struct cfg80211_registered_device *
 cfg80211_get_dev_from_ifindex(struct net *net, int ifindex)
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index d536b07..7a6c38d 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -642,11 +642,15 @@
 	 * All devices must be idle as otherwise if you are actively
 	 * scanning some new beacon hints could be learned and would
 	 * count as new regulatory hints.
+	 * Also, if any other interface is actively beaconing, we
+	 * must not issue a disconnect hint or reset info such as
+	 * the channel DFS state.
 	 */
 	list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
 		list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
 			wdev_lock(wdev);
-			if (wdev->conn || wdev->current_bss)
+			if (wdev->conn || wdev->current_bss ||
+			    cfg80211_beaconing_iface_active(wdev))
 				is_all_idle = false;
 			wdev_unlock(wdev);
 		}
@@ -663,7 +667,7 @@
 	rtnl_unlock();
 }
 
-static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
+DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
 
 
 /*
@@ -792,12 +796,36 @@
 	u8 *next;
 
 	if (params->bss) {
-		/* Make sure the bss entry provided by the driver is valid. */
 		struct cfg80211_internal_bss *ibss = bss_from_pub(params->bss);
 
-		if (WARN_ON(list_empty(&ibss->list))) {
-			cfg80211_put_bss(wdev->wiphy, params->bss);
-			return;
+		if (list_empty(&ibss->list)) {
+			struct cfg80211_bss *found = NULL, *tmp = params->bss;
+
+			found = cfg80211_get_bss(wdev->wiphy, NULL,
+						 params->bss->bssid,
+						 wdev->ssid, wdev->ssid_len,
+						 wdev->conn_bss_type,
+						 IEEE80211_PRIVACY_ANY);
+			if (found) {
+				/* The same BSS has already been updated, so
+				 * use it instead, as it has the latest info.
+				 */
+				params->bss = found;
+			} else {
+				/* Update with BSS provided by driver, it will
+				 * be freshly added and ref cnted, we can free
+				 * the old one.
+				 *
+				 * signal_valid can be false, as we are not
+				 * expecting the BSS to be found.
+				 *
+				 * keep the old timestamp to avoid confusion
+				 */
+				cfg80211_bss_update(rdev, ibss, false,
+						    ibss->ts);
+			}
+
+			cfg80211_put_bss(wdev->wiphy, tmp);
 		}
 	}
 
@@ -1171,6 +1199,8 @@
 
 	cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
 				  rdev->wiphy.ht_capa_mod_mask);
+	cfg80211_oper_and_vht_capa(&connect->vht_capa_mask,
+				   rdev->wiphy.vht_capa_mod_mask);
 
 	if (connkeys && connkeys->def >= 0) {
 		int idx;
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 6ab32f6..3ac1f48 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -1,11 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This file provides /sys/class/ieee80211/<wiphy name>/
  * and some default attributes.
  *
  * Copyright 2005-2006	Jiri Benc <jbenc@suse.cz>
  * Copyright 2006	Johannes Berg <johannes@sipsolutions.net>
- *
- * This file is GPLv2 as found in COPYING.
  */
 
 #include <linux/device.h>
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 7c73510..d98ad2b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -112,7 +112,7 @@
 	} while (0)
 
 #define CHAN_ENTRY __field(enum nl80211_band, band) \
-		   __field(u16, center_freq)
+		   __field(u32, center_freq)
 #define CHAN_ASSIGN(chan)					  \
 	do {							  \
 		if (chan) {					  \
@@ -361,6 +361,24 @@
 	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
 );
 
+DECLARE_EVENT_CLASS(wiphy_wdev_cookie_evt,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie),
+	TP_ARGS(wiphy, wdev, cookie),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u64, cookie)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->cookie = cookie;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie: %lld",
+		  WIPHY_PR_ARG, WDEV_PR_ARG,
+		  (unsigned long long)__entry->cookie)
+);
+
 DEFINE_EVENT(wiphy_wdev_evt, rdev_return_wdev,
 	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
 	TP_ARGS(wiphy, wdev)
@@ -412,12 +430,6 @@
 		  BOOL_TO_STR(__entry->pairwise), MAC_PR_ARG(mac_addr))
 );
 
-DEFINE_EVENT(key_handle, rdev_add_key,
-	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index,
-		 bool pairwise, const u8 *mac_addr),
-	TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr)
-);
-
 DEFINE_EVENT(key_handle, rdev_get_key,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index,
 		 bool pairwise, const u8 *mac_addr),
@@ -430,6 +442,33 @@
 	TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr)
 );
 
+TRACE_EVENT(rdev_add_key,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index,
+		 bool pairwise, const u8 *mac_addr, u8 mode),
+	TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr, mode),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(mac_addr)
+		__field(u8, key_index)
+		__field(bool, pairwise)
+		__field(u8, mode)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(mac_addr, mac_addr);
+		__entry->key_index = key_index;
+		__entry->pairwise = pairwise;
+		__entry->mode = mode;
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key_index: %u, "
+		  "mode: %u, pairwise: %s, mac addr: " MAC_PR_FMT,
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index,
+		  __entry->mode, BOOL_TO_STR(__entry->pairwise),
+		  MAC_PR_ARG(mac_addr))
+);
+
 TRACE_EVENT(rdev_set_default_key,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index,
 		 bool unicast, bool multicast),
@@ -770,9 +809,9 @@
 );
 
 TRACE_EVENT(rdev_dump_station,
-	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int idx,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx,
 		 u8 *mac),
-	TP_ARGS(wiphy, netdev, idx, mac),
+	TP_ARGS(wiphy, netdev, _idx, mac),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		NETDEV_ENTRY
@@ -783,7 +822,7 @@
 		WIPHY_ASSIGN;
 		NETDEV_ASSIGN;
 		MAC_ASSIGN(sta_mac, mac);
-		__entry->idx = idx;
+		__entry->idx = _idx;
 	),
 	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT ", idx: %d",
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(sta_mac),
@@ -847,9 +886,9 @@
 );
 
 TRACE_EVENT(rdev_dump_mpath,
-	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int idx,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx,
 		 u8 *dst, u8 *next_hop),
-	TP_ARGS(wiphy, netdev, idx, dst, next_hop),
+	TP_ARGS(wiphy, netdev, _idx, dst, next_hop),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		NETDEV_ENTRY
@@ -862,7 +901,7 @@
 		NETDEV_ASSIGN;
 		MAC_ASSIGN(dst, dst);
 		MAC_ASSIGN(next_hop, next_hop);
-		__entry->idx = idx;
+		__entry->idx = _idx;
 	),
 	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d, destination: "
 		  MAC_PR_FMT ", next hop: " MAC_PR_FMT,
@@ -892,9 +931,9 @@
 );
 
 TRACE_EVENT(rdev_dump_mpp,
-	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int idx,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx,
 		 u8 *dst, u8 *mpp),
-	TP_ARGS(wiphy, netdev, idx, mpp, dst),
+	TP_ARGS(wiphy, netdev, _idx, mpp, dst),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		NETDEV_ENTRY
@@ -907,7 +946,7 @@
 		NETDEV_ASSIGN;
 		MAC_ASSIGN(dst, dst);
 		MAC_ASSIGN(mpp, mpp);
-		__entry->idx = idx;
+		__entry->idx = _idx;
 	),
 	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d, destination: "
 		  MAC_PR_FMT ", mpp: " MAC_PR_FMT,
@@ -1673,8 +1712,8 @@
 );
 
 TRACE_EVENT(rdev_dump_survey,
-	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int idx),
-	TP_ARGS(wiphy, netdev, idx),
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx),
+	TP_ARGS(wiphy, netdev, _idx),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		NETDEV_ENTRY
@@ -1683,7 +1722,7 @@
 	TP_fast_assign(
 		WIPHY_ASSIGN;
 		NETDEV_ASSIGN;
-		__entry->idx = idx;
+		__entry->idx = _idx;
 	),
 	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d",
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx)
@@ -2368,6 +2407,151 @@
 		      __entry->bssid, __entry->ssid, __entry->status)
 );
 
+TRACE_EVENT(rdev_start_radar_detection,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_chan_def *chandef,
+		 u32 cac_time_ms),
+	TP_ARGS(wiphy, netdev, chandef, cac_time_ms),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		CHAN_DEF_ENTRY
+		__field(u32, cac_time_ms)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		CHAN_DEF_ASSIGN(chandef);
+		__entry->cac_time_ms = cac_time_ms;
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
+		  ", cac_time_ms=%u",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
+		  __entry->cac_time_ms)
+);
+
+TRACE_EVENT(rdev_set_mcast_rate,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 int *mcast_rate),
+	TP_ARGS(wiphy, netdev, mcast_rate),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__array(int, mcast_rate, NUM_NL80211_BANDS)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		memcpy(__entry->mcast_rate, mcast_rate,
+		       sizeof(int) * NUM_NL80211_BANDS);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", "
+		  "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 6GHz=0x%x, 60GHz=0x%x]",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG,
+		  __entry->mcast_rate[NL80211_BAND_2GHZ],
+		  __entry->mcast_rate[NL80211_BAND_5GHZ],
+		  __entry->mcast_rate[NL80211_BAND_6GHZ],
+		  __entry->mcast_rate[NL80211_BAND_60GHZ])
+);
+
+TRACE_EVENT(rdev_set_coalesce,
+	TP_PROTO(struct wiphy *wiphy, struct cfg80211_coalesce *coalesce),
+	TP_ARGS(wiphy, coalesce),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		__field(int, n_rules)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		__entry->n_rules = coalesce ? coalesce->n_rules : 0;
+	),
+	TP_printk(WIPHY_PR_FMT ", n_rules=%d",
+		  WIPHY_PR_ARG, __entry->n_rules)
+);
+
+DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+	TP_ARGS(wiphy, wdev)
+);
+
+TRACE_EVENT(rdev_set_multicast_to_unicast,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 const bool enabled),
+	TP_ARGS(wiphy, netdev, enabled),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__field(bool, enabled)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		__entry->enabled = enabled;
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG,
+		  BOOL_TO_STR(__entry->enabled))
+);
+
+DEFINE_EVENT(wiphy_wdev_evt, rdev_get_txq_stats,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+	TP_ARGS(wiphy, wdev)
+);
+
+TRACE_EVENT(rdev_get_ftm_responder_stats,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_ftm_responder_stats *ftm_stats),
+
+	TP_ARGS(wiphy, netdev, ftm_stats),
+
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__field(u64, timestamp)
+		__field(u32, success_num)
+		__field(u32, partial_num)
+		__field(u32, failed_num)
+		__field(u32, asap_num)
+		__field(u32, non_asap_num)
+		__field(u64, duration)
+		__field(u32, unknown_triggers)
+		__field(u32, reschedule)
+		__field(u32, out_of_window)
+	),
+
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		__entry->success_num = ftm_stats->success_num;
+		__entry->partial_num = ftm_stats->partial_num;
+		__entry->failed_num = ftm_stats->failed_num;
+		__entry->asap_num = ftm_stats->asap_num;
+		__entry->non_asap_num = ftm_stats->non_asap_num;
+		__entry->duration = ftm_stats->total_duration_ms;
+		__entry->unknown_triggers = ftm_stats->unknown_triggers_num;
+		__entry->reschedule = ftm_stats->reschedule_requests_num;
+		__entry->out_of_window = ftm_stats->out_of_window_triggers_num;
+	),
+
+	TP_printk(WIPHY_PR_FMT "Ftm responder stats: success %u, partial %u, "
+		"failed %u, asap %u, non asap %u, total duration %llu, unknown "
+		"triggers %u, rescheduled %u, out of window %u", WIPHY_PR_ARG,
+		__entry->success_num, __entry->partial_num, __entry->failed_num,
+		__entry->asap_num, __entry->non_asap_num, __entry->duration,
+		__entry->unknown_triggers, __entry->reschedule,
+		__entry->out_of_window)
+);
+
+DEFINE_EVENT(wiphy_wdev_cookie_evt, rdev_start_pmsr,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie),
+	TP_ARGS(wiphy, wdev, cookie)
+);
+
+DEFINE_EVENT(wiphy_wdev_cookie_evt, rdev_abort_pmsr,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie),
+	TP_ARGS(wiphy, wdev, cookie)
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/
@@ -2569,6 +2753,24 @@
 		  WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG)
 );
 
+TRACE_EVENT(cfg80211_tx_mgmt_expired,
+	TP_PROTO(struct wireless_dev *wdev, u64 cookie,
+		 struct ieee80211_channel *chan),
+	TP_ARGS(wdev, cookie, chan),
+	TP_STRUCT__entry(
+		WDEV_ENTRY
+		__field(u64, cookie)
+		CHAN_ENTRY
+	),
+	TP_fast_assign(
+		WDEV_ASSIGN;
+		__entry->cookie = cookie;
+		CHAN_ASSIGN(chan);
+	),
+	TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT,
+		  WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG)
+);
+
 TRACE_EVENT(cfg80211_new_sta,
 	TP_PROTO(struct net_device *netdev, const u8 *mac_addr,
 		 struct station_info *sinfo),
@@ -3161,104 +3363,101 @@
 		  WIPHY_PR_ARG, WDEV_PR_ARG)
 );
 
-TRACE_EVENT(rdev_start_radar_detection,
-	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
-		 struct cfg80211_chan_def *chandef,
-		 u32 cac_time_ms),
-	TP_ARGS(wiphy, netdev, chandef, cac_time_ms),
-	TP_STRUCT__entry(
-		WIPHY_ENTRY
-		NETDEV_ENTRY
-		CHAN_DEF_ENTRY
-		__field(u32, cac_time_ms)
-	),
-	TP_fast_assign(
-		WIPHY_ASSIGN;
-		NETDEV_ASSIGN;
-		CHAN_DEF_ASSIGN(chandef);
-		__entry->cac_time_ms = cac_time_ms;
-	),
-	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
-		  ", cac_time_ms=%u",
-		  WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
-		  __entry->cac_time_ms)
-);
-
-TRACE_EVENT(rdev_set_mcast_rate,
-	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
-		 int *mcast_rate),
-	TP_ARGS(wiphy, netdev, mcast_rate),
-	TP_STRUCT__entry(
-		WIPHY_ENTRY
-		NETDEV_ENTRY
-		__array(int, mcast_rate, NUM_NL80211_BANDS)
-	),
-	TP_fast_assign(
-		WIPHY_ASSIGN;
-		NETDEV_ASSIGN;
-		memcpy(__entry->mcast_rate, mcast_rate,
-		       sizeof(int) * NUM_NL80211_BANDS);
-	),
-	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", "
-		  "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]",
-		  WIPHY_PR_ARG, NETDEV_PR_ARG,
-		  __entry->mcast_rate[NL80211_BAND_2GHZ],
-		  __entry->mcast_rate[NL80211_BAND_5GHZ],
-		  __entry->mcast_rate[NL80211_BAND_60GHZ])
-);
-
-TRACE_EVENT(rdev_set_coalesce,
-	TP_PROTO(struct wiphy *wiphy, struct cfg80211_coalesce *coalesce),
-	TP_ARGS(wiphy, coalesce),
-	TP_STRUCT__entry(
-		WIPHY_ENTRY
-		__field(int, n_rules)
-	),
-	TP_fast_assign(
-		WIPHY_ASSIGN;
-		__entry->n_rules = coalesce ? coalesce->n_rules : 0;
-	),
-	TP_printk(WIPHY_PR_FMT ", n_rules=%d",
-		  WIPHY_PR_ARG, __entry->n_rules)
-);
-
-DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan,
-	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
-	TP_ARGS(wiphy, wdev)
-);
-
-TRACE_EVENT(rdev_set_multicast_to_unicast,
-	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
-		 const bool enabled),
-	TP_ARGS(wiphy, netdev, enabled),
-	TP_STRUCT__entry(
-		WIPHY_ENTRY
-		NETDEV_ENTRY
-		__field(bool, enabled)
-	),
-	TP_fast_assign(
-		WIPHY_ASSIGN;
-		NETDEV_ASSIGN;
-		__entry->enabled = enabled;
-	),
-	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s",
-		  WIPHY_PR_ARG, NETDEV_PR_ARG,
-		  BOOL_TO_STR(__entry->enabled))
-);
-
-TRACE_EVENT(rdev_get_txq_stats,
-	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
-	TP_ARGS(wiphy, wdev),
+TRACE_EVENT(cfg80211_pmsr_report,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 u64 cookie, const u8 *addr),
+	TP_ARGS(wiphy, wdev, cookie, addr),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		WDEV_ENTRY
+		__field(u64, cookie)
+		MAC_ENTRY(addr)
 	),
 	TP_fast_assign(
 		WIPHY_ASSIGN;
 		WDEV_ASSIGN;
+		__entry->cookie = cookie;
+		MAC_ASSIGN(addr, addr);
 	),
-	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie:%lld, " MAC_PR_FMT,
+		  WIPHY_PR_ARG, WDEV_PR_ARG,
+		  (unsigned long long)__entry->cookie,
+		  MAC_PR_ARG(addr))
 );
+
+TRACE_EVENT(cfg80211_pmsr_complete,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie),
+	TP_ARGS(wiphy, wdev, cookie),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u64, cookie)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->cookie = cookie;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie:%lld",
+		  WIPHY_PR_ARG, WDEV_PR_ARG,
+		  (unsigned long long)__entry->cookie)
+);
+
+TRACE_EVENT(rdev_update_owe_info,
+	    TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		     struct cfg80211_update_owe_info *owe_info),
+	    TP_ARGS(wiphy, netdev, owe_info),
+	    TP_STRUCT__entry(WIPHY_ENTRY
+			     NETDEV_ENTRY
+			     MAC_ENTRY(peer)
+			     __field(u16, status)
+			     __dynamic_array(u8, ie, owe_info->ie_len)),
+	    TP_fast_assign(WIPHY_ASSIGN;
+			   NETDEV_ASSIGN;
+			   MAC_ASSIGN(peer, owe_info->peer);
+			   __entry->status = owe_info->status;
+			   memcpy(__get_dynamic_array(ie),
+				  owe_info->ie, owe_info->ie_len);),
+	    TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT
+		  " status %d", WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
+		  __entry->status)
+);
+
+TRACE_EVENT(cfg80211_update_owe_info_event,
+	    TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		     struct cfg80211_update_owe_info *owe_info),
+	    TP_ARGS(wiphy, netdev, owe_info),
+	    TP_STRUCT__entry(WIPHY_ENTRY
+			     NETDEV_ENTRY
+			     MAC_ENTRY(peer)
+			     __dynamic_array(u8, ie, owe_info->ie_len)),
+	    TP_fast_assign(WIPHY_ASSIGN;
+			   NETDEV_ASSIGN;
+			   MAC_ASSIGN(peer, owe_info->peer);
+			   memcpy(__get_dynamic_array(ie), owe_info->ie,
+				  owe_info->ie_len);),
+	    TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT,
+		      WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer))
+);
+
+TRACE_EVENT(rdev_probe_mesh_link,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 const u8 *dest, const u8 *buf, size_t len),
+	TP_ARGS(wiphy, netdev, dest, buf, len),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(dest)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(dest, dest);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT,
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest))
+);
+
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index aad1c8e..5b4ed5b 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,17 +5,21 @@
  * Copyright 2007-2009	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2017	Intel Deutschland GmbH
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 #include <linux/export.h>
 #include <linux/bitops.h>
 #include <linux/etherdevice.h>
 #include <linux/slab.h>
+#include <linux/ieee80211.h>
 #include <net/cfg80211.h>
 #include <net/ip.h>
 #include <net/dsfield.h>
 #include <linux/if_vlan.h>
 #include <linux/mpls.h>
 #include <linux/gcd.h>
+#include <linux/bitfield.h>
+#include <linux/nospec.h>
 #include "core.h"
 #include "rdev-ops.h"
 
@@ -87,8 +91,13 @@
 		else
 			return 5000 + chan * 5;
 		break;
+	case NL80211_BAND_6GHZ:
+		/* see 802.11ax D4.1 27.3.22.2 */
+		if (chan <= 253)
+			return 5940 + chan * 5;
+		break;
 	case NL80211_BAND_60GHZ:
-		if (chan < 5)
+		if (chan < 7)
 			return 56160 + chan * 2160;
 		break;
 	default:
@@ -107,9 +116,12 @@
 		return (freq - 2407) / 5;
 	else if (freq >= 4910 && freq <= 4980)
 		return (freq - 4000) / 5;
-	else if (freq <= 45000) /* DMG band lower limit */
+	else if (freq < 5945)
 		return (freq - 5000) / 5;
-	else if (freq >= 58320 && freq <= 64800)
+	else if (freq <= 45000) /* DMG band lower limit */
+		/* see 802.11ax D4.1 27.3.22.2 */
+		return (freq - 5940) / 5;
+	else if (freq >= 58320 && freq <= 70200)
 		return (freq - 56160) / 2160;
 	else
 		return 0;
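Both directions of the new 6 GHz arithmetic are a straight linear map from the 5940 MHz base used here (later 802.11ax drafts moved the base to 5950 MHz). A round-trip sketch:

	/* sketch: 6 GHz channel <-> frequency, 5 MHz spacing from 5940 MHz */
	static int chan6g_to_freq(int chan)	/* chan 1..253 */
	{
		return 5940 + chan * 5;
	}

	static int freq_to_chan6g(int freq)	/* freq 5945..7205 MHz */
	{
		return (freq - 5940) / 5;
	}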
@@ -144,6 +156,7 @@
 
 	switch (sband->band) {
 	case NL80211_BAND_5GHZ:
+	case NL80211_BAND_6GHZ:
 		want = 3;
 		for (i = 0; i < sband->n_bitrates; i++) {
 			if (sband->bitrates[i].bitrate == 60 ||
@@ -229,18 +242,32 @@
 
 	switch (params->cipher) {
 	case WLAN_CIPHER_SUITE_TKIP:
+		/* Extended Key ID can only be used with CCMP/GCMP ciphers */
+		if ((pairwise && key_idx) ||
+		    params->mode != NL80211_KEY_RX_TX)
+			return -EINVAL;
+		break;
 	case WLAN_CIPHER_SUITE_CCMP:
 	case WLAN_CIPHER_SUITE_CCMP_256:
 	case WLAN_CIPHER_SUITE_GCMP:
 	case WLAN_CIPHER_SUITE_GCMP_256:
-		/* Disallow pairwise keys with non-zero index unless it's WEP
-		 * or a vendor specific cipher (because current deployments use
-		 * pairwise WEP keys with non-zero indices and for vendor
-		 * specific ciphers this should be validated in the driver or
-		 * hardware level - but 802.11i clearly specifies to use zero)
+		/* IEEE 802.11-2016 allows only index 0 for pairwise keys -
+		 * or indices 0 and 1 when Extended Key ID is supported.
+		 * @NL80211_KEY_NO_TX is only allowed for pairwise keys when
+		 * the driver supports Extended Key ID.
+		 * @NL80211_KEY_SET_TX can't be set when installing and
+		 * validating a key.
 		 */
-		if (pairwise && key_idx)
+		if ((params->mode == NL80211_KEY_NO_TX && !pairwise) ||
+		    params->mode == NL80211_KEY_SET_TX)
 			return -EINVAL;
+		if (wiphy_ext_feature_isset(&rdev->wiphy,
+					    NL80211_EXT_FEATURE_EXT_KEY_ID)) {
+			if (pairwise && (key_idx < 0 || key_idx > 1))
+				return -EINVAL;
+		} else if (pairwise && key_idx) {
+			return -EINVAL;
+		}
 		break;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
 	case WLAN_CIPHER_SUITE_BIP_CMAC_256:
@@ -712,20 +739,25 @@
 {
 	unsigned int dscp;
 	unsigned char vlan_priority;
+	unsigned int ret;
 
 	/* skb->priority values from 256->263 are magic values to
 	 * directly indicate a specific 802.1d priority.  This is used
 	 * to allow 802.1d priority to be passed directly in from VLAN
 	 * tags, etc.
 	 */
-	if (skb->priority >= 256 && skb->priority <= 263)
-		return skb->priority - 256;
+	if (skb->priority >= 256 && skb->priority <= 263) {
+		ret = skb->priority - 256;
+		goto out;
+	}
 
 	if (skb_vlan_tag_present(skb)) {
 		vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK)
 			>> VLAN_PRIO_SHIFT;
-		if (vlan_priority > 0)
-			return vlan_priority;
+		if (vlan_priority > 0) {
+			ret = vlan_priority;
+			goto out;
+		}
 	}
 
 	switch (skb->protocol) {
@@ -744,8 +776,9 @@
 		if (!mpls)
 			return 0;
 
-		return (ntohl(mpls->entry) & MPLS_LS_TC_MASK)
+		ret = (ntohl(mpls->entry) & MPLS_LS_TC_MASK)
 			>> MPLS_LS_TC_SHIFT;
+		goto out;
 	}
 	case htons(ETH_P_80221):
 		/* 802.21 is always network control traffic */
@@ -758,22 +791,28 @@
 		unsigned int i, tmp_dscp = dscp >> 2;
 
 		for (i = 0; i < qos_map->num_des; i++) {
-			if (tmp_dscp == qos_map->dscp_exception[i].dscp)
-				return qos_map->dscp_exception[i].up;
+			if (tmp_dscp == qos_map->dscp_exception[i].dscp) {
+				ret = qos_map->dscp_exception[i].up;
+				goto out;
+			}
 		}
 
 		for (i = 0; i < 8; i++) {
 			if (tmp_dscp >= qos_map->up[i].low &&
-			    tmp_dscp <= qos_map->up[i].high)
-				return i;
+			    tmp_dscp <= qos_map->up[i].high) {
+				ret = i;
+				goto out;
+			}
 		}
 	}
 
-	return dscp >> 5;
+	ret = dscp >> 5;
+out:
+	return array_index_nospec(ret, IEEE80211_NUM_TIDS);
 }
 EXPORT_SYMBOL(cfg80211_classify8021d);
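Note that the dscp local above holds the whole DS field (DSCP << 2, ECN bits masked off, as the tmp_dscp = dscp >> 2 shift confirms), so the fallback mapping returns the three IP-precedence bits; the array_index_nospec() clamp only hardens the returned TID against speculative out-of-bounds indexing. A tiny sketch of that fallback:

	/* sketch: fallback DS-field -> 802.1d UP mapping */
	static unsigned int dsfield_to_up(unsigned char dsfield)
	{
		/* e.g. DSCP 46 (EF) -> DS field 0xb8 -> UP 5 */
		return dsfield >> 5;
	}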
 
-const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie)
+const struct element *ieee80211_bss_get_elem(struct cfg80211_bss *bss, u8 id)
 {
 	const struct cfg80211_bss_ies *ies;
 
@@ -781,9 +820,9 @@
 	if (!ies)
 		return NULL;
 
-	return cfg80211_find_ie(ie, ies->data, ies->len);
+	return cfg80211_find_elem(id, ies->data, ies->len);
 }
-EXPORT_SYMBOL(ieee80211_bss_get_ie);
+EXPORT_SYMBOL(ieee80211_bss_get_elem);
 
 void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
 {
@@ -930,6 +969,7 @@
 		}
 
 		cfg80211_process_rdev_events(rdev);
+		cfg80211_mlme_purge_registrations(dev->ieee80211_ptr);
 	}
 
 	err = rdev_change_virtual_intf(rdev, dev, ntype, params);
@@ -1009,7 +1049,7 @@
 	return (bitrate + 50000) / 100000;
 }
 
-static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate)
+static u32 cfg80211_calculate_bitrate_dmg(struct rate_info *rate)
 {
 	static const u32 __mcs2bitrate[] = {
 		/* control PHY */
@@ -1056,6 +1096,40 @@
 	return __mcs2bitrate[rate->mcs];
 }
 
+static u32 cfg80211_calculate_bitrate_edmg(struct rate_info *rate)
+{
+	static const u32 __mcs2bitrate[] = {
+		/* control PHY */
+		[0] =   275,
+		/* SC PHY */
+		[1] =  3850,
+		[2] =  7700,
+		[3] =  9625,
+		[4] = 11550,
+		[5] = 12512, /* 1251.25 Mbps */
+		[6] = 13475,
+		[7] = 15400,
+		[8] = 19250,
+		[9] = 23100,
+		[10] = 25025,
+		[11] = 26950,
+		[12] = 30800,
+		[13] = 38500,
+		[14] = 46200,
+		[15] = 50050,
+		[16] = 53900,
+		[17] = 57750,
+		[18] = 69300,
+		[19] = 75075,
+		[20] = 80850,
+	};
+
+	if (WARN_ON_ONCE(rate->mcs >= ARRAY_SIZE(__mcs2bitrate)))
+		return 0;
+
+	return __mcs2bitrate[rate->mcs] * rate->n_bonded_ch;
+}
+
 static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
 {
 	static const u32 base[4][10] = {
@@ -1204,9 +1278,11 @@
 	else if (rate->bw == RATE_INFO_BW_HE_RU &&
 		 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26)
 		result = rates_26[rate->he_gi];
-	else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n",
-		      rate->bw, rate->he_ru_alloc))
+	else {
+		WARN(1, "invalid HE MCS: bw:%d, ru:%d\n",
+		     rate->bw, rate->he_ru_alloc);
 		return 0;
+	}
 
 	/* now scale to the appropriate MCS */
 	tmp = result;
@@ -1219,15 +1295,17 @@
 	if (rate->he_dcm)
 		result /= 2;
 
-	return result;
+	return result / 10000;
 }
 
 u32 cfg80211_calculate_bitrate(struct rate_info *rate)
 {
 	if (rate->flags & RATE_INFO_FLAGS_MCS)
 		return cfg80211_calculate_bitrate_ht(rate);
-	if (rate->flags & RATE_INFO_FLAGS_60G)
-		return cfg80211_calculate_bitrate_60g(rate);
+	if (rate->flags & RATE_INFO_FLAGS_DMG)
+		return cfg80211_calculate_bitrate_dmg(rate);
+	if (rate->flags & RATE_INFO_FLAGS_EDMG)
+		return cfg80211_calculate_bitrate_edmg(rate);
 	if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
 		return cfg80211_calculate_bitrate_vht(rate);
 	if (rate->flags & RATE_INFO_FLAGS_HE_MCS)
@@ -1439,6 +1517,9 @@
 	case 128 ... 130:
 		*band = NL80211_BAND_5GHZ;
 		return true;
+	case 131 ... 135:
+		*band = NL80211_BAND_6GHZ;
+		return true;
 	case 81:
 	case 82:
 	case 83:
@@ -1478,7 +1559,8 @@
 	}
 
 	if (freq == 2484) {
-		if (chandef->width > NL80211_CHAN_WIDTH_40)
+		/* channel 14 is only for IEEE 802.11b */
+		if (chandef->width != NL80211_CHAN_WIDTH_20_NOHT)
 			return false;
 
 		*op_class = 82; /* channel 14 */
@@ -1570,7 +1652,7 @@
 	}
 
 	/* 56.16 GHz, channel 1..4 */
-	if (freq >= 56160 + 2160 * 1 && freq <= 56160 + 2160 * 4) {
+	if (freq >= 56160 + 2160 * 1 && freq <= 56160 + 2160 * 6) {
 		if (chandef->width >= NL80211_CHAN_WIDTH_40)
 			return false;
 
@@ -1670,7 +1752,7 @@
 	for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
 		num_interfaces += params->iftype_num[iftype];
 		if (params->iftype_num[iftype] > 0 &&
-		    !(wiphy->software_iftypes & BIT(iftype)))
+		    !cfg80211_iftype_allowed(wiphy, iftype, 0, 1))
 			used_iftypes |= BIT(iftype);
 	}
 
@@ -1692,7 +1774,7 @@
 			return -ENOMEM;
 
 		for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
-			if (wiphy->software_iftypes & BIT(iftype))
+			if (cfg80211_iftype_allowed(wiphy, iftype, 0, 1))
 				continue;
 			for (j = 0; j < c->n_limits; j++) {
 				all_iftypes |= limits[j].types;
@@ -1895,3 +1977,176 @@
 const unsigned char bridge_tunnel_header[] __aligned(2) =
 	{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
 EXPORT_SYMBOL(bridge_tunnel_header);
+
+/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
+struct iapp_layer2_update {
+	u8 da[ETH_ALEN];	/* broadcast */
+	u8 sa[ETH_ALEN];	/* STA addr */
+	__be16 len;		/* 6 */
+	u8 dsap;		/* 0 */
+	u8 ssap;		/* 0 */
+	u8 control;
+	u8 xid_info[3];
+} __packed;
+
+void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr)
+{
+	struct iapp_layer2_update *msg;
+	struct sk_buff *skb;
+
+	/* Send Level 2 Update Frame to update forwarding tables in layer 2
+	 * bridge devices */
+
+	skb = dev_alloc_skb(sizeof(*msg));
+	if (!skb)
+		return;
+	msg = skb_put(skb, sizeof(*msg));
+
+	/* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
+	 * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
+
+	eth_broadcast_addr(msg->da);
+	ether_addr_copy(msg->sa, addr);
+	msg->len = htons(6);
+	msg->dsap = 0;
+	msg->ssap = 0x01;	/* NULL LSAP, CR Bit: Response */
+	msg->control = 0xaf;	/* XID response lsb.1111F101.
+				 * F=0 (no poll command; unsolicited frame) */
+	msg->xid_info[0] = 0x81;	/* XID format identifier */
+	msg->xid_info[1] = 1;	/* LLC types/classes: Type 1 LLC */
+	msg->xid_info[2] = 0;	/* XID sender's receive window size (RW) */
+
+	skb->dev = dev;
+	skb->protocol = eth_type_trans(skb, dev);
+	memset(skb->cb, 0, sizeof(skb->cb));
+	netif_rx_ni(skb);
+}
+EXPORT_SYMBOL(cfg80211_send_layer2_update);
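For reference, the 20 bytes such a frame carries for a hypothetical STA address 02:00:00:00:00:01 (the address is illustrative only), laid out per the struct above:

	/* sketch: on-the-wire Layer 2 Update frame */
	static const u8 l2_update_example[20] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff,	/* da: broadcast */
		0x02, 0x00, 0x00, 0x00, 0x00, 0x01,	/* sa: STA address */
		0x00, 0x06,				/* 802.3 length: 6 */
		0x00, 0x01, 0xaf,			/* dsap, ssap, control */
		0x81, 0x01, 0x00,			/* XID info */
	};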
+
+int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
+			      enum ieee80211_vht_chanwidth bw,
+			      int mcs, bool ext_nss_bw_capable)
+{
+	u16 map = le16_to_cpu(cap->supp_mcs.rx_mcs_map);
+	int max_vht_nss = 0;
+	int ext_nss_bw;
+	int supp_width;
+	int i, mcs_encoding;
+
+	if (map == 0xffff)
+		return 0;
+
+	if (WARN_ON(mcs > 9))
+		return 0;
+	if (mcs <= 7)
+		mcs_encoding = 0;
+	else if (mcs == 8)
+		mcs_encoding = 1;
+	else
+		mcs_encoding = 2;
+
+	/* find max_vht_nss for the given MCS */
+	for (i = 7; i >= 0; i--) {
+		int supp = (map >> (2 * i)) & 3;
+
+		if (supp == 3)
+			continue;
+
+		if (supp >= mcs_encoding) {
+			max_vht_nss = i + 1;
+			break;
+		}
+	}
+
+	if (!(cap->supp_mcs.tx_mcs_map &
+			cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE)))
+		return max_vht_nss;
+
+	ext_nss_bw = le32_get_bits(cap->vht_cap_info,
+				   IEEE80211_VHT_CAP_EXT_NSS_BW_MASK);
+	supp_width = le32_get_bits(cap->vht_cap_info,
+				   IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK);
+
+	/* if not capable, treat ext_nss_bw as 0 */
+	if (!ext_nss_bw_capable)
+		ext_nss_bw = 0;
+
+	/* This is invalid */
+	if (supp_width == 3)
+		return 0;
+
+	/* This is an invalid combination so pretend nothing is supported */
+	if (supp_width == 2 && (ext_nss_bw == 1 || ext_nss_bw == 2))
+		return 0;
+
+	/*
+	 * Cover all the special cases according to IEEE 802.11-2016
+	 * Table 9-250. All other cases are either factor of 1 or not
+	 * valid/supported.
+	 */
+	switch (bw) {
+	case IEEE80211_VHT_CHANWIDTH_USE_HT:
+	case IEEE80211_VHT_CHANWIDTH_80MHZ:
+		if ((supp_width == 1 || supp_width == 2) &&
+		    ext_nss_bw == 3)
+			return 2 * max_vht_nss;
+		break;
+	case IEEE80211_VHT_CHANWIDTH_160MHZ:
+		if (supp_width == 0 &&
+		    (ext_nss_bw == 1 || ext_nss_bw == 2))
+			return max_vht_nss / 2;
+		if (supp_width == 0 &&
+		    ext_nss_bw == 3)
+			return (3 * max_vht_nss) / 4;
+		if (supp_width == 1 &&
+		    ext_nss_bw == 3)
+			return 2 * max_vht_nss;
+		break;
+	case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+		if (supp_width == 0 && ext_nss_bw == 1)
+			return 0; /* not possible */
+		if (supp_width == 0 &&
+		    ext_nss_bw == 2)
+			return max_vht_nss / 2;
+		if (supp_width == 0 &&
+		    ext_nss_bw == 3)
+			return (3 * max_vht_nss) / 4;
+		if (supp_width == 1 &&
+		    ext_nss_bw == 0)
+			return 0; /* not possible */
+		if (supp_width == 1 &&
+		    ext_nss_bw == 1)
+			return max_vht_nss / 2;
+		if (supp_width == 1 &&
+		    ext_nss_bw == 2)
+			return (3 * max_vht_nss) / 4;
+		break;
+	}
+
+	/* not covered or invalid combination received */
+	return max_vht_nss;
+}
+EXPORT_SYMBOL(ieee80211_get_vht_max_nss);
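The rx_mcs_map decoded above packs one 2-bit field per spatial stream: 0 = MCS 0-7, 1 = MCS 0-8, 2 = MCS 0-9, 3 = stream not supported. A sketch of the max_vht_nss search specialized to the MCS 9 case (mcs_encoding == 2); e.g. a map of 0xfffa decodes to two streams at MCS 0-9:

	/* sketch: highest NSS advertising MCS 0-9 in a VHT rx_mcs_map */
	static int vht_max_nss_mcs9(u16 rx_mcs_map)
	{
		int nss;

		for (nss = 8; nss >= 1; nss--) {
			u8 supp = (rx_mcs_map >> (2 * (nss - 1))) & 3;

			if (supp == 2)	/* MCS 0-9 at this stream count */
				return nss;
		}
		return 0;
	}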
+
+bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
+			     bool is_4addr, u8 check_swif)
+{
+	bool is_vlan = iftype == NL80211_IFTYPE_AP_VLAN;
+
+	switch (check_swif) {
+	case 0:
+		if (is_vlan && is_4addr)
+			return wiphy->flags & WIPHY_FLAG_4ADDR_AP;
+		return wiphy->interface_modes & BIT(iftype);
+	case 1:
+		if (!(wiphy->software_iftypes & BIT(iftype)) && is_vlan)
+			return wiphy->flags & WIPHY_FLAG_4ADDR_AP;
+		return wiphy->software_iftypes & BIT(iftype);
+	default:
+		break;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(cfg80211_iftype_allowed);
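
cfg80211_iftype_allowed() folds the AP_VLAN/4addr special case into a single predicate usable for both hardware (check_swif == 0) and software (check_swif == 1) interface checks. A hedged caller sketch, with a hypothetical wiphy setup:

/* Hypothetical driver capability setup: AP and STATION interfaces,
 * plus 4-address AP_VLAN support via WIPHY_FLAG_4ADDR_AP.
 */
wiphy->flags |= WIPHY_FLAG_4ADDR_AP;
wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) |
			 BIT(NL80211_IFTYPE_STATION);

/* check_swif == 0: validate against hardware interface modes; a
 * 4-addr AP_VLAN is accepted here only because of the flag above.
 */
if (!cfg80211_iftype_allowed(wiphy, NL80211_IFTYPE_AP_VLAN, true, 0))
	return -EOPNOTSUPP;
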
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 06943d9..cac9e28 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -7,6 +7,7 @@
  * we directly assign the wireless handlers of wireless interfaces.
  *
  * Copyright 2008-2009	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2019 Intel Corporation
  */
 
 #include <linux/export.h>
@@ -353,9 +354,6 @@
 		changed |= WIPHY_PARAM_RETRY_SHORT;
 	}
 
-	if (!changed)
-		return 0;
-
 	err = rdev_set_wiphy_params(rdev, changed);
 	if (err) {
 		wdev->wiphy->retry_short = oshort;
@@ -800,7 +798,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
-	struct cfg80211_chan_def chandef;
+	struct cfg80211_chan_def chandef = {};
 	int ret;
 
 	switch (wdev->iftype) {
@@ -867,8 +865,8 @@
 			}
 		}
 	} else {
-		rfkill_set_sw_state(rdev->rfkill, true);
-		schedule_work(&rdev->rfkill_sync);
+		if (rfkill_set_sw_state(rdev->rfkill, true))
+			schedule_work(&rdev->rfkill_block);
 		return 0;
 	}
 
@@ -1337,6 +1335,7 @@
 			wstats.qual.qual = sig + 110;
 			break;
 		}
+		/* fall through */
 	case CFG80211_SIGNAL_TYPE_UNSPEC:
 		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) {
 			wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED;
@@ -1345,6 +1344,7 @@
 			wstats.qual.qual = sinfo.signal;
 			break;
 		}
+		/* fall through */
 	default:
 		wstats.qual.updated |= IW_QUAL_LEVEL_INVALID;
 		wstats.qual.updated |= IW_QUAL_QUAL_INVALID;
diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h
index 94c7405..8d3cc15 100644
--- a/net/wireless/wext-compat.h
+++ b/net/wireless/wext-compat.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 #ifndef __WEXT_COMPAT
 #define __WEXT_COMPAT
 
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index c67d7a8..73fd0ea 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -202,6 +202,7 @@
 			       struct iw_point *data, char *ssid)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	int ret = 0;
 
 	/* call only for station! */
 	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
@@ -219,7 +220,10 @@
 		if (ie) {
 			data->flags = 1;
 			data->length = ie[1];
-			memcpy(ssid, ie + 2, data->length);
+			if (data->length > IW_ESSID_MAX_SIZE)
+				ret = -EINVAL;
+			else
+				memcpy(ssid, ie + 2, data->length);
 		}
 		rcu_read_unlock();
 	} else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) {
@@ -229,7 +233,7 @@
 	}
 	wdev_unlock(wdev);
 
-	return 0;
+	return ret;
 }
 
 int cfg80211_mgd_wext_siwap(struct net_device *dev,
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index 59fcb41..2ecb2e5 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # CCITT X.25 Packet Layer
 #
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d49aa79..6aee9f5 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	X.25 Packet Layer release 002
  *
@@ -7,12 +8,6 @@
  *
  *	This code REQUIRES 2.1.15 or higher
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	X.25 001	Jonathan Naylor	Started coding.
  *	X.25 002	Jonathan Naylor	Centralised disconnect handling.
@@ -100,7 +95,7 @@
 	}
 
 	len = *skb->data;
-	needed = 1 + (len >> 4) + (len & 0x0f);
+	needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2;
 
 	if (!pskb_may_pull(skb, needed)) {
 		/* packet is too short to hold the addresses it claims
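
The corrected length computation reflects that the called/calling addresses are BCD-coded, two digits per octet: the first octet holds the two digit counts in its nibbles, and the digits themselves then occupy ceil((called + calling) / 2) octets. A small sketch with hypothetical digit counts:

/* Hypothetical digit counts: len = 0x4b encodes a 4-digit called
 * and an 11-digit calling address. The 15 BCD digits pack into
 * (15 + 1) / 2 = 8 octets, so the address block occupies
 * 1 + 8 = 9 octets -- not 1 + 4 + 11 = 16 as computed before.
 */
unsigned char len = 0x4b;
int needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2;	/* 9 */
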
@@ -288,7 +283,7 @@
 	sk_for_each(s, &x25_list)
 		if ((!strcmp(addr->x25_addr,
 			x25_sk(s)->source_addr.x25_addr) ||
-				!strcmp(addr->x25_addr,
+				!strcmp(x25_sk(s)->source_addr.x25_addr,
 					null_x25_address.x25_addr)) &&
 					s->sk_state == TCP_LISTEN) {
 			/*
@@ -352,17 +347,15 @@
 	unsigned int lci = 1;
 	struct sock *sk;
 
-	read_lock_bh(&x25_list_lock);
-
-	while ((sk = __x25_find_socket(lci, nb)) != NULL) {
+	while ((sk = x25_find_socket(lci, nb)) != NULL) {
 		sock_put(sk);
 		if (++lci == 4096) {
 			lci = 0;
 			break;
 		}
+		cond_resched();
 	}
 
-	read_unlock_bh(&x25_list_lock);
 	return lci;
 }
 
@@ -681,25 +674,32 @@
 	struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
 	int len, i, rc = 0;
 
-	if (!sock_flag(sk, SOCK_ZAPPED) ||
-	    addr_len != sizeof(struct sockaddr_x25) ||
+	if (addr_len != sizeof(struct sockaddr_x25) ||
 	    addr->sx25_family != AF_X25) {
 		rc = -EINVAL;
 		goto out;
 	}
 
-	len = strlen(addr->sx25_addr.x25_addr);
-	for (i = 0; i < len; i++) {
-		if (!isdigit(addr->sx25_addr.x25_addr[i])) {
-			rc = -EINVAL;
-			goto out;
+	/* check for the null_x25_address */
+	if (strcmp(addr->sx25_addr.x25_addr, null_x25_address.x25_addr)) {
+
+		len = strlen(addr->sx25_addr.x25_addr);
+		for (i = 0; i < len; i++) {
+			if (!isdigit(addr->sx25_addr.x25_addr[i])) {
+				rc = -EINVAL;
+				goto out;
+			}
 		}
 	}
 
 	lock_sock(sk);
-	x25_sk(sk)->source_addr = addr->sx25_addr;
-	x25_insert_socket(sk);
-	sock_reset_flag(sk, SOCK_ZAPPED);
+	if (sock_flag(sk, SOCK_ZAPPED)) {
+		x25_sk(sk)->source_addr = addr->sx25_addr;
+		x25_insert_socket(sk);
+		sock_reset_flag(sk, SOCK_ZAPPED);
+	} else {
+		rc = -EINVAL;
+	}
 	release_sock(sk);
 	SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
 out:
@@ -815,8 +815,13 @@
 	sock->state = SS_CONNECTED;
 	rc = 0;
 out_put_neigh:
-	if (rc)
+	if (rc) {
+		read_lock_bh(&x25_list_lock);
 		x25_neigh_put(x25->neighbour);
+		x25->neighbour = NULL;
+		read_unlock_bh(&x25_list_lock);
+		x25->state = X25_STATE_0;
+	}
 out_put_route:
 	x25_route_put(rt);
 out:
@@ -1388,18 +1393,6 @@
 		break;
 	}
 
-	case SIOCGSTAMP:
-		rc = -EINVAL;
-		if (sk)
-			rc = sock_get_timestamp(sk,
-						(struct timeval __user *)argp);
-		break;
-	case SIOCGSTAMPNS:
-		rc = -EINVAL;
-		if (sk)
-			rc = sock_get_timestampns(sk,
-					(struct timespec __user *)argp);
-		break;
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 	case SIOCGIFDSTADDR:
@@ -1671,8 +1664,6 @@
 				unsigned long arg)
 {
 	void __user *argp = compat_ptr(arg);
-	struct sock *sk = sock->sk;
-
 	int rc = -ENOIOCTLCMD;
 
 	switch(cmd) {
@@ -1680,18 +1671,6 @@
 	case TIOCINQ:
 		rc = x25_ioctl(sock, cmd, (unsigned long)argp);
 		break;
-	case SIOCGSTAMP:
-		rc = -EINVAL;
-		if (sk)
-			rc = compat_sock_get_timestamp(sk,
-					(struct timeval __user*)argp);
-		break;
-	case SIOCGSTAMPNS:
-		rc = -EINVAL;
-		if (sk)
-			rc = compat_sock_get_timestampns(sk,
-					(struct timespec __user*)argp);
-		break;
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 	case SIOCGIFDSTADDR:
@@ -1755,6 +1734,7 @@
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = compat_x25_ioctl,
 #endif
+	.gettstamp =	sock_gettstamp,
 	.listen =	x25_listen,
 	.shutdown =	sock_no_shutdown,
 	.setsockopt =	x25_setsockopt,
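
The SIOCGSTAMP/SIOCGSTAMPNS cases can be deleted because the socket core now routes those ioctls through the new .gettstamp hook; user space is unaffected. A sketch, assuming x25_fd is a connected AF_X25 socket:

#include <stdio.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>

static void print_last_rx_time(int x25_fd)
{
	struct timeval tv;

	/* Still works: the core dispatches SIOCGSTAMP to the
	 * .gettstamp hook (sock_gettstamp) instead of per-family code.
	 */
	if (ioctl(x25_fd, SIOCGSTAMP, &tv) == 0)
		printf("last packet: %lld.%06ld\n",
		       (long long)tv.tv_sec, (long)tv.tv_usec);
}
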
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 3923123..00e7823 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	X.25 Packet Layer release 002
  *
@@ -6,12 +7,6 @@
  *
  *	This code REQUIRES 2.1.15 or higher
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	X.25 001	Jonathan Naylor	Started coding.
  *      2000-09-04	Henner Eisen	Prevent freeing a dangling skb.
@@ -60,7 +55,7 @@
 		if (!sock_owned_by_user(sk)) {
 			queued = x25_process_rx_frame(sk, skb);
 		} else {
-			queued = !sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+			queued = !sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
 		}
 		bh_unlock_sock(sk);
 		sock_put(sk);
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index ad1734d..7fb3276 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	X.25 Packet Layer release 002
  *
@@ -7,12 +8,6 @@
  *
  *	This code REQUIRES 2.1.15 or higher
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	X.25 001	Split from x25_subr.c
  *	mar/20/00	Daniela Squassoni Disabling/enabling of facilities
diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c
index cf561f1..c829999 100644
--- a/net/x25/x25_forward.c
+++ b/net/x25/x25_forward.c
@@ -1,10 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	03-01-2007	Added forwarding for x.25	Andrew Hendry
  */
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 3c12cae..f97c433 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	X.25 Packet Layer release 002
  *
@@ -7,12 +8,6 @@
  *
  *	This code REQUIRES 2.1.15 or higher
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	X.25 001	Jonathan Naylor	  Started coding.
  *	X.25 002	Jonathan Naylor	  Centralised disconnection code.
@@ -142,6 +137,15 @@
 			sk->sk_state_change(sk);
 		break;
 	}
+	case X25_CALL_REQUEST:
+		/* call collision */
+		x25->causediag.cause      = 0x01;
+		x25->causediag.diagnostic = 0x48;
+
+		x25_write_internal(sk, X25_CLEAR_REQUEST);
+		x25_disconnect(sk, EISCONN, 0x01, 0x48);
+		break;
+
 	case X25_CLEAR_REQUEST:
 		if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
 			goto out_clear;
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index a6a8ab0..7d02532 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	X.25 Packet Layer release 002
  *
@@ -7,12 +8,6 @@
  *
  *	This code REQUIRES 2.1.15 or higher
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	X.25 001	Jonathan Naylor	  Started coding.
  *	X.25 002	Jonathan Naylor	  New timer architecture.
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index 0144271..dbc0940 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	X.25 Packet Layer release 002
  *
@@ -7,12 +8,6 @@
  *
  *	This code REQUIRES 2.1.15 or higher
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	X.25 001	Jonathan Naylor	Started coding.
  *	X.25 002	Jonathan Naylor	New timer architecture.
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index da52c9d..3bddcbd 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	X.25 Packet Layer release 002
  *
@@ -7,12 +8,6 @@
  *
  *	This code REQUIRES 2.4 with seq_file support
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	2002/10/06	Arnaldo Carvalho de Melo  seq_file support
  */
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index b85b889..b8e94d5 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	X.25 Packet Layer release 002
  *
@@ -7,12 +8,6 @@
  *
  *	This code REQUIRES 2.1.15 or higher
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	X.25 001	Jonathan Naylor	Started coding.
  */
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 7431037..8aa415a 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	X.25 Packet Layer release 002
  *
@@ -7,12 +8,6 @@
  *
  *	This code REQUIRES 2.1.15 or higher
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	X.25 001	Jonathan Naylor	  Started coding.
  *	X.25 002	Jonathan Naylor	  Centralised disconnection processing.
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index fa34610..9376365 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	X.25 Packet Layer release 002
  *
@@ -7,12 +8,6 @@
  *
  *	This code REQUIRES 2.1.15 or higher
  *
- *	This module:
- *		This module is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
  *	History
  *	X.25 001	Jonathan Naylor	Started coding.
  *	X.25 002	Jonathan Naylor	New timer architecture.
diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig
index 90e4a71..71af2fe 100644
--- a/net/xdp/Kconfig
+++ b/net/xdp/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config XDP_SOCKETS
 	bool "XDP sockets"
 	depends on BPF_SYSCALL
@@ -5,3 +6,11 @@
 	help
 	  XDP sockets allow a channel between XDP programs and
 	  userspace applications.
+
+config XDP_SOCKETS_DIAG
+	tristate "XDP sockets: monitoring interface"
+	depends on XDP_SOCKETS
+	default n
+	help
+	  Support for the PF_XDP sockets monitoring interface used by the ss tool.
+	  If unsure, say Y.
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
index 04f0731..71e2bda 100644
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
@@ -1 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
+obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index bfe2dbe..3049af2 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -13,16 +13,23 @@
 #include <linux/mm.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
+#include <linux/idr.h>
+#include <linux/vmalloc.h>
 
 #include "xdp_umem.h"
 #include "xsk_queue.h"
 
 #define XDP_UMEM_MIN_CHUNK_SIZE 2048
 
+static DEFINE_IDA(umem_ida);
+
 void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 {
 	unsigned long flags;
 
+	if (!xs->tx)
+		return;
+
 	spin_lock_irqsave(&umem->xsk_list_lock, flags);
 	list_add_rcu(&xs->list, &umem->xsk_list);
 	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
@@ -32,37 +39,62 @@
 {
 	unsigned long flags;
 
-	if (xs->dev) {
-		spin_lock_irqsave(&umem->xsk_list_lock, flags);
-		list_del_rcu(&xs->list);
-		spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+	if (!xs->tx)
+		return;
 
-		if (umem->zc)
-			synchronize_net();
-	}
+	spin_lock_irqsave(&umem->xsk_list_lock, flags);
+	list_del_rcu(&xs->list);
+	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
 }
 
-int xdp_umem_query(struct net_device *dev, u16 queue_id)
+/* The umem is stored both in the _rx struct and the _tx struct as we do
+ * not know if the device has more tx queues than rx, or the opposite.
+ * This might also change during run time.
+ */
+static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
+			       u16 queue_id)
 {
-	struct netdev_bpf bpf;
+	if (queue_id >= max_t(unsigned int,
+			      dev->real_num_rx_queues,
+			      dev->real_num_tx_queues))
+		return -EINVAL;
 
-	ASSERT_RTNL();
+	if (queue_id < dev->real_num_rx_queues)
+		dev->_rx[queue_id].umem = umem;
+	if (queue_id < dev->real_num_tx_queues)
+		dev->_tx[queue_id].umem = umem;
 
-	memset(&bpf, 0, sizeof(bpf));
-	bpf.command = XDP_QUERY_XSK_UMEM;
-	bpf.xsk.queue_id = queue_id;
+	return 0;
+}
 
-	if (!dev->netdev_ops->ndo_bpf)
-		return 0;
-	return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
+struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
+				       u16 queue_id)
+{
+	if (queue_id < dev->real_num_rx_queues)
+		return dev->_rx[queue_id].umem;
+	if (queue_id < dev->real_num_tx_queues)
+		return dev->_tx[queue_id].umem;
+
+	return NULL;
+}
+EXPORT_SYMBOL(xdp_get_umem_from_qid);
+
+static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
+{
+	if (queue_id < dev->real_num_rx_queues)
+		dev->_rx[queue_id].umem = NULL;
+	if (queue_id < dev->real_num_tx_queues)
+		dev->_tx[queue_id].umem = NULL;
 }
 
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
-			u32 queue_id, u16 flags)
+			u16 queue_id, u16 flags)
 {
 	bool force_zc, force_copy;
 	struct netdev_bpf bpf;
-	int err;
+	int err = 0;
+
+	ASSERT_RTNL();
 
 	force_zc = flags & XDP_ZEROCOPY;
 	force_copy = flags & XDP_COPY;
@@ -70,19 +102,34 @@
 	if (force_zc && force_copy)
 		return -EINVAL;
 
+	if (xdp_get_umem_from_qid(dev, queue_id))
+		return -EBUSY;
+
+	err = xdp_reg_umem_at_qid(dev, umem, queue_id);
+	if (err)
+		return err;
+
+	umem->dev = dev;
+	umem->queue_id = queue_id;
+
+	if (flags & XDP_USE_NEED_WAKEUP) {
+		umem->flags |= XDP_UMEM_USES_NEED_WAKEUP;
+		/* Tx needs to be explicitly woken up the first time.
+		 * Also for supporting drivers that do not implement this
+		 * feature. They will always have to call sendto().
+		 */
+		xsk_set_tx_need_wakeup(umem);
+	}
+
+	dev_hold(dev);
+
 	if (force_copy)
+		/* For copy-mode, we are done. */
 		return 0;
 
-	if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
-		return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
-
-	bpf.command = XDP_QUERY_XSK_UMEM;
-
-	rtnl_lock();
-	err = xdp_umem_query(dev, queue_id);
-	if (err) {
-		err = err < 0 ? -EOPNOTSUPP : -EBUSY;
-		goto err_rtnl_unlock;
+	if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) {
+		err = -EOPNOTSUPP;
+		goto err_unreg_umem;
 	}
 
 	bpf.command = XDP_SETUP_XSK_UMEM;
@@ -91,52 +138,81 @@
 
 	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
 	if (err)
-		goto err_rtnl_unlock;
-	rtnl_unlock();
+		goto err_unreg_umem;
 
-	dev_hold(dev);
-	umem->dev = dev;
-	umem->queue_id = queue_id;
 	umem->zc = true;
 	return 0;
 
-err_rtnl_unlock:
-	rtnl_unlock();
-	return force_zc ? err : 0; /* fail or fallback */
+err_unreg_umem:
+	if (!force_zc)
+		err = 0; /* fallback to copy mode */
+	if (err)
+		xdp_clear_umem_at_qid(dev, queue_id);
+	return err;
 }
 
-static void xdp_umem_clear_dev(struct xdp_umem *umem)
+void xdp_umem_clear_dev(struct xdp_umem *umem)
 {
 	struct netdev_bpf bpf;
 	int err;
 
-	if (umem->dev) {
+	ASSERT_RTNL();
+
+	if (!umem->dev)
+		return;
+
+	if (umem->zc) {
 		bpf.command = XDP_SETUP_XSK_UMEM;
 		bpf.xsk.umem = NULL;
 		bpf.xsk.queue_id = umem->queue_id;
 
-		rtnl_lock();
 		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
-		rtnl_unlock();
 
 		if (err)
 			WARN(1, "failed to disable umem!\n");
-
-		dev_put(umem->dev);
-		umem->dev = NULL;
 	}
+
+	xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
+
+	dev_put(umem->dev);
+	umem->dev = NULL;
+	umem->zc = false;
+}
+
+static void xdp_umem_unmap_pages(struct xdp_umem *umem)
+{
+	unsigned int i;
+
+	for (i = 0; i < umem->npgs; i++)
+		if (PageHighMem(umem->pgs[i]))
+			vunmap(umem->pages[i].addr);
+}
+
+static int xdp_umem_map_pages(struct xdp_umem *umem)
+{
+	unsigned int i;
+	void *addr;
+
+	for (i = 0; i < umem->npgs; i++) {
+		if (PageHighMem(umem->pgs[i]))
+			addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL);
+		else
+			addr = page_address(umem->pgs[i]);
+
+		if (!addr) {
+			xdp_umem_unmap_pages(umem);
+			return -ENOMEM;
+		}
+
+		umem->pages[i].addr = addr;
+	}
+
+	return 0;
 }
 
 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
-	unsigned int i;
-
-	for (i = 0; i < umem->npgs; i++) {
-		struct page *page = umem->pgs[i];
-
-		set_page_dirty_lock(page);
-		put_page(page);
-	}
+	put_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
 
 	kfree(umem->pgs);
 	umem->pgs = NULL;
@@ -152,10 +228,11 @@
 
 static void xdp_umem_release(struct xdp_umem *umem)
 {
-	struct task_struct *task;
-	struct mm_struct *mm;
-
+	rtnl_lock();
 	xdp_umem_clear_dev(umem);
+	rtnl_unlock();
+
+	ida_simple_remove(&umem_ida, umem->id);
 
 	if (umem->fq) {
 		xskq_destroy(umem->fq);
@@ -167,23 +244,15 @@
 		umem->cq = NULL;
 	}
 
+	xsk_reuseq_destroy(umem);
+
+	xdp_umem_unmap_pages(umem);
 	xdp_umem_unpin_pages(umem);
 
-	task = get_pid_task(umem->pid, PIDTYPE_PID);
-	put_pid(umem->pid);
-	if (!task)
-		goto out;
-	mm = get_task_mm(task);
-	put_task_struct(task);
-	if (!mm)
-		goto out;
-
-	mmput(mm);
 	kfree(umem->pages);
 	umem->pages = NULL;
 
 	xdp_umem_unaccount_pages(umem);
-out:
 	kfree(umem);
 }
 
@@ -221,10 +290,10 @@
 	if (!umem->pgs)
 		return -ENOMEM;
 
-	down_write(&current->mm->mmap_sem);
+	down_read(&current->mm->mmap_sem);
 	npgs = get_user_pages(umem->address, umem->npgs,
-			      gup_flags, &umem->pgs[0], NULL);
-	up_write(&current->mm->mmap_sem);
+			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
+	up_read(&current->mm->mmap_sem);
 
 	if (npgs != umem->npgs) {
 		if (npgs >= 0) {
@@ -270,10 +339,11 @@
 
 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 {
+	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
 	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
 	unsigned int chunks, chunks_per_page;
 	u64 addr = mr->addr, size = mr->len;
-	int size_chk, err, i;
+	int size_chk, err;
 
 	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
 		/* Strictly speaking we could support this, if:
@@ -285,7 +355,11 @@
 		return -EINVAL;
 	}
 
-	if (!is_power_of_2(chunk_size))
+	if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG |
+			XDP_UMEM_USES_NEED_WAKEUP))
+		return -EINVAL;
+
+	if (!unaligned_chunks && !is_power_of_2(chunk_size))
 		return -EINVAL;
 
 	if (!PAGE_ALIGNED(addr)) {
@@ -302,25 +376,26 @@
 	if (chunks == 0)
 		return -EINVAL;
 
-	chunks_per_page = PAGE_SIZE / chunk_size;
-	if (chunks < chunks_per_page || chunks % chunks_per_page)
-		return -EINVAL;
-
-	headroom = ALIGN(headroom, 64);
+	if (!unaligned_chunks) {
+		chunks_per_page = PAGE_SIZE / chunk_size;
+		if (chunks < chunks_per_page || chunks % chunks_per_page)
+			return -EINVAL;
+	}
 
 	size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
 	if (size_chk < 0)
 		return -EINVAL;
 
-	umem->pid = get_task_pid(current, PIDTYPE_PID);
 	umem->address = (unsigned long)addr;
-	umem->props.chunk_mask = ~((u64)chunk_size - 1);
-	umem->props.size = size;
+	umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
+					    : ~((u64)chunk_size - 1);
+	umem->size = size;
 	umem->headroom = headroom;
 	umem->chunk_size_nohr = chunk_size - headroom;
 	umem->npgs = size / PAGE_SIZE;
 	umem->pgs = NULL;
 	umem->user = NULL;
+	umem->flags = mr->flags;
 	INIT_LIST_HEAD(&umem->xsk_list);
 	spin_lock_init(&umem->xsk_list_lock);
 
@@ -328,7 +403,7 @@
 
 	err = xdp_umem_account_pages(umem);
 	if (err)
-		goto out;
+		return err;
 
 	err = xdp_umem_pin_pages(umem);
 	if (err)
@@ -337,18 +412,19 @@
 	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
 	if (!umem->pages) {
 		err = -ENOMEM;
-		goto out_account;
+		goto out_pin;
 	}
 
-	for (i = 0; i < umem->npgs; i++)
-		umem->pages[i].addr = page_address(umem->pgs[i]);
+	err = xdp_umem_map_pages(umem);
+	if (!err)
+		return 0;
 
-	return 0;
+	kfree(umem->pages);
 
+out_pin:
+	xdp_umem_unpin_pages(umem);
 out_account:
 	xdp_umem_unaccount_pages(umem);
-out:
-	put_pid(umem->pid);
 	return err;
 }
 
@@ -361,8 +437,16 @@
 	if (!umem)
 		return ERR_PTR(-ENOMEM);
 
+	err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
+	if (err < 0) {
+		kfree(umem);
+		return ERR_PTR(err);
+	}
+	umem->id = err;
+
 	err = xdp_umem_reg(umem, mr);
 	if (err) {
+		ida_simple_remove(&umem_ida, umem->id);
 		kfree(umem);
 		return ERR_PTR(err);
 	}
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index f115603..a63a9fb 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -8,18 +8,9 @@
 
 #include <net/xdp_sock.h>
 
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
-{
-	return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
-}
-
-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
-{
-	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
-}
-
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
-			u32 queue_id, u16 flags);
+			u16 queue_id, u16 flags);
+void xdp_umem_clear_dev(struct xdp_umem *umem);
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
 void xdp_get_umem(struct xdp_umem *umem);
 void xdp_put_umem(struct xdp_umem *umem);
diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h
deleted file mode 100644
index 40eab10..0000000
--- a/net/xdp/xdp_umem_props.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* XDP user-space packet buffer
- * Copyright(c) 2018 Intel Corporation.
- */
-
-#ifndef XDP_UMEM_PROPS_H_
-#define XDP_UMEM_PROPS_H_
-
-struct xdp_umem_props {
-	u64 chunk_mask;
-	u64 size;
-};
-
-#endif /* XDP_UMEM_PROPS_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 6615040..9044073 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -27,23 +27,25 @@
 
 #include "xsk_queue.h"
 #include "xdp_umem.h"
+#include "xsk.h"
 
 #define TX_BATCH_SIZE 16
 
-static struct xdp_sock *xdp_sk(struct sock *sk)
-{
-	return (struct xdp_sock *)sk;
-}
-
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
 {
 	return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
 		READ_ONCE(xs->umem->fq);
 }
 
+bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
+{
+	return xskq_has_addrs(umem->fq, cnt);
+}
+EXPORT_SYMBOL(xsk_umem_has_addrs);
+
 u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
 {
-	return xskq_peek_addr(umem->fq, addr);
+	return xskq_peek_addr(umem->fq, addr, umem);
 }
 EXPORT_SYMBOL(xsk_umem_peek_addr);
 
@@ -53,22 +55,116 @@
 }
 EXPORT_SYMBOL(xsk_umem_discard_addr);
 
+void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+{
+	if (umem->need_wakeup & XDP_WAKEUP_RX)
+		return;
+
+	umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
+	umem->need_wakeup |= XDP_WAKEUP_RX;
+}
+EXPORT_SYMBOL(xsk_set_rx_need_wakeup);
+
+void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+{
+	struct xdp_sock *xs;
+
+	if (umem->need_wakeup & XDP_WAKEUP_TX)
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+		xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
+	}
+	rcu_read_unlock();
+
+	umem->need_wakeup |= XDP_WAKEUP_TX;
+}
+EXPORT_SYMBOL(xsk_set_tx_need_wakeup);
+
+void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+{
+	if (!(umem->need_wakeup & XDP_WAKEUP_RX))
+		return;
+
+	umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+	umem->need_wakeup &= ~XDP_WAKEUP_RX;
+}
+EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);
+
+void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+{
+	struct xdp_sock *xs;
+
+	if (!(umem->need_wakeup & XDP_WAKEUP_TX))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+		xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+	}
+	rcu_read_unlock();
+
+	umem->need_wakeup &= ~XDP_WAKEUP_TX;
+}
+EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);
+
+bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+{
+	return umem->flags & XDP_UMEM_USES_NEED_WAKEUP;
+}
+EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
+
+/* If a buffer crosses a page boundary, we need two memcpy() calls,
+ * one for each page. This is only required in copy mode.
+ */
+static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
+			     u32 len, u32 metalen)
+{
+	void *to_buf = xdp_umem_get_data(umem, addr);
+
+	addr = xsk_umem_add_offset_to_addr(addr);
+	if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {
+		void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
+		u64 page_start = addr & ~(PAGE_SIZE - 1);
+		u64 first_len = PAGE_SIZE - (addr - page_start);
+
+		memcpy(to_buf, from_buf, first_len + metalen);
+		memcpy(next_pg_addr, from_buf + first_len, len - first_len);
+
+		return;
+	}
+
+	memcpy(to_buf, from_buf, len + metalen);
+}
+
 static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
-	void *buffer;
-	u64 addr;
+	u64 offset = xs->umem->headroom;
+	u64 addr, memcpy_addr;
+	void *from_buf;
+	u32 metalen;
 	int err;
 
-	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
-	    len > xs->umem->chunk_size_nohr) {
+	if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
+	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
 		xs->rx_dropped++;
 		return -ENOSPC;
 	}
 
-	addr += xs->umem->headroom;
+	if (unlikely(xdp_data_meta_unsupported(xdp))) {
+		from_buf = xdp->data;
+		metalen = 0;
+	} else {
+		from_buf = xdp->data_meta;
+		metalen = xdp->data - xdp->data_meta;
+	}
 
-	buffer = xdp_umem_get_data(xs->umem, addr);
-	memcpy(buffer, xdp->data, len);
+	memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
+	__xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen);
+
+	offset += metalen;
+	addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
 	err = xskq_produce_batch_desc(xs->rx, addr, len);
 	if (!err) {
 		xskq_discard_addr(xs->umem->fq);
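
The split copy in __xsk_rcv_memcpy() only triggers in unaligned-chunk mode, where a buffer may straddle a page boundary. A sketch of the length arithmetic, with hypothetical values:

/* Hypothetical values: PAGE_SIZE = 4096 and a buffer starting at
 * umem offset addr = 8000 with len = 300, metalen = 0. Then
 * page_start = 4096 and first_len = 4096 - (8000 - 4096) = 192,
 * so the first memcpy() fills the 192 bytes left in the current
 * page and the second places the remaining 108 bytes at the start
 * of the next mapped page.
 */
u64 addr = 8000, len = 300;
u64 page_start = addr & ~(4096ULL - 1);		/* 4096 */
u64 first_len = 4096 - (addr - page_start);	/* 192  */
u64 second_len = len - first_len;		/* 108  */
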
@@ -90,10 +186,23 @@
 	return err;
 }
 
+static bool xsk_is_bound(struct xdp_sock *xs)
+{
+	if (READ_ONCE(xs->state) == XSK_BOUND) {
+		/* Matches smp_wmb() in bind(). */
+		smp_rmb();
+		return true;
+	}
+	return false;
+}
+
 int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	u32 len;
 
+	if (!xsk_is_bound(xs))
+		return -EINVAL;
+
 	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
 		return -EINVAL;
 
@@ -111,32 +220,47 @@
 
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
+	u32 metalen = xdp->data - xdp->data_meta;
 	u32 len = xdp->data_end - xdp->data;
+	u64 offset = xs->umem->headroom;
 	void *buffer;
 	u64 addr;
 	int err;
 
-	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
-		return -EINVAL;
+	spin_lock_bh(&xs->rx_lock);
 
-	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
-	    len > xs->umem->chunk_size_nohr) {
-		xs->rx_dropped++;
-		return -ENOSPC;
+	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) {
+		err = -EINVAL;
+		goto out_unlock;
 	}
 
-	addr += xs->umem->headroom;
+	if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
+	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
+		err = -ENOSPC;
+		goto out_drop;
+	}
 
+	addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
 	buffer = xdp_umem_get_data(xs->umem, addr);
-	memcpy(buffer, xdp->data, len);
-	err = xskq_produce_batch_desc(xs->rx, addr, len);
-	if (!err) {
-		xskq_discard_addr(xs->umem->fq);
-		xsk_flush(xs);
-		return 0;
-	}
+	memcpy(buffer, xdp->data_meta, len + metalen);
 
+	addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
+	err = xskq_produce_batch_desc(xs->rx, addr, len);
+	if (err)
+		goto out_drop;
+
+	xskq_discard_addr(xs->umem->fq);
+	xskq_produce_flush_desc(xs->rx);
+
+	spin_unlock_bh(&xs->rx_lock);
+
+	xs->sk.sk_data_ready(&xs->sk);
+	return 0;
+
+out_drop:
 	xs->rx_dropped++;
+out_unlock:
+	spin_unlock_bh(&xs->rx_lock);
 	return err;
 }
 
@@ -158,22 +282,18 @@
 }
 EXPORT_SYMBOL(xsk_umem_consume_tx_done);
 
-bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len)
+bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
 {
-	struct xdp_desc desc;
 	struct xdp_sock *xs;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
-		if (!xskq_peek_desc(xs->tx, &desc))
+		if (!xskq_peek_desc(xs->tx, desc, umem))
 			continue;
 
-		if (xskq_produce_addr_lazy(umem->cq, desc.addr))
+		if (xskq_produce_addr_lazy(umem->cq, desc->addr))
 			goto out;
 
-		*dma = xdp_umem_get_dma(umem, desc.addr);
-		*len = desc.len;
-
 		xskq_discard_desc(xs->tx);
 		rcu_read_unlock();
 		return true;
@@ -185,12 +305,12 @@
 }
 EXPORT_SYMBOL(xsk_umem_consume_tx);
 
-static int xsk_zc_xmit(struct sock *sk)
+static int xsk_zc_xmit(struct xdp_sock *xs)
 {
-	struct xdp_sock *xs = xdp_sk(sk);
 	struct net_device *dev = xs->dev;
 
-	return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+	return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+					       XDP_WAKEUP_TX);
 }
 
 static void xsk_destruct_skb(struct sk_buff *skb)
@@ -206,11 +326,10 @@
 	sock_wfree(skb);
 }
 
-static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
-			    size_t total_len)
+static int xsk_generic_xmit(struct sock *sk)
 {
-	u32 max_batch = TX_BATCH_SIZE;
 	struct xdp_sock *xs = xdp_sk(sk);
+	u32 max_batch = TX_BATCH_SIZE;
 	bool sent_frame = false;
 	struct xdp_desc desc;
 	struct sk_buff *skb;
@@ -218,7 +337,10 @@
 
 	mutex_lock(&xs->mutex);
 
-	while (xskq_peek_desc(xs->tx, &desc)) {
+	if (xs->queue_id >= xs->dev->real_num_tx_queues)
+		goto out;
+
+	while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
 		char *buffer;
 		u64 addr;
 		u32 len;
@@ -228,12 +350,6 @@
 			goto out;
 		}
 
-		if (xskq_reserve_addr(xs->umem->cq))
-			goto out;
-
-		if (xs->queue_id >= xs->dev->real_num_tx_queues)
-			goto out;
-
 		len = desc.len;
 		skb = sock_alloc_send_skb(sk, len, 1, &err);
 		if (unlikely(!skb)) {
@@ -245,7 +361,7 @@
 		addr = desc.addr;
 		buffer = xdp_umem_get_data(xs->umem, addr);
 		err = skb_store_bits(skb, 0, buffer, len);
-		if (unlikely(err)) {
+		if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) {
 			kfree_skb(skb);
 			goto out;
 		}
@@ -253,7 +369,7 @@
 		skb->dev = xs->dev;
 		skb->priority = sk->sk_priority;
 		skb->mark = sk->sk_mark;
-		skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
+		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
 		skb->destructor = xsk_destruct_skb;
 
 		err = dev_direct_xmit(skb, xs->queue_id);
@@ -276,22 +392,30 @@
 	return err;
 }
 
+static int __xsk_sendmsg(struct sock *sk)
+{
+	struct xdp_sock *xs = xdp_sk(sk);
+
+	if (unlikely(!(xs->dev->flags & IFF_UP)))
+		return -ENETDOWN;
+	if (unlikely(!xs->tx))
+		return -ENOBUFS;
+
+	return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
+}
+
 static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
 	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);
 
-	if (unlikely(!xs->dev))
+	if (unlikely(!xsk_is_bound(xs)))
 		return -ENXIO;
-	if (unlikely(!(xs->dev->flags & IFF_UP)))
-		return -ENETDOWN;
-	if (unlikely(!xs->tx))
-		return -ENOBUFS;
-	if (need_wait)
+	if (unlikely(need_wait))
 		return -EOPNOTSUPP;
 
-	return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
+	return __xsk_sendmsg(sk);
 }
 
 static unsigned int xsk_poll(struct file *file, struct socket *sock,
@@ -300,6 +424,23 @@
 	unsigned int mask = datagram_poll(file, sock, wait);
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);
+	struct net_device *dev;
+	struct xdp_umem *umem;
+
+	if (unlikely(!xsk_is_bound(xs)))
+		return mask;
+
+	dev = xs->dev;
+	umem = xs->umem;
+
+	if (umem->need_wakeup) {
+		if (dev->netdev_ops->ndo_xsk_wakeup)
+			dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+							umem->need_wakeup);
+		else
+			/* Poll needs to drive Tx also in copy mode */
+			__xsk_sendmsg(sk);
+	}
 
 	if (xs->rx && !xskq_empty_desc(xs->rx))
 		mask |= POLLIN | POLLRDNORM;
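
From user space, the need_wakeup flags mean the application issues a syscall only when the driver asks for one. A hedged sketch using the libbpf AF_XDP helpers (assuming a libbpf that ships bpf/xsk.h and a socket bound with XDP_USE_NEED_WAKEUP):

#include <sys/socket.h>
#include <bpf/xsk.h>

/* tx is the struct xsk_ring_prod backing the Tx ring and fd the
 * AF_XDP socket, both assumed set up via xsk_socket__create().
 */
static void kick_tx_if_needed(struct xsk_ring_prod *tx, int fd)
{
	/* Pay the syscall cost only when the driver asked for it. */
	if (xsk_ring_prod__needs_wakeup(tx))
		sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
}
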
@@ -323,10 +464,71 @@
 
 	/* Make sure queue is ready before it can be seen by others */
 	smp_wmb();
-	*queue = q;
+	WRITE_ONCE(*queue, q);
 	return 0;
 }
 
+static void xsk_unbind_dev(struct xdp_sock *xs)
+{
+	struct net_device *dev = xs->dev;
+
+	if (xs->state != XSK_BOUND)
+		return;
+	WRITE_ONCE(xs->state, XSK_UNBOUND);
+
+	/* Wait for driver to stop using the xdp socket. */
+	xdp_del_sk_umem(xs->umem, xs);
+	xs->dev = NULL;
+	synchronize_net();
+	dev_put(dev);
+}
+
+static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
+					      struct xdp_sock ***map_entry)
+{
+	struct xsk_map *map = NULL;
+	struct xsk_map_node *node;
+
+	*map_entry = NULL;
+
+	spin_lock_bh(&xs->map_list_lock);
+	node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
+					node);
+	if (node) {
+		WARN_ON(xsk_map_inc(node->map));
+		map = node->map;
+		*map_entry = node->map_entry;
+	}
+	spin_unlock_bh(&xs->map_list_lock);
+	return map;
+}
+
+static void xsk_delete_from_maps(struct xdp_sock *xs)
+{
+	/* This function removes the current XDP socket from all the
+	 * maps it resides in. We need to take extra care here, due to
+	 * the two locks involved. Each map has a lock synchronizing
+	 * updates to the entries, and each socket has a lock that
+	 * synchronizes access to the list of maps (map_list). For
+	 * deadlock avoidance the locks need to be taken in the order
+	 * "map lock"->"socket map list lock". We start off by
+	 * accessing the socket map list, and take a reference to the
+	 * map to guarantee existence between the
+	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete()
+	 * calls. Then we ask the map to remove the socket, which
+	 * tries to remove the socket from the map. Note that there
+	 * might be updates to the map between
+	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
+	 */
+	struct xdp_sock **map_entry = NULL;
+	struct xsk_map *map;
+
+	while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
+		xsk_map_try_sock_delete(map, xs, map_entry);
+		xsk_map_put(map);
+	}
+}
+
 static int xsk_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -338,16 +540,21 @@
 
 	net = sock_net(sk);
 
+	mutex_lock(&net->xdp.lock);
+	sk_del_node_init_rcu(sk);
+	mutex_unlock(&net->xdp.lock);
+
 	local_bh_disable();
 	sock_prot_inuse_add(net, sk->sk_prot, -1);
 	local_bh_enable();
 
-	if (xs->dev) {
-		/* Wait for driver to stop using the xdp socket. */
-		synchronize_net();
-		dev_put(xs->dev);
-		xs->dev = NULL;
-	}
+	xsk_delete_from_maps(xs);
+	mutex_lock(&xs->mutex);
+	xsk_unbind_dev(xs);
+	mutex_unlock(&xs->mutex);
+
+	xskq_destroy(xs->rx);
+	xskq_destroy(xs->tx);
 
 	sock_orphan(sk);
 	sock->sk = NULL;
@@ -375,6 +582,24 @@
 	return sock;
 }
 
+/* Check if umem pages are contiguous.
+ * In zero-copy mode, use the DMA address to do the page contiguity
+ * check; for all other modes, use addr (the kernel virtual address).
+ * Store the result in the low bits of addr.
+ */
+static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
+{
+	struct xdp_umem_page *pgs = umem->pages;
+	int i, is_contig;
+
+	for (i = 0; i < umem->npgs - 1; i++) {
+		is_contig = (flags & XDP_ZEROCOPY) ?
+			(pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) :
+			(pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr);
+		pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT;
+	}
+}
+
 static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -389,8 +614,14 @@
 	if (sxdp->sxdp_family != AF_XDP)
 		return -EINVAL;
 
+	flags = sxdp->sxdp_flags;
+	if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
+		      XDP_USE_NEED_WAKEUP))
+		return -EINVAL;
+
+	rtnl_lock();
 	mutex_lock(&xs->mutex);
-	if (xs->dev) {
+	if (xs->state != XSK_READY) {
 		err = -EBUSY;
 		goto out_release;
 	}
@@ -408,19 +639,12 @@
 
 	qid = sxdp->sxdp_queue_id;
 
-	if ((xs->rx && qid >= dev->real_num_rx_queues) ||
-	    (xs->tx && qid >= dev->real_num_tx_queues)) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
-
-	flags = sxdp->sxdp_flags;
-
 	if (flags & XDP_SHARED_UMEM) {
 		struct xdp_sock *umem_xs;
 		struct socket *sock;
 
-		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
+		    (flags & XDP_USE_NEED_WAKEUP)) {
 			/* Cannot specify flags for shared sockets. */
 			err = -EINVAL;
 			goto out_unlock;
@@ -439,48 +663,67 @@
 		}
 
 		umem_xs = xdp_sk(sock->sk);
-		if (!umem_xs->umem) {
-			/* No umem to inherit. */
+		if (!xsk_is_bound(umem_xs)) {
 			err = -EBADF;
 			sockfd_put(sock);
 			goto out_unlock;
-		} else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
+		}
+		if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
 			err = -EINVAL;
 			sockfd_put(sock);
 			goto out_unlock;
 		}
 
 		xdp_get_umem(umem_xs->umem);
-		xs->umem = umem_xs->umem;
+		WRITE_ONCE(xs->umem, umem_xs->umem);
 		sockfd_put(sock);
 	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
 		err = -EINVAL;
 		goto out_unlock;
 	} else {
 		/* This xsk has its own umem. */
-		xskq_set_umem(xs->umem->fq, &xs->umem->props);
-		xskq_set_umem(xs->umem->cq, &xs->umem->props);
+		xskq_set_umem(xs->umem->fq, xs->umem->size,
+			      xs->umem->chunk_mask);
+		xskq_set_umem(xs->umem->cq, xs->umem->size,
+			      xs->umem->chunk_mask);
 
 		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
 		if (err)
 			goto out_unlock;
+
+		xsk_check_page_contiguity(xs->umem, flags);
 	}
 
 	xs->dev = dev;
 	xs->zc = xs->umem->zc;
 	xs->queue_id = qid;
-	xskq_set_umem(xs->rx, &xs->umem->props);
-	xskq_set_umem(xs->tx, &xs->umem->props);
+	xskq_set_umem(xs->rx, xs->umem->size, xs->umem->chunk_mask);
+	xskq_set_umem(xs->tx, xs->umem->size, xs->umem->chunk_mask);
 	xdp_add_sk_umem(xs->umem, xs);
 
 out_unlock:
-	if (err)
+	if (err) {
 		dev_put(dev);
+	} else {
+		/* Matches smp_rmb() in bind() for shared umem
+		 * sockets, and xsk_is_bound().
+		 */
+		smp_wmb();
+		WRITE_ONCE(xs->state, XSK_BOUND);
+	}
 out_release:
 	mutex_unlock(&xs->mutex);
+	rtnl_unlock();
 	return err;
 }
 
+struct xdp_umem_reg_v1 {
+	__u64 addr; /* Start of packet data area */
+	__u64 len; /* Length of packet data area */
+	__u32 chunk_size;
+	__u32 headroom;
+};
+
 static int xsk_setsockopt(struct socket *sock, int level, int optname,
 			  char __user *optval, unsigned int optlen)
 {
@@ -504,21 +747,34 @@
 			return -EFAULT;
 
 		mutex_lock(&xs->mutex);
+		if (xs->state != XSK_READY) {
+			mutex_unlock(&xs->mutex);
+			return -EBUSY;
+		}
 		q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
 		err = xsk_init_queue(entries, q, false);
+		if (!err && optname == XDP_TX_RING)
+			/* Tx needs to be explicitly woken up the first time */
+			xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
 		mutex_unlock(&xs->mutex);
 		return err;
 	}
 	case XDP_UMEM_REG:
 	{
-		struct xdp_umem_reg mr;
+		size_t mr_size = sizeof(struct xdp_umem_reg);
+		struct xdp_umem_reg mr = {};
 		struct xdp_umem *umem;
 
-		if (copy_from_user(&mr, optval, sizeof(mr)))
+		if (optlen < sizeof(struct xdp_umem_reg_v1))
+			return -EINVAL;
+		else if (optlen < sizeof(mr))
+			mr_size = sizeof(struct xdp_umem_reg_v1);
+
+		if (copy_from_user(&mr, optval, mr_size))
 			return -EFAULT;
 
 		mutex_lock(&xs->mutex);
-		if (xs->umem) {
+		if (xs->state != XSK_READY || xs->umem) {
 			mutex_unlock(&xs->mutex);
 			return -EBUSY;
 		}
@@ -531,7 +787,7 @@
 
 		/* Make sure umem is ready before it can be seen by others */
 		smp_wmb();
-		xs->umem = umem;
+		WRITE_ONCE(xs->umem, umem);
 		mutex_unlock(&xs->mutex);
 		return 0;
 	}
@@ -545,6 +801,10 @@
 			return -EFAULT;
 
 		mutex_lock(&xs->mutex);
+		if (xs->state != XSK_READY) {
+			mutex_unlock(&xs->mutex);
+			return -EBUSY;
+		}
 		if (!xs->umem) {
 			mutex_unlock(&xs->mutex);
 			return -EINVAL;
@@ -563,6 +823,20 @@
 	return -ENOPROTOOPT;
 }
 
+static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring)
+{
+	ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+	ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+	ring->desc = offsetof(struct xdp_rxtx_ring, desc);
+}
+
+static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
+{
+	ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+	ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+	ring->desc = offsetof(struct xdp_umem_ring, desc);
+}
+
 static int xsk_getsockopt(struct socket *sock, int level, int optname,
 			  char __user *optval, int __user *optlen)
 {
@@ -602,26 +876,69 @@
 	case XDP_MMAP_OFFSETS:
 	{
 		struct xdp_mmap_offsets off;
+		struct xdp_mmap_offsets_v1 off_v1;
+		bool flags_supported = true;
+		void *to_copy;
 
-		if (len < sizeof(off))
+		if (len < sizeof(off_v1))
+			return -EINVAL;
+		else if (len < sizeof(off))
+			flags_supported = false;
+
+		if (flags_supported) {
+			/* xdp_ring_offset is identical to xdp_ring_offset_v1
+			 * except for the flags field added to the end.
+			 */
+			xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
+					       &off.rx);
+			xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
+					       &off.tx);
+			xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
+					       &off.fr);
+			xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
+					       &off.cr);
+			off.rx.flags = offsetof(struct xdp_rxtx_ring,
+						ptrs.flags);
+			off.tx.flags = offsetof(struct xdp_rxtx_ring,
+						ptrs.flags);
+			off.fr.flags = offsetof(struct xdp_umem_ring,
+						ptrs.flags);
+			off.cr.flags = offsetof(struct xdp_umem_ring,
+						ptrs.flags);
+
+			len = sizeof(off);
+			to_copy = &off;
+		} else {
+			xsk_enter_rxtx_offsets(&off_v1.rx);
+			xsk_enter_rxtx_offsets(&off_v1.tx);
+			xsk_enter_umem_offsets(&off_v1.fr);
+			xsk_enter_umem_offsets(&off_v1.cr);
+
+			len = sizeof(off_v1);
+			to_copy = &off_v1;
+		}
+
+		if (copy_to_user(optval, to_copy, len))
+			return -EFAULT;
+		if (put_user(len, optlen))
+			return -EFAULT;
+
+		return 0;
+	}
+	case XDP_OPTIONS:
+	{
+		struct xdp_options opts = {};
+
+		if (len < sizeof(opts))
 			return -EINVAL;
 
-		off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
-		off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
-		off.rx.desc	= offsetof(struct xdp_rxtx_ring, desc);
-		off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
-		off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
-		off.tx.desc	= offsetof(struct xdp_rxtx_ring, desc);
+		mutex_lock(&xs->mutex);
+		if (xs->zc)
+			opts.flags |= XDP_OPTIONS_ZEROCOPY;
+		mutex_unlock(&xs->mutex);
 
-		off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
-		off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
-		off.fr.desc	= offsetof(struct xdp_umem_ring, desc);
-		off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
-		off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
-		off.cr.desc	= offsetof(struct xdp_umem_ring, desc);
-
-		len = sizeof(off);
-		if (copy_to_user(optval, &off, len))
+		len = sizeof(opts);
+		if (copy_to_user(optval, &opts, len))
 			return -EFAULT;
 		if (put_user(len, optlen))
 			return -EFAULT;
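
The optlen negotiation above lets one binary run on both older and newer kernels: user space passes the size of the struct it was built against and the kernel fills in what fits. A caller-side sketch, assuming fd is an AF_XDP socket and the v5.4 uapi headers:

struct xdp_mmap_offsets off;
socklen_t optlen = sizeof(off);

/* On a pre-flags kernel this still succeeds: optlen comes back as
 * sizeof(struct xdp_mmap_offsets_v1) and the flags offsets are
 * simply not filled in, which the caller can detect from optlen.
 */
if (getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen))
	perror("getsockopt(XDP_MMAP_OFFSETS)");
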
@@ -646,6 +963,9 @@
 	unsigned long pfn;
 	struct page *qpg;
 
+	if (READ_ONCE(xs->state) != XSK_READY)
+		return -EBUSY;
+
 	if (offset == XDP_PGOFF_RX_RING) {
 		q = READ_ONCE(xs->rx);
 	} else if (offset == XDP_PGOFF_TX_RING) {
@@ -655,6 +975,8 @@
 		if (!umem)
 			return -EINVAL;
 
+		/* Matches the smp_wmb() in XDP_UMEM_REG */
+		smp_rmb();
 		if (offset == XDP_UMEM_PGOFF_FILL_RING)
 			q = READ_ONCE(umem->fq);
 		else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
@@ -664,8 +986,10 @@
 	if (!q)
 		return -EINVAL;
 
+	/* Matches the smp_wmb() in xsk_init_queue */
+	smp_rmb();
 	qpg = virt_to_head_page(q->ring);
-	if (size > (PAGE_SIZE << compound_order(qpg)))
+	if (size > page_size(qpg))
 		return -EINVAL;
 
 	pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
@@ -673,6 +997,38 @@
 			       size, vma->vm_page_prot);
 }
 
+static int xsk_notifier(struct notifier_block *this,
+			unsigned long msg, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+	struct sock *sk;
+
+	switch (msg) {
+	case NETDEV_UNREGISTER:
+		mutex_lock(&net->xdp.lock);
+		sk_for_each(sk, &net->xdp.list) {
+			struct xdp_sock *xs = xdp_sk(sk);
+
+			mutex_lock(&xs->mutex);
+			if (xs->dev == dev) {
+				sk->sk_err = ENETDOWN;
+				if (!sock_flag(sk, SOCK_DEAD))
+					sk->sk_error_report(sk);
+
+				xsk_unbind_dev(xs);
+
+				/* Clear device references in umem. */
+				xdp_umem_clear_dev(xs->umem);
+			}
+			mutex_unlock(&xs->mutex);
+		}
+		mutex_unlock(&net->xdp.lock);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
 static struct proto xsk_proto = {
 	.name =		"XDP",
 	.owner =	THIS_MODULE,
@@ -707,9 +1063,6 @@
 	if (!sock_flag(sk, SOCK_DEAD))
 		return;
 
-	xskq_destroy(xs->rx);
-	xskq_destroy(xs->tx);
-	xdp_del_sk_umem(xs->umem, xs);
 	xdp_put_umem(xs->umem);
 
 	sk_refcnt_debug_dec(sk);
@@ -747,9 +1100,18 @@
 	sock_set_flag(sk, SOCK_RCU_FREE);
 
 	xs = xdp_sk(sk);
+	xs->state = XSK_READY;
 	mutex_init(&xs->mutex);
+	spin_lock_init(&xs->rx_lock);
 	spin_lock_init(&xs->tx_completion_lock);
 
+	INIT_LIST_HEAD(&xs->map_list);
+	spin_lock_init(&xs->map_list_lock);
+
+	mutex_lock(&net->xdp.lock);
+	sk_add_node_rcu(sk, &net->xdp.list);
+	mutex_unlock(&net->xdp.lock);
+
 	local_bh_disable();
 	sock_prot_inuse_add(net, &xsk_proto, 1);
 	local_bh_enable();
@@ -763,6 +1125,27 @@
 	.owner	= THIS_MODULE,
 };
 
+static struct notifier_block xsk_netdev_notifier = {
+	.notifier_call	= xsk_notifier,
+};
+
+static int __net_init xsk_net_init(struct net *net)
+{
+	mutex_init(&net->xdp.lock);
+	INIT_HLIST_HEAD(&net->xdp.list);
+	return 0;
+}
+
+static void __net_exit xsk_net_exit(struct net *net)
+{
+	WARN_ON_ONCE(!hlist_empty(&net->xdp.list));
+}
+
+static struct pernet_operations xsk_net_ops = {
+	.init = xsk_net_init,
+	.exit = xsk_net_exit,
+};
+
 static int __init xsk_init(void)
 {
 	int err;
@@ -775,8 +1158,20 @@
 	if (err)
 		goto out_proto;
 
+	err = register_pernet_subsys(&xsk_net_ops);
+	if (err)
+		goto out_sk;
+
+	err = register_netdevice_notifier(&xsk_netdev_notifier);
+	if (err)
+		goto out_pernet;
+
 	return 0;
 
+out_pernet:
+	unregister_pernet_subsys(&xsk_net_ops);
+out_sk:
+	sock_unregister(PF_XDP);
 out_proto:
 	proto_unregister(&xsk_proto);
 out:
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
new file mode 100644
index 0000000..4cfd106
--- /dev/null
+++ b/net/xdp/xsk.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. */
+
+#ifndef XSK_H_
+#define XSK_H_
+
+struct xdp_ring_offset_v1 {
+	__u64 producer;
+	__u64 consumer;
+	__u64 desc;
+};
+
+struct xdp_mmap_offsets_v1 {
+	struct xdp_ring_offset_v1 rx;
+	struct xdp_ring_offset_v1 tx;
+	struct xdp_ring_offset_v1 fr;
+	struct xdp_ring_offset_v1 cr;
+};
+
+static inline struct xdp_sock *xdp_sk(struct sock *sk)
+{
+	return (struct xdp_sock *)sk;
+}
+
+#endif /* XSK_H_ */
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
new file mode 100644
index 0000000..f59791b
--- /dev/null
+++ b/net/xdp/xsk_diag.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XDP sockets monitoring support
+ *
+ * Copyright(c) 2019 Intel Corporation.
+ *
+ * Author: Björn Töpel <bjorn.topel@intel.com>
+ */
+
+#include <linux/module.h>
+#include <net/xdp_sock.h>
+#include <linux/xdp_diag.h>
+#include <linux/sock_diag.h>
+
+#include "xsk_queue.h"
+#include "xsk.h"
+
+static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+	struct xdp_diag_info di = {};
+
+	di.ifindex = xs->dev ? xs->dev->ifindex : 0;
+	di.queue_id = xs->queue_id;
+	return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di);
+}
+
+static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type,
+			     struct sk_buff *nlskb)
+{
+	struct xdp_diag_ring dr = {};
+
+	dr.entries = queue->nentries;
+	return nla_put(nlskb, nl_type, sizeof(dr), &dr);
+}
+
+static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
+				  struct sk_buff *nlskb)
+{
+	int err = 0;
+
+	if (xs->rx)
+		err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb);
+	if (!err && xs->tx)
+		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb);
+	return err;
+}
+
+static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+	struct xdp_umem *umem = xs->umem;
+	struct xdp_diag_umem du = {};
+	int err;
+
+	if (!umem)
+		return 0;
+
+	du.id = umem->id;
+	du.size = umem->size;
+	du.num_pages = umem->npgs;
+	du.chunk_size = umem->chunk_size_nohr + umem->headroom;
+	du.headroom = umem->headroom;
+	du.ifindex = umem->dev ? umem->dev->ifindex : 0;
+	du.queue_id = umem->queue_id;
+	du.flags = 0;
+	if (umem->zc)
+		du.flags |= XDP_DU_F_ZEROCOPY;
+	du.refs = refcount_read(&umem->users);
+
+	err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
+
+	if (!err && umem->fq)
+		err = xsk_diag_put_ring(umem->fq, XDP_DIAG_UMEM_FILL_RING, nlskb);
+	if (!err && umem->cq) {
+		err = xsk_diag_put_ring(umem->cq, XDP_DIAG_UMEM_COMPLETION_RING,
+					nlskb);
+	}
+	return err;
+}
+
+static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
+			 struct xdp_diag_req *req,
+			 struct user_namespace *user_ns,
+			 u32 portid, u32 seq, u32 flags, int sk_ino)
+{
+	struct xdp_sock *xs = xdp_sk(sk);
+	struct xdp_diag_msg *msg;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
+			flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	msg = nlmsg_data(nlh);
+	memset(msg, 0, sizeof(*msg));
+	msg->xdiag_family = AF_XDP;
+	msg->xdiag_type = sk->sk_type;
+	msg->xdiag_ino = sk_ino;
+	sock_diag_save_cookie(sk, msg->xdiag_cookie);
+
+	mutex_lock(&xs->mutex);
+	if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_INFO) &&
+	    nla_put_u32(nlskb, XDP_DIAG_UID,
+			from_kuid_munged(user_ns, sock_i_uid(sk))))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
+	    xsk_diag_put_rings_cfg(xs, nlskb))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_UMEM) &&
+	    xsk_diag_put_umem(xs, nlskb))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_MEMINFO) &&
+	    sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
+		goto out_nlmsg_trim;
+
+	mutex_unlock(&xs->mutex);
+	nlmsg_end(nlskb, nlh);
+	return 0;
+
+out_nlmsg_trim:
+	mutex_unlock(&xs->mutex);
+	nlmsg_cancel(nlskb, nlh);
+	return -EMSGSIZE;
+}
+
+static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb)
+{
+	struct xdp_diag_req *req = nlmsg_data(cb->nlh);
+	struct net *net = sock_net(nlskb->sk);
+	int num = 0, s_num = cb->args[0];
+	struct sock *sk;
+
+	mutex_lock(&net->xdp.lock);
+
+	sk_for_each(sk, &net->xdp.list) {
+		if (!net_eq(sock_net(sk), net))
+			continue;
+		if (num++ < s_num)
+			continue;
+
+		if (xsk_diag_fill(sk, nlskb, req,
+				  sk_user_ns(NETLINK_CB(cb->skb).sk),
+				  NETLINK_CB(cb->skb).portid,
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				  sock_i_ino(sk)) < 0) {
+			num--;
+			break;
+		}
+	}
+
+	mutex_unlock(&net->xdp.lock);
+	cb->args[0] = num;
+	return nlskb->len;
+}
+
+static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
+{
+	struct netlink_dump_control c = { .dump = xsk_diag_dump };
+	int hdrlen = sizeof(struct xdp_diag_req);
+	struct net *net = sock_net(nlskb->sk);
+
+	if (nlmsg_len(hdr) < hdrlen)
+		return -EINVAL;
+
+	if (!(hdr->nlmsg_flags & NLM_F_DUMP))
+		return -EOPNOTSUPP;
+
+	return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c);
+}
+
+static const struct sock_diag_handler xsk_diag_handler = {
+	.family = AF_XDP,
+	.dump = xsk_diag_handler_dump,
+};
+
+static int __init xsk_diag_init(void)
+{
+	return sock_diag_register(&xsk_diag_handler);
+}
+
+static void __exit xsk_diag_exit(void)
+{
+	sock_diag_unregister(&xsk_diag_handler);
+}
+
+module_init(xsk_diag_init);
+module_exit(xsk_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);
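
The new module answers standard NETLINK_SOCK_DIAG dump requests for AF_XDP. A hedged sketch of a minimal query (error handling elided; assumes a libc that defines AF_XDP and the v5.4 linux/xdp_diag.h header):

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/xdp_diag.h>

int dump_xdp_sockets(void)
{
	struct {
		struct nlmsghdr nlh;
		struct xdp_diag_req req;
	} msg;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);

	memset(&msg, 0, sizeof(msg));
	msg.nlh.nlmsg_len = sizeof(msg);
	msg.nlh.nlmsg_type = SOCK_DIAG_BY_FAMILY;
	msg.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	msg.req.sdiag_family = AF_XDP;
	msg.req.xdiag_show = XDP_SHOW_INFO | XDP_SHOW_RING_CFG |
			     XDP_SHOW_UMEM | XDP_SHOW_MEMINFO;

	/* Replies arrive as NLM_F_MULTI xdp_diag_msg records. */
	return send(fd, &msg, sizeof(msg), 0) < 0 ? -1 : fd;
}
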
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index 6c32e92..b665045 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -3,16 +3,19 @@
  * Copyright(c) 2018 Intel Corporation.
  */
 
+#include <linux/log2.h>
 #include <linux/slab.h>
+#include <linux/overflow.h>
 
 #include "xsk_queue.h"
 
-void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props)
+void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask)
 {
 	if (!q)
 		return;
 
-	q->umem_props = *umem_props;
+	q->size = size;
+	q->chunk_mask = chunk_mask;
 }
 
 static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
@@ -61,3 +64,56 @@
 	page_frag_free(q->ring);
 	kfree(q);
 }
+
+struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
+{
+	struct xdp_umem_fq_reuse *newq;
+
+	/* Check for overflow */
+	if (nentries > (u32)roundup_pow_of_two(nentries))
+		return NULL;
+	nentries = roundup_pow_of_two(nentries);
+
+	newq = kvmalloc(struct_size(newq, handles, nentries), GFP_KERNEL);
+	if (!newq)
+		return NULL;
+	memset(newq, 0, offsetof(typeof(*newq), handles));
+
+	newq->nentries = nentries;
+	return newq;
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_prepare);
+
+struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
+					  struct xdp_umem_fq_reuse *newq)
+{
+	struct xdp_umem_fq_reuse *oldq = umem->fq_reuse;
+
+	if (!oldq) {
+		umem->fq_reuse = newq;
+		return NULL;
+	}
+
+	if (newq->nentries < oldq->length)
+		return newq;
+
+	memcpy(newq->handles, oldq->handles,
+	       array_size(oldq->length, sizeof(u64)));
+	newq->length = oldq->length;
+
+	umem->fq_reuse = newq;
+	return oldq;
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_swap);
+
+void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
+{
+	kvfree(rq);
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_free);
+
+void xsk_reuseq_destroy(struct xdp_umem *umem)
+{
+	xsk_reuseq_free(umem->fq_reuse);
+	umem->fq_reuse = NULL;
+}
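
The three exported helpers above make up the driver-facing half of the
fill-queue reuse API. A hedged sketch of the setup pattern; the surrounding
function name is illustrative, but the call sequence mirrors how zero-copy
drivers of this era size the reuse queue to their HW ring:

	/* Allocate a reuse queue sized to the ring and install it on the
	 * umem; xsk_reuseq_swap() hands back any previously installed
	 * queue, which the caller must free.
	 */
	static int example_xsk_ring_setup(struct xdp_umem *umem, u32 ring_size)
	{
		struct xdp_umem_fq_reuse *reuseq;

		reuseq = xsk_reuseq_prepare(ring_size);
		if (!reuseq)
			return -ENOMEM;

		xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
		return 0;
	}
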
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 8a64b15..eddae46 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -16,6 +16,7 @@
 struct xdp_ring {
 	u32 producer ____cacheline_aligned_in_smp;
 	u32 consumer ____cacheline_aligned_in_smp;
+	u32 flags;
 };
 
 /* Used for the RX and TX queues for packets */
@@ -31,7 +32,8 @@
 };
 
 struct xsk_queue {
-	struct xdp_umem_props umem_props;
+	u64 chunk_mask;
+	u64 size;
 	u32 ring_mask;
 	u32 nentries;
 	u32 prod_head;
@@ -42,6 +44,48 @@
 	u64 invalid_descs;
 };
 
+/* The structure of the shared state of the rings is the same as the
+ * ring buffer in kernel/events/ring_buffer.c. For the Rx and completion
+ * ring, the kernel is the producer and user space is the consumer. For
+ * the Tx and fill rings, the kernel is the consumer and user space is
+ * the producer.
+ *
+ * producer                         consumer
+ *
+ * if (LOAD ->consumer) {           LOAD ->producer
+ *                    (A)           smp_rmb()       (C)
+ *    STORE $data                   LOAD $data
+ *    smp_wmb()       (B)           smp_mb()        (D)
+ *    STORE ->producer              STORE ->consumer
+ * }
+ *
+ * (A) pairs with (D), and (B) pairs with (C).
+ *
+ * Starting with (B), it orders the stores of $data before the store
+ * of the producer pointer. If this barrier were missing, the consumer
+ * could observe the producer pointer being set and thus load the data
+ * before the producer has written the new data. The consumer would in
+ * this case load the old data.
+ *
+ * (C) protects the consumer from speculatively loading the data before
+ * the producer pointer actually has been read. If we do not have this
+ * barrier, some architectures could load old data, since speculative
+ * loads are not discarded: the CPU does not know there is a dependency
+ * between ->producer and the data.
+ *
+ * (A) is a control dependency that separates the load of ->consumer
+ * from the stores of $data. In case ->consumer indicates there is no
+ * room in the buffer to store $data, we do not store it, so no
+ * explicit barrier is needed.
+ *
+ * (D) ensures the load of the data is observed to happen before the
+ * store of the consumer pointer. If we did not have this memory
+ * barrier, the producer could observe the consumer pointer being set
+ * and overwrite the data with a new value before the consumer got the
+ * chance to read the old value. The consumer would thus miss reading
+ * the old entry and very likely read the new entry twice, once right
+ * now and again after circling through the ring.
+ */
+
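
Mapped onto the helpers in this file: (A) is the free-entry check done before
each store (the "/* A, matches D */" annotations added below), (B) is the
smp_wmb() in the produce/flush paths, (C) is the smp_rmb() in the peek paths,
and (D) is the smp_mb() ahead of the consumer-pointer store. A condensed
rendering of the diagram as C against the queue fields used here; this is an
editorial sketch, not a verbatim function body:

	/* producer side, cf. xskq_produce_addr() */
	if (xskq_nb_free(q, q->prod_tail, 1) == 0)	/* (A) */
		return -ENOSPC;
	ring->desc[q->prod_tail++ & q->ring_mask] = addr; /* STORE $data */
	smp_wmb();					/* (B) */
	WRITE_ONCE(q->ring->producer, q->prod_tail);

	/* consumer side, cf. xskq_peek_addr()/xskq_validate_addr() */
	q->prod_tail = READ_ONCE(q->ring->producer);	/* LOAD ->producer */
	smp_rmb();					/* (C) */
	addr = READ_ONCE(ring->desc[idx]);		/* LOAD $data */
	smp_mb();					/* (D) */
	WRITE_ONCE(q->ring->consumer, q->cons_tail);
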
 /* Common functions operating for both RXTX and umem queues */
 
 static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
@@ -74,11 +118,36 @@
 	return q->nentries - (producer - q->cons_tail);
 }
 
+static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
+{
+	u32 entries = q->prod_tail - q->cons_tail;
+
+	if (entries >= cnt)
+		return true;
+
+	/* Refresh the local pointer. */
+	q->prod_tail = READ_ONCE(q->ring->producer);
+	entries = q->prod_tail - q->cons_tail;
+
+	return entries >= cnt;
+}
+
 /* UMEM queue */
 
+static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
+					      u64 length)
+{
+	bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
+	bool next_pg_contig =
+		(unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr &
+			XSK_NEXT_PG_CONTIG_MASK;
+
+	return cross_pg && !next_pg_contig;
+}
+
 static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
 {
-	if (addr >= q->umem_props.size) {
+	if (addr >= q->size) {
 		q->invalid_descs++;
 		return false;
 	}
@@ -86,25 +155,54 @@
 	return true;
 }
 
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
+static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
+						u64 length,
+						struct xdp_umem *umem)
+{
+	u64 base_addr = xsk_umem_extract_addr(addr);
+
+	addr = xsk_umem_add_offset_to_addr(addr);
+	if (base_addr >= q->size || addr >= q->size ||
+	    xskq_crosses_non_contig_pg(umem, addr, length)) {
+		q->invalid_descs++;
+		return false;
+	}
+
+	return true;
+}
+
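
The unaligned-chunk helpers used above rely on the v5.4 address encoding from
include/net/xdp_sock.h, where the upper 16 bits of a 64-bit ring address carry
an offset into the chunk (XSK_UNALIGNED_BUF_OFFSET_SHIFT == 48). A small
worked example:

	/* addr = base | (offset << 48), e.g. chunk base 0x4000 with a
	 * 256-byte offset into it:
	 */
	u64 base = 0x4000;
	u64 addr = base | (256ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT);

	/* xsk_umem_extract_addr(addr)       -> 0x4000 (base)          */
	/* xsk_umem_extract_offset(addr)     -> 256                    */
	/* xsk_umem_add_offset_to_addr(addr) -> 0x4100 (0x4000 + 256)  */
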
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
+				      struct xdp_umem *umem)
 {
 	while (q->cons_tail != q->cons_head) {
 		struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 		unsigned int idx = q->cons_tail & q->ring_mask;
 
-		*addr = READ_ONCE(ring->desc[idx]) & q->umem_props.chunk_mask;
+		*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+
+		if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+			if (xskq_is_valid_addr_unaligned(q, *addr,
+							 umem->chunk_size_nohr,
+							 umem))
+				return addr;
+			goto out;
+		}
+
 		if (xskq_is_valid_addr(q, *addr))
 			return addr;
 
+out:
 		q->cons_tail++;
 	}
 
 	return NULL;
 }
 
-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
+				  struct xdp_umem *umem)
 {
 	if (q->cons_tail == q->cons_head) {
+		smp_mb(); /* D, matches A */
 		WRITE_ONCE(q->ring->consumer, q->cons_tail);
 		q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
 
@@ -112,7 +210,7 @@
 		smp_rmb();
 	}
 
-	return xskq_validate_addr(q, addr);
+	return xskq_validate_addr(q, addr, umem);
 }
 
 static inline void xskq_discard_addr(struct xsk_queue *q)
@@ -127,10 +225,11 @@
 	if (xskq_nb_free(q, q->prod_tail, 1) == 0)
 		return -ENOSPC;
 
+	/* A, matches D */
 	ring->desc[q->prod_tail++ & q->ring_mask] = addr;
 
 	/* Order producer and data */
-	smp_wmb();
+	smp_wmb(); /* B, matches C */
 
 	WRITE_ONCE(q->ring->producer, q->prod_tail);
 	return 0;
@@ -143,6 +242,7 @@
 	if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0)
 		return -ENOSPC;
 
+	/* A, matches D */
 	ring->desc[q->prod_head++ & q->ring_mask] = addr;
 	return 0;
 }
@@ -151,7 +251,7 @@
 					     u32 nb_entries)
 {
 	/* Order producer and data */
-	smp_wmb();
+	smp_wmb(); /* B, matches C */
 
 	q->prod_tail += nb_entries;
 	WRITE_ONCE(q->ring->producer, q->prod_tail);
@@ -162,19 +262,33 @@
 	if (xskq_nb_free(q, q->prod_head, 1) == 0)
 		return -ENOSPC;
 
+	/* A, matches D */
 	q->prod_head++;
 	return 0;
 }
 
 /* Rx/Tx queue */
 
-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
+				      struct xdp_umem *umem)
 {
+	if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+		if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
+			return false;
+
+		if (d->len > umem->chunk_size_nohr || d->options) {
+			q->invalid_descs++;
+			return false;
+		}
+
+		return true;
+	}
+
 	if (!xskq_is_valid_addr(q, d->addr))
 		return false;
 
-	if (((d->addr + d->len) & q->umem_props.chunk_mask) !=
-	    (d->addr & q->umem_props.chunk_mask)) {
+	if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) ||
+	    d->options) {
 		q->invalid_descs++;
 		return false;
 	}
@@ -183,14 +297,15 @@
 }
 
 static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
-						  struct xdp_desc *desc)
+						  struct xdp_desc *desc,
+						  struct xdp_umem *umem)
 {
 	while (q->cons_tail != q->cons_head) {
 		struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
 		unsigned int idx = q->cons_tail & q->ring_mask;
 
 		*desc = READ_ONCE(ring->desc[idx]);
-		if (xskq_is_valid_desc(q, desc))
+		if (xskq_is_valid_desc(q, desc, umem))
 			return desc;
 
 		q->cons_tail++;
@@ -200,17 +315,19 @@
 }
 
 static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
-					      struct xdp_desc *desc)
+					      struct xdp_desc *desc,
+					      struct xdp_umem *umem)
 {
 	if (q->cons_tail == q->cons_head) {
+		smp_mb(); /* D, matches A */
 		WRITE_ONCE(q->ring->consumer, q->cons_tail);
 		q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
 
 		/* Order consumer and data */
-		smp_rmb();
+		smp_rmb(); /* C, matches B */
 	}
 
-	return xskq_validate_desc(q, desc);
+	return xskq_validate_desc(q, desc, umem);
 }
 
 static inline void xskq_discard_desc(struct xsk_queue *q)
@@ -227,6 +344,7 @@
 	if (xskq_nb_free(q, q->prod_head, 1) == 0)
 		return -ENOSPC;
 
+	/* A, matches D */
 	idx = (q->prod_head++) & q->ring_mask;
 	ring->desc[idx].addr = addr;
 	ring->desc[idx].len = len;
@@ -237,9 +355,9 @@
 static inline void xskq_produce_flush_desc(struct xsk_queue *q)
 {
 	/* Order producer and data */
-	smp_wmb();
+	smp_wmb(); /* B, matches C */
 
-	q->prod_tail = q->prod_head,
+	q->prod_tail = q->prod_head;
 	WRITE_ONCE(q->ring->producer, q->prod_tail);
 }
 
@@ -253,8 +371,11 @@
 	return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries;
 }
 
-void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
+void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
 struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
 void xskq_destroy(struct xsk_queue *q_ops);
 
+/* Executed by the core when the entire UMEM gets freed */
+void xsk_reuseq_destroy(struct xdp_umem *umem);
+
 #endif /* _LINUX_XSK_QUEUE_H */
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 4a9ee2d..51bb601 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -1,23 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # XFRM configuration
 #
 config XFRM
        bool
-       depends on NET
+       depends on INET
        select GRO_CELLS
+       select SKB_EXTENSIONS
 
 config XFRM_OFFLOAD
        bool
-       depends on XFRM
 
 config XFRM_ALGO
 	tristate
 	select XFRM
 	select CRYPTO
+	select CRYPTO_HASH
+	select CRYPTO_BLKCIPHER
 
+if INET
 config XFRM_USER
 	tristate "Transformation user configuration interface"
-	depends on INET
 	select XFRM_ALGO
 	---help---
 	  Support for Transformation(XFRM) user configuration interface
@@ -56,7 +59,7 @@
 
 config XFRM_STATISTICS
 	bool "Transformation statistics"
-	depends on INET && XFRM && PROC_FS
+	depends on XFRM && PROC_FS
 	---help---
 	  These statistics are not a SNMP/MIB specification but show
 	  statistics about transformation error (or almost error) factor
@@ -95,3 +98,5 @@
 	  <draft-sugimoto-mip6-pfkey-migrate>.
 
 	  If unsure, say N.
+
+endif # INET
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 44ac85f..32a378e 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * xfrm algorithm interface
  *
  * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
  */
 
 #include <crypto/hash.h>
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 5611b75..189ef15 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * xfrm_device.c - IPsec device offloading code.
  *
@@ -5,11 +6,6 @@
  *
  * Author:
  * Steffen Klassert <steffen.klassert@secunet.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/errno.h>
@@ -23,6 +19,60 @@
 #include <linux/notifier.h>
 
 #ifdef CONFIG_XFRM_OFFLOAD
+static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb,
+				  unsigned int hsize)
+{
+	struct xfrm_offload *xo = xfrm_offload(skb);
+
+	skb_reset_mac_len(skb);
+	pskb_pull(skb, skb->mac_len + hsize + x->props.header_len);
+
+	if (xo->flags & XFRM_GSO_SEGMENT) {
+		skb_reset_transport_header(skb);
+		skb->transport_header -= x->props.header_len;
+	}
+}
+
+static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb,
+				    unsigned int hsize)
+{
+	struct xfrm_offload *xo = xfrm_offload(skb);
+
+	if (xo->flags & XFRM_GSO_SEGMENT)
+		skb->transport_header = skb->network_header + hsize;
+
+	skb_reset_mac_len(skb);
+	pskb_pull(skb, skb->mac_len + x->props.header_len);
+}
+
+/* Adjust pointers into the packet when IPsec is done at layer 2 */
+static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb)
+{
+	switch (x->outer_mode.encap) {
+	case XFRM_MODE_TUNNEL:
+		if (x->outer_mode.family == AF_INET)
+			return __xfrm_mode_tunnel_prep(x, skb,
+						       sizeof(struct iphdr));
+		if (x->outer_mode.family == AF_INET6)
+			return __xfrm_mode_tunnel_prep(x, skb,
+						       sizeof(struct ipv6hdr));
+		break;
+	case XFRM_MODE_TRANSPORT:
+		if (x->outer_mode.family == AF_INET)
+			return __xfrm_transport_prep(x, skb,
+						     sizeof(struct iphdr));
+		if (x->outer_mode.family == AF_INET6)
+			return __xfrm_transport_prep(x, skb,
+						     sizeof(struct ipv6hdr));
+		break;
+	case XFRM_MODE_ROUTEOPTIMIZATION:
+	case XFRM_MODE_IN_TRIGGER:
+	case XFRM_MODE_BEET:
+		break;
+	}
+}
+
 struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
 {
 	int err;
@@ -32,6 +82,7 @@
 	struct softnet_data *sd;
 	netdev_features_t esp_features = features;
 	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct sec_path *sp;
 
 	if (!xo)
 		return skb;
@@ -39,7 +90,8 @@
 	if (!(features & NETIF_F_HW_ESP))
 		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
 
-	x = skb->sp->xvec[skb->sp->len - 1];
+	sp = skb_sec_path(skb);
+	x = sp->xvec[sp->len - 1];
 	if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
 		return skb;
 
@@ -76,7 +128,8 @@
 	}
 
 	if (!skb->next) {
-		x->outer_mode->xmit(x, skb);
+		esp_features |= skb->dev->gso_partial_features;
+		xfrm_outer_mode_prep(x, skb);
 
 		xo->flags |= XFRM_DEV_RESUME;
 
@@ -99,12 +152,14 @@
 
 	do {
 		struct sk_buff *nskb = skb2->next;
-		skb2->next = NULL;
+
+		esp_features |= skb->dev->gso_partial_features;
+		skb_mark_not_on_list(skb2);
 
 		xo = xfrm_offload(skb2);
 		xo->flags |= XFRM_DEV_RESUME;
 
-		x->outer_mode->xmit(x, skb2);
+		xfrm_outer_mode_prep(x, skb2);
 
 		err = x->type_offload->xmit(x, skb2, esp_features);
 		if (!err) {
@@ -192,9 +247,13 @@
 
 	err = dev->xfrmdev_ops->xdo_dev_state_add(x);
 	if (err) {
+		xso->num_exthdrs = 0;
+		xso->flags = 0;
 		xso->dev = NULL;
 		dev_put(dev);
-		return err;
+
+		if (err != -EOPNOTSUPP)
+			return err;
 	}
 
 	return 0;
@@ -212,9 +271,8 @@
 		return false;
 
 	if ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
-	    (!xdst->child->xfrm && x->type->get_mtu)) {
-		mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
-
+	    (!xdst->child->xfrm)) {
+		mtu = xfrm_state_mtu(x, xdst->child_mtu_cached);
 		if (skb->len <= mtu)
 			goto ok;
 
@@ -241,7 +299,7 @@
 	unsigned long flags;
 
 	rcu_read_lock();
-	txq = netdev_pick_tx(dev, skb, NULL);
+	txq = netdev_core_pick_tx(dev, skb, NULL);
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_xmit_frozen_or_stopped(txq))
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index 2ad33ce..eca8d84 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -6,7 +6,7 @@
 
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/xfrm.h>
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 61be810..ce66323 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -13,7 +13,7 @@
 
 static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr)
 {
-	return ntohl(addr->a6[2] ^ addr->a6[3]);
+	return jhash2((__force u32 *)addr->a6, 4, 0);
 }
 
 static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr,
@@ -26,8 +26,7 @@
 static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr,
 						    const xfrm_address_t *saddr)
 {
-	return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
-		     saddr->a6[2] ^ saddr->a6[3]);
+	return __xfrm6_addr_hash(daddr) ^ __xfrm6_addr_hash(saddr);
 }
 
 static inline u32 __bits2mask32(__u8 bits)
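
The two hunks above widen the IPv6 hashes from an XOR of the low two address
words to jhash2() over the full 128 bits. As a hedged illustration of why:

	/* Before: only a6[2] and a6[3] were mixed, so addresses that
	 * differ solely in their upper 64 bits collided, e.g.
	 * 2001:db8::1 and fe80::1 both hashed ntohl(0 ^ htonl(1)).
	 * jhash2() folds in all four 32-bit words, spreading such
	 * addresses across the hash table.
	 */
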
diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h
new file mode 100644
index 0000000..c7b0318
--- /dev/null
+++ b/net/xfrm/xfrm_inout.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+#include <net/xfrm.h>
+
+#ifndef XFRM_INOUT_H
+#define XFRM_INOUT_H 1
+
+static inline void xfrm6_beet_make_header(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	iph->version = 6;
+
+	memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+	       sizeof(iph->flow_lbl));
+	iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
+
+	ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
+	iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
+}
+
+static inline void xfrm4_beet_make_header(struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	iph->ihl = 5;
+	iph->version = 4;
+
+	iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
+	iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+
+	iph->id = XFRM_MODE_SKB_CB(skb)->id;
+	iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off;
+	iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl;
+}
+
+#endif
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index be3520e..2c86a2f 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -21,6 +21,8 @@
 #include <net/ip_tunnels.h>
 #include <net/ip6_tunnel.h>
 
+#include "xfrm_inout.h"
+
 struct xfrm_trans_tasklet {
 	struct tasklet_struct tasklet;
 	struct sk_buff_head queue;
@@ -38,8 +40,6 @@
 
 #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
 
-static struct kmem_cache *secpath_cachep __ro_after_init;
-
 static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
 static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
 
@@ -111,56 +111,24 @@
 	return ret;
 }
 
-void __secpath_destroy(struct sec_path *sp)
+struct sec_path *secpath_set(struct sk_buff *skb)
 {
-	int i;
-	for (i = 0; i < sp->len; i++)
-		xfrm_state_put(sp->xvec[i]);
-	kmem_cache_free(secpath_cachep, sp);
-}
-EXPORT_SYMBOL(__secpath_destroy);
+	struct sec_path *sp, *tmp = skb_ext_find(skb, SKB_EXT_SEC_PATH);
 
-struct sec_path *secpath_dup(struct sec_path *src)
-{
-	struct sec_path *sp;
-
-	sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC);
+	sp = skb_ext_add(skb, SKB_EXT_SEC_PATH);
 	if (!sp)
 		return NULL;
 
-	sp->len = 0;
+	if (tmp) /* reused existing one (was COW'd if needed) */
+		return sp;
+
+	/* allocated new secpath */
+	memset(sp->ovec, 0, sizeof(sp->ovec));
 	sp->olen = 0;
+	sp->len = 0;
 
-	memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH]));
-
-	if (src) {
-		int i;
-
-		memcpy(sp, src, sizeof(*sp));
-		for (i = 0; i < sp->len; i++)
-			xfrm_state_hold(sp->xvec[i]);
-	}
-	refcount_set(&sp->refcnt, 1);
 	return sp;
 }
-EXPORT_SYMBOL(secpath_dup);
-
-int secpath_set(struct sk_buff *skb)
-{
-	struct sec_path *sp;
-
-	/* Allocate new secpath or COW existing one. */
-	if (!skb->sp || refcount_read(&skb->sp->refcnt) != 1) {
-		sp = secpath_dup(skb->sp);
-		if (!sp)
-			return -ENOMEM;
-
-		if (skb->sp)
-			secpath_put(skb->sp);
-		skb->sp = sp;
-	}
-	return 0;
-}
 EXPORT_SYMBOL(secpath_set);
 
 /* Fetch spi and seq from ipsec header */
@@ -200,35 +168,300 @@
 }
 EXPORT_SYMBOL(xfrm_parse_spi);
 
-int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct xfrm_mode *inner_mode = x->inner_mode;
+	struct iphdr *iph;
+	int optlen = 0;
+	int err = -EINVAL;
+
+	if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
+		struct ip_beet_phdr *ph;
+		int phlen;
+
+		if (!pskb_may_pull(skb, sizeof(*ph)))
+			goto out;
+
+		ph = (struct ip_beet_phdr *)skb->data;
+
+		phlen = sizeof(*ph) + ph->padlen;
+		optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
+		if (optlen < 0 || optlen & 3 || optlen > 250)
+			goto out;
+
+		XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr;
+
+		if (!pskb_may_pull(skb, phlen))
+			goto out;
+		__skb_pull(skb, phlen);
+	}
+
+	skb_push(skb, sizeof(*iph));
+	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+
+	xfrm4_beet_make_header(skb);
+
+	iph = ip_hdr(skb);
+
+	iph->ihl += optlen / 4;
+	iph->tot_len = htons(skb->len);
+	iph->daddr = x->sel.daddr.a4;
+	iph->saddr = x->sel.saddr.a4;
+	iph->check = 0;
+	iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
+	err = 0;
+out:
+	return err;
+}
+
+static void ipip_ecn_decapsulate(struct sk_buff *skb)
+{
+	struct iphdr *inner_iph = ipip_hdr(skb);
+
+	if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
+		IP_ECN_set_ce(inner_iph);
+}
+
+static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err = -EINVAL;
+
+	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
+		goto out;
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto out;
+
+	err = skb_unclone(skb, GFP_ATOMIC);
+	if (err)
+		goto out;
+
+	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+		ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb));
+	if (!(x->props.flags & XFRM_STATE_NOECN))
+		ipip_ecn_decapsulate(skb);
+
+	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+	if (skb->mac_len)
+		eth_hdr(skb)->h_proto = skb->protocol;
+
+	err = 0;
+
+out:
+	return err;
+}
+
+static void ipip6_ecn_decapsulate(struct sk_buff *skb)
+{
+	struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
+
+	if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
+		IP6_ECN_set_ce(skb, inner_iph);
+}
+
+static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err = -EINVAL;
+
+	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
+		goto out;
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		goto out;
+
+	err = skb_unclone(skb, GFP_ATOMIC);
+	if (err)
+		goto out;
+
+	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+		ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
+			       ipipv6_hdr(skb));
+	if (!(x->props.flags & XFRM_STATE_NOECN))
+		ipip6_ecn_decapsulate(skb);
+
+	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+	if (skb->mac_len)
+		eth_hdr(skb)->h_proto = skb->protocol;
+
+	err = 0;
+
+out:
+	return err;
+}
+
+static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6h;
+	int size = sizeof(struct ipv6hdr);
 	int err;
 
-	err = x->outer_mode->afinfo->extract_input(x, skb);
+	err = skb_cow_head(skb, size + skb->mac_len);
+	if (err)
+		goto out;
+
+	__skb_push(skb, size);
+	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+
+	xfrm6_beet_make_header(skb);
+
+	ip6h = ipv6_hdr(skb);
+	ip6h->payload_len = htons(skb->len - size);
+	ip6h->daddr = x->sel.daddr.in6;
+	ip6h->saddr = x->sel.saddr.in6;
+	err = 0;
+out:
+	return err;
+}
+
+/* Remove encapsulation header.
+ *
+ * The IP header will be moved over the top of the encapsulation
+ * header.
+ *
+ * On entry, the transport header shall point to where the IP header
+ * should be and the network header shall be set to where the IP
+ * header currently is.  skb->data shall point to the start of the
+ * payload.
+ */
+static int
+xfrm_inner_mode_encap_remove(struct xfrm_state *x,
+			     const struct xfrm_mode *inner_mode,
+			     struct sk_buff *skb)
+{
+	switch (inner_mode->encap) {
+	case XFRM_MODE_BEET:
+		if (inner_mode->family == AF_INET)
+			return xfrm4_remove_beet_encap(x, skb);
+		if (inner_mode->family == AF_INET6)
+			return xfrm6_remove_beet_encap(x, skb);
+		break;
+	case XFRM_MODE_TUNNEL:
+		if (inner_mode->family == AF_INET)
+			return xfrm4_remove_tunnel_encap(x, skb);
+		if (inner_mode->family == AF_INET6)
+			return xfrm6_remove_tunnel_encap(x, skb);
+		break;
+	}
+
+	WARN_ON_ONCE(1);
+	return -EOPNOTSUPP;
+}
+
+static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	const struct xfrm_mode *inner_mode = &x->inner_mode;
+	const struct xfrm_state_afinfo *afinfo;
+	int err = -EAFNOSUPPORT;
+
+	rcu_read_lock();
+	afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
+	if (likely(afinfo))
+		err = afinfo->extract_input(x, skb);
+	rcu_read_unlock();
+
 	if (err)
 		return err;
 
 	if (x->sel.family == AF_UNSPEC) {
 		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
-		if (inner_mode == NULL)
+		if (!inner_mode)
 			return -EAFNOSUPPORT;
 	}
 
-	skb->protocol = inner_mode->afinfo->eth_proto;
-	return inner_mode->input2(x, skb);
+	switch (inner_mode->family) {
+	case AF_INET:
+		skb->protocol = htons(ETH_P_IP);
+		break;
+	case AF_INET6:
+		skb->protocol = htons(ETH_P_IPV6);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+	return xfrm_inner_mode_encap_remove(x, inner_mode, skb);
 }
-EXPORT_SYMBOL(xfrm_prepare_input);
+
+/* Remove encapsulation header.
+ *
+ * The IP header will be moved over the top of the encapsulation header.
+ *
+ * On entry, skb_transport_header() shall point to where the IP header
+ * should be and skb_network_header() shall be set to where the IP header
+ * currently is.  skb->data shall point to the start of the payload.
+ */
+static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int ihl = skb->data - skb_transport_header(skb);
+
+	if (skb->transport_header != skb->network_header) {
+		memmove(skb_transport_header(skb),
+			skb_network_header(skb), ihl);
+		skb->network_header = skb->transport_header;
+	}
+	ip_hdr(skb)->tot_len = htons(skb->len + ihl);
+	skb_reset_transport_header(skb);
+	return 0;
+}
+
+static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	int ihl = skb->data - skb_transport_header(skb);
+
+	if (skb->transport_header != skb->network_header) {
+		memmove(skb_transport_header(skb),
+			skb_network_header(skb), ihl);
+		skb->network_header = skb->transport_header;
+	}
+	ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
+					   sizeof(struct ipv6hdr));
+	skb_reset_transport_header(skb);
+	return 0;
+#else
+	WARN_ON_ONCE(1);
+	return -EAFNOSUPPORT;
+#endif
+}
+
+static int xfrm_inner_mode_input(struct xfrm_state *x,
+				 const struct xfrm_mode *inner_mode,
+				 struct sk_buff *skb)
+{
+	switch (inner_mode->encap) {
+	case XFRM_MODE_BEET:
+	case XFRM_MODE_TUNNEL:
+		return xfrm_prepare_input(x, skb);
+	case XFRM_MODE_TRANSPORT:
+		if (inner_mode->family == AF_INET)
+			return xfrm4_transport_input(x, skb);
+		if (inner_mode->family == AF_INET6)
+			return xfrm6_transport_input(x, skb);
+		break;
+	case XFRM_MODE_ROUTEOPTIMIZATION:
+		WARN_ON_ONCE(1);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
 
 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 {
+	const struct xfrm_state_afinfo *afinfo;
 	struct net *net = dev_net(skb->dev);
+	const struct xfrm_mode *inner_mode;
 	int err;
 	__be32 seq;
 	__be32 seq_hi;
 	struct xfrm_state *x = NULL;
 	xfrm_address_t *daddr;
-	struct xfrm_mode *inner_mode;
 	u32 mark = skb->mark;
 	unsigned int family = AF_UNSPEC;
 	int decaps = 0;
@@ -236,6 +469,7 @@
 	bool xfrm_gro = false;
 	bool crypto_done = false;
 	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct sec_path *sp;
 
 	if (encap_type < 0) {
 		x = xfrm_input_state(skb);
@@ -246,10 +480,13 @@
 			else
 				XFRM_INC_STATS(net,
 					       LINUX_MIB_XFRMINSTATEINVALID);
+
+			if (encap_type == -1)
+				dev_put(skb->dev);
 			goto drop;
 		}
 
-		family = x->outer_mode->afinfo->family;
+		family = x->outer_mode.family;
 
 		/* An encap_type of -1 indicates async resumption. */
 		if (encap_type == -1) {
@@ -312,8 +549,8 @@
 		break;
 	}
 
-	err = secpath_set(skb);
-	if (err) {
+	sp = secpath_set(skb);
+	if (!sp) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
 		goto drop;
 	}
@@ -328,7 +565,9 @@
 	daddr = (xfrm_address_t *)(skb_network_header(skb) +
 				   XFRM_SPI_SKB_CB(skb)->daddroff);
 	do {
-		if (skb->sp->len == XFRM_MAX_DEPTH) {
+		sp = skb_sec_path(skb);
+
+		if (sp->len == XFRM_MAX_DEPTH) {
 			secpath_reset(skb);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
 			goto drop;
@@ -344,7 +583,13 @@
 
 		skb->mark = xfrm_smark_get(skb->mark, x);
 
-		skb->sp->xvec[skb->sp->len++] = x;
+		sp->xvec[sp->len++] = x;
+
+		skb_dst_force(skb);
+		if (!skb_dst(skb)) {
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
+			goto drop;
+		}
 
 lock:
 		spin_lock(&x->lock);
@@ -385,7 +630,6 @@
 		XFRM_SKB_CB(skb)->seq.input.low = seq;
 		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
 
-		skb_dst_force(skb);
 		dev_hold(skb->dev);
 
 		if (crypto_done)
@@ -426,7 +670,7 @@
 
 		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
 
-		inner_mode = x->inner_mode;
+		inner_mode = &x->inner_mode;
 
 		if (x->sel.family == AF_UNSPEC) {
 			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@ -436,12 +680,12 @@
 			}
 		}
 
-		if (inner_mode->input(x, skb)) {
+		if (xfrm_inner_mode_input(x, inner_mode, skb)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
 			goto drop;
 		}
 
-		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
+		if (x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL) {
 			decaps = 1;
 			break;
 		}
@@ -451,7 +695,7 @@
 		 * transport mode so the outer address is identical.
 		 */
 		daddr = &x->id.daddr;
-		family = x->outer_mode->afinfo->family;
+		family = x->outer_mode.family;
 
 		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
 		if (err < 0) {
@@ -465,11 +709,12 @@
 	if (err)
 		goto drop;
 
-	nf_reset(skb);
+	nf_reset_ct(skb);
 
 	if (decaps) {
-		if (skb->sp)
-			skb->sp->olen = 0;
+		sp = skb_sec_path(skb);
+		if (sp)
+			sp->olen = 0;
 		skb_dst_drop(skb);
 		gro_cells_receive(&gro_cells, skb);
 		return 0;
@@ -478,10 +723,16 @@
 		if (xo)
 			xfrm_gro = xo->flags & XFRM_GRO;
 
-		err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
+		err = -EAFNOSUPPORT;
+		rcu_read_lock();
+		afinfo = xfrm_state_afinfo_get_rcu(x->inner_mode.family);
+		if (likely(afinfo))
+			err = afinfo->transport_finish(skb, xfrm_gro || async);
+		rcu_read_unlock();
 		if (xfrm_gro) {
-			if (skb->sp)
-				skb->sp->olen = 0;
+			sp = skb_sec_path(skb);
+			if (sp)
+				sp->olen = 0;
 			skb_dst_drop(skb);
 			gro_cells_receive(&gro_cells, skb);
 			return err;
@@ -546,11 +797,6 @@
 	if (err)
 		gro_cells.cells = NULL;
 
-	secpath_cachep = kmem_cache_create("secpath_cache",
-					   sizeof(struct sec_path),
-					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
-					   NULL);
-
 	for_each_possible_cpu(i) {
 		struct xfrm_trans_tasklet *trans;
 
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 6f05e83..0f5131b 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -70,17 +70,28 @@
 	return NULL;
 }
 
-static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
+static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
+					    unsigned short family)
 {
 	struct xfrmi_net *xfrmn;
-	int ifindex;
 	struct xfrm_if *xi;
+	int ifindex = 0;
 
-	if (!skb->dev)
+	if (!secpath_exists(skb) || !skb->dev)
 		return NULL;
 
-	xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id);
-	ifindex = skb->dev->ifindex;
+	switch (family) {
+	case AF_INET6:
+		ifindex = inet6_sdif(skb);
+		break;
+	case AF_INET:
+		ifindex = inet_sdif(skb);
+		break;
+	}
+	if (!ifindex)
+		ifindex = skb->dev->ifindex;
+
+	xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id);
 
 	for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
 		if (ifindex == xi->dev->ifindex &&
@@ -122,7 +133,7 @@
 	free_percpu(dev->tstats);
 }
 
-static int xfrmi_create2(struct net_device *dev)
+static int xfrmi_create(struct net_device *dev)
 {
 	struct xfrm_if *xi = netdev_priv(dev);
 	struct net *net = dev_net(dev);
@@ -134,8 +145,6 @@
 	if (err < 0)
 		goto out;
 
-	strcpy(xi->p.name, dev->name);
-
 	dev_hold(dev);
 	xfrmi_link(xfrmn, xi);
 
@@ -145,54 +154,7 @@
 	return err;
 }
 
-static struct xfrm_if *xfrmi_create(struct net *net, struct xfrm_if_parms *p)
-{
-	struct net_device *dev;
-	struct xfrm_if *xi;
-	char name[IFNAMSIZ];
-	int err;
-
-	if (p->name[0]) {
-		strlcpy(name, p->name, IFNAMSIZ);
-	} else {
-		err = -EINVAL;
-		goto failed;
-	}
-
-	dev = alloc_netdev(sizeof(*xi), name, NET_NAME_UNKNOWN, xfrmi_dev_setup);
-	if (!dev) {
-		err = -EAGAIN;
-		goto failed;
-	}
-
-	dev_net_set(dev, net);
-
-	xi = netdev_priv(dev);
-	xi->p = *p;
-	xi->net = net;
-	xi->dev = dev;
-	xi->phydev = dev_get_by_index(net, p->link);
-	if (!xi->phydev) {
-		err = -ENODEV;
-		goto failed_free;
-	}
-
-	err = xfrmi_create2(dev);
-	if (err < 0)
-		goto failed_dev_put;
-
-	return xi;
-
-failed_dev_put:
-	dev_put(xi->phydev);
-failed_free:
-	free_netdev(dev);
-failed:
-	return ERR_PTR(err);
-}
-
-static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p,
-				   int create)
+static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p)
 {
 	struct xfrm_if __rcu **xip;
 	struct xfrm_if *xi;
@@ -200,17 +162,11 @@
 
 	for (xip = &xfrmn->xfrmi[0];
 	     (xi = rtnl_dereference(*xip)) != NULL;
-	     xip = &xi->next) {
-		if (xi->p.if_id == p->if_id) {
-			if (create)
-				return ERR_PTR(-EEXIST);
-
+	     xip = &xi->next)
+		if (xi->p.if_id == p->if_id)
 			return xi;
-		}
-	}
-	if (!create)
-		return ERR_PTR(-ENODEV);
-	return xfrmi_create(net, p);
+
+	return NULL;
 }
 
 static void xfrmi_dev_uninit(struct net_device *dev)
@@ -219,7 +175,6 @@
 	struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
 
 	xfrmi_unlink(xfrmn, xi);
-	dev_put(xi->phydev);
 	dev_put(dev);
 }
 
@@ -230,7 +185,7 @@
 	skb->skb_iif = 0;
 	skb->ignore_df = 0;
 	skb_dst_drop(skb);
-	nf_reset(skb);
+	nf_reset_ct(skb);
 	nf_reset_trace(skb);
 
 	if (!xnet)
@@ -244,14 +199,14 @@
 
 static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
 {
+	const struct xfrm_mode *inner_mode;
 	struct pcpu_sw_netstats *tstats;
-	struct xfrm_mode *inner_mode;
 	struct net_device *dev;
 	struct xfrm_state *x;
 	struct xfrm_if *xi;
 	bool xnet;
 
-	if (err && !skb->sp)
+	if (err && !secpath_exists(skb))
 		return 0;
 
 	x = xfrm_input_state(skb);
@@ -273,7 +228,7 @@
 	xnet = !net_eq(xi->net, dev_net(skb->dev));
 
 	if (xnet) {
-		inner_mode = x->inner_mode;
+		inner_mode = &x->inner_mode;
 
 		if (x->sel.family == AF_UNSPEC) {
 			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@ -285,7 +240,7 @@
 		}
 
 		if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
-				       inner_mode->afinfo->family))
+				       inner_mode->family))
 			return -EPERM;
 	}
 
@@ -336,7 +291,7 @@
 	if (tdev == dev) {
 		stats->collisions++;
 		net_warn_ratelimited("%s: Local routing loop detected!\n",
-				     xi->p.name);
+				     dev->name);
 		goto tx_err_dst_release;
 	}
 
@@ -406,7 +361,7 @@
 		goto tx_err;
 	}
 
-	fl.flowi_oif = xi->phydev->ifindex;
+	fl.flowi_oif = xi->p.link;
 
 	ret = xfrmi_xmit2(skb, dev, &fl);
 	if (ret < 0)
@@ -472,9 +427,9 @@
 	}
 
 	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
-		ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+		ipv4_update_pmtu(skb, net, info, 0, protocol);
 	else
-		ipv4_redirect(skb, net, 0, 0, protocol, 0);
+		ipv4_redirect(skb, net, 0, protocol);
 	xfrm_state_put(x);
 
 	return 0;
@@ -547,7 +502,7 @@
 
 static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
 {
-	struct net *net = dev_net(xi->dev);
+	struct net *net = xi->net;
 	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 	int err;
 
@@ -564,9 +519,6 @@
 {
 	int cpu;
 
-	if (!dev->tstats)
-		return;
-
 	for_each_possible_cpu(cpu) {
 		struct pcpu_sw_netstats *stats;
 		struct pcpu_sw_netstats tmp;
@@ -595,7 +547,7 @@
 {
 	struct xfrm_if *xi = netdev_priv(dev);
 
-	return xi->phydev->ifindex;
+	return xi->p.link;
 }
 
 
@@ -621,12 +573,14 @@
 	dev->needs_free_netdev	= true;
 	dev->priv_destructor	= xfrmi_dev_free;
 	netif_keep_dst(dev);
+
+	eth_broadcast_addr(dev->broadcast);
 }
 
 static int xfrmi_dev_init(struct net_device *dev)
 {
 	struct xfrm_if *xi = netdev_priv(dev);
-	struct net_device *phydev = xi->phydev;
+	struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
 	int err;
 
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
@@ -641,13 +595,19 @@
 
 	dev->features |= NETIF_F_LLTX;
 
-	dev->needed_headroom = phydev->needed_headroom;
-	dev->needed_tailroom = phydev->needed_tailroom;
+	if (phydev) {
+		dev->needed_headroom = phydev->needed_headroom;
+		dev->needed_tailroom = phydev->needed_tailroom;
 
-	if (is_zero_ether_addr(dev->dev_addr))
-		eth_hw_addr_inherit(dev, phydev);
-	if (is_zero_ether_addr(dev->broadcast))
-		memcpy(dev->broadcast, phydev->broadcast, dev->addr_len);
+		if (is_zero_ether_addr(dev->dev_addr))
+			eth_hw_addr_inherit(dev, phydev);
+		if (is_zero_ether_addr(dev->broadcast))
+			memcpy(dev->broadcast, phydev->broadcast,
+			       dev->addr_len);
+	} else {
+		eth_hw_addr_random(dev);
+		eth_broadcast_addr(dev->broadcast);
+	}
 
 	return 0;
 }
@@ -678,21 +638,22 @@
 			struct netlink_ext_ack *extack)
 {
 	struct net *net = dev_net(dev);
-	struct xfrm_if_parms *p;
+	struct xfrm_if_parms p;
 	struct xfrm_if *xi;
+	int err;
+
+	xfrmi_netlink_parms(data, &p);
+	xi = xfrmi_locate(net, &p);
+	if (xi)
+		return -EEXIST;
 
 	xi = netdev_priv(dev);
-	p = &xi->p;
+	xi->p = p;
+	xi->net = net;
+	xi->dev = dev;
 
-	xfrmi_netlink_parms(data, p);
-
-	if (!tb[IFLA_IFNAME])
-		return -EINVAL;
-
-	nla_strlcpy(p->name, tb[IFLA_IFNAME], IFNAMSIZ);
-
-	xi = xfrmi_locate(net, p, 1);
-	return PTR_ERR_OR_ZERO(xi);
+	err = xfrmi_create(dev);
+	return err;
 }
 
 static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
@@ -705,20 +666,19 @@
 			   struct netlink_ext_ack *extack)
 {
 	struct xfrm_if *xi = netdev_priv(dev);
-	struct net *net = dev_net(dev);
+	struct net *net = xi->net;
+	struct xfrm_if_parms p;
 
-	xfrmi_netlink_parms(data, &xi->p);
-
-	xi = xfrmi_locate(net, &xi->p, 0);
-
-	if (IS_ERR_OR_NULL(xi)) {
+	xfrmi_netlink_parms(data, &p);
+	xi = xfrmi_locate(net, &p);
+	if (!xi) {
 		xi = netdev_priv(dev);
 	} else {
 		if (xi->dev != dev)
 			return -EEXIST;
 	}
 
-	return xfrmi_update(xi, &xi->p);
+	return xfrmi_update(xi, &p);
 }
 
 static size_t xfrmi_get_size(const struct net_device *dev)
@@ -745,11 +705,11 @@
 	return -EMSGSIZE;
 }
 
-struct net *xfrmi_get_link_net(const struct net_device *dev)
+static struct net *xfrmi_get_link_net(const struct net_device *dev)
 {
 	struct xfrm_if *xi = netdev_priv(dev);
 
-	return dev_net(xi->phydev);
+	return xi->net;
 }
 
 static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
@@ -785,11 +745,6 @@
 	unregister_netdevice_many(&list);
 }
 
-static int __net_init xfrmi_init_net(struct net *net)
-{
-	return 0;
-}
-
 static void __net_exit xfrmi_exit_net(struct net *net)
 {
 	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
@@ -800,7 +755,6 @@
 }
 
 static struct pernet_operations xfrmi_net_ops = {
-	.init = xfrmi_init_net,
 	.exit = xfrmi_exit_net,
 	.id   = &xfrmi_net_id,
 	.size = sizeof(struct xfrmi_net),
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index a00ec71..4d42244 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IP Payload Compression Protocol (IPComp) - RFC3173.
  *
  * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
  * Copyright (c) 2003-2008 Herbert Xu <herbert@gondor.apana.org.au>
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  * Todo:
  *   - Tunable compression parameters.
  *   - Compression stats.
@@ -89,7 +85,7 @@
 		if (dlen < len)
 			len = dlen;
 
-		frag->page_offset = 0;
+		skb_frag_off_set(frag, 0);
 		skb_frag_size_set(frag, len);
 		memcpy(skb_frag_address(frag), scratch, len);
 
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 261995d..b1db55b 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * xfrm_output.c - Common IPsec encapsulation code.
  *
  * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/errno.h>
@@ -17,9 +13,13 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <net/dst.h>
+#include <net/inet_ecn.h>
 #include <net/xfrm.h>
 
+#include "xfrm_inout.h"
+
 static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
+static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
 
 static int xfrm_skb_check_space(struct sk_buff *skb)
 {
@@ -50,6 +50,360 @@
 	return child;
 }
 
+/* Add encapsulation header.
+ *
+ * The IP header will be moved forward to make space for the encapsulation
+ * header.
+ */
+static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	int ihl = iph->ihl * 4;
+
+	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
+	skb_set_network_header(skb, -x->props.header_len);
+	skb->mac_header = skb->network_header +
+			  offsetof(struct iphdr, protocol);
+	skb->transport_header = skb->network_header + ihl;
+	__skb_pull(skb, ihl);
+	memmove(skb_network_header(skb), iph, ihl);
+	return 0;
+}
+
+/* Add encapsulation header.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the encapsulation header.
+ */
+static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct ipv6hdr *iph;
+	u8 *prevhdr;
+	int hdr_len;
+
+	iph = ipv6_hdr(skb);
+	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
+	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	if (hdr_len < 0)
+		return hdr_len;
+	skb_set_mac_header(skb,
+			   (prevhdr - x->props.header_len) - skb->data);
+	skb_set_network_header(skb, -x->props.header_len);
+	skb->transport_header = skb->network_header + hdr_len;
+	__skb_pull(skb, hdr_len);
+	memmove(ipv6_hdr(skb), iph, hdr_len);
+	return 0;
+#else
+	WARN_ON_ONCE(1);
+	return -EAFNOSUPPORT;
+#endif
+}
+
+/* Add route optimization header space.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the route optimization header.
+ */
+static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct ipv6hdr *iph;
+	u8 *prevhdr;
+	int hdr_len;
+
+	iph = ipv6_hdr(skb);
+
+	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	if (hdr_len < 0)
+		return hdr_len;
+	skb_set_mac_header(skb,
+			   (prevhdr - x->props.header_len) - skb->data);
+	skb_set_network_header(skb, -x->props.header_len);
+	skb->transport_header = skb->network_header + hdr_len;
+	__skb_pull(skb, hdr_len);
+	memmove(ipv6_hdr(skb), iph, hdr_len);
+
+	x->lastused = ktime_get_real_seconds();
+
+	return 0;
+#else
+	WARN_ON_ONCE(1);
+	return -EAFNOSUPPORT;
+#endif
+}
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
+ */
+static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ip_beet_phdr *ph;
+	struct iphdr *top_iph;
+	int hdrlen, optlen;
+
+	hdrlen = 0;
+	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
+	if (unlikely(optlen))
+		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
+
+	skb_set_network_header(skb, -x->props.header_len - hdrlen +
+			       (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
+	if (x->sel.family != AF_INET6)
+		skb->network_header += IPV4_BEET_PHMAXLEN;
+	skb->mac_header = skb->network_header +
+			  offsetof(struct iphdr, protocol);
+	skb->transport_header = skb->network_header + sizeof(*top_iph);
+
+	xfrm4_beet_make_header(skb);
+
+	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);
+
+	top_iph = ip_hdr(skb);
+
+	if (unlikely(optlen)) {
+		if (WARN_ON(optlen < 0))
+			return -EINVAL;
+
+		ph->padlen = 4 - (optlen & 4);
+		ph->hdrlen = optlen / 8;
+		ph->nexthdr = top_iph->protocol;
+		if (ph->padlen)
+			memset(ph + 1, IPOPT_NOP, ph->padlen);
+
+		top_iph->protocol = IPPROTO_BEETPH;
+		top_iph->ihl = sizeof(struct iphdr) / 4;
+	}
+
+	top_iph->saddr = x->props.saddr.a4;
+	top_iph->daddr = x->id.daddr.a4;
+
+	return 0;
+}
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per RFC 2401.
+ */
+static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct iphdr *top_iph;
+	int flags;
+
+	skb_set_inner_network_header(skb, skb_network_offset(skb));
+	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
+	skb_set_network_header(skb, -x->props.header_len);
+	skb->mac_header = skb->network_header +
+			  offsetof(struct iphdr, protocol);
+	skb->transport_header = skb->network_header + sizeof(*top_iph);
+	top_iph = ip_hdr(skb);
+
+	top_iph->ihl = 5;
+	top_iph->version = 4;
+
+	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
+
+	/* DS field disclosure depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
+	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+		top_iph->tos = 0;
+	else
+		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
+					    XFRM_MODE_SKB_CB(skb)->tos);
+
+	flags = x->props.flags;
+	if (flags & XFRM_STATE_NOECN)
+		IP_ECN_clear(top_iph);
+
+	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
+		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
+
+	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));
+
+	top_iph->saddr = x->props.saddr.a4;
+	top_iph->daddr = x->id.daddr.a4;
+	ip_select_ident(dev_net(dst->dev), skb, NULL);
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct ipv6hdr *top_iph;
+	int dsfield;
+
+	skb_set_inner_network_header(skb, skb_network_offset(skb));
+	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
+	skb_set_network_header(skb, -x->props.header_len);
+	skb->mac_header = skb->network_header +
+			  offsetof(struct ipv6hdr, nexthdr);
+	skb->transport_header = skb->network_header + sizeof(*top_iph);
+	top_iph = ipv6_hdr(skb);
+
+	top_iph->version = 6;
+
+	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+	       sizeof(top_iph->flow_lbl));
+	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
+
+	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+		dsfield = 0;
+	else
+		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
+	if (x->props.flags & XFRM_STATE_NOECN)
+		dsfield &= ~INET_ECN_MASK;
+	ipv6_change_dsfield(top_iph, 0, dsfield);
+	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
+	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
+	return 0;
+}
+
+static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *top_iph;
+	struct ip_beet_phdr *ph;
+	int optlen, hdr_len;
+
+	hdr_len = 0;
+	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
+	if (unlikely(optlen))
+		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);
+
+	skb_set_network_header(skb, -x->props.header_len - hdr_len);
+	if (x->sel.family != AF_INET6)
+		skb->network_header += IPV4_BEET_PHMAXLEN;
+	skb->mac_header = skb->network_header +
+			  offsetof(struct ipv6hdr, nexthdr);
+	skb->transport_header = skb->network_header + sizeof(*top_iph);
+	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);
+
+	xfrm6_beet_make_header(skb);
+
+	top_iph = ipv6_hdr(skb);
+	if (unlikely(optlen)) {
+		if (WARN_ON(optlen < 0))
+			return -EINVAL;
+
+		ph->padlen = 4 - (optlen & 4);
+		ph->hdrlen = optlen / 8;
+		ph->nexthdr = top_iph->nexthdr;
+		if (ph->padlen)
+			memset(ph + 1, IPOPT_NOP, ph->padlen);
+
+		top_iph->nexthdr = IPPROTO_BEETPH;
+	}
+
+	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
+	return 0;
+}
+#endif
+
+/* Add encapsulation header.
+ *
+ * On exit, the transport header will be set to the start of the
+ * encapsulation header to be filled in by x->type->output and the mac
+ * header will be set to the nextheader (protocol for IPv4) field of the
+ * extension header directly preceding the encapsulation header, or in
+ * its absence, that of the top IP header.
+ * The value of the network header will always point to the top IP header
+ * while skb->data will point to the payload.
+ */
+static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	err = xfrm_inner_extract_output(x, skb);
+	if (err)
+		return err;
+
+	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+	skb->protocol = htons(ETH_P_IP);
+
+	switch (x->outer_mode.encap) {
+	case XFRM_MODE_BEET:
+		return xfrm4_beet_encap_add(x, skb);
+	case XFRM_MODE_TUNNEL:
+		return xfrm4_tunnel_encap_add(x, skb);
+	}
+
+	WARN_ON_ONCE(1);
+	return -EOPNOTSUPP;
+}
+
+static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	int err;
+
+	err = xfrm_inner_extract_output(x, skb);
+	if (err)
+		return err;
+
+	skb->ignore_df = 1;
+	skb->protocol = htons(ETH_P_IPV6);
+
+	switch (x->outer_mode.encap) {
+	case XFRM_MODE_BEET:
+		return xfrm6_beet_encap_add(x, skb);
+	case XFRM_MODE_TUNNEL:
+		return xfrm6_tunnel_encap_add(x, skb);
+	default:
+		WARN_ON_ONCE(1);
+		return -EOPNOTSUPP;
+	}
+#endif
+	WARN_ON_ONCE(1);
+	return -EAFNOSUPPORT;
+}
+
+static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	switch (x->outer_mode.encap) {
+	case XFRM_MODE_BEET:
+	case XFRM_MODE_TUNNEL:
+		if (x->outer_mode.family == AF_INET)
+			return xfrm4_prepare_output(x, skb);
+		if (x->outer_mode.family == AF_INET6)
+			return xfrm6_prepare_output(x, skb);
+		break;
+	case XFRM_MODE_TRANSPORT:
+		if (x->outer_mode.family == AF_INET)
+			return xfrm4_transport_output(x, skb);
+		if (x->outer_mode.family == AF_INET6)
+			return xfrm6_transport_output(x, skb);
+		break;
+	case XFRM_MODE_ROUTEOPTIMIZATION:
+		if (x->outer_mode.family == AF_INET6)
+			return xfrm6_ro_output(x, skb);
+		WARN_ON_ONCE(1);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+#if IS_ENABLED(CONFIG_NET_PKTGEN)
+int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	return xfrm_outer_mode_output(x, skb);
+}
+EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
+#endif
+
 static int xfrm_output_one(struct sk_buff *skb, int err)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -68,7 +422,7 @@
 
 		skb->mark = xfrm_smark_get(skb->mark, x);
 
-		err = x->outer_mode->output(x, skb);
+		err = xfrm_outer_mode_output(x, skb);
 		if (err) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
 			goto error_nolock;
@@ -102,6 +456,7 @@
 		skb_dst_force(skb);
 		if (!skb_dst(skb)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
+			err = -EHOSTUNREACH;
 			goto error_nolock;
 		}
 
@@ -130,7 +485,7 @@
 		}
 		skb_dst_set(skb, dst);
 		x = dst->xfrm;
-	} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
+	} while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));
 
 	return 0;
 
@@ -147,7 +502,7 @@
 	struct net *net = xs_net(skb_dst(skb)->xfrm);
 
 	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
-		nf_reset(skb);
+		nf_reset_ct(skb);
 
 		err = skb_dst(skb)->ops->local_out(net, skb->sk, skb);
 		if (unlikely(err != 1))
@@ -193,7 +548,7 @@
 		struct sk_buff *nskb = segs->next;
 		int err;
 
-		segs->next = NULL;
+		skb_mark_not_on_list(segs);
 		err = xfrm_output2(net, sk, segs);
 
 		if (unlikely(err)) {
@@ -218,19 +573,16 @@
 	if (xfrm_dev_offload_ok(skb, x)) {
 		struct sec_path *sp;
 
-		sp = secpath_dup(skb->sp);
+		sp = secpath_set(skb);
 		if (!sp) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
 			kfree_skb(skb);
 			return -ENOMEM;
 		}
-		if (skb->sp)
-			secpath_put(skb->sp);
-		skb->sp = sp;
 		skb->encapsulation = 1;
 
 		sp->olen++;
-		sp->xvec[skb->sp->len++] = x;
+		sp->xvec[sp->len++] = x;
 		xfrm_state_hold(x);
 
 		if (skb_is_gso(skb)) {
@@ -260,20 +612,29 @@
 }
 EXPORT_SYMBOL_GPL(xfrm_output);
 
-int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct xfrm_mode *inner_mode;
+	const struct xfrm_state_afinfo *afinfo;
+	const struct xfrm_mode *inner_mode;
+	int err = -EAFNOSUPPORT;
+
 	if (x->sel.family == AF_UNSPEC)
 		inner_mode = xfrm_ip2inner_mode(x,
 				xfrm_af2proto(skb_dst(skb)->ops->family));
 	else
-		inner_mode = x->inner_mode;
+		inner_mode = &x->inner_mode;
 
 	if (inner_mode == NULL)
 		return -EAFNOSUPPORT;
-	return inner_mode->afinfo->extract_output(x, skb);
+
+	rcu_read_lock();
+	afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
+	if (likely(afinfo))
+		err = afinfo->extract_output(x, skb);
+	rcu_read_unlock();
+
+	return err;
 }
-EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);
 
 void xfrm_local_error(struct sk_buff *skb, int mtu)
 {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 119a427..f2d1e57 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * xfrm_policy.c
  *
@@ -26,10 +27,15 @@
 #include <linux/cache.h>
 #include <linux/cpu.h>
 #include <linux/audit.h>
+#include <linux/rhashtable.h>
+#include <linux/if_tunnel.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+#include <net/mip6.h>
+#endif
 #ifdef CONFIG_XFRM_STATISTICS
 #include <net/snmp.h>
 #endif
@@ -45,6 +51,99 @@
 	u8 flags;
 };
 
+/* prefixes smaller than this are stored in lists, not trees. */
+#define INEXACT_PREFIXLEN_IPV4	16
+#define INEXACT_PREFIXLEN_IPV6	48
+
+struct xfrm_pol_inexact_node {
+	struct rb_node node;
+	union {
+		xfrm_address_t addr;
+		struct rcu_head rcu;
+	};
+	u8 prefixlen;
+
+	struct rb_root root;
+
+	/* the policies matching this node; the list can be empty */
+	struct hlist_head hhead;
+};
+
+/* xfrm inexact policy search tree:
+ * xfrm_pol_inexact_bin = hash(dir,type,family,if_id);
+ *  |
+ * +---- root_d: sorted by daddr:prefix
+ * |                 |
+ * |        xfrm_pol_inexact_node
+ * |                 |
+ * |                 +- root: sorted by saddr/prefix
+ * |                 |              |
+ * |                 |         xfrm_pol_inexact_node
+ * |                 |              |
+ * |                 |              + root: unused
+ * |                 |              |
+ * |                 |              + hhead: saddr:daddr policies
+ * |                 |
+ * |                 +- coarse policies and all any:daddr policies
+ * |
+ * +---- root_s: sorted by saddr:prefix
+ * |                 |
+ * |        xfrm_pol_inexact_node
+ * |                 |
+ * |                 + root: unused
+ * |                 |
+ * |                 + hhead: saddr:any policies
+ * |
+ * +---- coarse policies and all any:any policies
+ *
+ * Lookups return four candidate lists:
+ * 1. any:any list from top-level xfrm_pol_inexact_bin
+ * 2. any:daddr list from daddr tree
+ * 3. saddr:daddr list from 2nd level daddr tree
+ * 4. saddr:any list from saddr tree
+ *
+ * This result set then needs to be searched for the policy with
+ * the lowest priority.  If two results have the same prio, the youngest one wins.
+ */
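+
+/* Illustrative example (hypothetical addresses): a policy with selector
+ * 10.0.0.0/24 -> 192.168.1.0/24 sits on the hhead of the 10.0.0.0/24
+ * saddr node inside the 192.168.1.0/24 daddr node, while a policy
+ * 0.0.0.0/0 -> 192.168.1.0/24 sits on the daddr node's own hhead.
+ * A lookup for 10.0.0.1 -> 192.168.1.1 then collects both lists, plus
+ * the bin's any:any list and any matching saddr-tree list, and scans
+ * all four candidate slots for the best match.
+ */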
+
+struct xfrm_pol_inexact_key {
+	possible_net_t net;
+	u32 if_id;
+	u16 family;
+	u8 dir, type;
+};
+
+struct xfrm_pol_inexact_bin {
+	struct xfrm_pol_inexact_key k;
+	struct rhash_head head;
+	/* list containing '*:*' policies */
+	struct hlist_head hhead;
+
+	seqcount_t count;
+	/* tree sorted by daddr/prefix */
+	struct rb_root root_d;
+
+	/* tree sorted by saddr/prefix */
+	struct rb_root root_s;
+
+	/* slow path below */
+	struct list_head inexact_bins;
+	struct rcu_head rcu;
+};
+
+enum xfrm_pol_inexact_candidate_type {
+	XFRM_POL_CAND_BOTH,
+	XFRM_POL_CAND_SADDR,
+	XFRM_POL_CAND_DADDR,
+	XFRM_POL_CAND_ANY,
+
+	XFRM_POL_CAND_MAX,
+};
+
+struct xfrm_pol_inexact_candidates {
+	struct hlist_head *res[XFRM_POL_CAND_MAX];
+};
+
 static DEFINE_SPINLOCK(xfrm_if_cb_lock);
 static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
 
@@ -55,6 +154,9 @@
 static struct kmem_cache *xfrm_dst_cache __ro_after_init;
 static __read_mostly seqcount_t xfrm_policy_hash_generation;
 
+static struct rhashtable xfrm_policy_inexact_table;
+static const struct rhashtable_params xfrm_pol_inexact_params;
+
 static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
 static int stale_bundle(struct dst_entry *dst);
 static int xfrm_bundle_ok(struct xfrm_dst *xdst);
@@ -64,6 +166,25 @@
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir);
 
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family, u8 dir,
+			   u32 if_id);
+
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup_rcu(struct net *net,
+			       u8 type, u16 family, u8 dir, u32 if_id);
+static struct xfrm_policy *
+xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
+			bool excl);
+static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
+					    struct xfrm_policy *policy);
+
+static bool
+xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
+				    struct xfrm_pol_inexact_bin *b,
+				    const xfrm_address_t *saddr,
+				    const xfrm_address_t *daddr);
+
 static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
 {
 	return refcount_inc_not_zero(&policy->refcnt);
@@ -269,6 +390,7 @@
 	if (policy) {
 		write_pnet(&policy->xp_net, net);
 		INIT_LIST_HEAD(&policy->walk.all);
+		INIT_HLIST_NODE(&policy->bydst_inexact_list);
 		INIT_HLIST_NODE(&policy->bydst);
 		INIT_HLIST_NODE(&policy->byidx);
 		rwlock_init(&policy->lock);
@@ -365,7 +487,7 @@
 	hash = __sel_hash(sel, family, hmask, dbits, sbits);
 
 	if (hash == hmask + 1)
-		return &net->xfrm.policy_inexact[dir];
+		return NULL;
 
 	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
 		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
@@ -463,9 +585,6 @@
 	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
 				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
 
-	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
-				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
-
 	for (i = hmask; i >= 0; i--)
 		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
 
@@ -563,6 +682,526 @@
 	mutex_unlock(&hash_resize_mutex);
 }
 
+/* Make sure *pol can be inserted into fastbin.
+ * Useful to check that later insert requests will be successful
+ * (provided xfrm_policy_lock is held throughout).
+ */
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir)
+{
+	struct xfrm_pol_inexact_bin *bin, *prev;
+	struct xfrm_pol_inexact_key k = {
+		.family = pol->family,
+		.type = pol->type,
+		.dir = dir,
+		.if_id = pol->if_id,
+	};
+	struct net *net = xp_net(pol);
+
+	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+	write_pnet(&k.net, net);
+	bin = rhashtable_lookup_fast(&xfrm_policy_inexact_table, &k,
+				     xfrm_pol_inexact_params);
+	if (bin)
+		return bin;
+
+	bin = kzalloc(sizeof(*bin), GFP_ATOMIC);
+	if (!bin)
+		return NULL;
+
+	bin->k = k;
+	INIT_HLIST_HEAD(&bin->hhead);
+	bin->root_d = RB_ROOT;
+	bin->root_s = RB_ROOT;
+	seqcount_init(&bin->count);
+
+	prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
+						&bin->k, &bin->head,
+						xfrm_pol_inexact_params);
+	if (!prev) {
+		list_add(&bin->inexact_bins, &net->xfrm.inexact_bins);
+		return bin;
+	}
+
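+	/* the bin already existed or the insert failed; either way our
+	 * allocation is redundant
+	 */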
+	kfree(bin);
+
+	return IS_ERR(prev) ? NULL : prev;
+}
+
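+/* An address selector stays on a plain list instead of the rbtree when
+ * it is a wildcard or its prefix is shorter than the INEXACT_PREFIXLEN_*
+ * cutoff: e.g. an IPv4 /8 (8 < 16) is treated as 'any', while an
+ * IPv4 /24 goes into the tree.
+ */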
+static bool xfrm_pol_inexact_addr_use_any_list(const xfrm_address_t *addr,
+					       int family, u8 prefixlen)
+{
+	if (xfrm_addr_any(addr, family))
+		return true;
+
+	if (family == AF_INET6 && prefixlen < INEXACT_PREFIXLEN_IPV6)
+		return true;
+
+	if (family == AF_INET && prefixlen < INEXACT_PREFIXLEN_IPV4)
+		return true;
+
+	return false;
+}
+
+static bool
+xfrm_policy_inexact_insert_use_any_list(const struct xfrm_policy *policy)
+{
+	const xfrm_address_t *addr;
+	bool saddr_any, daddr_any;
+	u8 prefixlen;
+
+	addr = &policy->selector.saddr;
+	prefixlen = policy->selector.prefixlen_s;
+
+	saddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
+						       policy->family,
+						       prefixlen);
+	addr = &policy->selector.daddr;
+	prefixlen = policy->selector.prefixlen_d;
+	daddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
+						       policy->family,
+						       prefixlen);
+	return saddr_any && daddr_any;
+}
+
+static void xfrm_pol_inexact_node_init(struct xfrm_pol_inexact_node *node,
+				       const xfrm_address_t *addr, u8 prefixlen)
+{
+	node->addr = *addr;
+	node->prefixlen = prefixlen;
+}
+
+static struct xfrm_pol_inexact_node *
+xfrm_pol_inexact_node_alloc(const xfrm_address_t *addr, u8 prefixlen)
+{
+	struct xfrm_pol_inexact_node *node;
+
+	node = kzalloc(sizeof(*node), GFP_ATOMIC);
+	if (node)
+		xfrm_pol_inexact_node_init(node, addr, prefixlen);
+
+	return node;
+}
+
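+/* Compare two addresses under a common prefix.  For example, with
+ * prefixlen 16, 10.1.2.3 and 10.1.200.7 yield delta 0 (only the top
+ * 16 bits are compared); with prefixlen 24 they differ.
+ */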
+static int xfrm_policy_addr_delta(const xfrm_address_t *a,
+				  const xfrm_address_t *b,
+				  u8 prefixlen, u16 family)
+{
+	unsigned int pdw, pbi;
+	int delta = 0;
+
+	switch (family) {
+	case AF_INET:
+		if (sizeof(long) == 4 && prefixlen == 0)
+			return ntohl(a->a4) - ntohl(b->a4);
+		return (ntohl(a->a4) & ((~0UL << (32 - prefixlen)))) -
+		       (ntohl(b->a4) & ((~0UL << (32 - prefixlen))));
+	case AF_INET6:
+		pdw = prefixlen >> 5;
+		pbi = prefixlen & 0x1f;
+
+		if (pdw) {
+			delta = memcmp(a->a6, b->a6, pdw << 2);
+			if (delta)
+				return delta;
+		}
+		if (pbi) {
+			u32 mask = ~0u << (32 - pbi);
+
+			delta = (ntohl(a->a6[pdw]) & mask) -
+				(ntohl(b->a6[pdw]) & mask);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return delta;
+}
+
+static void xfrm_policy_inexact_list_reinsert(struct net *net,
+					      struct xfrm_pol_inexact_node *n,
+					      u16 family)
+{
+	unsigned int matched_s, matched_d;
+	struct xfrm_policy *policy, *p;
+
+	matched_s = 0;
+	matched_d = 0;
+
+	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+		struct hlist_node *newpos = NULL;
+		bool matches_s, matches_d;
+
+		if (!policy->bydst_reinsert)
+			continue;
+
+		WARN_ON_ONCE(policy->family != family);
+
+		policy->bydst_reinsert = false;
+		hlist_for_each_entry(p, &n->hhead, bydst) {
+			if (policy->priority > p->priority)
+				newpos = &p->bydst;
+			else if (policy->priority == p->priority &&
+				 policy->pos > p->pos)
+				newpos = &p->bydst;
+			else
+				break;
+		}
+
+		if (newpos)
+			hlist_add_behind_rcu(&policy->bydst, newpos);
+		else
+			hlist_add_head_rcu(&policy->bydst, &n->hhead);
+
+		/* paranoia checks follow.
+		 * Check that the reinserted policy matches at least
+		 * saddr or daddr for the current node prefix.
+		 *
+		 * Matching both is fine, matching saddr in one policy
+		 * (but not daddr) and then matching only daddr in another
+		 * is a bug.
+		 */
+		matches_s = xfrm_policy_addr_delta(&policy->selector.saddr,
+						   &n->addr,
+						   n->prefixlen,
+						   family) == 0;
+		matches_d = xfrm_policy_addr_delta(&policy->selector.daddr,
+						   &n->addr,
+						   n->prefixlen,
+						   family) == 0;
+		if (matches_s && matches_d)
+			continue;
+
+		WARN_ON_ONCE(!matches_s && !matches_d);
+		if (matches_s)
+			matched_s++;
+		if (matches_d)
+			matched_d++;
+		WARN_ON_ONCE(matched_s && matched_d);
+	}
+}
+
+static void xfrm_policy_inexact_node_reinsert(struct net *net,
+					      struct xfrm_pol_inexact_node *n,
+					      struct rb_root *new,
+					      u16 family)
+{
+	struct xfrm_pol_inexact_node *node;
+	struct rb_node **p, *parent;
+
+	/* we should not have another subtree here */
+	WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root));
+restart:
+	parent = NULL;
+	p = &new->rb_node;
+	while (*p) {
+		u8 prefixlen;
+		int delta;
+
+		parent = *p;
+		node = rb_entry(*p, struct xfrm_pol_inexact_node, node);
+
+		prefixlen = min(node->prefixlen, n->prefixlen);
+
+		delta = xfrm_policy_addr_delta(&n->addr, &node->addr,
+					       prefixlen, family);
+		if (delta < 0) {
+			p = &parent->rb_left;
+		} else if (delta > 0) {
+			p = &parent->rb_right;
+		} else {
+			bool same_prefixlen = node->prefixlen == n->prefixlen;
+			struct xfrm_policy *tmp;
+
+			hlist_for_each_entry(tmp, &n->hhead, bydst) {
+				tmp->bydst_reinsert = true;
+				hlist_del_rcu(&tmp->bydst);
+			}
+
+			node->prefixlen = prefixlen;
+
+			xfrm_policy_inexact_list_reinsert(net, node, family);
+
+			if (same_prefixlen) {
+				kfree_rcu(n, rcu);
+				return;
+			}
+
+			rb_erase(*p, new);
+			kfree_rcu(n, rcu);
+			n = node;
+			goto restart;
+		}
+	}
+
+	rb_link_node_rcu(&n->node, parent, p);
+	rb_insert_color(&n->node, new);
+}
+
+/* merge nodes v and n */
+static void xfrm_policy_inexact_node_merge(struct net *net,
+					   struct xfrm_pol_inexact_node *v,
+					   struct xfrm_pol_inexact_node *n,
+					   u16 family)
+{
+	struct xfrm_pol_inexact_node *node;
+	struct xfrm_policy *tmp;
+	struct rb_node *rnode;
+
+	/* To-be-merged node v has a subtree.
+	 *
+	 * Dismantle it and insert its nodes to n->root.
+	 */
+	while ((rnode = rb_first(&v->root)) != NULL) {
+		node = rb_entry(rnode, struct xfrm_pol_inexact_node, node);
+		rb_erase(&node->node, &v->root);
+		xfrm_policy_inexact_node_reinsert(net, node, &n->root,
+						  family);
+	}
+
+	hlist_for_each_entry(tmp, &v->hhead, bydst) {
+		tmp->bydst_reinsert = true;
+		hlist_del_rcu(&tmp->bydst);
+	}
+
+	xfrm_policy_inexact_list_reinsert(net, n, family);
+}
+
+static struct xfrm_pol_inexact_node *
+xfrm_policy_inexact_insert_node(struct net *net,
+				struct rb_root *root,
+				xfrm_address_t *addr,
+				u16 family, u8 prefixlen, u8 dir)
+{
+	struct xfrm_pol_inexact_node *cached = NULL;
+	struct rb_node **p, *parent = NULL;
+	struct xfrm_pol_inexact_node *node;
+
+	p = &root->rb_node;
+	while (*p) {
+		int delta;
+
+		parent = *p;
+		node = rb_entry(*p, struct xfrm_pol_inexact_node, node);
+
+		delta = xfrm_policy_addr_delta(addr, &node->addr,
+					       node->prefixlen,
+					       family);
+		if (delta == 0 && prefixlen >= node->prefixlen) {
+			WARN_ON_ONCE(cached); /* ipsec policies got lost */
+			return node;
+		}
+
+		if (delta < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+
+		if (prefixlen < node->prefixlen) {
+			delta = xfrm_policy_addr_delta(addr, &node->addr,
+						       prefixlen,
+						       family);
+			if (delta)
+				continue;
+
+			/* This node is a subnet of the new prefix. It needs
+			 * to be removed and re-inserted with the smaller
+			 * prefix and all nodes that are now also covered
+			 * by the reduced prefixlen.
+			 */
+			rb_erase(&node->node, root);
+
+			if (!cached) {
+				xfrm_pol_inexact_node_init(node, addr,
+							   prefixlen);
+				cached = node;
+			} else {
+				/* This node also falls within the new
+				 * prefixlen. Merge the to-be-reinserted
+				 * node and this one.
+				 */
+				xfrm_policy_inexact_node_merge(net, node,
+							       cached, family);
+				kfree_rcu(node, rcu);
+			}
+
+			/* restart */
+			p = &root->rb_node;
+			parent = NULL;
+		}
+	}
+
+	node = cached;
+	if (!node) {
+		node = xfrm_pol_inexact_node_alloc(addr, prefixlen);
+		if (!node)
+			return NULL;
+	}
+
+	rb_link_node_rcu(&node->node, parent, p);
+	rb_insert_color(&node->node, root);
+
+	return node;
+}
+
+static void xfrm_policy_inexact_gc_tree(struct rb_root *r, bool rm)
+{
+	struct xfrm_pol_inexact_node *node;
+	struct rb_node *rn = rb_first(r);
+
+	while (rn) {
+		node = rb_entry(rn, struct xfrm_pol_inexact_node, node);
+
+		xfrm_policy_inexact_gc_tree(&node->root, rm);
+		rn = rb_next(rn);
+
+		if (!hlist_empty(&node->hhead) || !RB_EMPTY_ROOT(&node->root)) {
+			WARN_ON_ONCE(rm);
+			continue;
+		}
+
+		rb_erase(&node->node, r);
+		kfree_rcu(node, rcu);
+	}
+}
+
+static void __xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b, bool net_exit)
+{
+	write_seqcount_begin(&b->count);
+	xfrm_policy_inexact_gc_tree(&b->root_d, net_exit);
+	xfrm_policy_inexact_gc_tree(&b->root_s, net_exit);
+	write_seqcount_end(&b->count);
+
+	if (!RB_EMPTY_ROOT(&b->root_d) || !RB_EMPTY_ROOT(&b->root_s) ||
+	    !hlist_empty(&b->hhead)) {
+		WARN_ON_ONCE(net_exit);
+		return;
+	}
+
+	if (rhashtable_remove_fast(&xfrm_policy_inexact_table, &b->head,
+				   xfrm_pol_inexact_params) == 0) {
+		list_del(&b->inexact_bins);
+		kfree_rcu(b, rcu);
+	}
+}
+
+static void xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b)
+{
+	struct net *net = read_pnet(&b->k.net);
+
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+	__xfrm_policy_inexact_prune_bin(b, false);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+}
+
+static void __xfrm_policy_inexact_flush(struct net *net)
+{
+	struct xfrm_pol_inexact_bin *bin, *t;
+
+	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+	list_for_each_entry_safe(bin, t, &net->xfrm.inexact_bins, inexact_bins)
+		__xfrm_policy_inexact_prune_bin(bin, false);
+}
+
+static struct hlist_head *
+xfrm_policy_inexact_alloc_chain(struct xfrm_pol_inexact_bin *bin,
+				struct xfrm_policy *policy, u8 dir)
+{
+	struct xfrm_pol_inexact_node *n;
+	struct net *net;
+
+	net = xp_net(policy);
+	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+	if (xfrm_policy_inexact_insert_use_any_list(policy))
+		return &bin->hhead;
+
+	if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.daddr,
+					       policy->family,
+					       policy->selector.prefixlen_d)) {
+		write_seqcount_begin(&bin->count);
+		n = xfrm_policy_inexact_insert_node(net,
+						    &bin->root_s,
+						    &policy->selector.saddr,
+						    policy->family,
+						    policy->selector.prefixlen_s,
+						    dir);
+		write_seqcount_end(&bin->count);
+		if (!n)
+			return NULL;
+
+		return &n->hhead;
+	}
+
+	/* daddr is fixed */
+	write_seqcount_begin(&bin->count);
+	n = xfrm_policy_inexact_insert_node(net,
+					    &bin->root_d,
+					    &policy->selector.daddr,
+					    policy->family,
+					    policy->selector.prefixlen_d, dir);
+	write_seqcount_end(&bin->count);
+	if (!n)
+		return NULL;
+
+	/* saddr is wildcard */
+	if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.saddr,
+					       policy->family,
+					       policy->selector.prefixlen_s))
+		return &n->hhead;
+
+	write_seqcount_begin(&bin->count);
+	n = xfrm_policy_inexact_insert_node(net,
+					    &n->root,
+					    &policy->selector.saddr,
+					    policy->family,
+					    policy->selector.prefixlen_s, dir);
+	write_seqcount_end(&bin->count);
+	if (!n)
+		return NULL;
+
+	return &n->hhead;
+}
+
+static struct xfrm_policy *
+xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
+{
+	struct xfrm_pol_inexact_bin *bin;
+	struct xfrm_policy *delpol;
+	struct hlist_head *chain;
+	struct net *net;
+
+	bin = xfrm_policy_inexact_alloc_bin(policy, dir);
+	if (!bin)
+		return ERR_PTR(-ENOMEM);
+
+	net = xp_net(policy);
+	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+	chain = xfrm_policy_inexact_alloc_chain(bin, policy, dir);
+	if (!chain) {
+		__xfrm_policy_inexact_prune_bin(bin, false);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	delpol = xfrm_policy_insert_list(chain, policy, excl);
+	if (delpol && excl) {
+		__xfrm_policy_inexact_prune_bin(bin, false);
+		return ERR_PTR(-EEXIST);
+	}
+
+	chain = &net->xfrm.policy_inexact[dir];
+	xfrm_policy_insert_inexact_list(chain, policy);
+
+	if (delpol)
+		__xfrm_policy_inexact_prune_bin(bin, false);
+
+	return delpol;
+}
+
 static void xfrm_hash_rebuild(struct work_struct *work)
 {
 	struct net *net = container_of(work, struct net,
@@ -591,14 +1230,66 @@
 	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
 
 	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+	write_seqcount_begin(&xfrm_policy_hash_generation);
+
+	/* make sure that we can insert the inexact policies again before
+	 * we start with the destructive rehash work.
+	 */
+	list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
+		struct xfrm_pol_inexact_bin *bin;
+		u8 dbits, sbits;
+
+		dir = xfrm_policy_id2dir(policy->index);
+		if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
+			continue;
+
+		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
+			if (policy->family == AF_INET) {
+				dbits = rbits4;
+				sbits = lbits4;
+			} else {
+				dbits = rbits6;
+				sbits = lbits6;
+			}
+		} else {
+			if (policy->family == AF_INET) {
+				dbits = lbits4;
+				sbits = rbits4;
+			} else {
+				dbits = lbits6;
+				sbits = rbits6;
+			}
+		}
+
+		if (policy->selector.prefixlen_d < dbits ||
+		    policy->selector.prefixlen_s < sbits)
+			continue;
+
+		bin = xfrm_policy_inexact_alloc_bin(policy, dir);
+		if (!bin)
+			goto out_unlock;
+
+		if (!xfrm_policy_inexact_alloc_chain(bin, policy, dir))
+			goto out_unlock;
+	}
 
 	/* reset the bydst and inexact table in all directions */
 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
-		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
+		struct hlist_node *n;
+
+		hlist_for_each_entry_safe(policy, n,
+					  &net->xfrm.policy_inexact[dir],
+					  bydst_inexact_list) {
+			hlist_del_rcu(&policy->bydst);
+			hlist_del_init(&policy->bydst_inexact_list);
+		}
+
 		hmask = net->xfrm.policy_bydst[dir].hmask;
 		odst = net->xfrm.policy_bydst[dir].table;
-		for (i = hmask; i >= 0; i--)
-			INIT_HLIST_HEAD(odst + i);
+		for (i = hmask; i >= 0; i--) {
+			hlist_for_each_entry_safe(policy, n, odst + i, bydst)
+				hlist_del_rcu(&policy->bydst);
+		}
 		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
 			/* dir out => dst = remote, src = local */
 			net->xfrm.policy_bydst[dir].dbits4 = rbits4;
@@ -616,15 +1307,24 @@
 
 	/* re-insert all policies by order of creation */
 	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
-		if (policy->walk.dead ||
-		    xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
+		if (policy->walk.dead)
+			continue;
+		dir = xfrm_policy_id2dir(policy->index);
+		if (dir >= XFRM_POLICY_MAX) {
 			/* skip socket policies */
 			continue;
 		}
 		newpos = NULL;
 		chain = policy_hash_bysel(net, &policy->selector,
-					  policy->family,
-					  xfrm_policy_id2dir(policy->index));
+					  policy->family, dir);
+
+		if (!chain) {
+			void *p = xfrm_policy_inexact_insert(policy, dir, 0);
+
+			WARN_ONCE(IS_ERR(p), "reinsert: %ld\n", PTR_ERR(p));
+			continue;
+		}
+
 		hlist_for_each_entry(pol, chain, bydst) {
 			if (policy->priority >= pol->priority)
 				newpos = &pol->bydst;
@@ -637,6 +1337,9 @@
 			hlist_add_head_rcu(&policy->bydst, chain);
 	}
 
+out_unlock:
+	__xfrm_policy_inexact_flush(net);
+	write_seqcount_end(&xfrm_policy_hash_generation);
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	mutex_unlock(&hash_resize_mutex);
@@ -740,18 +1443,97 @@
 	return false;
 }
 
-int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
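+/* Inexact bins are keyed by (net, dir, type, family, if_id); the cmp
+ * helper XORs the fields, so any mismatch yields a nonzero result.
+ */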
+static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
 {
-	struct net *net = xp_net(policy);
-	struct xfrm_policy *pol;
-	struct xfrm_policy *delpol;
-	struct hlist_head *chain;
-	struct hlist_node *newpos;
+	const struct xfrm_pol_inexact_key *k = data;
+	u32 a = k->type << 24 | k->dir << 16 | k->family;
 
-	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
-	delpol = NULL;
-	newpos = NULL;
+	return jhash_3words(a, k->if_id, net_hash_mix(read_pnet(&k->net)),
+			    seed);
+}
+
+static u32 xfrm_pol_bin_obj(const void *data, u32 len, u32 seed)
+{
+	const struct xfrm_pol_inexact_bin *b = data;
+
+	return xfrm_pol_bin_key(&b->k, 0, seed);
+}
+
+static int xfrm_pol_bin_cmp(struct rhashtable_compare_arg *arg,
+			    const void *ptr)
+{
+	const struct xfrm_pol_inexact_key *key = arg->key;
+	const struct xfrm_pol_inexact_bin *b = ptr;
+	int ret;
+
+	if (!net_eq(read_pnet(&b->k.net), read_pnet(&key->net)))
+		return -1;
+
+	ret = b->k.dir ^ key->dir;
+	if (ret)
+		return ret;
+
+	ret = b->k.type ^ key->type;
+	if (ret)
+		return ret;
+
+	ret = b->k.family ^ key->family;
+	if (ret)
+		return ret;
+
+	return b->k.if_id ^ key->if_id;
+}
+
+static const struct rhashtable_params xfrm_pol_inexact_params = {
+	.head_offset		= offsetof(struct xfrm_pol_inexact_bin, head),
+	.hashfn			= xfrm_pol_bin_key,
+	.obj_hashfn		= xfrm_pol_bin_obj,
+	.obj_cmpfn		= xfrm_pol_bin_cmp,
+	.automatic_shrinking	= true,
+};
+
+static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
+					    struct xfrm_policy *policy)
+{
+	struct xfrm_policy *pol, *delpol = NULL;
+	struct hlist_node *newpos = NULL;
+	int i = 0;
+
+	hlist_for_each_entry(pol, chain, bydst_inexact_list) {
+		if (pol->type == policy->type &&
+		    pol->if_id == policy->if_id &&
+		    !selector_cmp(&pol->selector, &policy->selector) &&
+		    xfrm_policy_mark_match(policy, pol) &&
+		    xfrm_sec_ctx_match(pol->security, policy->security) &&
+		    !WARN_ON(delpol)) {
+			delpol = pol;
+			if (policy->priority > pol->priority)
+				continue;
+		} else if (policy->priority >= pol->priority) {
+			newpos = &pol->bydst_inexact_list;
+			continue;
+		}
+		if (delpol)
+			break;
+	}
+
+	if (newpos)
+		hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
+	else
+		hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
+
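+	/* renumber ->pos so it mirrors each policy's position in the
+	 * chain; lookups use it to break ties between equal priorities
+	 */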
+	hlist_for_each_entry(pol, chain, bydst_inexact_list) {
+		pol->pos = i;
+		i++;
+	}
+}
+
+static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
+						   struct xfrm_policy *policy,
+						   bool excl)
+{
+	struct xfrm_policy *pol, *newpos = NULL, *delpol = NULL;
+
 	hlist_for_each_entry(pol, chain, bydst) {
 		if (pol->type == policy->type &&
 		    pol->if_id == policy->if_id &&
@@ -759,24 +1541,45 @@
 		    xfrm_policy_mark_match(policy, pol) &&
 		    xfrm_sec_ctx_match(pol->security, policy->security) &&
 		    !WARN_ON(delpol)) {
-			if (excl) {
-				spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-				return -EEXIST;
-			}
+			if (excl)
+				return ERR_PTR(-EEXIST);
 			delpol = pol;
 			if (policy->priority > pol->priority)
 				continue;
 		} else if (policy->priority >= pol->priority) {
-			newpos = &pol->bydst;
+			newpos = pol;
 			continue;
 		}
 		if (delpol)
 			break;
 	}
+
 	if (newpos)
-		hlist_add_behind_rcu(&policy->bydst, newpos);
+		hlist_add_behind_rcu(&policy->bydst, &newpos->bydst);
 	else
 		hlist_add_head_rcu(&policy->bydst, chain);
+
+	return delpol;
+}
+
+int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+{
+	struct net *net = xp_net(policy);
+	struct xfrm_policy *delpol;
+	struct hlist_head *chain;
+
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
+	if (chain)
+		delpol = xfrm_policy_insert_list(chain, policy, excl);
+	else
+		delpol = xfrm_policy_inexact_insert(policy, dir, excl);
+
+	if (IS_ERR(delpol)) {
+		spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+		return PTR_ERR(delpol);
+	}
+
 	__xfrm_policy_link(policy, dir);
 
 	/* After previous checking, family can either be AF_INET or AF_INET6 */
@@ -806,43 +1609,96 @@
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
 
+static struct xfrm_policy *
+__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
+			u8 type, int dir,
+			struct xfrm_selector *sel,
+			struct xfrm_sec_ctx *ctx)
+{
+	struct xfrm_policy *pol;
+
+	if (!chain)
+		return NULL;
+
+	hlist_for_each_entry(pol, chain, bydst) {
+		if (pol->type == type &&
+		    pol->if_id == if_id &&
+		    (mark & pol->mark.m) == pol->mark.v &&
+		    !selector_cmp(sel, &pol->selector) &&
+		    xfrm_sec_ctx_match(ctx, pol->security))
+			return pol;
+	}
+
+	return NULL;
+}
+
 struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
 					  u8 type, int dir,
 					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err)
 {
-	struct xfrm_policy *pol, *ret;
+	struct xfrm_pol_inexact_bin *bin = NULL;
+	struct xfrm_policy *pol, *ret = NULL;
 	struct hlist_head *chain;
 
 	*err = 0;
 	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	chain = policy_hash_bysel(net, sel, sel->family, dir);
-	ret = NULL;
-	hlist_for_each_entry(pol, chain, bydst) {
-		if (pol->type == type &&
-		    pol->if_id == if_id &&
-		    (mark & pol->mark.m) == pol->mark.v &&
-		    !selector_cmp(sel, &pol->selector) &&
-		    xfrm_sec_ctx_match(ctx, pol->security)) {
-			xfrm_pol_hold(pol);
-			if (delete) {
-				*err = security_xfrm_policy_delete(
-								pol->security);
-				if (*err) {
-					spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-					return pol;
-				}
-				__xfrm_policy_unlink(pol, dir);
-			}
-			ret = pol;
-			break;
+	if (!chain) {
+		struct xfrm_pol_inexact_candidates cand;
+		int i;
+
+		bin = xfrm_policy_inexact_lookup(net, type,
+						 sel->family, dir, if_id);
+		if (!bin) {
+			spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+			return NULL;
 		}
+
+		if (!xfrm_policy_find_inexact_candidates(&cand, bin,
+							 &sel->saddr,
+							 &sel->daddr)) {
+			spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+			return NULL;
+		}
+
+		pol = NULL;
+		for (i = 0; i < ARRAY_SIZE(cand.res); i++) {
+			struct xfrm_policy *tmp;
+
+			tmp = __xfrm_policy_bysel_ctx(cand.res[i], mark,
+						      if_id, type, dir,
+						      sel, ctx);
+			if (!tmp)
+				continue;
+
+			if (!pol || tmp->pos < pol->pos)
+				pol = tmp;
+		}
+	} else {
+		pol = __xfrm_policy_bysel_ctx(chain, mark, if_id, type, dir,
+					      sel, ctx);
+	}
+
+	if (pol) {
+		xfrm_pol_hold(pol);
+		if (delete) {
+			*err = security_xfrm_policy_delete(pol->security);
+			if (*err) {
+				spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+				return pol;
+			}
+			__xfrm_policy_unlink(pol, dir);
+		}
+		ret = pol;
 	}
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	if (ret && delete)
 		xfrm_policy_kill(ret);
+	if (bin && delete)
+		xfrm_policy_inexact_prune_bin(bin);
 	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
@@ -892,36 +1748,19 @@
 static inline int
 xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
 {
-	int dir, err = 0;
+	struct xfrm_policy *pol;
+	int err = 0;
 
-	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
-		struct xfrm_policy *pol;
-		int i;
+	list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+		if (pol->walk.dead ||
+		    xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX ||
+		    pol->type != type)
+			continue;
 
-		hlist_for_each_entry(pol,
-				     &net->xfrm.policy_inexact[dir], bydst) {
-			if (pol->type != type)
-				continue;
-			err = security_xfrm_policy_delete(pol->security);
-			if (err) {
-				xfrm_audit_policy_delete(pol, 0, task_valid);
-				return err;
-			}
-		}
-		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
-			hlist_for_each_entry(pol,
-					     net->xfrm.policy_bydst[dir].table + i,
-					     bydst) {
-				if (pol->type != type)
-					continue;
-				err = security_xfrm_policy_delete(
-								pol->security);
-				if (err) {
-					xfrm_audit_policy_delete(pol, 0,
-								 task_valid);
-					return err;
-				}
-			}
+		err = security_xfrm_policy_delete(pol->security);
+		if (err) {
+			xfrm_audit_policy_delete(pol, 0, task_valid);
+			return err;
 		}
 	}
 	return err;
@@ -937,6 +1776,7 @@
 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 {
 	int dir, err = 0, cnt = 0;
+	struct xfrm_policy *pol;
 
 	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 
@@ -944,48 +1784,25 @@
 	if (err)
 		goto out;
 
-	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
-		struct xfrm_policy *pol;
-		int i;
+again:
+	list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+		dir = xfrm_policy_id2dir(pol->index);
+		if (pol->walk.dead ||
+		    dir >= XFRM_POLICY_MAX ||
+		    pol->type != type)
+			continue;
 
-	again1:
-		hlist_for_each_entry(pol,
-				     &net->xfrm.policy_inexact[dir], bydst) {
-			if (pol->type != type)
-				continue;
-			__xfrm_policy_unlink(pol, dir);
-			spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-			cnt++;
-
-			xfrm_audit_policy_delete(pol, 1, task_valid);
-
-			xfrm_policy_kill(pol);
-
-			spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-			goto again1;
-		}
-
-		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
-	again2:
-			hlist_for_each_entry(pol,
-					     net->xfrm.policy_bydst[dir].table + i,
-					     bydst) {
-				if (pol->type != type)
-					continue;
-				__xfrm_policy_unlink(pol, dir);
-				spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-				cnt++;
-
-				xfrm_audit_policy_delete(pol, 1, task_valid);
-				xfrm_policy_kill(pol);
-
-				spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-				goto again2;
-			}
-		}
-
+		__xfrm_policy_unlink(pol, dir);
+		spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+		cnt++;
+		xfrm_audit_policy_delete(pol, 1, task_valid);
+		xfrm_policy_kill(pol);
+		spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+		goto again;
 	}
-	if (!cnt)
+	if (cnt)
+		__xfrm_policy_inexact_flush(net);
+	else
 		err = -ESRCH;
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1084,21 +1901,188 @@
 	if (match)
 		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
 						  dir);
-
 	return ret;
 }
 
+static struct xfrm_pol_inexact_node *
+xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
+				seqcount_t *count,
+				const xfrm_address_t *addr, u16 family)
+{
+	const struct rb_node *parent;
+	int seq;
+
+again:
+	seq = read_seqcount_begin(count);
+
+	parent = rcu_dereference_raw(r->rb_node);
+	while (parent) {
+		struct xfrm_pol_inexact_node *node;
+		int delta;
+
+		node = rb_entry(parent, struct xfrm_pol_inexact_node, node);
+
+		delta = xfrm_policy_addr_delta(addr, &node->addr,
+					       node->prefixlen, family);
+		if (delta < 0) {
+			parent = rcu_dereference_raw(parent->rb_left);
+			continue;
+		} else if (delta > 0) {
+			parent = rcu_dereference_raw(parent->rb_right);
+			continue;
+		}
+
+		return node;
+	}
+
+	if (read_seqcount_retry(count, seq))
+		goto again;
+
+	return NULL;
+}
+
+static bool
+xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
+				    struct xfrm_pol_inexact_bin *b,
+				    const xfrm_address_t *saddr,
+				    const xfrm_address_t *daddr)
+{
+	struct xfrm_pol_inexact_node *n;
+	u16 family;
+
+	if (!b)
+		return false;
+
+	family = b->k.family;
+	memset(cand, 0, sizeof(*cand));
+	cand->res[XFRM_POL_CAND_ANY] = &b->hhead;
+
+	n = xfrm_policy_lookup_inexact_addr(&b->root_d, &b->count, daddr,
+					    family);
+	if (n) {
+		cand->res[XFRM_POL_CAND_DADDR] = &n->hhead;
+		n = xfrm_policy_lookup_inexact_addr(&n->root, &b->count, saddr,
+						    family);
+		if (n)
+			cand->res[XFRM_POL_CAND_BOTH] = &n->hhead;
+	}
+
+	n = xfrm_policy_lookup_inexact_addr(&b->root_s, &b->count, saddr,
+					    family);
+	if (n)
+		cand->res[XFRM_POL_CAND_SADDR] = &n->hhead;
+
+	return true;
+}
+
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup_rcu(struct net *net, u8 type, u16 family,
+			       u8 dir, u32 if_id)
+{
+	struct xfrm_pol_inexact_key k = {
+		.family = family,
+		.type = type,
+		.dir = dir,
+		.if_id = if_id,
+	};
+
+	write_pnet(&k.net, net);
+
+	return rhashtable_lookup(&xfrm_policy_inexact_table, &k,
+				 xfrm_pol_inexact_params);
+}
+
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family,
+			   u8 dir, u32 if_id)
+{
+	struct xfrm_pol_inexact_bin *bin;
+
+	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+	rcu_read_lock();
+	bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
+	rcu_read_unlock();
+
+	return bin;
+}
+
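+/* Scan one candidate list: entries are checked in priority order and
+ * the scan stops once no entry can beat the current best; ->pos breaks
+ * ties between matches of equal priority.
+ */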
+static struct xfrm_policy *
+__xfrm_policy_eval_candidates(struct hlist_head *chain,
+			      struct xfrm_policy *prefer,
+			      const struct flowi *fl,
+			      u8 type, u16 family, int dir, u32 if_id)
+{
+	u32 priority = prefer ? prefer->priority : ~0u;
+	struct xfrm_policy *pol;
+
+	if (!chain)
+		return NULL;
+
+	hlist_for_each_entry_rcu(pol, chain, bydst) {
+		int err;
+
+		if (pol->priority > priority)
+			break;
+
+		err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
+		if (err) {
+			if (err != -ESRCH)
+				return ERR_PTR(err);
+
+			continue;
+		}
+
+		if (prefer) {
+			/* matches.  Is it older than *prefer? */
+			if (pol->priority == priority &&
+			    prefer->pos < pol->pos)
+				return prefer;
+		}
+
+		return pol;
+	}
+
+	return NULL;
+}
+
+static struct xfrm_policy *
+xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand,
+			    struct xfrm_policy *prefer,
+			    const struct flowi *fl,
+			    u8 type, u16 family, int dir, u32 if_id)
+{
+	struct xfrm_policy *tmp;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cand->res); i++) {
+		tmp = __xfrm_policy_eval_candidates(cand->res[i],
+						    prefer,
+						    fl, type, family, dir,
+						    if_id);
+		if (!tmp)
+			continue;
+
+		if (IS_ERR(tmp))
+			return tmp;
+		prefer = tmp;
+	}
+
+	return prefer;
+}
+
 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 						     const struct flowi *fl,
 						     u16 family, u8 dir,
 						     u32 if_id)
 {
-	int err;
-	struct xfrm_policy *pol, *ret;
+	struct xfrm_pol_inexact_candidates cand;
 	const xfrm_address_t *daddr, *saddr;
+	struct xfrm_pol_inexact_bin *bin;
+	struct xfrm_policy *pol, *ret;
 	struct hlist_head *chain;
 	unsigned int sequence;
-	u32 priority;
+	int err;
 
 	daddr = xfrm_flowi_daddr(fl, family);
 	saddr = xfrm_flowi_saddr(fl, family);
@@ -1112,7 +2096,6 @@
 		chain = policy_hash_direct(net, daddr, saddr, family, dir);
 	} while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
 
-	priority = ~0U;
 	ret = NULL;
 	hlist_for_each_entry_rcu(pol, chain, bydst) {
 		err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
@@ -1125,29 +2108,23 @@
 			}
 		} else {
 			ret = pol;
-			priority = ret->priority;
 			break;
 		}
 	}
-	chain = &net->xfrm.policy_inexact[dir];
-	hlist_for_each_entry_rcu(pol, chain, bydst) {
-		if ((pol->priority >= priority) && ret)
-			break;
+	bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
+	if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr,
+							 daddr))
+		goto skip_inexact;
 
-		err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
-		if (err) {
-			if (err == -ESRCH)
-				continue;
-			else {
-				ret = ERR_PTR(err);
-				goto fail;
-			}
-		} else {
-			ret = pol;
-			break;
-		}
+	pol = xfrm_policy_eval_candidates(&cand, ret, fl, type,
+					  family, dir, if_id);
+	if (pol) {
+		ret = pol;
+		if (IS_ERR(pol))
+			goto fail;
 	}
 
+skip_inexact:
 	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
 		goto retry;
 
@@ -1239,6 +2216,7 @@
 	/* Socket policies are not hashed. */
 	if (!hlist_unhashed(&pol->bydst)) {
 		hlist_del_rcu(&pol->bydst);
+		hlist_del_init(&pol->bydst_inexact_list);
 		hlist_del(&pol->byidx);
 	}
 
@@ -1478,18 +2456,10 @@
 
 static int xfrm_get_tos(const struct flowi *fl, int family)
 {
-	const struct xfrm_policy_afinfo *afinfo;
-	int tos;
+	if (family == AF_INET)
+		return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos;
 
-	afinfo = xfrm_policy_get_afinfo(family);
-	if (!afinfo)
-		return 0;
-
-	tos = afinfo->get_tos(fl);
-
-	rcu_read_unlock();
-
-	return tos;
+	return 0;
 }
 
 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
@@ -1527,21 +2497,14 @@
 	return xdst;
 }
 
-static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
-				 int nfheader_len)
+static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+			   int nfheader_len)
 {
-	const struct xfrm_policy_afinfo *afinfo =
-		xfrm_policy_get_afinfo(dst->ops->family);
-	int err;
-
-	if (!afinfo)
-		return -EINVAL;
-
-	err = afinfo->init_path(path, dst, nfheader_len);
-
-	rcu_read_unlock();
-
-	return err;
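+	/* only IPv6 carries extra path state (a route validity cookie and
+	 * the nfheader length); for IPv4 this is a no-op
+	 */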
+	if (dst->ops->family == AF_INET6) {
+		struct rt6_info *rt = (struct rt6_info *)dst;
+
+		path->path_cookie = rt6_get_cookie(rt);
+		path->u.rt6.rt6i_nfheader_len = nfheader_len;
+	}
 }
 
 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
@@ -1573,10 +2536,11 @@
 					    const struct flowi *fl,
 					    struct dst_entry *dst)
 {
+	const struct xfrm_state_afinfo *afinfo;
+	const struct xfrm_mode *inner_mode;
 	struct net *net = xp_net(policy);
 	unsigned long now = jiffies;
 	struct net_device *dev;
-	struct xfrm_mode *inner_mode;
 	struct xfrm_dst *xdst_prev = NULL;
 	struct xfrm_dst *xdst0 = NULL;
 	int i = 0;
@@ -1622,13 +2586,16 @@
 				goto put_states;
 			}
 		} else
-			inner_mode = xfrm[i]->inner_mode;
+			inner_mode = &xfrm[i]->inner_mode;
 
 		xdst->route = dst;
 		dst_copy_metrics(dst1, dst);
 
 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
-			__u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
+			__u32 mark = 0;
+
+			if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
+				mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
 
 			family = xfrm[i]->props.family;
 			dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
@@ -1647,7 +2614,14 @@
 		dst1->lastuse = now;
 
 		dst1->input = dst_discard;
-		dst1->output = inner_mode->afinfo->output;
+
+		rcu_read_lock();
+		afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
+		if (likely(afinfo))
+			dst1->output = afinfo->output;
+		else
+			dst1->output = dst_discard_out;
+		rcu_read_unlock();
 
 		xdst_prev = xdst;
 
@@ -1811,7 +2785,7 @@
 		pq->timeout = pq->timeout << 1;
 		if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
 			xfrm_pol_hold(pol);
-	goto out;
+		goto out;
 	}
 
 	dst_release(dst);
@@ -1834,7 +2808,7 @@
 			continue;
 		}
 
-		nf_reset(skb);
+		nf_reset_ct(skb);
 		skb_dst_drop(skb);
 		skb_dst_set(skb, dst);
 
@@ -2225,11 +3199,12 @@
 static inline int
 xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
 {
+	struct sec_path *sp = skb_sec_path(skb);
 	struct xfrm_state *x;
 
-	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
+	if (!sp || idx < 0 || idx >= sp->len)
 		return 0;
-	x = skb->sp->xvec[idx];
+	x = sp->xvec[idx];
 	if (!x->type->reject)
 		return 0;
 	return x->type->reject(x, skb, fl);
@@ -2287,20 +3262,231 @@
 	return start;
 }
 
+static void
+decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	int ihl = iph->ihl;
+	u8 *xprth = skb_network_header(skb) + ihl * 4;
+	struct flowi4 *fl4 = &fl->u.ip4;
+	int oif = 0;
+
+	if (skb_dst(skb) && skb_dst(skb)->dev)
+		oif = skb_dst(skb)->dev->ifindex;
+
+	memset(fl4, 0, sizeof(struct flowi4));
+	fl4->flowi4_mark = skb->mark;
+	fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
+
+	fl4->flowi4_proto = iph->protocol;
+	fl4->daddr = reverse ? iph->saddr : iph->daddr;
+	fl4->saddr = reverse ? iph->daddr : iph->saddr;
+	fl4->flowi4_tos = iph->tos;
+
+	if (!ip_is_fragment(iph)) {
+		switch (iph->protocol) {
+		case IPPROTO_UDP:
+		case IPPROTO_UDPLITE:
+		case IPPROTO_TCP:
+		case IPPROTO_SCTP:
+		case IPPROTO_DCCP:
+			if (xprth + 4 < skb->data ||
+			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				__be16 *ports;
+
+				xprth = skb_network_header(skb) + ihl * 4;
+				ports = (__be16 *)xprth;
+
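+				/* a reverse decode swaps which of
+				 * ports[0]/ports[1] feeds sport/dport
+				 */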
+				fl4->fl4_sport = ports[!!reverse];
+				fl4->fl4_dport = ports[!reverse];
+			}
+			break;
+		case IPPROTO_ICMP:
+			if (xprth + 2 < skb->data ||
+			    pskb_may_pull(skb, xprth + 2 - skb->data)) {
+				u8 *icmp;
+
+				xprth = skb_network_header(skb) + ihl * 4;
+				icmp = xprth;
+
+				fl4->fl4_icmp_type = icmp[0];
+				fl4->fl4_icmp_code = icmp[1];
+			}
+			break;
+		case IPPROTO_ESP:
+			if (xprth + 4 < skb->data ||
+			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				__be32 *ehdr;
+
+				xprth = skb_network_header(skb) + ihl * 4;
+				ehdr = (__be32 *)xprth;
+
+				fl4->fl4_ipsec_spi = ehdr[0];
+			}
+			break;
+		case IPPROTO_AH:
+			if (xprth + 8 < skb->data ||
+			    pskb_may_pull(skb, xprth + 8 - skb->data)) {
+				__be32 *ah_hdr;
+
+				xprth = skb_network_header(skb) + ihl * 4;
+				ah_hdr = (__be32 *)xprth;
+
+				fl4->fl4_ipsec_spi = ah_hdr[1];
+			}
+			break;
+		case IPPROTO_COMP:
+			if (xprth + 4 < skb->data ||
+			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				__be16 *ipcomp_hdr;
+
+				xprth = skb_network_header(skb) + ihl * 4;
+				ipcomp_hdr = (__be16 *)xprth;
+
+				fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
+			}
+			break;
+		case IPPROTO_GRE:
+			if (xprth + 12 < skb->data ||
+			    pskb_may_pull(skb, xprth + 12 - skb->data)) {
+				__be16 *greflags;
+				__be32 *gre_hdr;
+
+				xprth = skb_network_header(skb) + ihl * 4;
+				greflags = (__be16 *)xprth;
+				gre_hdr = (__be32 *)xprth;
+
+				if (greflags[0] & GRE_KEY) {
+					if (greflags[0] & GRE_CSUM)
+						gre_hdr++;
+					fl4->fl4_gre_key = gre_hdr[1];
+				}
+			}
+			break;
+		default:
+			fl4->fl4_ipsec_spi = 0;
+			break;
+		}
+	}
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void
+decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
+{
+	struct flowi6 *fl6 = &fl->u.ip6;
+	int onlyproto = 0;
+	const struct ipv6hdr *hdr = ipv6_hdr(skb);
+	u32 offset = sizeof(*hdr);
+	struct ipv6_opt_hdr *exthdr;
+	const unsigned char *nh = skb_network_header(skb);
+	u16 nhoff = IP6CB(skb)->nhoff;
+	int oif = 0;
+	u8 nexthdr;
+
+	if (!nhoff)
+		nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+	nexthdr = nh[nhoff];
+
+	if (skb_dst(skb) && skb_dst(skb)->dev)
+		oif = skb_dst(skb)->dev->ifindex;
+
+	memset(fl6, 0, sizeof(struct flowi6));
+	fl6->flowi6_mark = skb->mark;
+	fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
+
+	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
+	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
+
+	while (nh + offset + sizeof(*exthdr) < skb->data ||
+	       pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
+		nh = skb_network_header(skb);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+
+		switch (nexthdr) {
+		case NEXTHDR_FRAGMENT:
+			onlyproto = 1;
+			/* fall through */
+		case NEXTHDR_ROUTING:
+		case NEXTHDR_HOP:
+		case NEXTHDR_DEST:
+			offset += ipv6_optlen(exthdr);
+			nexthdr = exthdr->nexthdr;
+			exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+			break;
+		case IPPROTO_UDP:
+		case IPPROTO_UDPLITE:
+		case IPPROTO_TCP:
+		case IPPROTO_SCTP:
+		case IPPROTO_DCCP:
+			if (!onlyproto && (nh + offset + 4 < skb->data ||
+			     pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
+				__be16 *ports;
+
+				nh = skb_network_header(skb);
+				ports = (__be16 *)(nh + offset);
+				fl6->fl6_sport = ports[!!reverse];
+				fl6->fl6_dport = ports[!reverse];
+			}
+			fl6->flowi6_proto = nexthdr;
+			return;
+		case IPPROTO_ICMPV6:
+			if (!onlyproto && (nh + offset + 2 < skb->data ||
+			    pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
+				u8 *icmp;
+
+				nh = skb_network_header(skb);
+				icmp = (u8 *)(nh + offset);
+				fl6->fl6_icmp_type = icmp[0];
+				fl6->fl6_icmp_code = icmp[1];
+			}
+			fl6->flowi6_proto = nexthdr;
+			return;
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+		case IPPROTO_MH:
+			offset += ipv6_optlen(exthdr);
+			if (!onlyproto && (nh + offset + 3 < skb->data ||
+			    pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
+				struct ip6_mh *mh;
+
+				nh = skb_network_header(skb);
+				mh = (struct ip6_mh *)(nh + offset);
+				fl6->fl6_mh_type = mh->ip6mh_type;
+			}
+			fl6->flowi6_proto = nexthdr;
+			return;
+#endif
+		/* XXX Why are these headers here? */
+		case IPPROTO_AH:
+		case IPPROTO_ESP:
+		case IPPROTO_COMP:
+		default:
+			fl6->fl6_ipsec_spi = 0;
+			fl6->flowi6_proto = nexthdr;
+			return;
+		}
+	}
+}
+#endif
+
 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
 			  unsigned int family, int reverse)
 {
-	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	int err;
-
-	if (unlikely(afinfo == NULL))
+	switch (family) {
+	case AF_INET:
+		decode_session4(skb, fl, reverse);
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		decode_session6(skb, fl, reverse);
+		break;
+#endif
+	default:
 		return -EAFNOSUPPORT;
+	}
 
-	afinfo->decode_session(skb, fl, reverse);
-
-	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
-	rcu_read_unlock();
-	return err;
+	return security_xfrm_decode_session(skb, &fl->flowi_secid);
 }
 EXPORT_SYMBOL(__xfrm_decode_session);
 
@@ -2329,6 +3515,7 @@
 	struct flowi fl;
 	int xerr_idx = -1;
 	const struct xfrm_if_cb *ifcb;
+	struct sec_path *sp;
 	struct xfrm_if *xi;
 	u32 if_id = 0;
 
@@ -2336,9 +3523,11 @@
 	ifcb = xfrm_if_get_cb();
 
 	if (ifcb) {
-		xi = ifcb->decode_session(skb);
-		if (xi)
+		xi = ifcb->decode_session(skb, family);
+		if (xi) {
 			if_id = xi->p.if_id;
+			net = xi->net;
+		}
 	}
 	rcu_read_unlock();
 
@@ -2353,11 +3542,12 @@
 	nf_nat_decode_session(skb, &fl, family);
 
 	/* First, check used SA against their selectors. */
-	if (skb->sp) {
+	sp = skb_sec_path(skb);
+	if (sp) {
 		int i;
 
-		for (i = skb->sp->len-1; i >= 0; i--) {
-			struct xfrm_state *x = skb->sp->xvec[i];
+		for (i = sp->len - 1; i >= 0; i--) {
+			struct xfrm_state *x = sp->xvec[i];
 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
 				return 0;
@@ -2384,7 +3574,7 @@
 	}
 
 	if (!pol) {
-		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
+		if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
 			xfrm_secpath_reject(xerr_idx, skb, &fl);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
 			return 0;
@@ -2413,7 +3603,6 @@
 #endif
 
 	if (pol->action == XFRM_POLICY_ALLOW) {
-		struct sec_path *sp;
 		static struct sec_path dummy;
 		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
 		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
@@ -2421,7 +3610,8 @@
 		int ti = 0;
 		int i, k;
 
-		if ((sp = skb->sp) == NULL)
+		sp = skb_sec_path(skb);
+		if (!sp)
 			sp = &dummy;
 
 		for (pi = 0; pi < npols; pi++) {
@@ -2439,7 +3629,7 @@
 		}
 		xfrm_nr = ti;
 		if (npols > 1) {
-			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
+			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
 			tpp = stp;
 		}
 
@@ -2816,13 +4006,17 @@
 static int __net_init xfrm_policy_init(struct net *net)
 {
 	unsigned int hmask, sz;
-	int dir;
+	int dir, err;
 
-	if (net_eq(net, &init_net))
+	if (net_eq(net, &init_net)) {
 		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
 					   sizeof(struct xfrm_dst),
 					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL);
+		err = rhashtable_init(&xfrm_policy_inexact_table,
+				      &xfrm_pol_inexact_params);
+		BUG_ON(err);
+	}
 
 	hmask = 8 - 1;
 	sz = (hmask+1) * sizeof(struct hlist_head);
@@ -2857,6 +4051,7 @@
 	seqlock_init(&net->xfrm.policy_hthresh.lock);
 
 	INIT_LIST_HEAD(&net->xfrm.policy_all);
+	INIT_LIST_HEAD(&net->xfrm.inexact_bins);
 	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
 	INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
 	return 0;
@@ -2875,6 +4070,7 @@
 
 static void xfrm_policy_fini(struct net *net)
 {
+	struct xfrm_pol_inexact_bin *b, *t;
 	unsigned int sz;
 	int dir;
 
@@ -2900,6 +4096,11 @@
 	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
 	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
 	xfrm_hash_free(net->xfrm.policy_byidx, sz);
+
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+	list_for_each_entry_safe(b, t, &net->xfrm.inexact_bins, inexact_bins)
+		__xfrm_policy_inexact_prune_bin(b, true);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 }
 
 static int __net_init xfrm_net_init(struct net *net)
@@ -3065,7 +4266,7 @@
 		}
 	}
 	chain = &net->xfrm.policy_inexact[dir];
-	hlist_for_each_entry(pol, chain, bydst) {
+	hlist_for_each_entry(pol, chain, bydst_inexact_list) {
 		if ((pol->priority >= priority) && ret)
 			break;
 
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 178318d..fee9b5c 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * xfrm_proc.c
  *
  * Copyright (C)2006-2007 USAGI/WIDE Project
  *
  * Authors:	Masahide NAKAMURA <nakam@linux-ipv6.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 9e3a5e8..98943f8 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c.
  *
  * Copyright (C) 2010 secunet Security Networks AG
  * Copyright (C) 2010 Steffen Klassert <steffen.klassert@secunet.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #include <linux/export.h>
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7a34990..f342356 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * xfrm_state.c
  *
@@ -26,6 +27,8 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 
+#include <crypto/aead.h>
+
 #include "xfrm_hash.h"
 
 #define xfrm_state_deref_prot(table, net) \
@@ -173,66 +176,135 @@
 int __xfrm_state_delete(struct xfrm_state *x);
 
 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
-bool km_is_alive(const struct km_event *c);
+static bool km_is_alive(const struct km_event *c);
 void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 
-static DEFINE_SPINLOCK(xfrm_type_lock);
 int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
 {
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	const struct xfrm_type **typemap;
 	int err = 0;
 
-	if (unlikely(afinfo == NULL))
+	if (!afinfo)
 		return -EAFNOSUPPORT;
-	typemap = afinfo->type_map;
-	spin_lock_bh(&xfrm_type_lock);
 
-	if (likely(typemap[type->proto] == NULL))
-		typemap[type->proto] = type;
-	else
-		err = -EEXIST;
-	spin_unlock_bh(&xfrm_type_lock);
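+	/* X(afinfo, type, esp) expands to a WARN_ON() on double
+	 * registration followed by afinfo->type_esp = type.
+	 */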
+#define X(afi, T, name) do {			\
+		WARN_ON((afi)->type_ ## name);	\
+		(afi)->type_ ## name = (T);	\
+	} while (0)
+
+	switch (type->proto) {
+	case IPPROTO_COMP:
+		X(afinfo, type, comp);
+		break;
+	case IPPROTO_AH:
+		X(afinfo, type, ah);
+		break;
+	case IPPROTO_ESP:
+		X(afinfo, type, esp);
+		break;
+	case IPPROTO_IPIP:
+		X(afinfo, type, ipip);
+		break;
+	case IPPROTO_DSTOPTS:
+		X(afinfo, type, dstopts);
+		break;
+	case IPPROTO_ROUTING:
+		X(afinfo, type, routing);
+		break;
+	case IPPROTO_IPV6:
+		X(afinfo, type, ipip6);
+		break;
+	default:
+		WARN_ON(1);
+		err = -EPROTONOSUPPORT;
+		break;
+	}
+#undef X
 	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(xfrm_register_type);
 
-int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
+void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
 {
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	const struct xfrm_type **typemap;
-	int err = 0;
 
 	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-	typemap = afinfo->type_map;
-	spin_lock_bh(&xfrm_type_lock);
+		return;
 
-	if (unlikely(typemap[type->proto] != type))
-		err = -ENOENT;
-	else
-		typemap[type->proto] = NULL;
-	spin_unlock_bh(&xfrm_type_lock);
+#define X(afi, T, name) do {				\
+		WARN_ON((afi)->type_ ## name != (T));	\
+		(afi)->type_ ## name = NULL;		\
+	} while (0)
+
+	switch (type->proto) {
+	case IPPROTO_COMP:
+		X(afinfo, type, comp);
+		break;
+	case IPPROTO_AH:
+		X(afinfo, type, ah);
+		break;
+	case IPPROTO_ESP:
+		X(afinfo, type, esp);
+		break;
+	case IPPROTO_IPIP:
+		X(afinfo, type, ipip);
+		break;
+	case IPPROTO_DSTOPTS:
+		X(afinfo, type, dstopts);
+		break;
+	case IPPROTO_ROUTING:
+		X(afinfo, type, routing);
+		break;
+	case IPPROTO_IPV6:
+		X(afinfo, type, ipip6);
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+#undef X
 	rcu_read_unlock();
-	return err;
 }
 EXPORT_SYMBOL(xfrm_unregister_type);
 
 static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
 {
+	const struct xfrm_type *type = NULL;
 	struct xfrm_state_afinfo *afinfo;
-	const struct xfrm_type **typemap;
-	const struct xfrm_type *type;
 	int modload_attempted = 0;
 
 retry:
 	afinfo = xfrm_state_get_afinfo(family);
 	if (unlikely(afinfo == NULL))
 		return NULL;
-	typemap = afinfo->type_map;
 
-	type = READ_ONCE(typemap[proto]);
+	switch (proto) {
+	case IPPROTO_COMP:
+		type = afinfo->type_comp;
+		break;
+	case IPPROTO_AH:
+		type = afinfo->type_ah;
+		break;
+	case IPPROTO_ESP:
+		type = afinfo->type_esp;
+		break;
+	case IPPROTO_IPIP:
+		type = afinfo->type_ipip;
+		break;
+	case IPPROTO_DSTOPTS:
+		type = afinfo->type_dstopts;
+		break;
+	case IPPROTO_ROUTING:
+		type = afinfo->type_routing;
+		break;
+	case IPPROTO_IPV6:
+		type = afinfo->type_ipip6;
+		break;
+	default:
+		break;
+	}
+
 	if (unlikely(type && !try_module_get(type->owner)))
 		type = NULL;
 
@@ -252,65 +324,71 @@
 	module_put(type->owner);
 }
 
-static DEFINE_SPINLOCK(xfrm_type_offload_lock);
 int xfrm_register_type_offload(const struct xfrm_type_offload *type,
 			       unsigned short family)
 {
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	const struct xfrm_type_offload **typemap;
 	int err = 0;
 
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
-	typemap = afinfo->type_offload_map;
-	spin_lock_bh(&xfrm_type_offload_lock);
 
-	if (likely(typemap[type->proto] == NULL))
-		typemap[type->proto] = type;
-	else
-		err = -EEXIST;
-	spin_unlock_bh(&xfrm_type_offload_lock);
+	switch (type->proto) {
+	case IPPROTO_ESP:
+		WARN_ON(afinfo->type_offload_esp);
+		afinfo->type_offload_esp = type;
+		break;
+	default:
+		WARN_ON(1);
+		err = -EPROTONOSUPPORT;
+		break;
+	}
+
 	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(xfrm_register_type_offload);
 
-int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
-				 unsigned short family)
+void xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
+				  unsigned short family)
 {
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	const struct xfrm_type_offload **typemap;
-	int err = 0;
 
 	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-	typemap = afinfo->type_offload_map;
-	spin_lock_bh(&xfrm_type_offload_lock);
+		return;
 
-	if (unlikely(typemap[type->proto] != type))
-		err = -ENOENT;
-	else
-		typemap[type->proto] = NULL;
-	spin_unlock_bh(&xfrm_type_offload_lock);
+	switch (type->proto) {
+	case IPPROTO_ESP:
+		WARN_ON(afinfo->type_offload_esp != type);
+		afinfo->type_offload_esp = NULL;
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
 	rcu_read_unlock();
-	return err;
 }
 EXPORT_SYMBOL(xfrm_unregister_type_offload);
 
 static const struct xfrm_type_offload *
 xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
 {
+	const struct xfrm_type_offload *type = NULL;
 	struct xfrm_state_afinfo *afinfo;
-	const struct xfrm_type_offload **typemap;
-	const struct xfrm_type_offload *type;
 
 retry:
 	afinfo = xfrm_state_get_afinfo(family);
 	if (unlikely(afinfo == NULL))
 		return NULL;
-	typemap = afinfo->type_offload_map;
 
-	type = typemap[proto];
+	switch (proto) {
+	case IPPROTO_ESP:
+		type = afinfo->type_offload_esp;
+		break;
+	default:
+		break;
+	}
+
 	if ((type && !try_module_get(type->owner)))
 		type = NULL;
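/* Only ESP has an offload implementation, hence the single case in the
 * switches above. A provider wires itself up roughly as esp4_offload.c
 * does (hedged, abbreviated sketch):
 *
 *	static const struct xfrm_type_offload esp_type_offload = {
 *		.description	= "ESP4 OFFLOAD",
 *		.owner		= THIS_MODULE,
 *		.proto		= IPPROTO_ESP,
 *		.input_tail	= esp_input_tail,
 *		.xmit		= esp_xmit,
 *	};
 *
 *	xfrm_register_type_offload(&esp_type_offload, AF_INET);
 */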
 
@@ -330,100 +408,67 @@
 	module_put(type->owner);
 }
 
-static DEFINE_SPINLOCK(xfrm_mode_lock);
-int xfrm_register_mode(struct xfrm_mode *mode, int family)
+static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = {
+	[XFRM_MODE_BEET] = {
+		.encap = XFRM_MODE_BEET,
+		.flags = XFRM_MODE_FLAG_TUNNEL,
+		.family = AF_INET,
+	},
+	[XFRM_MODE_TRANSPORT] = {
+		.encap = XFRM_MODE_TRANSPORT,
+		.family = AF_INET,
+	},
+	[XFRM_MODE_TUNNEL] = {
+		.encap = XFRM_MODE_TUNNEL,
+		.flags = XFRM_MODE_FLAG_TUNNEL,
+		.family = AF_INET,
+	},
+};
+
+static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
+	[XFRM_MODE_BEET] = {
+		.encap = XFRM_MODE_BEET,
+		.flags = XFRM_MODE_FLAG_TUNNEL,
+		.family = AF_INET6,
+	},
+	[XFRM_MODE_ROUTEOPTIMIZATION] = {
+		.encap = XFRM_MODE_ROUTEOPTIMIZATION,
+		.family = AF_INET6,
+	},
+	[XFRM_MODE_TRANSPORT] = {
+		.encap = XFRM_MODE_TRANSPORT,
+		.family = AF_INET6,
+	},
+	[XFRM_MODE_TUNNEL] = {
+		.encap = XFRM_MODE_TUNNEL,
+		.flags = XFRM_MODE_FLAG_TUNNEL,
+		.family = AF_INET6,
+	},
+};
+
+static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
 {
-	struct xfrm_state_afinfo *afinfo;
-	struct xfrm_mode **modemap;
-	int err;
-
-	if (unlikely(mode->encap >= XFRM_MODE_MAX))
-		return -EINVAL;
-
-	afinfo = xfrm_state_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-
-	err = -EEXIST;
-	modemap = afinfo->mode_map;
-	spin_lock_bh(&xfrm_mode_lock);
-	if (modemap[mode->encap])
-		goto out;
-
-	err = -ENOENT;
-	if (!try_module_get(afinfo->owner))
-		goto out;
-
-	mode->afinfo = afinfo;
-	modemap[mode->encap] = mode;
-	err = 0;
-
-out:
-	spin_unlock_bh(&xfrm_mode_lock);
-	rcu_read_unlock();
-	return err;
-}
-EXPORT_SYMBOL(xfrm_register_mode);
-
-int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
-{
-	struct xfrm_state_afinfo *afinfo;
-	struct xfrm_mode **modemap;
-	int err;
-
-	if (unlikely(mode->encap >= XFRM_MODE_MAX))
-		return -EINVAL;
-
-	afinfo = xfrm_state_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-
-	err = -ENOENT;
-	modemap = afinfo->mode_map;
-	spin_lock_bh(&xfrm_mode_lock);
-	if (likely(modemap[mode->encap] == mode)) {
-		modemap[mode->encap] = NULL;
-		module_put(mode->afinfo->owner);
-		err = 0;
-	}
-
-	spin_unlock_bh(&xfrm_mode_lock);
-	rcu_read_unlock();
-	return err;
-}
-EXPORT_SYMBOL(xfrm_unregister_mode);
-
-static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
-{
-	struct xfrm_state_afinfo *afinfo;
-	struct xfrm_mode *mode;
-	int modload_attempted = 0;
+	const struct xfrm_mode *mode;
 
 	if (unlikely(encap >= XFRM_MODE_MAX))
 		return NULL;
 
-retry:
-	afinfo = xfrm_state_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return NULL;
-
-	mode = READ_ONCE(afinfo->mode_map[encap]);
-	if (unlikely(mode && !try_module_get(mode->owner)))
-		mode = NULL;
-
-	rcu_read_unlock();
-	if (!mode && !modload_attempted) {
-		request_module("xfrm-mode-%d-%d", family, encap);
-		modload_attempted = 1;
-		goto retry;
+	switch (family) {
+	case AF_INET:
+		mode = &xfrm4_mode_map[encap];
+		if (mode->family == family)
+			return mode;
+		break;
+	case AF_INET6:
+		mode = &xfrm6_mode_map[encap];
+		if (mode->family == family)
+			return mode;
+		break;
+	default:
+		break;
 	}
 
-	return mode;
-}
-
-static void xfrm_put_mode(struct xfrm_mode *mode)
-{
-	module_put(mode->owner);
+	return NULL;
 }
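/* With modes built in, a lookup is an array index plus a family check:
 * slots never initialized in a map have .family == 0, so the
 * "mode->family == family" test doubles as a validity filter for modes
 * a family lacks (e.g. XFRM_MODE_ROUTEOPTIMIZATION exists only in the
 * IPv6 table). Hedged usage sketch:
 *
 *	const struct xfrm_mode *m = xfrm_get_mode(XFRM_MODE_TUNNEL, AF_INET);
 *
 *	if (m && (m->flags & XFRM_MODE_FLAG_TUNNEL))
 *		/* tunnel encapsulation is available for AF_INET *\/;
 *
 * No try_module_get()/xfrm_put_mode() pairing is needed any more,
 * since the tables are compile-time constants.
 */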
 
 void xfrm_state_free(struct xfrm_state *x)
@@ -432,9 +477,9 @@
 }
 EXPORT_SYMBOL(xfrm_state_free);
 
-static void xfrm_state_gc_destroy(struct xfrm_state *x)
+static void ___xfrm_state_destroy(struct xfrm_state *x)
 {
-	tasklet_hrtimer_cancel(&x->mtimer);
+	hrtimer_cancel(&x->mtimer);
 	del_timer_sync(&x->rtimer);
 	kfree(x->aead);
 	kfree(x->aalg);
@@ -444,18 +489,14 @@
 	kfree(x->coaddr);
 	kfree(x->replay_esn);
 	kfree(x->preplay_esn);
-	if (x->inner_mode)
-		xfrm_put_mode(x->inner_mode);
-	if (x->inner_mode_iaf)
-		xfrm_put_mode(x->inner_mode_iaf);
-	if (x->outer_mode)
-		xfrm_put_mode(x->outer_mode);
 	if (x->type_offload)
 		xfrm_put_type_offload(x->type_offload);
 	if (x->type) {
 		x->type->destructor(x);
 		xfrm_put_type(x->type);
 	}
+	if (x->xfrag.page)
+		put_page(x->xfrag.page);
 	xfrm_dev_state_free(x);
 	security_xfrm_state_free(x);
 	xfrm_state_free(x);
@@ -474,13 +515,13 @@
 	synchronize_rcu();
 
 	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
-		xfrm_state_gc_destroy(x);
+		___xfrm_state_destroy(x);
 }
 
 static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 {
-	struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
-	struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
+	struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer);
+	enum hrtimer_restart ret = HRTIMER_NORESTART;
 	time64_t now = ktime_get_real_seconds();
 	time64_t next = TIME64_MAX;
 	int warn = 0;
@@ -544,7 +585,8 @@
 		km_state_expired(x, 0, 0);
 resched:
 	if (next != TIME64_MAX) {
-		tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
+		hrtimer_forward_now(&x->mtimer, ktime_set(next, 0));
+		ret = HRTIMER_RESTART;
 	}
 
 	goto out;
@@ -561,7 +603,7 @@
 
 out:
 	spin_unlock(&x->lock);
-	return HRTIMER_NORESTART;
+	return ret;
 }
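/* The tasklet_hrtimer wrapper is gone: the timer is now a plain hrtimer
 * run in softirq context via the _SOFT modes, and re-arming uses the
 * canonical forward-and-restart idiom instead of calling
 * tasklet_hrtimer_start() from inside the handler. Minimal sketch of
 * the pattern, independent of xfrm:
 *
 *	static enum hrtimer_restart my_handler(struct hrtimer *t)
 *	{
 *		hrtimer_forward_now(t, ms_to_ktime(100));
 *		return HRTIMER_RESTART;	\/* keep the timer queued *\/
 *	}
 */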
 
 static void xfrm_replay_timer_handler(struct timer_list *t);
@@ -580,8 +622,8 @@
 		INIT_HLIST_NODE(&x->bydst);
 		INIT_HLIST_NODE(&x->bysrc);
 		INIT_HLIST_NODE(&x->byspi);
-		tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
-					CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
+		hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT);
+		x->mtimer.function = xfrm_timer_handler;
 		timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
 		x->curlft.add_time = ktime_get_real_seconds();
 		x->lft.soft_byte_limit = XFRM_INF;
@@ -590,22 +632,25 @@
 		x->lft.hard_packet_limit = XFRM_INF;
 		x->replay_maxage = 0;
 		x->replay_maxdiff = 0;
-		x->inner_mode = NULL;
-		x->inner_mode_iaf = NULL;
 		spin_lock_init(&x->lock);
 	}
 	return x;
 }
 EXPORT_SYMBOL(xfrm_state_alloc);
 
-void __xfrm_state_destroy(struct xfrm_state *x)
+void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
 {
 	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
-	spin_lock_bh(&xfrm_state_gc_lock);
-	hlist_add_head(&x->gclist, &xfrm_state_gc_list);
-	spin_unlock_bh(&xfrm_state_gc_lock);
-	schedule_work(&xfrm_state_gc_work);
+	if (sync) {
+		synchronize_rcu();
+		___xfrm_state_destroy(x);
+	} else {
+		spin_lock_bh(&xfrm_state_gc_lock);
+		hlist_add_head(&x->gclist, &xfrm_state_gc_list);
+		spin_unlock_bh(&xfrm_state_gc_lock);
+		schedule_work(&xfrm_state_gc_work);
+	}
 }
 EXPORT_SYMBOL(__xfrm_state_destroy);
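/* The new sync flag lets teardown paths (netns exit, tunnel state
 * drops) wait for RCU readers and destroy in place, while ordinary
 * callers keep the cheap deferred gc-work path. The matching header
 * wrappers presumably look like this (hedged sketch of the
 * include/net/xfrm.h side of this change):
 *
 *	static inline void xfrm_state_put(struct xfrm_state *x)
 *	{
 *		if (refcount_dec_and_test(&x->refcnt))
 *			__xfrm_state_destroy(x, false);
 *	}
 *
 *	static inline void xfrm_state_put_sync(struct xfrm_state *x)
 *	{
 *		if (refcount_dec_and_test(&x->refcnt))
 *			__xfrm_state_destroy(x, true);
 *	}
 */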
 
@@ -708,7 +753,7 @@
 }
 #endif
 
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
 {
 	int i, err = 0, cnt = 0;
 
@@ -730,7 +775,10 @@
 				err = xfrm_state_delete(x);
 				xfrm_audit_state_delete(x, err ? 0 : 1,
 							task_valid);
-				xfrm_state_put(x);
+				if (sync)
+					xfrm_state_put_sync(x);
+				else
+					xfrm_state_put(x);
 				if (!err)
 					cnt++;
 
@@ -794,31 +842,86 @@
 {
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 	si->sadcnt = net->xfrm.state_num;
-	si->sadhcnt = net->xfrm.state_hmask;
+	si->sadhcnt = net->xfrm.state_hmask + 1;
 	si->sadhmcnt = xfrm_state_hashmax;
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 }
 EXPORT_SYMBOL(xfrm_sad_getinfo);
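/* state_hmask is the hash mask (bucket count minus one), so reporting
 * it raw understated the SAD hash size by one: a table with hmask 7
 * has 8 buckets, and sadhcnt should say 8.
 */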
 
 static void
+__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
+{
+	const struct flowi4 *fl4 = &fl->u.ip4;
+
+	sel->daddr.a4 = fl4->daddr;
+	sel->saddr.a4 = fl4->saddr;
+	sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
+	sel->dport_mask = htons(0xffff);
+	sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
+	sel->sport_mask = htons(0xffff);
+	sel->family = AF_INET;
+	sel->prefixlen_d = 32;
+	sel->prefixlen_s = 32;
+	sel->proto = fl4->flowi4_proto;
+	sel->ifindex = fl4->flowi4_oif;
+}
+
+static void
+__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
+{
+	const struct flowi6 *fl6 = &fl->u.ip6;
+
+	/* Initialize temporary selector matching only to current session. */
+	*(struct in6_addr *)&sel->daddr = fl6->daddr;
+	*(struct in6_addr *)&sel->saddr = fl6->saddr;
+	sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
+	sel->dport_mask = htons(0xffff);
+	sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
+	sel->sport_mask = htons(0xffff);
+	sel->family = AF_INET6;
+	sel->prefixlen_d = 128;
+	sel->prefixlen_s = 128;
+	sel->proto = fl6->flowi6_proto;
+	sel->ifindex = fl6->flowi6_oif;
+}
+
+static void
 xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
 		    const struct xfrm_tmpl *tmpl,
 		    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		    unsigned short family)
 {
-	struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family);
-
-	if (!afinfo)
-		return;
-
-	afinfo->init_tempsel(&x->sel, fl);
-
-	if (family != tmpl->encap_family) {
-		afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family);
-		if (!afinfo)
-			return;
+	switch (family) {
+	case AF_INET:
+		__xfrm4_init_tempsel(&x->sel, fl);
+		break;
+	case AF_INET6:
+		__xfrm6_init_tempsel(&x->sel, fl);
+		break;
 	}
-	afinfo->init_temprop(x, tmpl, daddr, saddr);
+
+	x->id = tmpl->id;
+
+	switch (tmpl->encap_family) {
+	case AF_INET:
+		if (x->id.daddr.a4 == 0)
+			x->id.daddr.a4 = daddr->a4;
+		x->props.saddr = tmpl->saddr;
+		if (x->props.saddr.a4 == 0)
+			x->props.saddr.a4 = saddr->a4;
+		break;
+	case AF_INET6:
+		if (ipv6_addr_any((struct in6_addr *)&x->id.daddr))
+			memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
+		memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
+		if (ipv6_addr_any((struct in6_addr *)&x->props.saddr))
+			memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
+		break;
+	}
+
+	x->props.mode = tmpl->mode;
+	x->props.reqid = tmpl->reqid;
+	x->props.family = tmpl->encap_family;
 }
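/* Hedged worked example: for an IPv4 TCP flow
 * 10.0.0.1:1234 -> 10.0.0.2:80, __xfrm4_init_tempsel() produces a
 * selector with /32 prefixes on both addresses, 0xffff port masks and
 * the flow's proto/oif, so the larval state acquired from it matches
 * only this one session. The template half of xfrm_init_tempstate()
 * then fills any wildcard (zero/any) template addresses from the
 * packet's own daddr/saddr.
 */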
 
 static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
@@ -1039,7 +1142,9 @@
 				hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 			}
 			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
-			tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
+			hrtimer_start(&x->mtimer,
+				      ktime_set(net->xfrm.sysctl_acq_expires, 0),
+				      HRTIMER_MODE_REL_SOFT);
 			net->xfrm.state_num++;
 			xfrm_hash_grow_check(net, x->bydst.next != NULL);
 			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
@@ -1151,7 +1256,7 @@
 		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 	}
 
-	tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
+	hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
 	if (x->replay_maxage)
 		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
 
@@ -1258,7 +1363,9 @@
 		x->mark.m = m->m;
 		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
 		xfrm_state_hold(x);
-		tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
+		hrtimer_start(&x->mtimer,
+			      ktime_set(net->xfrm.sysctl_acq_expires, 0),
+			      HRTIMER_MODE_REL_SOFT);
 		list_add(&x->km.all, &net->xfrm.state_all);
 		hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 		h = xfrm_src_hash(net, daddr, saddr, family);
@@ -1563,7 +1670,8 @@
 		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
 		x1->km.dying = 0;
 
-		tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
+		hrtimer_start(&x1->mtimer, ktime_set(1, 0),
+			      HRTIMER_MODE_REL_SOFT);
 		if (x1->curlft.use_time)
 			xfrm_state_check_expire(x1);
 
@@ -1602,7 +1710,7 @@
 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
 	    x->curlft.packets >= x->lft.hard_packet_limit) {
 		x->km.state = XFRM_STATE_EXPIRED;
-		tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
+		hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL_SOFT);
 		return -EINVAL;
 	}
 
@@ -1659,51 +1767,129 @@
 EXPORT_SYMBOL(xfrm_find_acq);
 
 #ifdef CONFIG_XFRM_SUB_POLICY
-int
-xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
-	       unsigned short family, struct net *net)
+#if IS_ENABLED(CONFIG_IPV6)
+/* distribution counting sort function for xfrm_state and xfrm_tmpl */
+static void
+__xfrm6_sort(void **dst, void **src, int n,
+	     int (*cmp)(const void *p), int maxclass)
+{
+	int count[XFRM_MAX_DEPTH] = { };
+	int class[XFRM_MAX_DEPTH];
+	int i;
+
+	for (i = 0; i < n; i++) {
+		int c = cmp(src[i]);
+
+		class[i] = c;
+		count[c]++;
+	}
+
+	for (i = 2; i < maxclass; i++)
+		count[i] += count[i - 1];
+
+	for (i = 0; i < n; i++) {
+		dst[count[class[i] - 1]++] = src[i];
+		src[i] = NULL;
+	}
+}
+
+/* Rule for xfrm_state:
+ *
+ * rule 1: select IPsec transport except AH
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec transport AH
+ * rule 4: select IPsec tunnel
+ * rule 5: others
+ */
+static int __xfrm6_state_sort_cmp(const void *p)
+{
+	const struct xfrm_state *v = p;
+
+	switch (v->props.mode) {
+	case XFRM_MODE_TRANSPORT:
+		if (v->id.proto != IPPROTO_AH)
+			return 1;
+		else
+			return 3;
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+	case XFRM_MODE_ROUTEOPTIMIZATION:
+	case XFRM_MODE_IN_TRIGGER:
+		return 2;
+#endif
+	case XFRM_MODE_TUNNEL:
+	case XFRM_MODE_BEET:
+		return 4;
+	}
+	return 5;
+}
+
+/* Rule for xfrm_tmpl:
+ *
+ * rule 1: select IPsec transport
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec tunnel
+ * rule 4: others
+ */
+static int __xfrm6_tmpl_sort_cmp(const void *p)
+{
+	const struct xfrm_tmpl *v = p;
+
+	switch (v->mode) {
+	case XFRM_MODE_TRANSPORT:
+		return 1;
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+	case XFRM_MODE_ROUTEOPTIMIZATION:
+	case XFRM_MODE_IN_TRIGGER:
+		return 2;
+#endif
+	case XFRM_MODE_TUNNEL:
+	case XFRM_MODE_BEET:
+		return 3;
+	}
+	return 4;
+}
+#else
+static inline int __xfrm6_state_sort_cmp(const void *p) { return 5; }
+static inline int __xfrm6_tmpl_sort_cmp(const void *p) { return 4; }
+
+static inline void
+__xfrm6_sort(void **dst, void **src, int n,
+	     int (*cmp)(const void *p), int maxclass)
 {
 	int i;
-	int err = 0;
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		return -EAFNOSUPPORT;
 
-	spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
-	if (afinfo->tmpl_sort)
-		err = afinfo->tmpl_sort(dst, src, n);
+	for (i = 0; i < n; i++)
+		dst[i] = src[i];
+}
+#endif /* CONFIG_IPV6 */
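/* Hedged worked example of the counting sort: four templates whose
 * __xfrm6_tmpl_sort_cmp() classes are [4, 1, 3, 1], with maxclass 5:
 *
 *	histogram:    count[1] = 2, count[3] = 1, count[4] = 1
 *	prefix sums:  count[2] = 2, count[3] = 3, count[4] = 4
 *	placement:    dst slots 3, 0, 2, 1  ->  class order [1, 1, 3, 4]
 *
 * Entries of equal class keep their input order, i.e. the sort is
 * stable, which preserves the relative order of same-class templates
 * and states.
 */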
+
+void
+xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
+	       unsigned short family)
+{
+	int i;
+
+	if (family == AF_INET6)
+		__xfrm6_sort((void **)dst, (void **)src, n,
+			     __xfrm6_tmpl_sort_cmp, 5);
 	else
 		for (i = 0; i < n; i++)
 			dst[i] = src[i];
-	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
-	rcu_read_unlock();
-	return err;
 }
-EXPORT_SYMBOL(xfrm_tmpl_sort);
 
-int
+void
 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
 		unsigned short family)
 {
 	int i;
-	int err = 0;
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	struct net *net = xs_net(*src);
 
-	if (!afinfo)
-		return -EAFNOSUPPORT;
-
-	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	if (afinfo->state_sort)
-		err = afinfo->state_sort(dst, src, n);
+	if (family == AF_INET6)
+		__xfrm6_sort((void **)dst, (void **)src, n,
+			     __xfrm6_state_sort_cmp, 6);
 	else
 		for (i = 0; i < n; i++)
 			dst[i] = src[i];
-	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
-	rcu_read_unlock();
-	return err;
 }
-EXPORT_SYMBOL(xfrm_state_sort);
 #endif
 
 /* Silly enough, but I'm lazy to build resolution list */
@@ -2058,7 +2244,7 @@
 }
 EXPORT_SYMBOL(km_report);
 
-bool km_is_alive(const struct km_event *c)
+static bool km_is_alive(const struct km_event *c)
 {
 	struct xfrm_mgr *km;
 	bool is_alive = false;
@@ -2074,7 +2260,6 @@
 
 	return is_alive;
 }
-EXPORT_SYMBOL(km_is_alive);
 
 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
@@ -2083,10 +2268,8 @@
 	struct xfrm_mgr *km;
 	struct xfrm_policy *pol = NULL;
 
-#ifdef CONFIG_COMPAT
 	if (in_compat_syscall())
 		return -EOPNOTSUPP;
-#endif
 
 	if (!optval && !optlen) {
 		xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
@@ -2189,6 +2372,7 @@
 
 	return rcu_dereference(xfrm_state_afinfo[family]);
 }
+EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu);
 
 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
 {
@@ -2217,43 +2401,55 @@
 		if (atomic_read(&t->tunnel_users) == 2)
 			xfrm_state_delete(t);
 		atomic_dec(&t->tunnel_users);
-		xfrm_state_put(t);
+		xfrm_state_put_sync(t);
 		x->tunnel = NULL;
 	}
 }
 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
 
-int xfrm_state_mtu(struct xfrm_state *x, int mtu)
+u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
 {
 	const struct xfrm_type *type = READ_ONCE(x->type);
+	struct crypto_aead *aead;
+	u32 blksize, net_adj = 0;
 
-	if (x->km.state == XFRM_STATE_VALID &&
-	    type && type->get_mtu)
-		return type->get_mtu(x, mtu);
+	if (x->km.state != XFRM_STATE_VALID ||
+	    !type || type->proto != IPPROTO_ESP)
+		return mtu - x->props.header_len;
 
-	return mtu - x->props.header_len;
+	aead = x->data;
+	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+
+	switch (x->props.mode) {
+	case XFRM_MODE_TRANSPORT:
+	case XFRM_MODE_BEET:
+		if (x->props.family == AF_INET)
+			net_adj = sizeof(struct iphdr);
+		else if (x->props.family == AF_INET6)
+			net_adj = sizeof(struct ipv6hdr);
+		break;
+	case XFRM_MODE_TUNNEL:
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+	return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
+		 net_adj) & ~(blksize - 1)) + net_adj - 2;
 }
+EXPORT_SYMBOL_GPL(xfrm_state_mtu);
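/* Hedged worked example with typical (made-up) ESP parameters:
 * transport mode over IPv4, mtu = 1500, header_len = 24 (8-byte ESP
 * header plus a 16-byte CBC IV), authsize = 16,
 * blksize = ALIGN(16, 4) = 16, net_adj = sizeof(struct iphdr) = 20:
 *
 *	((1500 - 24 - 16 - 20) & ~15) + 20 - 2
 *	  = (1440 & ~15) + 18
 *	  = 1458
 *
 * The mask rounds the payload down to the cipher block size, and the
 * final "- 2" reserves the ESP trailer's pad-length and next-header
 * bytes.
 */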
 
 int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 {
-	struct xfrm_state_afinfo *afinfo;
-	struct xfrm_mode *inner_mode;
+	const struct xfrm_mode *inner_mode;
+	const struct xfrm_mode *outer_mode;
 	int family = x->props.family;
 	int err;
 
-	err = -EAFNOSUPPORT;
-	afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		goto error;
-
-	err = 0;
-	if (afinfo->init_flags)
-		err = afinfo->init_flags(x);
-
-	rcu_read_unlock();
-
-	if (err)
-		goto error;
+	if (family == AF_INET &&
+	    xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
+		x->props.flags |= XFRM_STATE_NOPMTUDISC;
 
 	err = -EPROTONOSUPPORT;
 
@@ -2263,25 +2459,22 @@
 			goto error;
 
 		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
-		    family != x->sel.family) {
-			xfrm_put_mode(inner_mode);
+		    family != x->sel.family)
 			goto error;
-		}
 
-		x->inner_mode = inner_mode;
+		x->inner_mode = *inner_mode;
 	} else {
-		struct xfrm_mode *inner_mode_iaf;
+		const struct xfrm_mode *inner_mode_iaf;
 		int iafamily = AF_INET;
 
 		inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
 		if (inner_mode == NULL)
 			goto error;
 
-		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
-			xfrm_put_mode(inner_mode);
+		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL))
 			goto error;
-		}
-		x->inner_mode = inner_mode;
+
+		x->inner_mode = *inner_mode;
 
 		if (x->props.family == AF_INET)
 			iafamily = AF_INET6;
@@ -2289,9 +2482,7 @@
 		inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
 		if (inner_mode_iaf) {
 			if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
-				x->inner_mode_iaf = inner_mode_iaf;
-			else
-				xfrm_put_mode(inner_mode_iaf);
+				x->inner_mode_iaf = *inner_mode_iaf;
 		}
 	}
 
@@ -2305,12 +2496,13 @@
 	if (err)
 		goto error;
 
-	x->outer_mode = xfrm_get_mode(x->props.mode, family);
-	if (x->outer_mode == NULL) {
+	outer_mode = xfrm_get_mode(x->props.mode, family);
+	if (!outer_mode) {
 		err = -EPROTONOSUPPORT;
 		goto error;
 	}
 
+	x->outer_mode = *outer_mode;
 	if (init_replay) {
 		err = xfrm_init_replay(x);
 		if (err)
@@ -2377,8 +2569,8 @@
 	unsigned int sz;
 
 	flush_work(&net->xfrm.state_hash_work);
-	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
 	flush_work(&xfrm_state_gc_work);
+	xfrm_state_flush(net, 0, false, true);
 
 	WARN_ON(!list_empty(&net->xfrm.state_all));
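/* At netns teardown the flush is now synchronous (sync == true) and
 * runs after the gc worker has been drained, so every state is freed
 * before the namespace goes away. Passing proto 0 rather than
 * IPSEC_PROTO_ANY matches every state, not just AH/ESP/COMP, since
 * xfrm_id_proto_match() treats a zero userproto as a wildcard.
 */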
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 5669198..b88ba45 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* xfrm_user.c: User interface to configure xfrm engine.
  *
  * Copyright (C) 2002 David S. Miller (davem@redhat.com)
@@ -151,6 +152,25 @@
 	err = -EINVAL;
 	switch (p->family) {
 	case AF_INET:
+		break;
+
+	case AF_INET6:
+#if IS_ENABLED(CONFIG_IPV6)
+		break;
+#else
+		err = -EAFNOSUPPORT;
+		goto out;
+#endif
+
+	default:
+		goto out;
+	}
+
+	switch (p->sel.family) {
+	case AF_UNSPEC:
+		break;
+
+	case AF_INET:
 		if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32)
 			goto out;
 
@@ -1006,8 +1026,8 @@
 		u8 proto = 0;
 		int err;
 
-		err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy,
-				  NULL);
+		err = nlmsg_parse_deprecated(cb->nlh, 0, attrs, XFRMA_MAX,
+					     xfrma_policy, cb->extack);
 		if (err < 0)
 			return err;
 
@@ -1424,7 +1444,7 @@
 	ret = verify_policy_dir(p->dir);
 	if (ret)
 		return ret;
-	if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir))
+	if (p->index && (xfrm_policy_id2dir(p->index) != p->dir))
 		return -EINVAL;
 
 	return 0;
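/* Why this matters (hedged, from the include/net/xfrm.h definitions,
 * where xfrm_policy_id2dir() is "index & 7" and XFRM_POLICY_MAX is 3):
 * masking with XFRM_POLICY_MAX covers only two bits, so an index with
 * bit 2 set, e.g. index = 4, gave (4 & 3) == 0 and passed the check
 * for dir 0, while xfrm_policy_id2dir(4) == 4 correctly rejects it.
 */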
@@ -1488,10 +1508,15 @@
 		if (!ut[i].family)
 			ut[i].family = family;
 
-		if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
-		    (ut[i].family != prev_family))
-			return -EINVAL;
-
+		switch (ut[i].mode) {
+		case XFRM_MODE_TUNNEL:
+		case XFRM_MODE_BEET:
+			break;
+		default:
+			if (ut[i].family != prev_family)
+				return -EINVAL;
+			break;
+		}
 		if (ut[i].mode >= XFRM_MODE_MAX)
 			return -EINVAL;
 
@@ -1508,20 +1533,8 @@
 			return -EINVAL;
 		}
 
-		switch (ut[i].id.proto) {
-		case IPPROTO_AH:
-		case IPPROTO_ESP:
-		case IPPROTO_COMP:
-#if IS_ENABLED(CONFIG_IPV6)
-		case IPPROTO_ROUTING:
-		case IPPROTO_DSTOPTS:
-#endif
-		case IPSEC_PROTO_ANY:
-			break;
-		default:
+		if (!xfrm_id_proto_valid(ut[i].id.proto))
 			return -EINVAL;
-		}
-
 	}
 
 	return 0;
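/* xfrm_id_proto_valid() centralizes the removed open-coded switch;
 * presumably (hedged sketch of the include/net/xfrm.h helper this
 * release adds):
 *
 *	static inline bool xfrm_id_proto_valid(u8 proto)
 *	{
 *		switch (proto) {
 *		case IPPROTO_AH:
 *		case IPPROTO_ESP:
 *		case IPPROTO_COMP:
 *	#if IS_ENABLED(CONFIG_IPV6)
 *		case IPPROTO_ROUTING:
 *		case IPPROTO_DSTOPTS:
 *	#endif
 *			return true;
 *		default:
 *			return false;
 *		}
 *	}
 *
 * Note IPSEC_PROTO_ANY is deliberately no longer accepted here: it is
 * only meaningful for flush operations, not for templates.
 */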
@@ -1927,7 +1940,7 @@
 	struct xfrm_usersa_flush *p = nlmsg_data(nlh);
 	int err;
 
-	err = xfrm_state_flush(net, p->proto, true);
+	err = xfrm_state_flush(net, p->proto, true, false);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
 			return 0;
@@ -2621,10 +2634,8 @@
 	const struct xfrm_link *link;
 	int type, err;
 
-#ifdef CONFIG_COMPAT
 	if (in_compat_syscall())
 		return -EOPNOTSUPP;
-#endif
 
 	type = nlh->nlmsg_type;
 	if (type > XFRM_MSG_MAX)
@@ -2653,9 +2664,9 @@
 		}
 	}
 
-	err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs,
-			  link->nla_max ? : XFRMA_MAX,
-			  link->nla_pol ? : xfrma_policy, extack);
+	err = nlmsg_parse_deprecated(nlh, xfrm_msg_min[type], attrs,
+				     link->nla_max ? : XFRMA_MAX,
+				     link->nla_pol ? : xfrma_policy, extack);
 	if (err < 0)
 		return err;